In [21]:
import torch
import torch.nn as nn

import tensorflow as tf
from tensorflow.keras.datasets import mnist

### Dataset and Preprocessing

In [22]:
# Load MNIST through Keras as NumPy arrays: X/Xtest hold the 28x28 images
# and y/ytest hold the matching labels for the train and test splits.
(X, y), (Xtest, ytest) = mnist.load_data()

In [23]:
# Shapes of the raw arrays, in the order X, y, Xtest, ytest.
tuple(arr.shape for arr in (X, y, Xtest, ytest))

((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))

In [24]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = 'cuda'
else:
  device = 'cpu'
device

'cuda'

In [25]:
from sklearn.preprocessing import StandardScaler

# Flatten each 28x28 image into a 784-long row and standardise every pixel
# column. The scaler is fitted on the training images only and then applied
# to both splits, so no test-set statistics leak into preprocessing.
scaler = StandardScaler()
scaler.fit(X.reshape(-1, 28*28))
X, Xtest = (scaler.transform(arr.reshape(-1, 28*28)) for arr in (X, Xtest))

In [26]:
# Confirm both splits are now flattened to (n_samples, 784).
tuple(arr.shape for arr in (X, Xtest))

((60000, 784), (10000, 784))

In [27]:
from torch.utils.data import Dataset, DataLoader
# Seed PyTorch's global random number generator.
torch.manual_seed(42)

<torch._C.Generator at 0x78a415599590>

In [38]:
class customDataset(Dataset):
  """In-memory dataset that pairs feature rows with integer class labels.

  Args:
    X: Array-like of features; converted to a ``torch.float32`` tensor.
    Y: Array-like of labels; converted to a ``torch.long`` tensor.

  Raises:
    ValueError: If ``X`` and ``Y`` hold a different number of samples.
  """

  def __init__(self, X, Y):
    self.X = torch.tensor(X, dtype=torch.float32)
    self.Y = torch.tensor(Y, dtype=torch.long)
    # Fail fast: a mismatch would otherwise only surface later as an
    # IndexError mid-epoch, or silently drop the surplus labels.
    if len(self.X) != len(self.Y):
      raise ValueError(
          f'X and Y must have the same number of samples, '
          f'got {len(self.X)} and {len(self.Y)}'
      )

  def __len__(self):
    """Return the number of samples."""
    return len(self.X)

  def __getitem__(self, i):
    """Return the ``(features, label)`` pair stored at index ``i``."""
    return self.X[i], self.Y[i]

In [47]:
train_dataset = customDataset(X, y)
test_dataset = customDataset(Xtest, ytest)

# Page-locked (pinned) host memory only speeds up host-to-GPU copies, so it is
# requested only when running on CUDA; on a CPU-only machine it would just
# trigger a warning. Training batches are reshuffled every epoch, while the
# test order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, pin_memory=(device == 'cuda'))
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, pin_memory=(device == 'cuda'))

In [59]:
# Sanity check: number of training samples and the shape of one feature row.
len(train_dataset), train_dataset[0][0].size()

(60000, torch.Size([784]))

In [49]:
# SGD step size and the number of full passes over the training set.
learning_rate, epoch = 0.01, 100

## Define the model and move it to the selected device

In [50]:
class NN(nn.Module):
  """Fully connected classifier: input_dim -> 128 -> 64 -> 10.

  A ReLU follows each hidden layer; the final layer emits 10 raw scores
  (logits), one per class.
  """

  def __init__(self, input_dim):
    super().__init__()
    widths = (input_dim, 128, 64)
    stack = []
    # Hidden layers: each Linear is immediately followed by its ReLU.
    for fan_in, fan_out in zip(widths, widths[1:]):
      stack += [nn.Linear(fan_in, fan_out), nn.ReLU()]
    # Output layer has no activation.
    stack.append(nn.Linear(widths[-1], 10))
    self.network = nn.Sequential(*stack)

  def forward(self, X):
    """Run a batch of feature rows through the network and return the logits."""
    logits = self.network(X)
    return logits

In [51]:
# Size the input layer from the flattened feature count, place the model on
# the chosen device, and display its architecture.
model = NN(input_dim=X.shape[1]).to(device)
model

NN(
  (network): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [52]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install torchinfo



In [53]:
from torchinfo import summary

# Report per-layer output shapes and parameter counts for a dummy batch of
# 32 samples with 784 features each.
summary(model, input_size=(32, 784))

Layer (type:depth-idx)                   Output Shape              Param #
NN                                       [32, 10]                  --
├─Sequential: 1-1                        [32, 10]                  --
│    └─Linear: 2-1                       [32, 128]                 100,480
│    └─ReLU: 2-2                         [32, 128]                 --
│    └─Linear: 2-3                       [32, 64]                  8,256
│    └─ReLU: 2-4                         [32, 64]                  --
│    └─Linear: 2-5                       [32, 10]                  650
Total params: 109,386
Trainable params: 109,386
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 3.50
Input size (MB): 0.10
Forward/backward pass size (MB): 0.05
Params size (MB): 0.44
Estimated Total Size (MB): 0.59

In [54]:
# Cross-entropy on the raw logits, optimised with plain SGD.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Put the model in training mode explicitly, so re-running this cell after the
# evaluation cell (which calls model.eval()) still trains in the right mode.
model.train()

for i in range(epoch):
  total_epoch_loss = 0
  for batch_x, batch_y in train_loader:
    # non_blocking lets copies from pinned host memory overlap with GPU work;
    # it has no effect when the batch is not pinned or the device is the CPU.
    batch_x = batch_x.to(device, non_blocking=True)
    batch_y = batch_y.to(device, non_blocking=True)
    y_pred = model(batch_x)
    loss = criterion(y_pred, batch_y)

    # Clear gradients left over from the previous step before backpropagating.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    total_epoch_loss += loss.item()

  # Mean of the per-batch losses for this epoch.
  avg_loss = total_epoch_loss / len(train_loader)
  print(f'Epoch: {i + 1} | loss : {avg_loss}')

Epoch: 1 | loss : 1.703406460757957
Epoch: 2 | loss : 0.5820148932272946
Epoch: 3 | loss : 0.35377516008135096
Epoch: 4 | loss : 0.2822814269551336
Epoch: 5 | loss : 0.2440109831183704
Epoch: 6 | loss : 0.21835045047851007
Epoch: 7 | loss : 0.1991207319885683
Epoch: 8 | loss : 0.18398203846138678
Epoch: 9 | loss : 0.1712875057861749
Epoch: 10 | loss : 0.16054166160793956
Epoch: 11 | loss : 0.1511322701854238
Epoch: 12 | loss : 0.1427418673391154
Epoch: 13 | loss : 0.13526298522726812
Epoch: 14 | loss : 0.128412158671282
Epoch: 15 | loss : 0.1222240128067892
Epoch: 16 | loss : 0.11648146117896413
Epoch: 17 | loss : 0.1113391098167215
Epoch: 18 | loss : 0.10643171096470819
Epoch: 19 | loss : 0.10197826473713556
Epoch: 20 | loss : 0.09781946859824886
Epoch: 21 | loss : 0.0937740312122714
Epoch: 22 | loss : 0.09015564523589636
Epoch: 23 | loss : 0.08664511514505915
Epoch: 24 | loss : 0.08346144807364132
Epoch: 25 | loss : 0.08035134774710197
Epoch: 26 | loss : 0.07738410347480891
Epoch: 27

In [55]:
# Switch the model to evaluation mode before measuring test accuracy.
model.eval()

NN(
  (network): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=10, bias=True)
  )
)

In [56]:
# Running counts of evaluated and correctly classified test samples.
total = 0
correct = 0

# Inference needs no gradients, so autograd tracking is disabled.
with torch.no_grad():
  for batch_x, batch_y in test_loader:
    batch_x = batch_x.to(device)
    batch_y = batch_y.to(device)
    y_pred = model(batch_x)
    # The predicted class is the index of the largest logit in each row.
    # argmax replaces torch.max(y_pred.data, 1): `.data` is unnecessary under
    # no_grad, and no throwaway `_` global is created.
    predicted = y_pred.argmax(dim=1)
    total += batch_y.shape[0]
    correct += (predicted == batch_y).sum().item()

print(f'Accuracy: {100 * correct / total}')

Accuracy: 97.48
