In [None]:
# Define device
import torch
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', device)

Device: cuda


In [None]:
import torchvision.transforms as T

# Preprocessing pipeline: convert each image to a tensor, then apply
# RandomAutocontrast.
# NOTE(review): this same pipeline is passed to the validation dataset below,
# so the random op also runs at evaluation time -- confirm that is intended.
transform = T.Compose([
  T.ToTensor(),
  T.RandomAutocontrast(),
])

In [None]:
# Load Existing Dataset
import torchvision.datasets as dset

# Training split (train=True), downloaded into ./train on first use.
train_data = dset.MNIST(
  root="train",
  train=True,
  download=True,
  transform=transform,
)
# Held-out split (train=False), downloaded into ./val on first use.
val_data = dset.MNIST(
  root="val",
  train=False,
  download=True,
  transform=transform,
)

In [None]:
# Check Data Dimension
# Show only the image shape and its label; displaying the sample itself dumps
# every pixel value into the cell output and hides the dimensions.
sample_image, sample_label = val_data[500]
sample_image.shape, sample_label

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 

In [None]:
# Number of samples in each split.
num_train, num_val = len(train_data), len(val_data)
print('Number of training:', num_train)
print('Number of validation:', num_val)

Number of training: 60000
Number of validation: 10000


In [None]:
# Build Model
import torch.nn as nn
# model = nn.Sequential(
#     # N x 1 x 28 x 28
#     nn.Conv2d(1, 64, 3, 1, 1),
#     nn.BatchNorm2d(64),
#     nn.ReLU(),
#     nn.MaxPool2d(2, 2),
#     # N x 64 x 14 x 14
#     nn.Flatten(),
#     nn.Linear(in_features=64*14*14, out_features=10)
# )

# class MyModel(nn.Module):
#   def __init__(self):
#     super().__init__()
#     self.conv2d = nn.Conv2d(1, 64, 3, 1, 1)
#     self.bn2d = nn.BatchNorm2d(64)
#     self.fc = nn.Linear(64*14*14, 10)
#   def forward(self, x):
#     out = self.conv2d(x)
#     out = self.bn2d(out)
#     out = nn.functional.relu(out)
#     out = nn.functional.max_pool2d(out, (2, 2))
#     out = torch.flatten(out, 1)
#     out = self.fc(out)
#     return out

# model = MyModel()

class MyRNN(nn.Module):
  """LSTM classifier that reads an image one row at a time.

  Each image is treated as a sequence whose time steps are its rows. The LSTM
  output at the last time step is mapped to class scores by a linear layer.

  Args:
    input_size: number of features per time step (pixels per image row).
    hidden_size: size of the LSTM hidden state.
    output_size: number of classes to score.
  """

  def __init__(self, input_size, hidden_size, output_size):
    super().__init__()
    # batch_first=True: the LSTM consumes (N, seq_len, input_size) tensors.
    self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, x):
    """Compute class scores for a batch of images.

    Args:
      x: tensor of shape (N, 1, H, W) or (N, H, W), with W == input_size.

    Returns:
      Tensor of shape (N, output_size) holding unnormalised class scores.
    """
    # Remove only the singleton channel axis. A bare squeeze() would also
    # remove the batch axis when N == 1, which makes the LSTM treat the input
    # as unbatched and breaks the 3-D indexing below.
    if x.dim() == 4:
      x = x.squeeze(1)
    output, _ = self.lstm(x)
    # Output at the final time step; for this single-layer, unidirectional
    # LSTM it equals the final hidden state h_n[-1].
    out = output[:, -1, :]
    return self.fc(out)

# One LSTM step per image row: 28 input features, 64 hidden units, 10 classes.
model = MyRNN(input_size=28, hidden_size=64, output_size=10)

In [None]:
# Move model to the selected device (GPU when available, otherwise CPU).
# Using `device` instead of .cuda() keeps the notebook runnable on CPU-only
# machines, matching the fallback chosen when `device` was defined.
model = model.to(device)

In [None]:
# Create Mini-batches
from torch.utils.data import DataLoader
# Shuffle only the training batches. Accuracy over the whole validation set
# does not depend on batch order, so validation keeps a fixed order.
mini_trains = DataLoader(train_data, batch_size=128, shuffle=True)
mini_vals = DataLoader(val_data, batch_size=128, shuffle=False)

In [None]:
# Training Procedure
def train(num_epoch, model, mini_trains, mini_vals, device, loss_function, optimizer):
  """Optimise `model` on `mini_trains` for `num_epoch` passes.

  Every 10th training step (starting with the first step of each epoch) the
  model is evaluated on `mini_vals` through `evaluate_predictor`.

  Args:
    num_epoch: number of passes over `mini_trains`.
    model: module being trained; updated in place.
    mini_trains: iterable yielding (inputs, labels) training batches.
    mini_vals: iterable of validation batches handed to `evaluate_predictor`.
    device: device the batches are moved to before the forward pass.
    loss_function: callable mapping (scores, labels) to a scalar loss.
    optimizer: optimizer holding the parameters of `model`.
  """
  for epoch in range(num_epoch):
    for step, (x, y) in enumerate(mini_trains):
      # Re-enter training mode on every step: the periodic evaluation below
      # switches the model to eval mode.
      model.train()
      inputs, targets = x.to(device), y.to(device)
      loss = loss_function(model(inputs), targets)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      if step % 10 == 0:
        evaluate_predictor(model, epoch, mini_vals, device)

In [None]:
# Validating Procedure
def evaluate_predictor(model, epoch, mini_vals, device):
  """Print the classification accuracy of `model` over `mini_vals`.

  The accuracy is the number of correct predictions divided by the number of
  samples actually seen in `mini_vals`, so the result is correct for any
  loader and does not rely on a notebook-level sample count.

  Args:
    model: module producing class scores of shape (N, num_classes).
    epoch: zero-based epoch index; printed one-based.
    mini_vals: iterable yielding (inputs, labels) batches.
    device: device the batches are moved to before the forward pass.

  Note:
    The model is left in eval mode; callers that continue training must call
    `model.train()` again.
  """
  model.eval()
  acc_count = 0
  total = 0
  with torch.no_grad():
    for x, y in mini_vals:
      x = x.to(device)
      y = y.to(device)
      scores = model(x)
      # max over the class axis returns (values, indices); indices are the
      # predicted labels.
      predictions = scores.max(1)[1]
      acc_count += predictions.eq(y).sum().item()
      total += y.size(0)
  # An empty loader reports 0.0 instead of dividing by zero.
  accuracy = acc_count / total if total else 0.0
  print(f'Epoch[{epoch+1}] Acc: {accuracy}')

In [None]:
# Define loss function & optimizer
import torch.optim as optim
# Cross-entropy on the raw class scores, minimised with Adam (lr = 0.001).
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [None]:
# Start training
# Single pass over the training batches, with periodic validation printouts.
train(
  num_epoch=1,
  model=model,
  mini_trains=mini_trains,
  mini_vals=mini_vals,
  device=device,
  loss_function=loss_function,
  optimizer=optimizer,
)

Epoch[1] Acc: 0.0967
Epoch[1] Acc: 0.1964
Epoch[1] Acc: 0.1159
Epoch[1] Acc: 0.2172
Epoch[1] Acc: 0.2755
Epoch[1] Acc: 0.3694
Epoch[1] Acc: 0.4042
Epoch[1] Acc: 0.5359
Epoch[1] Acc: 0.6003
Epoch[1] Acc: 0.6274
Epoch[1] Acc: 0.6636
Epoch[1] Acc: 0.6996
Epoch[1] Acc: 0.7227
Epoch[1] Acc: 0.7429
Epoch[1] Acc: 0.7501
Epoch[1] Acc: 0.789
Epoch[1] Acc: 0.7891
Epoch[1] Acc: 0.805
Epoch[1] Acc: 0.8257
Epoch[1] Acc: 0.8078
Epoch[1] Acc: 0.84
Epoch[1] Acc: 0.8314
Epoch[1] Acc: 0.8545
Epoch[1] Acc: 0.8616
Epoch[1] Acc: 0.8605
Epoch[1] Acc: 0.852
Epoch[1] Acc: 0.863
Epoch[1] Acc: 0.8675
Epoch[1] Acc: 0.8692
Epoch[1] Acc: 0.874
Epoch[1] Acc: 0.8814
Epoch[1] Acc: 0.8911
Epoch[1] Acc: 0.8959
Epoch[1] Acc: 0.8913
Epoch[1] Acc: 0.895
Epoch[1] Acc: 0.9028
Epoch[1] Acc: 0.9066
Epoch[1] Acc: 0.9067
Epoch[1] Acc: 0.9003
Epoch[1] Acc: 0.9127
Epoch[1] Acc: 0.9094
Epoch[1] Acc: 0.905
Epoch[1] Acc: 0.9097
Epoch[1] Acc: 0.9103
Epoch[1] Acc: 0.9111
Epoch[1] Acc: 0.9103
Epoch[1] Acc: 0.9125
