In [1]:
import torch
import numpy as np
import torch.nn as nn
import torchvision
from torchvision.datasets import MNIST

In [2]:
# Fetch MNIST into the local data/ directory (download = True requests the download).
# No transform is given here; the transformed train/test datasets are built in a later cell.
dataset = MNIST(root = 'data/', download = True)

In [3]:
import torchvision.transforms as transforms

In [4]:
# Build the train and test datasets from the files under data/.
# transforms.ToTensor() is applied to every image in both datasets; only the `train` flag differs.
train_dataset = MNIST(root = 'data/', train = True, transform = transforms.ToTensor())
test_dataset = MNIST(root = 'data/', train = False, transform = transforms.ToTensor())

In [5]:
# Split configuration: number of samples and the fraction held out for validation.
n = 60000
val_pct = 0.2
def split_indices(n, val_pct, seed=None):
  """Randomly partition the indices 0..n-1 into training and validation sets.

  Args:
    n: Total number of samples to split.
    val_pct: Fraction of the samples (between 0 and 1 inclusive) assigned to
      the validation set; the count is truncated with int().
    seed: Optional seed for a reproducible split. When None (the default) the
      permutation is drawn from NumPy's global random state.

  Returns:
    A tuple (train_idxs, val_idxs) of NumPy index arrays that are disjoint and
    together contain every index exactly once.

  Raises:
    ValueError: If val_pct lies outside [0, 1].
  """
  if not 0 <= val_pct <= 1:
    raise ValueError('val_pct must be between 0 and 1, got {}'.format(val_pct))
  # Determine the size of the validation set
  n_val = int(n * val_pct)
  # Create a random permutation of 0 to n-1
  if seed is None:
    idxs = np.random.permutation(n)
  else:
    # A dedicated generator keeps the seeded split independent of global state
    idxs = np.random.default_rng(seed).permutation(n)
  # The first n_val shuffled indices form the validation set, the rest train
  return idxs[n_val:], idxs[:n_val]

In [6]:
# Use the configured validation fraction rather than repeating the literal value.
train_indices, val_indices = split_indices(len(train_dataset), val_pct=val_pct)

In [7]:
# Report both split sizes, then show the first 20 validation indices.
print(f'{len(train_indices)} {len(val_indices)}')
print('Sample val indices: ', val_indices[:20])

48000 12000
Sample val indices:  [56126 32417  1139 17614 47999 51769 55729  9498  9298 19659  8908 44359
 10678 39190 20490 34771 30571 36613 38717 10158]


In [8]:
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader

In [9]:
batch_size = 100

# Training batches: sampled at random from the training indices only
train_sampler = SubsetRandomSampler(train_indices)
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler)

# Validation batches: same underlying dataset, restricted to the held-out indices
val_sampler = SubsetRandomSampler(val_indices)
val_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=val_sampler)

In [10]:
# Peek at the first training batch only: its labels, then the image tensor shape.
for images, labels in train_loader:
  print(labels, images.shape, sep='\n')
  break

tensor([3, 2, 8, 6, 9, 9, 1, 0, 8, 8, 0, 9, 0, 1, 7, 8, 2, 7, 0, 2, 2, 3, 5, 5,
        3, 9, 5, 2, 5, 4, 0, 9, 9, 7, 9, 7, 2, 1, 8, 8, 1, 3, 6, 6, 5, 4, 8, 1,
        4, 0, 2, 3, 0, 7, 1, 9, 9, 5, 8, 1, 1, 1, 8, 9, 2, 0, 4, 2, 6, 4, 9, 2,
        5, 5, 5, 6, 4, 6, 4, 0, 3, 5, 8, 2, 7, 7, 8, 3, 5, 6, 3, 6, 8, 1, 5, 4,
        1, 4, 3, 1])
torch.Size([100, 1, 28, 28])


In [11]:
# Network dimensions.
# Each image is 1x28x28 (see the batch shape printed above), i.e. 784 values once flattened.
input_size = 28*28
# Width of the single hidden layer.
hidden_size = 64
# One output score per class.
num_classes = 10

In [12]:
class MnistDeepModel(nn.Module):
  """Feed-forward classifier with one ReLU-activated hidden layer.

  The network is linear -> ReLU -> linear and returns the raw outputs of the
  second linear layer (no softmax is applied here).
  """
  def __init__(self, input_size, hidden_size, output_size):
    """Create the two linear layers.

    Args:
      input_size: Number of features per sample after flattening.
      hidden_size: Number of units in the hidden layer.
      output_size: Number of output scores per sample.
    """
    super().__init__()
    self.linear1 = nn.Linear(input_size, hidden_size)
    self.linear2 = nn.Linear(hidden_size, output_size)

  def forward(self, xb):
    """Flatten each sample in the batch and return the output-layer scores.

    Args:
      xb: Batch tensor whose elements can be reshaped to rows of
        `input_size` features.

    Returns:
      Tensor of shape (batch, output_size).
    """
    # Flatten with the width this instance was built with, not a module-level
    # global, so the model works for any input_size passed to __init__.
    xb = xb.reshape(-1, self.linear1.in_features)
    # nn.functional is reached through the already-imported `nn`, so the class
    # does not depend on an alias imported in a later cell.
    hidden = nn.functional.relu(self.linear1(xb))
    return self.linear2(hidden)

In [13]:
# Instantiate the network with the sizes configured above (28*28 inputs, 64 hidden units, 10 outputs).
model = MnistDeepModel(input_size, hidden_size, num_classes)

In [14]:
import torch.nn.functional as F

In [15]:
# Loss function handed to evaluate(); `F` is torch.nn.functional, imported in the preceding cell.
loss_fn = F.cross_entropy

In [16]:
# Plain SGD over all of the model's parameters with a fixed learning rate.
learning_rate = 0.001
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [17]:
def loss_batch(model, loss_func, xb, yb, opt = None, metric = None):
  """Compute the loss for one batch and optionally take an optimisation step.

  Args:
    model: Callable mapping the input batch to predictions.
    loss_func: Callable taking (predictions, targets) and returning a scalar
      loss tensor.
    xb: Input batch.
    yb: Targets for the batch.
    opt: Optional optimizer. When given, gradients are computed for this batch
      and one update step is applied; when None the batch is only scored.
    metric: Optional callable taking (predictions, targets).

  Returns:
    A tuple (loss_value, batch_size, metric_result) where loss_value is a
    Python float, batch_size is len(xb), and metric_result is None when no
    metric was supplied.
  """
  # Calculate loss
  preds = model(xb)
  loss = loss_func(preds, yb)
  if opt is not None:
    # Discard any gradients left over from earlier backward passes so that the
    # update below reflects this batch only
    opt.zero_grad()
    # Compute gradients
    loss.backward()
    # Update the parameters
    opt.step()
    # Reset the gradients so none are carried past this call
    opt.zero_grad()
  metric_result = None
  if metric is not None:
    # Compute metric
    metric_result = metric(preds, yb)
  return loss.item(), len(xb), metric_result

In [18]:
def evaluate(model, loss_func, valid_dl, metric = None):
  """Score the model on every batch of a data loader without tracking gradients.

  Args:
    model: Model passed through to loss_batch.
    loss_func: Loss callable passed through to loss_batch.
    valid_dl: Iterable yielding (inputs, targets) batches.
    metric: Optional metric callable passed through to loss_batch.

  Returns:
    A tuple (avg_loss, total, avg_metric): the batch-size-weighted mean loss,
    the total number of samples seen, and the batch-size-weighted mean metric
    (None when no metric was supplied).
  """
  with torch.no_grad():
    # Score each batch; no optimizer is passed, so nothing is updated
    per_batch = []
    for xb, yb in valid_dl:
      per_batch.append(loss_batch(model, loss_func, xb, yb, metric = metric))
    # Transpose the per-batch triples into three parallel sequences
    losses, nums, metrics = zip(*per_batch)
    # Number of samples across all batches
    total = np.sum(nums)
    # Weight each batch's loss by its size before averaging
    avg_loss = np.sum(np.multiply(losses, nums)) / total
    if metric is None:
      avg_metric = None
    else:
      # Same size-weighted average for the metric
      avg_metric = np.sum(np.multiply(metrics, nums)) / total
  return avg_loss, total, avg_metric

In [19]:
def accuracy(outputs, labels):
  """Return the fraction of rows whose highest-scoring column equals the label."""
  # Index of the largest score along dim 1 is the predicted class
  predicted = torch.max(outputs, dim=1).indices
  n_correct = (predicted == labels).sum().item()
  return n_correct / len(predicted)

In [20]:
# Baseline check: loss and accuracy on the validation loader before fit() is run.
val_loss, total, val_acc = evaluate(model, loss_fn, val_loader, metric = accuracy)
print('Loss: {:.4f}, Accuracy: {:.4f}'.format(val_loss, val_acc))

Loss: 2.3226, Accuracy: 0.0899


In [21]:
def fit(epochs, model, loss_fn, opt, train_dl, valid_dl, metric = None):
  """Train for a number of epochs, printing validation results after each one.

  Args:
    epochs: Number of passes over train_dl.
    model: Model to train.
    loss_fn: Loss callable handed to loss_batch and evaluate.
    opt: Optimizer used for the per-batch update steps.
    train_dl: Iterable of (inputs, targets) training batches.
    valid_dl: Iterable of (inputs, targets) validation batches.
    metric: Optional metric callable; when given, its __name__ and the
      validation value are included in the progress line.
  """
  for epoch_idx in range(epochs):
    # One optimisation step per training batch
    for xb, yb in train_dl:
      loss_batch(model, loss_fn, xb, yb, opt)

    # Validation pass; the sample count is not needed here
    val_loss, _, val_metric = evaluate(model, loss_fn, valid_dl, metric)

    # Progress line, with the metric appended when one was supplied
    if metric is not None:
      print('Epoch [{}/{}], Loss: {:.4f}, {}: {:.4f}'.format(epoch_idx+1, epochs, val_loss, metric.__name__, val_metric))
    else:
      print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch_idx+1, epochs, val_loss))

In [22]:
# Train for 80 epochs using the configured loss function (loss_fn is F.cross_entropy),
# with keyword arguments so each of the seven inputs is unambiguous.
fit(
  epochs=80,
  model=model,
  loss_fn=loss_fn,
  opt=optimizer,
  train_dl=train_loader,
  valid_dl=val_loader,
  metric=accuracy,
)

Epoch [1/80], Loss: 2.2303, accuracy: 0.2200
Epoch [2/80], Loss: 2.1238, accuracy: 0.4525
Epoch [3/80], Loss: 1.9984, accuracy: 0.6130
Epoch [4/80], Loss: 1.8549, accuracy: 0.6889
Epoch [5/80], Loss: 1.6979, accuracy: 0.7222
Epoch [6/80], Loss: 1.5382, accuracy: 0.7446
Epoch [7/80], Loss: 1.3867, accuracy: 0.7647
Epoch [8/80], Loss: 1.2512, accuracy: 0.7783
Epoch [9/80], Loss: 1.1347, accuracy: 0.7926
Epoch [10/80], Loss: 1.0368, accuracy: 0.8030
Epoch [11/80], Loss: 0.9553, accuracy: 0.8131
Epoch [12/80], Loss: 0.8875, accuracy: 0.8194
Epoch [13/80], Loss: 0.8308, accuracy: 0.8253
Epoch [14/80], Loss: 0.7831, accuracy: 0.8295
Epoch [15/80], Loss: 0.7424, accuracy: 0.8336
Epoch [16/80], Loss: 0.7076, accuracy: 0.8386
Epoch [17/80], Loss: 0.6774, accuracy: 0.8419
Epoch [18/80], Loss: 0.6510, accuracy: 0.8453
Epoch [19/80], Loss: 0.6278, accuracy: 0.8492
Epoch [20/80], Loss: 0.6072, accuracy: 0.8522
Epoch [21/80], Loss: 0.5888, accuracy: 0.8542
Epoch [22/80], Loss: 0.5722, accuracy: 0.85

In [23]:
def predict_image(img, model):
  """Return the index of the highest-scoring class for a single image.

  Args:
    img: Tensor for one sample, without a batch dimension.
    model: Callable mapping a batch to per-class scores of shape (batch, classes).

  Returns:
    The predicted class index as a Python int.
  """
  # Add a leading batch dimension so the model sees a batch of one
  xb = img.unsqueeze(0)
  # Pure inference: skip autograd bookkeeping, no gradients are needed here
  with torch.no_grad():
    yb = model(xb)
  _, preds = torch.max(yb, dim=1)
  return preds[0].item()