<a href="https://colab.research.google.com/github/bassoline/DeepLearning/blob/main/LeNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
%matplotlib inline 
import matplotlib.pyplot as plt 
import os 
import time 
from typing import Iterable 
from dataclasses import dataclass 
import numpy as np 
import torch 
import torch.nn as nn 
from torchvision import datasets, transforms

In [8]:
class LeNet5(nn.Module):
  """LeNet-5 style convolutional classifier with 10 output classes.

  The layer sizes assume single-channel 32x32 inputs: two 5x5 convolutions,
  each followed by ReLU and 2x2 max pooling, reduce the image to 16 feature
  maps of 5x5, which three fully connected layers map to 10 logits.
  """

  def __init__(self):
    super().__init__()

    # Feature extractor (convolution layers).
    self._body = nn.Sequential(
        # (1, 32, 32) -> (6, 28, 28)
        nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5),
        nn.ReLU(inplace=True),
        # (6, 28, 28) -> (6, 14, 14)
        nn.MaxPool2d(kernel_size=2),
        # (6, 14, 14) -> (16, 10, 10)
        nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
        nn.ReLU(inplace=True),
        # (16, 10, 10) -> (16, 5, 5)
        nn.MaxPool2d(kernel_size=2)
    )

    # Classifier (fully connected layers).
    self._head = nn.Sequential(
        # in_features = number of activations leaving the feature extractor
        # (16 channels of 5x5), not the number of weights.
        nn.Linear(in_features=16*5*5, out_features=120),
        nn.ReLU(inplace=True),
        nn.Linear(in_features=120, out_features=84),
        nn.ReLU(inplace=True),
        # One raw score (logit) per class; no softmax is applied here.
        nn.Linear(in_features=84, out_features=10)
    )

  def forward(self, x):
    """Compute class scores for a batch of images.

    Args:
      x: input batch; the layer sizes assume shape (batch, 1, 32, 32).

    Returns:
      Tensor of shape (batch, 10) holding one unnormalized score per class.
    """
    # The method must be named ``forward`` so that nn.Module dispatches
    # ``model(x)`` to it, and the input must actually be passed to the body.
    x = self._body(x)
    # Collapse (channels, height, width) into one feature vector per sample.
    x = torch.flatten(x, start_dim=1)
    return self._head(x)


In [9]:
# Instantiate the network and print its layer structure.
lenet5_model = LeNet5()
print(lenet5_model)

LeNet5(
  (_body): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (_head): Sequential(
    (0): Linear(in_features=400, out_features=120, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): ReLU(inplace=True)
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)


In [10]:
# get mnist data 
def get_data(batch_size, data_root='data', num_workers=1):
  """Create the MNIST training and test data loaders.

  Both splits are downloaded into ``data_root`` if necessary and share the
  same preprocessing. Only the training loader shuffles its samples.

  Args:
    batch_size: number of samples per batch for both loaders.
    data_root: directory where the MNIST files are stored.
    num_workers: number of worker processes each loader uses.

  Returns:
    A ``(train_loader, test_loader)`` tuple.
  """
  preprocess = transforms.Compose([
      # Resize every image to 32x32.
      transforms.Resize((32, 32)),
      # Convert to a tensor with values scaled to the range 0-1.
      transforms.ToTensor(),
      # Normalize takes (mean, std): mean 0.1307, standard deviation 0.3081.
      transforms.Normalize((0.1307, ), (0.3081, )),
  ])

  # Build the training split first, then the test split; the split flag also
  # decides whether the loader shuffles.
  loaders = []
  for is_train in (True, False):
    split = datasets.MNIST(
        root=data_root, train=is_train, download=True, transform=preprocess)
    loaders.append(
        torch.utils.data.DataLoader(
            split,
            batch_size=batch_size,
            shuffle=is_train,
            num_workers=num_workers,
        )
    )

  train_loader, test_loader = loaders
  return train_loader, test_loader

In [11]:
# common settings for reproducibility 
@dataclass
class SystemConfiguration:
  """System-level settings consumed by ``setup_system``."""

  # Seed handed to the random number generator.
  seed: int = 42
  # Requested cuDNN benchmark setting (a performance option).
  cudnn_benchmark_enabled: bool = True
  # Requested cuDNN deterministic setting (a reproducibility option).
  cudnn_deterministic: bool = True

In [12]:
@dataclass
class TrainingConfiguration:
  """Hyperparameters and runtime options for a training run."""

  # Number of samples processed per iteration.
  batch_size: int = 32
  # Number of full passes over the training data.
  epochs_count: int = 20
  # Step size used when updating the weights from the gradients.
  learning_rate: float = 0.01
  # Number of batches between status log messages.
  log_interval: int = 100
  # Number of epochs between evaluation runs.
  test_interval: int = 1
  # Folder in which the MNIST data is saved.
  data_root: str = "data"
  # Number of concurrent processes used to prepare data.
  num_workers: int = 10
  # Device used for training.
  device: str = 'cuda'

In [13]:
# checks for GPU availability and sets up the system for you 
def setup_system(system_config: SystemConfiguration) -> None:
  """Seed PyTorch and apply the cuDNN settings from ``system_config``.

  The seed is always set. The cuDNN flags are only touched when a CUDA
  device is available.

  Args:
    system_config: provides ``seed``, ``cudnn_benchmark_enabled`` and
      ``cudnn_deterministic``.
  """
  torch.manual_seed(system_config.seed)
  if torch.cuda.is_available():
    # The benchmark switch lives at ``torch.backends.cudnn.benchmark``.
    # Assigning to ``torch.backends.cudnn_benchmark_enabled`` merely creates
    # an unused attribute, so the configured value would be ignored.
    torch.backends.cudnn.benchmark = system_config.cudnn_benchmark_enabled
    torch.backends.cudnn.deterministic = system_config.cudnn_deterministic

In [14]:
# training 
def train(
    train_config: TrainingConfiguration, model: nn.Module, optimizer: torch.optim.Optimizer,
    train_loader: torch.utils.data.DataLoader, epoch_idx: int
) -> None: 
  # set model to train mode
  model.train() 
  # for storing loss and accuracy 
  batch_loss = np.array([])
  batch_acc = np.array([])

  for batch_idx, (data, target) in enumerate(train_loader): 
    # clone the target 
    indx_target = target.clone()
    # send data and target to device (for GPU) doesn't hurt if there is not GPU
    data = data.to(train_config.device)
    target = train.to(train_config.device)
    # set gradient to zero 
    optimizier.zero_grad()
    # forward pass 
    output = model(data)
    # cross entropy loss 
    loss = F.cross_entropy(output, target)
    