<a href="https://colab.research.google.com/github/bala1802/ERA/blob/main/Part-2/S6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import random_split
import matplotlib.pyplot as plt

In [2]:
class Net(nn.Module):
    """Small CNN mapping 1-channel images to log-probabilities over 10 classes.

    Architecture: four 3x3 convolution stages (4 -> 16 -> 32 -> 64 channels),
    each followed by BatchNorm and ReLU; the first three also halve the
    spatial size with 2x2 max-pooling. A global average pool reduces the
    final feature map to a 64-vector, which goes through dropout (p=0.2)
    and a linear layer with 10 outputs.

    The attribute names and the order of layers inside each ``nn.Sequential``
    determine the ``state_dict`` keys, so they must stay as they are for saved
    weights to remain loadable.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, padding=1),
            nn.BatchNorm2d(4),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=4, out_channels=16, kernel_size=3, padding=1),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True))
        # Collapses whatever spatial size is left to 1x1, so the classifier
        # head does not depend on the input resolution.
        self.global_avg_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=64, out_features=10))

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images.

        Args:
            x: tensor of shape (N, 1, H, W), or a single unbatched image of
                shape (1, H, W), which is treated as a batch of one.

        Returns:
            Tensor of shape (N, 10) holding log-softmax scores (N is 1 for an
            unbatched input).
        """
        # BatchNorm2d only accepts 4-D input, so a lone (C, H, W) image has to
        # gain its batch axis *before* the first conv stage. Doing this check
        # after conv4 (as was done previously) could never take effect.
        if x.dim() == 3:
            x = x.unsqueeze(0)

        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)

        x = self.global_avg_pool(x)
        x = torch.flatten(x, 1)  # (N, 64, 1, 1) -> (N, 64)
        x = self.fc(x)
        return F.log_softmax(x, dim=1)

In [3]:
# Instantiate the network once just to report how many weights are trainable.
model = Net()
num_params = sum(
    map(torch.Tensor.numel,
        filter(lambda weight: weight.requires_grad, model.parameters()))
)
print("Number of parameters in the model: {}".format(num_params))

Number of parameters in the model: 24650


In [4]:
# Needs the third-party `torchsummary` package:
# !pip install torchsummary
from torchsummary import summary

# Use the GPU when one is visible; later cells move the model and every
# batch onto `device`.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Layer-by-layer output shapes and parameter counts for a 1x28x28 input.
model = Net()
model = model.to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 4, 28, 28]              40
       BatchNorm2d-2            [-1, 4, 28, 28]               8
              ReLU-3            [-1, 4, 28, 28]               0
         MaxPool2d-4            [-1, 4, 14, 14]               0
            Conv2d-5           [-1, 16, 14, 14]             592
       BatchNorm2d-6           [-1, 16, 14, 14]              32
              ReLU-7           [-1, 16, 14, 14]               0
         MaxPool2d-8             [-1, 16, 7, 7]               0
            Conv2d-9             [-1, 32, 7, 7]           4,640
      BatchNorm2d-10             [-1, 32, 7, 7]              64
             ReLU-11             [-1, 32, 7, 7]               0
        MaxPool2d-12             [-1, 32, 3, 3]               0
           Conv2d-13             [-1, 64, 3, 3]          18,496
      BatchNorm2d-14             [-1, 6

In [5]:
# Seed torch's global RNG so the split, shuffling and weight init that follow
# are repeatable on a fresh kernel.
torch.manual_seed(1)
batch_size = 128
# Extra DataLoader options are only passed when CUDA is available.
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

In [6]:
# One preprocessing pipeline shared by both splits: convert to a tensor, then
# normalise with mean 0.1307 / std 0.3081 (presumably the MNIST pixel
# statistics - confirm if the dataset changes).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Only the training call passes download=True.
train_dataset = datasets.MNIST('../data', train=True, download=True, transform=mnist_transform)
test_dataset = datasets.MNIST('../data', train=False, transform=mnist_transform)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 148061649.73it/s]


Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 102919026.19it/s]


Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 34754096.32it/s]


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 26606883.75it/s]


Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



In [7]:
# Divide the training data into an 80/20 `train_dataset` / `val_dataset` split.
#
# `train_dataset` is rebound at the end of this cell. If the cell is executed a
# second time the name already points at the 80% Subset, and splitting *that*
# would silently shrink the training data again. Recover the underlying full
# dataset first so the cell always splits the same source.
if isinstance(train_dataset, torch.utils.data.Subset):
    full_train_dataset = train_dataset.dataset
else:
    full_train_dataset = train_dataset

train_dataset_size = int(0.8 * len(full_train_dataset))
# Derive the remainder by subtraction so the two sizes always sum to the total.
validation_dataset_size = len(full_train_dataset) - train_dataset_size

train_dataset, val_dataset = random_split(full_train_dataset, [train_dataset_size, validation_dataset_size])

In [8]:
# Only the training loader reshuffles each epoch. The validation and test
# loaders iterate in a fixed order: shuffling them adds nothing to evaluation,
# changes which samples land in the short final batch from run to run, and
# consumes global RNG state that the training shuffle also draws from.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, **kwargs)
validation_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, **kwargs)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, **kwargs)

In [9]:
# Number of batches per loader (train, validation, test).
tuple(map(len, (train_loader, validation_loader, test_loader)))

(375, 94, 79)

In [10]:
# Number of samples per split (train, validation, test).
tuple(len(split) for split in (train_dataset, val_dataset, test_dataset))

(48000, 12000, 10000)

In [11]:
import time
import copy

# Bookkeeping consumed by the training loop.
epochs = 19

# Per-epoch history, one entry appended per completed epoch.
train_losses = []
val_losses = []
train_accu = []
val_accu = []

start_time = time.time()

# Early stopping: give up once the validation loss has failed to improve for
# this many consecutive epochs, to limit overfitting.
early_stop_counter = 10
counter = 0                      # epochs elapsed since the last improvement
best_val_loss = float('Inf')     # any finite first validation loss beats this

In [12]:
# Fresh, untrained network for the real run, placed on the selected device.
model = Net().to(device)

# The network ends in log_softmax, so NLLLoss on its output is equivalent to
# cross-entropy loss.
criterion = nn.NLLLoss()

# Adam with lr=0.1; lr=1e-2 is the alternative setting.
optimizer = optim.Adam(model.parameters(), lr=0.1)

In [13]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over `loader` and return (mean loss, accuracy).

    Args:
        model: network whose output is a (batch, classes) score tensor.
        loader: iterable yielding (images, labels) batches.
        criterion: loss taking (model output, labels); assumed to return the
            *mean* loss over the batch (the default for nn.NLLLoss).
        device: device the batches are moved to before the forward pass.
        optimizer: when given, the model is put in training mode and updated
            after every batch; when None, the model is put in eval mode and
            the pass runs with gradients disabled.

    Returns:
        Tuple of Python floats (loss, accuracy), both averaged per sample so a
        smaller final batch is not over-weighted.

    Raises:
        ValueError: if the loader yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum, n_correct, n_seen = 0.0, 0, 0
    with torch.set_grad_enabled(is_training):
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            if is_training:
                optimizer.zero_grad()
            log_ps = model(images)
            loss = criterion(log_ps, labels)
            if is_training:
                loss.backward()
                optimizer.step()

            batch_n = labels.size(0)
            # .item() turns the metrics into plain numbers, so the history
            # lists never hold (possibly GPU-resident) tensors.
            loss_sum += loss.item() * batch_n
            # argmax over log-probabilities picks the same class as over
            # probabilities, so no exp() is needed.
            n_correct += (log_ps.argmax(dim=1) == labels).sum().item()
            n_seen += batch_n

    if n_seen == 0:
        raise ValueError("loader yielded no samples")
    return loss_sum / n_seen, n_correct / n_seen


# Start from the current weights so there is always something to restore,
# even if no epoch ever improves on `best_val_loss`.
best_model_wts = copy.deepcopy(model.state_dict())

for e in range(epochs):
    epoch_start_time = time.time()

    train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
    val_loss, val_acc = _run_epoch(model, validation_loader, criterion, device)

    train_losses.append(train_loss)
    train_accu.append(train_acc)
    val_losses.append(val_loss)
    val_accu.append(val_acc)

    print("Epoch: {}/{}.. ".format(e+1, epochs), "Time: {:.2f}s..".format(time.time()-epoch_start_time), "Training Loss: {:.3f}.. ".format(train_losses[-1]),
          "Training Accu: {:.3f}.. ".format(train_accu[-1]), "Val Loss: {:.3f}.. ".format(val_losses[-1]), "Val Accu: {:.3f}".format(val_accu[-1]))

    if val_loss < best_val_loss:
        # New best: remember these weights and reset the patience counter.
        best_val_loss = val_loss
        counter = 0
        best_model_wts = copy.deepcopy(model.state_dict())
    else:
        counter += 1
        print('Validation loss has not improved since: {:.3f}..'.format(best_val_loss), 'Count: ', str(counter))
        if counter >= early_stop_counter:
            print('Early Stopping Now!!!!')
            break

# Restore the best-validation weights whether the loop stopped early or ran
# through every epoch; otherwise a full run would keep the last epoch's
# weights even when an earlier epoch validated better.
model.load_state_dict(best_model_wts)

Epoch: 1/19..  Time: 17.44s.. Training Loss: 0.333..  Training Accu: 0.893..  Val Loss: 0.085..  Val Accu: 0.971
Epoch: 2/19..  Time: 17.43s.. Training Loss: 0.090..  Training Accu: 0.973..  Val Loss: 0.108..  Val Accu: 0.969
Validation loss has not improved since: 0.085.. Count:  1
Epoch: 3/19..  Time: 16.70s.. Training Loss: 0.074..  Training Accu: 0.978..  Val Loss: 0.070..  Val Accu: 0.980
Epoch: 4/19..  Time: 16.71s.. Training Loss: 0.065..  Training Accu: 0.981..  Val Loss: 0.078..  Val Accu: 0.977
Validation loss has not improved since: 0.070.. Count:  1
Epoch: 5/19..  Time: 16.68s.. Training Loss: 0.064..  Training Accu: 0.981..  Val Loss: 0.123..  Val Accu: 0.967
Validation loss has not improved since: 0.070.. Count:  2
Epoch: 6/19..  Time: 17.79s.. Training Loss: 0.058..  Training Accu: 0.983..  Val Loss: 0.055..  Val Accu: 0.983
Epoch: 7/19..  Time: 16.76s.. Training Loss: 0.055..  Training Accu: 0.983..  Val Loss: 0.094..  Val Accu: 0.975
Validation loss has not improved si