<a href="https://colab.research.google.com/github/jagatabhay/miscellaneous/blob/master/S6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

from itertools import chain
from inspect import signature
from functools import lru_cache as cache
from collections import defaultdict, namedtuple
#from torch.utils import 

**Target**: Create an architecture with fewer than 10k parameters, choosing the input and output channels accordingly. Here I have chosen channel counts between 10 and 20.

**Analysis**: Adding dropout after max pooling yielded better accuracy than
adding it after each layer.

In [0]:
class BatchNorm(nn.BatchNorm2d):
    """``nn.BatchNorm2d`` with identity initialisation and optionally frozen affine terms.

    Args:
        num_features: number of channels to normalise.
        eps: value forwarded to ``nn.BatchNorm2d``.
        momentum: value forwarded to ``nn.BatchNorm2d``.
        weight: whether the scale parameter receives gradients.
        bias: whether the shift parameter receives gradients.
    """

    def __init__(self, num_features, eps=1e-05, momentum=0.1, weight=True, bias=True):
        super().__init__(num_features, eps=eps, momentum=momentum)
        # Start as an identity affine transform: scale 1, shift 0.
        nn.init.ones_(self.weight)
        nn.init.zeros_(self.bias)
        # Freeze the scale and/or the shift when the caller asks for it.
        self.weight.requires_grad = weight
        self.bias.requires_grad = bias
        
class GhostBatchNorm(BatchNorm):
    """Batch norm that keeps separate statistics for ``num_splits`` sub-batches.

    In training mode the batch is viewed as ``(N / num_splits, C * num_splits, H, W)``,
    so every (split, channel) pair is normalised with its own statistics. This
    view requires the batch size to be a multiple of ``num_splits``. In eval
    mode the input is normalised as a whole using the first ``num_features``
    running statistics.
    """

    def __init__(self, num_features, num_splits, **kw):
        super().__init__(num_features, **kw)
        self.num_splits = num_splits
        # Running statistics hold one slot per (split, channel) pair.
        stat_len = num_features * self.num_splits
        self.register_buffer('running_mean', torch.zeros(stat_len))
        self.register_buffer('running_var', torch.ones(stat_len))

    def _collate(self, stat):
        """Average a running statistic over the splits and tile it back to full length."""
        per_split = stat.view(self.num_splits, self.num_features)
        return torch.mean(per_split, dim=0).repeat(self.num_splits)

    def train(self, mode=True):
        # Lazily collate the per-split stats only on the train -> eval transition,
        # i.e. right before they are going to be used.
        leaving_training = (self.training is True) and (mode is False)
        if leaving_training:
            self.running_mean = self._collate(self.running_mean)
            self.running_var = self._collate(self.running_var)
        return super().train(mode)

    def forward(self, input):
        N, C, H, W = input.shape
        if not self.training and self.track_running_stats:
            # Eval path: after collation every split holds the same values,
            # so the first num_features entries are sufficient.
            return F.batch_norm(
                input,
                self.running_mean[:self.num_features],
                self.running_var[:self.num_features],
                self.weight, self.bias, False, self.momentum, self.eps)

        # Training path: fold the splits into the channel axis so that each
        # split is normalised independently, then restore the original shape.
        splits = self.num_splits
        folded = input.view(-1, C * splits, H, W)
        normalised = F.batch_norm(
            folded, self.running_mean, self.running_var,
            self.weight.repeat(splits), self.bias.repeat(splits),
            True, self.momentum, self.eps)
        return normalised.view(N, C, H, W)

In [0]:
class Net(nn.Module):
    """Small MNIST classifier that normalises with ``GhostBatchNorm``.

    The layer sizes assume input of shape (N, 1, 28, 28): the final 7x7 average
    pool and ``view(-1, 10)`` rely on the spatial sizes noted in ``__init__``.
    Every ``GhostBatchNorm`` uses 2 splits, so in training mode the batch size
    must be even.

    Returns:
        Log-probabilities of shape (N, 10), suitable for ``F.nll_loss``.
    """

    def __init__(self):
        super(Net, self).__init__()
        # 28x28 -> 26x26
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=10, kernel_size=(3, 3), padding=0),
            nn.ReLU(),
            GhostBatchNorm(10, 2),
        )
        # 26x26 -> 24x24
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=16, kernel_size=(3, 3), padding=0),
            nn.ReLU(),
            GhostBatchNorm(16, 2),
        )
        # 24x24 -> 22x22
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=20, kernel_size=(3, 3), padding=0),
            nn.ReLU(),
            GhostBatchNorm(20, 2),
        )
        # 22x22 -> 11x11
        self.pool1 = nn.MaxPool2d(2, 2)

        # 1x1 convolution reducing 20 channels to 10 (11x11 stays 11x11).
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=20, out_channels=10, kernel_size=(1, 1), padding=0),
            nn.ReLU(),
            GhostBatchNorm(10, 2),
            nn.Dropout(0.1),
        )
        # 11x11 -> 9x9
        # NOTE(review): this block applies BatchNorm2d *and* GhostBatchNorm, unlike
        # every other block. It looks unintentional, but it is kept as-is so that
        # the parameter count and state_dict keys of saved checkpoints still match.
        self.conv5 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=20, kernel_size=(3, 3), padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=20),
            GhostBatchNorm(20, 2),
        )
        # 9x9 -> 7x7
        self.conv6 = nn.Sequential(
            nn.Conv2d(in_channels=20, out_channels=16, kernel_size=(3, 3), padding=0),
            nn.ReLU(),
            GhostBatchNorm(16, 2),
        )
        # 1x1 convolution producing the 10 class channels; no activation or norm.
        self.conv7 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=10, kernel_size=(1, 1), padding=0)
        )
        # Average over the 7x7 map -> 1x1 per class.
        self.gap = nn.Sequential(nn.AvgPool2d(kernel_size=7))

        # Dropout applied right after the max-pooling layer in forward().
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.pool1(x)
        x = self.dropout(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.gap(x)

        # Flatten (N, 10, 1, 1) to (N, 10).
        x = x.view(-1, 10)

        # Pass dim explicitly: the implicit-dim form is deprecated and emits a
        # warning. For this 2-D tensor the class axis is dim=1.
        return F.log_softmax(x, dim=1)

Summary of our model architecture.

1. Used Batch Normalization at every layer and dropout after max pooling layer

2. Used GAP of 7*7 at the end

In [0]:
class Net2(nn.Module):
    """Small MNIST classifier that normalises with standard ``nn.BatchNorm2d``.

    The layer sizes assume input of shape (N, 1, 28, 28): the final 7x7 average
    pool and ``view(-1, 10)`` rely on the spatial sizes noted in ``__init__``.

    Returns:
        Log-probabilities of shape (N, 10), suitable for ``F.nll_loss``.
    """

    def __init__(self):
        super(Net2, self).__init__()
        # 28x28 -> 26x26
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=10, kernel_size=(3, 3), padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=10),
        )
        # 26x26 -> 24x24
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=16, kernel_size=(3, 3), padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=16),
        )
        # 24x24 -> 22x22
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=20, kernel_size=(3, 3), padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=20),
        )
        # 22x22 -> 11x11
        self.pool1 = nn.MaxPool2d(2, 2)

        # 1x1 convolution reducing 20 channels to 10 (11x11 stays 11x11).
        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=20, out_channels=10, kernel_size=(1, 1), padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=10),
            nn.Dropout(0.1),
        )
        # 11x11 -> 9x9
        self.conv5 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=20, kernel_size=(3, 3), padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=20),
        )
        # 9x9 -> 7x7
        self.conv6 = nn.Sequential(
            nn.Conv2d(in_channels=20, out_channels=16, kernel_size=(3, 3), padding=0),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=16),
        )
        # 1x1 convolution producing the 10 class channels; no activation or norm.
        self.conv7 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=10, kernel_size=(1, 1), padding=0)
        )
        # Average over the 7x7 map -> 1x1 per class.
        self.gap = nn.Sequential(nn.AvgPool2d(kernel_size=7))

        # Dropout applied right after the max-pooling layer in forward().
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.pool1(x)
        x = self.dropout(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.conv6(x)
        x = self.conv7(x)
        x = self.gap(x)

        # Flatten (N, 10, 1, 1) to (N, 10).
        x = x.view(-1, 10)

        # Pass dim explicitly: the implicit-dim form is deprecated and emits a
        # warning. For this 2-D tensor the class axis is dim=1.
        return F.log_softmax(x, dim=1)



In [0]:
def summary_of_model1():
    """Build a fresh ``Net`` (GhostBatchNorm variant), print its summary and return it.

    The model is moved to CUDA when available, otherwise it stays on the CPU.
    ``torchsummary`` is installed only if it cannot be imported, instead of
    shelling out to ``pip`` on every call; this also keeps the function valid
    plain Python outside of IPython.

    Returns:
        The newly constructed (untrained) model on the selected device.
    """
    try:
        from torchsummary import summary
    except ImportError:
        import importlib
        import subprocess
        import sys
        subprocess.check_call([sys.executable, "-m", "pip", "install", "torchsummary"])
        # Make the freshly installed package visible to the import system.
        importlib.invalidate_caches()
        from torchsummary import summary

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Net().to(device)
    summary(model, input_size=(1, 28, 28))
    return model

def summary_of_model2():
    """Build a fresh ``Net2`` (BatchNorm variant), print its summary and return it.

    The model is moved to CUDA when available, otherwise it stays on the CPU.
    ``torchsummary`` is installed only if it cannot be imported, instead of
    shelling out to ``pip`` on every call; this also keeps the function valid
    plain Python outside of IPython.

    Returns:
        The newly constructed (untrained) model on the selected device.
    """
    try:
        from torchsummary import summary
    except ImportError:
        import importlib
        import subprocess
        import sys
        subprocess.check_call([sys.executable, "-m", "pip", "install", "torchsummary"])
        # Make the freshly installed package visible to the import system.
        importlib.invalidate_caches()
        from torchsummary import summary

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Net2().to(device)
    summary(model, input_size=(1, 28, 28))
    return model


Apply RandomRotation of up to 7 degrees to the images for better accuracy. I also tried

other values such as 5, 6 and 10 degrees; 7 was the best.

 Convert the image to a tensor and normalize its pixels. RandomAffine image augmentation is also used.

We apply image augmentation to the training dataset only, not to the test dataset.
ColorJitter (with different parameter combinations) and the Resize transform were also tried, but they did not improve accuracy, so they are commented out.

In [0]:


def loading_of_dataset1(batch_size=256):
    """Create the MNIST train/test loaders used by the GhostBatchNorm experiments.

    The global torch RNG is re-seeded with 1 on every call. Only the training
    set is augmented (random rotation followed by random affine); the test set
    is only converted to a tensor and normalised.

    Args:
        batch_size: batch size for both loaders (default 256, the value the
            function originally hard-coded).

    Returns:
        ``(train_loader, test_loader)``.
    """
    torch.manual_seed(1)

    # Pinned memory only helps host-to-GPU copies; requesting it without CUDA
    # just triggers a warning, so enable it only when a GPU is present.
    kwargs = {'num_workers': 1, 'pin_memory': torch.cuda.is_available()}

    # presumably the MNIST mean/std -- TODO confirm
    normalize = transforms.Normalize((0.1307,), (0.3081,))
    train_transform = transforms.Compose([
        transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
        transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        transforms.ToTensor(),
        normalize,
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True, transform=train_transform),
        batch_size=batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=test_transform),
        batch_size=batch_size, shuffle=True, **kwargs)
    return train_loader, test_loader
  
def loading_of_dataset2(batch_size=128):
    """Create the MNIST train/test loaders used by the BatchNorm experiments.

    The global torch RNG is re-seeded with 1 on every call. Only the training
    set is augmented (random rotation followed by random affine); the test set
    is only converted to a tensor and normalised.

    Args:
        batch_size: batch size for both loaders (default 128, the value the
            function originally hard-coded).

    Returns:
        ``(train_loader, test_loader)``.
    """
    torch.manual_seed(1)

    # Pinned memory only helps host-to-GPU copies; requesting it without CUDA
    # just triggers a warning, so enable it only when a GPU is present.
    kwargs = {'num_workers': 1, 'pin_memory': torch.cuda.is_available()}

    # presumably the MNIST mean/std -- TODO confirm
    normalize = transforms.Normalize((0.1307,), (0.3081,))
    train_transform = transforms.Compose([
        transforms.RandomRotation((-7.0, 7.0), fill=(1,)),
        transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        transforms.ToTensor(),
        normalize,
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True, transform=train_transform),
        batch_size=batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=test_transform),
        batch_size=batch_size, shuffle=True, **kwargs)
    return train_loader, test_loader

In [0]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

In [0]:
from tqdm import tqdm
# Module-level accumulators shared by train(), test() and the experiment functions.
train_losses=[]  # appended to once per training batch by train()
#test_losses=[]
train_acc=[]  # running training accuracy (%), appended once per batch by train()
test_acc=[]  # test accuracy (%) per epoch for the experiment currently running
t_loss=[]  # average test loss per epoch for the experiment currently running
total_loss=[]  # one t_loss list per finished experiment
total_acc=[]  # one test_acc list per finished experiment
def train(model, device, train_loader, optimizer, epoch,scheduler,L1lambda=None):
    """Train ``model`` for one epoch and advance the LR scheduler once.

    Appends the per-batch loss (as a Python float) to the module-level
    ``train_losses`` and the running accuracy (%) to ``train_acc``.

    Args:
        model: network returning log-probabilities (the loss is ``F.nll_loss``).
        device: device the batches are moved to.
        train_loader: iterable of ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: epoch number; accepted for interface compatibility, not used.
        scheduler: LR scheduler stepped once at the end of the epoch
            (``None`` to skip).
        L1lambda: optional L1 coefficient; when truthy,
            ``L1lambda * sum(|p|)`` over all model parameters is added to the loss.
    """
    model.train()
    pbar = tqdm(train_loader)
    correct = 0
    processed = 0
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        y_pred = model(data)
        loss = F.nll_loss(y_pred, target)
        if L1lambda:
            # L1 penalty over every parameter tensor of the model.
            l1_loss = sum(param.abs().sum() for param in model.parameters())
            loss = loss + L1lambda * l1_loss
        loss.backward()
        optimizer.step()

        # Store a plain float: keeping the tensor itself would hold on to
        # device memory and autograd references for every batch ever trained.
        loss_value = loss.item()
        train_losses.append(loss_value)

        pred = y_pred.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)
        pbar.set_description(desc= f'loss={loss_value} batch_id={batch_idx} Accuracy={100*correct/processed:0.2f}')
        train_acc.append(100*correct/processed)

    # The scheduler was previously accepted but never stepped, so the learning
    # rate never changed. Step it once per epoch, after the optimizer updates.
    if scheduler is not None:
        scheduler.step()


def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and record the results.

    Prints the average NLL loss and the accuracy over the whole dataset, then
    appends the loss to the module-level ``t_loss`` and the accuracy (%) to
    ``test_acc``.
    """
    model.eval()
    n_samples = len(test_loader.dataset)
    summed_nll = 0
    n_correct = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            log_probs = model(images)
            # Sum (not mean) per batch so that dividing by the dataset size
            # afterwards gives the true per-sample average.
            summed_nll += F.nll_loss(log_probs, labels, reduction='sum').item()
            # Index of the largest log-probability is the predicted class.
            guesses = log_probs.argmax(dim=1, keepdim=True)
            n_correct += guesses.eq(labels.view_as(guesses)).sum().item()

    mean_nll = summed_nll / n_samples
    accuracy = 100. * n_correct / n_samples

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(mean_nll, n_correct, n_samples, accuracy))
    t_loss.append(mean_nll)
    test_acc.append(accuracy)




Training uses 15 epochs. The StepLR scheduler from the lr_scheduler module with a step_size of 5 gives better accuracy than a step_size of 4 or 6. This value was found by trial and error.

In [0]:
def misclassified_images():
    """Plot up to 25 test digits misclassified by the saved GhostBatchNorm model.

    Loads the state dict from
    ``/content/gdrive/My Drive/S6_Assignment_Misclasssified_GBN.pt`` into a
    fresh model from ``summary_of_model1()`` and draws the misclassified
    inputs on a 5x5 grid. Stops after 25 images, or when the test set is
    exhausted if fewer errors exist.
    """
    max_images = 25
    _, test_loader = loading_of_dataset1()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Build the model and load the checkpoint once, not once per batch.
    model = summary_of_model1()
    model_save_name = 'S6_Assignment_Misclasssified_GBN.pt'
    path = F"/content/gdrive/My Drive/{model_save_name}"
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host.
    model.load_state_dict(torch.load(path, map_location=device))
    model.eval()

    figure = plt.figure()
    # suptitle titles the whole figure rather than an axes hidden behind the grid.
    figure.suptitle('Misclassified images when using GBN')
    print("with GBN misclassified image")

    count = 0
    with torch.no_grad():
        # Iterating the loader directly avoids the removed iterator.next()
        # method and ends cleanly when the data runs out.
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            pred = model(data).argmax(dim=1)  # index of the max log-probability
            # Use the real batch length: the last batch may be smaller.
            for a in range(len(data)):
                if pred[a] != target[a]:
                    plt.subplot(5, 5, count + 1)
                    plt.axis('off')
                    plt.imshow(data[a].cpu().numpy().squeeze(), cmap='gray_r')
                    count += 1
                    if count == max_images:
                        return


Finding 25 misclassified images using Batch Normalization in the model

In [0]:
def misclassified_images_BN():
    """Plot up to 25 test digits misclassified by the saved BatchNorm model.

    Loads the state dict from
    ``/content/gdrive/My Drive/S6_Assignment_Misclasssified_BN.pt`` into a
    fresh model from ``summary_of_model2()`` and draws the misclassified
    inputs on a 5x5 grid. Stops after 25 images, or when the test set is
    exhausted if fewer errors exist.
    """
    max_images = 25
    _, test_loader = loading_of_dataset2()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Build the model and load the checkpoint once, not once per batch.
    model = summary_of_model2()
    model_save_name = 'S6_Assignment_Misclasssified_BN.pt'
    path = F"/content/gdrive/My Drive/{model_save_name}"
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only host.
    model.load_state_dict(torch.load(path, map_location=device))
    model.eval()

    figure = plt.figure()
    # suptitle titles the whole figure rather than an axes hidden behind the grid.
    figure.suptitle('Misclassified images when using BN')
    print("with BN misclassified image")

    count = 0
    with torch.no_grad():
        # Iterating the loader directly avoids the removed iterator.next()
        # method and ends cleanly when the data runs out.
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            pred = model(data).argmax(dim=1)  # index of the max log-probability
            # Use the real batch length: the last batch may be smaller.
            for a in range(len(data)):
                if pred[a] != target[a]:
                    plt.subplot(5, 5, count + 1)
                    plt.axis('off')
                    plt.imshow(data[a].cpu().numpy().squeeze(), cmap='gray_r')
                    count += 1
                    if count == max_images:
                        return



**Target:**
1. Getting an accuracy of 99.4 with fewer than 10k parameters in under 15 epochs.
Results:
1. Parameters: 9736 
2. Best Train Accuracy:98.09
3. Best Test Accuracy: 99.45

**Analysis:**

1. The model is doing well: it is not overfitting, and its accuracy is consistent.

2. Looking at image samples, we see that adding slight rotation can improve accuracy.
3. Image augmentation and a well-chosen step size for the LR scheduler can improve accuracy.

4. Adding too many image augmentation techniques is counterproductive: it reduces accuracy or has little impact.

In [0]:
from torch.optim.lr_scheduler import StepLR

def l1_l2_with_GBN():
    """Train ``Net`` (GhostBatchNorm) with both L1 and L2 regularisation.

    Runs epochs 1..24 and then moves this run's per-epoch test losses and
    accuracies into ``total_loss`` / ``total_acc``, resetting the per-run lists.
    """
    global t_loss, test_acc
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = summary_of_model1()
    train_loader, test_loader = loading_of_dataset1()
    # L2 via the optimizer's weight_decay; L1 via train()'s L1lambda argument.
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-5)
    scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
    print("....L1_L2_GBN...")
    for epoch in range(1, 25):
        train(model, device, train_loader, optimizer, epoch, scheduler, L1lambda=1e-5)
        test(model, device, test_loader)
    # Archive this run's curves and start fresh lists for the next experiment.
    total_loss.append(t_loss)
    total_acc.append(test_acc)
    t_loss, test_acc = [], []

def l1_with_GBN():
    """Train ``Net`` (GhostBatchNorm) with L1 regularisation only.

    Runs epochs 1..24 and then moves this run's per-epoch test losses and
    accuracies into ``total_loss`` / ``total_acc``, resetting the per-run lists.
    """
    global t_loss, test_acc
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = summary_of_model1()
    train_loader, test_loader = loading_of_dataset1()
    # No weight_decay here; the L1 term is added by train() via L1lambda.
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
    print("...L1_with_GBN..")
    for epoch in range(1, 25):
        train(model, device, train_loader, optimizer, epoch, scheduler, L1lambda=1e-5)
        test(model, device, test_loader)
    # Archive this run's curves and start fresh lists for the next experiment.
    total_loss.append(t_loss)
    total_acc.append(test_acc)
    t_loss, test_acc = [], []
def l2_with_GBN():
    """Train ``Net`` (GhostBatchNorm) with L2 regularisation only.

    Runs epochs 1..24 and then moves this run's per-epoch test losses and
    accuracies into ``total_loss`` / ``total_acc``, resetting the per-run lists.
    """
    global t_loss, test_acc
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = summary_of_model1()
    train_loader, test_loader = loading_of_dataset1()
    # L2 via the optimizer's weight_decay; no L1lambda is passed to train().
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-5)
    scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
    print("...L2_with_GBN..")
    for epoch in range(1, 25):
        train(model, device, train_loader, optimizer, epoch, scheduler)
        test(model, device, test_loader)
    # Archive this run's curves and start fresh lists for the next experiment.
    total_loss.append(t_loss)
    total_acc.append(test_acc)
    t_loss, test_acc = [], []

def without_l1_l2_GBN():
    """Train ``Net`` (GhostBatchNorm) without L1 or L2 regularisation.

    Runs epochs 1..24 and then moves this run's per-epoch test losses and
    accuracies into ``total_loss`` / ``total_acc``, resetting the per-run lists.
    """
    global t_loss, test_acc
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = summary_of_model1()
    train_loader, test_loader = loading_of_dataset1()
    # Plain SGD with momentum: neither weight_decay nor L1lambda is used.
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
    print("...without_L1_L2_GBN..")
    for epoch in range(1, 25):
        train(model, device, train_loader, optimizer, epoch, scheduler)
        test(model, device, test_loader)
    # Archive this run's curves and start fresh lists for the next experiment.
    total_loss.append(t_loss)
    total_acc.append(test_acc)
    t_loss, test_acc = [], []
def BN_without_L1_L2():
    """Train ``Net2`` (BatchNorm) without L1 or L2 regularisation.

    Runs epochs 1..24 and then moves this run's per-epoch test losses and
    accuracies into ``total_loss`` / ``total_acc``, resetting the per-run lists.
    """
    global t_loss, test_acc
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = summary_of_model2()
    train_loader, test_loader = loading_of_dataset2()
    # Plain SGD with momentum: neither weight_decay nor L1lambda is used.
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
    print("...without_L1_L2_BN..")
    for epoch in range(1, 25):
        train(model, device, train_loader, optimizer, epoch, scheduler)
        test(model, device, test_loader)
    # Archive this run's curves and start fresh lists for the next experiment.
    total_loss.append(t_loss)
    total_acc.append(test_acc)
    t_loss, test_acc = [], []
def l1_with_BN():
    """Train ``Net2`` (BatchNorm) with L1 regularisation only.

    Runs epochs 1..24 and then moves this run's per-epoch test losses and
    accuracies into ``total_loss`` / ``total_acc``, resetting the per-run lists.
    """
    global t_loss, test_acc
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = summary_of_model2()
    train_loader, test_loader = loading_of_dataset2()
    # No weight_decay here; the L1 term is added by train() via L1lambda.
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
    print("...L1_with_BN..")
    for epoch in range(1, 25):
        train(model, device, train_loader, optimizer, epoch, scheduler, L1lambda=1e-5)
        test(model, device, test_loader)
    # Archive this run's curves and start fresh lists for the next experiment.
    total_loss.append(t_loss)
    total_acc.append(test_acc)
    t_loss, test_acc = [], []
def l1_l2_with_BN():
    """Train ``Net2`` (BatchNorm) with both L1 and L2 regularisation.

    Runs epochs 1..24 and then moves this run's per-epoch test losses and
    accuracies into ``total_loss`` / ``total_acc``, resetting the per-run lists.
    """
    global t_loss, test_acc
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = summary_of_model2()
    train_loader, test_loader = loading_of_dataset2()
    # L2 via the optimizer's weight_decay; L1 via train()'s L1lambda argument.
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-5)
    scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
    print("....L1_L2_BN...")
    for epoch in range(1, 25):
        train(model, device, train_loader, optimizer, epoch, scheduler, L1lambda=1e-5)
        test(model, device, test_loader)
    # Archive this run's curves and start fresh lists for the next experiment.
    total_loss.append(t_loss)
    total_acc.append(test_acc)
    t_loss, test_acc = [], []
def l2_with_BN():
    """Train ``Net2`` (BatchNorm) with L2 regularisation only.

    Runs epochs 1..24 and then moves this run's per-epoch test losses and
    accuracies into ``total_loss`` / ``total_acc``, resetting the per-run lists.
    """
    global t_loss, test_acc
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = summary_of_model2()
    train_loader, test_loader = loading_of_dataset2()
    # L2 via the optimizer's weight_decay; no L1lambda is passed to train().
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-5)
    scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
    print("...L2_with_BN..")
    for epoch in range(1, 25):
        train(model, device, train_loader, optimizer, epoch, scheduler)
        test(model, device, test_loader)
    # Archive this run's curves and start fresh lists for the next experiment.
    total_loss.append(t_loss)
    total_acc.append(test_acc)
    t_loss, test_acc = [], []


    
#l1_critic=nn.L1Loss(size_average=False)
#reg_loss= 0
#for param in model.parameters():
 #   print(param)
 #   reg_loss = reg_loss + l1_critic(param)
#factor=0.0005  
#loss=loss + (factor*reg_loss)

#for epoch in range(1, 5):
 #   train(model, device, train_loader, optimizer, epoch,scheduler,L1lambda=1e-5)
  #  test(model, device, test_loader)

In [0]:
# Run all eight experiments. Each call appends one list of per-epoch test
# losses to total_loss and one list of accuracies to total_acc, so the call
# order here fixes the indices (0-7) that the plotting cells read by position.
l1_l2_with_GBN()
l1_with_GBN()
l2_with_GBN()
without_l1_l2_GBN()
BN_without_L1_L2()
l1_with_BN()
l2_with_BN()
l1_l2_with_BN()





In [0]:
# Plot misclassified digits for the GhostBatchNorm model. This reads the
# checkpoint from Google Drive, so Drive must be mounted and the file saved first.
misclassified_images()

In [0]:
# Plot misclassified digits for the BatchNorm model. This reads the
# checkpoint from Google Drive, so Drive must be mounted and the file saved first.
misclassified_images_BN()

In [0]:
from google.colab import drive
drive.mount('/content/gdrive')

!ls /content/gdrive/My Drive

model_save_name = 'S6_Assignment_Misclasssified_GBN.pt'
path = F"/content/gdrive/My Drive/{model_save_name}" 
model = summary_of_model1()
torch.save(model.state_dict(), path)
print("Saving Complete")

In [0]:
# Mount Google Drive and save a state dict for the BatchNorm model.
from google.colab import drive
drive.mount('/content/gdrive')

model_save_name = 'S6_Assignment_Misclasssified_BN.pt'
path = F"/content/gdrive/My Drive/{model_save_name}" 
# NOTE(review): summary_of_model2() constructs a brand-new Net2, so the weights
# saved here are freshly initialised, not the ones trained by the experiment
# functions (which do not return their models). Confirm this is intended.
model = summary_of_model2()
torch.save(model.state_dict(), path)
print("Saving Complete")

In [0]:
# Display the collected per-experiment test-loss lists as the cell output.
total_loss

In [0]:
# Test-loss curves of all eight experiments on one figure.
# Each entry is (index into total_loss, legend label, line style); the indices
# follow the order in which the experiment functions were called. Entries are
# listed in plotting order, which is also the legend order.
loss_curves = [
    (0, 'L1_L2_with_GBN', dict(color='Green')),
    (1, 'L1_with_GBN', dict(color='Red')),
    (2, 'L2_with_GBN', dict(color='Blue')),
    (3, 'Without_L1_L2_with_GBN', dict(color='Yellow')),
    (4, 'without_L1_L2_with_BN', dict(c='Black', marker='s', ls='--')),
    (6, 'with_L2_with_BN', dict(c='Magenta', marker='D', ls='-')),
    (5, 'with_L1_with_BN', dict(c='Cyan', marker='o', ls='-.')),
    (7, 'with_L1_L2_with_BN', dict(c='Grey', marker='1', ls='-.')),
]

plt.figure(figsize=(20,12))
for curve_idx, curve_label, curve_style in loss_curves:
    plt.plot(total_loss[curve_idx], label=curve_label, **curve_style)

# The earlier xlabel/ylabel/legend calls were immediately overridden by the
# ones below, so only the effective settings are kept; the figure is unchanged.
plt.title('Total losses vs Epochs')
plt.legend()
plt.ylabel('Total losses')
plt.xlabel('epochs')
plt.show()

In [0]:
# Test-accuracy curves of all eight experiments on one figure.
# Each entry is (index into total_acc, legend label, line style); the indices
# follow the order in which the experiment functions were called. Entries are
# listed in plotting order, which is also the legend order.
acc_curves = [
    (0, 'L1_L2_with_GBN', dict(color='Green')),
    (1, 'L1_with_GBN', dict(color='Red')),
    (2, 'L2_with_GBN', dict(color='Blue')),
    (3, 'Without_L1_L2_with_GBN', dict(color='Yellow')),
    (4, 'without_L1_L2_with_BN', dict(c='Black', marker='s', ls='--')),
    (6, 'with_L2_with_BN', dict(c='Magenta', marker='D', ls='-')),
    (5, 'with_L1_with_BN', dict(c='Cyan', marker='o', ls='-.')),
    (7, 'with_L1_L2_with_BN', dict(c='Grey', marker='1', ls='-.')),
]

plt.figure(figsize=(20,12))
for curve_idx, curve_label, curve_style in acc_curves:
    plt.plot(total_acc[curve_idx], label=curve_label, **curve_style)

# The earlier xlabel/ylabel/legend calls were immediately overridden by the
# ones below, so only the effective settings are kept; the figure is unchanged.
plt.title('Total testing Accuracy vs Epochs')
plt.legend()
plt.ylabel('Total testing accuracy')
plt.xlabel('epochs')
plt.show()