<a href="https://colab.research.google.com/github/gremlin97/EVA-8/blob/main/S5_Norm/EVA3_S5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from model import Net #Importing `Net` from model.py

In [4]:
!pip install torchsummary

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## Group Normalization


In [5]:
from torchsummary import summary
norm = 'GN' #Using group norm

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net(norm,num=2).to(device) # Setting number of groups to 2
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
           Dropout-2           [-1, 16, 26, 26]               0
            Conv2d-3           [-1, 32, 24, 24]           4,608
         MaxPool2d-4           [-1, 32, 12, 12]               0
           Dropout-5           [-1, 32, 12, 12]               0
            Conv2d-6           [-1, 10, 12, 12]             320
            Conv2d-7           [-1, 16, 10, 10]           1,440
           Dropout-8           [-1, 16, 10, 10]               0
            Conv2d-9             [-1, 16, 8, 8]           2,304
          Dropout-10             [-1, 16, 8, 8]               0
           Conv2d-11             [-1, 16, 6, 6]           2,304
AdaptiveAvgPool2d-12             [-1, 16, 1, 1]               0
           Linear-13                   [-1, 10]             170
Total params: 11,290
Trainable params: 



In [8]:
torch.manual_seed(1)
batch_size = 32

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    correct = 0
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx} Train Accuracy={100. * correct / len(train_loader.dataset)}')

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [10]:
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9)
scheduler =  optim.lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.3)

for epoch in range(20):
    print("Epoch: ",epoch+1)
    train(model, device, train_loader, optimizer, epoch)
    scheduler.step()
    test(model, device, test_loader)

Epoch:  1


loss=0.22332815825939178 batch_id=1874 Train Accuracy=93.53833333333333: 100%|██████████| 1875/1875 [00:31<00:00, 59.11it/s]



Test set: Average loss: 0.0504, Accuracy: 9825/10000 (98%)

Epoch:  2


loss=0.05006709694862366 batch_id=1874 Train Accuracy=97.80333333333333: 100%|██████████| 1875/1875 [00:31<00:00, 58.82it/s]



Test set: Average loss: 0.0563, Accuracy: 9823/10000 (98%)

Epoch:  3


loss=0.008765462785959244 batch_id=1874 Train Accuracy=98.15333333333334: 100%|██████████| 1875/1875 [00:32<00:00, 58.09it/s]



Test set: Average loss: 0.0324, Accuracy: 9899/10000 (99%)

Epoch:  4


loss=0.00046853782259859145 batch_id=1874 Train Accuracy=98.50333333333333: 100%|██████████| 1875/1875 [00:31<00:00, 59.70it/s]



Test set: Average loss: 0.0349, Accuracy: 9891/10000 (99%)

Epoch:  5


loss=0.029093148186802864 batch_id=1874 Train Accuracy=98.61666666666666: 100%|██████████| 1875/1875 [00:31<00:00, 59.65it/s]



Test set: Average loss: 0.0276, Accuracy: 9912/10000 (99%)

Epoch:  6


loss=0.004565029870718718 batch_id=1874 Train Accuracy=98.75333333333333: 100%|██████████| 1875/1875 [00:31<00:00, 59.63it/s]



Test set: Average loss: 0.0265, Accuracy: 9922/10000 (99%)

Epoch:  7


loss=0.004850512370467186 batch_id=1874 Train Accuracy=99.225: 100%|██████████| 1875/1875 [00:31<00:00, 58.88it/s]



Test set: Average loss: 0.0207, Accuracy: 9929/10000 (99%)

Epoch:  8


loss=0.0006488696089945734 batch_id=1874 Train Accuracy=99.2: 100%|██████████| 1875/1875 [00:31<00:00, 60.33it/s]



Test set: Average loss: 0.0207, Accuracy: 9930/10000 (99%)

Epoch:  9


loss=0.017148269340395927 batch_id=1874 Train Accuracy=99.26666666666667: 100%|██████████| 1875/1875 [00:31<00:00, 59.90it/s]



Test set: Average loss: 0.0210, Accuracy: 9934/10000 (99%)

Epoch:  10


loss=0.006679198704659939 batch_id=1874 Train Accuracy=99.23833333333333: 100%|██████████| 1875/1875 [00:31<00:00, 60.19it/s]



Test set: Average loss: 0.0215, Accuracy: 9926/10000 (99%)

Epoch:  11


loss=0.004357054829597473 batch_id=1874 Train Accuracy=99.27166666666666: 100%|██████████| 1875/1875 [00:31<00:00, 59.76it/s]



Test set: Average loss: 0.0222, Accuracy: 9931/10000 (99%)

Epoch:  12


loss=0.0022511181887239218 batch_id=1874 Train Accuracy=99.28833333333333: 100%|██████████| 1875/1875 [00:32<00:00, 58.34it/s]



Test set: Average loss: 0.0210, Accuracy: 9938/10000 (99%)

Epoch:  13


loss=0.10687175393104553 batch_id=1874 Train Accuracy=99.36333333333333: 100%|██████████| 1875/1875 [00:31<00:00, 60.33it/s]



Test set: Average loss: 0.0191, Accuracy: 9934/10000 (99%)

Epoch:  14


loss=0.035472508519887924 batch_id=1874 Train Accuracy=99.40333333333334: 100%|██████████| 1875/1875 [00:31<00:00, 59.20it/s]



Test set: Average loss: 0.0196, Accuracy: 9933/10000 (99%)

Epoch:  15


loss=0.0017704841447994113 batch_id=1874 Train Accuracy=99.385: 100%|██████████| 1875/1875 [00:33<00:00, 55.17it/s]



Test set: Average loss: 0.0190, Accuracy: 9940/10000 (99%)

Epoch:  16


loss=0.0034431531094014645 batch_id=1874 Train Accuracy=99.41833333333334: 100%|██████████| 1875/1875 [00:33<00:00, 56.66it/s]



Test set: Average loss: 0.0191, Accuracy: 9936/10000 (99%)

Epoch:  17


loss=0.03313163295388222 batch_id=1874 Train Accuracy=99.39333333333333: 100%|██████████| 1875/1875 [00:31<00:00, 59.86it/s]



Test set: Average loss: 0.0187, Accuracy: 9938/10000 (99%)

Epoch:  18


loss=0.0020353845320641994 batch_id=1874 Train Accuracy=99.44166666666666: 100%|██████████| 1875/1875 [00:31<00:00, 59.28it/s]



Test set: Average loss: 0.0184, Accuracy: 9938/10000 (99%)

Epoch:  19


loss=0.1879698783159256 batch_id=1874 Train Accuracy=99.46: 100%|██████████| 1875/1875 [00:31<00:00, 59.49it/s]



Test set: Average loss: 0.0185, Accuracy: 9941/10000 (99%)

Epoch:  20


loss=0.07888749986886978 batch_id=1874 Train Accuracy=99.46: 100%|██████████| 1875/1875 [00:31<00:00, 58.74it/s]



Test set: Average loss: 0.0183, Accuracy: 9945/10000 (99%)



## Layer Normalization

In [11]:
from torchsummary import summary
norm = 'LN' # Using layer norm

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
modelln = Net(norm).to(device)
summary(modelln, input_size=(1, 28, 28))

torch.manual_seed(1)
batch_size = 32

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
           Dropout-2           [-1, 16, 26, 26]               0
            Conv2d-3           [-1, 32, 24, 24]           4,608
         MaxPool2d-4           [-1, 32, 12, 12]               0
           Dropout-5           [-1, 32, 12, 12]               0
            Conv2d-6           [-1, 10, 12, 12]             320
            Conv2d-7           [-1, 16, 10, 10]           1,440
           Dropout-8           [-1, 16, 10, 10]               0
            Conv2d-9             [-1, 16, 8, 8]           2,304
          Dropout-10             [-1, 16, 8, 8]               0
           Conv2d-11             [-1, 16, 6, 6]           2,304
AdaptiveAvgPool2d-12             [-1, 16, 1, 1]               0
           Linear-13                   [-1, 10]             170
Total params: 11,290
Trainable params: 

In [12]:
optimizer = optim.SGD(modelln.parameters(), lr=0.05, momentum=0.9)
scheduler =  optim.lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.3)

for epoch in range(20):
    print("Epoch: ",epoch+1)
    train(modelln, device, train_loader, optimizer, epoch)
    scheduler.step()
    test(modelln, device, test_loader)

Epoch:  1


loss=0.025291629135608673 batch_id=1874 Train Accuracy=93.86666666666666: 100%|██████████| 1875/1875 [00:31<00:00, 59.87it/s]



Test set: Average loss: 0.0555, Accuracy: 9833/10000 (98%)

Epoch:  2


loss=0.0023441659286618233 batch_id=1874 Train Accuracy=97.8: 100%|██████████| 1875/1875 [00:31<00:00, 58.67it/s]



Test set: Average loss: 0.0408, Accuracy: 9874/10000 (99%)

Epoch:  3


loss=0.03008422814309597 batch_id=1874 Train Accuracy=98.095: 100%|██████████| 1875/1875 [00:31<00:00, 59.25it/s]



Test set: Average loss: 0.0504, Accuracy: 9833/10000 (98%)

Epoch:  4


loss=0.0017019083024933934 batch_id=1874 Train Accuracy=98.48333333333333: 100%|██████████| 1875/1875 [00:32<00:00, 58.08it/s]



Test set: Average loss: 0.0414, Accuracy: 9874/10000 (99%)

Epoch:  5


loss=0.19274817407131195 batch_id=1874 Train Accuracy=98.54: 100%|██████████| 1875/1875 [00:33<00:00, 55.87it/s]



Test set: Average loss: 0.0347, Accuracy: 9887/10000 (99%)

Epoch:  6


loss=0.010232673957943916 batch_id=1874 Train Accuracy=98.69833333333334: 100%|██████████| 1875/1875 [00:32<00:00, 57.13it/s]



Test set: Average loss: 0.0464, Accuracy: 9863/10000 (99%)

Epoch:  7


loss=0.01483902707695961 batch_id=1874 Train Accuracy=99.06833333333333: 100%|██████████| 1875/1875 [00:33<00:00, 56.00it/s]



Test set: Average loss: 0.0230, Accuracy: 9925/10000 (99%)

Epoch:  8


loss=0.015356565825641155 batch_id=1874 Train Accuracy=99.16: 100%|██████████| 1875/1875 [00:34<00:00, 55.12it/s]



Test set: Average loss: 0.0237, Accuracy: 9924/10000 (99%)

Epoch:  9


loss=0.13442949950695038 batch_id=1874 Train Accuracy=99.17: 100%|██████████| 1875/1875 [00:33<00:00, 56.79it/s]



Test set: Average loss: 0.0247, Accuracy: 9920/10000 (99%)

Epoch:  10


loss=0.0006454833201132715 batch_id=1874 Train Accuracy=99.215: 100%|██████████| 1875/1875 [00:33<00:00, 56.59it/s]



Test set: Average loss: 0.0236, Accuracy: 9924/10000 (99%)

Epoch:  11


loss=0.06302684545516968 batch_id=1874 Train Accuracy=99.21666666666667: 100%|██████████| 1875/1875 [00:34<00:00, 54.30it/s]



Test set: Average loss: 0.0227, Accuracy: 9921/10000 (99%)

Epoch:  12


loss=0.020540593191981316 batch_id=1874 Train Accuracy=99.28166666666667: 100%|██████████| 1875/1875 [00:32<00:00, 58.03it/s]



Test set: Average loss: 0.0239, Accuracy: 9920/10000 (99%)

Epoch:  13


loss=0.0006057322025299072 batch_id=1874 Train Accuracy=99.35333333333334: 100%|██████████| 1875/1875 [00:33<00:00, 56.79it/s]



Test set: Average loss: 0.0215, Accuracy: 9931/10000 (99%)

Epoch:  14


loss=0.0023740490432828665 batch_id=1874 Train Accuracy=99.36166666666666: 100%|██████████| 1875/1875 [00:32<00:00, 57.10it/s]



Test set: Average loss: 0.0216, Accuracy: 9930/10000 (99%)

Epoch:  15


loss=0.003524437081068754 batch_id=1874 Train Accuracy=99.34833333333333: 100%|██████████| 1875/1875 [00:32<00:00, 56.85it/s]



Test set: Average loss: 0.0219, Accuracy: 9922/10000 (99%)

Epoch:  16


loss=0.00928098801523447 batch_id=1874 Train Accuracy=99.34333333333333: 100%|██████████| 1875/1875 [00:32<00:00, 56.87it/s]



Test set: Average loss: 0.0217, Accuracy: 9927/10000 (99%)

Epoch:  17


loss=0.0062266080640256405 batch_id=1874 Train Accuracy=99.35833333333333: 100%|██████████| 1875/1875 [00:34<00:00, 55.09it/s]



Test set: Average loss: 0.0221, Accuracy: 9930/10000 (99%)

Epoch:  18


loss=0.05616484582424164 batch_id=1874 Train Accuracy=99.37: 100%|██████████| 1875/1875 [00:32<00:00, 57.43it/s]



Test set: Average loss: 0.0219, Accuracy: 9931/10000 (99%)

Epoch:  19


loss=0.006681879982352257 batch_id=1874 Train Accuracy=99.38333333333334: 100%|██████████| 1875/1875 [00:34<00:00, 55.00it/s]



Test set: Average loss: 0.0214, Accuracy: 9927/10000 (99%)

Epoch:  20


loss=0.008323648013174534 batch_id=1874 Train Accuracy=99.425: 100%|██████████| 1875/1875 [00:33<00:00, 55.36it/s]



Test set: Average loss: 0.0217, Accuracy: 9928/10000 (99%)



## BatchNorm + L1

In [6]:
from torchsummary import summary
norm = 'BN' #Using Batch Norm + L1

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
modelbn = Net(norm).to(device)
summary(modelbn, input_size=(1, 28, 28))

torch.manual_seed(1)
batch_size = 32

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
           Dropout-2           [-1, 16, 26, 26]               0
            Conv2d-3           [-1, 32, 24, 24]           4,608
         MaxPool2d-4           [-1, 32, 12, 12]               0
           Dropout-5           [-1, 32, 12, 12]               0
            Conv2d-6           [-1, 10, 12, 12]             320
            Conv2d-7           [-1, 16, 10, 10]           1,440
           Dropout-8           [-1, 16, 10, 10]               0
            Conv2d-9             [-1, 16, 8, 8]           2,304
          Dropout-10             [-1, 16, 8, 8]               0
           Conv2d-11             [-1, 16, 6, 6]           2,304
AdaptiveAvgPool2d-12             [-1, 16, 1, 1]               0
           Linear-13                   [-1, 10]             170
Total params: 11,290
Trainable params: 

  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



In [16]:
def trainL1(model, device, train_loader, optimizer, epoch, lambda_l1):
    model.train()
    correct = 0
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        l1=0

        # L1 Norm
        for p in model.parameters():
          l1+=p.abs().sum()

        loss += lambda_l1*l1
        loss.backward()
        optimizer.step()
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx} Train Accuracy={100. * correct / len(train_loader.dataset)}')

In [None]:
optimizer = optim.SGD(modelbn.parameters(), lr=0.05, momentum=0.9)
scheduler =  optim.lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.3)

for epoch in range(20):
    print("Epoch: ",epoch+1)
    trainL1(modelbn, device, train_loader, optimizer, epoch, 0.005)
    scheduler.step()
    test(modelbn, device, test_loader)

Epoch:  1


  x = F.layer_norm(x, [32, 24, 24])
loss=1.7573672533035278 batch_id=1874 Train Accuracy=87.65166666666667: 100%|██████████| 1875/1875 [00:34<00:00, 53.64it/s]



Test set: Average loss: 0.4410, Accuracy: 8766/10000 (88%)

Epoch:  2


loss=1.4432218074798584 batch_id=1874 Train Accuracy=86.36166666666666: 100%|██████████| 1875/1875 [00:34<00:00, 54.57it/s]



Test set: Average loss: 0.3602, Accuracy: 8848/10000 (88%)

Epoch:  3


loss=1.3738044500350952 batch_id=205 Train Accuracy=9.453333333333333:  11%|█         | 206/1875 [00:03<00:29, 56.31it/s]