<a href="https://colab.research.google.com/github/gremlin97/EVA-8/blob/main/S5/EVA3_S5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from model import Net #Importing `Net` from model.py

In [None]:
!pip install torchsummary

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## Group Normalization


In [None]:
from torchsummary import summary

norm = 'GN'  # Group Normalization variant of Net

# Seed *before* building the model so that weight initialisation is
# reproducible across "Restart & Run All" executions.
torch.manual_seed(1)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net(norm, num=2).to(device)  # num=2: number of groups for the group-norm layers
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
           Dropout-2           [-1, 16, 26, 26]               0
            Conv2d-3           [-1, 32, 24, 24]           4,608
         MaxPool2d-4           [-1, 32, 12, 12]               0
           Dropout-5           [-1, 32, 12, 12]               0
            Conv2d-6           [-1, 10, 12, 12]             320
            Conv2d-7           [-1, 16, 10, 10]           1,440
           Dropout-8           [-1, 16, 10, 10]               0
            Conv2d-9             [-1, 16, 8, 8]           2,304
          Dropout-10             [-1, 16, 8, 8]               0
           Conv2d-11             [-1, 16, 6, 6]           2,304
AdaptiveAvgPool2d-12             [-1, 16, 1, 1]               0
           Linear-13                   [-1, 10]             170
Total params: 11,290
Trainable params: 

  return F.log_softmax(x)


In [None]:
# Fix the RNG state so that shuffling / augmentation start from a known point.
torch.manual_seed(1)
batch_size = 32

# Worker / pinned-memory settings are only passed when a GPU is available.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Training pipeline: light rotation augmentation, then tensor conversion and
# normalisation with the mean / std values (0.1307, 0.3081).
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation pipeline: no random augmentation and no shuffling, so the test
# metrics are deterministic and comparable between epochs and between models.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=False, **kwargs)

from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: network whose output is fed to ``F.nll_loss`` (which expects
            log-probabilities).
        device: device the batches are moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: accepted for call-site compatibility; not used in the body.

    The progress bar shows the last batch loss and the running training
    accuracy computed over the samples seen so far in this epoch.
    """
    model.train()
    correct = 0
    processed = 0  # number of samples seen so far in this epoch
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)
        # Divide by the samples processed so far (not the full dataset size) so
        # the running accuracy is meaningful before the final batch.
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx} Train Accuracy={100. * correct / processed}')

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [None]:
# SGD with momentum; StepLR multiplies the learning rate by gamma (0.3)
# every step_size (6) scheduler steps, i.e. every 6 epochs here.
optimizer = optim.SGD(params=model.parameters(), lr=0.05, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=6, gamma=0.3)

# 20 epochs: train, advance the LR schedule, then evaluate on the test set.
for epoch in range(0, 20):
    print("Epoch: ", epoch + 1)
    train(model=model, device=device, train_loader=train_loader,
          optimizer=optimizer, epoch=epoch)
    scheduler.step()
    test(model=model, device=device, test_loader=test_loader)

Epoch:  1


loss=0.028912212699651718 batch_id=1874 Train Accuracy=94.15333333333334: 100%|██████████| 1875/1875 [00:36<00:00, 51.09it/s]



Test set: Average loss: 0.0543, Accuracy: 9846/10000 (98%)

Epoch:  2


loss=0.017685377970337868 batch_id=1874 Train Accuracy=97.76666666666667: 100%|██████████| 1875/1875 [00:33<00:00, 55.91it/s]



Test set: Average loss: 0.0631, Accuracy: 9805/10000 (98%)

Epoch:  3


loss=0.05805486813187599 batch_id=1874 Train Accuracy=98.21166666666667: 100%|██████████| 1875/1875 [00:34<00:00, 55.14it/s]



Test set: Average loss: 0.0408, Accuracy: 9874/10000 (99%)

Epoch:  4


loss=0.019126158207654953 batch_id=1874 Train Accuracy=98.40833333333333: 100%|██████████| 1875/1875 [00:34<00:00, 54.80it/s]



Test set: Average loss: 0.0402, Accuracy: 9885/10000 (99%)

Epoch:  5


loss=0.23912566900253296 batch_id=1874 Train Accuracy=98.62166666666667: 100%|██████████| 1875/1875 [00:33<00:00, 55.21it/s]



Test set: Average loss: 0.0369, Accuracy: 9883/10000 (99%)

Epoch:  6


loss=0.0004490662249736488 batch_id=1874 Train Accuracy=98.69166666666666: 100%|██████████| 1875/1875 [00:33<00:00, 55.46it/s]



Test set: Average loss: 0.0394, Accuracy: 9896/10000 (99%)

Epoch:  7


loss=0.010531719774007797 batch_id=1874 Train Accuracy=99.11833333333334: 100%|██████████| 1875/1875 [00:33<00:00, 55.40it/s]



Test set: Average loss: 0.0239, Accuracy: 9928/10000 (99%)

Epoch:  8


loss=0.02897053398191929 batch_id=1874 Train Accuracy=99.18333333333334: 100%|██████████| 1875/1875 [00:33<00:00, 55.25it/s]



Test set: Average loss: 0.0258, Accuracy: 9928/10000 (99%)

Epoch:  9


loss=0.06648223847150803 batch_id=1874 Train Accuracy=99.21666666666667: 100%|██████████| 1875/1875 [00:33<00:00, 55.95it/s]



Test set: Average loss: 0.0234, Accuracy: 9927/10000 (99%)

Epoch:  10


loss=0.000542083871550858 batch_id=1874 Train Accuracy=99.21: 100%|██████████| 1875/1875 [00:33<00:00, 56.12it/s]



Test set: Average loss: 0.0236, Accuracy: 9936/10000 (99%)

Epoch:  11


loss=0.1833992302417755 batch_id=1874 Train Accuracy=99.26333333333334: 100%|██████████| 1875/1875 [00:33<00:00, 56.08it/s]



Test set: Average loss: 0.0262, Accuracy: 9923/10000 (99%)

Epoch:  12


loss=0.007731765974313021 batch_id=1874 Train Accuracy=99.24333333333334: 100%|██████████| 1875/1875 [00:33<00:00, 55.79it/s]



Test set: Average loss: 0.0253, Accuracy: 9919/10000 (99%)

Epoch:  13


loss=0.005810629576444626 batch_id=1874 Train Accuracy=99.34333333333333: 100%|██████████| 1875/1875 [00:33<00:00, 55.32it/s]



Test set: Average loss: 0.0230, Accuracy: 9928/10000 (99%)

Epoch:  14


loss=0.005166888236999512 batch_id=1874 Train Accuracy=99.37: 100%|██████████| 1875/1875 [00:33<00:00, 55.80it/s]



Test set: Average loss: 0.0233, Accuracy: 9934/10000 (99%)

Epoch:  15


loss=0.0012356794904917479 batch_id=1874 Train Accuracy=99.335: 100%|██████████| 1875/1875 [00:33<00:00, 56.31it/s]



Test set: Average loss: 0.0226, Accuracy: 9937/10000 (99%)

Epoch:  16


loss=0.009091701358556747 batch_id=1874 Train Accuracy=99.38333333333334: 100%|██████████| 1875/1875 [00:33<00:00, 56.20it/s]



Test set: Average loss: 0.0232, Accuracy: 9932/10000 (99%)

Epoch:  17


loss=0.0016305145109072328 batch_id=1874 Train Accuracy=99.39: 100%|██████████| 1875/1875 [00:33<00:00, 55.31it/s]



Test set: Average loss: 0.0235, Accuracy: 9928/10000 (99%)

Epoch:  18


loss=0.17808642983436584 batch_id=1874 Train Accuracy=99.38666666666667: 100%|██████████| 1875/1875 [00:33<00:00, 56.06it/s]



Test set: Average loss: 0.0232, Accuracy: 9936/10000 (99%)

Epoch:  19


loss=0.00794703047722578 batch_id=1874 Train Accuracy=99.41833333333334: 100%|██████████| 1875/1875 [00:33<00:00, 56.71it/s]



Test set: Average loss: 0.0226, Accuracy: 9935/10000 (99%)

Epoch:  20


loss=0.002426941180601716 batch_id=1874 Train Accuracy=99.445: 100%|██████████| 1875/1875 [00:33<00:00, 56.36it/s]



Test set: Average loss: 0.0227, Accuracy: 9936/10000 (99%)



## Layer Normalization

In [None]:
from torchsummary import summary

norm = 'LN'  # Layer Normalization variant of Net

# Seed *before* building the model so that weight initialisation is
# reproducible across "Restart & Run All" executions.
torch.manual_seed(1)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
modelln = Net(norm).to(device)
summary(modelln, input_size=(1, 28, 28))

# Re-seed after the model has been built so the data pipeline starts from a
# fixed RNG state regardless of how many random numbers were drawn above.
torch.manual_seed(1)
batch_size = 32

# Worker / pinned-memory settings are only passed when a GPU is available.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Training pipeline: light rotation augmentation, then tensor conversion and
# normalisation with the mean / std values (0.1307, 0.3081).
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation pipeline: no random augmentation and no shuffling, so the test
# metrics are deterministic and comparable between epochs and between models.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=False, **kwargs)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
           Dropout-2           [-1, 16, 26, 26]               0
            Conv2d-3           [-1, 32, 24, 24]           4,608
         MaxPool2d-4           [-1, 32, 12, 12]               0
           Dropout-5           [-1, 32, 12, 12]               0
            Conv2d-6           [-1, 10, 12, 12]             320
            Conv2d-7           [-1, 16, 10, 10]           1,440
           Dropout-8           [-1, 16, 10, 10]               0
            Conv2d-9             [-1, 16, 8, 8]           2,304
          Dropout-10             [-1, 16, 8, 8]               0
           Conv2d-11             [-1, 16, 6, 6]           2,304
AdaptiveAvgPool2d-12             [-1, 16, 1, 1]               0
           Linear-13                   [-1, 10]             170
Total params: 11,290
Trainable params: 

In [None]:
# SGD with momentum; StepLR multiplies the learning rate by gamma (0.3)
# every step_size (6) scheduler steps, i.e. every 6 epochs here.
optimizer = optim.SGD(params=modelln.parameters(), lr=0.05, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=6, gamma=0.3)

# 20 epochs: train, advance the LR schedule, then evaluate on the test set.
for epoch in range(0, 20):
    print("Epoch: ", epoch + 1)
    train(model=modelln, device=device, train_loader=train_loader,
          optimizer=optimizer, epoch=epoch)
    scheduler.step()
    test(model=modelln, device=device, test_loader=test_loader)

Epoch:  1


loss=0.0783224031329155 batch_id=1874 Train Accuracy=92.38: 100%|██████████| 1875/1875 [00:32<00:00, 57.97it/s]



Test set: Average loss: 0.0643, Accuracy: 9804/10000 (98%)

Epoch:  2


loss=0.0037191607989370823 batch_id=1874 Train Accuracy=97.45666666666666: 100%|██████████| 1875/1875 [00:32<00:00, 57.88it/s]



Test set: Average loss: 0.0573, Accuracy: 9809/10000 (98%)

Epoch:  3


loss=0.18019768595695496 batch_id=1874 Train Accuracy=97.95166666666667: 100%|██████████| 1875/1875 [00:31<00:00, 58.81it/s]



Test set: Average loss: 0.0508, Accuracy: 9838/10000 (98%)

Epoch:  4


loss=0.008018049411475658 batch_id=1874 Train Accuracy=98.27833333333334: 100%|██████████| 1875/1875 [00:32<00:00, 58.59it/s]



Test set: Average loss: 0.0489, Accuracy: 9845/10000 (98%)

Epoch:  5


loss=0.12866494059562683 batch_id=1874 Train Accuracy=98.45333333333333: 100%|██████████| 1875/1875 [00:32<00:00, 57.41it/s]



Test set: Average loss: 0.0450, Accuracy: 9861/10000 (99%)

Epoch:  6


loss=0.022123798727989197 batch_id=1874 Train Accuracy=98.585: 100%|██████████| 1875/1875 [00:32<00:00, 58.54it/s]



Test set: Average loss: 0.0308, Accuracy: 9910/10000 (99%)

Epoch:  7


loss=0.020123399794101715 batch_id=1874 Train Accuracy=99.04166666666667: 100%|██████████| 1875/1875 [00:32<00:00, 58.51it/s]



Test set: Average loss: 0.0218, Accuracy: 9933/10000 (99%)

Epoch:  8


loss=0.002040498424321413 batch_id=1874 Train Accuracy=99.145: 100%|██████████| 1875/1875 [00:31<00:00, 58.73it/s]



Test set: Average loss: 0.0229, Accuracy: 9930/10000 (99%)

Epoch:  9


loss=0.023770274594426155 batch_id=1874 Train Accuracy=99.2: 100%|██████████| 1875/1875 [00:32<00:00, 57.96it/s]



Test set: Average loss: 0.0222, Accuracy: 9934/10000 (99%)

Epoch:  10


loss=0.0012366658775135875 batch_id=1874 Train Accuracy=99.20833333333333: 100%|██████████| 1875/1875 [00:32<00:00, 57.35it/s]



Test set: Average loss: 0.0217, Accuracy: 9941/10000 (99%)

Epoch:  11


loss=0.23773713409900665 batch_id=1874 Train Accuracy=99.18: 100%|██████████| 1875/1875 [00:32<00:00, 58.24it/s]



Test set: Average loss: 0.0223, Accuracy: 9933/10000 (99%)

Epoch:  12


loss=0.015024788677692413 batch_id=1874 Train Accuracy=99.19166666666666: 100%|██████████| 1875/1875 [00:32<00:00, 58.40it/s]



Test set: Average loss: 0.0226, Accuracy: 9928/10000 (99%)

Epoch:  13


loss=0.0029442692175507545 batch_id=1874 Train Accuracy=99.32166666666667: 100%|██████████| 1875/1875 [00:31<00:00, 58.69it/s]



Test set: Average loss: 0.0205, Accuracy: 9939/10000 (99%)

Epoch:  14


loss=0.005397937726229429 batch_id=1874 Train Accuracy=99.27666666666667: 100%|██████████| 1875/1875 [00:32<00:00, 57.77it/s]



Test set: Average loss: 0.0213, Accuracy: 9938/10000 (99%)

Epoch:  15


loss=0.0019925630185753107 batch_id=1874 Train Accuracy=99.31166666666667: 100%|██████████| 1875/1875 [00:31<00:00, 58.72it/s]



Test set: Average loss: 0.0200, Accuracy: 9939/10000 (99%)

Epoch:  16


loss=0.006370917893946171 batch_id=1874 Train Accuracy=99.32: 100%|██████████| 1875/1875 [00:31<00:00, 59.08it/s]



Test set: Average loss: 0.0205, Accuracy: 9940/10000 (99%)

Epoch:  17


loss=0.016685307025909424 batch_id=1874 Train Accuracy=99.34333333333333: 100%|██████████| 1875/1875 [00:31<00:00, 59.39it/s]



Test set: Average loss: 0.0202, Accuracy: 9939/10000 (99%)

Epoch:  18


loss=0.07780980318784714 batch_id=1874 Train Accuracy=99.37166666666667: 100%|██████████| 1875/1875 [00:31<00:00, 58.71it/s]



Test set: Average loss: 0.0200, Accuracy: 9940/10000 (99%)

Epoch:  19


loss=0.0031574408058077097 batch_id=1874 Train Accuracy=99.40166666666667: 100%|██████████| 1875/1875 [00:32<00:00, 57.83it/s]



Test set: Average loss: 0.0199, Accuracy: 9940/10000 (99%)

Epoch:  20


loss=0.01111519429832697 batch_id=1874 Train Accuracy=99.375: 100%|██████████| 1875/1875 [00:31<00:00, 58.87it/s]



Test set: Average loss: 0.0197, Accuracy: 9941/10000 (99%)



## BatchNorm + L1

In [None]:
from torchsummary import summary

norm = 'BN'  # Batch Normalization variant of Net (trained with an L1 penalty below)

# Seed *before* building the model so that weight initialisation is
# reproducible across "Restart & Run All" executions.
torch.manual_seed(1)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
modelbn = Net(norm).to(device)
summary(modelbn, input_size=(1, 28, 28))

# Re-seed after the model has been built so the data pipeline starts from a
# fixed RNG state regardless of how many random numbers were drawn above.
torch.manual_seed(1)
batch_size = 32

# Worker / pinned-memory settings are only passed when a GPU is available.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Training pipeline: light rotation augmentation, then tensor conversion and
# normalisation with the mean / std values (0.1307, 0.3081).
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

# Evaluation pipeline: no random augmentation and no shuffling, so the test
# metrics are deterministic and comparable between epochs and between models.
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=False, **kwargs)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
           Dropout-2           [-1, 16, 26, 26]               0
            Conv2d-3           [-1, 32, 24, 24]           4,608
         MaxPool2d-4           [-1, 32, 12, 12]               0
           Dropout-5           [-1, 32, 12, 12]               0
            Conv2d-6           [-1, 10, 12, 12]             320
            Conv2d-7           [-1, 16, 10, 10]           1,440
           Dropout-8           [-1, 16, 10, 10]               0
            Conv2d-9             [-1, 16, 8, 8]           2,304
          Dropout-10             [-1, 16, 8, 8]               0
           Conv2d-11             [-1, 16, 6, 6]           2,304
AdaptiveAvgPool2d-12             [-1, 16, 1, 1]               0
           Linear-13                   [-1, 10]             170
Total params: 11,290
Trainable params: 

  return F.log_softmax(x)


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw



In [None]:
# Training step with an added L1 regularization penalty
def trainL1(model, device, train_loader, optimizer, epoch, lambda_l1):
    """Train ``model`` for one pass over ``train_loader`` with an L1 penalty.

    The optimised loss is ``nll_loss + lambda_l1 * sum(|p|)`` where the sum
    runs over every tensor returned by ``model.parameters()``.

    Args:
        model: network whose output is fed to ``F.nll_loss`` (which expects
            log-probabilities).
        device: device the batches are moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: accepted for call-site compatibility; not used in the body.
        lambda_l1: weight of the L1 penalty term.

    The progress bar shows the last batch loss (including the L1 term) and the
    running training accuracy computed over the samples seen so far.
    """
    model.train()
    correct = 0
    processed = 0  # number of samples seen so far in this epoch
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)

        # L1 norm: sum of absolute values over all parameter tensors.
        l1 = sum(p.abs().sum() for p in model.parameters())

        loss += lambda_l1*l1
        loss.backward()
        optimizer.step()
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        processed += len(data)
        # Divide by the samples processed so far (not the full dataset size) so
        # the running accuracy is meaningful before the final batch.
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx} Train Accuracy={100. * correct / processed}')

In [None]:
# SGD with momentum; StepLR multiplies the learning rate by gamma (0.3)
# every step_size (6) scheduler steps, i.e. every 6 epochs here.
optimizer = optim.SGD(params=modelbn.parameters(), lr=0.05, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=6, gamma=0.3)

# 20 epochs: train with an L1 penalty weight of 0.005, advance the LR
# schedule, then evaluate on the test set.
for epoch in range(0, 20):
    print("Epoch: ", epoch + 1)
    trainL1(model=modelbn, device=device, train_loader=train_loader,
            optimizer=optimizer, epoch=epoch, lambda_l1=0.005)
    scheduler.step()
    test(model=modelbn, device=device, test_loader=test_loader)

Epoch:  1


loss=1.6456964015960693 batch_id=1874 Train Accuracy=87.84166666666667: 100%|██████████| 1875/1875 [00:32<00:00, 58.22it/s]



Test set: Average loss: 0.3638, Accuracy: 8871/10000 (89%)

Epoch:  2


loss=1.6156458854675293 batch_id=1874 Train Accuracy=88.195: 100%|██████████| 1875/1875 [00:33<00:00, 56.53it/s]



Test set: Average loss: 0.3297, Accuracy: 8998/10000 (90%)

Epoch:  3


loss=1.590028166770935 batch_id=1874 Train Accuracy=88.49666666666667: 100%|██████████| 1875/1875 [00:32<00:00, 56.93it/s]



Test set: Average loss: 0.2321, Accuracy: 9324/10000 (93%)

Epoch:  4


loss=1.6520264148712158 batch_id=1874 Train Accuracy=88.18666666666667: 100%|██████████| 1875/1875 [00:33<00:00, 56.08it/s]



Test set: Average loss: 0.4143, Accuracy: 8724/10000 (87%)

Epoch:  5


loss=1.9825154542922974 batch_id=1874 Train Accuracy=88.05833333333334: 100%|██████████| 1875/1875 [00:32<00:00, 57.24it/s]



Test set: Average loss: 0.2800, Accuracy: 9145/10000 (91%)

Epoch:  6


loss=1.6511306762695312 batch_id=1874 Train Accuracy=88.225: 100%|██████████| 1875/1875 [00:36<00:00, 51.87it/s]



Test set: Average loss: 0.2420, Accuracy: 9269/10000 (93%)

Epoch:  7


loss=1.0901055335998535 batch_id=1874 Train Accuracy=91.64333333333333: 100%|██████████| 1875/1875 [00:34<00:00, 54.79it/s]



Test set: Average loss: 0.2353, Accuracy: 9327/10000 (93%)

Epoch:  8


loss=1.152559757232666 batch_id=1874 Train Accuracy=91.52: 100%|██████████| 1875/1875 [00:33<00:00, 56.14it/s]



Test set: Average loss: 0.1869, Accuracy: 9493/10000 (95%)

Epoch:  9


loss=1.2667275667190552 batch_id=1874 Train Accuracy=91.57333333333334: 100%|██████████| 1875/1875 [00:34<00:00, 54.59it/s]



Test set: Average loss: 0.2048, Accuracy: 9432/10000 (94%)

Epoch:  10


loss=1.4236531257629395 batch_id=1874 Train Accuracy=91.61666666666666: 100%|██████████| 1875/1875 [00:35<00:00, 53.09it/s]



Test set: Average loss: 0.2338, Accuracy: 9349/10000 (93%)

Epoch:  11


loss=1.2935616970062256 batch_id=1874 Train Accuracy=91.69666666666667: 100%|██████████| 1875/1875 [00:34<00:00, 54.01it/s]



Test set: Average loss: 0.1901, Accuracy: 9420/10000 (94%)

Epoch:  12


loss=1.3134269714355469 batch_id=1874 Train Accuracy=91.49833333333333: 100%|██████████| 1875/1875 [00:33<00:00, 56.42it/s]



Test set: Average loss: 0.1879, Accuracy: 9497/10000 (95%)

Epoch:  13


loss=0.9267864227294922 batch_id=1874 Train Accuracy=93.74833333333333: 100%|██████████| 1875/1875 [00:33<00:00, 56.24it/s]



Test set: Average loss: 0.1623, Accuracy: 9584/10000 (96%)

Epoch:  14


loss=0.8894071578979492 batch_id=1874 Train Accuracy=93.64: 100%|██████████| 1875/1875 [00:32<00:00, 57.34it/s]



Test set: Average loss: 0.1563, Accuracy: 9555/10000 (96%)

Epoch:  15


loss=0.8622702360153198 batch_id=1874 Train Accuracy=93.57: 100%|██████████| 1875/1875 [00:33<00:00, 56.14it/s]



Test set: Average loss: 0.1464, Accuracy: 9610/10000 (96%)

Epoch:  16


loss=0.9009665250778198 batch_id=1874 Train Accuracy=93.89333333333333: 100%|██████████| 1875/1875 [00:34<00:00, 54.55it/s]



Test set: Average loss: 0.1839, Accuracy: 9495/10000 (95%)

Epoch:  17


loss=0.8687702417373657 batch_id=1874 Train Accuracy=93.595: 100%|██████████| 1875/1875 [00:33<00:00, 55.75it/s]



Test set: Average loss: 0.1670, Accuracy: 9501/10000 (95%)

Epoch:  18


loss=0.9986298680305481 batch_id=1874 Train Accuracy=93.68333333333334: 100%|██████████| 1875/1875 [00:33<00:00, 56.69it/s]



Test set: Average loss: 0.2218, Accuracy: 9342/10000 (93%)

Epoch:  19


loss=0.7846667766571045 batch_id=1874 Train Accuracy=95.23666666666666: 100%|██████████| 1875/1875 [00:33<00:00, 55.84it/s]



Test set: Average loss: 0.1295, Accuracy: 9633/10000 (96%)

Epoch:  20


loss=0.7183787822723389 batch_id=1874 Train Accuracy=95.01166666666667: 100%|██████████| 1875/1875 [00:33<00:00, 55.96it/s]



Test set: Average loss: 0.1202, Accuracy: 9661/10000 (97%)

