<a href="https://colab.research.google.com/github/gremlin97/EVA-8/blob/main/S5_Norm/EVA3_S5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from model import Net

In [2]:
!pip install torchsummary

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
from torchsummary import summary
norm = 'GN'

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net(norm,num=2).to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
           Dropout-2           [-1, 16, 26, 26]               0
            Conv2d-3           [-1, 32, 24, 24]           4,608
         MaxPool2d-4           [-1, 32, 12, 12]               0
           Dropout-5           [-1, 32, 12, 12]               0
            Conv2d-6           [-1, 10, 12, 12]             320
            Conv2d-7           [-1, 16, 10, 10]           1,440
           Dropout-8           [-1, 16, 10, 10]               0
            Conv2d-9             [-1, 16, 8, 8]           2,304
          Dropout-10             [-1, 16, 8, 8]               0
           Conv2d-11             [-1, 16, 6, 6]           2,304
AdaptiveAvgPool2d-12             [-1, 16, 1, 1]               0
           Linear-13                   [-1, 10]             170
Total params: 11,290
Trainable params: 

  return F.log_softmax(x)


In [4]:
torch.manual_seed(1)
batch_size = 32

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    correct = 0
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx} Train Accuracy={100. * correct / len(train_loader.dataset)}')

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [5]:
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9)
scheduler =  optim.lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.3)

for epoch in range(20):
    print("Epoch: ",epoch+1)
    train(model, device, train_loader, optimizer, epoch)
    scheduler.step()
    test(model, device, test_loader)

Epoch:  1


loss=0.012216417118906975 batch_id=1874 Train Accuracy=94.21166666666667: 100%|██████████| 1875/1875 [00:34<00:00, 53.84it/s]



Test set: Average loss: 0.0438, Accuracy: 9856/10000 (99%)

Epoch:  2


loss=0.05764397606253624 batch_id=1874 Train Accuracy=97.82: 100%|██████████| 1875/1875 [00:31<00:00, 59.46it/s]



Test set: Average loss: 0.0407, Accuracy: 9874/10000 (99%)

Epoch:  3


loss=0.03556833043694496 batch_id=1874 Train Accuracy=98.15833333333333: 100%|██████████| 1875/1875 [00:31<00:00, 59.68it/s]



Test set: Average loss: 0.0364, Accuracy: 9891/10000 (99%)

Epoch:  4


loss=0.049707114696502686 batch_id=1874 Train Accuracy=98.49666666666667: 100%|██████████| 1875/1875 [00:30<00:00, 60.53it/s]



Test set: Average loss: 0.0401, Accuracy: 9877/10000 (99%)

Epoch:  5


loss=0.27480730414390564 batch_id=1874 Train Accuracy=98.49: 100%|██████████| 1875/1875 [00:31<00:00, 59.49it/s]



Test set: Average loss: 0.0427, Accuracy: 9866/10000 (99%)

Epoch:  6


loss=0.002707191277295351 batch_id=1874 Train Accuracy=98.68833333333333: 100%|██████████| 1875/1875 [00:31<00:00, 58.99it/s]



Test set: Average loss: 0.0295, Accuracy: 9899/10000 (99%)

Epoch:  7


loss=0.09809981286525726 batch_id=1874 Train Accuracy=99.14333333333333: 100%|██████████| 1875/1875 [00:31<00:00, 59.23it/s]



Test set: Average loss: 0.0200, Accuracy: 9930/10000 (99%)

Epoch:  8


loss=0.011426737532019615 batch_id=1874 Train Accuracy=99.19: 100%|██████████| 1875/1875 [00:31<00:00, 60.16it/s]



Test set: Average loss: 0.0210, Accuracy: 9926/10000 (99%)

Epoch:  9


loss=0.027925586327910423 batch_id=1874 Train Accuracy=99.17666666666666: 100%|██████████| 1875/1875 [00:31<00:00, 59.60it/s]



Test set: Average loss: 0.0217, Accuracy: 9929/10000 (99%)

Epoch:  10


loss=0.002690959954634309 batch_id=1874 Train Accuracy=99.205: 100%|██████████| 1875/1875 [00:31<00:00, 59.11it/s]



Test set: Average loss: 0.0207, Accuracy: 9931/10000 (99%)

Epoch:  11


loss=0.044354625046253204 batch_id=1874 Train Accuracy=99.24166666666666: 100%|██████████| 1875/1875 [00:31<00:00, 59.85it/s]



Test set: Average loss: 0.0209, Accuracy: 9925/10000 (99%)

Epoch:  12


loss=0.026062607765197754 batch_id=1874 Train Accuracy=99.28666666666666: 100%|██████████| 1875/1875 [00:31<00:00, 59.76it/s]



Test set: Average loss: 0.0220, Accuracy: 9922/10000 (99%)

Epoch:  13


loss=0.0004870241682510823 batch_id=1874 Train Accuracy=99.35666666666667: 100%|██████████| 1875/1875 [00:31<00:00, 60.13it/s]



Test set: Average loss: 0.0183, Accuracy: 9935/10000 (99%)

Epoch:  14


loss=0.01957286335527897 batch_id=1874 Train Accuracy=99.34666666666666: 100%|██████████| 1875/1875 [00:31<00:00, 59.14it/s]



Test set: Average loss: 0.0199, Accuracy: 9934/10000 (99%)

Epoch:  15


loss=0.02197912707924843 batch_id=1874 Train Accuracy=99.40333333333334: 100%|██████████| 1875/1875 [00:31<00:00, 59.84it/s]



Test set: Average loss: 0.0185, Accuracy: 9935/10000 (99%)

Epoch:  16


loss=0.07489533722400665 batch_id=1874 Train Accuracy=99.38833333333334: 100%|██████████| 1875/1875 [00:31<00:00, 59.33it/s]



Test set: Average loss: 0.0185, Accuracy: 9938/10000 (99%)

Epoch:  17


loss=0.004336618352681398 batch_id=1874 Train Accuracy=99.41166666666666: 100%|██████████| 1875/1875 [00:31<00:00, 58.65it/s]



Test set: Average loss: 0.0190, Accuracy: 9936/10000 (99%)

Epoch:  18


loss=0.12386415153741837 batch_id=1874 Train Accuracy=99.41333333333333: 100%|██████████| 1875/1875 [00:31<00:00, 59.59it/s]



Test set: Average loss: 0.0198, Accuracy: 9934/10000 (99%)

Epoch:  19


loss=0.003052795073017478 batch_id=1874 Train Accuracy=99.40166666666667: 100%|██████████| 1875/1875 [00:31<00:00, 58.87it/s]



Test set: Average loss: 0.0188, Accuracy: 9940/10000 (99%)

Epoch:  20


loss=0.0018444156739860773 batch_id=1874 Train Accuracy=99.465: 100%|██████████| 1875/1875 [00:31<00:00, 60.03it/s]



Test set: Average loss: 0.0186, Accuracy: 9938/10000 (99%)



In [6]:
from torchsummary import summary
norm = 'LN'

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
modelln = Net(norm).to(device)
summary(modelln, input_size=(1, 28, 28))

torch.manual_seed(1)
batch_size = 32

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
           Dropout-2           [-1, 16, 26, 26]               0
            Conv2d-3           [-1, 32, 24, 24]           4,608
         MaxPool2d-4           [-1, 32, 12, 12]               0
           Dropout-5           [-1, 32, 12, 12]               0
            Conv2d-6           [-1, 10, 12, 12]             320
            Conv2d-7           [-1, 16, 10, 10]           1,440
           Dropout-8           [-1, 16, 10, 10]               0
            Conv2d-9             [-1, 16, 8, 8]           2,304
          Dropout-10             [-1, 16, 8, 8]               0
           Conv2d-11             [-1, 16, 6, 6]           2,304
AdaptiveAvgPool2d-12             [-1, 16, 1, 1]               0
           Linear-13                   [-1, 10]             170
Total params: 11,290
Trainable params: 

In [7]:
optimizer = optim.SGD(modelln.parameters(), lr=0.05, momentum=0.9)
scheduler =  optim.lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.3)

for epoch in range(20):
    print("Epoch: ",epoch+1)
    train(modelln, device, train_loader, optimizer, epoch)
    scheduler.step()
    test(modelln, device, test_loader)

Epoch:  1


loss=0.035572875291109085 batch_id=1874 Train Accuracy=92.53166666666667: 100%|██████████| 1875/1875 [00:30<00:00, 61.62it/s]



Test set: Average loss: 0.0552, Accuracy: 9832/10000 (98%)

Epoch:  2


loss=0.037841737270355225 batch_id=1874 Train Accuracy=97.59833333333333: 100%|██████████| 1875/1875 [00:30<00:00, 61.75it/s]



Test set: Average loss: 0.0503, Accuracy: 9841/10000 (98%)

Epoch:  3


loss=0.23233257234096527 batch_id=1874 Train Accuracy=97.995: 100%|██████████| 1875/1875 [00:31<00:00, 59.62it/s]



Test set: Average loss: 0.0371, Accuracy: 9882/10000 (99%)

Epoch:  4


loss=0.017427071928977966 batch_id=1874 Train Accuracy=98.34166666666667: 100%|██████████| 1875/1875 [00:30<00:00, 60.87it/s]



Test set: Average loss: 0.0426, Accuracy: 9871/10000 (99%)

Epoch:  5


loss=0.17984917759895325 batch_id=1874 Train Accuracy=98.57666666666667: 100%|██████████| 1875/1875 [00:30<00:00, 60.84it/s]



Test set: Average loss: 0.0469, Accuracy: 9860/10000 (99%)

Epoch:  6


loss=0.00697863195091486 batch_id=1874 Train Accuracy=98.64166666666667: 100%|██████████| 1875/1875 [00:31<00:00, 60.45it/s]



Test set: Average loss: 0.0390, Accuracy: 9881/10000 (99%)

Epoch:  7


loss=0.048862215131521225 batch_id=1874 Train Accuracy=99.06833333333333: 100%|██████████| 1875/1875 [00:32<00:00, 58.14it/s]



Test set: Average loss: 0.0204, Accuracy: 9942/10000 (99%)

Epoch:  8


loss=0.0009507077047601342 batch_id=1874 Train Accuracy=99.13666666666667: 100%|██████████| 1875/1875 [00:32<00:00, 58.53it/s]



Test set: Average loss: 0.0240, Accuracy: 9922/10000 (99%)

Epoch:  9


loss=0.054597605019807816 batch_id=1874 Train Accuracy=99.165: 100%|██████████| 1875/1875 [00:30<00:00, 60.68it/s]



Test set: Average loss: 0.0217, Accuracy: 9932/10000 (99%)

Epoch:  10


loss=0.0010814211564138532 batch_id=1874 Train Accuracy=99.215: 100%|██████████| 1875/1875 [00:31<00:00, 60.27it/s]



Test set: Average loss: 0.0213, Accuracy: 9931/10000 (99%)

Epoch:  11


loss=0.12092837691307068 batch_id=1874 Train Accuracy=99.19333333333333: 100%|██████████| 1875/1875 [00:30<00:00, 61.06it/s]



Test set: Average loss: 0.0215, Accuracy: 9931/10000 (99%)

Epoch:  12


loss=0.03656817600131035 batch_id=1874 Train Accuracy=99.205: 100%|██████████| 1875/1875 [00:30<00:00, 60.83it/s]



Test set: Average loss: 0.0236, Accuracy: 9926/10000 (99%)

Epoch:  13


loss=0.005691139027476311 batch_id=1874 Train Accuracy=99.3: 100%|██████████| 1875/1875 [00:31<00:00, 60.31it/s]



Test set: Average loss: 0.0202, Accuracy: 9934/10000 (99%)

Epoch:  14


loss=0.0006967742810957134 batch_id=1874 Train Accuracy=99.33333333333333: 100%|██████████| 1875/1875 [00:31<00:00, 59.60it/s]



Test set: Average loss: 0.0207, Accuracy: 9928/10000 (99%)

Epoch:  15


loss=0.001463337684981525 batch_id=1874 Train Accuracy=99.33833333333334: 100%|██████████| 1875/1875 [00:31<00:00, 60.46it/s]



Test set: Average loss: 0.0199, Accuracy: 9932/10000 (99%)

Epoch:  16


loss=0.007838270626962185 batch_id=1874 Train Accuracy=99.31833333333333: 100%|██████████| 1875/1875 [00:30<00:00, 61.24it/s]



Test set: Average loss: 0.0205, Accuracy: 9931/10000 (99%)

Epoch:  17


loss=0.005796683952212334 batch_id=1874 Train Accuracy=99.35: 100%|██████████| 1875/1875 [00:31<00:00, 60.23it/s]



Test set: Average loss: 0.0195, Accuracy: 9937/10000 (99%)

Epoch:  18


loss=0.040556650608778 batch_id=1874 Train Accuracy=99.37666666666667: 100%|██████████| 1875/1875 [00:30<00:00, 60.93it/s]



Test set: Average loss: 0.0208, Accuracy: 9933/10000 (99%)

Epoch:  19


loss=0.006920449901372194 batch_id=1874 Train Accuracy=99.39666666666666: 100%|██████████| 1875/1875 [00:30<00:00, 60.95it/s]



Test set: Average loss: 0.0196, Accuracy: 9939/10000 (99%)

Epoch:  20


loss=0.010398844256997108 batch_id=1874 Train Accuracy=99.35: 100%|██████████| 1875/1875 [00:31<00:00, 59.55it/s]



Test set: Average loss: 0.0200, Accuracy: 9934/10000 (99%)



In [8]:
from torchsummary import summary
norm = 'BN'
dev = 'cuda:0'

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
modelbn = Net(norm).to(device)
summary(modelbn, input_size=(1, 28, 28))

torch.manual_seed(1)
batch_size = 32

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.RandomRotation((-1.0, 1.0), fill=(1,)),
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
       BatchNorm2d-2           [-1, 16, 26, 26]              32
           Dropout-3           [-1, 16, 26, 26]               0
            Conv2d-4           [-1, 32, 24, 24]           4,608
       BatchNorm2d-5           [-1, 32, 24, 24]              64
         MaxPool2d-6           [-1, 32, 12, 12]               0
           Dropout-7           [-1, 32, 12, 12]               0
            Conv2d-8           [-1, 10, 12, 12]             320
       BatchNorm2d-9           [-1, 10, 12, 12]              20
           Conv2d-10           [-1, 16, 10, 10]           1,440
      BatchNorm2d-11           [-1, 16, 10, 10]              32
          Dropout-12           [-1, 16, 10, 10]               0
           Conv2d-13             [-1, 16, 8, 8]           2,304
      BatchNorm2d-14             [-1, 1

In [9]:
optimizer = optim.SGD(modelbn.parameters(), lr=0.05, momentum=0.9)
scheduler =  optim.lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.3)

for epoch in range(20):
    print("Epoch: ",epoch+1)
    train(modelbn, device, train_loader, optimizer, epoch)
    scheduler.step()
    test(modelbn, device, test_loader)

Epoch:  1


loss=0.015754293650388718 batch_id=1874 Train Accuracy=94.815: 100%|██████████| 1875/1875 [00:30<00:00, 60.75it/s]



Test set: Average loss: 0.0491, Accuracy: 9831/10000 (98%)

Epoch:  2


loss=0.0910787582397461 batch_id=1874 Train Accuracy=97.61333333333333: 100%|██████████| 1875/1875 [00:30<00:00, 60.84it/s]



Test set: Average loss: 0.0624, Accuracy: 9798/10000 (98%)

Epoch:  3


loss=0.11007072776556015 batch_id=1874 Train Accuracy=97.94666666666667: 100%|██████████| 1875/1875 [00:30<00:00, 60.61it/s]



Test set: Average loss: 0.0281, Accuracy: 9912/10000 (99%)

Epoch:  4


loss=0.008179470896720886 batch_id=1874 Train Accuracy=98.24833333333333: 100%|██████████| 1875/1875 [00:31<00:00, 60.23it/s]



Test set: Average loss: 0.0429, Accuracy: 9873/10000 (99%)

Epoch:  5


loss=0.39409905672073364 batch_id=1874 Train Accuracy=98.35333333333334: 100%|██████████| 1875/1875 [00:31<00:00, 58.71it/s]



Test set: Average loss: 0.0419, Accuracy: 9865/10000 (99%)

Epoch:  6


loss=0.021480783820152283 batch_id=1874 Train Accuracy=98.48833333333333: 100%|██████████| 1875/1875 [00:31<00:00, 59.40it/s]



Test set: Average loss: 0.0421, Accuracy: 9865/10000 (99%)

Epoch:  7


loss=0.03809570148587227 batch_id=1874 Train Accuracy=98.99166666666666: 100%|██████████| 1875/1875 [00:31<00:00, 58.87it/s]



Test set: Average loss: 0.0203, Accuracy: 9930/10000 (99%)

Epoch:  8


loss=0.0021808911114931107 batch_id=1874 Train Accuracy=99.11166666666666: 100%|██████████| 1875/1875 [00:31<00:00, 59.84it/s]



Test set: Average loss: 0.0210, Accuracy: 9931/10000 (99%)

Epoch:  9


loss=0.04091348126530647 batch_id=1874 Train Accuracy=99.11833333333334: 100%|██████████| 1875/1875 [00:31<00:00, 59.89it/s]



Test set: Average loss: 0.0213, Accuracy: 9930/10000 (99%)

Epoch:  10


loss=0.0007508621783927083 batch_id=1874 Train Accuracy=99.15333333333334: 100%|██████████| 1875/1875 [00:31<00:00, 59.53it/s]



Test set: Average loss: 0.0190, Accuracy: 9934/10000 (99%)

Epoch:  11


loss=0.025483829900622368 batch_id=1874 Train Accuracy=99.27: 100%|██████████| 1875/1875 [00:32<00:00, 57.25it/s]



Test set: Average loss: 0.0228, Accuracy: 9920/10000 (99%)

Epoch:  12


loss=0.002071930794045329 batch_id=1874 Train Accuracy=99.21333333333334: 100%|██████████| 1875/1875 [00:31<00:00, 59.05it/s]



Test set: Average loss: 0.0201, Accuracy: 9930/10000 (99%)

Epoch:  13


loss=0.004139474593102932 batch_id=1874 Train Accuracy=99.31: 100%|██████████| 1875/1875 [00:31<00:00, 58.62it/s]



Test set: Average loss: 0.0198, Accuracy: 9926/10000 (99%)

Epoch:  14


loss=0.010278650559484959 batch_id=1874 Train Accuracy=99.35166666666667: 100%|██████████| 1875/1875 [00:31<00:00, 58.73it/s]



Test set: Average loss: 0.0199, Accuracy: 9928/10000 (99%)

Epoch:  15


loss=0.001879650168120861 batch_id=1874 Train Accuracy=99.31666666666666: 100%|██████████| 1875/1875 [00:31<00:00, 59.21it/s]



Test set: Average loss: 0.0195, Accuracy: 9930/10000 (99%)

Epoch:  16


loss=0.007332077249884605 batch_id=1874 Train Accuracy=99.38833333333334: 100%|██████████| 1875/1875 [00:32<00:00, 58.07it/s]



Test set: Average loss: 0.0191, Accuracy: 9935/10000 (99%)

Epoch:  17


loss=0.0031787953339517117 batch_id=1874 Train Accuracy=99.34333333333333: 100%|██████████| 1875/1875 [00:31<00:00, 58.93it/s]



Test set: Average loss: 0.0194, Accuracy: 9933/10000 (99%)

Epoch:  18


loss=0.044405605643987656 batch_id=1874 Train Accuracy=99.355: 100%|██████████| 1875/1875 [00:31<00:00, 59.07it/s]



Test set: Average loss: 0.0189, Accuracy: 9938/10000 (99%)

Epoch:  19


loss=0.0038079076912254095 batch_id=1874 Train Accuracy=99.43166666666667: 100%|██████████| 1875/1875 [00:31<00:00, 58.64it/s]



Test set: Average loss: 0.0183, Accuracy: 9939/10000 (99%)

Epoch:  20


loss=0.01800128072500229 batch_id=1874 Train Accuracy=99.39166666666667: 100%|██████████| 1875/1875 [00:31<00:00, 58.90it/s]



Test set: Average loss: 0.0188, Accuracy: 9939/10000 (99%)



In [10]:
# import torch
# import torch.nn as nn
# import torch.nn.functional as F
# import torch.optim as optim
# from torchvision import datasets, transforms

# class Net1(nn.Module):
#     def __init__(self, norm='GN', num=1, device='cuda:0'):
#         super(Net1, self).__init__()
#         self.norm = norm
#         self.num = num
#         self.device = device
#         self.conv1 = nn.Conv2d(1, 16, 3, padding=0, bias=False) # RF:1+(3-1)1=3; ji=1,jo=1; 28x28x1 -> 26x26x16
#         self.bn1 = nn.BatchNorm2d(16)
#         self.drop1 = nn.Dropout(0.1)
#         self.conv3 = nn.Conv2d(16, 32, 3, padding=0, bias=False) # RF:4+(3-1)2=8; ji=2,jo=2; 26x26x16 -> 24x24x16
#         self.bn2 = nn.BatchNorm2d(32)
#         self.pool2 = nn.MaxPool2d(2, 2) # RF:8+(2-1)2=10; ji=2,jo=4; 24x24x16 -> 12x12x16
#         self.drop2 = nn.Dropout(0.1)
#         self.conv5 = nn.Conv2d(32, 10, 1, padding=0, bias=False) # RF:10+(3-1)4=18; ji=4,jo=4; 12x12x16 -> 10x10x16
#         self.bn3 = nn.BatchNorm2d(10) 
#         self.drop3 = nn.Dropout(0.1)
#         self.conv6 = nn.Conv2d(10, 16, 3, padding=0, bias=False) # RF:18+(2-1)4=22; ji=4,jo=8; 10x10x16 -> 16x16x16
#         self.bn4 = nn.BatchNorm2d(16)
#         self.drop4 = nn.Dropout(0.1)
#         self.conv10 = nn.Conv2d(16, 16, 3, padding=0, bias=False) # RF:18+(2-1)4=22; ji=4,jo=8; 16x16x16 -> 14x14x16
#         self.bn10 = nn.BatchNorm2d(16)
#         self.drop10 = nn.Dropout(0.1)
#         self.conv7 = nn.Conv2d(16, 16, 3, padding=0, bias=False) # RF:18+(2-1)4=22; ji=4,jo=8; 5x5x16 -> 3x3x16
#         self.gap = nn.AdaptiveAvgPool2d((1,1)) 

#         self.lin = nn.Linear(16, 10)

#     def forward(self, x):

#         x = F.relu(self.conv1(x))

#         if self.norm == 'BN':
#           n_chans = x.shape[1]
#           running_mean = torch.zeros(n_chans).to(self.device) 
#           running_std = torch.ones(n_chans).to(self.device)
#           print(self.device)
#           print(running_mean)
#           x = F.batch_norm(x, running_mean, running_std, training=True)
#           x = F.relu(self.conv3(self.drop1(x)))
#           n_chans = x.shape[1]
#           running_mean = torch.zeros(n_chans).to(self.device) 
#           running_std = torch.ones(n_chans).to(self.device) 
#           x = F.batch_norm(x, running_mean, running_std, training=True)
#           x = F.relu(self.conv5(self.drop2(self.pool2(x))))
#           n_chans = x.shape[1]
#           running_mean = torch.zeros(n_chans).to(self.device) 
#           running_std = torch.ones(n_chans).to(self.device) 
#           x = F.batch_norm(x, running_mean, running_std, training=True)
#           x = F.relu(self.conv6(x))
#           n_chans = x.shape[1]
#           running_mean = torch.zeros(n_chans).to(self.device)  
#           running_std = torch.ones(n_chans).to(self.device) 
#           x = F.batch_norm(x, running_mean, running_std, training=True)
#           x = F.relu(self.conv10(self.drop4(x)))
#           n_chans = x.shape[1]
#           running_mean = torch.zeros(n_chans).to(self.device)  
#           running_std = torch.ones(n_chans).to(self.device) 
#           x = F.batch_norm(x, running_mean, running_std, training=True)

#         elif self.norm == 'GN':
#           x = F.group_norm(x,self.num)
#           x = F.relu(self.conv3(self.drop1(x)))
#           x = F.group_norm(x, self.num)
#           x = F.relu(self.conv5(self.drop2(self.pool2(x))))
#           x = F.group_norm(x, self.num)
#           x = F.relu(self.conv6(x))
#           x = F.group_norm(x, self.num)
#           x = F.relu(self.conv10(self.drop4(x)))
#           x = F.group_norm(x, self.num)

#         else:
#           x = F.layer_norm(x, [16, 26, 26])
#           x = F.relu(self.conv3(self.drop1(x)))
#           x = F.layer_norm(x, [32, 24, 24])
#           x = F.relu(self.conv5(self.drop2(self.pool2(x))))
#           x = F.layer_norm(x, [10, 12, 12])
#           x = F.relu(self.conv6(x))
#           x = F.layer_norm(x, [16, 10, 10])
#           x = F.relu(self.conv10(self.drop4(x)))
#           x = F.layer_norm(x, [16, 8, 8])

#         x = self.conv7(self.drop10(x))

#         x = self.gap(x)

#         x = x.view(-1, 16)

#         x = self.lin(x)
        
#         return F.log_softmax(x)
