In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
from torchvision import datasets, transforms
import torch.optim as optim
from tqdm import tqdm

In [2]:
# Select the compute device: use CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Per-channel (R, G, B) normalization statistics. Defined once so the train
# and test pipelines are guaranteed to use exactly the same values.
CIFAR10_MEAN = (0.49139968, 0.48215841, 0.44653091)
CIFAR10_STD = (0.24703223, 0.24348513, 0.26158784)

# Training pipeline: convert the image to a tensor, then normalize each
# channel with the statistics above.
train_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

# Test pipeline: same steps as training (no augmentation is applied to
# either split).
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD),
])

In [4]:
# CIFAR-10 training split, stored under ../data (fetched if it is not
# already present) and preprocessed with train_transforms.
train_data = datasets.CIFAR10(
    root='../data',
    train=True,
    transform=train_transforms,
    download=True,
)

# CIFAR-10 test split from the same root, preprocessed with test_transforms.
test_data = datasets.CIFAR10(
    root='../data',
    train=False,
    transform=test_transforms,
    download=True,
)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
# Keyword arguments shared by the DataLoaders. With CUDA available use a
# larger batch plus worker processes and pinned memory; otherwise use a
# smaller batch with the DataLoader defaults for everything else.
if torch.cuda.is_available():
    dataloader_args = {
        "shuffle": True,
        "batch_size": 128,
        "num_workers": 4,
        "pin_memory": True,
    }
else:
    dataloader_args = {
        "shuffle": True,
        "batch_size": 64,
    }

In [6]:
# Training batches use the shared settings as-is (dataloader_args enables
# shuffling, so each epoch sees the samples in a new random order).
train_loader = torch.utils.data.DataLoader(train_data, **dataloader_args)

# Evaluation gains nothing from a random order, so reuse the shared settings
# but override shuffle to keep the test pass deterministic in its ordering.
test_loader = torch.utils.data.DataLoader(
    test_data,
    **{**dataloader_args, "shuffle": False},
)

In [7]:
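# Provenance of the normalization constants used in the transforms above.
# They were computed once from the raw training images with:
#
#   print(train_data.data.shape)
#   print(train_data.data.mean(axis=(0,1,2))/255)
#   print(train_data.data.std(axis=(0,1,2))/255)
#
# which printed:
#
#   (50000, 32, 32, 3)
#   [0.49139968 0.48215841 0.44653091]
#   [0.24703223 0.24348513 0.26158784]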

In [8]:
from model import Model_batch_norm,Model_group_norm,Model_layer_norm

In [12]:
# Build the batch-norm variant of the network and place it on the selected
# device, then print a per-layer summary for a single 3x32x32 input.
model_b = Model_batch_norm()
model_b = model_b.to(device)
summary(model_b, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             432
              ReLU-2           [-1, 16, 32, 32]               0
       BatchNorm2d-3           [-1, 16, 32, 32]              32
            Conv2d-4           [-1, 32, 32, 32]           4,608
              ReLU-5           [-1, 32, 32, 32]               0
       BatchNorm2d-6           [-1, 32, 32, 32]              64
            Conv2d-7           [-1, 16, 34, 34]             512
              ReLU-8           [-1, 16, 34, 34]               0
       BatchNorm2d-9           [-1, 16, 34, 34]              32
        MaxPool2d-10           [-1, 16, 17, 17]               0
           Conv2d-11           [-1, 32, 17, 17]           4,608
             ReLU-12           [-1, 32, 17, 17]               0
      BatchNorm2d-13           [-1, 32, 17, 17]              64
           Conv2d-14           [-1, 32,

  return F.log_softmax(x)


In [11]:
from train_test import train, test

In [14]:
# Fit the batch-norm model with SGD + momentum. Each epoch (reported
# 1-based) calls train() on the training loader and then test() on the
# test loader.
epochs = 5
optimizer = optim.SGD(
    model_b.parameters(),
    lr=0.01,
    momentum=0.9,
)
for epoch in range(1, epochs + 1):
    print("EPOCH:", epoch)
    train(model_b, device, train_loader, optimizer)
    test(model_b, device, test_loader)

EPOCH: 1


  return F.log_softmax(x)
Loss=1.496031403541565 Accuracy=51.48: 100%|██████████| 782/782 [01:47<00:00,  7.26it/s] 



Test set: Average loss: 1.1378, Accuracy: 6026/10000 (60.26%)

EPOCH: 2


Loss=0.6969391107559204 Accuracy=67.17: 100%|██████████| 782/782 [01:49<00:00,  7.11it/s]



Test set: Average loss: 0.8992, Accuracy: 6830/10000 (68.30%)

EPOCH: 3


Loss=1.200422763824463 Accuracy=72.73: 100%|██████████| 782/782 [01:51<00:00,  7.03it/s] 



Test set: Average loss: 0.7480, Accuracy: 7409/10000 (74.09%)

EPOCH: 4


Loss=0.8028671145439148 Accuracy=75.71: 100%|██████████| 782/782 [01:54<00:00,  6.84it/s] 



Test set: Average loss: 0.7092, Accuracy: 7568/10000 (75.68%)

EPOCH: 5


Loss=0.4368749260902405 Accuracy=77.97: 100%|██████████| 782/782 [01:53<00:00,  6.91it/s] 



Test set: Average loss: 0.6828, Accuracy: 7655/10000 (76.55%)



In [16]:
# Build the group-norm variant of the network and place it on the selected
# device, then print a per-layer summary for a single 3x32x32 input.
model_g = Model_group_norm()
model_g = model_g.to(device)
summary(model_g, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             432
              ReLU-2           [-1, 16, 32, 32]               0
         GroupNorm-3           [-1, 16, 32, 32]              32
            Conv2d-4           [-1, 32, 32, 32]           4,608
              ReLU-5           [-1, 32, 32, 32]               0
         GroupNorm-6           [-1, 32, 32, 32]              64
            Conv2d-7           [-1, 16, 34, 34]             512
              ReLU-8           [-1, 16, 34, 34]               0
         GroupNorm-9           [-1, 16, 34, 34]              32
        MaxPool2d-10           [-1, 16, 17, 17]               0
           Conv2d-11           [-1, 32, 17, 17]           4,608
             ReLU-12           [-1, 32, 17, 17]               0
        GroupNorm-13           [-1, 32, 17, 17]              64
           Conv2d-14           [-1, 32,

  return F.log_softmax(x)


In [19]:
# Fit the group-norm model with SGD + momentum. Each epoch (reported
# 1-based) calls train() on the training loader and then test() on the
# test loader.
epochs = 5
optimizer = optim.SGD(
    model_g.parameters(),
    lr=0.01,
    momentum=0.9,
)
for epoch in range(1, epochs + 1):
    print("EPOCH:", epoch)
    train(model_g, device, train_loader, optimizer)
    test(model_g, device, test_loader)

EPOCH: 1


Loss=1.2023046016693115 Accuracy=47.12: 100%|██████████| 782/782 [01:42<00:00,  7.66it/s]



Test set: Average loss: 1.2135, Accuracy: 5650/10000 (56.50%)

EPOCH: 2


Loss=0.9720831513404846 Accuracy=63.17: 100%|██████████| 782/782 [01:44<00:00,  7.48it/s]



Test set: Average loss: 1.0278, Accuracy: 6421/10000 (64.21%)

EPOCH: 3


Loss=1.1946169137954712 Accuracy=69.81: 100%|██████████| 782/782 [01:45<00:00,  7.39it/s]



Test set: Average loss: 0.8806, Accuracy: 6936/10000 (69.36%)

EPOCH: 4


Loss=0.727527379989624 Accuracy=73.37: 100%|██████████| 782/782 [01:46<00:00,  7.33it/s]  



Test set: Average loss: 0.8095, Accuracy: 7198/10000 (71.98%)

EPOCH: 5


Loss=0.7859549522399902 Accuracy=76.42: 100%|██████████| 782/782 [01:49<00:00,  7.16it/s] 



Test set: Average loss: 0.7362, Accuracy: 7460/10000 (74.60%)



In [9]:
# Build the layer-norm variant of the network and place it on the selected
# device, then print a per-layer summary for a single 3x32x32 input.
model_l = Model_layer_norm()
model_l = model_l.to(device)
summary(model_l, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]             432
              ReLU-2           [-1, 16, 32, 32]               0
         GroupNorm-3           [-1, 16, 32, 32]              32
            Conv2d-4           [-1, 32, 32, 32]           4,608
              ReLU-5           [-1, 32, 32, 32]               0
         GroupNorm-6           [-1, 32, 32, 32]              64
            Conv2d-7           [-1, 16, 34, 34]             512
              ReLU-8           [-1, 16, 34, 34]               0
         GroupNorm-9           [-1, 16, 34, 34]              32
        MaxPool2d-10           [-1, 16, 17, 17]               0
           Conv2d-11           [-1, 32, 17, 17]           4,608
             ReLU-12           [-1, 32, 17, 17]               0
        GroupNorm-13           [-1, 32, 17, 17]              64
           Conv2d-14           [-1, 32,

  return F.log_softmax(x)


In [12]:
# Fit the layer-norm model with SGD + momentum. Each epoch (reported
# 1-based) calls train() on the training loader and then test() on the
# test loader.
epochs = 5
optimizer = optim.SGD(
    model_l.parameters(),
    lr=0.01,
    momentum=0.9,
)
for epoch in range(1, epochs + 1):
    print("EPOCH:", epoch)
    train(model_l, device, train_loader, optimizer)
    test(model_l, device, test_loader)

EPOCH: 1


Loss=0.8980305194854736 Accuracy=44.00: 100%|██████████| 782/782 [01:42<00:00,  7.61it/s]



Test set: Average loss: 1.2486, Accuracy: 5531/10000 (55.31%)

EPOCH: 2


Loss=1.0875978469848633 Accuracy=61.37: 100%|██████████| 782/782 [01:48<00:00,  7.21it/s]



Test set: Average loss: 0.9766, Accuracy: 6534/10000 (65.34%)

EPOCH: 3


Loss=0.5845539569854736 Accuracy=68.61: 100%|██████████| 782/782 [01:47<00:00,  7.28it/s]



Test set: Average loss: 0.8717, Accuracy: 6991/10000 (69.91%)

EPOCH: 4


Loss=0.7025521397590637 Accuracy=72.88: 100%|██████████| 782/782 [01:47<00:00,  7.27it/s] 



Test set: Average loss: 0.8137, Accuracy: 7199/10000 (71.99%)

EPOCH: 5


Loss=0.6720433831214905 Accuracy=75.57: 100%|██████████| 782/782 [01:47<00:00,  7.29it/s] 



Test set: Average loss: 0.7550, Accuracy: 7367/10000 (73.67%)

