In [6]:
import torch
import torch.nn as nn
import pandas as pd
import torch.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset, random_split
from torchmetrics import Accuracy
from tqdm import tqdm

import torchvision
from torchvision import transforms as T
from torchvision.datasets import CIFAR10

In [57]:
#transform is function to preprocess the data such as converting PIL files into 
#tensors to work with and do things like augmentation
#augmentation should be done betor ToTensor cause it is done in PIL format
transformer_train = T.Compose([T.RandomCrop(32 , padding=4),
                               T.RandomHorizontalFlip(p=0.5),
                               T.ToTensor(), 
                               T.Normalize(mean= (0.4914,0.4822,0.4465),
                                               std= (0.2023,0.1994,0.2010))])
#we should not augment validation set
transformer_valid = T.Compose([T.ToTensor(), 
                               T.Normalize(mean= (0.4914,0.4822,0.4465),
                                               std= (0.2023,0.1994,0.2010))])

In [58]:
train_set = CIFAR10(root = r'C:\Users\Azadeh\datasets\cifar10' , train = True , download = True 
                    , transform= transformer_train)

Files already downloaded and verified


In [59]:
test_set = CIFAR10(root = r'C:\Users\Azadeh\datasets\cifar10' , train = False , download = True
                  , transform= transformer_valid)

Files already downloaded and verified


In [60]:
train_set

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: C:\Users\Azadeh\datasets\cifar10
    Split: Train
    StandardTransform
Transform: Compose(
               RandomCrop(size=(32, 32), padding=4)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.201))
           )

In [61]:
train_set[0]

(tensor([[[-2.4291, -2.4291, -2.4291,  ..., -2.4291, -2.4291, -2.4291],
          [-2.4291, -2.4291, -2.4291,  ..., -2.4291, -2.4291, -2.4291],
          [-2.4291, -1.2854, -1.5955,  ...,  0.6338,  0.6338,  0.5174],
          ...,
          [-2.4291,  1.7581,  1.3122,  ...,  1.5061,  1.5642, -0.0253],
          [-2.4291,  1.8356,  1.4673,  ...,  1.1765,  0.9245, -1.3435],
          [-2.4291,  1.6030,  1.4673,  ...,  0.2267,  0.6725, -1.3435]],
 
         [[-2.4183, -2.4183, -2.4183,  ..., -2.4183, -2.4183, -2.4183],
          [-2.4183, -2.4183, -2.4183,  ..., -2.4183, -2.4183, -2.4183],
          [-2.4183, -1.1989, -1.5136,  ...,  0.1384,  0.1778,  0.0401],
          ...,
          [-2.4183,  1.0038,  0.2564,  ...,  0.8661,  0.9841, -0.5892],
          [-2.4183,  1.1611,  0.5318,  ...,  0.6301,  0.4138, -1.8479],
          [-2.4183,  0.9251,  0.5908,  ..., -0.3532,  0.1974, -1.8086]],
 
         [[-2.2214, -2.2214, -2.2214,  ..., -2.2214, -2.2214, -2.2214],
          [-2.2214, -2.2214,

In [62]:
train_set[0][0].shape 

torch.Size([3, 32, 32])

In [63]:
train_set.classes

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [64]:
train_set.class_to_idx

{'airplane': 0,
 'automobile': 1,
 'bird': 2,
 'cat': 3,
 'deer': 4,
 'dog': 5,
 'frog': 6,
 'horse': 7,
 'ship': 8,
 'truck': 9}

In [65]:
len(train_set)

50000

In [66]:
#each channel of 3D images have their own means and stds
mean = torch.FloatTensor(train_set.data/255.).mean(dim=[0,1,2])
std = torch.FloatTensor(train_set.data/255.).mean(dim=[0,1,2])

In [67]:
print(mean)
std

tensor([0.4914, 0.4822, 0.4465])


tensor([0.4914, 0.4822, 0.4465])

💠 dataloader

In [68]:
train_loader = DataLoader(train_set , batch_size = 64 , shuffle = True)
test_loader = DataLoader(test_set , batch_size = 128 , shuffle = False)

In [69]:
x, y = next(iter(train_loader))
print(x.shape , y.shape)

torch.Size([64, 3, 32, 32]) torch.Size([64])


💠 Model

In [70]:
def con3x3_bn_af(in_channelss , out_channels):
    module = nn.Sequential(nn.Conv2d(in_channelss , out_channels , 3 ,padding = 1 ),
                          nn.BatchNorm2d(out_channels),
                          nn.ReLU(),)
    return module
    

In [71]:
def CNN():
    network = nn.Sequential(con3x3_bn_af(3,64),
                          con3x3_bn_af(64,64),
                          nn.MaxPool2d(2,2),   #BS*64*16*16

                          con3x3_bn_af(64,128),
                          con3x3_bn_af(128,128),
                          nn.MaxPool2d(2,2),   #BS*128*8*8

                          con3x3_bn_af(128,256),
                          con3x3_bn_af(256,256),
                          nn.MaxPool2d(2,2),   #BS*256*4*4

                          con3x3_bn_af(256,512),
                          con3x3_bn_af(512,512),
                          # AvgPool put a kernel on data to make it 1*1 but we need to give 
                            #it the right kernel size and it depens on pictues input size 
                            #to solve this problem we use AdaptiveAvgPool which just need us to 
                            #give it our needed output size
                          nn.AdaptiveAvgPool2d(output_size = (1,1)),  #BS*512*1*1
                          
                          nn.Flatten(),
                          nn.Linear(512,10)  #classifier
                          
                      )
    return network

In [72]:
model = CNN()

In [73]:
model(torch.randn(10,3,32,32)).shape

torch.Size([10, 10])

In [74]:
model[0][0].weight.shape

torch.Size([64, 3, 3, 3])

In [75]:
model[0][0].bias.shape

torch.Size([64])

In [76]:
def num_params(model):
    num = sum(p.numel() for p in model.parameters())/1e6
    return num

In [77]:
num_params(model)

4.694346

💠 put on device

In [78]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [79]:
model = CNN().to(device)

In [80]:
loss_fn = nn.CrossEntropyLoss()
#weight_decay is l2_regularizer which compress the weights to make them smaller 
optimizer = optim.SGD( model.parameters() ,lr = 0.1 , momentum = 0.9 , nesterov = True , weight_decay = 1e-4)

💠 utilities

In [81]:
class AverageMeter(object):
    #computes and store the average and current value
    def __init__(self):
        self.reset()
    def reset(self):
        self.val=0
        self.avg=0
        self.sum=0
        self.count=0
    def update(self , val , n=1):
        self.val=val
        self.sum += val*n
        self.count +=n
        self.avg = self.sum /self.count

In [82]:
loss_meter = AverageMeter()
acc = Accuracy(task="multiclass", num_classes=4)

In [83]:
def train_one_epoch(model , train_loader ,loss_fn , optimizer , epoch = None):
    #some elements like dropout just runs in train process so we need to set train or eval flag
    model.train()
    loss_train =  AverageMeter()
    acc_train = Accuracy(task="multiclass", num_classes=10).to(device)
    #acc_train = Accuracy(task="multiclass", num_classes=4)

    #to show train process in a progressbar we use tqdm 
    with tqdm(train_loader, unit = 'batch') as tepoch:
        for inputs,targets in tepoch:
            if epoch is not None:
                #to write the epoch number next to progressbar
                tepoch.set_description(f'Epoch {epoch}')
            inputs = inputs.to(device)
            targets = targets.to(device)
            yp = model(inputs)
            loss = loss_fn(yp , targets)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            loss_train.update(loss.item())
            #acc = torch.sum(yp.argmax(dim=1) == targets)
            acc_train(yp , targets)
            #to pass data that we want to show infront of the progressbar we use set_postfix
            tepoch.set_postfix(loss= loss_train.avg, accuracy = acc_train.compute().item()*100. )
    return model , loss_train.avg , acc_train.compute().item()
            

In [84]:
def evaluate (model , valid_loader , loss_fn):
    model.eval()
    with torch.no_grad():
        loss_valid = AverageMeter()
        acc_valid = Accuracy(task="multiclass", num_classes=4).to(device)
        for i , (inputs,targets) in enumerate(valid_loader):
            inputs = inputs.to(device)
            targets = targets.to(device)
            
            yp = model(inputs)
            loss = loss_fn(yp , targets)
            loss_valid.update(loss.item())
            acc_valid(yp , targets)
    return loss_valid.avg , acc_valid.compute()
            

💠 check forward path

In [85]:
#check out the model using a batch of data
yp = model (x.to(device))
yp [:2,:]
print(loss_fn(yp , y.to(device)))

tensor(2.3317, grad_fn=<NllLossBackward0>)


💠 check backward path

In [55]:
#selecting 5 batch to check if model overfit? in this way check capasity of model 
#random_split(num_sample_required , num_rest_of_data)
mini_train_dataset , _ = random_split(train_set , (1000, len(train_set)-1000))
mini_loader = DataLoader(mini_train_dataset , 20, True)
for epoch in range (50):
    model , _ , _ = train_one_epoch(model , mini_loader , loss_fn , optimizer , epoch)

Epoch 0: 100%|████████████████████████████████████████████| 50/50 [00:17<00:00,  2.83batch/s, accuracy=15.5, loss=2.83]
Epoch 1: 100%|█████████████████████████████████████████████| 50/50 [00:13<00:00,  3.71batch/s, accuracy=19.9, loss=2.2]
Epoch 2: 100%|████████████████████████████████████████████| 50/50 [00:15<00:00,  3.22batch/s, accuracy=22.5, loss=2.07]
Epoch 3: 100%|██████████████████████████████████████████████| 50/50 [00:15<00:00,  3.30batch/s, accuracy=27, loss=1.95]
Epoch 4: 100%|██████████████████████████████████████████████| 50/50 [00:14<00:00,  3.37batch/s, accuracy=29, loss=1.93]
Epoch 5: 100%|████████████████████████████████████████████| 50/50 [00:14<00:00,  3.34batch/s, accuracy=30.7, loss=1.86]
Epoch 6: 100%|████████████████████████████████████████████| 50/50 [00:15<00:00,  3.33batch/s, accuracy=32.9, loss=1.81]
Epoch 7: 100%|████████████████████████████████████████████| 50/50 [00:14<00:00,  3.35batch/s, accuracy=35.1, loss=1.78]
Epoch 8: 100%|██████████████████████████

💠 select best lr

In [None]:
for lr in [0.08,0.09,0.1,0.15,0.2]:
    model = CNN().to(device)
    optimizer = optim.SGD( model.parameters() ,lr = lr , momentum = 0.9 ,
                          nesterov = True , weight_decay = 1e-4)
    print(f'lr = {lr}')
    for epoch in range (2):
        model , _ , _ = train_one_epoch(model , mini_loader , loss_fn , optimizer , epoch)
        
#loss drops bether in which lr 
#after selecting lr we make it specific and arange it with weigth_decay in a grid search   

💠 train model

In [86]:
lr = 0.1
wd = 1e-4
optimizer = optim.SGD(model.parameters(), lr = lr , weight_decay = wd , momentum = 0.9)
#we set best valid to infinity
best_valid_loss = torch.inf
loss_train_hist = []
loss_valid_hist = []
acc_train_hist = []
acc_valid_hist = []

num_epoch = 5
for epoch in range(num_epoch):
    model , loss_train , acc_train = train_one_epoch(model , train_loader , loss_fn , optimizer)

    loss_valid , acc_valid = validation(model , test_loader , loss_fn )

    loss_train_hist.append(loss_train)
    acc_train_hist.append(acc_train)

    loss_valid_hist.append(loss_valid)
    acc_valid_hist.append(acc_valid)

    print(f'valid : loss={loss_valid:0.4} , Acc = {acc_valid:0.4}')

    if loss_valid < best_valid_loss:
        print(f'best validation loss is {loss_valid} in {epoch}th epoch')
        best_valid_loss = loss_valid 


100%|███████████████████████████████████████████████████| 782/782 [10:50<00:00,  1.20batch/s, accuracy=43.5, loss=1.54]


NameError: name 'validation' is not defined