Section_3_1

This is the model with the accuracy of 83% (After target was reduced)

In [1]:
!pip install torch
!pip install torchvision
!pip install torchsummary
!pip install skorch



In [2]:
import numpy as np
import torch
import torch.nn as nn
from torch.optim import optimizer
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
from torchsummary import summary
import gc
import time
import skorch

#from d2l import torch as d2l
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


Make sure results are replicatable:

In [3]:
seed = 0
torch.manual_seed(seed)

<torch._C.Generator at 0x28a101176b0>

Residual Block:

In [27]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, add, stride = 1, downsample = None):
        super(ResidualBlock, self).__init__()
        self.add = add

        self.conv1 = nn.Sequential(
                        nn.Conv2d(in_channels, out_channels, kernel_size = 3, stride = stride, padding = 1),
                        nn.BatchNorm2d(out_channels),
                        nn.ReLU())
        self.conv2 = nn.Sequential(
                        nn.Conv2d(out_channels, out_channels, kernel_size = 3, stride = 1, padding = 1),
                        nn.BatchNorm2d(out_channels))
        if self.add:
            self.added_layer = self.add_layer(out_channels)
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def add_layer(self, planes):
        return nn.Sequential(
            nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(planes),
            nn.ReLU()
        )
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.add:
            out = self.added_layer(out)
        if self.downsample:
            residual = self.downsample(x)
        out = out + residual
        out = self.relu(out)
        return out

Resnet Model

In [33]:
class ResNet(nn.Module):
    def __init__(self, block, layers, l1, l2, l3, num_classes = 10):
        super(ResNet, self).__init__()
        self.Dropout = layers[4]
        self.inplanes = 64
        self.conv1 = nn.Sequential(
                        #nn.Conv2d(3, 64, kernel_size = 3, stride = 1, padding = 1),
                        nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3),
                        nn.BatchNorm2d(64),
                        nn.ReLU()
                        )
        self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
        self.layer0 = self._make_layer(block, 64, layers[0], 0, stride = 1)
        self.layer1 = self._make_layer(block, 128, layers[1], l1, stride = 2)
        self.layer2 = self._make_layer(block, 256, layers[2], l2, stride = 2)
        self.layer3 = self._make_layer(block, 512, layers[3], l3, stride = 2)
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        #self.avgpool = nn.AvgPool2d(7,stride=1)
        self.fc = nn.Linear(512, num_classes)
        
    def _make_layer(self, block, planes, blocks, add_layer, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes:
            
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes),
            )
        layers = []
        layers.append(block(self.inplanes, planes, add_layer, stride, downsample))
        self.inplanes = planes
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, add_layer))

        return nn.Sequential(*layers)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.layer0(x)
        #x = self.drop1(x)
        x = self.layer1(x)
        #x = self.drop2(x)
        x = self.layer2(x)
        #x = self.drop3(x)
        x = self.layer3(x)
        
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

Checking Summary of Resnet Model with different input sizes and channels

In [34]:
model = ResNet(ResidualBlock, [2,2,2,2,0.1], 0,0,0).to(device)
summary(model, (3, 32, 32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 16, 16]           9,472
       BatchNorm2d-2           [-1, 64, 16, 16]             128
              ReLU-3           [-1, 64, 16, 16]               0
         MaxPool2d-4             [-1, 64, 8, 8]               0
            Conv2d-5             [-1, 64, 8, 8]          36,928
       BatchNorm2d-6             [-1, 64, 8, 8]             128
              ReLU-7             [-1, 64, 8, 8]               0
            Conv2d-8             [-1, 64, 8, 8]          36,928
       BatchNorm2d-9             [-1, 64, 8, 8]             128
             ReLU-10             [-1, 64, 8, 8]               0
    ResidualBlock-11             [-1, 64, 8, 8]               0
           Conv2d-12             [-1, 64, 8, 8]          36,928
      BatchNorm2d-13             [-1, 64, 8, 8]             128
             ReLU-14             [-1, 6

Loading CIFAR10 Dataset and applying tansforms on it

In [8]:
imsize = 32
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((imsize,imsize), antialias=True),
    #transforms.Grayscale(num_output_channels=1)
    transforms.RandomHorizontalFlip()
])

dataTrain = datasets.CIFAR10(
    root="dataset",
    train=True,
    download=True,
    transform=transform
)
dataValid = datasets.CIFAR10(
    root="dataset",
    train=False,
    download=True,
    transform=transform
)


print(len(dataValid[0][0]))

valid_y = np.array([y for x,y in iter(dataValid)])
test_y = np.array([y for x,y in iter(dataTrain)])

Files already downloaded and verified
Files already downloaded and verified
3


Implementing Trainer: Static Learning Rate

In [None]:
#CLASStorch.optim.SGD(params, lr=<required parameter>, momentum=0, dampening=0, 
#weight_decay=0, nesterov=False, *, maximize=False, foreach=None, differentiable=False)
def trainer(epochs, dataloader, model, lr, loader_valid, decay, mom ):
  optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=decay, momentum=mom)
  scaler = torch.cuda.amp.GradScaler()
  loss_f = nn.CrossEntropyLoss()
  print("Epoch, Loss, Time, val_acc")
  correct = 0
  processed = 0

  for epoch in range(epochs):
    start = time.perf_counter()
    for x,y in dataloader:
      x = x.to(device)
      y = y.to(device)
      with torch.cuda.amp.autocast():
        pred = model(x)
        loss = loss_f(pred, y)
      optimizer.zero_grad()
      scaler.scale(loss).backward()
      scaler.step(optimizer)
      scaler.update()

      pred = pred.argmax(dim=1, keepdim=True)
      correct += pred.eq(y.view_as(pred)).sum().item()
      processed += len(x)

      del x, y
      torch.cuda.empty_cache()
      gc.collect()
    end = time.perf_counter()

    train_acc = 100*(correct/processed)
    acc = predict(model, loader_valid)
    print(epoch + 1,",", loss.item(),",", end-start,",", acc, ",", train_acc)


def predict(model, loader_valid):
  with torch.no_grad():
    correct = 0
    total = 0
    for x,y in loader_valid:
      x = x.to(device)
      y = y.to(device)
      output = model(x)
      _, predicted = torch.max(output.data, 1)
      total += y.size(0)
      correct += (predicted == y).sum().item()
      del x,y,output
  return 100*(correct/total)

In [35]:
epochs = 20
num_w = [2]
lr = [0.04]
Dropout = 0.0
b_size = 4500
mom = 0.5
decay = [0.015]

for i in range(0,len(num_w)):
    loader_train = DataLoader(dataTrain, batch_size=b_size, shuffle=True, num_workers=num_w[i])
    loader_valid = DataLoader(dataValid, batch_size=b_size, shuffle=True, num_workers=num_w[i])

    for j in range(0, len(lr)):
        for k in range(0, len(decay)):
            print("Decay:", decay[k], " LR:", lr[j])
            model = ResNet(ResidualBlock, [2,2,2,2,Dropout], 0, 0, 0).to(device)
            trainer(epochs ,loader_train ,model, lr[j], loader_valid, decay[k], mom)

Decay: 0.015  LR: 0.04


TypeError: ResNet.__init__() missing 3 required positional arguments: 'l1', 'l2', and 'l3'

Implementing Trainer: Learning Rate Scheduler Version (What Im using for final model)

In [15]:
def trainer(epochs, dataloader, model, lr, loader_valid, decay, mom, batch):
  optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=decay, momentum=mom)
  #scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max')
  scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,max_lr=lr, steps_per_epoch=((50000//batch) + 1), epochs=epochs,anneal_strategy='linear')
  scaler = torch.cuda.amp.GradScaler()
  loss_f = nn.CrossEntropyLoss()
  print("Epoch, Loss, Time, val acc, train acc")
  correct = 0
  processed = 0

  for epoch in range(epochs):
    start = time.perf_counter()
    for x,y in dataloader:
      x = x.to(device)
      y = y.to(device)
      with torch.cuda.amp.autocast():
        pred = model(x)
        loss = loss_f(pred, y)
      optimizer.zero_grad()
      scaler.scale(loss).backward()
      scaler.step(optimizer)
      scaler.update()
      scheduler.step()
      #print(scheduler.get_last_lr())

      pred = pred.argmax(dim=1, keepdim=True)
      correct += pred.eq(y.view_as(pred)).sum().item()
      processed += len(x)

      del x, y
      torch.cuda.empty_cache()
      gc.collect()
    end = time.perf_counter()

    train_acc = 100*(correct/processed)
    acc = predict(model, loader_valid, loss_f,scheduler)
    print(epoch + 1,",", loss.item(),",", end-start,",", acc, ",", train_acc)


def predict(model, loader_valid, loss_f,scheduler):
  with torch.no_grad():
    #t_loss=0
    correct = 0
    total = 0
    for x,y in loader_valid:
      x = x.to(device)
      y = y.to(device)
      output = model(x)
      loss = loss_f(output, y)
      #t_loss += loss.item()
      _, predicted = torch.max(output.data, 1)
      total += y.size(0)
      correct += (predicted == y).sum().item()
      del x,y,output
    #scheduler.step(t_loss)
  return 100*(correct/total)

In [36]:
epochs = 20
num_w = 2
lr = [0.075]
Dropout = 0.0
b_size = [1024]
mom = 0.5
decay = [0.01]

for i in range(0,len(b_size)):
    loader_train = DataLoader(dataTrain, batch_size=b_size[i], shuffle=True, num_workers=num_w)
    loader_valid = DataLoader(dataValid, batch_size=b_size[i], shuffle=True, num_workers=num_w)

    for j in range(0, len(lr)):
        for k in range(0, len(decay)):
            print("Decay:", decay[k], " LR:", lr[j])
            model = ResNet(ResidualBlock, [2,2,2,2,Dropout], 0,0,0).to(device)
            trainer(epochs ,loader_train ,model, lr[j], loader_valid, decay[k], mom, b_size[i])

Decay: 0.01  LR: 0.075
Epoch, Loss, Time, val acc, train acc
1 , 1.2892842292785645 , 11.680418000003556 , 52.059999999999995 , 40.756
2 , 1.1088640689849854 , 11.553782500006491 , 59.98 , 48.649
3 , 0.9654399752616882 , 11.179152800003067 , 64.57000000000001 , 53.715999999999994
4 , 0.8501710891723633 , 11.661519400018733 , 67.44 , 57.01500000000001
5 , 0.8359678983688354 , 11.30753249998088 , 70.3 , 59.6316
6 , 0.8306155204772949 , 11.540755800000625 , 70.14 , 61.608666666666664
7 , 0.7208300232887268 , 11.295407099998556 , 72.82 , 63.242
8 , 0.6891565918922424 , 11.493630199984182 , 72.25 , 64.704
9 , 0.6642736196517944 , 11.305872500000987 , 72.45 , 66.05711111111111
10 , 0.6936509013175964 , 11.769682500016643 , 74.03 , 67.2372
11 , 0.6528723835945129 , 11.46218519998365 , 74.7 , 68.25509090909091
12 , 0.5880341529846191 , 11.442603900009999 , 75.35 , 69.229
13 , 0.5849801301956177 , 11.515507600008277 , 77.22 , 70.12076923076923
14 , 0.5780685544013977 , 11.508912699995562 , 77.0

Refernces:

https://blog.paperspace.com/writing-resnet-from-scratch-in-pytorch/ 

https://appsilon.com/pytorch-neural-network-tutorial/

https://machinelearningmastery.com/how-to-reduce-overfitting-with-dropout-regularization-in-keras/#:~:text=CNN%20Dropout%20Regularization,is%20just%20a%20rough%20heuristic.&text=In%20this%20case%2C%20dropout%20is,cell%20within%20the%20feature%20maps. - dropout

https://www.kaggle.com/code/greatcodes/pytorch-cnn-resnet18-cifar10/notebook 

https://towardsdatascience.com/a-visual-guide-to-learning-rate-schedulers-in-pytorch-24bbb262c863 - lr scheduler