Section_3_2

This is the model with the accuracy of 90% (Before target was reduced)

In [1]:
!pip install torch
!pip install torchvision
!pip install torchsummary
!pip install skorch



In [2]:
import numpy as np
import torch
import torch.nn as nn
from torch.optim import optimizer
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
from torchsummary import summary
import gc
import time
import skorch

#from d2l import torch as d2l
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [3]:
seed = 0
torch.manual_seed(seed)

<torch._C.Generator at 0x208ff8560d0>

In [4]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, add, stride = 1, downsample = None):
        super(ResidualBlock, self).__init__()
        self.add = add

        self.conv1 = nn.Sequential(
                        nn.Conv2d(in_channels, out_channels, kernel_size = 3, stride = stride, padding = 1),
                        nn.BatchNorm2d(out_channels),
                        nn.ReLU())
        self.conv2 = nn.Sequential(
                        nn.Conv2d(out_channels, out_channels, kernel_size = 3, stride = 1, padding = 1),
                        nn.BatchNorm2d(out_channels))
        if self.add:
            self.added_layer = self.add_layer(out_channels)
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def add_layer(self, planes):
        return nn.Sequential(
            nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(planes),
            nn.ReLU()
        )
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.conv2(out)
        if self.add:
            out = self.added_layer(out)
        if self.downsample:
            residual = self.downsample(x)
        out = out + residual
        out = self.relu(out)
        return out

In [10]:
class ResNet(nn.Module):
    def __init__(self, block, layers, l1, l2, l3, num_classes = 10):
        super(ResNet, self).__init__()
        self.Dropout = layers[4]
        self.inplanes = 64
        self.conv1 = nn.Sequential(
                        nn.Conv2d(3, 64, kernel_size = 3, stride = 1, padding = 1),
                        #nn.Conv2d(3, 64, kernel_size = 7, stride = 2, padding = 3),
                        nn.BatchNorm2d(64),
                        nn.ReLU()
                        )
        #self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
        self.layer0 = self._make_layer(block, 32, layers[0], 0, stride = 1)
        self.layer1 = self._make_layer(block, 64, layers[1], l1, stride = 2)
        self.layer2 = self._make_layer(block, 128, layers[2], l2, stride = 2)
        self.layer3 = self._make_layer(block, 256, layers[3], l3, stride = 2)
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        #self.avgpool = nn.AvgPool2d(7,stride=1)
        self.fc = nn.Linear(256, num_classes)
        
    def _make_layer(self, block, planes, blocks, add_layer, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes:
            
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes),
            )
        layers = []
        layers.append(block(self.inplanes, planes, add_layer, stride, downsample))
        self.inplanes = planes
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, add_layer))

        return nn.Sequential(*layers)
    
    def forward(self, x):
        x = self.conv1(x)
        #x = self.maxpool(x)
        x = self.layer0(x)
        #x = self.drop1(x)
        x = self.layer1(x)
        #x = self.drop2(x)
        x = self.layer2(x)
        #x = self.drop3(x)
        x = self.layer3(x)
        
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [11]:
model = ResNet(ResidualBlock, [2,2,2,2,0.1], 0,0,0).to(device)
summary(model, (3, 32, 32))


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,792
       BatchNorm2d-2           [-1, 64, 32, 32]             128
              ReLU-3           [-1, 64, 32, 32]               0
            Conv2d-4           [-1, 32, 32, 32]          18,464
       BatchNorm2d-5           [-1, 32, 32, 32]              64
              ReLU-6           [-1, 32, 32, 32]               0
            Conv2d-7           [-1, 32, 32, 32]           9,248
       BatchNorm2d-8           [-1, 32, 32, 32]              64
            Conv2d-9           [-1, 32, 32, 32]           2,080
      BatchNorm2d-10           [-1, 32, 32, 32]              64
             ReLU-11           [-1, 32, 32, 32]               0
    ResidualBlock-12           [-1, 32, 32, 32]               0
           Conv2d-13           [-1, 32, 32, 32]           9,248
      BatchNorm2d-14           [-1, 32,

In [7]:
imsize = 32
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((imsize,imsize), antialias=True),
    #transforms.Grayscale(num_output_channels=1)
    transforms.RandomHorizontalFlip()
])

dataTrain = datasets.CIFAR10(
    root="dataset",
    train=True,
    download=True,
    transform=transform
)
dataValid = datasets.CIFAR10(
    root="dataset",
    train=False,
    download=True,
    transform=transform
)


print(len(dataValid[0][0]))

valid_y = np.array([y for x,y in iter(dataValid)])
test_y = np.array([y for x,y in iter(dataTrain)])

Files already downloaded and verified
Files already downloaded and verified
3


Trainer
Learning Rate Scheduler Version

In [12]:
def trainer(epochs, dataloader, model, lr, loader_valid, decay, mom, batch):
  optimizer = torch.optim.SGD(model.parameters(), lr=lr, weight_decay=decay, momentum=mom)
  #scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max')
  scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer,max_lr=lr, steps_per_epoch=((50000//batch) + 1), epochs=epochs,anneal_strategy='linear')
  scaler = torch.cuda.amp.GradScaler()
  loss_f = nn.CrossEntropyLoss()
  print("Epoch, Loss, Time, val_acc")
  correct = 0
  processed = 0

  for epoch in range(epochs):
    start = time.perf_counter()
    for x,y in dataloader:
      x = x.to(device)
      y = y.to(device)
      with torch.cuda.amp.autocast():
        pred = model(x)
        loss = loss_f(pred, y)
      optimizer.zero_grad()
      scaler.scale(loss).backward()
      scaler.step(optimizer)
      scaler.update()
      scheduler.step()
      #print(scheduler.get_last_lr())

      pred = pred.argmax(dim=1, keepdim=True)
      correct += pred.eq(y.view_as(pred)).sum().item()
      processed += len(x)

      del x, y
      torch.cuda.empty_cache()
      gc.collect()
    end = time.perf_counter()

    train_acc = 100*(correct/processed)
    acc = predict(model, loader_valid, loss_f,scheduler)
    print(epoch + 1,",", loss.item(),",", end-start,",", acc, ",", train_acc)


def predict(model, loader_valid, loss_f,scheduler):
  with torch.no_grad():
    #t_loss=0
    correct = 0
    total = 0
    for x,y in loader_valid:
      x = x.to(device)
      y = y.to(device)
      output = model(x)
      loss = loss_f(output, y)
      #t_loss += loss.item()
      _, predicted = torch.max(output.data, 1)
      total += y.size(0)
      correct += (predicted == y).sum().item()
      del x,y,output
    #scheduler.step(t_loss)
  return 100*(correct/total)

In [13]:
epochs = 20
num_w = 2
lr = [0.075]
Dropout = 0.0
b_size = [1024]
mom = 0.5
decay = [0.01]

for i in range(0,len(b_size)):
    loader_train = DataLoader(dataTrain, batch_size=b_size[i], shuffle=True, num_workers=num_w)
    loader_valid = DataLoader(dataValid, batch_size=b_size[i], shuffle=True, num_workers=num_w)

    for j in range(0, len(lr)):
        for k in range(0, len(decay)):
            print("Decay:", decay[k], " LR:", lr[j])
            model = ResNet(ResidualBlock, [2,2,2,2,Dropout], 0,0,0).to(device)
            trainer(epochs ,loader_train ,model, lr[j], loader_valid, decay[k], mom, b_size[i])

Decay: 0.01  LR: 0.075
Epoch, Loss, Time, val_acc
1 , 1.5375926494598389 , 21.190826200006995 , 45.58 , 30.620000000000005
2 , 1.0904489755630493 , 21.286402500001714 , 59.43000000000001 , 42.14
3 , 0.9631534814834595 , 20.795053799985908 , 66.64 , 49.25066666666667
4 , 0.7235575318336487 , 19.620573899999727 , 74.19 , 55.0365
5 , 0.6166045069694519 , 19.993272799998522 , 76.31 , 59.3692
6 , 0.5690386891365051 , 20.252690799999982 , 78.89 , 62.842
7 , 0.5364925265312195 , 20.15446340001654 , 80.16 , 65.54514285714286
8 , 0.5084064602851868 , 20.135452999995323 , 82.19999999999999 , 67.7795
9 , 0.44945192337036133 , 20.169335100013996 , 82.16 , 69.65377777777778
10 , 0.4486502408981323 , 20.166976699983934 , 82.85 , 71.2496
11 , 0.4481603503227234 , 20.03406289999839 , 84.84 , 72.67290909090909
12 , 0.40605735778808594 , 19.989660999999614 , 83.61 , 73.88033333333334
13 , 0.35484829545021057 , 20.00828320000437 , 83.94 , 74.95461538461538
14 , 0.3584369421005249 , 20.216979400021955 , 8