Imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import matplotlib.pyplot as plt

Creating the ResNet Block

In [2]:
class ResBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a shortcut branch.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Channel count produced by both 3x3 convolutions.
        downsample: When true, the first convolution uses stride 2 and the
            shortcut is a stride-2 1x1 convolution followed by batch norm;
            otherwise the first convolution uses stride 1 and the shortcut
            is an empty ``nn.Sequential`` (identity).
    """

    def __init__(self, in_channels, out_channels, downsample):
        super().__init__()
        # Only the stride of the first convolution depends on `downsample`.
        first_stride = 2 if downsample else 1
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, stride=first_stride, padding=1
        )

        if downsample:
            # Projection shortcut so the skip branch matches the main branch.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=2),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, input):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(input))))) + shortcut(input))``."""
        residual = self.shortcut(input)
        out = F.relu(self.bn1(self.conv1(input)))
        out = self.bn2(self.conv2(out))
        return F.relu(out + residual)


Creating the ResNet

In [3]:
class ResNet18(nn.Module):
    """ResNet-18-style classifier: a conv stem, four stages of two residual blocks, and a linear head.

    The stem is a 3x3 stride-1 convolution to 42 channels followed by batch
    norm and ReLU. The stages use 42, 84, 168 and 336 channels; the first block
    of stages 2-4 is created with ``downsample=True``.

    Args:
        in_channels: Number of channels of the input images. This is now
            honoured by the stem convolution (it was previously ignored and
            hard-coded to 3).
        resblock: Callable invoked as ``resblock(in_ch, out_ch, downsample=bool)``
            that returns the module used for each residual block.
        outputs: Number of values produced by the final linear layer.
    """

    def __init__(self, in_channels, resblock, outputs=10):
        super().__init__()
        self.layer0 = nn.Sequential(
            nn.Conv2d(in_channels, 42, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(42),
            nn.ReLU()
        )

        self.layer1 = nn.Sequential(
            resblock(42, 42, downsample=False),
            resblock(42, 42, downsample=False)
        )

        self.layer2 = nn.Sequential(
            resblock(42, 84, downsample=True),
            resblock(84, 84, downsample=False)
        )

        self.layer3 = nn.Sequential(
            resblock(84, 168, downsample=True),
            resblock(168, 168, downsample=False)
        )

        self.layer4 = nn.Sequential(
            resblock(168, 336, downsample=True),
            resblock(336, 336, downsample=False)
        )

        self.fc = torch.nn.Linear(336, outputs)

    def forward(self, input):
        """Run the stem and the four stages, pool globally, and apply the linear head."""
        input = self.layer0(input)
        input = self.layer1(input)
        input = self.layer2(input)
        input = self.layer3(input)
        input = self.layer4(input)
        # Global average pooling to 1x1. For the 4x4 map produced by a 32x32
        # input this matches the former avg_pool2d(input, 4), and it keeps the
        # flattened width at 336 for other input resolutions too.
        input = F.adaptive_avg_pool2d(input, 1)
        input = input.view(input.size(0), -1)
        return self.fc(input)

Summary:

In [4]:
# Build one instance on the available device (CUDA if present, else CPU) and
# print torchsummary's per-layer table for a 3x32x32 input.
resnet18 = ResNet18(3, ResBlock, outputs=10).to(
    torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
)
summary(resnet18, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 42, 32, 32]           1,176
       BatchNorm2d-2           [-1, 42, 32, 32]              84
              ReLU-3           [-1, 42, 32, 32]               0
            Conv2d-4           [-1, 42, 32, 32]          15,918
       BatchNorm2d-5           [-1, 42, 32, 32]              84
            Conv2d-6           [-1, 42, 32, 32]          15,918
       BatchNorm2d-7           [-1, 42, 32, 32]              84
          ResBlock-8           [-1, 42, 32, 32]               0
            Conv2d-9           [-1, 42, 32, 32]          15,918
      BatchNorm2d-10           [-1, 42, 32, 32]              84
           Conv2d-11           [-1, 42, 32, 32]          15,918
      BatchNorm2d-12           [-1, 42, 32, 32]              84
         ResBlock-13           [-1, 42, 32, 32]               0
           Conv2d-14           [-1, 84,

Defining function for training

In [5]:
# Training
train_accs = []


def train(epoch):
    """Run one training pass over ``trainloader`` and record its accuracy.

    Uses the notebook-level globals ``net``, ``trainloader``, ``device``,
    ``optimizer`` and ``criterion``, which must be defined before calling.
    The accuracy of the pass, in percent, is appended to the global
    ``train_accs`` and printed.

    Args:
        epoch: Epoch index; only used in the printed header.
    """
    print('\nEpoch: %d' % epoch)
    net.train()
    correct = 0
    total = 0
    for inputs, targets in trainloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        # Index of the largest value along dim 1 is the predicted label.
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    # An empty loader leaves total at 0; report 0.0 rather than raising
    # ZeroDivisionError.
    train_acc = correct * 100 / total if total else 0.0
    train_accs.append(train_acc)
    print("Train accuracy = "+str(correct)+"/"+str(total)+" = "+str(train_acc))



Defining function for testing

In [6]:
# Testing
test_accs = []
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    best_acc = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    acc = 100.*correct/total
    test_accs.append(acc)
    if acc > best_acc:
        best_acc = acc

    print("Test accuracy = " + str(correct) + "/" + str(total)+" = " + str(acc))

Defining a function for plotting the training and test accuracies over epochs

In [7]:
#Plot
def plot_accs(a,b,epoch,value):
    """Plot training and test accuracy curves and annotate one point.

    Args:
        a: Sequence of training accuracies, plotted as 'Training accuracies'.
        b: Sequence of test accuracies, plotted as 'Test accuracies'.
        epoch: x-coordinate of the annotated point.
        value: y-coordinate of the annotated point; also shown in the
            'Max Accuracy' label. The label previously read the global
            ``test_accs`` instead of the function's own arguments.
    """
    plt.plot(a,label='Training accuracies')
    plt.plot(b,label='Test accuracies')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    # Fixed axes: x spans 0-40 and y spans 0-100.
    plt.xlim([0,40])
    plt.ylim([0,100])
    # Label text sits at a fixed position with an arrow to (epoch, value).
    plt.annotate('Max Accuracy '+str(value) , xy=(epoch, value), xytext=(35, 10),
            arrowprops=dict(arrowstyle='->'),
            )
    plt.legend()
    plt.show()

Getting the data and setting up data loaders for training and testing.

In [8]:
# Data
print('==> Preparing data..')

# Per-channel (mean, std) pair handed to transforms.Normalize for both splits.
# NOTE(review): presumably CIFAR-10 channel statistics — confirm the source.
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)

# Training pipeline: random 32x32 crop (4 px padding) and random horizontal
# flip, then tensor conversion and normalisation.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

# Test pipeline: tensor conversion and normalisation only.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORM_MEAN, NORM_STD),
])

trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform_train,
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)

testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform_test,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=100,
    shuffle=False,
    num_workers=2,
)

classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)


==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified


In [9]:
def initialize():
    """Create a fresh ``ResNet18`` and (re)bind the global loss and device.

    Side effects:
        Sets the module-level ``criterion`` to ``nn.CrossEntropyLoss()`` and
        ``device`` to ``"cuda:0"`` when CUDA is available, else ``"cpu"``.

    Returns:
        A new ``ResNet18(3, ResBlock, outputs=10)`` moved to that device.
    """
    global criterion, device
    criterion = nn.CrossEntropyLoss()
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    model = ResNet18(3, ResBlock, outputs=10)
    model.to(torch.device(device))
    return model

In [10]:
# Build the model for the first experiment; initialize() also sets the global
# `criterion` and `device` used by train() and test().
net=initialize()

Defining the optimizer with initial parameters : LR = 0.1, Momentum = 0.9 and Weight Decay = 5e-4

In [11]:
# Experiment 1: SGD with lr=0.1, momentum=0.9, weight decay 5e-4.
optimizer = optim.SGD(params=net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
# NOTE(review): T_max=200, but the training cell below runs only 40 epochs, so
# the learning rate covers just the first fifth of the cosine half-cycle —
# confirm this is intended.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

Train and test the model in each epoch

In [None]:
# Start from empty histories, as the later experiment cells do, so re-running
# this cell does not append to curves left over from a previous execution.
train_accs, test_accs = [], []
# One train pass, one evaluation and one scheduler step per epoch.
for epoch in range(40):
    train(epoch)
    test(epoch)
    scheduler.step()



Epoch: 0
Train accuracy = 18443/50000 = 36.886
Test accuracy = 4758/10000 = 47.58

Epoch: 1
Train accuracy = 27524/50000 = 55.048
Test accuracy = 5992/10000 = 59.92

Epoch: 2
Train accuracy = 33011/50000 = 66.022
Test accuracy = 6423/10000 = 64.23

Epoch: 3
Train accuracy = 36191/50000 = 72.382
Test accuracy = 7168/10000 = 71.68

Epoch: 4
Train accuracy = 38195/50000 = 76.39
Test accuracy = 7201/10000 = 72.01

Epoch: 5
Train accuracy = 39560/50000 = 79.12
Test accuracy = 7777/10000 = 77.77

Epoch: 6
Train accuracy = 40154/50000 = 80.308
Test accuracy = 7305/10000 = 73.05

Epoch: 7
Train accuracy = 40828/50000 = 81.656
Test accuracy = 7336/10000 = 73.36

Epoch: 8
Train accuracy = 41126/50000 = 82.252
Test accuracy = 8184/10000 = 81.84

Epoch: 9
Train accuracy = 41466/50000 = 82.932
Test accuracy = 7349/10000 = 73.49

Epoch: 10
Train accuracy = 41703/50000 = 83.406
Test accuracy = 7691/10000 = 76.91

Epoch: 11
Train accuracy = 41971/50000 = 83.942
Test accuracy = 7953/10000 = 79.53

Epo

Plot the accuracies

In [None]:
# Annotate the first epoch at which the highest test accuracy was reached.
plot_accs(train_accs, test_accs, test_accs.index(max(test_accs)), max(test_accs))

In [None]:
# Highest test accuracy recorded for this experiment.
print(" Best Accuracy = ", max(test_accs), sep="")

Refreshing our model.

In [None]:
# Fresh, untrained model for the next experiment; initialize() also rebinds
# the global `criterion` and `device`.
net=initialize()

Changing the optimizer with new parameters : LR = 0.1, Momentum = 0.5 and Weight Decay = 5e-4

In [None]:
# Experiment 2: SGD with lr=0.1, momentum=0.5, weight decay 5e-4.
optimizer = optim.SGD(params=net.parameters(), lr=0.1, momentum=0.5, weight_decay=5e-4)
# NOTE(review): T_max=200, but the training cell below runs only 40 epochs, so
# the learning rate covers just the first fifth of the cosine half-cycle —
# confirm this is intended.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

Train and test the model in each epoch

In [None]:
# Rebind both accuracy histories so this experiment's curves start empty.
train_accs = []
test_accs = []
# One train pass, one evaluation and one scheduler step per epoch.
for epoch in range(40):
    train(epoch)
    test(epoch)
    scheduler.step()

Plot the accuracies

In [None]:
# Annotate the first epoch at which the highest test accuracy was reached.
plot_accs(train_accs, test_accs, test_accs.index(max(test_accs)), max(test_accs))

Refreshing our model.

In [None]:
# Fresh, untrained model for the next experiment; initialize() also rebinds
# the global `criterion` and `device`.
net=initialize()

Changing the optimizer with new parameters : LR = 0.1, Momentum = 0.0005 and Weight Decay = 5e-4

In [None]:
# Experiment 3: SGD with lr=0.1, momentum=0.0005, weight decay 5e-4.
optimizer = optim.SGD(params=net.parameters(), lr=0.1, momentum=0.0005, weight_decay=5e-4)
# NOTE(review): T_max=200, but the training cell below runs only 40 epochs, so
# the learning rate covers just the first fifth of the cosine half-cycle —
# confirm this is intended.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

Train and test the model in each epoch

In [None]:
# Rebind both accuracy histories so this experiment's curves start empty.
train_accs = []
test_accs = []
# One train pass, one evaluation and one scheduler step per epoch.
for epoch in range(40):
    train(epoch)
    test(epoch)
    scheduler.step()

Plot the accuracies

In [None]:
# Annotate the first epoch at which the highest test accuracy was reached.
plot_accs(train_accs, test_accs, test_accs.index(max(test_accs)), max(test_accs))

In [None]:
# Highest test accuracy recorded for this experiment.
print(" Best Accuracy = ", max(test_accs), sep="")

Refreshing our model.

In [None]:
# Fresh, untrained model for the next experiment; initialize() also rebinds
# the global `criterion` and `device`.
net=initialize()

Changing the optimizer with optimized parameters : LR = 0.1, Momentum = 0.05 and Weight Decay = 1e-3

In [None]:
# Experiment 4: SGD with lr=0.1, momentum=0.05, weight decay 1e-3.
optimizer = optim.SGD(params=net.parameters(), lr=0.1, momentum=0.05, weight_decay=1e-3)
# NOTE(review): T_max=200, but the training cell below runs only 40 epochs, so
# the learning rate covers just the first fifth of the cosine half-cycle —
# confirm this is intended.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

Train and test the model in each epoch

In [None]:
# Rebind both accuracy histories so this experiment's curves start empty.
train_accs = []
test_accs = []
# One train pass, one evaluation and one scheduler step per epoch.
for epoch in range(40):
    train(epoch)
    test(epoch)
    scheduler.step()

Plot the accuracies

In [None]:
# Annotate the first epoch at which the highest test accuracy was reached.
plot_accs(train_accs, test_accs, test_accs.index(max(test_accs)), max(test_accs))

In [None]:
# Highest test accuracy recorded for this experiment.
print(" Best Accuracy = ", max(test_accs), sep="")