<a href="https://colab.research.google.com/github/jmill18/ECGR4106_Homework3/blob/main/ECGR4106_Homework3_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import torch
from torch import nn as nn
import torchvision
from torchvision import transforms
from torch.nn import functional as F
from matplotlib import pyplot as plt
import os

In [3]:
def init_cnn(module):
  if type(module) == nn.Linear or type(module) == nn.Conv2d:
    nn.init.xavier_uniform_(module.weight)

In [4]:
class Residual(nn.Module):
  def __init__(self, num_channels, use_1x1conv=False, strides=1):
    super().__init__()
    self.conv1 = nn.LazyConv2d(num_channels, kernel_size=3, padding=1,
                               stride=strides)
    self.conv2 = nn.LazyConv2d(num_channels, kernel_size=3, padding=1)
    if use_1x1conv:
      self.conv3 = nn.LazyConv2d(num_channels, kernel_size=1,
                                 stride=strides)
    else:
      self.conv3 = None
    self.bn1 = nn.LazyBatchNorm2d()
    self.bn2 = nn.LazyBatchNorm2d()

  def forward(self, X):
    Y = F.relu(self.bn1(self.conv1(X)))
    Y = self.bn2(self.conv2(Y))
    if self.conv3:
      X = self.conv3(X)
    Y += X
    return F.relu(Y)  

In [5]:
class ResNet(nn.Module):
  def __init__(self, arch, lr=0.1, num_classes=10):
    super(ResNet, self).__init__()

    self.net = nn.Sequential(self.b1())
    for i, b in enumerate(arch):
      self.net.add_module(f'b{i+2}', self.block(*b, first_block=(i==0)))
    self.net.add_module('last', nn.Sequential(
        nn.AdaptiveAvgPool2d((1, 1)), nn.Flatten(),
        nn.LazyLinear(num_classes)
    ))
    self.net.apply(init_cnn)
  def forward(self, x):
    return self.net(x);
  def b1(self):
    return nn.Sequential(
        nn.LazyConv2d(64, kernel_size=7, stride=2, padding=3),
        nn.LazyBatchNorm2d(), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    )
  def block(self, num_residuals, num_channels, first_block=False):
    blk = []
    for i in range(num_residuals):
      if i == 0 and not first_block:
        blk.append(Residual(num_channels, use_1x1conv=True, strides=2))
      else:
        blk.append(Residual(num_channels))
    return nn.Sequential(*blk)

In [6]:
class ResNet18(ResNet):
  def __init__(self, lr=0.1, num_classes=10):
    super().__init__(((2, 64), (2, 128), (2, 256), (2, 512)),
                     lr, num_classes)
  def layer_summary(self, X_shape):
      x = torch.randn(*X_shape)
      for layer in self.net:
        x = layer(x)
        print(layer.__class__.__name__, 'output shape:\t', x.shape)

In [7]:
ResNet18().layer_summary((1, 1, 96, 96))



Sequential output shape:	 torch.Size([1, 64, 24, 24])
Sequential output shape:	 torch.Size([1, 64, 24, 24])
Sequential output shape:	 torch.Size([1, 128, 12, 12])
Sequential output shape:	 torch.Size([1, 256, 6, 6])
Sequential output shape:	 torch.Size([1, 512, 3, 3])
Sequential output shape:	 torch.Size([1, 10])


In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [9]:
trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((64,64))
])
train_data = torchvision.datasets.CIFAR10(root='./data',
                                          train=True,
                                          transform=trans,
                                          download=True)
val_data = torchvision.datasets.CIFAR10(root='./data',
                                        train=False,
                                        transform=trans,
                                        download=True)
train_loader = torch.utils.data.DataLoader(dataset=train_data,
                                           batch_size=100,
                                           shuffle=True, num_workers=os.cpu_count())
val_loader = torch.utils.data.DataLoader(dataset=val_data,
                                         batch_size=100,
                                         shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [10]:
myResNet = ResNet18().to(device)
loss_crit = nn.CrossEntropyLoss()
optim = torch.optim.SGD(myResNet.parameters(), lr=0.1)

In [None]:
num_epochs = 30
valid_loss = [None] * 30
training_loss = [None] * 30
valid_acc = [None] * 30
index = 0

for epoch in range(num_epochs):
  n_correct_pred = 0
  n_samples = 0
  for i, (images, labels) in enumerate(train_loader):
    images = images.to(device)
    labels = labels.to(device)

    outputs = myResNet(images)
    loss = loss_crit(outputs, labels)

    optim.zero_grad()
    loss.backward()
    optim.step()

  with torch.no_grad():
    for j, (val_images, val_labels) in enumerate(val_loader):
      val_images = val_images.to(device)
      val_labels = val_labels.to(device)
      outputs = myResNet(val_images)
      val_loss = loss_crit(outputs, val_labels)
      _, pred = torch.max(outputs.data, 1)
      n_samples += val_labels.size(0)
      n_correct_pred += (pred == val_labels).sum().item()

  val_acc = (n_correct_pred / n_samples) * 100
  valid_loss[index] = val_loss.item()
  training_loss[index] = loss.item()
  valid_acc[index] = val_acc / 100.0
  index += 1
  if (epoch+1) % 2 == 0:
    print(f'Epoch [{epoch+1}/{num_epochs}], train loss: {loss.item():.4f}, val loss: {val_loss.item():.4f}, val_acc: {val_acc:.4f} %')

Epoch [2/30], train loss: 0.8453, val loss: 0.7670, val_acc: 69.6700 %
Epoch [4/30], train loss: 0.4279, val loss: 0.7182, val_acc: 76.0500 %
Epoch [6/30], train loss: 0.3741, val loss: 0.6674, val_acc: 77.5400 %
Epoch [8/30], train loss: 0.1535, val loss: 0.9395, val_acc: 77.9800 %
Epoch [10/30], train loss: 0.0528, val loss: 1.1494, val_acc: 78.7900 %
Epoch [12/30], train loss: 0.0801, val loss: 1.3491, val_acc: 78.3500 %
Epoch [14/30], train loss: 0.0167, val loss: 1.4215, val_acc: 78.9100 %
Epoch [16/30], train loss: 0.0139, val loss: 1.5912, val_acc: 79.0500 %
Epoch [18/30], train loss: 0.0251, val loss: 1.3471, val_acc: 79.0500 %
Epoch [20/30], train loss: 0.0790, val loss: 2.0281, val_acc: 78.8400 %


In [None]:
epochs = [None]*30
for i in range(30):
  epochs[i] = i+1
plt.rcParams["figure.figsize"] = [7.50, 3.50]
plt.rcParams["figure.autolayout"] = True

plt.title("Validation Accuracy/Loss and Training Loss vs No. Epochs")
plt.xlabel("Number of epochs")
plt.plot(epochs, valid_acc, label="Validation Accuracy", color="red")
plt.plot(epochs, training_loss, label="Training Loss", color="purple")
plt.plot(epochs, valid_loss, label="Validation Loss", color="Green")

plt.legend()
plt.show()