## Q3

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
BATCH_SIZE = 4
LEARNING_RATE = 0.001
EPOCHS = 2
PATH = './cifar_net.pth'

### Dataset

In [3]:
classes = (
  'plane',
  'car',
  'bird',
  'cat',
  'deer',
  'dog',
  'frog',
  'horse',
  'ship',
  'truck'
)

In [4]:
# Train
# Augmentation on train set is implemented
train_transform = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
  transforms.RandomHorizontalFlip(p=0.5),
  transforms.RandomVerticalFlip(p=0.5),
  transforms.RandomRotation(degrees=(0, 180)),
  transforms.RandomGrayscale(p=0.1),
])

trainset = torchvision.datasets.CIFAR10(
  root='./data',
  train=True,
  download=True,
  transform=train_transform
)

trainloader = torch.utils.data.DataLoader(
  trainset,
  batch_size=BATCH_SIZE,
  shuffle=True,
  num_workers=2
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data


In [5]:
# Test
# Test set does not have augmentation
test_transform = transforms.Compose([
  transforms.ToTensor(),
  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

testset = torchvision.datasets.CIFAR10(
  root='./data',
  train=False,
  download=True,
  transform=test_transform
)

testloader = torch.utils.data.DataLoader(
  testset,
  batch_size=BATCH_SIZE,
  shuffle=False,
  num_workers=2
)

Files already downloaded and verified


### Model

In [6]:
class Net(nn.Module):
  def __init__(self):
    super().__init__()
    # All Conv2d layers has the same arguments
    self.KERNEL_SIZE = 3
    self.STRIDE = 1
    self.PADDING = 1

    self.conv_layer = nn.Sequential(
      # 3*32*32 => 8*32*32
      nn.Conv2d(
        in_channels=3,
        out_channels=8,
        kernel_size=self.KERNEL_SIZE,
        stride=self.STRIDE,
        padding=self.PADDING
      ),
      nn.BatchNorm2d(8),
      nn.ReLU(),

      # 8*32*32 => 16*32*32
      nn.Conv2d(
        in_channels=8,
        out_channels=16,
        kernel_size=self.KERNEL_SIZE,
        stride=self.STRIDE,
        padding=self.PADDING
      ),
      nn.ReLU(),

      # 16*32*32 => 16*16*16
      nn.MaxPool2d(kernel_size=2, stride=2),

      # 16*16*16 => 32*16*16
      nn.Conv2d(
        in_channels=16,
        out_channels=32,
        kernel_size=self.KERNEL_SIZE,
        stride=self.STRIDE,
        padding=self.PADDING
      ),
      nn.BatchNorm2d(32),
      nn.ReLU(),

      # 32*16*16 => 64*16*16
      nn.Conv2d(
        in_channels=32,
        out_channels=64,
        kernel_size=self.KERNEL_SIZE,
        stride=self.STRIDE,
        padding=self.PADDING
      ),
      nn.ReLU(),

      # 64*16*16 => 64*8*8
      nn.MaxPool2d(kernel_size=2, stride=2),

      nn.Dropout2d(p=0.05),

      # 64*8*8 => 128*8*8
      nn.Conv2d(
        in_channels=64,
        out_channels=128,
        kernel_size=self.KERNEL_SIZE,
        stride=self.STRIDE,
        padding=self.PADDING
      ),
      nn.BatchNorm2d(128),
      nn.ReLU(),

      # 128*8*8 => 256*8*8
      nn.Conv2d(
        in_channels=128,
        out_channels=256,
        kernel_size=self.KERNEL_SIZE,
        stride=self.STRIDE,
        padding=self.PADDING
      ),
      nn.ReLU(),

      # 256*8*8 => 256*4*4
      nn.MaxPool2d(kernel_size=2, stride=2),
    )

    self.fc_layer = nn.Sequential(
      nn.Dropout2d(p=0.05),

      nn.Linear(in_features=256*4*4, out_features=512),
      nn.ReLU(),

      nn.Linear(in_features=512, out_features=256),
      nn.ReLU(),

      nn.Dropout(p=0.05),

      nn.Linear(in_features=256, out_features=10)
    )


  def forward(self, x):
    x = self.conv_layer(x)
    x = x.view(x.size(0), -1)
    x = self.fc_layer(x)
    return x

In [7]:
net = Net()

criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(
  net.parameters(),
  lr=LEARNING_RATE,
  betas=(0.9, 0.999),
  eps=1e-08,
  weight_decay=0,
  amsgrad=False
)

In [8]:
GPU = torch.cuda.is_available()
if GPU:
  net.cuda()
  print("GPU Enabled")

GPU Enabled


### Train

In [9]:
for epoch in range(EPOCHS):
  running_loss = 0.0

  for i, data in enumerate(trainloader, 0):
    # get the inputs; data is a list of [inputs, labels]
    inputs, labels = data

    # GPU support
    if GPU:
      inputs = inputs.cuda()
      labels = labels.cuda()

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    running_loss += loss.item()
    optimizer.step()

    # print statistics every 2000 mini-batches
    if i % 2000 == 1999:
      print('[%d, %5d] loss: %.3f' %
            (epoch + 1, i + 1, running_loss / 2000))
      running_loss = 0.0

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


[1,  2000] loss: 2.207
[1,  4000] loss: 2.140
[1,  6000] loss: 2.094
[1,  8000] loss: 2.061
[1, 10000] loss: 2.024
[1, 12000] loss: 1.977
[2,  2000] loss: 1.964
[2,  4000] loss: 1.947
[2,  6000] loss: 1.929
[2,  8000] loss: 1.912
[2, 10000] loss: 1.907
[2, 12000] loss: 1.873


In [10]:
torch.save(net.state_dict(), PATH)

### Evaluation on test set

In [11]:
net = Net()
net.load_state_dict(torch.load(PATH))

if GPU:
  net.cuda()

In [12]:
correct = 0
total = 0

with torch.no_grad():
  for data in testloader:
    inputs, labels = data

    if GPU:
      inputs = inputs.cuda()
      labels = labels.cuda()

    outputs = net(inputs)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
    
acc = correct / total * 100
print(f'Accuracy: {acc:2f}%')

Accuracy: 33.360000%


In [13]:
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

with torch.no_grad():
  for data in testloader:
    inputs, labels = data

    if GPU:
      inputs = inputs.cuda()
      labels = labels.cuda()

    outputs = net(inputs)
    _, predictions = torch.max(outputs, 1)
    for label, prediction in zip(labels, predictions):
      if label == prediction:
        correct_pred[classes[label]] += 1
      total_pred[classes[label]] += 1

for classname, correct_count in correct_pred.items():
  accuracy = 100 * float(correct_count) / total_pred[classname]
  print(f"Accuracy for class {classname:5s} is: {accuracy:.1f}")

Accuracy for class plane is: 23.2
Accuracy for class car   is: 36.6
Accuracy for class bird  is: 19.9
Accuracy for class cat   is: 15.0
Accuracy for class deer  is: 33.8
Accuracy for class dog   is: 12.9
Accuracy for class frog  is: 51.8
Accuracy for class horse is: 38.2
Accuracy for class ship  is: 51.0
Accuracy for class truck is: 52.4
