## Section 1: Import Libraries and load cifar10 dataset.

In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import random
import numpy as np
import matplotlib.pyplot as plt
import torch.utils.data as data
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from sklearn.model_selection import KFold

In [2]:
def load_cifar10_data(batch_size):
# load both training and test datasets, and transform them to tensors.
  transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])])

  trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                          download=True, transform=transform)
  trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                            shuffle=True, num_workers=2)

  testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                        download=True, transform=transform)
  testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                          shuffle=False, num_workers=2)
  
  return trainset, trainloader, testloader

In [3]:
batch_size = 4
trainset, trainloader, testloader = load_cifar10_data(batch_size)
classes = ('plane', 'car', 'bird', 'cat',
            'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

## Check the dimensions of a batch:
for images, labels in trainloader:  
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', (torch.tensor(labels)).shape)
    break

Files already downloaded and verified
Files already downloaded and verified
Image batch dimensions: torch.Size([4, 3, 224, 224])
Image label dimensions: torch.Size([4])


  print('Image label dimensions:', (torch.tensor(labels)).shape)


## Section 2: Load Pre-trained Models

### 2a) Alexnet

In [4]:
from torchvision import models

model1 = models.alexnet(pretrained=True)
print(model1)



AlexNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=9216, out_features=4096, bias=True)
 

In [5]:
# Freeze the parameters of all layers and modify the last layer to produce only 10 classes as output
for param in model1.parameters():
    param.requires_grad = False

In [6]:
# Replace the last layer of the AlexNet model with a new, trainable layer
num_features = model1.classifier[6].in_features
model1.classifier[6] = nn.Linear(num_features, 10)

### 2b) VGG 

In [7]:
model2 = models.vgg16(pretrained=True)
print(model2)



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [8]:
# Freeze the parameters of all layers 
for param in model2.parameters():
    param.requires_grad = False

In [9]:
# Replace the last layer of the VGG-16 model with a new, trainable layer
num_features = model2.classifier[6].in_features
model2.classifier[6] = nn.Linear(num_features, 10) 

### 2c) Resnet

In [10]:
model3 = models.resnet18(pretrained=True)
print(model3)



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [11]:
# Freeze the pre-trained layers
for param in model3.parameters():
    param.requires_grad = False

In [12]:
# Replace the last layer of the ResNet-18 model with a new, trainable layer
num_features = model3.fc.in_features
model3.fc = nn.Linear(num_features, 10)

## Section 3: Define optimizer and loss criterion

In [13]:
#TODO: Declare your hyperparameters here
#random_seed = random.seed(42)
learning_rate = 0.001
num_epochs = 1

optimizer1 = optim.Adam(model1.parameters(), lr=learning_rate)
optimizer2 = optim.Adam(model2.parameters(), lr=learning_rate)
optimizer3 = optim.Adam(model3.parameters(), lr=learning_rate)
# loss
loss_function = nn.CrossEntropyLoss()

In [14]:
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('Device:', DEVICE)
# Set my pretrained model to be run on GPU
model1 = model1.to(DEVICE)
model2 = model2.to(DEVICE)
model3 = model3.to(DEVICE)

Device: cuda:0


## Section 4: Train the model

In [15]:
def train_model(trainloader, valid_loader, num_epochs, model, optimizer, loss_function):
    correct = 0
    total = 0
    for epoch in range(num_epochs):
      # Train the model
        model.train()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            #Zero out the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = loss_function(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            # Evaluate the model on the validation set
        model.eval()
        with torch.no_grad():
            total = 0
            correct = 0
            for inputs, labels in valid_loader:
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                              
                outputs = model(inputs)

            
            # Update statistics
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        
            print(f"Epoch: {epoch+1}/{num_epochs}, Train Loss: {running_loss / ((i + 1) * batch_size):.4f}, Train Accuracy: {correct*100/total:.2f}%")


In [16]:
# Define the number of folds
num_folds = 10

# Initialize KFold object
kf = KFold(n_splits=num_folds, shuffle=True)

# Get indices for each fold
fold_indices = []
for train_indices, valid_indices in kf.split(trainset):
    fold_indices.append((train_indices, valid_indices))

In [17]:
def retrain(model, train_indices, valid_indices, fold_indices, optimizer, trainset):
    # Iterate over each fold and train the model
    for i, (train_indices, valid_indices) in enumerate(fold_indices):
    # Create data loaders for the current fold
        train_sampler = data.SubsetRandomSampler(train_indices)
        valid_sampler = data.SubsetRandomSampler(valid_indices)
        train_loader = data.DataLoader(trainset, batch_size=batch_size,num_workers=2, sampler = train_sampler)
        valid_loader = data.DataLoader(trainset, batch_size=batch_size,num_workers=2, sampler = valid_sampler)
        train_model(train_loader, valid_loader, num_epochs, model, optimizer, loss_function)

In [18]:
retrain(model1, train_indices, valid_indices, fold_indices, optimizer1, trainset)

Epoch: 1/1, Train Loss: 0.0003, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0001, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0000, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0004, Train Accuracy: 100.00%
Epoch: 1/1, Train Loss: 0.0002, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0002, Train Accuracy: 100.00%
Epoch: 1/1, Train Loss: 0.0002, Train Accuracy: 100.00%
Epoch: 1/1, Train Loss: 0.0001, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0001, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0005, Train Accuracy: 75.00%


In [19]:
retrain(model2, train_indices, valid_indices, fold_indices, optimizer2, trainset)

Epoch: 1/1, Train Loss: 0.0000, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0001, Train Accuracy: 50.00%
Epoch: 1/1, Train Loss: 0.0002, Train Accuracy: 100.00%
Epoch: 1/1, Train Loss: 0.0003, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0000, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0000, Train Accuracy: 100.00%
Epoch: 1/1, Train Loss: 0.0000, Train Accuracy: 100.00%
Epoch: 1/1, Train Loss: 0.0002, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0000, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0001, Train Accuracy: 100.00%


In [20]:
retrain(model3, train_indices, valid_indices, fold_indices, optimizer3, trainset)

Epoch: 1/1, Train Loss: 0.0002, Train Accuracy: 100.00%
Epoch: 1/1, Train Loss: 0.0001, Train Accuracy: 50.00%
Epoch: 1/1, Train Loss: 0.0001, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0001, Train Accuracy: 100.00%
Epoch: 1/1, Train Loss: 0.0002, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0003, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0000, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0001, Train Accuracy: 100.00%
Epoch: 1/1, Train Loss: 0.0001, Train Accuracy: 75.00%
Epoch: 1/1, Train Loss: 0.0001, Train Accuracy: 75.00%


## Section 5: Test the model

Write a function to test the model using testloader.

In [21]:
dataiter = iter(testloader)
images, labels = next(dataiter)

In [22]:
#Test for the entire dataset
def test_model(model, name):
    correct = 0
    total = 0
    # since we're not training, we don't need to calculate the gradients for our xputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            # calculate xputs by running images through the network
            xputs = model(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(xputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of: {name} model on the 10000 test images: {100 * correct // total} %')

In [23]:
test_model(model1, "Alexnet")
test_model(model2, "VGG")
test_model(model3, "RESNET")

Accuracy of: Alexnet model on the 10000 test images: 78 %
Accuracy of: VGG model on the 10000 test images: 80 %
Accuracy of: RESNET model on the 10000 test images: 75 %


By the current accuracy reports we can say that the custom CNN built in Q1 (accuracy = 78%) performs better than RESNET architecture and at par with Alexnet. However, VGG model performs slightly better than the custom CNN built in Q1. Please note that the pre-trained models were only trained for one epoch (for time constraints) while the custom CNN was trained for 5 epochs.