In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import numpy as np

device = "cuda" if torch.cuda.is_available() else "cpu"

## Q1
Perform classification on FashionMNIST, fashion apparels dataset, using a pre-
trained model which is trained on MNIST handwritten digit classification dataset

In [2]:
class CNNClassifier(nn.Module):
    def forward(self, x):
        features = self.net(x)
        return self.classification_head(features.view(x.size(0), -1))

model = torch.load("models/mnist.pt")

  model = torch.load("models/mnist.pt")


In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_data = datasets.FashionMNIST('../data/', train=True, download=True, transform=transform)
test_data = datasets.FashionMNIST('../data/', train=False, transform=transform)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print("Before fine-tuning: ")
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

cm = confusion_matrix(all_labels, all_preds)
accuracy = np.sum(np.diag(cm)) / np.sum(cm)
print(f"Accuracy on FashionMNIST: {accuracy * 100}%")
print("Confusion Matrix: ")
print(cm)

epochs = 15
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {running_loss / len(train_loader)}")

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

cm = confusion_matrix(all_labels, all_preds)
accuracy = np.sum(np.diag(cm)) / np.sum(cm)
print(f"Accuracy on FashionMNIST: {accuracy * 100}%")
print("Confusion Matrix: ")
print(cm)

print(f"Number of params: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")

Before fine-tuning: 
Accuracy on FashionMNIST: 5.6000000000000005%
Confusion Matrix: 
[[ 76  33 176 165   3  23  24   9 484   7]
 [541  53   3   6   1   1   9   0 377   9]
 [650  41  42  20   7  23  43   3 158  13]
 [102 202  64  41   5   8  70   2 504   2]
 [751  40  31   7   6   8  12   2 138   5]
 [ 49  21 718  31  12  94   9  24  31  11]
 [404  93  92  53   8  23  44  10 261  12]
 [ 39   4 904   2  26   4   9   1   4   7]
 [126   6 374  11  71  77  43  50 201  41]
 [ 34 307 544  12  14  19  31   8  29   2]]
Epoch 1/15, Loss: 0.5996397592302071
Epoch 2/15, Loss: 0.3552782526577332
Epoch 3/15, Loss: 0.30078433718540265
Epoch 4/15, Loss: 0.2633973638386106
Epoch 5/15, Loss: 0.2384587723428189
Epoch 6/15, Loss: 0.21544249394713944
Epoch 7/15, Loss: 0.19429577699205133
Epoch 8/15, Loss: 0.1773578849122691
Epoch 9/15, Loss: 0.1622372541501165
Epoch 10/15, Loss: 0.14773395219083024
Epoch 11/15, Loss: 0.1350079118284081
Epoch 12/15, Loss: 0.12482669489231826
Epoch 13/15, Loss: 0.1108402267

## Q2
Learn the AlexNet architecture and apply transfer learning to perform the classification
task. Using the pre-trained AlexNet, classify images from the cats_and_dogs_filtered
dataset downloaded from the below link. Finetune the classifier given in AlexNet as a two-
class classifier. Perform pre-processing of images as per the requirement.

In [4]:
import PIL.Image
import glob

3628800

In [7]:
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_data = datasets.ImageFolder("../data/cats_and_dogs_filtered/train", transform=transform)
test_data = datasets.ImageFolder("../data/cats_and_dogs_filtered/validation", transform=transform)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64)

alexnet = models.alexnet(pretrained=True)
for param in alexnet.parameters():
    param.requires_grad = False
alexnet.classifier[6] = nn.Linear(alexnet.classifier[6].in_features, 2)
alexnet = alexnet.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(alexnet.parameters(), lr=0.001)

num_epochs = 10
alexnet.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = alexnet(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")

alexnet.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outputs = alexnet(inputs)
        _, preds = torch.max(outputs, 1)
        
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

cm = confusion_matrix(all_labels, all_preds)
accuracy = np.sum(np.diag(cm)) / np.sum(cm)
print(f"Accuracy: {accuracy * 100}%")
print("Confusion Matrix: ")
print(cm)


Epoch 1, Loss: 0.20556043414399028
Epoch 2, Loss: 0.10424223530571908
Epoch 3, Loss: 0.06024207209702581
Epoch 4, Loss: 0.053561464592348784
Epoch 5, Loss: 0.04975983430631459
Epoch 6, Loss: 0.04368565291952109
Epoch 7, Loss: 0.03604949050350115
Epoch 8, Loss: 0.036727765342220664
Epoch 9, Loss: 0.027214169327635318
Epoch 10, Loss: 0.021784801632747985
Accuracy: 97.2%
Confusion Matrix: 
[[487  13]
 [ 15 485]]


## Q3
Implement check points in PyTorch by saving model state_dict, optimizer state_dict, epochs
and loss during training so that the training can be resumed at a later point. Also, illustrate
the use of check point to save the best found parameters during training

In [10]:
class CNNClassifier(nn.Module):
    def forward(self, x):
        features = self.net(x)
        return self.classification_head(features.view(x.size(0), -1))

model = torch.load("../data/models/mnist/mnist.pt")

  model = torch.load("../data/models/mnist/mnist.pt")


In [11]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_data = datasets.FashionMNIST('../data/', train=True, download=True, transform=transform)
test_data = datasets.FashionMNIST('../data/', train=False, transform=transform)

train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def save_checkpoint(epoch, loss, file="../data/models/mnist/checkpoints/checkpoint.pt"):
    checkpoint = {
        'last_epoch': epoch,
        'last_loss': loss,
        'model_state': model.state_dict(),
        'optimizer_state': optimizer.state_dict()
    }
    torch.save(checkpoint, file)

epochs = 15
best_loss = float('inf')
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {avg_loss}")
    if (avg_loss < best_loss):
        best_loss = avg_loss
        save_checkpoint(epoch=epoch+1, loss=avg_loss)

model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

cm = confusion_matrix(all_labels, all_preds)
accuracy = np.sum(np.diag(cm)) / np.sum(cm)
print(f"Accuracy on FashionMNIST: {accuracy * 100}%")
print("Confusion Matrix: ")
print(cm)

print(f"Number of params: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")

Epoch 1/15, Loss: 0.6088587327330097
Epoch 2/15, Loss: 0.3635314711486734
Epoch 3/15, Loss: 0.30959605563805304
Epoch 4/15, Loss: 0.27144679097907504
Epoch 5/15, Loss: 0.24555007321462194
Epoch 6/15, Loss: 0.22110138991192332
Epoch 7/15, Loss: 0.20086195776616333
Epoch 8/15, Loss: 0.18517261763006004
Epoch 9/15, Loss: 0.1694931116844736
Epoch 10/15, Loss: 0.15215463409108965
Epoch 11/15, Loss: 0.14185281559777285
Epoch 12/15, Loss: 0.12909053981220767
Epoch 13/15, Loss: 0.1208172445632279
Epoch 14/15, Loss: 0.10817401131678588
Epoch 15/15, Loss: 0.09926505617872834
Accuracy on FashionMNIST: 88.72%
Confusion Matrix: 
[[834   1  33  13   2   4 105   0   8   0]
 [  2 973   1  12   2   0   8   0   2   0]
 [ 10   0 883  10  29   0  61   0   7   0]
 [ 28   9  26 854  26   1  48   0   6   2]
 [  2   0  77  25 771   0 118   1   6   0]
 [  0   0   0   1   1 958   0  27   4   9]
 [104   3  84  19  43   0 725   0  22   0]
 [  0   0   0   0   0  15   0 968   0  17]
 [  1   0   3   3   1   9   4   

In [12]:
# testing this checkpoint:
checkpoint = torch.load("../data/models/mnist/checkpoints/checkpoint.pt")
model.load_state_dict(checkpoint['model_state'])
optimizer.load_state_dict(checkpoint['optimizer_state'])
start = checkpoint['last_epoch']

for epoch in range(3):
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch {start + epoch + 1}/{epochs}, Loss: {running_loss / len(train_loader)}")

  checkpoint = torch.load("../data/models/mnist/checkpoints/checkpoint.pt")


Epoch 16/15, Loss: 0.0951754149195474
Epoch 17/15, Loss: 0.0856925794112085
Epoch 18/15, Loss: 0.07873296955707612
