In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
import random
import numpy as np
from sklearn.metrics import accuracy_score

random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)

In [10]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
dataset = torchvision.datasets.CIFAR10(root='data', train=True,download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='data', train=False, download=True,transform=transform)

train_size, val_size = 0.9, 0.1
batch_size = 128

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


Files already downloaded and verified
Files already downloaded and verified


# MLP
Training MLP classifier

In [11]:
# Model
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(32 * 32 * 3, 1024)
        self.fc2 = nn.Linear(1024, 216)
        self.fc3 = nn.Linear(216, 128)
        self.fc4 = nn.Linear(128, 10)

    def forward(self, x):
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)
        return x

# Early stopping
class EarlyStopping:
    def __init__(self, patience=5, delta=0, path='checkpoint.pt'):
        self.patience = patience
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        self.path = path

    def __call__(self, score, model):
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint( model)
        elif score < self.best_score - self.delta:
            self.best_score = score
            self.save_checkpoint(model)
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, model):
        torch.save(model.state_dict(), self.path)

In [12]:
# Training MLP
model = MLP()
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Utilising",device)
model = model.to(device)
model_path = 'models/mlp.pt'
early_stopping = EarlyStopping(patience=5, path=model_path)

print("Training........")
for epoch_num in range(50):
    model.train()
    epoch_loss = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch_num}", total=len(train_loader))

    for batch_num, (input, labels) in enumerate(progress_bar):
        (input, labels) = (input.to(device), labels.to(device))
        pred = model(input)
        loss = loss_fn(pred, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        epoch_loss += loss.item()
        progress_bar.set_postfix({'loss': loss.item()})

    epoch_loss /= len(train_loader)
    model.eval()

    with torch.no_grad():
        val_loss = 0
        for batch_num, (words, tags) in enumerate(val_loader):
            (words, tags) = (words.to(device), tags.to(device))
            pred = model(words)
            val_loss += loss_fn(pred, tags)
        val_loss /= len(val_loader)

    if epoch_num % 1 == 0:
        print(f"Epoch {epoch_num}, Train Loss: {epoch_loss}, Val Loss: {val_loss}")

    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("Early stopping....")
        break
print("Best val loss is",early_stopping.best_score)

Utilising cuda
Training........


Epoch 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 23.15it/s, loss=1.43]


Epoch 0, Train Loss: 1.6627673157914118, Val Loss: 1.5720328092575073


Epoch 1: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 23.35it/s, loss=1.35]


Epoch 1, Train Loss: 1.4414105567742477, Val Loss: 1.46636164188385


Epoch 2: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 23.35it/s, loss=1.18]


Epoch 2, Train Loss: 1.3256484534252773, Val Loss: 1.4156811237335205


Epoch 3: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 23.32it/s, loss=1.58]


Epoch 3, Train Loss: 1.226941398937594, Val Loss: 1.4045324325561523


Epoch 4: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 23.41it/s, loss=1.05]


Epoch 4, Train Loss: 1.1334933440454982, Val Loss: 1.3792709112167358


Epoch 5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 23.39it/s, loss=0.853]


Epoch 5, Train Loss: 1.0518058154054664, Val Loss: 1.431624412536621


Epoch 6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 23.41it/s, loss=0.878]


Epoch 6, Train Loss: 0.9649936231699857, Val Loss: 1.4155257940292358


Epoch 7: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 23.32it/s, loss=0.949]


Epoch 7, Train Loss: 0.8866768653758548, Val Loss: 1.4494661092758179


Epoch 8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:14<00:00, 23.73it/s, loss=0.975]


Epoch 8, Train Loss: 0.8151913706890561, Val Loss: 1.5312902927398682


Epoch 9: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:14<00:00, 23.70it/s, loss=0.965]


Epoch 9, Train Loss: 0.7438181504945863, Val Loss: 1.5877808332443237
Early stopping....
Best val loss is tensor(1.3793, device='cuda:0')


In [14]:
# Testing MLP
print("Testing......")
model = MLP()
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Utilising",device)
model = model.to(device)
model_path = 'models/mlp.pt'
state_dict = torch.load(model_path)
model.load_state_dict(state_dict)
model.eval()
with torch.no_grad():
    all_predictions = []
    all_ground_truth = []
    for batch_num, (input, labels) in enumerate(test_loader):
        (input, labels) = (input.to(device), labels.to(device))
        pred = model(input)
        predictions = torch.argmax(pred, axis=-1).cpu().numpy()
        ground_truth = labels.cpu().numpy()
        all_predictions.append(predictions)
        all_ground_truth.append(ground_truth)
    all_predictions = np.concatenate(all_predictions)
    all_ground_truth = np.concatenate(all_ground_truth)
    accuracy_test = accuracy_score(all_ground_truth, all_predictions)
print(f"Test set accuracy {accuracy_test}")

Testing......
Utilising cuda
Test set accuracy 0.5337


# Observation
1. Best Train loss is 1.1334 and Best Val loss is 1.3793
2. Test accuracy is 53.37%
3. The loss is not converged to a better minima as a result accuracy is less.
4. This is because MLP has many parameters when flattened the images, which requires lot of data to perform well. That will be reason for lower performance.

# CNN
Training CNN classifier

In [17]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 8 * 8)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x



In [18]:
model = CNN()
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Utilising",device)
model = model.to(device)
model_path = 'models/cnn.pt'
early_stopping = EarlyStopping(patience=5, path=model_path)
print("Training........")

for epoch_num in range(50):
    model.train()
    epoch_loss = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch_num}", total=len(train_loader))

    for batch_num, (input, labels) in enumerate(progress_bar):
        (input, labels) = (input.to(device), labels.to(device))
        pred = model(input)
        loss = loss_fn(pred, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        epoch_loss += loss.item()
        progress_bar.set_postfix({'loss': loss.item()})

    epoch_loss /= len(train_loader)
    model.eval()

    with torch.no_grad():
        val_loss = 0
        for batch_num, (words, tags) in enumerate(val_loader):
            (words, tags) = (words.to(device), tags.to(device))
            pred = model(words)
            val_loss += loss_fn(pred, tags)
        val_loss /= len(val_loader)

    if epoch_num % 1 == 0:
        print(f"Epoch {epoch_num}, Train Loss: {epoch_loss}, Val Loss: {val_loss}")

    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("Early stopping....")
        break
print("Best val loss is",early_stopping.best_score)

Utilising cuda
Training........


Epoch 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 22.15it/s, loss=1.35]


Epoch 0, Train Loss: 1.3910555086013945, Val Loss: 1.1970425844192505


Epoch 1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 22.25it/s, loss=0.807]


Epoch 1, Train Loss: 1.0022837788882581, Val Loss: 1.0100511312484741


Epoch 2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 22.26it/s, loss=0.678]


Epoch 2, Train Loss: 0.821267903867093, Val Loss: 0.9200318455696106


Epoch 3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 22.24it/s, loss=0.554]


Epoch 3, Train Loss: 0.6853613330220635, Val Loss: 0.850812554359436


Epoch 4: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 22.19it/s, loss=0.391]


Epoch 4, Train Loss: 0.5412718580019745, Val Loss: 0.861900806427002


Epoch 5: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 22.27it/s, loss=0.578]


Epoch 5, Train Loss: 0.4100508125160228, Val Loss: 0.8703452944755554


Epoch 6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 22.28it/s, loss=0.387]


Epoch 6, Train Loss: 0.28367172786965966, Val Loss: 0.9767342805862427


Epoch 7: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 22.27it/s, loss=0.23]


Epoch 7, Train Loss: 0.17630035373043607, Val Loss: 1.1386831998825073


Epoch 8: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:15<00:00, 22.27it/s, loss=0.108]


Epoch 8, Train Loss: 0.1064487252574922, Val Loss: 1.1834677457809448
Early stopping....
Best val loss is tensor(0.8508, device='cuda:0')


In [20]:
# Testing CNN
print("Testing......")
model = CNN()
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Utilising",device)
model = model.to(device)
model_path = 'models/cnn.pt'
state_dict = torch.load(model_path)
model.load_state_dict(state_dict)
model.eval()
with torch.no_grad():
    all_predictions = []
    all_ground_truth = []
    for batch_num, (input, labels) in enumerate(test_loader):
        (input, labels) = (input.to(device), labels.to(device))
        pred = model(input)
        predictions = torch.argmax(pred, axis=-1).cpu().numpy()
        ground_truth = labels.cpu().numpy()
        all_predictions.append(predictions)
        all_ground_truth.append(ground_truth)
    all_predictions = np.concatenate(all_predictions)
    all_ground_truth = np.concatenate(all_ground_truth)
    accuracy_test = accuracy_score(all_ground_truth, all_predictions)
print(f"Test set accuracy {accuracy_test}")


Testing......
Utilising cuda
Test set accuracy 0.7091


# Observations
1. Best train loss is 0.6853 and Best validation loss is 0.8508
2. Test accuracy is 70.91%, which is 17.54% gretaer than MLP. Which is very signifcant improvement.
3. When compared with MLP, CNNs have fewer parameters and the ability of learning low level feature maps like patterns, edges etc.. are advantage of CNN and can obtain good performance with the same data than MLP.
4. Leaning of several spacial low level feature maps such as edges, small fileters of shapes, etc required for the task by learning jointly helps CNN to perform better than MLP.

# Transfer Learning using VGG-16


In [22]:
import torchvision.models as models
vgg16 = models.vgg16(weights='DEFAULT')

In [23]:
num_features = vgg16.classifier[6].in_features
vgg16.classifier[6] = nn.Linear(num_features, 10)

In [24]:
model = vgg16

loss_fn = torch.nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Utilising",device)
model = model.to(device)
model_path = 'models/vgg16.pt'
early_stopping = EarlyStopping(patience=5, path=model_path)

print("Training........")
for epoch_num in range(50):
    model.train()
    epoch_loss = 0
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch_num}", total=len(train_loader))

    for batch_num, (input, labels) in enumerate(progress_bar):
        (input, labels) = (input.to(device), labels.to(device))
        pred = model(input)
        loss = loss_fn(pred, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        epoch_loss += loss.item()
        progress_bar.set_postfix({'loss': loss.item()})

    epoch_loss /= len(train_loader)
    model.eval()

    with torch.no_grad():
        val_loss = 0
        for batch_num, (words, tags) in enumerate(val_loader):
            (words, tags) = (words.to(device), tags.to(device))
            pred = model(words)
            val_loss += loss_fn(pred, tags)
        val_loss /= len(val_loader)

    if epoch_num % 1 == 0:
        print(f"Epoch {epoch_num}, Train Loss: {epoch_loss}, Val Loss: {val_loss}")

    early_stopping(val_loss, model)
    if early_stopping.early_stop:
        print("Early stopping....")
        break
print("Best val loss is",early_stopping.best_score)

Utilising cuda
Training........


Epoch 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:48<00:00,  7.19it/s, loss=0.544]


Epoch 0, Train Loss: 0.8615189748392864, Val Loss: 0.5843836069107056


Epoch 1: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:49<00:00,  7.18it/s, loss=0.489]


Epoch 1, Train Loss: 0.5022154474122957, Val Loss: 0.49446535110473633


Epoch 2: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:49<00:00,  7.15it/s, loss=0.296]


Epoch 2, Train Loss: 0.39050354749302973, Val Loss: 0.47516295313835144


Epoch 3: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:49<00:00,  7.15it/s, loss=0.371]


Epoch 3, Train Loss: 0.31310246825556864, Val Loss: 0.4596821963787079


Epoch 4: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:49<00:00,  7.15it/s, loss=0.19]


Epoch 4, Train Loss: 0.24238071899691765, Val Loss: 0.455503910779953


Epoch 5: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:49<00:00,  7.18it/s, loss=0.0877]


Epoch 5, Train Loss: 0.19001077835194088, Val Loss: 0.4467201828956604


Epoch 6: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:49<00:00,  7.14it/s, loss=0.162]


Epoch 6, Train Loss: 0.14511817769909446, Val Loss: 0.473048597574234


Epoch 7: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:49<00:00,  7.17it/s, loss=0.0935]


Epoch 7, Train Loss: 0.11024726475906474, Val Loss: 0.5000740885734558


Epoch 8: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:49<00:00,  7.16it/s, loss=0.0758]


Epoch 8, Train Loss: 0.08315005591827106, Val Loss: 0.5318211913108826


Epoch 9: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:49<00:00,  7.15it/s, loss=0.0862]


Epoch 9, Train Loss: 0.061156463825186205, Val Loss: 0.6018524765968323


Epoch 10: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████| 352/352 [00:49<00:00,  7.16it/s, loss=0.0598]


Epoch 10, Train Loss: 0.053251084726070985, Val Loss: 0.5924177765846252
Early stopping....
Best val loss is tensor(0.4467, device='cuda:0')


In [25]:
# Testing VGG
print("Testing......")
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Utilising",device)
model = vgg16
model = model.to(device)
model_path = 'models/vgg16.pt'
state_dict = torch.load(model_path)
model.load_state_dict(state_dict)
model.eval()
with torch.no_grad():
    all_predictions = []
    all_ground_truth = []
    for batch_num, (input, labels) in enumerate(test_loader):
        (input, labels) = (input.to(device), labels.to(device))
        pred = model(input)
        predictions = torch.argmax(pred, axis=-1).cpu().numpy()


        ground_truth = labels.cpu().numpy()
        all_predictions.append(predictions)
        all_ground_truth.append(ground_truth)
    all_predictions = np.concatenate(all_predictions)
    all_ground_truth = np.concatenate(all_ground_truth)
    accuracy_test = accuracy_score(all_ground_truth, all_predictions)
print(f"Test set accuracy {accuracy_test}")

Testing......
Utilising cuda
Test set accuracy 0.8636


# Observations
1. Best train loss is 0.1900 and best validation loss is 0.4467, these losses are far better than MLP and CNN.
2. Test accuracy is 86.36%, which is 15.45% better than CNN and 32.99% better than MLP
3. Transfer learning can benefit in better parameter initialisation. As the base model is trained with large dataset for similar task which can boost the convergence speed as well as performance with little finetuning data. And also the model already has the knowledge of capturing essential features, so can perform well with little finetuning data for relevant task.
4. Time required is same/lesser than MLP and CNN, but reached loss which is far better than MLP and CNN.