In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk every file under the Kaggle input directory. The joined path is built and
# immediately discarded, so this loop prints nothing unless the commented-out
# print below is re-enabled.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        os.path.join(dirname, filename)
        #print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
import os
from os import listdir
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
from PIL import Image
#import cv2
from sklearn.model_selection import train_test_split

import glob
import shutil

import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, DataLoader, Dataset
import torch.optim as optim
from torch.optim import Adam
from torch.autograd import Variable
import torchvision.datasets as dset
import torchvision.utils as vutils
from torch.utils.data.sampler import SubsetRandomSampler

import time
import copy
from tqdm import tqdm_notebook as tqdm

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
import pathlib


print('All libraries are imported successfully')

ModuleNotFoundError: No module named 'torch'

**Parameter Initialization**

In [None]:
# ---- Data / loader settings ----
image_size = 32  # image size 32*32
workers = 2      # number of worker processes used for data loading
ngpu = 1         # number of GPUs available; use 0 for CPU mode
num_epochs = 25  # number of training epochs
batch_size = 64  # number of samples in each batch
# train data path
train_rootpath = "/kaggle/input/deep-learning-for-msc-coursework-2022/train/train/"
# test data path
test_rootpath = "/kaggle/input/deep-learning-for-msc-coursework-2022/test/"
# Select the device to run on
device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")
shuffle = True  # shuffle the indices before the train/validation split

# ---- Reproducibility ----
manualSeed = 499  # random seed for reproducibility
# Apply the seed to both RNGs the notebook draws from. Without the torch seed,
# weight initialisation and sampler order differ on every run even though a
# seed is declared.
np.random.seed(manualSeed)
torch.manual_seed(manualSeed)

# ---- Optimiser settings ----
# Learning rate used by Adam (0.001 was also tried).
lr = 0.000075

**Data Augmentation**
To improve model accuracy, several augmentation steps are applied to every image: the brightness is randomly jittered (`ColorJitter(brightness=0.6)`), the image is converted to a tensor, it is flipped horizontally with probability 0.5, normalized with mean 0.5 and standard deviation 0.5 (which makes training more stable and speeds up convergence), and finally rotated by a random angle of up to 45 degrees in either direction.

In [None]:
# Image augmentation pipeline; the steps are applied in the order listed.
# NOTE(review): this same `transform` is also handed to the test ImageFolder
# further down, so those images receive the random augmentation too.
transform = transforms.Compose([
                            transforms.ColorJitter(brightness=0.6),  # random brightness jitter on the input image
                            transforms.ToTensor(),  # convert the image to a tensor
                            #transforms.Resize((64,64)),
                            transforms.RandomHorizontalFlip(p=0.5),  # mirror roughly half of the samples
                            transforms.Normalize((0.5),(0.5)),  # (0.5) is a plain float, not a 1-tuple; presumably broadcast over all channels
                            transforms.RandomRotation(degrees=45)  # random rotation, applied after normalization
    ])

**Loading Training Dataset**
The classes are not equally represented: 'Cancer': 500, 'Connective': 500, 'Immune': 500, 'Normal': 200. The Cancer, Connective and Immune cell types each have 500 images, whereas there are far fewer Normal cells. The training set is therefore imbalanced.

In [None]:
 # load the training dataset
# One sub-folder per class under train_rootpath; every sample goes through `transform`
train_dataset = dset.ImageFolder(transform=transform, root=train_rootpath)

# Reverse lookup: integer label -> class name
idx2class = dict((label_id, class_name) for class_name, label_id in train_dataset.class_to_idx.items())

def get_class_distribution(dataset_obj):
    """Count how many samples of each class a dataset holds.

    Args:
        dataset_obj: object exposing ``class_to_idx`` (class name -> integer
            label). If it also exposes ``targets`` (one integer label per
            sample, as torchvision's ImageFolder does), the counts are read
            from that list so no image has to be loaded or transformed.
            Otherwise the dataset is iterated and the label is taken from
            position 1 of every element.

    Returns:
        dict mapping each class name to its number of samples. Classes with
        no samples are kept with a count of 0.
    """
    # Build the label -> name lookup from the dataset that was passed in, so the
    # function does not depend on a notebook-level mapping for another dataset.
    index_to_name = {label_id: name for name, label_id in dataset_obj.class_to_idx.items()}
    count_dict = {name: 0 for name in dataset_obj.class_to_idx}

    labels = getattr(dataset_obj, "targets", None)
    if labels is None:
        # Fallback: iterate the samples themselves (slow for image datasets).
        labels = (element[1] for element in dataset_obj)

    for y_lbl in labels:
        count_dict[index_to_name[int(y_lbl)]] += 1

    return count_dict

# Per-class sample counts of the whole (unsplit) training folder, shown as a bar chart
train_data_dict=get_class_distribution(train_dataset)
print(train_data_dict)
names, values = list(train_data_dict), list(train_data_dict.values())

bar_positions = range(len(train_data_dict))
plt.bar(bar_positions, values, tick_label=names)
plt.xlabel("Cell Type")
plt.ylabel("Number of cells")
plt.title("Training Set label distribution : Before splitting")
plt.show()

**Dividing the training set into training and validation sets** The training data is split randomly in two (90% training, 10% validation): the training split contains 1530 cell images and the validation split 170. The data loaders are then initialised. `shuffle=True` is not passed to them because a sampler is already supplied, and `sampler` and `shuffle` are mutually exclusive.

In [None]:
# Hold out the first 10% of the (optionally shuffled) indices for validation
num_train = len(train_dataset)
split = int(np.floor(0.1 * num_train))
indices = list(range(num_train))

# Seeded shuffle so the same split is produced on every run
if shuffle:
    np.random.seed(manualSeed)
    np.random.shuffle(indices)

valid_idx = indices[:split]
train_idx = indices[split:]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# Both loaders read from the same dataset; only the sampler decides which
# indices each loader may draw.
loader_kwargs = dict(batch_size=batch_size, num_workers=workers)
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, **loader_kwargs)
val_dataloader = DataLoader(train_dataset, sampler=valid_sampler, **loader_kwargs)

print("Size of train set :", len(train_dataloader.sampler))
print("Size of validation set :", len(val_dataloader.sampler))

In [None]:
def get_class_distribution_loaders(dataloader_obj, dataset_obj):
    """Count the labels yielded by one full pass over a data loader.

    Args:
        dataloader_obj: iterable of ``(inputs, labels)`` batches where
            ``labels`` is a 1-D tensor of integer class ids.
        dataset_obj: object exposing ``class_to_idx`` (class name -> integer
            label); it supplies the class names for the result.

    Returns:
        dict mapping each class name to the number of times it was seen.
        Classes that never occur are kept with a count of 0.
    """
    # Derive the id -> name lookup from the dataset argument instead of relying
    # on a notebook-level mapping that may belong to a different dataset.
    index_to_name = {label_id: name for name, label_id in dataset_obj.class_to_idx.items()}
    count_dict = {name: 0 for name in dataset_obj.class_to_idx}

    for _, label_id in dataloader_obj:
        for y_idx in label_id.tolist():
            count_dict[str(index_to_name[y_idx])] += 1

    return count_dict

**Distribution of labels in the training and validation sets** Like the training set, the validation set is imbalanced. Every label except 'Normal' occurs a similar number of times: 'Cancer': 48, 'Connective': 54, 'Immune': 50, 'Normal': 18.

In [None]:
# Label counts of each split, gathered by iterating the corresponding loader once
train_data_dict = get_class_distribution_loaders(train_dataloader, train_dataset)
val_data_dict = get_class_distribution_loaders(val_dataloader, train_dataset)
print(train_data_dict)
print(val_data_dict)
print()

train_names, train_values = list(train_data_dict), list(train_data_dict.values())
val_names, val_values = list(val_data_dict), list(val_data_dict.values())

# One bar chart per split, side by side, sharing colours and axis labels
bar_colors = ['red', 'green', 'blue', 'cyan']
fig, (axes1, axes2) = plt.subplots(1, 2, figsize=(10, 5))
panels = (
    (axes1, train_names, train_values, "Training Set label distribution : After splitting"),
    (axes2, val_names, val_values, "Validation Set label distribution : After splitting"),
)
for panel, tick_names, counts, panel_title in panels:
    panel.bar(range(len(counts)), counts, tick_label=tick_names, color=bar_colors)
    panel.set_title(panel_title)
    panel.set_xlabel("Type of Cell")
    panel.set_ylabel("Number of cells")

plt.show()


**Plotting some of the training images**

In [None]:
# Take one batch from the training loader and tile (at most) its first 64 images
real_batch = next(iter(train_dataloader))
training_grid = vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu()

plt.figure(figsize=(8,8))
plt.title("Training Images")
plt.axis("off")
# make_grid returns channels-first data; imshow expects channels last
plt.imshow(np.transpose(training_grid, (1,2,0)))

**Statistical analysis of training set**

In [None]:
# Inspect one training batch: value range and tensor shapes
dataiterator = iter(train_dataloader)
# Use the built-in next(): current DataLoader iterators no longer expose a
# Python-2 style .next() method.
images,labels = next(dataiterator)
print(torch.min(images),torch.max(images))
print(labels.shape)
print(images.shape)
#Finding unique labels in train dataset
root=pathlib.Path(train_rootpath)
# Only sub-directories are classes; a stray file in the root would otherwise
# shift every index used later to turn predictions into class names.
tr_classes=sorted(entry.name for entry in root.iterdir() if entry.is_dir())
tr_classes

**Plotting some of the Validation images**

In [None]:

# Take one batch from the validation loader and tile (at most) its first 64 images
real_batch = next(iter(val_dataloader))
validation_grid = vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu()

plt.figure(figsize=(8,8))
plt.title("Validation Images")
plt.axis("off")
# make_grid returns channels-first data; imshow expects channels last
plt.imshow(np.transpose(validation_grid, (1,2,0)))

**Statistical Analysis of Validation Set**

In [None]:
# Inspect one validation batch: value range and tensor shapes
dataiterator = iter(val_dataloader)
# Use the built-in next(): current DataLoader iterators no longer expose a
# Python-2 style .next() method.
images,labels = next(dataiterator)
print(torch.min(images),torch.max(images))
print(labels.shape)
print(images.shape)
val_names #unique labels in validation dataset

**Plotting some of the Test Set Images**

In [None]:
# Load the test folder and show one shuffled batch of it.
# NOTE(review): `transform` is passed here, so whatever random steps it contains
# are applied to these test images as well.
test_data = dset.ImageFolder(transform=transform, root=test_rootpath)
test_dataloader = DataLoader(test_data, shuffle=True, batch_size=batch_size)

real_batch = next(iter(test_dataloader))
test_grid = vutils.make_grid(real_batch[0].to(device)[:64], padding=2, normalize=True).cpu()

plt.figure(figsize=(8,8))
plt.title("Test Images")
plt.axis("off")
# make_grid returns channels-first data; imshow expects channels last
plt.imshow(np.transpose(test_grid, (1,2,0)))

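**Convolutional Network Architecture** I created a network with 7 convolutional layers (each followed by batch normalization and ReLU, except the second, which has ReLU only) and a final fully connected layer. It is trained with a batch size of 64.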

In [None]:
class ConvNet(nn.Module):
    """Seven-layer convolutional classifier for 3-channel 32x32 images.

    Layout: conv1 -> bn1 -> relu -> 2x2 max-pool, then conv2 -> relu, then
    conv3..conv7 each followed by batch norm and relu, and finally one fully
    connected layer that produces ``num_classes`` raw scores (logits).

    Every convolution uses kernel 3, stride 1, padding 1, so it keeps the
    spatial size: ((W - F + 2P) / S) + 1 = ((W - 3 + 2) / 1) + 1 = W.
    The single pooling layer halves 32x32 to 16x16.

    Args:
        num_classes: number of output scores per image.

    Shape comments below use N for the batch size.
    """
    def __init__(self,num_classes=4):
        super(ConvNet,self).__init__()

        # Defining first 2D convolution layer
        #Input shape= (N,3,32,32)
        self.conv1=nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
        #Shape= (N,12,32,32)
        self.bn1=nn.BatchNorm2d(num_features=12)# added batch normalization
        self.relu1=nn.ReLU()

        self.pool=nn.MaxPool2d(kernel_size=2)
        #Reduce the image size by factor 2
        #Shape= (N,12,16,16)

        # Defining second 2D convolution layer (no batch normalization here)
        self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        #Shape= (N,20,16,16)
        self.relu2=nn.ReLU()

        # Defining third 2D convolution layer
        self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        #Shape= (N,32,16,16)
        self.bn3=nn.BatchNorm2d(num_features=32)# added batch normalization
        self.relu3=nn.ReLU()

        # Defining fourth 2D convolution layer
        self.conv4=nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3,stride=1,padding=1)
        #Shape= (N,64,16,16)
        self.bn4=nn.BatchNorm2d(num_features=64)# added batch normalization
        self.relu4=nn.ReLU()

        # Defining fifth 2D convolution layer
        self.conv5=nn.Conv2d(in_channels=64,out_channels=128,kernel_size=3,stride=1,padding=1)
        #Shape= (N,128,16,16)
        self.bn5=nn.BatchNorm2d(num_features=128)# added batch normalization
        self.relu5=nn.ReLU()

        # Defining sixth 2D convolution layer
        self.conv6=nn.Conv2d(in_channels=128,out_channels=256,kernel_size=3,stride=1,padding=1)
        #Shape= (N,256,16,16)
        self.bn6=nn.BatchNorm2d(num_features=256)# added batch normalization
        self.relu6=nn.ReLU()

        # Defining seventh 2D convolution layer
        self.conv7=nn.Conv2d(in_channels=256,out_channels=324,kernel_size=3,stride=1,padding=1)
        #Shape= (N,324,16,16)
        self.bn7=nn.BatchNorm2d(num_features=324)# added batch normalization
        self.relu7=nn.ReLU()

        # Classifier over the flattened (324,16,16) feature map
        self.fc=nn.Linear(in_features=324 * 16 * 16,out_features=num_classes)

    def forward(self,input):
        """Return class scores of shape (N, num_classes) for an (N,3,32,32) batch.

        Raises:
            RuntimeError: if the spatial size of ``input`` does not lead to a
                (324,16,16) feature map, i.e. the images are not 32x32.
        """
        output=self.relu1(self.bn1(self.conv1(input)))
        output=self.pool(output)

        output=self.relu2(self.conv2(output))
        output=self.relu3(self.bn3(self.conv3(output)))
        output=self.relu4(self.bn4(self.conv4(output)))
        output=self.relu5(self.bn5(self.conv5(output)))
        output=self.relu6(self.bn6(self.conv6(output)))
        output=self.relu7(self.bn7(self.conv7(output)))

        # Flatten everything except the batch dimension. Reshaping to a fixed
        # width with a free batch dimension would silently merge samples when
        # the input size is wrong (four 16x16 images have exactly as many
        # features as one 32x32 image); this way the linear layer rejects them.
        output=output.flatten(1)
        output=self.fc(output)

        return output

**Hyperparameter Tuning** Two optimizers were tried, Adam and SGD; the model performed better with Adam, so the learning rate was then fine-tuned for the Adam optimizer.

In [None]:
# Fresh classifier on the selected device
model = ConvNet(num_classes=4)
model = model.to(device)

# Adam with a small L2 penalty (SGD with lr=0.001 was the alternative tried)
optimizer = Adam(params=model.parameters(), lr=lr, weight_decay=0.0001)

# Multi-class classification loss applied to the raw scores
loss_function = nn.CrossEntropyLoss()

**Model Training for 25 epochs**

In [None]:
#Model training and saving best model

# Per-epoch history, used by the loss/accuracy plots
best_accuracy=0.0
train_accuracy_arr=[]
train_loss_arr=[]
val_loss_arr=[]
val_accuracy_arr=[]

for epoch in range(num_epochs):

    # ---- Training pass ----
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    for images,labels in train_dataloader:
        # Move the batch to the device the model was placed on. Testing only
        # torch.cuda.is_available() would put the data on the GPU even when
        # `device` is the CPU (ngpu = 0) and crash on the first forward pass.
        images=images.to(device)
        labels=labels.to(device)
        optimizer.zero_grad()
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()

        # loss is the batch mean: weight it by the batch size so the epoch
        # average stays correct when the last batch is smaller
        train_loss+=loss.item()*images.size(0)
        predicted=outputs.argmax(dim=1)
        train_accuracy+=int((predicted==labels).sum())

    train_accuracy=train_accuracy/len(train_dataloader.sampler)
    train_loss=train_loss/len(train_dataloader.sampler)
    train_accuracy_arr.append(train_accuracy)
    train_loss_arr.append(train_loss)

    # ---- Validation pass ----
    model.eval()
    val_accuracy=0.0
    val_loss=0.0
    # No gradients are needed for evaluation; skipping them saves memory and time
    with torch.no_grad():
        for images,labels in val_dataloader:
            images=images.to(device)
            labels=labels.to(device)

            outputs=model(images)
            loss=loss_function(outputs,labels)
            predicted=outputs.argmax(dim=1)
            val_accuracy+=int((predicted==labels).sum())
            val_loss+=loss.item()*images.size(0)

    val_accuracy=val_accuracy/len(val_dataloader.sampler)
    val_loss=val_loss/len(val_dataloader.sampler)
    val_loss_arr.append(val_loss)
    val_accuracy_arr.append(val_accuracy)

    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Validation Accuracy: '+str(val_accuracy))

    #Save the best model with maximum accuracy
    if val_accuracy>best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy=val_accuracy

print("\nBest Accuracy is :",best_accuracy)

**Retrieve the Best Model for label prediction**

In [None]:
# Rebuild the network on the CPU (the cells below feed it CPU tensors) and
# restore the best weights saved during training.
checkpoint = torch.load('best_checkpoint.model', map_location='cpu')
model = ConvNet(num_classes=4)
# A newly constructed module starts in training mode. Switch to evaluation mode
# so BatchNorm uses its stored running statistics instead of per-batch ones.
model.eval()
model.load_state_dict(checkpoint)

**Confusion Matrix : Training set** It shows how a classifier is confused while making predictions. The number of True Positives (TPs), False Positives (FPs), True Negatives (TNs) and False Negatives (FNs) for model’s predictions are represented in a N x N matrix where N is the number of labels/classes. It gives an insight on the number and type of errors made by the classifier.

In [None]:
y_pred = []
y_true = []

# Inference only: evaluation mode so BatchNorm uses its running statistics (and
# does not update them), and no_grad so no autograd graph is built.
model.eval()
model_device = next(model.parameters()).device
with torch.no_grad():
    # iterate over the training split
    for inputs, labels in train_dataloader:
        output = model(inputs.to(model_device)) # Feed Network
        # argmax of the raw scores is the predicted class (exp() would not change it)
        y_pred.extend(output.argmax(dim=1).cpu().numpy()) # Save Prediction
        y_true.extend(labels.cpu().numpy()) # Save Truth

# Replace integer labels with class names
y_true = [tr_classes[label_id] for label_id in y_true]
y_pred = [tr_classes[label_id] for label_id in y_pred]

# Build confusion matrix
ConfusionMatrixDisplay.from_predictions(y_true,y_pred,xticks_rotation='vertical')

**Confusion Matrix : Validation Set**

In [None]:
y_pred = []
y_true = []

# Inference only: evaluation mode so BatchNorm uses its running statistics (and
# does not update them), and no_grad so no autograd graph is built.
model.eval()
model_device = next(model.parameters()).device
with torch.no_grad():
    # iterate over the validation split
    for inputs, labels in val_dataloader:
        output = model(inputs.to(model_device)) # Feed Network
        # argmax of the raw scores is the predicted class (exp() would not change it)
        y_pred.extend(output.argmax(dim=1).cpu().numpy()) # Save Prediction
        y_true.extend(labels.cpu().numpy()) # Save Truth

# Replace integer labels with class names
y_true = [val_names[label_id] for label_id in y_true]
y_pred = [val_names[label_id] for label_id in y_pred]

# Build confusion matrix
ConfusionMatrixDisplay.from_predictions(y_true,y_pred,xticks_rotation='vertical')

**Loss change over epoch** The loss curve is one of the most commonly used charts for debugging a neural network during training. It gives an overview of the training process and of the direction in which the network is learning.
In the diagram below, no overfitting or underfitting is observed. Overfitting would show up as high accuracy on the training data combined with low accuracy on the validation data.

In [None]:
# Training vs. validation loss, one point per epoch
plt.figure(figsize=(10,5))
for curve, curve_label in ((train_loss_arr, "train"), (val_loss_arr, "validation")):
    plt.plot(curve, '-o', label=curve_label)
plt.title("Loss change over epoch")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()

**Accuracy change over epoch**

In [None]:
# Training vs. validation accuracy, one point per epoch
plt.figure(figsize=(10,5))
for curve, curve_label in ((train_accuracy_arr, "train"), (val_accuracy_arr, "validation")):
    plt.plot(curve, '-o', label=curve_label)
plt.title("Accuracy change over epoch")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()

**Test the Model**

In [None]:
# Put the model in evaluation mode before it is used for prediction
model.eval()

In [None]:
#prediction function
def prediction(image_path,transformer):
    """Predict the class name of a single image file.

    Uses the notebook-level ``model`` and ``tr_classes``. The model is expected
    to be in evaluation mode already; this function does not change its mode.

    Args:
        image_path: path of the image file to classify.
        transformer: callable that turns a PIL image into a (C,H,W) tensor.

    Returns:
        The entry of ``tr_classes`` at the index of the highest model score.
    """
    # Run on whichever device the model lives on. The earlier
    # `image_tensor.cuda()` discarded its result, so the input never moved.
    model_device=next(model.parameters()).device

    # Close the file handle deterministically and force 3 channels, matching
    # the RGB conversion that ImageFolder applies to the training images.
    with Image.open(image_path) as image:
        image_tensor=transformer(image.convert('RGB')).float()

    # Add the batch dimension expected by the network
    image_tensor=image_tensor.unsqueeze(0).to(model_device)

    with torch.no_grad():
        output=model(image_tensor)

    # .item() also works for a tensor that lives on the GPU
    index=output.argmax(dim=1).item()
    pred=tr_classes[index]

    return pred

In [None]:
pred_dict={}
test_path = "/kaggle/input/deep-learning-for-msc-coursework-2022/test/test/"
# Sorted so the files are always processed in the same order
images_path = sorted(glob.glob(test_path+'*.png'))

# Deterministic preprocessing for inference: the same tensor conversion and
# normalization as in training, but without the random brightness, flip and
# rotation steps. With those, the same image could get a different label on
# every run.
inference_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5),(0.5)),
])

for i in images_path:
    # Key each prediction by the bare file name
    pred_dict[os.path.basename(i)]=prediction(i,inference_transform)

predict_test = pd.DataFrame(list(pred_dict.items()), columns=['Id', 'Type'])

# Write the predictions, ordered by Id, as test.csv inside test.zip
compression_opts = dict(method='zip',archive_name='test.csv')
predict_test.sort_values('Id').to_csv('test.zip', index=False,compression=compression_opts)

**AutoEncoder**

In [None]:
class AutoEncoder(nn.Module):
    """Fully connected autoencoder over flattened 32*32 vectors.

    The encoder narrows 1024 -> 64 -> 32 -> 16 -> 8 with ReLU between the
    linear layers (none after the 8-unit bottleneck). The decoder mirrors it,
    8 -> 16 -> 32 -> 64 -> 1024, and ends in a Sigmoid.

    NOTE(review): the Sigmoid limits reconstructions to (0, 1); confirm this
    matches the value range of the inputs the model is trained on.
    """
    def __init__(self):
        super().__init__()
        # Layer widths from the input (N,1024) down to the bottleneck (N,8)
        widths = [32*32, 64, 32, 16, 8]

        encoder_layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            encoder_layers += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        # Drop the trailing ReLU: the bottleneck output is left linear
        self.encoder = nn.Sequential(*encoder_layers[:-1])

        mirrored = widths[::-1]
        decoder_layers = []
        for fan_in, fan_out in zip(mirrored[:-1], mirrored[1:]):
            decoder_layers += [nn.Linear(fan_in, fan_out), nn.ReLU()]
        # The last activation is a Sigmoid instead of a ReLU  (N,8 -> N,1024)
        decoder_layers[-1] = nn.Sigmoid()
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self,x):
        """Encode ``x`` of shape (N,1024) and return its reconstruction (N,1024)."""
        return self.decoder(self.encoder(x))

**Hyperparameter Tuning** Both the Adam and SGD optimizers were tried, together with different learning rates. The Adam optimizer with lr = 0.000075 performed comparatively better.

In [None]:
# Fresh autoencoder on the selected device (this rebinds `model`)
model = AutoEncoder()
model = model.to(device)
print(model)

# Reconstruction error: mean squared difference between output and input
criterion = nn.MSELoss()
# Adam with a small L2 penalty (SGD with lr=0.001 was the alternative tried)
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr, weight_decay=0.0001)

**Training the AutoEncoder for 10 epochs and evaluating it on the validation set**

In [None]:
# Number of epochs for the training loop in the next cell
num_epochs=10 

In [None]:
# np.inf rather than np.Inf: the capitalised alias was removed in NumPy 2.0
training_loss_min=np.inf
# Previously appended to without ever being created (NameError on epoch 1)
epoch_arr=[]
train_loss_arr=[]
val_loss_arr=[]
# Kept for compatibility but left empty: an autoencoder has no class
# prediction, and the earlier "accuracy" compared a tensor with itself, so it
# only counted rows.
train_acc_arr=[]
val_acc_arr=[]

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1} of {num_epochs}")
    epoch_arr.append(epoch)

    # ---- Training pass ----
    model.train()
    running_loss=0.0
    rows_seen=0
    # len(dataloader) is already the number of batches; dividing it by the
    # batch size again made the progress bar total 0.
    for images,labels in tqdm(train_dataloader, total=len(train_dataloader)):
        images = images.to(device)
        # Flatten to rows of 32*32 values. NOTE(review): for multi-channel
        # images every channel becomes its own row.
        images = images.reshape(-1,32*32)
        optimizer.zero_grad()
        reconstruction= model(images)
        loss = criterion(reconstruction, images)
        loss.backward()
        optimizer.step()
        # loss is the mean over this batch's rows: weight by the row count
        running_loss += loss.item()*images.size(0)
        rows_seen += images.size(0)

    # Average over the rows actually seen. Dividing by the number of images
    # inflated the loss whenever one image produced more than one row.
    train_loss = running_loss/rows_seen
    train_loss_arr.append(np.round(train_loss,6))

    # ---- Evaluation on the validation set ----
    model.eval()
    running_loss=0.0
    rows_seen=0
    with torch.no_grad():
        for images,labels in tqdm(val_dataloader, total=len(val_dataloader)):
            images = images.to(device)
            images = images.reshape(-1,32*32)
            reconstruction = model(images)
            loss = criterion(reconstruction, images)
            running_loss += loss.item()*images.size(0)
            rows_seen += images.size(0)

    val_loss = running_loss/rows_seen
    val_loss_arr.append(np.round(val_loss,6))

    print("Train Loss: ",train_loss, " Validation Loss: ",val_loss)

    # save model if training loss has decreased
    if train_loss<=training_loss_min:
        torch.save(model.state_dict(),'best_checkpoint_auto.model')
        training_loss_min=train_loss


**Load the checkpoint at which the model had the lowest training loss**

In [None]:
# Rebuild the autoencoder on the selected device, then restore the weights that
# were saved during training
model = AutoEncoder()
model = model.to(device)
checkpoint = torch.load('best_checkpoint_auto.model')
model.load_state_dict(checkpoint)

**Loss change over epoch (Training and Validation Data)**

In [None]:
# Autoencoder reconstruction loss per epoch for both splits
plt.figure(figsize=(10,5))
for curve, curve_label in ((train_loss_arr, "Train Loss"), (val_loss_arr, "Validation Loss")):
    plt.plot(curve, '-o', label=curve_label)
plt.title("Loss change over epoch")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()

In [None]:
def test_loop(model, device, test_loader):
    #model.eval()
    model = model.to(device)
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            data = data.reshape(-1,32*32)
            output = model(data)
            loss = criterion(output, data)
            print(loss)
            test_loss += loss.item()*images.size(0)  # sum up batch loss
            print(test_loss)
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

#test_loop(model,device,test_dataloader)

**Final Submission**
The model is trained on the entire training set (1700 cell images) for 25 epochs with the fine-tuned parameters, and is then used to predict the labels of the test set.

In [None]:
# Loader over the complete training folder (no validation hold-out), reshuffled every epoch
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
# Number of epochs for the final training run
num_epochs = 25

In [None]:
# Fresh classifier on the selected device for the final run
model = ConvNet(num_classes=4)
model = model.to(device)

# Adam with a small L2 penalty (SGD with lr=0.001 was the alternative tried)
optimizer = Adam(params=model.parameters(), lr=lr, weight_decay=0.0001)

# Multi-class classification loss applied to the raw scores
loss_function = nn.CrossEntropyLoss()

In [None]:
#Model training and saving best model

# Per-epoch history, used by the loss/accuracy plot
best_accuracy=0.0
train_accuracy_arr=[]
train_loss_arr=[]
val_loss_arr=[]
val_accuracy_arr=[]

for epoch in range(num_epochs):

    #Training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    for images,labels in train_dataloader:
        # Move the batch to the device the model was placed on. Testing only
        # torch.cuda.is_available() would put the data on the GPU even when
        # `device` is the CPU (ngpu = 0) and crash on the first forward pass.
        images=images.to(device)
        labels=labels.to(device)
        optimizer.zero_grad()
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()

        # loss is the batch mean: weight it by the batch size so the epoch
        # average stays correct when the last batch is smaller
        train_loss+=loss.item()*images.size(0)
        predicted=outputs.argmax(dim=1)
        train_accuracy+=int((predicted==labels).sum())

    train_accuracy=train_accuracy/len(train_dataloader.sampler)
    train_loss=train_loss/len(train_dataloader.sampler)
    train_accuracy_arr.append(train_accuracy)
    train_loss_arr.append(train_loss)

    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy))

    #Save the best model
    # There is no validation split in this run, so the checkpoint is chosen by
    # training accuracy.
    if train_accuracy>best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint_final.model')
        best_accuracy=train_accuracy

print("\nBest Accuracy is :",best_accuracy)

In [None]:
# Rebuild the network on the CPU (the cells below feed it CPU tensors) and
# restore the best weights saved during the final training run.
checkpoint = torch.load('best_checkpoint_final.model', map_location='cpu')
model = ConvNet(num_classes=4)
# A newly constructed module starts in training mode. Switch to evaluation mode
# so BatchNorm uses its stored running statistics instead of per-batch ones.
model.eval()
model.load_state_dict(checkpoint)

**Confusion Matrix : Entire Training set**

In [None]:
y_pred = []
y_true = []

# Inference only: evaluation mode so BatchNorm uses its running statistics (and
# does not update them), and no_grad so no autograd graph is built.
model.eval()
model_device = next(model.parameters()).device
with torch.no_grad():
    # iterate over the entire training set
    for inputs, labels in train_dataloader:
        output = model(inputs.to(model_device)) # Feed Network
        # argmax of the raw scores is the predicted class (exp() would not change it)
        y_pred.extend(output.argmax(dim=1).cpu().numpy()) # Save Prediction
        y_true.extend(labels.cpu().numpy()) # Save Truth

# Replace integer labels with class names
y_true = [tr_classes[label_id] for label_id in y_true]
y_pred = [tr_classes[label_id] for label_id in y_pred]

# Build confusion matrix
ConfusionMatrixDisplay.from_predictions(y_true,y_pred,xticks_rotation='vertical')

**Loss and Accuracy over epoch**

In [None]:
# Training loss and training accuracy of the final run on one set of axes.
# NOTE(review): the y-axis is labelled "Loss" although accuracy is plotted too.
plt.figure(figsize=(10,5))
for curve, curve_label in ((train_loss_arr, "Train Loss"), (train_accuracy_arr, "Train Accuracy")):
    plt.plot(curve, '-o', label=curve_label)
plt.title("Loss and Accuracy change over epoch")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()

**Test the Model**

In [None]:
# Put the model in evaluation mode before it is used for prediction
model.eval()

In [None]:
#prediction function
def prediction(image_path,transformer):
    """Predict the class name of a single image file.

    Uses the notebook-level ``model`` and ``tr_classes``. The model is expected
    to be in evaluation mode already; this function does not change its mode.

    Args:
        image_path: path of the image file to classify.
        transformer: callable that turns a PIL image into a (C,H,W) tensor.

    Returns:
        The entry of ``tr_classes`` at the index of the highest model score.
    """
    # Run on whichever device the model lives on. The earlier
    # `image_tensor.cuda()` discarded its result, so the input never moved.
    model_device=next(model.parameters()).device

    # Close the file handle deterministically and force 3 channels, matching
    # the RGB conversion that ImageFolder applies to the training images.
    with Image.open(image_path) as image:
        image_tensor=transformer(image.convert('RGB')).float()

    # Add the batch dimension expected by the network
    image_tensor=image_tensor.unsqueeze(0).to(model_device)

    with torch.no_grad():
        output=model(image_tensor)

    # .item() also works for a tensor that lives on the GPU
    index=output.argmax(dim=1).item()
    pred=tr_classes[index]

    return pred

In [None]:
pred_dict={}
test_path = "/kaggle/input/deep-learning-for-msc-coursework-2022/test/test/"
# Sorted so the files are always processed in the same order
images_path = sorted(glob.glob(test_path+'*.png'))

# Deterministic preprocessing for inference: the same tensor conversion and
# normalization as in training, but without the random brightness, flip and
# rotation steps. With those, the same image could get a different label on
# every run.
inference_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5),(0.5)),
])

for i in images_path:
    # Key each prediction by the bare file name
    pred_dict[os.path.basename(i)]=prediction(i,inference_transform)

predict_test = pd.DataFrame(list(pred_dict.items()), columns=['Id', 'Type'])

# Write the predictions, ordered by Id, as test.csv inside test.zip
compression_opts = dict(method='zip',archive_name='test.csv')
predict_test.sort_values('Id').to_csv('test.zip', index=False,compression=compression_opts)

**Label distribution based on the prediction**

In [None]:
# How many test images were assigned to each predicted cell type
predicted_type_counts = predict_test['Type'].value_counts()
predicted_type_counts.plot(kind='bar')
plt.xlabel("Cell Type")
plt.ylabel("Number of cells")
plt.title("Test Set label Prediction")
plt.show()