In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import sklearn
import torchvision
from torchvision import transforms
from torchvision import models
from torchvision.transforms import ToTensor
from PIL import Image
import matplotlib.pyplot as plt
from io import BytesIO
import os

In [None]:
# Restore the previously saved tensor from the mounted Drive path.
X = torch.load('/content/drive/My Drive/tensor.pth')

# Report the dimensions of what was restored.
print(X.shape)

# Recorded output: torch.Size([25000, 1000])

In [None]:
label_count = 0  # NOTE(review): not read by any visible cell; kept in case another cell uses it.

# Class index and the exclusive end row of that class's contiguous block of rows:
#Colon_aca = 0  -> 5048
#Colon_n = 1    -> 10048
#lung_aca = 2   -> 15048
#lung_n = 3     -> 20000
#lung_scc = 4   -> 25000
#
# Labels are assigned purely by row position, so this assumes the rows of X are
# stored class-by-class in the order above — TODO confirm against the code that
# produced tensor.pth.
class_end_rows = [5048, 10048, 15048, 20000, 25000]
class_start_rows = [0] + class_end_rows[:-1]

# Build one (rows, 1) float block per class and stack them, instead of writing
# 25,000 tensor elements one at a time from a Python loop. The result has the
# same shape (25000, 1), default float dtype and values as the element-wise fill.
input_label = torch.cat([
    torch.full((end_row - start_row, 1), float(class_idx))
    for class_idx, (start_row, end_row) in enumerate(zip(class_start_rows, class_end_rows))
])

In [None]:
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

# Hold out 20% of the rows for testing. The rows are shuffled before splitting;
# random_state is fixed so the same split is produced on every run
# (torch.manual_seed does not seed scikit-learn's shuffling).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    input_label,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [None]:
#Define a Custom Dataset class to define a Dataloader
class CustomDataset(Dataset):
    """Pair a container of samples with a container of labels, index by index.

    Args:
        X: Indexable, sized container of samples.
        y: Indexable, sized container of labels; must be the same length as ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` have different lengths.
    """

    def __init__(self, X, y):
        # Fail here rather than with an IndexError deep inside a DataLoader.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap each split's samples and labels in a CustomDataset.
train_dataset, test_dataset = (
    CustomDataset(X_train, y_train),
    CustomDataset(X_test, y_test),
)

In [None]:
# Batch both splits 32 samples at a time; only the training split is reshuffled.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=32)

In [None]:
# CNN MODEL ARCHITECTURE DEFINITION
class CancerHistModel(nn.Module):
  """Three conv blocks followed by a linear classifier.

  Each block is Conv2d -> ReLU -> Conv2d -> ReLU -> MaxPool2d(2). The 3x3
  convolutions use stride 1 and padding 1, so only the pooling layers change
  the spatial size.

  The classifier's input width assumes a 40x25 input, which is the shape the
  training cell reshapes every sample to: three 2x2 max-pools reduce 40x25 to
  5x3 (40->20->10->5 and 25->12->6->3), giving ``hidden_units * 5 * 3``
  flattened features (240 when ``hidden_units`` is 16).

  Args:
    input_shape: Number of channels of the input.
    hidden_units: Number of output channels of every convolution.
    output_shape: Number of values produced per sample by the classifier.
  """

  # Spatial size that remains after the three max-pools for a 40x25 input.
  _POOLED_HEIGHT = 5
  _POOLED_WIDTH = 3

  def __init__(self, input_shape: int, hidden_units: int, output_shape:int):
    super().__init__()
    self.conv_block_1 = self._make_conv_block(input_shape, hidden_units)
    self.conv_block_2 = self._make_conv_block(hidden_units, hidden_units)
    self.conv_block_3 = self._make_conv_block(hidden_units, hidden_units)

    # Derive the flattened size from hidden_units instead of hard-coding 240,
    # so the model also works for other channel counts.
    flattened_features = hidden_units * self._POOLED_HEIGHT * self._POOLED_WIDTH
    self.classifier = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=flattened_features, out_features=output_shape)
        )

  @staticmethod
  def _make_conv_block(in_channels: int, out_channels: int) -> nn.Sequential:
    """Build one Conv-ReLU-Conv-ReLU-MaxPool block that halves height and width."""
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2)
    )

  def forward(self,x:torch.Tensor):
    """Run ``x`` through the three conv blocks and the classifier, returning its output."""
    x = self.conv_block_1(x)
    x = self.conv_block_2(x)
    x = self.conv_block_3(x)
    x = self.classifier(x)
    return x

In [None]:
# Build the network with 1 input channel, 16 channels per convolution and 5 outputs.
Model = CancerHistModel(
    input_shape=1,
    hidden_units=16,
    output_shape=5,
)
# Bare expression so the notebook displays the module structure.
Model

# CancerHistModel(
#   (conv_block_1): Sequential(
#     (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (1): ReLU()
#     (2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (3): ReLU()
#     (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
#   )
#   (conv_block_2): Sequential(
#     (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (1): ReLU()
#     (2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (3): ReLU()
#     (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
#   )
#   (conv_block_3): Sequential(
#     (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (1): ReLU()
#     (2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#     (3): ReLU()
#     (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
#   )
#   (classifier): Sequential(
#     (0): Flatten(start_dim=1, end_dim=-1)
#     (1): Linear(in_features=240, out_features=5, bias=True)
#   )
# )

In [None]:
def accuracy_fn(y_true,y_pred):
  """Return how many positions of ``y_true`` and ``y_pred`` are equal, as a Python number.

  This is a count, not a ratio; the caller divides by the number of samples.
  ``torch.eq`` broadcasts its arguments, so pass tensors of the same shape:
  comparing an (N, 1) tensor with an (N,) tensor counts matches over all
  N x N pairs instead of N positions.
  """
  matches = torch.eq(y_true, y_pred)
  n_equal = matches.sum()
  return n_equal.item()

# Cross-entropy objective applied to the scores produced by Model.
loss_fn = nn.CrossEntropyLoss()
# Stochastic gradient descent over all of Model's parameters, learning rate 0.1.
optimizer = optim.SGD(Model.parameters(), lr=0.1)

In [None]:
from timeit import default_timer

def print_train_time(start: float,
                     end: float,
                     device: torch.device = None):
  """Print and return the elapsed time between two timestamps.

  Args:
    start: Timestamp taken before the timed work, in seconds.
    end: Timestamp taken after the timed work, in seconds.
    device: Accepted for interface compatibility; not used.

  Returns:
    ``end - start`` in seconds.
  """
  # Was `end=start`, a chained assignment that returned `start` unchanged.
  total_time = end - start
  print(f"total time: {total_time:.3f} seconds")
  return total_time

In [None]:
#Creating a training loop and training a model on batches of data

from tqdm.auto import tqdm #to get the progress bar
import time

#Set the seed and start the timer
torch.manual_seed(42)
train_time_start_on_cpu = time.time()  # call the function; storing `time.time` itself records no timestamp

#Set number of epochs
epochs = 5

#Create training and test loop
for epoch in tqdm(range(epochs)):
  print(f"Epoch: {epoch} \n---")

  #Training
  train_loss = 0.0
  Model.train()
  #Loop through the training batches. The batch variables are deliberately not
  #named X / y so the full feature tensor `X` loaded earlier is not overwritten.
  for batch, (X_batch, y_batch) in enumerate(train_loader):
    #Reshape each 1000-feature row to a 1x40x25 input; -1 keeps any batch size valid
    X_batch = X_batch.view(-1,1,40,25)
    #Flatten (B, 1) float labels to the (B,) integer class indices the loss expects
    y_batch = y_batch.reshape(-1).long()

    #Forward Pass
    y_pred = Model(X_batch)

    #Calculate loss
    loss = loss_fn(y_pred,y_batch)
    #Accumulate a plain float so the autograd graph of every batch is not kept alive
    train_loss += loss.item()

    #Optimizer zero grad
    optimizer.zero_grad()

    #Loss backward
    loss.backward()

    #Optimizer step
    optimizer.step()

    #Print out what's happening
    if batch%100 == 0:
      print(f"Lookd at { batch * len(X_batch)}/{len(train_loader.dataset)} samples.")

  #Divide total train loss by length of train dataloader
  train_loss /= len(train_loader)

  #Testing
  test_loss, test_acc = 0.0, 0
  Model.eval()
  with torch.inference_mode():
    for X_batch, y_batch in test_loader:
      #Every batch is evaluated, including a final partial one
      X_batch = X_batch.view(-1,1,40,25)
      y_batch = y_batch.reshape(-1).long()

      test_pred = Model(X_batch)
      test_loss += loss_fn(test_pred,y_batch).item()

      #Labels and predictions are both (B,), so this counts matching positions
      #rather than broadcasting to a B x B comparison
      test_acc += accuracy_fn(y_true=y_batch,y_pred=test_pred.argmax(dim=1))

    #Calculate the test loss average per batch
    test_loss /= len(test_loader)
    #Accuracy in percent: correct predictions over the number of test samples
    test_acc_val = 100 * test_acc / len(test_loader.dataset)

  print(f"\nTrain Loss {train_loss:.4f} | Test loss: {test_loss:.4f}, Test acc: {test_acc_val:.3f}")

train_time_end_on_cpu = time.time()

Epoch: 1
---
Lookd at 0/20000 samples.
Lookd at 3200/20000 samples.
Lookd at 6400/20000 samples.
Lookd at 9600/20000 samples.
Lookd at 12800/20000 samples.
Lookd at 16000/20000 samples.
Lookd at 19200/20000 samples.

Train Loss 0.6506 | Test loss: 0.1796, Test acc: 88.962
Epoch: 2
---
Lookd at 0/20000 samples.
Lookd at 3200/20000 samples.
Lookd at 6400/20000 samples.
Lookd at 9600/20000 samples.
Lookd at 12800/20000 samples.
Lookd at 16000/20000 samples.
Lookd at 19200/20000 samples.

Train Loss 0.1815 | Test loss: 0.1567, Test acc: 89.220
Epoch: 3
---
Lookd at 0/20000 samples.
Lookd at 3200/20000 samples.
Lookd at 6400/20000 samples.
Lookd at 9600/20000 samples.
Lookd at 12800/20000 samples.
Lookd at 16000/20000 samples.
Lookd at 19200/20000 samples.

Train Loss 0.1450 | Test loss: 0.1083, Test acc: 89.438
Epoch: 4
---
Lookd at 0/20000 samples.
Lookd at 3200/20000 samples.
Lookd at 6400/20000 samples.
Lookd at 9600/20000 samples.
Lookd at 12800/20000 samples.
Lookd at 16000/20000 samples.
Lookd at 19200/20000 samples.

Train Loss 0.1258 | Test loss: 0.1496, Test acc: 89.032
Epoch: 5
---
Lookd at 0/20000 samples.
Lookd at 3200/20000 samples.
Lookd at 6400/20000 samples.
Lookd at 9600/20000 samples.
Lookd at 12800/20000 samples.
Lookd at 16000/20000 samples.
Lookd at 19200/20000 samples.

Train Loss 0.1165 | Test loss: 0.1085, Test acc: 89.501


As can be observed here, both the train and test loss decrease overall as we train our model, although the test loss fluctuates slightly from epoch to epoch. The model's accuracy also improves overall with more training, with the final accuracy reaching up to 89.5%.

An important thing to highlight here is that the train and test losses are comparable at the end; hence we can say that our model is neither overfitting nor underfitting.