<a href="https://colab.research.google.com/github/akazemi24/Research-Project/blob/main/Classification_of_Brain_Tumors_from_MRI_using_Transfer_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used below live under it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%cd /content/drive/MyDrive/Armita Project/data

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import cv2
from skimage.transform import rotate, AffineTransform, warp
from tqdm import tqdm

In [None]:
'''
Importing images and labels
'''

# Side length (pixels) every image is resized to.
img_size=224

# Folder name -> integer class id used by the rest of the notebook.
CLASS_TO_LABEL = {
    'no_tumor': 0,
    'pituitary_tumor': 1,
    'meningioma_tumor': 2,
    'glioma_tumor': 3,
}


def _load_image_folder(base_path, size):
  """Load every image found in the class sub-folders of ``base_path``.

  Args:
    base_path: directory whose sub-folders are named after the classes in
      ``CLASS_TO_LABEL``.
    size: images are resized to ``(size, size)``.

  Returns:
    ``(folders, images, labels)`` where ``folders`` is the list of
    sub-folder names found (entries containing a '.' are ignored),
    ``images`` is a list of float32 arrays and ``labels`` is the list of
    matching integer class ids. ``images`` and ``labels`` always have the
    same length.
  """
  folders = [entry for entry in os.listdir(base_path) if '.' not in entry]
  images = []
  labels = []
  for folder in folders:
    if folder not in CLASS_TO_LABEL:
      # Loading the images without a label would shift every later label
      # out of step with its image, so unknown folders are skipped loudly.
      print("Skipping folder with no known class label:", folder)
      continue
    label = CLASS_TO_LABEL[folder]
    folder_path = os.path.join(base_path, folder)
    for file_name in os.listdir(folder_path):
      raw = np.asarray(mpimg.imread(os.path.join(folder_path, file_name)))
      resized = cv2.resize(raw, (size, size))
      if raw.dtype == np.uint8:
        # 8-bit pixels: bring them into [0, 1].
        scaled = resized / 255.0
      else:
        # Non-uint8 data (e.g. PNGs read as floats) is assumed to already be
        # in [0, 1] -- TODO confirm for any non-JPEG files in the dataset.
        scaled = resized
      images.append(scaled.astype(np.float32))
      labels.append(label)
  return folders, images, labels


base_path = '/content/drive/MyDrive/Armita Project/data/Training/'
train_folders, train_images, train_labels = _load_image_folder(base_path, img_size)
num_train_images = len(train_images)

base_path = '/content/drive/MyDrive/Armita Project/data/Testing/'
test_folder, extra_images, extra_labels = _load_image_folder(base_path, img_size)
num_test_images = len(extra_images)

# The "Testing" images are deliberately pooled with the "Training" images:
# the following cells build X / y from train_images / train_labels and then
# re-split the pooled data themselves.
train_images.extend(extra_images)
train_labels.extend(extra_labels)

# Kept as empty lists for compatibility; they are re-bound by the split cell.
test_images = []
test_labels = []

print("There are", num_train_images, "images in the Training folder and",
      num_test_images, "in the Testing folder;", len(train_images), "pooled in total.")

In [None]:
# Pool every loaded image / label into arrays (re-split by the next cell).
total_images = num_train_images + num_test_images
X = np.array(train_images)
y = np.array(train_labels)

# Sanity checks: every image needs exactly one label, and the pooled array
# must contain everything that was counted while loading. A failure here
# usually means the loading cell hit an unlabeled folder, or this cell was
# re-run after `train_images` had been re-bound by the split cell.
assert len(X) == len(y), f"{len(X)} images but {len(y)} labels"
assert len(X) == total_images, f"expected {total_images} images, found {len(X)}"

print(X.shape)
print(y.shape)

In [None]:
'''
Split the data into training and testing set
'''

from sklearn.model_selection import train_test_split

# 80/20 split of the pooled data.
# - random_state pins the shuffle so a Restart & Run All reproduces the same
#   split (and therefore comparable accuracy numbers between models).
# - stratify=y keeps the class proportions the same in both subsets.
# NOTE: this re-binds train_images / train_labels / test_images / test_labels
# from the Python lists built during loading to NumPy arrays.
train_images, test_images, train_labels, test_labels = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
'''
imports for transfer learning
'''

from tqdm import trange
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, DataLoader
from torchvision import datasets
#from sklearn.metrics import plot_confusion_matrix, ConfusionMatrixDisplay, confusion_matrix
import tensorflow as tf

import math
import random

from PIL import Image, ImageOps, ImageEnhance
import numbers

import matplotlib.pyplot as plt
from skimage.transform import rotate, AffineTransform, warp
from tqdm import tqdm
from torch.optim import lr_scheduler
from torch.autograd import Variable
import seaborn as sns
import time

In [None]:
'''
transform data to be used with Pytorch
'''

# Per-image pipeline used for the single-channel training tensors:
# array -> PIL image -> tensor. The commented-out entries are optional
# blur / flip / rotation / normalisation steps that are currently disabled.
data_transform=transforms.Compose([
                      transforms.ToPILImage(),
                      #transforms.GaussianBlur(5),
                      #transforms.RandomHorizontalFlip(),
                      #transforms.RandomVerticalFlip(),
                      #transforms.RandomRotation(15),
                      #transforms.RandomRotation((1,90)),
                      transforms.ToTensor(),
                      #transforms.Normalize([0.485], [0.229])
                  ])

In [None]:
'''
prepare data to use with Pytorch
'''

# Convert the images to PyTorch tensors. Only channel 0 of every image is
# kept, so these tensors are single-channel.
train_images_torch = torch.stack([data_transform(image[:,:,0]) for image in train_images])
test_images_torch = torch.stack([transforms.ToTensor()(image[:,:,0]) for image in test_images])

# Labels are class indices, so store them as int64 (what CrossEntropyLoss
# expects) instead of the float32 that torch.Tensor(...) would produce.
train_labels_torch = torch.tensor(train_labels, dtype=torch.long)
test_labels_torch = torch.tensor(test_labels, dtype=torch.long)

# Create a TensorDataset, which allows access to the images and labels as tensors
train_dataset = TensorDataset(train_images_torch, train_labels_torch)
test_dataset = TensorDataset(test_images_torch, test_labels_torch)

# Create a data loader, which provides an iterable over the dataset.
# Only the training loader is shuffled; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [None]:
'''
Transform data for transfer learning
'''

from torchvision import datasets, models, transforms

# Both splits get the same pipeline: array -> tensor -> per-channel
# normalisation with three-channel mean / std values.
data_transforms = {
    'train': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Convert the images to PyTorch tensors (all channels are kept here).
train_images_transfer = torch.stack([data_transforms["train"](image) for image in train_images])
test_images_transfer = torch.stack([data_transforms['val'](image) for image in test_images])

# Labels are class indices, so store them as int64 (what CrossEntropyLoss
# expects) instead of the float32 that torch.Tensor(...) would produce.
train_labels_transfer = torch.tensor(train_labels, dtype=torch.long)
test_labels_transfer = torch.tensor(test_labels, dtype=torch.long)

# Create a TensorDataset, which allows access to the images and labels as tensors
train_dataset_transfer = TensorDataset(train_images_transfer, train_labels_transfer)
test_dataset_transfer = TensorDataset(test_images_transfer, test_labels_transfer)

# Create a data loader, which provides an iterable over the dataset.
# Only the training loader is shuffled; evaluation order stays fixed so that
# predictions line up with test_labels.
train_loader_transfer = DataLoader(train_dataset_transfer, batch_size=32, shuffle=True)
test_loader_transfer = DataLoader(test_dataset_transfer, batch_size=32)

In [None]:
'''
importing transfer learning models from Pytorch
'''

import torchvision.models as models
import torch.nn.functional as F

In [None]:
# ResNet-18 initialised from pretrained weights.
resnet18 = models.resnet18(pretrained=True)

# Freeze the convolutional base; pass True here instead to fine-tune it.
for param in resnet18.parameters():
    param.requires_grad_(False)

# Replace the final fully connected layer with a new 4-class layer.
in_features = resnet18.fc.in_features
resnet18.fc = nn.Linear(in_features, 4)

In [None]:
# ResNet-50 initialised from pretrained weights, with every parameter frozen.
resnet50 = models.resnet50(pretrained=True)
for param in resnet50.parameters():
    param.requires_grad_(False)

# Replace the final fully connected layer with a new 4-class layer.
in_features = resnet50.fc.in_features
resnet50.fc = nn.Linear(in_features, 4)

# Input width of the new head (read back from the replaced layer).
modelOutputFeats = resnet50.fc.in_features

In [None]:
# VGG-16 initialised from pretrained weights, with every parameter frozen.
vgg16 = models.vgg16(pretrained=True)
for param in vgg16.parameters():
    param.requires_grad_(False)

# Replace the last classifier layer with a new 4096 -> 4 linear layer.
vgg16.classifier[-1] = nn.Linear(4096, 4)

In [None]:
# DenseNet-161 initialised from pretrained weights, with every parameter frozen.
densenet = models.densenet161(pretrained=True)
for param in densenet.parameters():
    param.requires_grad_(False)

# Replace the classifier with a new 4-class linear layer.
in_features = densenet.classifier.in_features
densenet.classifier = nn.Linear(in_features, 4)


In [None]:
# GoogLeNet initialised from pretrained weights, with every parameter frozen.
googlenet = models.googlenet(pretrained=True)
for param in googlenet.parameters():
    param.requires_grad_(False)

# Replace the final fully connected layer with a new 4-class layer.
in_features = googlenet.fc.in_features
googlenet.fc = nn.Linear(in_features, 4)


In [None]:
# ShuffleNet V2 (x1.0) initialised from pretrained weights, with every parameter frozen.
shufflenet = models.shufflenet_v2_x1_0(pretrained=True)
for param in shufflenet.parameters():
    param.requires_grad_(False)

# Replace the final fully connected layer with a new 4-class layer.
in_features = shufflenet.fc.in_features
shufflenet.fc = nn.Linear(in_features, 4)

In [None]:
# MobileNet V2 initialised from pretrained weights, with every parameter frozen.
mobilenet = models.mobilenet_v2(pretrained=True)
for param in mobilenet.parameters():
    param.requires_grad_(False)

# Replace classifier[1] with a new 4-class linear layer fed by last_channel features.
mobilenet.classifier[1] = nn.Linear(mobilenet.last_channel, 4)

In [None]:
'''
Define a training function to run model training
'''
def train(model, train_loader, test_loader, num_epochs):
  """Train ``model`` with Adam and cross-entropy, evaluating after every epoch.

  Batches are moved to whatever device the model's parameters already live
  on, so the function works on both GPU and CPU.

  Args:
    model: network returning one row of class scores per input sample.
    train_loader: iterable of ``(inputs, labels)`` batches used for the
      gradient steps (and passed to ``test`` for the per-epoch train loss).
    test_loader: iterable of ``(inputs, labels)`` batches passed to ``test``.
    num_epochs: number of passes over ``train_loader``.

  Returns:
    Tuple ``(losses, val_losses, accuracy, y_pred, best_accuracy, predictions)``:
      losses: per-epoch train loss reported by ``test``.
      val_losses: per-epoch test loss reported by ``test``.
      accuracy: per-epoch test accuracy reported by ``test``.
      y_pred: predicted class of every training sample seen, concatenated
        over all batches of all epochs.
      best_accuracy: highest per-epoch test accuracy (0 if no epoch ran).
      predictions: predictions returned by ``test`` after the last epoch
        (empty array if no epoch ran).
  """
  # Follow the model's device instead of assuming a GPU is present.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')

  loss_fn = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr=0.0001)
  losses = [] # Store the losses in case you want to plot them after training!
  val_losses = []
  acc = 0
  accuracy = []
  y_pred = []
  best_accuracy = 0
  predictions = np.array([])  # defined up front so num_epochs == 0 still returns cleanly

  model.train()
  pbar = trange(num_epochs, desc="loss: ", leave=True) # Gives you a nice progress bar to visualize the loss while training
  for epoch in pbar:
    epoch_losses = []
    for inputs, labels in train_loader:
      inputs = inputs.to(device)
      labels = labels.to(device=device, dtype=torch.long)
      optimizer.zero_grad()
      class_predictions = model(inputs)
      preds = torch.argmax(class_predictions, dim=1)
      y_pred = np.concatenate((y_pred, preds.cpu().numpy()))
      loss = loss_fn(class_predictions, labels)
      epoch_losses.append(loss.item())
      # Running mean of this epoch's batch losses; `acc` is the test
      # accuracy from the previous epoch.
      pbar.set_description(f"loss: {np.mean(epoch_losses):.4f} Acc: {acc:.4f}")
      loss.backward()
      optimizer.step()

    acc, val_loss, train_loss, predictions = test(model, test_loader, train_loader)
    best_accuracy = max(best_accuracy, acc)

    accuracy.append(acc)
    val_losses.append(val_loss)
    losses.append(train_loss)
    pbar.set_description(f"loss: {train_loss:.4f} Acc: {acc:.4f}")

  print("Finished Training")
  return losses, val_losses, accuracy, y_pred, best_accuracy, predictions

In [None]:
'''
Define a testing function
'''

def test(model, test_loader, train_loader):
  loss_fn = nn.CrossEntropyLoss()
  correct = 0
  total = 0
  val_loss = []
  predictions = np.array([])
  with torch.no_grad():
    model.eval()
    startTime = time.time()
    for data in test_loader:
      images, labels = data
      images = images.to(torch.device('cuda:0'))
      labels = labels.type(torch.LongTensor).to(torch.device('cuda:0'))
      outputs = model(images)
      _, predicted = torch.max(outputs.data, 1)
      predictions = np.concatenate((predictions, predicted.cpu().numpy()))

      loss = loss_fn(outputs, labels)
      # print(outputs)
      # print(labels)
      # print(loss.item())
      val_loss.append(loss.item())
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

    train_loss = []
    for data in train_loader:
      images, labels = data
      images = images.to(torch.device('cuda:0'))
      labels = labels.type(torch.LongTensor).to(torch.device('cuda:0'))
      outputs = model(images)
      _, predicted = torch.max(outputs.data, 1)

      loss = loss_fn(outputs, labels)
      train_loss.append(loss.item())

  model.train()
  accuracy = correct / total
  print(accuracy)
  print(np.mean(val_loss))

  endTime = time.time()
  print("[INFO] total time taken to train the model: {:.2f}s".format(
    endTime - startTime))

  return accuracy, np.mean(val_loss), np.mean(train_loss), predictions

  # print(f'Accuracy of the network on the {len(test_loader)} test data: {100 * correct / total} %')

  # display the total time needed to perform the training
  endTime = time.time()
  print("[INFO] total time taken to train the model: {:.2f}s".format(
    endTime - startTime))
  # plot the training loss and accuracy

  cm = confusion_matrix(y_test, outputs)
  print(cm)

  plt.figure(figsize=(4, 4))
  sns.heatmap(cm, annot= True, fmt=".3f", linewidth = 0.5, square = True, cmap = "Blues_r")
  plt.ylabel("Actual Label")
  plt.xlabel("Predicted Label")

In [None]:
'''
Define function to create confusion matrix
'''

from matplotlib.ticker import PercentFormatter
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
import pandas as pd

def cm_analysis(y_true, y_pred, labels, classes, ymap=None, figsize=(17,17)):
    """Plot a row-normalised confusion matrix annotated with percentages and counts.

    Each cell is coloured by the percentage of its true class (row) and
    annotated with ``"<percent>%\\n<count>"``; diagonal cells additionally
    show the row total as ``"<count>/<row total>"``.

    Args:
        y_true: true class of every sample.
        y_pred: predicted class of every sample.
        labels: class values, in the order used for rows and columns.
        classes: tick labels shown on both axes.
        ymap: optional mapping applied to ``y_true``, ``y_pred`` and
            ``labels`` before the matrix is computed.
        figsize: size of the created figure.
    """
    sns.set(font_scale=1)

    if ymap is not None:
        y_pred = [ymap[yi] for yi in y_pred]
        y_true = [ymap[yi] for yi in y_true]
        labels = [ymap[yi] for yi in labels]

    cm = confusion_matrix(y_true, y_pred, labels=labels)
    cm_sum = cm.sum(axis=1, keepdims=True)
    # Row percentages; a class with no true samples gets 0% instead of nan.
    cm_perc = np.divide(cm * 100.0, cm_sum,
                        out=np.zeros(cm.shape, dtype=float),
                        where=cm_sum != 0)

    # Object dtype so annotation strings are never truncated to a fixed width.
    annot = np.empty(cm.shape, dtype=object)
    nrows, ncols = cm.shape
    for i in range(nrows):
        for j in range(ncols):
            c = cm[i, j]
            p = cm_perc[i, j]
            if i == j:
                annot[i, j] = '%.2f%%\n%d/%d' % (p, c, cm_sum[i, 0])
            else:
                annot[i, j] = '%.2f%%\n%d' % (p, c)

    # The heatmap colours come from the same row percentages.
    cm_frame = pd.DataFrame(cm_perc, index=labels, columns=labels)
    cm_frame.index.name = 'True Label'
    cm_frame.columns.name = 'Predicted Label'
    fig, ax = plt.subplots(figsize=figsize)
    plt.yticks(va='center')

    sns.heatmap(cm_frame, annot=annot, fmt='', ax=ax, xticklabels=classes, cbar=True, cbar_kws={'format':PercentFormatter()}, yticklabels=classes, cmap="Blues")
    plt.show()

In [None]:
'''
Define function to output metrics (confusion matrix, precision, recall, F1 score)
'''

from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss
def metrics(actual, predicted, labels=(0, 1, 2, 3), class_names=None):
  """Plot the confusion matrix and print accuracy plus the classification report.

  Args:
    actual: true class of every sample.
    predicted: predicted class of every sample.
    labels: class values, in display order. Defaults to the four class ids
      used in this notebook.
    class_names: optional display names, one per entry of ``labels``. When
      omitted, the label values themselves are shown and the report is
      produced with scikit-learn's defaults.
  """
  labels = list(labels)
  display_names = labels if class_names is None else list(class_names)

  cm_analysis(actual, predicted, labels=labels, classes=display_names, figsize=(8, 8))
  print ('Accuracy Score is',accuracy_score(actual, predicted))
  print ('Classification Report : ')
  if class_names is None:
    print (classification_report(actual, predicted))
  else:
    print (classification_report(actual, predicted, labels=labels,
                                 target_names=[str(name) for name in display_names]))


In [None]:
'''
Run transfer learning models
'''

# Number of passes over the training data.
NUM_EPOCHS = 100

device = 'cuda' if torch.cuda.is_available() else 'cpu'
googlenet.to(device)

losses, val_losses, accuracy, y_pred, best_accuracy, predictions = train(googlenet, train_loader_transfer, test_loader_transfer, NUM_EPOCHS)

print(f"best accuracy: {best_accuracy}")

# `predictions` come from the evaluation after the final epoch, which is not
# necessarily the epoch that reached `best_accuracy`.
cm = confusion_matrix(test_labels, predictions)
print(cm)

# Raw-count confusion matrix. Counts are integers, so format them as such.
cm_figure, cm_axis = plt.subplots(figsize=(4, 4))
sns.heatmap(cm, annot=True, fmt="d", linewidth=0.5, square=True, cmap="Blues_r", ax=cm_axis)
cm_axis.set_ylabel("Actual Label")
cm_axis.set_xlabel("Predicted Label")

# Learning curves: loss on the left, test accuracy on the right.
figure, axis = plt.subplots(1, 2)

axis[0].plot(losses, linewidth=2.75)
axis[0].plot(val_losses, linewidth=2.75)
axis[0].set_xlabel('epoch')
axis[0].set_ylabel('Loss')
axis[0].legend(['Train', 'Test'])
axis[0].set_title("Loss")

axis[1].plot(accuracy, linewidth=2.75)
axis[1].set_xlabel('epoch')
axis[1].set_ylabel('Accuracy')
axis[1].legend(['Test'])
axis[1].set_title("Accuracy")

figure.set_size_inches(10, 6, forward=True)
figure.tight_layout(pad = 1)

metrics(test_labels, predictions)