# Importing necessary libraries

In [None]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader, ConcatDataset
import glob
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score
import cv2
import sys

# Reading the Images (Data Preprocessing)

In [None]:
def _load_rgb_images(pattern, size=(128, 128)):
    """Read every image matching ``pattern`` and return them as RGB arrays.

    Args:
        pattern: glob pattern selecting the image files to read.
        size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        A list with one array per matched file, resized to ``size`` and with
        the channels in RGB order (what matplotlib expects).

    Raises:
        ValueError: if OpenCV cannot decode one of the matched files.
    """
    images = []
    for path in glob.iglob(pattern):
        img = cv2.imread(path)  # OpenCV decodes into BGR channel order
        if img is None:
            # imread reports failure by returning None; fail here with the
            # offending path instead of crashing later inside cv2.resize.
            raise ValueError("Could not read image file: {}".format(path))
        img = cv2.resize(img, size)
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    return images


# brains with a tumor
tumor = _load_rgb_images("/Users/alexandermathew/Downloads/Brain_Tumor_Classifier/bimages/yes/*.jpg")
# brains with no tumor
healthybrain = _load_rgb_images("/Users/alexandermathew/Downloads/Brain_Tumor_Classifier/bimages/no/*.jpg")

In [None]:
# Stack the per-image lists into single arrays; .shape then reads as
# (number of images, pixel rows, pixel columns, RGB channels).
tumor = np.array(tumor)
healthybrain = np.array(healthybrain)
# Healthy images first, tumor images after them.
All = np.concatenate([healthybrain, tumor])

In [None]:
# Display the dimensions of the stacked healthy-brain image array.
healthybrain.shape

In [None]:
# Display the dimensions of the stacked tumor image array.
tumor.shape

In [None]:
# Demo of the sampling call used by plot_random: 5 distinct integers drawn from 0..9.
np.random.choice(10, 5, replace=False)

# Visualizing MRI Images

In [None]:
def plot_random(healthybrain, tumor, num=5):
    """Show ``num`` randomly chosen healthy images and ``num`` tumor images.

    Each class gets its own figure, laid out as a single row of ``num``
    subplots. Images are sampled without replacement along the first axis.
    """
    healthy_pick = np.random.choice(healthybrain.shape[0], num, replace=False)
    healthybrain_imgs = healthybrain[healthy_pick]
    tumor_pick = np.random.choice(tumor.shape[0], num, replace=False)
    tumor_imgs = tumor[tumor_pick]

    rows = (('healthy brain', healthybrain_imgs), ('tumor', tumor_imgs))
    for caption, pictures in rows:
        plt.figure(figsize=(16, 9))  # one figure per class
        for slot in range(1, num + 1):
            plt.subplot(1, num, slot)
            plt.title(caption)
            plt.imshow(pictures[slot - 1])


In [None]:
# Preview five random images from each class (num defaults to 5).
plot_random(healthybrain, tumor)

# Creating the PyTorch abstract Dataset class and the custom MRI dataset class

In [None]:
class Dataset(object): #abstract class representing a dataset
    """Minimal abstract base class for an indexable dataset.

    Subclasses must override ``__getitem__`` and ``__len__``. Two datasets can
    be chained with ``+``, which wraps them in a ``ConcatDataset``.

    Note: this definition replaces the ``Dataset`` name imported from
    ``torch.utils.data`` at the top of the file.
    """

    def __getitem__(self, index):
        """Return the sample at ``index``; must be overridden."""
        raise NotImplementedError

    def __len__(self):
        """Return the number of samples; must be overridden."""
        raise NotImplementedError

    def __add__(self, other):
        """Concatenate this dataset with ``other``."""
        return ConcatDataset([self, other]) #needs ConcatDataset to concatenate two objects

In [None]:
class MRI(Dataset): #inheriting Dataset class
    """In-memory dataset of brain MRI images labelled tumor (1) or healthy (0).

    All images are read at construction time. ``images`` is a float32 array of
    shape (N, 3, 128, 128) and ``labels`` a float32 vector of length N, with
    the tumor samples stored first and the healthy samples after them.
    """

    def __init__(self): #constructor
        tumor = self._load_images("/Users/alexandermathew/Downloads/Brain_Tumor_Classifier/bimages/yes/*.jpg")
        healthybrain = self._load_images("/Users/alexandermathew/Downloads/Brain_Tumor_Classifier/bimages/no/*.jpg")

        # our images
        tumor = np.array(tumor, dtype=np.float32)
        healthybrain = np.array(healthybrain, dtype=np.float32)

        # our labels
        tumor_label = np.ones(tumor.shape[0], dtype=np.float32)
        healthybrain_label = np.zeros(healthybrain.shape[0], dtype=np.float32)

        # Concatenate
        self.images = np.concatenate((tumor, healthybrain), axis=0)
        self.labels = np.concatenate((tumor_label, healthybrain_label))

    @staticmethod
    def _load_images(pattern):
        """Read, resize and re-layout every image matching ``pattern``.

        Raises:
            ValueError: if OpenCV cannot decode one of the matched files.
        """
        images = []
        for f in glob.iglob(pattern):
            img = cv2.imread(f)  # cv2 reads in BGR format by default
            if img is None:
                # imread returns None on failure; report the offending path
                # instead of crashing later inside cv2.resize.
                raise ValueError("Could not read image file: {}".format(f))
            img = cv2.resize(img, (128, 128))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # NOTE(review): reshape only reinterprets the (h, w, c) buffer as
            # (c, h, w); it does not move the channel axis the way transpose
            # would. The display code elsewhere in this file undoes it with a
            # matching reshape, so switching to transpose must change both.
            img = img.reshape((img.shape[2], img.shape[0], img.shape[1]))
            images.append(img)
        return images

    def __len__(self):
        """Return how many images the dataset holds."""
        return self.images.shape[0] # how many images = length

    def __getitem__(self, index):
        """Return ``{'image': ..., 'label': ...}`` for the sample at ``index``."""
        sample = {'image': self.images[index], 'label': self.labels[index]}
        return sample

    def normalize(self):
        """Rescale the pixel values by 1/255 (call once)."""
        self.images = self.images/255.0



In [None]:
# Load every image into memory, then rescale the pixel values by 1/255.
mri_dataset = MRI()
mri_dataset.normalize()

# Creating a dataloader

In [None]:
# What is a dataloader? A dataloader is an object that lets you iterate through your dataset easily and, optionally, shuffle it.
# Why it matters: it gives an efficient, uniform way to feed data to a model during training.
# Below, the dataloader replaces hand-written for loops and next/iter calls over the dataset.
names={0:'Healthy Brain', 1:'Tumor'}
dataloader = DataLoader(mri_dataset, shuffle=True)
for i, sample in enumerate(dataloader):
    # Default batch size is 1, so squeeze() drops the batch axis.
    img = sample['image'].squeeze()
    # Inverse of the reshape applied in MRI: back to (rows, columns, channels) for imshow.
    img = img.reshape((img.shape[1], img.shape[2], img.shape[0]))
    plt.title(names[sample['label'].item()])
    plt.imshow(img)
    plt.show()
    if i == 5:
        break # preview only the first six samples

# Creating the model

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class CNN(nn.Module):
    """LeNet-style binary classifier for 3-channel 128x128 images.

    ``cnn_model`` is two Conv -> Tanh -> AvgPool stages; ``fc_model`` is a
    three-layer perceptron ending in a single logit. ``forward`` returns the
    sigmoid of that logit, i.e. one score in (0, 1) per sample.
    """

    def __init__(self):
        super().__init__()
        self.cnn_model = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2, stride=5),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2, stride=5),
        )

        # For a 128x128 input the spatial size goes 128 -> 124 -> 25 -> 21 -> 4,
        # so the flattened feature vector has 16 * 4 * 4 = 256 entries.
        self.fc_model = nn.Sequential(
            nn.Linear(in_features=256, out_features=120),
            nn.Tanh(),
            nn.Linear(in_features=120, out_features=84),
            nn.Tanh(),
            nn.Linear(in_features=84, out_features=1),
        )

    def forward(self, x):
        """Map a batch of shape (N, 3, 128, 128) to scores of shape (N, 1)."""
        x = self.cnn_model(x)
        x = x.view(x.size(0), -1)  # flatten everything except the batch axis
        x = self.fc_model(x)
        # torch.sigmoid replaces the deprecated F.sigmoid alias.
        return torch.sigmoid(x)

torch.tensor vs. torch.cuda.tensor

In [None]:
# Pick the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Both tensors are placed on `device` (despite the names), which is what
# allows them to be multiplied together at the end of the cell.
cpu_tensor = torch.rand(10).to(device)
gpu_tensor = cpu_tensor.to(device)

# Show value, dtype, Python type and torch type string for each tensor.
for t in (cpu_tensor, gpu_tensor):
    print(t, t.dtype, type(t), t.type())

print(cpu_tensor*gpu_tensor)

# Evaluating a New-Born Neural Network!

In [None]:
# Fresh, rescaled dataset plus an untrained model, both kept on the CPU.
device = torch.device('cpu')
mri_dataset = MRI()
mri_dataset.normalize()
model = CNN()
model.to(device)

In [None]:
def threshold(scores,threshold=0.50, minimum=0, maximum = 1.0):
    """Binarise ``scores`` around ``threshold``.

    Args:
        scores: iterable of scores (e.g. the model's sigmoid outputs).
        threshold: cut-off; values ``>= threshold`` become ``maximum``.
        minimum: value written where the score is below ``threshold``.
        maximum: value written where the score is at or above ``threshold``.

    Returns:
        A new numpy array with the same shape and dtype as
        ``np.array(list(scores))``.
    """
    x = np.array(list(scores))
    # Build both masks from the raw scores BEFORE writing anything: otherwise a
    # ``maximum`` smaller than ``threshold`` would be caught by the second mask
    # and overwritten with ``minimum``.
    at_or_above = x >= threshold
    below = x < threshold
    x[at_or_above] = maximum
    x[below] = minimum
    return x


In [None]:
# Run the model over the whole dataset in order (no shuffling) and collect
# its scores next to the ground-truth labels.
model.eval()
dataloader = DataLoader(mri_dataset, batch_size=32, shuffle=False)
outputs, y_true = [], []
with torch.no_grad():  # inference only, no gradients needed
    for batch in dataloader:
        image = batch['image'].to(device)
        label = batch['label'].to(device)

        y_hat = model(image)

        outputs.append(y_hat.detach().cpu().numpy())
        y_true.append(label.detach().cpu().numpy())

# One array per quantity instead of a list of per-batch arrays.
outputs = np.concatenate(outputs, axis=0)
y_true = np.concatenate(y_true, axis=0)


In [None]:
# Accuracy of the model's scores after binarising them at the default 0.5 cut-off.
accuracy_score(y_true, threshold(outputs))

In [None]:
import seaborn as sns

# Rows are the true classes, columns the predicted classes.
cm = confusion_matrix(y_true, threshold(outputs))
plt.figure(figsize=(16,9))

ax= plt.subplot()
sns.heatmap(cm, annot=True, fmt='g', ax=ax);  #annot=True to annotate cells, fmt='g' to disable scientific notation

# labels, title and ticks
ax.set_xlabel('Predicted labels');ax.set_ylabel('True labels');
ax.set_title('Confusion Matrix');
# confusion_matrix orders classes by sorted label value, so index 0 is the
# healthy class (label 0) and index 1 is the tumor class (label 1).
ax.xaxis.set_ticklabels(['Healthy','Tumor'])
ax.yaxis.set_ticklabels(['Healthy','Tumor'])


In [None]:
# Raw model scores in dataset order. The dashed line sits at len(tumor):
# the dataset stores tumor samples first, so it marks the class boundary.
plt.figure(figsize=(16, 9))
plt.plot(outputs)
plt.grid()
plt.axvline(len(tumor), linestyle='--', color='r')

# Visualising the Feature Maps of the Convolutional Filters

In [None]:
# Display the model's layer structure.
model

In [None]:
# Running count of convolutional layers and the layers themselves.
no_of_layers, conv_layers = 0, []

In [None]:
# Top-level sub-modules of the model, materialised so they can be re-iterated.
model_children = [*model.children()]
model_children

In [None]:
# Collect every Conv2d that sits directly inside a Sequential child of the
# model. The list and the count are rebuilt from scratch so that re-running
# this cell cannot append the same layers twice.
conv_layers = []
for child in model_children:
    if not isinstance(child, nn.Sequential):
        continue
    conv_layers.extend(
        layer for layer in child.children() if isinstance(layer, nn.Conv2d)
    )
no_of_layers = len(conv_layers)

In [None]:
# Display the convolutional layers that were collected.
conv_layers

In [None]:
# Take one sample and show it; the reshape restores the (rows, columns,
# channels) layout that imshow expects.
img = mri_dataset[100]['image']
plt.imshow(np.reshape(img, (128, 128, 3)))

In [None]:
# Wrap the numpy image in a tensor and move it to the chosen device.
img = torch.from_numpy(img)
img = img.to(device)
img.shape

In [None]:
# Add a leading batch axis so the image has the (N, C, H, W) layout that
# Conv2d is documented for. The previous squeeze(0) did nothing, because the
# first axis holds the 3 channels rather than a singleton dimension.
img = img.unsqueeze(0)
img.shape

In [None]:
# Chain the convolutional layers: the first sees the image, and every later
# one sees the previous convolution's output (activations and pooling are
# skipped here).
results = [conv_layers[0](img)]
for conv in conv_layers[1:]:
    previous = results[-1]
    results.append(conv(previous))
outputs = results


In [None]:
# One figure per convolutional layer, with each feature map in its own cell
# of a 2 x 8 grid.
for layer_number, activation in enumerate(outputs, start=1):
    plt.figure(figsize=(50, 10))
    layer_viz = activation.squeeze()
    print("Layer ", layer_number)
    for slot, feature_map in enumerate(layer_viz, start=1):
        plt.subplot(2, 8, slot)
        plt.imshow(feature_map.detach().cpu().numpy())
        plt.axis("off")
    plt.show()
    plt.close()

# Are We Over-fitting?

Preparing a validation set: We need to change the MRI dataset slightly!
We will need to make changes to our MRI dataset class:

- Define a function to divide the data into train and validation sets
- Define a variable called mode to determine whether we are interested in the training OR validation data
- Change the `__len__()` and `__getitem__()` methods so that they are conditioned on the variable `mode`

In [None]:
# Import train/test split function from sklearn
from sklearn.model_selection import train_test_split

In [None]:
class MRI(Dataset):
    """Brain-MRI dataset with a built-in train/validation split.

    All images are read at construction time into ``images`` (float32, shape
    (N, 3, 128, 128), channels first) and ``labels`` (float32; 1 = tumor,
    0 = healthy). After ``train_val_split()`` the dataset serves either the
    training or the validation portion, selected by ``mode``
    (``'train'`` or ``'val'``).
    """

    def __init__(self):

        # Variables to hold the Training data and Validation data
        self.X_train, self.y_train, self.X_val, self.y_val = None, None, None, None

        # A variable to determine if we are interested in retrieving the training OR the validation data
        self.mode = 'train'

        tumor = self._load_images("/Users/alexandermathew/Downloads/Brain_Tumor_Classifier/bimages/yes/*.jpg")
        healthybrain = self._load_images("/Users/alexandermathew/Downloads/Brain_Tumor_Classifier/bimages/no/*.jpg")

        # our images
        tumor = np.array(tumor, dtype=np.float32)
        healthybrain = np.array(healthybrain, dtype=np.float32)

        # our labels
        tumor_label = np.ones(tumor.shape[0], dtype=np.float32)
        healthy_label = np.zeros(healthybrain.shape[0], dtype=np.float32)

        # Concatenates
        self.images = np.concatenate((tumor, healthybrain), axis=0)
        self.labels = np.concatenate((tumor_label, healthy_label))

    @staticmethod
    def _to_chw(img):
        """Move the channel axis first: (h, w, c) -> (c, h, w).

        A transpose is required here; ``reshape`` would keep the memory order
        and interleave pixels from different channels.
        """
        return np.transpose(img, (2, 0, 1))

    def _load_images(self, pattern):
        """Read every image matching ``pattern`` as a 128x128 RGB CHW array.

        Raises:
            ValueError: if OpenCV cannot decode one of the matched files.
        """
        images = []
        for f in glob.iglob(pattern):
            img = cv2.imread(f)  # cv2 reads in BGR format by default
            if img is None:
                # imread returns None on failure; report the offending path
                # instead of crashing later inside cv2.resize.
                raise ValueError("Could not read image file: {}".format(f))
            img = cv2.resize(img, (128, 128))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            images.append(self._to_chw(img))
        return images

    def _active_split(self):
        """Return ``(images, labels)`` for the split selected by ``mode``.

        Raises:
            ValueError: if ``mode`` is neither ``'train'`` nor ``'val'``.
            RuntimeError: if ``train_val_split()`` has not been called yet.
        """
        if self.mode == 'train':
            images, labels = self.X_train, self.y_train
        elif self.mode == 'val':
            images, labels = self.X_val, self.y_val
        else:
            raise ValueError("mode must be 'train' or 'val', got {!r}".format(self.mode))
        if images is None:
            raise RuntimeError("call train_val_split() before using the dataset")
        return images, labels

    # Define a function that would separate the data into Training and Validation sets (NEW FROM OLD MRI CLASS)
    def train_val_split(self):
        """Split ``images``/``labels`` 80/20 into train and validation sets."""
        self.X_train, self.X_val, self.y_train, self.y_val = \
        train_test_split(self.images, self.labels, test_size=0.20, random_state=42)

    def __len__(self):
        """Return the number of samples in the split selected by ``mode``."""
        images, _ = self._active_split()
        return images.shape[0]

    def __getitem__(self, idx):
        """Return ``{'image': ..., 'label': ...}`` from the selected split."""
        images, labels = self._active_split()
        return {'image': images[idx], 'label': labels[idx]}

    def normalize(self):
        """Rescale pixel values by 1/255 (call once).

        Splits that already exist are rescaled too, so the result does not
        depend on whether this runs before or after ``train_val_split()``.
        """
        self.images = self.images/255.0
        if self.X_train is not None:
            self.X_train = self.X_train/255.0
        if self.X_val is not None:
            self.X_val = self.X_val/255.0

In [None]:
# Build the dataset, rescale the pixels by 1/255, then create the train/validation split.
# Normalising before splitting means X_train / X_val are built from the rescaled images.
mri_dataset = MRI()
mri_dataset.normalize()
mri_dataset.train_val_split()

In [None]:
# Both loaders wrap the SAME dataset object; which split they actually serve
# depends on the value of mri_dataset.mode at the time they are iterated.
train_dataloader = DataLoader(mri_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(mri_dataset, batch_size=32, shuffle=False)

In [None]:
# Fresh, untrained network placed on the CPU.
device = torch.device("cpu")
model = CNN()
model.to(device)

In [None]:
# Adam over all model parameters; eta is the learning rate.
eta = 1e-4
optimizer = torch.optim.Adam(params=model.parameters(), lr=eta)

In [None]:
# keep track of epoch losses
epoch_train_loss = []
epoch_val_loss = []

In [None]:
# Train for num_epochs epochs, recording the mean training and validation
# loss of every epoch. Epochs are numbered from 1, so the number printed in
# the log is the number of epochs actually completed.
num_epochs = 600
error = nn.BCELoss()  # stateless, so one instance serves every batch

for epoch in range(1, num_epochs + 1):
    train_losses = []
    # train for the current epoch
    model.train()
    mri_dataset.mode = 'train'  # the shared dataset now serves the training split
    for D in train_dataloader:
        # Train the model
        optimizer.zero_grad()
        data = D['image'].to(device)
        label = D['label'].to(device)

        y_hat = model(data)
        # squeeze(1) removes only the output-feature axis: (N, 1) -> (N,).
        # A bare squeeze() would also collapse a final batch of one sample
        # to a 0-d tensor, which no longer matches the label's shape.
        loss = error(y_hat.squeeze(1), label)
        loss.backward()
        optimizer.step()
        train_losses.append(loss.item())

    epoch_train_loss.append(np.mean(train_losses))

    # validate for the current epoch
    val_losses = []
    model.eval()

    mri_dataset.mode = 'val'  # switch the shared dataset to the validation split

    with torch.no_grad():
        for D in val_dataloader:
            data = D['image'].to(device)
            label = D['label'].to(device)
            y_hat = model(data)
            loss = error(y_hat.squeeze(1), label)
            val_losses.append(loss.item())

    epoch_val_loss.append(np.mean(val_losses))

    if epoch % 10 == 0:
        print('Train Epoch: {}\tTrain Loss: {:.6f}\tVal Loss: {:.6f}'.format(epoch, np.mean(train_losses),np.mean(val_losses)))

In [None]:
# Training (blue) and validation (red) loss per epoch on shared axes.
plt.figure(figsize=(16, 9))
for series, colour, caption in (
    (epoch_train_loss, 'b', 'Train loss'),
    (epoch_val_loss, 'r', 'Validation loss'),
):
    plt.plot(series, c=colour, label=caption)
plt.xlabel('Epochs', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.grid()
plt.legend()