In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import pickle as pkl
import os
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
from sklearn.decomposition import PCA

#tqdm is for progress bar functionality in code, must be installed for code to function
from tqdm import tqdm

#Importing libraries used for SVM classification and model assessment
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_curve, roc_auc_score

#Libraries for CNN model
from keras import metrics
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.models import model_from_json

#Importing functions notebook containing functions created to streamline code
from ipynb.fs.full.functions import load_dataset, dataset_PCA, Tuned_SVM_train, SVM_predictions, load_dataset_CNN, image_array_resize

Using TensorFlow backend.


# 1. Loading Trained Binary Models


#### 1.1 Untuned SVM

In [3]:
#Code to load the trained untuned SVM model
#NOTE: pickle can execute arbitrary code while loading, so only load model files from a trusted source
#os.path.join keeps the path portable instead of hard-coding Windows separators
untuned_SVM_path = os.path.join('.', 'Models', 'Binary-Classification', 'Untuned_SVM_model.sav')
#The with-block closes the file handle as soon as the model has been read
with open(untuned_SVM_path, 'rb') as untuned_SVM_file:
    untuned_SVM = pkl.load(untuned_SVM_file)
print("Loaded Untuned SVM model from disk")

Loaded Untuned SVM model from disk


#### 1.2 Tuned SVM

In [4]:
#Code to test loading of trained tuned SVM model
#NOTE: pickle can execute arbitrary code while loading, so only load model files from a trusted source
#os.path.join keeps the path portable instead of hard-coding Windows separators
Tuned_SVM_path = os.path.join('.', 'Models', 'Binary-Classification', 'Tuned_SVM_model.sav')
#The with-block closes the file handle as soon as the model has been read
with open(Tuned_SVM_path, 'rb') as Tuned_SVM_file:
    Tuned_SVM = pkl.load(Tuned_SVM_file)
print("Loaded Tuned SVM model from disk")

#Gets the best parameter values from loaded model
best_param = Tuned_SVM.best_params_
#Prints out the parameter values
#Output should match the .best_params_ output recorded when the model was tuned
print("With Parameters: ", best_param)

Loaded Tuned SVM model from disk
With Parameters:  {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}


#### 1.3 CNN

In [5]:
# Folder holding the saved Binary CNN architecture and weights
# os.path.join keeps the paths portable instead of hard-coding Windows separators
binary_CNN_dir = os.path.join('.', 'Models', 'Binary-Classification', 'CNN')

# load Binary CNN json and create model
# The with-block closes the json file even if reading it raises an error
with open(os.path.join(binary_CNN_dir, 'Binary_CNN.json'), 'r') as json_file:
    binary_model_json = json_file.read()
binary_CNN = model_from_json(binary_model_json)
# load weights into new model
binary_CNN.load_weights(os.path.join(binary_CNN_dir, 'Binary_CNN_Model_Weights'))
print("Loaded CNN model from disk")

Loaded CNN model from disk


# 2. Loading extra test dataset and carrying out necessary preprocessing
#### 2.1 SVM

In [25]:
#We set the path to the images in the dataset folder
#os.path.join replaces the old backslash literal, which relied on the invalid "\E" and "\i" escape sequences
extra_test_image_dir = os.path.join('dataset', 'Extra_test_dataset', 'image')
#Images will be resized to 20 * 20 (the second argument is the target size)
SVM_Images = image_array_resize(extra_test_image_dir, 20)
print(SVM_Images.shape)


100%|██████████| 200/200 [00:00<00:00, 503.07it/s]

Successfully extracted original Images from dataset!
(200, 20, 20)





In [26]:
#Reshapes the 3D array into 2D, giving one row of flattened pixels per image
#This 2D (n_samples, n_features) array is what gets passed to the SVM models later on
#The number of images is read from the array itself instead of being hard-coded, and -1 lets numpy work out the pixel count
SVM_flattened = SVM_Images.reshape(len(SVM_Images), -1)
print(SVM_flattened.shape)
#resultant array is n_images * 400 (200 * 400 here) with all 400 pixels arranged in a single row instead of 20 * 20
#Scaling the pixel data per element, assumes 8-bit pixel values so that the result is between 0 and 1
SVM_flattened_scaled = SVM_flattened/255

(200, 400)


#### 2.2 CNN

In [10]:
#Do the same for our CNN images resized to 50
#os.path.join replaces the old backslash literal, which relied on the invalid "\E" and "\i" escape sequences
CNN_image_dir = os.path.join('dataset', 'Extra_test_dataset', 'image')
CNN_image_size = 50
CNN_Images = image_array_resize(CNN_image_dir, CNN_image_size)

#Adding 4th channel to array (for convnet fitting)
#The last channel is indicating whether it is a RGB channel (3) or grayscale (1) image
#-1 lets numpy infer the number of images instead of hard-coding 200
CNN_Images_4D = CNN_Images.reshape(-1, CNN_image_size, CNN_image_size, 1)

#Getting our X inputs for the model and scaling them
#Carrying out scaling of the pixel data per element so that it is between 0 and 1
xTest_CNN = CNN_Images_4D/255

 84%|████████▍ | 168/200 [00:00<00:00, 837.56it/s]

ERROR! Session/line number was not unique in database. History logging moved to new session 333


100%|██████████| 200/200 [00:00<00:00, 855.47it/s]

Successfully extracted original Images from dataset!





#### 2.3 Loading Labels 

In [11]:
#Loading the CSV Label file
extra_test_labels = pd.read_csv('./dataset/Extra_test_dataset/label.csv')

#Taking just the label portion for editing into our Target Y array
Y = extra_test_labels[['label']]
Y_np = Y.to_numpy()

#Builds the binary target array with a single vectorised comparison over every label
#A label equal to the string 'no_tumor' gets the value 0
#Any other label, regardless of the type of tumor, gets the value 1
#Meaning the target label is showing a tumor in the mri image.
#to_numpy(dtype=float) gives a 1D float array, matching the np.zeros array the old element-by-element loop filled in
yTest = Y['label'].ne('no_tumor').to_numpy(dtype=float)

print("yTest Label array setup!")

yTest Label array setup!


# 3. Predictions on extra test Dataset
##### This section shows the additional predictions made on an out-of-sample dataset, released one week before the deadline for optional testing of the models, and prints the classification report, confusion matrix and other metrics for analysis.

#### 3.1 SVM

In [27]:
#Sanity check: show the dimensions of the scaled SVM input before making predictions
print(np.shape(SVM_flattened_scaled))

(200, 400)


In [29]:
#Runs SVM_predictions (from "functions.ipynb") once per loaded SVM model on the same scaled test data
#The untuned model is evaluated first and the tuned model second
#Each call prints the classification report and confusion matrix for its model and returns the predictions
untuned_SVM_pred, Tuned_SVM_pred = (
    SVM_predictions(svm_model, SVM_flattened_scaled, yTest)
    for svm_model in (untuned_SVM, Tuned_SVM)
)

The Results for SVM are:
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00        37
         1.0       0.81      1.00      0.90       163

    accuracy                           0.81       200
   macro avg       0.41      0.50      0.45       200
weighted avg       0.66      0.81      0.73       200

The confusion matrix is:
[[  0  37]
 [  0 163]]
The Results for SVM are:
              precision    recall  f1-score   support

         0.0       0.20      0.92      0.33        37
         1.0       0.91      0.18      0.30       163

    accuracy                           0.32       200
   macro avg       0.55      0.55      0.31       200
weighted avg       0.78      0.32      0.30       200

The confusion matrix is:
[[ 34   3]
 [134  29]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### 3.2 CNN

In [17]:
#Making Predictions with the model
#predict_classes() is deprecated and removed from newer Keras releases, so the class labels are derived from predict() instead
Binary_CNN_prob = binary_CNN.predict(xTest_CNN, verbose = 1)
if Binary_CNN_prob.shape[-1] > 1:
    #More than one output unit: the predicted class is the unit with the highest score
    Binary_CNN_pred = Binary_CNN_prob.argmax(axis=-1)
else:
    #Single output unit: the score is thresholded at 0.5 to get the 0/1 class
    Binary_CNN_pred = (Binary_CNN_prob > 0.5).astype('int32')

#Printing the classification report and metrics
print("The Results for Binary CNN are:")
print(classification_report(yTest, Binary_CNN_pred))

#Printing the confusion matrix for Binary CNN
print("The confusion matrix is:")
print(confusion_matrix(yTest, Binary_CNN_pred))

#To obtain TNR, TPR and FPR metrics for Binary CNN in discrete form
#The hard 0/1 predictions are passed in (not the scores), so the curve reflects the single operating point used above
FPR_Binary_CNN, TPR_Binary_CNN, Binary_CNN_thres = roc_curve(yTest, Binary_CNN_pred)

The Results for Binary CNN are:
              precision    recall  f1-score   support

         0.0       0.88      0.76      0.81        37
         1.0       0.95      0.98      0.96       163

    accuracy                           0.94       200
   macro avg       0.91      0.87      0.89       200
weighted avg       0.93      0.94      0.93       200

The confusion matrix is:
[[ 28   9]
 [  4 159]]
