In [None]:
import os

In [None]:
# Mount Google Drive inside the Colab VM; the notebook later changes into
# '/content/drive/My Drive/' and reads the dataset from there.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
import os
import math
import shutil
import glob
import cv2
import imutils
import seaborn as sns
from sklearn.utils import shuffle
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Conv2D, MaxPool2D, Dropout, Flatten, Dense, BatchNormalization, GlobalAveragePooling2D
from keras.models import Model, Sequential
import keras
from PIL import Image

In [None]:
# Seed the random number generators used in this notebook (Python `random`,
# NumPy and TensorFlow) with one shared value.
import tensorflow as tf
import random as rn

seed = 42

# NOTE(review): hash randomization is presumably fixed when the interpreter
# starts, so this assignment may only affect child processes — confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
rn.seed(seed)
tf.random.set_seed(seed)

In [None]:
# Work from the Drive root so the relative 'Dataset/...' and 'Models/...' paths resolve.
%cd /content/drive/My Drive/

In [None]:
#Count images in the two class folders: yes (Tumor) and no (Healthy)
root = 'Dataset/BrainTumor_DS/'
# Only sub-directories are treated as classes: a stray file inside `root`
# would otherwise make the inner os.listdir() raise. A comprehension is used
# so that no loop variable named `dir` shadows the builtin at notebook scope.
dict_img = {
    class_name: len(os.listdir(os.path.join(root, class_name)))
    for class_name in os.listdir(root)
    if os.path.isdir(os.path.join(root, class_name))
}
dict_img


In [None]:
# Folders holding the 'yes' and 'no' MRI images
yes_path = root + 'yes'
no_path = root + 'no'

# Show the first two listed images of each class in a 2x2 grid:
# top row 'yes', bottom row 'no'.
plt.figure(figsize=(10, 10))

panel = 0
for label, folder in (('yes', yes_path), ('no', no_path)):
    for position in (0, 1):
        panel += 1
        img_path = os.path.join(folder, os.listdir(folder)[position])
        img = Image.open(img_path)
        plt.subplot(2, 2, panel)
        plt.imshow(img)
        plt.title(label)

plt.show()


In [None]:
#Define a function for image augmentation
def augment_data(file_dir, n_generated_samples, save_to_dir):
    """Write randomly augmented copies of every image in ``file_dir`` to disk.

    Args:
        file_dir: Folder whose entries are read with ``cv2.imread``.
        n_generated_samples: Number of augmented images to write per source image.
        save_to_dir: Folder that receives the generated JPEG files.

    Returns:
        None. The function only writes files into ``save_to_dir``.
    """
    data_gen = ImageDataGenerator(rotation_range=10,
                                  width_shift_range=0.1,
                                  height_shift_range=0.1,
                                  shear_range=0.1,
                                  brightness_range=(0.3, 1.0),
                                  horizontal_flip=True,
                                  vertical_flip=True,
                                  fill_mode='nearest',
                                  rescale= 1/255
                                 )

    for filename in os.listdir(file_dir):
        image = cv2.imread(os.path.join(file_dir, filename))
        if image is None:
            # cv2.imread signals "could not decode" with None instead of raising.
            print(f'Skipping unreadable file: {os.path.join(file_dir, filename)}')
            continue
        # Add a leading batch axis: the generator is fed one image at a time.
        image = image.reshape((1,) + image.shape)
        # splitext handles extensions of any length ('.jpeg' as well as '.jpg');
        # a fixed [:-4] slice leaves a trailing dot for the former.
        save_prefix = 'aug_' + os.path.splitext(filename)[0]
        flow = data_gen.flow(x=image, batch_size=1, save_to_dir=save_to_dir,
                             save_prefix=save_prefix, save_format='jpg')
        # Each batch drawn from the iterator is written to save_to_dir, so draw
        # exactly n_generated_samples of them (an `i > n` check after the draw
        # lets one extra image through).
        for _ in range(n_generated_samples):
            next(flow)

In [None]:
#Create the folder tree for augmented images: one sub-folder per class found in `root`
name = 'Dataset/aug_data'
if os.path.exists("./"+name):
  print(f"{name} Already exists")
# exist_ok=True keeps this cell re-runnable and also repairs a tree that was
# only partly created by an interrupted earlier run.
os.makedirs("./"+name, exist_ok=True)
for class_name in os.listdir(root):
  # Mirror class directories only; stray files in `root` are not classes.
  if os.path.isdir(os.path.join(root, class_name)):
    os.makedirs(os.path.join("./"+name, class_name), exist_ok=True)

In [None]:
#Augment the raw 'yes' images (this runs on the original dataset, before any train/test/val split)
augment_data(file_dir='Dataset/BrainTumor_DS/yes',n_generated_samples=8, save_to_dir='Dataset/aug_data/yes')
#Augment the raw 'no' images; more samples are requested here (12 vs 8) — presumably to balance the classes, confirm
augment_data(file_dir='Dataset/BrainTumor_DS/no', n_generated_samples=12, save_to_dir='Dataset/aug_data/no')

In [None]:
#Count images in the two folders: yes (Tumor) and no (Healthy) in the folder of augmented images
#NOTE: `root` and `dict_img` are rebound here on purpose: create_folders() reads
#both as globals, so they must describe the augmented pool before the split runs.
root = 'Dataset/aug_data'
# Only sub-directories are classes; a comprehension also avoids leaving a
# global loop variable named `dir` that shadows the builtin.
dict_img = {
    class_name: len(os.listdir(os.path.join(root, class_name)))
    for class_name in os.listdir(root)
    if os.path.isdir(os.path.join(root, class_name))
}
dict_img

In [None]:
#Define a function that creates a new folder for one of the train, test and val sets and moves random pictures into it based on the split percentage
def create_folders(name, perc, source_root=None, counts=None):
  """Create split folder ``name`` and move a random share of every class into it.

  Args:
    name: Path of the split folder to create. Nothing happens if it already exists.
    perc: Fraction of ``counts[class]`` to move for each class (floored).
    source_root: Folder holding one sub-folder per class. Defaults to the
      notebook-level ``root``.
    counts: Mapping class name -> reference total used to size the draw.
      Defaults to the notebook-level ``dict_img``. It is NOT recomputed here,
      so successive calls take fractions of the same totals while drawing
      from the shrinking pool.

  Returns:
    None. Files are moved on disk.

  Raises:
    ValueError: from ``np.random.choice`` when more files are requested than
      remain in a class folder.
  """
  # NOTE(review): files are drawn individually, so several files derived from
  # one source image could end up in different splits — confirm this is intended.
  source_root = root if source_root is None else source_root
  counts = dict_img if counts is None else counts

  if os.path.exists(name):
    print(f"{name} Already exists")
    return

  os.makedirs(name)
  for class_name in sorted(os.listdir(source_root)):
    class_src = os.path.join(source_root, class_name)
    if not os.path.isdir(class_src):
      # Stray files next to the class folders are not classes.
      continue
    class_dest = os.path.join(name, class_name)
    os.makedirs(class_dest)
    # os.listdir order is arbitrary; sorting makes the seeded draw repeatable.
    candidates = sorted(os.listdir(class_src))
    n_wanted = math.floor(perc * counts[class_name])
    for img in np.random.choice(a=candidates, size=n_wanted, replace=False):
      shutil.move(os.path.join(class_src, img), os.path.join(class_dest, img))

In [None]:
#Create the training set: 70% of the per-class totals recorded in dict_img
create_folders('Dataset/train', 0.7)

In [None]:
#Create the test set: 15% of the per-class totals recorded in dict_img, drawn from what the training split left behind
create_folders('Dataset/test', 0.15)

In [None]:
#Create the validation set: 15% of the per-class totals recorded in dict_img, drawn from the remaining files
create_folders('Dataset/val', 0.15)

In [None]:
#Define a function that counts images in the folders: yes (Tumor) and no (Healthy)
def count_img(folder):
  """Return a dict mapping every entry of ``folder`` to the number of items inside it."""
  return {
    entry: len(os.listdir(os.path.join(folder, entry)))
    for entry in os.listdir(folder)
  }

In [None]:
#Count images per class folder in the training set
count_img('Dataset/train')

In [None]:
#Count images per class folder in the test set
count_img('Dataset/test')


In [None]:
#Count images per class folder in the validation set
count_img('Dataset/val')

In [None]:
#Define a function that crops the brain contour
def crop_brain_contour(image, plot=False):
    """Crop ``image`` to the extreme points of its largest thresholded contour.

    Args:
        image: 3-channel image array. It is converted with
            ``cv2.COLOR_BGR2GRAY``, so BGR channel order is expected.
        plot: When True, show the original and the cropped image side by side.

    Returns:
        The cropped region of ``image``. ``image`` itself is returned when no
        contour survives thresholding or when the crop would be empty.
    """
    #Convert the image to grayscale, and blur it slightly
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)

    #Binarize, then erode/dilate to remove small specks of noise
    thresh = cv2.threshold(gray, 45, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.erode(thresh, None, iterations=2)
    thresh = cv2.dilate(thresh, None, iterations=2)

    #Find contours in thresholded image, then grab the largest one
    cnts = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    if len(cnts) == 0:
        #Nothing passed the threshold: max() would raise on an empty sequence,
        #so fall back to the uncropped image.
        new_image = image
    else:
        c = max(cnts, key=cv2.contourArea)

        #Extreme points
        extLeft = tuple(c[c[:, :, 0].argmin()][0])
        extRight = tuple(c[c[:, :, 0].argmax()][0])
        extTop = tuple(c[c[:, :, 1].argmin()][0])
        extBot = tuple(c[c[:, :, 1].argmax()][0])

        #Crop new image out of the original image using the four extreme points (left, right, top, bottom)
        new_image = image[extTop[1]:extBot[1], extLeft[0]:extRight[0]]

        if new_image.size == 0:
            #A contour one pixel wide or tall gives an empty slice, which
            #cannot be resized by callers; keep the full image instead.
            new_image = image

    if plot:
        #Reverse the channel axis for display only: the data is BGR while
        #matplotlib interprets 3-channel arrays as RGB.
        plt.figure()
        plt.subplot(1, 2, 1)
        plt.imshow(image[..., ::-1])
        plt.tick_params(axis='both', which='both', top=False, bottom=False, left=False, right=False,labelbottom=False, labeltop=False, labelleft=False, labelright=False)
        plt.title('Original Image')
        plt.subplot(1, 2, 2)
        plt.imshow(new_image[..., ::-1])
        plt.tick_params(axis='both', which='both',top=False, bottom=False, left=False, right=False,labelbottom=False, labeltop=False, labelleft=False, labelright=False)
        plt.title('Cropped Image')
        plt.show()

    return new_image

In [None]:
#Example: crop one raw 'no' image and show it next to the original
ex_img = cv2.imread('Dataset/BrainTumor_DS/no/1 no.jpeg')
img = crop_brain_contour(ex_img, plot=True)

In [None]:
#Define a function that loads data
def load_data(dir_list, image_size):
    """Read, crop, resize and normalize every image found in ``dir_list``.

    Args:
        dir_list: Folders to read. A folder whose path ends in 'yes' is
            labelled 1, any other folder 0.
        image_size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``(X, y)`` shuffled in unison. ``X`` stacks the images after division
        by 255; ``y`` holds one ``[label]`` row per image.
    """
    X = []
    y = []
    image_width, image_height = image_size

    for directory in dir_list:
        # normpath drops a trailing slash, which would defeat a plain
        # "last three characters" comparison.
        label = 1 if os.path.normpath(directory).endswith('yes') else 0

        for filename in os.listdir(directory):
            path = os.path.join(directory, filename)
            image = cv2.imread(path)
            if image is None:
                # cv2.imread returns None for anything it cannot decode
                # (sub-folders, metadata files); cropping it would crash.
                print(f'Skipping unreadable file: {path}')
                continue
            image = crop_brain_contour(image, plot=False)
            image = cv2.resize(image, dsize=(image_width, image_height), interpolation=cv2.INTER_CUBIC)
            # normalize values
            image = image / 255.
            X.append(image)
            y.append([label])

    X = np.array(X)
    y = np.array(y)

    # Shuffle the data
    X, y = shuffle(X, y)

    print(f'Number of examples is: {len(X)}')
    print(f'X shape is: {X.shape}')
    print(f'y shape is: {y.shape}')

    return X, y

In [None]:
#Load training data; every image is resized to IMG_WIDTH x IMG_HEIGHT
IMG_WIDTH, IMG_HEIGHT = (240, 240)
X_train, y_train = load_data(['Dataset/train/yes', 'Dataset/train/no'], (IMG_WIDTH, IMG_HEIGHT))

In [None]:
#Define a function that plots images
def plot_sample_images(X, y, n=40):
    """Show up to ``n`` images per class (label 0, then label 1) in a 10-column grid.

    Args:
        X: Array of images; ``X[i]`` is handed to ``plt.imshow``.
        y: Labels aligned with ``X`` (0 or 1), flat or with one label per row.
        n: Maximum number of images shown per class.

    Returns:
        None. One figure per class is displayed.
    """
    labels = np.asarray(y).reshape(-1)

    columns_n = 10
    # Round up so an `n` that is not a multiple of 10 still fits in the grid.
    rows_n = max(1, math.ceil(n / columns_n))

    for label in [0, 1]:
        # Pick the first n matching indices before touching X, so only the
        # displayed images are copied rather than the whole class.
        selected = np.flatnonzero(labels == label)[:n]

        plt.figure(figsize=(10,4))

        for i, idx in enumerate(selected, start=1):
            plt.subplot(rows_n, columns_n, i)
            plt.imshow(X[idx])

            # remove ticks
            plt.tick_params(axis='both', which='both',
                            top=False, bottom=False, left=False, right=False,
                           labelbottom=False, labeltop=False, labelleft=False, labelright=False)

        label_str = "Yes" if label == 1 else "No"
        plt.suptitle(f"Brain Tumor: {label_str}")
        plt.show()


In [None]:
#Plot samples from the training set (one figure per label, default n=40 images each)
plot_sample_images(X_train, y_train)

In [None]:
#Load test data with the same target size as the training data
IMG_WIDTH, IMG_HEIGHT = (240, 240)
X_test, y_test = load_data(['Dataset/test/yes', 'Dataset/test/no'], (IMG_WIDTH, IMG_HEIGHT))

In [None]:
#Plot samples from the test set (one figure per label)
plot_sample_images(X_test, y_test)

In [None]:
#Load validation data with the same target size as the training data
IMG_WIDTH, IMG_HEIGHT = (240, 240)
X_val, y_val = load_data(['Dataset/val/yes', 'Dataset/val/no'], (IMG_WIDTH, IMG_HEIGHT))

In [None]:
#Plot samples from the validation set (one figure per label)
plot_sample_images(X_val, y_val)

In [None]:
#Build our model: four 3x3 convolutions (16, 32, 64, 128 filters) with max-pooling
#after the last three, then dropout and a small dense head ending in one sigmoid unit
model = Sequential([
    Conv2D(filters=16, kernel_size=(3, 3), activation='relu', input_shape=(240, 240, 3)),

    Conv2D(filters=32, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),

    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),

    Conv2D(filters=128, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),

    Dropout(rate=0.25),

    Flatten(),
    Dense(units=64, activation='relu'),
    Dropout(rate=0.25),
    Dense(units=1, activation='sigmoid'),
])
#Compile our model for binary classification
model.compile(optimizer='adam', loss=keras.losses.binary_crossentropy, metrics=['accuracy'])
model.summary()

In [None]:
#Model checkpoint: write the model to 'Models/bestmodel.h5' only when val_accuracy improves.
#NOTE(review): the original heading mentioned early stopping, but no EarlyStopping
#callback is created in this cell.
from keras.callbacks import ModelCheckpoint

mc = ModelCheckpoint(monitor ='val_accuracy', filepath = 'Models/bestmodel.h5', verbose = 1, save_best_only = True, mode = 'auto')

In [None]:
#Train our model; `callbacks` is documented as a list of Callback instances, so the checkpoint is wrapped in one
hist = model.fit(x = X_train, y = y_train, batch_size = 32, epochs = 30, validation_data = (X_val, y_val), callbacks=[mc], verbose = 1)

In [None]:
#Plot training vs validation accuracy per epoch
h = hist.history
fig, ax = plt.subplots()
ax.plot(h['accuracy'], label = 'accuracy')
ax.plot(h['val_accuracy'], label = 'val-accuracy')
ax.set_title('Accuracy vs Val Accuracy')
ax.legend()
plt.show()

In [None]:
#Plot training vs validation loss per epoch
h = hist.history
fig, ax = plt.subplots()
ax.plot(h['loss'], label = 'loss')
ax.plot(h['val_loss'], label = 'val-loss')
ax.set_title('Loss vs Val Loss')
ax.legend()
plt.show()

In [None]:
#Test our model on the test set.
#The checkpoint file written by ModelCheckpoint is reloaded here, replacing the
#in-memory `model`, so the score belongs to the saved best-val_accuracy model.
from keras.models import load_model
model = load_model('Models/bestmodel.h5')
# Index 1 is the accuracy: the model was compiled with metrics=['accuracy'], index 0 is the loss.
acc = model.evaluate(X_test, y_test)[1]
print(f'The accuracy of our model is {acc}')