In [1]:
import numpy as np
import matplotlib.pyplot as plt 
from IPython import display

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

import cv2
from os import listdir
from sklearn.utils import shuffle

from numpy.random import seed
seed(1)
tf.random.set_seed(2)

In [2]:
def scale_and_normalize(arr):
    arr = arr.astype('float32')
    mean, stand_dev = arr.mean(), arr.std()
    arr = (arr-mean)/stand_dev
    arr = np.clip(arr, -1, 1)
    arr = (arr+1)/2
    return arr

In [3]:
def load_data(dir_list, image_size):
    """
    Read images, resize and normalize them. 
    Arguments:
        dir_list: list of strings representing file directories.
    Returns:
        X: A numpy array with shape = (#_examples, image_width, image_height, #_channels)
        y: A numpy array with shape = (#_examples, 1)
    """

    # load all images in a directory
    X = []
    y = []
    image_width, image_height = image_size
    
    for directory in dir_list:
        for filename in listdir(directory):
            # load the image
            image = cv2.imread(directory + '/' + filename)
            # crop the brain and ignore the unnecessary rest part of the image
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            # resize image
            image = cv2.resize(image, dsize=(image_width, image_height), interpolation=cv2.INTER_CUBIC)
            # normalize values
            image = scale_and_normalize(image)
            # convert image to numpy array and append it to X
            X.append(image)
            # append a value of 1 to the target array if the image
            # is in the folder named 'yes', otherwise append 0.
            if directory[-3:] == 'yes':
                y.append([1])
            else:
                y.append([0])
                
    X = np.array(X)
    y = np.array(y)
    
    # Shuffle the data
    X, y = shuffle(X, y)
    
    print(f'Number of examples is: {len(X)}')
    print(f'X shape is: {X.shape}')
    print(f'y shape is: {y.shape}')
    
    return X, y

In [4]:
IMG_WIDTH, IMG_HEIGHT = (240, 240)
X, y = load_data(['../data/JPG_Brain_Scans/yes', '../data/JPG_Brain_Scans/no'], (IMG_WIDTH, IMG_HEIGHT))

Number of examples is: 253
X shape is: (253, 240, 240)
y shape is: (253, 1)


In [5]:
X = X.reshape(-1, 240, 240, 1)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .2, shuffle=True, stratify=y)

In [7]:
## ended up using this ImageDataGenerator to generate random rotations/flips

datagen = keras.preprocessing.image.ImageDataGenerator(rotation_range=30,
                                                          horizontal_flip=True,
                                                          vertical_flip=True,
                                                          validation_split=.2)

In [8]:
# Apply the generator to the train and test sets

train_generator = datagen.flow(X_train, y_train)

validation_generator = datagen.flow(X_test, y_test)

In [30]:
model = keras.models.Sequential([
        keras.Input(shape=(240, 240, 1)),
        keras.layers.Conv2D(32, 3, strides=(1, 1), activation='relu', data_format='channels_last', name='conv0'),
        keras.layers.MaxPool2D((2, 2), name='max_pool0'),
        keras.layers.BatchNormalization(name='bn0'),
        #   Added additional convolutional layers after analyzing performance
        keras.layers.Conv2D(64, 3, strides=(1, 1), activation='relu', data_format='channels_last', name='conv1'),
        keras.layers.MaxPool2D((2, 2), name='max_pool1'),
        keras.layers.BatchNormalization(name='bn1'),
        keras.layers.Conv2D(128, 3, strides=(1, 1), activation='relu', data_format='channels_last', name='conv2'),
        keras.layers.MaxPool2D((2, 2), name='max_pool2'),
#         keras.layers.BatchNormalization(name='bn2'),
        keras.layers.Flatten(),
        keras.layers.Dense(1, activation='sigmoid')])

In [31]:
# compile model
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss = 'binary_crossentropy', optimizer = opt, metrics = ['binary_accuracy'])

In [32]:
from datetime import datetime
logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
model_path = 'models/best_classifier.h5'

tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

# Save best model according to its validation set binary accuracy
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=model_path,
    monitor='val_binary_accuracy',
    mode='max',
    save_best_only=True)



neural_net = model.fit(X_train, y_train, batch_size=32, epochs = 25, shuffle=True, validation_data = (X_test, y_test), callbacks=[tensorboard_callback, model_checkpoint_callback])

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [33]:
# Test model with rotations/flips, validation accuracy seems significantly worse

neural_net = model.fit(datagen.flow(X_train, y_train, batch_size=32), epochs=25, shuffle=True, validation_data = (X_test, y_test), callbacks=[tensorboard_callback, model_checkpoint_callback])

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [34]:
from scipy import stats

tta_steps = 10
predictions = []

for i in range(tta_steps):
    preds = (model.predict(datagen.flow(X_test, batch_size=32, shuffle=False)) > 0.5).astype("int32")
    predictions.append(preds)

np.mean(np.equal(y_test, stats.mode(predictions)[0]))

0.803921568627451

In [14]:
sum(y_test)/len(y_test)

array([0.60784314])