In [3]:
from datetime import datetime
from os import listdir

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from numpy.random import seed

# Fix the global NumPy and TensorFlow random seeds so repeated runs of the
# notebook start from the same random state.
NUMPY_SEED = 1
TF_SEED = 2
seed(NUMPY_SEED)
tf.random.set_seed(TF_SEED)

In [4]:
def scale_and_normalize(arr):
    """
    Perform Positive Global Standardization on input array and return it.

    The array is standardized with its own global mean and standard
    deviation, clipped to [-1, 1], and then shifted/rescaled to [0, 1].

    Arguments:
        arr: 2-dimensional image array containing int or float values
    Returns:
        arr: positive globally standardized arr of float32 values in [0, 1].
             A constant input (standard deviation of 0) maps to 0.5 everywhere.
    """
    arr = arr.astype('float32')
    mean, stand_dev = arr.mean(), arr.std()
    if stand_dev == 0:
        # Every pixel equals the mean, so the standardized value is 0, which
        # maps to 0.5 after the shift below. Returning it directly avoids a
        # 0/0 division that would fill the image with NaN.
        return np.full(arr.shape, 0.5, dtype='float32')
    arr = (arr - mean) / stand_dev
    # Keep only values within one standard deviation of the mean ...
    arr = np.clip(arr, -1, 1)
    # ... and move the [-1, 1] range onto [0, 1].
    arr = (arr + 1) / 2
    return arr

In [5]:
def load_data(dir_list, image_size):
    """
    Read images as grayscale, resize and normalize them, and build labels.

    Arguments:
        dir_list: list of strings representing file directories. A directory
            whose path ends in 'yes' (ignoring trailing path separators) is
            labelled 1; every other directory is labelled 0.
        image_size: tuple (image_width, image_height) every image is resized to.
    Returns:
        X: A numpy array with shape = (#_examples, image_height, image_width)
        y: A numpy array with shape = (#_examples, 1)
    """
    X = []
    y = []
    skipped = []
    image_width, image_height = image_size

    for directory in dir_list:
        # The label comes from the folder name: 1 for '...yes', 0 otherwise.
        # Trailing separators are stripped so 'path/yes/' is labelled like 'path/yes'.
        label = 1 if directory.rstrip('/\\').endswith('yes') else 0

        # listdir order is filesystem-dependent; sorting makes the pre-shuffle
        # order (and therefore the seeded shuffle/split) reproducible.
        for filename in sorted(listdir(directory)):
            path = directory + '/' + filename
            # load the image
            image = cv2.imread(path)
            if image is None:
                # imread returns None for files it cannot decode
                # (hidden files, non-images, corrupt images) - skip them.
                skipped.append(path)
                continue
            # collapse the BGR channels into a single grayscale channel
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            # resize image (dsize is given as (width, height))
            image = cv2.resize(image, dsize=(image_width, image_height), interpolation=cv2.INTER_CUBIC)
            # normalize values
            image = scale_and_normalize(image)
            X.append(image)
            y.append([label])

    X = np.array(X)
    y = np.array(y)

    # Shuffle the data (X and y are permuted together)
    X, y = shuffle(X, y)

    if skipped:
        print(f'Skipped {len(skipped)} unreadable file(s): {skipped}')
    print(f'Number of examples is: {len(X)}')
    print(f'X shape is: {X.shape}')
    print(f'y shape is: {y.shape}')

    return X, y

In [6]:
# Target image dimensions, then load and preprocess both class folders

IMG_WIDTH = IMG_HEIGHT = 240
X, y = load_data(
    ['../data/JPG_Brain_Scans/yes', '../data/JPG_Brain_Scans/no'],
    (IMG_WIDTH, IMG_HEIGHT),
)

Number of examples is: 253
X shape is: (253, 240, 240)
y shape is: (253, 1)


In [7]:
# Add a trailing channel dimension of size 1 to X to allow for model training.
# The shared size constants are used instead of literal 240s so this cell keeps
# working if IMG_WIDTH / IMG_HEIGHT are changed.

X = X.reshape(-1, IMG_HEIGHT, IMG_WIDTH, 1)

In [8]:
# Hold out 20% of the examples as a test set; stratifying on y keeps the
# class proportions the same in both splits

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    stratify=y,
)

In [9]:
# Augmentation generator: random rotations of up to 30 degrees plus random
# horizontal/vertical flips.
# NOTE(review): validation_split only takes effect when flow(...) is called
# with subset='training'/'validation'; no flow call in this notebook passes
# subset - confirm whether the split is still wanted.

datagen = ImageDataGenerator(
    rotation_range=30,
    horizontal_flip=True,
    vertical_flip=True,
    validation_split=.2,
)

In [10]:
# Instantiate model: three Conv2D -> MaxPool2D stages (batch-normalised after
# the first two), flattened into a single sigmoid unit for binary output.
# The input shape is built from the shared size constants so it stays in sync
# with the loaded images.

model = keras.models.Sequential([
        keras.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 1)),
        layers.Conv2D(32, 3, strides=(1, 1), activation='relu', data_format='channels_last', name='conv0'),
        layers.MaxPool2D((2, 2), name='max_pool0'),
        layers.BatchNormalization(name='bn0'),
        layers.Conv2D(64, 3, strides=(1, 1), activation='relu', data_format='channels_last', name='conv1'),
        layers.MaxPool2D((2, 2), name='max_pool1'),
        layers.BatchNormalization(name='bn1'),
        layers.Conv2D(128, 3, strides=(1, 1), activation='relu', data_format='channels_last', name='conv2'),
        layers.MaxPool2D((2, 2), name='max_pool2'),
        layers.Flatten(),
        layers.Dense(1, activation='sigmoid')])

In [11]:
# View summary: print each layer's name, output shape and parameter count

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv0 (Conv2D)               (None, 238, 238, 32)      320       
_________________________________________________________________
max_pool0 (MaxPooling2D)     (None, 119, 119, 32)      0         
_________________________________________________________________
bn0 (BatchNormalization)     (None, 119, 119, 32)      128       
_________________________________________________________________
conv1 (Conv2D)               (None, 117, 117, 64)      18496     
_________________________________________________________________
max_pool1 (MaxPooling2D)     (None, 58, 58, 64)        0         
_________________________________________________________________
bn1 (BatchNormalization)     (None, 58, 58, 64)        256       
_________________________________________________________________
conv2 (Conv2D)               (None, 56, 56, 128)       7

In [12]:
# Compile model: Adam optimizer, binary cross-entropy loss, and binary accuracy
# as the tracked metric (reported on validation data as 'val_binary_accuracy')

opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(
    optimizer=opt,
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

In [18]:
# Baseline run: fit on the raw (un-augmented) training arrays.
# The callbacks are created in this cell because, in document order, they are
# otherwise first assigned further down the notebook; on a fresh kernel
# (Restart & Run All) the fit call would raise NameError.
logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
model_path = 'models/best_classifier.h5'

tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

# Keep only the weights with the best validation binary accuracy seen so far
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=model_path,
    monitor='val_binary_accuracy',
    mode='max',
    save_best_only=True)

# NOTE(review): validation_data is the held-out test split, so the checkpoint
# is selected on the same data later used to report accuracy - consider
# carving a separate validation split out of X_train.
neural_net = model.fit(X_train, y_train, batch_size=32, epochs=100, shuffle=True,
                       validation_data=(X_test, y_test),
                       callbacks=[tensorboard_callback, model_checkpoint_callback])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [13]:
# Save tensorboard callback logs for each training epoch, in a run directory
# named after the current timestamp

logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")
model_path = 'models/best_classifier.h5'

tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

# Save best model according to its validation set binary accuracy

model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=model_path,
    monitor='val_binary_accuracy',
    mode='max',
    save_best_only=True)

# Fit model using ImageDataGenerator on training data, unaltered testing data
# NOTE(review): validation_data is the held-out test split, so the "best"
# checkpoint is selected on the same data later used to report accuracy -
# consider carving a separate validation split out of X_train.

neural_net = model.fit(datagen.flow(X_train, y_train, batch_size=32),
                       epochs=100, shuffle=True,
                       validation_data=(X_test, y_test),
                       callbacks=[tensorboard_callback, model_checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [38]:
# Test-time augmentation (TTA): run 'tta_steps' rounds of predictions on test
# set data which has been rotated/flipped by the ImageDataGenerator and append
# each round's thresholded (0/1) predictions to a list.

tta_steps = 10
predictions = []

for _ in range(tta_steps):
    # shuffle=False keeps the prediction rows in the same order as y_test
    preds = (model.predict(datagen.flow(X_test, batch_size=32, shuffle=False)) > 0.5).astype("int32")
    predictions.append(preds)

# Stack the rounds along a new leading axis and take the per-image majority
# vote (mode over the rounds). The result is reshaped to y_test's shape
# explicitly because the shape returned by stats.mode depends on the SciPy
# version (keepdims default), so the comparison no longer relies on broadcasting.
# NOTE(review): stats.mode returns the smallest value on ties, so with an even
# tta_steps a split vote is counted as class 0 - consider an odd tta_steps.
votes = np.stack(predictions, axis=0)
majority_vote = stats.mode(votes, axis=0)[0].reshape(y_test.shape)

# Compare the majority vote for each image against the true label, calculate
# accuracy

tta_accuracy = np.mean(np.equal(y_test, majority_vote))
tta_accuracy

0.7058823529411765