In [None]:
## Initialization & Preprocessing

In [None]:
## Packages used

import numpy as np
import tensorflow as tf
from tensorflow import keras
import pydicom
import matplotlib.pyplot as plt
%matplotlib inline

import pandas as pd
from sklearn.metrics import confusion_matrix
from keras import backend as K

from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing.image import ImageDataGenerator

import skimage.transform as resize

from keras import regularizers
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPool2D, Dropout, BatchNormalization

import gc

In [None]:
## Directory that holds the .npy datasets and receives the model checkpoint.
## NOTE(review): machine-specific absolute path -- adjust before running elsewhere.
rootdir = 'C:/Users/Job/Anaconda3/envs/TensorFlow-GPU/Pattern_Recognition/'

In [None]:
## The data on disk is already split into train/test sets, each with its own
## label file. Every (images, labels) pair is read from rootdir in one go.

## Mass data
combined, combined_labels = (
    np.load(rootdir + filename) for filename in ('train_x.npy', 'train_y.npy')
)
combinedtest, combinedtest_labels = (
    np.load(rootdir + filename) for filename in ('test_x.npy', 'test_y.npy')
)

## Calc data
combinedcalc, combinedcalc_labels = (
    np.load(rootdir + filename) for filename in ('calctrain_x.npy', 'calctrain_y.npy')
)
combinedtestcalc, combinedtestcalc_labels = (
    np.load(rootdir + filename) for filename in ('calctest_x.npy', 'calctest_y.npy')
)

In [None]:
## The mass and calc datasets are merged into one larger dataset
## (mass samples first, calc samples appended along the first axis).
## NOTE: this cell overwrites its own inputs, so running it twice without
## reloading the data appends the calc samples a second time.

# Train/validation pool
combined = np.concatenate([combined, combinedcalc], axis=0)
combined_labels = np.concatenate([combined_labels, combinedcalc_labels], axis=0)

# Held-out test set
combinedtest = np.concatenate([combinedtest, combinedtestcalc], axis=0)
combinedtest_labels = np.concatenate([combinedtest_labels, combinedtestcalc_labels], axis=0)

In [None]:
# Normalizing the input: subtract 1, then multiply by 2.
# The result lies in [-2, 0] provided the stored pixel values lie in [0, 1]
# -- TODO confirm against the .npy files.
# NOTE(review): keras' inception_v3.preprocess_input scales pixels to [-1, 1];
# confirm that [-2, 0] is really what the ImageNet-pretrained base should see.
# NOTE: the arrays are overwritten, so re-running this cell shifts them again.

combined = (combined - 1.0) * 2.0
combinedtest = (combinedtest - 1.0) * 2.0

In [None]:
## Randomizing the input. Samples and labels are indexed with the same
## permutation, so every image keeps its own label.
## The permutation is drawn from a seeded generator: the train/validation
## split further down is taken as fixed slices of the shuffled array, so
## without a seed that split (and every reported score) would change on each
## kernel restart.
SHUFFLE_SEED = 42

# Fancy indexing with a permutation built from len(combined) would fail or
# silently drop labels if the two arrays disagreed in length.
assert len(combined) == len(combined_labels), 'samples and labels differ in length'

randomize = np.random.RandomState(SHUFFLE_SEED).permutation(len(combined))
combined = combined[randomize]
combined_labels = combined_labels[randomize]

In [None]:
###############################################
###############################################
###############################################
###############################################
###############################################
###############################################

In [None]:
## CNN transfer learning


In [None]:
## Start from a clean slate: drop any previous model, reset the Keras session
## state and run a garbage-collection pass.
model = None
tf.keras.backend.clear_session()
gc.collect()

## The network is assembled as a sequential model
model = Sequential()

## InceptionV3 base with ImageNet weights and without its classification top,
## expecting 299x299 inputs with 3 channels
pretrained = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))

## Freezing blocks of layers, explained further in report:
## layers with an index below 279 are frozen, all later layers stay trainable
for layer_index, layer in enumerate(pretrained.layers):
    layer.trainable = layer_index >= 279

model.add(pretrained)

## Overview of the model so far (pretrained base only)
model.summary()

In [None]:
## Our own layers, stacked on top of the pretrained base:
## dropout -> flatten -> 2-unit softmax output with an L2 kernel penalty.
## NOTE: every run of this cell appends these layers again; rebuild the model
## in the previous cell before re-running.

head_layers = [
    Dropout(0.2),
    Flatten(),
    Dense(2, activation='softmax', kernel_regularizer=regularizers.l2(0.1)),
]
for head_layer in head_layers:
    model.add(head_layer)

model.summary()

In [None]:
## Compile with Adam at the predetermined learning rate. Labels are integer
## class ids, hence the sparse variants of the loss and the accuracy metric.

adam_optimizer = keras.optimizers.Adam(learning_rate=1e-4)
sparse_crossentropy = keras.losses.SparseCategoricalCrossentropy()

model.compile(
    loss=sparse_crossentropy,
    optimizer=adam_optimizer,
    metrics=['sparse_categorical_accuracy'],
)

In [None]:
## Callback that checks whether the validation accuracy has improved after
## each epoch and saves the model only when it reaches a new maximum.
checkpoint = keras.callbacks.ModelCheckpoint(
    rootdir + 'maxaccuracymodel.h5',
    monitor='val_sparse_categorical_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

## Multiplier for the augmentation strength and the batch size
Mlt = 1
BS = 64

## Train/val split of the shuffled pool: [0:2289] trains, [2289:2861] validates
TRAIN_END = 2289
VAL_END = 2861
train_images, train_labels = combined[0:TRAIN_END], combined_labels[0:TRAIN_END]
val_images, val_labels = combined[TRAIN_END:VAL_END], combined_labels[TRAIN_END:VAL_END]

## Number of augmented batches drawn per epoch
steps_epoch = round(len(train_images) / BS)

## Online image augmentor; all ranges scale with Mlt
augmentation_settings = dict(
    rotation_range=20*Mlt,
    width_shift_range=0.1*Mlt,
    height_shift_range=0.1*Mlt,
    shear_range=0.35*Mlt,
    fill_mode='wrap',
    horizontal_flip=True,
    vertical_flip=True,
)
datagen = ImageDataGenerator(**augmentation_settings)

## Iterator that augments images on the fly; only training data goes through
## it, the validation images are passed to the model unaugmented.
iterator_train = datagen.flow(train_images, train_labels, batch_size=BS)

## Fitting the model.
## NOTE(review): fit_generator is deprecated in newer TensorFlow releases in
## favour of model.fit -- kept here to match the installed version.
history = model.fit_generator(
    iterator_train,
    steps_per_epoch=steps_epoch,
    epochs=3,
    callbacks=[checkpoint],
    validation_data=(val_images, val_labels),
)

## Note, a for loop has been used to train models on multiple combinations of learning and dropout rates. 
## This has been removed to make the code more readable.

In [None]:
###############################################
###############################################
###############################################
###############################################
###############################################
###############################################

In [None]:
## Visualization


In [None]:
## Load previously saved weights into the model, or skip this cell to keep
## using the weights that were just trained.
## NOTE(review): this file name is resolved against the working directory,
## while every other file in the notebook is resolved against rootdir -- confirm.

weights_file = 'finalmodelaccl1l2.h5'
model.load_weights(weights_file)

In [None]:
## Predicting model classes on the test set.
## Sequential.predict_classes() is deprecated and no longer exists in recent
## TensorFlow releases. For a softmax output the predicted class is the index
## of the largest probability, which is what predict_classes() returned.

Predictions = np.argmax(model.predict(combinedtest), axis=-1)
Actual = combinedtest_labels

## Showing the confusion matrix: rows are the true labels, columns the
## predicted labels
y_actu = pd.Series(Actual, name='Actual')
y_pred = pd.Series(Predictions, name='Predicted')
df_confusion = pd.crosstab(y_actu, y_pred)

df_confusion

In [None]:
## Accuracy per epoch for the training and the validation set

fig_acc, ax_acc = plt.subplots(figsize=(10, 7))

accuracy_curves = (
    ('sparse_categorical_accuracy', 'accuracy'),
    ('val_sparse_categorical_accuracy', 'val_accuracy'),
)
for history_key, curve_label in accuracy_curves:
    ax_acc.plot(history.history[history_key], label=curve_label)

ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_ylim([0.5, 1])
ax_acc.legend(loc='lower right')
ax_acc.set_title('Accuracy with data augmentation')

## Saving the figure
#plt.savefig(rootdir + 'output/acc248.png', dpi=100)


In [None]:
## Loss per epoch for the training and the validation set

fig_loss, ax_loss = plt.subplots(figsize=(10, 7))

loss_curves = (('loss', 'loss'), ('val_loss', 'val_loss'))
for history_key, curve_label in loss_curves:
    ax_loss.plot(history.history[history_key], label=curve_label)

ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('loss')
ax_loss.set_ylim([0.0, 1.6])
ax_loss.set_title('Loss with data augmentation')
ax_loss.legend(loc='lower right')

#plt.savefig(rootdir + 'output/1acc248.png', dpi=100)


In [None]:
## Plotting distribution of the data labels

# The style sheet only affects artists created after it is applied, so it has
# to be selected before the figure and the bars are drawn. Selecting it
# afterwards left this figure half default, half ggplot.
plt.style.use('ggplot')

plt.figure(figsize=(12,6))
# Bin edges give one bar centred on label 0 and one centred on label 1
plt.hist(combined_labels, bins =[-0.25,0.25,0.75,1.25], label='vertical')
plt.xlabel('Label')
plt.ylabel('Frequency')
plt.title('Label Distribution of train/validation set')
plt.xlim(-0.5, 1.5)
plt.ylim(0, 2000)

# One tick per label value, from the smallest to the largest label present
plt.xticks(np.arange(min(combined_labels), max(combined_labels)+1, 1.0))


#plt.savefig(rootdir + 'output/LabelDistTrainVal.png', dpi=100)


In [None]:
## Histogram of the normalised pixel values, taken over every pixel of every
## image in `combined`.
## NOTE(review): the title says 'test set', but `combined` (the train/validation
## data) is what gets plotted -- confirm which of the two is intended.

distreshape = combined.reshape(-1)

fig_pixels, ax_pixels = plt.subplots(figsize=(12, 6))
ax_pixels.hist(distreshape, bins=256, range=(-2, 0), color='g')
ax_pixels.set_xlabel('Normalised pixel value')
ax_pixels.set_ylabel('Frequency')
ax_pixels.set_title('Distribution test set')
ax_pixels.set_xlim(-2.1, 0.1)
ax_pixels.set_ylim(0, 8500000)

#plt.savefig(rootdir + 'output/Distribution test.png', dpi=100)
