<h5>Importing libraries</h5>

In [1]:
#data handling
import pandas as pd
import numpy as np
import os 
from PIL import Image

# Machine learning (non deep learning)
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder 

# Deep learning
import tensorflow as tf
import keras
from keras import layers
from keras_preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization

# plotting
import seaborn as sns
import matplotlib.pyplot as plt

#misc
import time

<h5>Read the images into NumPy arrays and record each tumor label with a one-hot encoder</h5>

In [2]:
# Target (width, height) every image is resized to before being stored.
img_reshape_size = (32,32)

data = []
result = []

# One-hot encoder over the two class ids used below: 1 for tumor, 0 for no tumor.
encoder = OneHotEncoder()
encoder.fit([[0], [1]])

# path to tumor files
tumor_image_dir = "dataset/split_data/binary/tumor"
# os.listdir returns names in arbitrary order; sorting keeps the sample order
# (and therefore any seeded split made from it) the same on every run and machine.
tumor_files = sorted(os.listdir(tumor_image_dir))

# path to non tumor files
no_tumor_image_dir = "dataset/split_data/binary/no_tumor"
no_tumor_files = sorted(os.listdir(no_tumor_image_dir))

# Tumor images are appended first, then non-tumor images.
for image_dir, files, label in (
    (tumor_image_dir, tumor_files, 1),
    (no_tumor_image_dir, no_tumor_files, 0),
):
    # The encoded label is identical for every file of a class, so compute it once.
    one_hot_label = encoder.transform([[label]]).toarray()
    for file in files:
        # Build the path from the directory variable instead of retyping it.
        img = Image.open(os.path.join(image_dir, file)).convert('L')  # greyscale
        img = img.resize(img_reshape_size)
        data.append(np.array(img))
        result.append(one_hot_label)

<h5>Reshape data and split between test and train data</h5>

In [3]:
# Stack the per-image arrays into a single (n_images, height, width) array.
data = np.array(data)
print(data.shape)

# Each stored label is a (1, 2) one-hot row; flatten to (n_images, 2).
# -1 lets NumPy infer the image count, so this keeps working when the dataset size changes.
result = np.array(result).reshape(-1, 2)

# 80/20 split with a fixed seed; the held-out part is used as validation data during training.
x_train,x_test,y_train,y_test = train_test_split(data, result, test_size=0.2, shuffle=True, random_state=0)

(3000, 32, 32)


<h5>Build the CNN model</h5>

In [4]:
# Stop training once validation accuracy has failed to improve for 3 consecutive epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=3)

# Two convolutional stages (conv-conv-batchnorm-pool-dropout) followed by a dense classifier head.
# NOTE(review): the very first Conv2D is the only conv layer without an activation - confirm this is intended.
model = Sequential([
    # Stage 1: 32 filters on the 32x32 single-channel input
    Conv2D(32, kernel_size=(2, 2), input_shape=(32, 32, 1), padding='Same'),
    Conv2D(32, kernel_size=(2, 2), activation='relu', padding='Same'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: 64 filters
    Conv2D(64, kernel_size=(2, 2), activation='relu', padding='Same'),
    Conv2D(64, kernel_size=(2, 2), activation='relu', padding='Same'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2), strides=(2, 2)),
    Dropout(0.25),

    # Classifier head: softmax over the two one-hot classes
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(2, activation='softmax'),
])

model.compile(
    loss="categorical_crossentropy",
    optimizer='Adamax',
    metrics=['accuracy'],
)
#print(model.summary())
#print(model.summary())

<h5>Train the model and record time to train</h5>

In [5]:
# Time the whole fit call with wall-clock time.
start_time = time.time()
history = model.fit(
    x_train,
    y_train,
    epochs=30,
    batch_size=40,
    verbose=1,
    validation_data=(x_test, y_test),
    callbacks=[callback],  # early stopping on val_accuracy
)
elapsed_seconds = time.time() - start_time
print("--- %s seconds ---" % elapsed_seconds)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
--- 54.934688329696655 seconds ---


<h6>Log of earlier runs (training time, epochs, accuracy, image size), kept here for reference</h6>

30.509965896606445 seconds --- 30 epochs , 0.9683 acc , 16x16 Greyscale image <br>
86.94418954849243 seconds --- 30 epochs , 0.9783 acc , 32x32 Greyscale image <br>
169.6840739250183 seconds --- 60 epochs , 0.9667 acc , 32x32 Greyscale image <br>
335.7380225658417 seconds --- 30 epochs , 0.9700 acc , 64x64 Greyscale image <br>


In [6]:
# Display the per-epoch metrics recorded by model.fit: a dict mapping each metric
# name (e.g. 'loss', 'accuracy', 'val_loss') to a list with one value per epoch.
history.history

{'loss': [0.9647661447525024,
  0.33124059438705444,
  0.2520950138568878,
  0.22493906319141388,
  0.20380254089832306,
  0.15564385056495667,
  0.16696879267692566,
  0.1446661502122879,
  0.1372992843389511,
  0.1197928637266159,
  0.11859504878520966,
  0.10563132911920547,
  0.09437718242406845,
  0.09706917405128479,
  0.07943117618560791,
  0.0877363383769989,
  0.07492616772651672],
 'accuracy': [0.8395833373069763,
  0.8837500214576721,
  0.9012500047683716,
  0.9104166626930237,
  0.9200000166893005,
  0.9329166412353516,
  0.9320833086967468,
  0.9395833611488342,
  0.9441666603088379,
  0.9495833516120911,
  0.9524999856948853,
  0.9579166769981384,
  0.9549999833106995,
  0.9579166769981384,
  0.9679166674613953,
  0.9637500047683716,
  0.9725000262260437],
 'val_loss': [1.4871598482131958,
  0.29057687520980835,
  0.22846278548240662,
  0.18461869657039642,
  0.149740532040596,
  0.133419930934906,
  0.11488857120275497,
  0.10527548938989639,
  0.11940398812294006,
  0.0

<h5>Read in the validation data in the same way we read in the training and testing data</h5>

In [7]:
# Target (width, height) every image is resized to before being stored.
img_reshape_size = (32,32)

validation_data = []
validation_result = []

# path to non tumor files
no_tumor_image_dir = "test_dataset/test/split_data/binary/no_tumor"
# os.listdir returns names in arbitrary order; sorting keeps the sample order stable across runs.
no_tumor_files = sorted(os.listdir(no_tumor_image_dir))

# path to tumor files
tumor_image_dir = "test_dataset/test/split_data/binary/tumor"
tumor_files = sorted(os.listdir(tumor_image_dir))

# One-hot encoder over the two class ids used below: 0 for no tumor, 1 for tumor.
test_encoder = OneHotEncoder()
test_encoder.fit([[0], [1]])

# Non-tumor images are appended first, then tumor images.
for image_dir, files, label in (
    (no_tumor_image_dir, no_tumor_files, 0),
    (tumor_image_dir, tumor_files, 1),
):
    # The encoded label is identical for every file of a class, so compute it once.
    one_hot_label = test_encoder.transform([[label]]).toarray()
    for file in files:
        # Build the path from the directory variable instead of retyping it.
        # not an RGB image so import as greyscale
        img = Image.open(os.path.join(image_dir, file)).convert('L')
        img = img.resize(img_reshape_size)
        validation_data.append(np.array(img))
        validation_result.append(one_hot_label)

<h5>Reshape the validation data</h5>

In [8]:
# Stack the per-image arrays into a single (n_images, height, width) array.
validation_data = np.array(validation_data)

# Each stored label is a (1, 2) one-hot row; flatten to (n_images, 2).
# -1 lets NumPy infer the image count instead of hard-coding it.
validation_result = np.array(validation_result).reshape(-1, 2)

<h5>Get and print scores</h5>

In [9]:
# Evaluate the trained model on the separately loaded images. With the loss and
# the single 'accuracy' metric set at compile time, scores is [loss, accuracy].
# verbose=0 is the documented silent mode for Model.evaluate.
scores = model.evaluate(validation_data, validation_result, verbose=0)

In [10]:
# scores holds the loss first and the accuracy second.
test_loss, test_accuracy = scores[0], scores[1]
print(f"test loss: {test_loss}")
print(f"test accuracy: {test_accuracy}")

test loss: 0.07661273330450058
test accuracy: 0.9649999737739563
