In [1]:
import numpy as np

# TensorFlow Deep learning framework
import tensorflow as tf
# Keras framework and other packages to use tensorflow
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

from sklearn.metrics import confusion_matrix, accuracy_score # Evaluation matrices

import h5py # manipulate HDF5 dataset

Using TensorFlow backend.


In [2]:
# Limiting the GPU resource for tensorflow in Keras
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.Session(config=config)

In [3]:
# Load HDF5 train dataset
h5train = h5py.File('traindataset.h5', 'r')
X_train_images = h5train['X']
Y_train_labels = h5train['Y']

# Convert it to numpy array
X_train_images = np.array(X_train_images.value)
Y_train_labels = Y_train_labels.value.T[0].astype(int)

# Load HDF5 validation dataset
h5val = h5py.File('valdataset.h5', 'r')
X_val_images = h5val['X']
Y_val_labels = h5val['Y']

# Convert it to numpy array
X_val_images = np.array(X_val_images.value)
Y_val_labels = Y_val_labels.value.T[0].astype(int)

In [4]:
# Design the neural net
# Swetha Subramanian neural net, however I've ommited Image processing and Image Augmentation that Subramanian did in tflearn
model = Sequential()
model.add(Conv2D(50, 3, activation='relu', kernel_initializer="uniform", input_shape=(50, 50, 3)))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(64, 3, activation='relu'))
model.add(Conv2D(64, 3, activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer = 'adam', loss='binary_crossentropy', metrics=['accuracy'])

In [5]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 48, 48, 50)        1400      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 24, 24, 50)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 22, 22, 64)        28864     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 20, 20, 64)        36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 10, 10, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 6400)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               3277312   
__________

In [6]:
# model.fit(X_train_images, Y_train_labels, batch_size=96, epochs=100, validation_data=(X_val_images, Y_val_labels),shuffle=True)
model.fit(X_train_images, Y_train_labels, batch_size=96, epochs=100, validation_split=0.2,shuffle=True)

Train on 5501 samples, validate on 1376 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100


Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7f72006e5e10>

In [7]:
# Load HDF5 test datataset
h5test = h5py.File('testdataset.h5', 'r')
X_test_images = h5test['X']
Y_test_labels = h5test['Y']

# Convert it to numpy array
X_test_images = np.array(X_test_images.value)
Y_test_labels = Y_test_labels.value.T[0].astype(int)

In [8]:
y_probs = model.predict(X_test_images)

In [9]:
# Convert the predictions into binary values (0/1) with threshold > 0.5 for 1
yPreds = (y_probs > 0.5)

In [10]:
# Definition for performance measurement
# Input: target and prediction
# Output: confusion matrix and accuracy

def performance_measure(target, prediction):
    accuracy = accuracy_score(target, prediction)
    confusion = confusion_matrix(target, prediction)
    
    return confusion, accuracy

In [11]:
# Performance measure
performance_measure(Y_test_labels, yPreds)

(array([[ 282,    0],
        [1340,    0]]), 0.17385943279901356)