### George Bush
## Phase 3 - Implementing convolutional neural network
## Phase 4 - Use transfer learning to use pre-trained weights as initializations

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras


from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D

from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix, f1_score, precision_score, recall_score
from keras.models import load_model

Using TensorFlow backend.


In [2]:
X = pd.read_csv("X.csv", sep=' ', header=None, dtype=float)
X = X.values
print(X.shape)

(13233, 4096)


In [3]:
#reshaping the array
len(X)
X1 = np.zeros((13233,64,64))
for i in range(len(X)):
    X1[i] = X[i].reshape(64,64)

In [4]:
y = pd.read_csv("y_bush_vs_others.csv", header=None)
y_bush = y.values.ravel()
y = pd.read_csv("y_williams_vs_others.csv", header=None)
y_williams = y.values.ravel()

In [5]:
np.sum(y_williams)

52

In [6]:
print(X1.shape,y_bush.shape,y_williams.shape)

(13233, 64, 64) (13233,) (13233,)


In [7]:
seed1 = 3243
X_train, X_test, y_train, y_test = train_test_split(X1, y_bush, test_size = 1./3, random_state = seed1, shuffle = True,stratify = y)
X_train = X_train.reshape(8822,64,64,1)
X_test = X_test.reshape(4411,64,64,1)
print(X_train.shape,y_train.shape,X_test.shape)
# X_test = X_test.reshape(10000,28,28,1)

(8822, 64, 64, 1) (8822,) (4411, 64, 64, 1)


In [8]:
y_test.shape

(4411,)

In [82]:
#Keras model 
model = Sequential()
model.add(Conv2D(32, kernel_size=(5, 5), strides=(1, 1),
                 activation='relu',
                 input_shape=(64, 64, 1)))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Conv2D(64, (5, 5), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

## Loading the pre trained model

In [9]:
model = load_model('tf_model.model')

In [10]:
# model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=25)

eval_metrics = model.evaluate(X_test, y_test)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [11]:
print(eval_metrics)

[0.04552285264771262, 0.9882112899569259]


In [12]:
model.predict(X1[0].reshape(1,64,64,1))

array([[1.8496105e-07]], dtype=float32)

In [13]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 64, 64, 8)         80        
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 32, 32, 8)         0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 32, 32, 16)        1168      
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 16, 16, 16)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 16, 16, 32)        4640      
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 8, 8, 32)          0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 8, 8, 64)          18496     
__________

In [14]:
pred = model.predict_classes(X_test.reshape(4411,64,64,1))
print(pred)

[[0]
 [0]
 [0]
 ...
 [0]
 [0]
 [0]]


In [15]:
pred_train = model.predict_classes(X_train.reshape(8822,64,64,1))

In [16]:
print(np.sum(pred),np.sum(y_test))
print(np.sum(pred_train),np.sum(y_train))

129 163
365 367


In [17]:
#Test F1

val_f1 = f1_score(y_test, pred)
val_recall = recall_score(y_test, pred)
val_precision = precision_score(y_test, pred)
print(val_f1,val_recall,val_precision)

0.821917808219178 0.7361963190184049 0.9302325581395349


In [20]:
f1_score(y_test, pred, pos_label=1,average='binary')

0.821917808219178

In [18]:
#Train f1
val_f1 = f1_score(y_train, pred_train)
val_recall = recall_score(y_train, pred_train)
val_precision = precision_score(y_train, pred_train)
print(val_f1,val_recall,val_precision)

0.9945355191256832 0.9918256130790191 0.9972602739726028


In [19]:
model.layers

[<keras.layers.convolutional.Conv2D at 0x1fa20b02dd8>,
 <keras.layers.pooling.MaxPooling2D at 0x1fa2db67320>,
 <keras.layers.convolutional.Conv2D at 0x1fa2db67080>,
 <keras.layers.pooling.MaxPooling2D at 0x1fa211f8eb8>,
 <keras.layers.convolutional.Conv2D at 0x1fa2db67198>,
 <keras.layers.pooling.MaxPooling2D at 0x1fa211f8b00>,
 <keras.layers.convolutional.Conv2D at 0x1fa20ace080>,
 <keras.layers.pooling.MaxPooling2D at 0x1fa20aa7cf8>,
 <keras.layers.core.Flatten at 0x1fa20a89a20>,
 <keras.layers.core.Dropout at 0x1fa20af6710>,
 <keras.layers.core.Dense at 0x1fa20af66d8>,
 <keras.layers.core.Dense at 0x1fa3324fba8>]

In [21]:
model.save("bush.model")

In [54]:
del model