<h1>Binary VGG Convnet</h1>

<strong>Abstract</strong> Attempt to classify among 2 different faces. Currently, classifying all images as one person. Shows we need more layers.

In [4]:
%load_ext autoreload

In [5]:
%autoreload 2
%matplotlib inline

import os
import fnmatch
import numpy as np
from skimage import io
from skimage.transform import resize
from sklearn.metrics import confusion_matrix

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.utils import np_utils

np.random.seed(123456)

In [24]:
data_path = '../data/'
data_lfw_path = data_path + 'lfw_cropped/'

class1 = 'George_W_Bush'
class2 = 'Colin_Powell'

batch_size = 64
nb_epoch = 12
img_rows, img_cols = 100, 100
test_size_percent = .85
validation_split = .1
random_discard_percent = 0

In [25]:
def get_filenames_separated_from_target(class1, class2):
    class1_files = []
    class2_files = []
    
    for root, dirnames, filenames in os.walk(data_lfw_path):
        for dirname in dirnames:
                for filename in os.listdir(os.path.join(data_lfw_path, dirname)):
                    if filename.endswith(".jpg"):
                        f = os.path.join(root + dirname, filename)
                        if dirname == class1:
                            class1_files.append(f)
                        elif dirname == class2:
                            class2_files.append(f)
    return class1_files, class2_files

In [26]:
def image_read(f):
    return resize(io.imread(f), (img_rows, img_cols))

In [27]:
def get_train_and_test_sets(class1_data, class2_data):
    
    all_data = [(t, 1) for t in class1_data] + [(t, 0) for t in class2_data]

    np.random.shuffle(all_data)
    
    test_size = int(test_size_percent * len(all_data))
    X_train = np.array([x[0] for x in all_data[:test_size]])
    y_train = np.array([x[1] for x in all_data[:test_size]])
    X_test = np.array([x[0] for x in all_data[test_size:]])  
    y_test = np.array([x[1] for x in all_data[test_size:]])
    
    X_train = X_train.reshape(X_train.shape[0], 3, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], 3, img_rows, img_cols)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255

    return (X_train, y_train), (X_test, y_test)

In [28]:
class1_files, class2_files = get_filenames_separated_from_target(class1, class2)

In [29]:
class1_images = [image_read(f) for f in class1_files]
class2_images = [image_read(f) for f in class2_files]

In [30]:
(X_train, y_train), (X_test, y_test) = get_train_and_test_sets(class1_images, class2_images)

In [31]:
VGG = Sequential()

VGG.add(Convolution2D(32, 3, 3, input_shape=(3, img_rows, img_cols)))
VGG.add(Activation('relu'))
VGG.add(Convolution2D(32, 3, 3))
VGG.add(Activation('relu'))
VGG.add(MaxPooling2D(pool_size=(2, 2)))
VGG.add(Dropout(0.25))

VGG.add(Convolution2D(64, 3, 3))
VGG.add(Activation('relu'))
VGG.add(Convolution2D(64, 3, 3))
VGG.add(Activation('relu'))
VGG.add(MaxPooling2D(pool_size=(2, 2)))
VGG.add(Dropout(0.25))

VGG.add(Flatten())

VGG.add(Dense(256))
VGG.add(Activation('relu'))
VGG.add(Dropout(0.5))

VGG.add(Dense(1))
VGG.add(Activation('sigmoid'))

VGG.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              class_mode='binary')

VGG.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, 
        show_accuracy=True, verbose=1, validation_split=validation_split)

Train on 585 samples, validate on 65 samples
Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x107d04050>

In [32]:
score = VGG.evaluate(X_test, y_test, show_accuracy=True, verbose=1)
print('Test score:', score[0])
print('Test accuracy:', score[1])

('Test score:', 0.60910201072692871)
('Test accuracy:', 0.72173913043478266)


In [33]:
y_pred = VGG.predict_classes(X_test)



In [34]:
confusion_matrix(y_test, y_pred)

array([[ 0, 32],
       [ 0, 83]])