# COS 429 Final Project
## VGG Face

Initial setup:
- Create instance (p2.xlarge)
- `scp` the .caffemodel and .prototxt files over
- Create ssl cert and password for Jupyter notebook

To get this up and running on AWS (after initial setup):
- `sudo ssh -i thesis.pem -L 443:127.0.0.1:8888 ubuntu@...`
- `127.0.0.1`
- Password: use the Jupyter password created during initial setup (do not store the password in this notebook)
- `source activate theano_p36`
- `conda install -c anaconda pillow`
- `conda install h5py`
- `conda install scikit-learn`
- `jupyter notebook`
- `scp -i cos429.pem *.py ubuntu@...:~/cos429/`

This uses the Keras weights for VGG_FACE (it was hard to get the caffemodel and t7 files working with caffe2/pytorch). The weights were converted from the vgg-face matconvnet model as shown here: https://gist.github.com/EncodeTS/6bbe8cb8bebad7a672f0d872561782d9.

Before stopping the instance, remember to download the latest .ipynb file so it can be committed to the GitHub repository. Terminating the instance deletes all files on it.

In [1]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

import time
import os
import sys

from keras.models import Model
from keras.layers import Input, Convolution2D, ZeroPadding2D, MaxPooling2D, Flatten, Dense, Dropout, Activation

from keras import backend as K
K.set_image_dim_ordering('th')

from PIL import Image

Using Theano backend.


In [2]:
# Relative path to the Keras HDF5 weights file for VGG Face; it is passed to
# vgg_face(), which hands it to model.load_weights().
weights_path = 'vgg-face-keras.h5'

In [109]:
# This network architecture is derived from Table 3 of the CNN described in Parkhi et al.
# and based on Keras code provided in https://gist.github.com/EncodeTS/6bbe8cb8bebad7a672f0d872561782d9

def vgg_face(weights_path=None):
    """Build the VGG Face classifier and optionally load pretrained weights.

    The network takes channels-first input of shape (3, 224, 224) and ends in
    a softmax over 2622 outputs.

    Args:
        weights_path: Path passed to ``model.load_weights``. When falsy
            (the default), the model is returned with freshly initialised
            weights.

    Returns:
        The assembled Keras ``Model``.
    """
    # (number of filters, number of 3x3 conv layers) for conv blocks 1..5.
    conv_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    img = Input(shape=(3, 224, 224))
    x = img
    for block_idx, (filters, num_convs) in enumerate(conv_blocks, start=1):
        for conv_idx in range(1, num_convs + 1):
            # Explicit 1-pixel zero padding keeps the spatial size unchanged
            # through each 3x3 convolution.
            x = ZeroPadding2D(padding=(1, 1))(x)
            x = Convolution2D(
                filters, (3, 3), activation='relu',
                name='conv%d_%d' % (block_idx, conv_idx))(x)
        # Each block ends by halving the spatial resolution.
        x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    # These layers are used in the original VGG Face paper for their dataset of
    # 2,622 individuals. They are expressed as convolutions (7x7, then 1x1)
    # rather than Dense layers. The 4096-channel output of 'fc7' is the layer
    # that partial_vgg_face() exposes as the face descriptor.
    x = Convolution2D(4096, (7, 7), activation='relu', name='fc6')(x)
    x = Dropout(0.5)(x)
    x = Convolution2D(4096, (1, 1), activation='relu', name='fc7')(x)
    x = Dropout(0.5)(x)
    x = Convolution2D(2622, (1, 1), name='fc8')(x)
    x = Flatten()(x)
    out = Activation('softmax')(x)

    model = Model(inputs=img, outputs=out)

    if weights_path:
        model.load_weights(weights_path)

    return model

# Returns a model whose output is the 4096-dimensional face descriptor
def partial_vgg_face():
    """Build VGG Face truncated at the 'fc7' layer.

    The full network is constructed with the weights named by the
    module-level ``weights_path`` and a new model is returned that shares its
    input but stops at the output of 'fc7'.

    Returns:
        A Keras ``Model`` mapping an input image batch to the 'fc7' output.
    """
    full_model = vgg_face(weights_path)
    descriptor_output = full_model.get_layer('fc7').output
    return Model(inputs=full_model.input, outputs=descriptor_output)

In [4]:
# Sanity check: push a single image through the full classifier
im = Image.open('A.J._Buckley.jpg').resize((224, 224))
im = np.array(im).astype(np.float32)
# Per-channel mean subtraction is currently disabled for this check:
# im[:,:,0] -= 129.1863
# im[:,:,1] -= 104.7624
# im[:,:,2] -= 93.5940
# Move channels to the front and add a leading batch axis.
im = np.expand_dims(im.transpose((2, 0, 1)), axis=0)
print('Shape:', im.shape)

model = vgg_face(weights_path)
out = model.predict(im)
# Softmax score of output index 0 for the (only) image in the batch.
print(out[0][0])

Shape: (1, 3, 224, 224)
0.999718


In [111]:
# Sanity check: push the same image through the truncated (descriptor) model
model = partial_vgg_face()
im = Image.open('A.J._Buckley.jpg').resize((224, 224))
im = np.array(im).astype(np.float32)
# Move channels to the front and add a leading batch axis.
im = np.expand_dims(im.transpose((2, 0, 1)), axis=0)

descriptor = model.predict(im)
print(descriptor.shape)

(1, 4096, 1, 1)


In [8]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 3, 224, 224)       0         
_________________________________________________________________
zero_padding2d_14 (ZeroPaddi (None, 3, 226, 226)       0         
_________________________________________________________________
conv1_1 (Conv2D)             (None, 64, 224, 224)      1792      
_________________________________________________________________
zero_padding2d_15 (ZeroPaddi (None, 64, 226, 226)      0         
_________________________________________________________________
conv1_2 (Conv2D)             (None, 64, 224, 224)      36928     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 64, 112, 112)      0         
_________________________________________________________________
zero_padding2d_16 (ZeroPaddi (None, 64, 114, 114)      0         
__________

In [45]:
% load_ext autoreload
% aimport experiment
% aimport manipulations
% autoreload 1

from sklearn.datasets import fetch_lfw_people

import manipulations
import experiment
from manipulations import ManipulationInfo

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [105]:
def get_lfw_dataset(min_faces_per_person, manipulation_info):
    """Load LFW faces, split them into train/test and centre each channel.

    Images are fetched in colour, cropped with a 250x250 slice and resized by
    0.896 (250 * 0.896 = 224, matching the 224x224 network input). After the
    split, each subset is transposed to channels-first layout and a fixed
    per-channel mean is subtracted.

    Args:
        min_faces_per_person: Forwarded to ``fetch_lfw_people``.
        manipulation_info: Currently unused; the manipulation step that would
            consume it is disabled.

    Returns:
        Tuple ``(train_data, train_targets, test_data, test_targets)``.
    """
    lfw = fetch_lfw_people(
        min_faces_per_person=min_faces_per_person,
        color=True,
        slice_=(slice(0, 250, None), slice(0, 250, None)),
        resize=0.896)
    images = lfw.images
    # Disabled step, kept for reference:
    # images = manipulations.perform_manipulation(images, manipulation_info)

    train_indices, test_indices = experiment.split_traintest(lfw.target)

    # Fixed means subtracted from channels 0, 1, 2 in that order. The original
    # note labels these "BGR"; whether fetch_lfw_people() delivers RGB or BGR
    # was left unresolved (a channel flip was tried and disabled) -- TODO confirm.
    # Original author's observation: fetch_lfw_people() appears to alter the
    # images (colouring is off, and pixels are 0-255 rather than the 0-1 its
    # documentation states).
    channel_means = (129.1863, 104.7624, 93.5940)

    def centered_channels_first(subset):
        # Reorder (N, H, W, C) -> (N, C, H, W), then centre each channel by
        # writing the shifted values back into the transposed array.
        subset = subset.transpose((0, 3, 1, 2))
        for channel, mean in enumerate(channel_means):
            subset[:, channel, :, :] = subset[:, channel, :, :] - mean
        return subset

    train_data = centered_channels_first(images[train_indices, :])
    train_targets = lfw.target[train_indices]
    test_data = centered_channels_first(images[test_indices, :])
    test_targets = lfw.target[test_indices]

    return train_data, train_targets, test_data, test_targets

In [115]:
def get_descriptors(model, data):
    """Return ``model.predict(data)``, i.e. the model's output for ``data``.

    Args:
        model: Any object exposing a ``predict(data)`` method.
        data: Input batch handed to ``model.predict`` unchanged.

    Returns:
        Whatever ``model.predict`` returns.
    """
    return model.predict(data)

In [None]:
def predict(train, test):
    """Placeholder for the descriptor-based prediction step.

    The function was declared without a body, which is a SyntaxError and
    prevents the cell from running. Until the intended behaviour is decided,
    it fails loudly instead.

    Args:
        train: Presumably the training descriptors -- TODO confirm contract.
        test: Presumably the descriptors to classify -- TODO confirm contract.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("predict() has not been implemented yet")

In [None]:
def _flatten_descriptors(descriptors):
    """Reshape a batch of descriptors to 2-D: (num_samples, num_features)."""
    descriptors = np.asarray(descriptors, dtype=np.float64)
    return descriptors.reshape(len(descriptors), -1)


def _nearest_neighbor_accuracy(train_descriptors, train_targets,
                               query_descriptors, query_targets,
                               exclude_self=False):
    """Accuracy of labelling each query with its closest training sample's target.

    Closeness is squared Euclidean distance between flattened descriptors.
    With ``exclude_self=True`` the diagonal of the distance matrix is ignored,
    so a sample cannot match itself when the query set is the training set.
    """
    train = _flatten_descriptors(train_descriptors)
    query = _flatten_descriptors(query_descriptors)
    # ||q - t||^2 = ||q||^2 - 2 q.t + ||t||^2, computed without materialising
    # the (num_query, num_train, num_features) difference tensor.
    sq_dists = (np.sum(query ** 2, axis=1)[:, np.newaxis]
                - 2.0 * np.dot(query, train.T)
                + np.sum(train ** 2, axis=1)[np.newaxis, :])
    if exclude_self:
        np.fill_diagonal(sq_dists, np.inf)
    nearest = np.argmin(sq_dists, axis=1)
    predicted = np.asarray(train_targets)[nearest]
    return float(np.mean(predicted == np.asarray(query_targets)))


def run_experiment():
    """Extract VGG Face descriptors for LFW and score a 1-nearest-neighbour classifier.

    Steps: build the truncated model, load the train/test split, compute
    descriptors for both subsets (timed separately), classify each test
    descriptor by its nearest training descriptor, and print/return the
    resulting statistics.

    "Training Time" covers descriptor extraction for the training set.
    "Testing Time" covers descriptor extraction plus nearest-neighbour
    classification for the test set. Train accuracy is leave-one-out (each
    training sample is matched against all *other* training samples), since
    matching a sample against itself would trivially score 100%.

    Returns:
        dict of summary statistics keyed by human-readable names.
    """
    print('Loading model')
    model = partial_vgg_face()
    model_name = 'VGG Face fc7 descriptor + 1-nearest-neighbor'

    print('Loading dataset')
    min_faces_per_person = 20
    manipulation_info = ManipulationInfo("none", {})
    train_data, train_targets, test_data, test_targets = get_lfw_dataset(
        min_faces_per_person, manipulation_info=manipulation_info)

    # Train: there is no fitting step, only descriptor extraction.
    # perf_counter() replaces time.clock(), which was removed in Python 3.8.
    print('Training')
    start = time.perf_counter()
    train_descriptors = get_descriptors(model, train_data)
    train_time = time.perf_counter() - start

    # Test
    print('Testing')
    start = time.perf_counter()
    test_descriptors = get_descriptors(model, test_data)
    test_accuracy = _nearest_neighbor_accuracy(
        train_descriptors, train_targets, test_descriptors, test_targets)
    test_time = time.perf_counter() - start

    # Computed outside the timed sections so it does not skew either timing.
    train_accuracy = _nearest_neighbor_accuracy(
        train_descriptors, train_targets, train_descriptors, train_targets,
        exclude_self=True)

    # Print results.
    num_faces = len(np.unique(train_targets))
    print("Manipulation info: %s" % str(manipulation_info))
    print("Recognition Algorithm: %s" % model_name)
    print("Number of distinct faces: %d" % num_faces)
    print("Chance rate: %f" % (1 / num_faces))
    print("Train accuracy: %f" % train_accuracy)
    print("Test accuracy: %f" % test_accuracy)
    print("Training Time: %s sec" % train_time)
    print("Testing Time: %s sec" % test_time)
    print("\n")

    return {
        # Keys below are still disabled, as in the original cell:
        # "Manipulation Type": manipulation_info.type,
        # "Manipulation Parameters": manipulation_info.parameters,
        # "Recognition Algorithm": model_name,
        "Min Faces Per Person": min_faces_per_person,
        "Number of Distinct Faces": num_faces,
        "Chance Rate": (1 / num_faces),
        "Train Accuracy": train_accuracy,
        "Test Accuracy": test_accuracy,
        "Training Time": train_time,
        "Testing Time": test_time,
    }

In [None]:
stats = run_experiment()