# Machine Learning Nanodegree - Capstone Project

## Who is in the photo?

The proposal of this project is to build a model that allow to recognize persons in the image using Deep Learning.

### Step 0: Import dataset

In [37]:
from sklearn.datasets import load_files       
from keras.utils import np_utils
import numpy as np
from glob import glob

def load_face_dataset(path):
    data = load_files(path)
    general_targets = data['target']
    face_files = np.array(data['filenames'])
    face_targets = np_utils.to_categorical(np.array(data['target']), 16)
    return face_files, face_targets

train_files, train_targets = load_face_dataset('faces2/train')
valid_files, valid_targets = load_face_dataset('faces2/valid')
test_files, test_targets = load_face_dataset('faces2/test')

person_names = [item[12:-1] for item in sorted(glob("faces2/train/*/"))]


print('There are %d total person names.' % len(person_names))
print('There are %s total face images.\n' % len(np.hstack([train_files, valid_files, test_files])))
print('There are %d training face images.' % len(train_files))
print('There are %d validation face images.' % len(valid_files))
print('There are %d test face images.'% len(test_files))

There are 16 total person names.
There are 2803 total face images.

There are 2245 training face images.
There are 277 validation face images.
There are 281 test face images.


#### Loading image data into array

In [38]:
from keras.preprocessing import image                  
from tqdm import tqdm

def path_to_tensor(img_path):
    # loads RGB image as PIL.Image.Image type
    img = image.load_img(img_path, target_size=(100, 100))
    x = image.img_to_array(img)
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

#### Loading Image for baseline model

In [39]:
import cv2

def load_image_from(img_path):
    img = cv2.imread(img_path)
    img = cv2.resize(img,(100,100))
    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

def load_images_from(img_paths):
    images = [load_image_from(path) for path in tqdm(img_paths)]
    return np.array(images)

#### Load tensors to train, test and validations

In [40]:
from PIL import ImageFile                            
ImageFile.LOAD_TRUNCATED_IMAGES = True                 

# pre-process the data for Keras
train_tensors = paths_to_tensor(train_files).astype('float32')/255
valid_tensors = paths_to_tensor(valid_files).astype('float32')/255
test_tensors = paths_to_tensor(test_files).astype('float32')/255



100%|██████████| 2245/2245 [00:00<00:00, 3075.07it/s]
100%|██████████| 277/277 [00:00<00:00, 3656.32it/s]
100%|██████████| 281/281 [00:00<00:00, 3589.36it/s]


#### Load image to train, test and validations of a baseline model

In [41]:
# base model
base_train_images = load_images_from(train_files).astype('float32')/255
base_test_images = load_images_from(test_files).astype('float32')/255
base_valid_images = load_images_from(valid_files).astype('float32')/255

100%|██████████| 2245/2245 [00:00<00:00, 5836.67it/s]
100%|██████████| 281/281 [00:00<00:00, 7174.25it/s]
100%|██████████| 277/277 [00:00<00:00, 6948.45it/s]


### Step 1:  Train a base line model

In [42]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import fetch_lfw_people
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from time import time


#### Create a lookup table

In [43]:
def create_lookup_table(targets):
    lookup = []
    for target in targets:
        index = np.where(target==1)[0]
        lookup.append(index[0])
    return np.array(lookup)


In [44]:
base_train_targets = create_lookup_table(train_targets)
base_test_targets = create_lookup_table(test_targets)
base_valid_targets = create_lookup_table(valid_targets)


#### Creating a function to execute a PCA

In [49]:
def execute_pca(train, test, n_components=16):
    train = train[:,:,0]
    test = test[:,:,0]
    print("Extracting the top %d eigenfaces from %d faces"
        % (n_components, train.shape[0]))
    t0 = time()
    pca = PCA(n_components=n_components, svd_solver='randomized',
            whiten=True).fit(train)
    print("done in %0.3fs" % (time() - t0))
    # eigenfaces = pca.components_.reshape((n_components, h, w))
    print("Projecting the input data on the eigenfaces orthonormal basis")
    t0 = time()
    X_train_pca = pca.transform(train)
    X_test_pca = pca.transform(test)
    print("done in %0.3fs" % (time() - t0))
    return X_train_pca, X_test_pca


#### Creating a function to training a model 

In [50]:
def apply_svm(X_train, y_train):
    # Train a SVM classification model
    print "Fitting the classifier to the training set"
    param_grid = {
    'C': [1, 5, 10, 50, 100],
    'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
    }
    clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    clf = clf.fit(X_train, y_train)
    print "Best estimator found by grid search:"
    print clf.best_estimator_
    return clf


### Train the model

In [None]:
X_train = base_train_images
y_train = base_train_targets

X_test  = base_test_images
y_test  = base_test_targets

X_train_pca, X_test_pca = execute_pca(X_train,X_test)
clf = apply_svm(X_train_pca, y_train)


Extracting the top 16 eigenfaces from 2245 faces
done in 0.090s
Projecting the input data on the eigenfaces orthonormal basis
done in 0.005s
Fitting the classifier to the training set


#### Show Results

In [48]:
y_pred = clf.predict(X_test_pca)
print classification_report(y_test, y_pred, target_names=person_names)

print confusion_matrix(y_test, y_pred, labels=range(len(person_names)))


                   precision    recall  f1-score   support

    /Adam Sandler       0.00      0.00      0.00        11
    /Alec Baldwin       1.00      0.10      0.18        10
  /Angelina Jolie       0.14      0.05      0.07        21
 /Anna Kournikova       0.00      0.00      0.00        17
  /Ashton Kutcher       0.00      0.00      0.00        10
   /Avril Lavigne       0.10      0.50      0.16        30
    /Barack Obama       0.14      0.26      0.18        27
     /Ben Affleck       0.00      0.00      0.00        12
 /Beyonce Knowles       0.00      0.00      0.00        13
       /Brad Pitt       0.17      0.10      0.12        30
    /Cameron Diaz       0.20      0.08      0.11        25
  /Cate Blanchett       0.00      0.00      0.00        16
 /Charlize Theron       0.00      0.00      0.00        20
 /Christina Ricci       0.00      0.00      0.00        14
/Claudia Schiffer       0.09      0.08      0.09        12
      /Clive Owen       0.00      0.00      0.00       

  'precision', 'predicted', average, warn_for)
