### Amir Ebrahimi
### Face Recognition Project

Dataset resource: AT&T Research Lab which includes 400 sample faces from forty people (Ten images per person)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn import svm
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.model_selection import GridSearchCV

%matplotlib inline

#### Building the feature matrix and label vector

In [2]:
n_features = 4096 # Convert each 64*64 image into a row of the feature matrix with 4096 columns


# Name the feature table columns
X = pd.DataFrame(columns = ['Feature' + str(i) for i in range(n_features)])

# There are 400 image samples
for j in range(400):
    image_name = str(j) + '.jpg'
    image_path = 'faceSamples/'
    
    # load images
    image = mpimg.imread(image_path + image_name)
    image_vector = image.reshape(1,4096)
    X.loc[j,:] = image_vector[0]

X

Unnamed: 0,Feature0,Feature1,Feature2,Feature3,Feature4,Feature5,Feature6,Feature7,Feature8,Feature9,...,Feature4086,Feature4087,Feature4088,Feature4089,Feature4090,Feature4091,Feature4092,Feature4093,Feature4094,Feature4095
0,186,183,179,190,212,219,221,232,230,231,...,56,45,47,18,0,8,15,9,3,5
1,204,198,194,195,198,195,190,186,190,193,...,19,0,29,52,90,146,130,120,164,167
2,86,79,82,95,100,102,121,146,169,179,...,160,167,179,169,192,188,197,189,185,141
3,61,90,98,119,144,156,174,177,183,195,...,119,244,246,168,126,187,162,145,58,18
4,99,134,165,183,201,210,215,224,232,236,...,83,90,94,90,70,41,71,103,92,117
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,19,13,7,7,14,18,15,10,33,38,...,158,204,166,68,147,184,188,163,156,143
396,109,145,182,196,195,192,189,184,165,167,...,110,122,69,44,40,49,46,40,43,44
397,163,177,193,202,211,221,227,229,235,238,...,29,10,25,34,43,47,45,44,48,53
398,145,147,151,160,178,196,203,202,201,200,...,168,162,165,152,150,150,124,132,114,117


#### Normalize the data

In [3]:
X = preprocessing.scale(X)

#### Read Labels

In [4]:
df = pd.read_csv('faceSamples/label.csv')

In [5]:
y = df['Label']

In [6]:
y

0      13
1      30
2      34
3      19
4      24
       ..
395    32
396    19
397    11
398     4
399    17
Name: Label, Length: 400, dtype: int64

#### Using sklearn to split dataset into testing and training sets

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=5)

#### Using Principal Component Analysis to reduce dimensionality from 4096 to 50

In [8]:
#  k  is the number of components (new features) after dimensionality reduction
k = 50

my_pca = PCA(n_components = k)

# X_Train is feature matrix of training set before dimensionality reduction, 
# X_Train_New is feature matrix of training set after dimensionality reduction:

X_Train_new = my_pca.fit_transform(X_train)
X_Test_new = my_pca. transform(X_test)
X_Test_new.shape

(100, 50)

#### Designing and Training a non-linear SVM classifier with RBF Kernel to recognize the face based on the training dataset

In [9]:
svm = SVC(C=1, kernel='rbf', gamma=0.0005, random_state=1)

svm.fit(X_Train_new, y_train)

y_pred = svm.predict(X_Test_new)

#### Calculating and reporting the Confusion Matrix 

In [10]:
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

print(metrics.confusion_matrix(y_test, y_pred))

Accuracy: 0.9
[[3 0 0 ... 0 0 0]
 [0 3 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 4 0]
 [0 0 0 ... 0 0 1]]


#### Using GridSearchCV to find the best value for parameter C in SVM

In [11]:
seed = 1
np.random.seed(seed)
neuron_number = [0.1, 1, 10, 100, 1e3, 5e3, 1e4, 5e4, 1e5]

# create a dictionary for grid parameter:
param_grid = dict(C = neuron_number)
print(param_grid,'\n')

{'C': [0.1, 1, 10, 100, 1000.0, 5000.0, 10000.0, 50000.0, 100000.0]} 



In [12]:
k = 50 # k is the number of components (new features) after dimensionality reduction
my_pca = PCA(n_components = k)
X_normalized_pca = my_pca.fit_transform(X)
X_normalized_pca

array([[ 4.88937415e+00, -2.98222806e+01, -1.12538597e+01, ...,
         1.27790930e+00, -5.13057988e-01,  1.89170077e-02],
       [ 5.94927669e+01,  2.92531999e+00,  2.98069529e+01, ...,
         2.22713182e+00,  1.14766093e+00,  1.02023264e+00],
       [ 5.26688749e+01, -6.17710983e+00, -7.20124200e+00, ...,
        -1.16399547e+00, -7.59410721e-01, -4.63859438e+00],
       ...,
       [-2.82722965e+01, -1.48402519e+01, -3.21436921e+01, ...,
         1.12353263e+00,  5.99913428e+00, -3.35544232e-01],
       [-1.77854656e+01,  2.30273088e+01, -1.97221024e+01, ...,
         2.17393593e-01,  5.88445883e+00, -6.33162799e-01],
       [-2.64460608e+01,  1.31387639e+01, -2.01968770e+00, ...,
         1.16158019e+00,  1.66219759e+00,  1.81565265e+00]])

In [13]:
grid = GridSearchCV(svm, param_grid, cv=10, scoring='accuracy')

grid.fit(X_normalized_pca, y)

print(grid.best_params_)

{'C': 10}
