# Edgar Galindo

Importing all of the necessary libraries.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from time import time

Render matplotlib figures inline, directly below the cell that produces them.

In [2]:
%matplotlib inline


Read the images. The `images` list stores the images for display, and `x` is the feature matrix (one flattened image per row).

In [3]:
# Load the face images and their labels.
#   images -> list of the pixel arrays as read from disk (kept for display)
#   x      -> feature matrix, one flattened image per row
#   y      -> the 'Label' column of label.csv
FACE_DIR = './Face/'
N_IMAGES = 400  # files are named 0.jpg ... 399.jpg

x = []
images = []
for u in range(N_IMAGES):
    image = mpimg.imread(FACE_DIR + str(u) + '.jpg')
    images.append(image)
    x.append(image.ravel())  # flatten the pixel array into a single feature row

x = pd.DataFrame(x)
labels = pd.read_csv(FACE_DIR + 'label.csv')
y = labels['Label']

# Sanity check: the label file must contain exactly one row per loaded image.
assert len(y) == len(x), (
    'label.csv has %d rows but %d images were loaded' % (len(y), len(x))
)

### c.
Using the sklearn library to scale the data to use with SVM

In [4]:
# Standardise every feature column to zero mean and unit variance; the keyword
# arguments below simply spell out scale()'s defaults.
# NOTE(review): the statistics are computed over all rows of x, i.e. before the
# train/test split, so held-out images influence the scaling. Consider fitting
# a StandardScaler on the training rows only.
xScaled = preprocessing.scale(x, axis=0, with_mean=True, with_std=True)


Splitting the data into testing and training.

In [5]:
# Hold out 25% of the samples for testing; the fixed seed keeps the split repeatable.
xTrain, xTest, yTrain, yTest = train_test_split(
    xScaled,
    y,
    test_size=0.25,
    random_state=5,
)


Performing dimensionality reduction on the features using PCA to go from 4096 features to 50.

In [24]:
# Project the scaled pixel features onto the first k principal components.
k = 50

# Seed the PCA: with the default svd_solver='auto', scikit-learn may pick the
# randomized SVD solver for an input of this size, which would make the
# projection (and everything downstream) vary from run to run.
myPCA = PCA(n_components=k, random_state=1)

# Learn the components from the training rows only, then apply the same
# projection to both the training and the test rows.
myPCA.fit(xTrain)
xTrainNew = myPCA.transform(xTrain)
xTestNew = myPCA.transform(xTest)

# Reduced training matrix vs. the original (unreduced) feature matrix.
print(xTrainNew.shape)
print(x.shape)

(300, 50)
(400, 4096)


In [25]:
# Shapes of the reduced training and test matrices, one per line.
print(xTrainNew.shape, xTestNew.shape, sep='\n')

(300, 50)
(100, 50)



Instantiating SVC model.

In [33]:
# RBF-kernel support vector classifier with hand-picked C and gamma.
mySVC = SVC(
    kernel='rbf',
    C=1,
    gamma=0.0005,
    random_state=1,
)

Training SVC model with the dimensionally reduced features.

In [34]:
# Fit on the PCA-reduced training features; left as the cell's last expression
# so the fitted estimator is displayed.
mySVC.fit(X=xTrainNew, y=yTrain)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.0005, kernel='rbf',
  max_iter=-1, probability=False, random_state=1, shrinking=True,
  tol=0.001, verbose=False)

Testing the model.

In [35]:
# Predict a label for every PCA-reduced test sample.
yPredictSVC = mySVC.predict(X=xTestNew)

Using accuracy_score to measure our model's accuracy on the test set.

In [36]:
# Fraction of test samples whose predicted label equals the true label.
score_SVC = accuracy_score(y_true=yTest, y_pred=yPredictSVC)
print(score_SVC)

0.91


Printing out the confusion matrix.

In [12]:
# Confusion matrix of the test predictions: rows are true labels, columns are
# predicted labels.
cm_SVC = metrics.confusion_matrix(y_true=yTest, y_pred=yPredictSVC)

print("Confusion matrix:", cm_SVC, sep="\n")

Confusion matrix:
[[3 0 0 ..., 0 0 0]
 [0 3 0 ..., 0 0 0]
 [0 0 1 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 4 0]
 [0 0 0 ..., 0 0 1]]



Merging the data sets back together for GridSearch

In [13]:
# Re-assemble the full data set (training rows first, then test rows) so the
# grid search can cross-validate over all samples.
mergeX = np.concatenate([xTrainNew, xTestNew], axis=0)
mergeY = pd.concat([yTrain, yTest], axis=0)
print(mergeY.shape, mergeX.shape, sep='\n')

(400,)
(400, 50)


Using GridSearchCV

In [15]:
# Candidate values for the SVM regularisation parameter C.
param_grid = {'C': [0.1, 1, 10, 100, 1e3, 5e3, 1e4, 5e4, 1e5]}
print(param_grid, '\n')

# 10-fold cross-validated search over C, scored by accuracy; the kernel and
# gamma are held fixed.
grid = GridSearchCV(
    estimator=SVC(kernel='rbf', gamma=0.0005, random_state=1),
    param_grid=param_grid,
    cv=10,
    scoring='accuracy',
)

# Run the search on the merged data set.
grid.fit(mergeX, mergeY)

# Best mean cross-validated accuracy and the C value that achieved it
# (grid.cv_results_ holds the full per-candidate results if needed).
print(grid.best_score_)
print(grid.best_params_)


{'C': [0.1, 1, 10, 100, 1000.0, 5000.0, 10000.0, 50000.0, 100000.0]} 

0.9625
{'C': 10}


### After running the grid search, we see that the best C is 10, which gives a 10-fold cross-validated accuracy of about 96% (0.9625).