# CS 180 Project: Face Recognition

# Student Name: Angelica Basista

Face Recognition using Multinomial Naive Bayes, Gaussian Naive Bayes, and SVM Classifier on the Yale Face Database.



# Source Code

## Import Libraries


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from PIL import Image
from google.colab import files
from skimage.feature import hog
from skimage.color import rgb2grey

from sklearn import svm
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

## Upload dataset

Upload dataset and get the photos whose filename starts with "subject".

In [None]:
# Ask the user to upload the image files through google.colab's `files` helper.
# Iterating `uploaded` yields the uploaded file names, which the next cell filters.
uploaded = files.upload()


In [None]:
# Keep only the uploads named like Yale Face Database images ("subject...").
# Adjust the prefix when using another dataset or renamed files.
# NOTE: this rebinds `files`, shadowing the google.colab `files` module imported above.
files = [name for name in uploaded if name.startswith("subject")]

## Define functions for getting the features of an image

Three functions for extracting features from an image are defined. We will observe which feature matrix the models perform best on.

In [None]:
def get_orig_features(file):
  """Return the original features only: the image pixels as a flat array.

  Args:
    file: Path of the image file, passed to ``PIL.Image.open``.

  Returns:
    1-D ``np.ndarray`` with the flattened greyscale image.
  """
  # Image.open is lazy and keeps the file open; the context manager releases
  # the handle as soon as the pixels have been copied into the array.
  with Image.open(file) as image:
    pixels = np.array(image)
  # NOTE(review): rgb2grey is a deprecated alias of rgb2gray in newer
  # scikit-image releases - confirm against the installed version.
  grey_img = rgb2grey(pixels)  # The Yale dataset is already in greyscale
  return grey_img.flatten()

def get_hog_features(file):
  """Return only the HOG (histogram of oriented gradients) features of an image.

  Args:
    file: Path of the image file, passed to ``PIL.Image.open``.

  Returns:
    The feature vector produced by ``skimage.feature.hog`` using 16x16-pixel
    cells and 'L2-Hys' block normalisation.
  """
  # Image.open is lazy and keeps the file open; the context manager releases
  # the handle as soon as the pixels have been copied into the array.
  with Image.open(file) as image:
    pixels = np.array(image)
  # NOTE(review): rgb2grey is a deprecated alias of rgb2gray in newer
  # scikit-image releases - confirm against the installed version.
  grey_img = rgb2grey(pixels)  # The Yale dataset is already in greyscale
  return hog(grey_img, block_norm='L2-Hys', pixels_per_cell=(16, 16))

def get_features(file):
  """Return the original pixel features followed by the HOG features.

  Args:
    file: Path of the image file, passed to ``PIL.Image.open``.

  Returns:
    1-D ``np.ndarray``: the flattened greyscale pixels concatenated with the
    HOG vector (16x16-pixel cells, 'L2-Hys' block normalisation).
  """
  # Image.open is lazy and keeps the file open; the context manager releases
  # the handle as soon as the pixels have been copied into the array.
  with Image.open(file) as image:
    pixels = np.array(image)
  # NOTE(review): rgb2grey is a deprecated alias of rgb2gray in newer
  # scikit-image releases - confirm against the installed version.
  grey_img = rgb2grey(pixels)  # The Yale dataset is already in greyscale
  hog_features = hog(grey_img, block_norm='L2-Hys', pixels_per_cell=(16, 16))
  return np.hstack((grey_img.flatten(), hog_features))


## Split into train and test sets manually

Here, the test set consists of the images with the "glasses" description, i.e. one image per subject (about 1/11 of the full 165-image dataset). It was observed that using the get_hog_features function for the image features resulted in higher accuracy scores, so that is what will be used.

In [None]:
def split_dataset(test_labels):
  """Split the uploaded images into train and test sets by description.

  Every name in the module-level ``files`` list is parsed as
  ``<label>.<description>`` (e.g. ``subject01.glasses``). Images whose
  description is in ``test_labels`` go to the test set; all others go to the
  training set.

  Args:
    test_labels: Collection of descriptions to hold out as the test set.

  Returns:
    Tuple ``(x_train, x_test, y_train, y_test)`` of ``np.ndarray``: feature
    matrices and the matching subject labels.
  """
  x_train, x_test, y_train, y_test = [], [], [], []
  for file in files:
    # Parse "<label>.<description>[.<anything>]" without assuming exactly one
    # dot: a plain two-name unpack of split('.') raised ValueError for names
    # such as "subject01.glasses.gif" or names without any dot. A name with
    # no dot gets an empty description.
    label, _, remainder = file.partition('.')
    desc = remainder.split('.')[0]

    # HOG features gave the best accuracy; get_features(file) or
    # get_orig_features(file) can be swapped in here to compare.
    features = get_hog_features(file)

    if desc in test_labels:
      y_test.append(label)
      x_test.append(features)
    else:
      y_train.append(label)
      x_train.append(features)
  return np.array(x_train), np.array(x_test), np.array(y_train), np.array(y_test)


# Hold out every image whose description is "glasses" as the test set;
# images with any other description are used for training.
test_labels = ["glasses"]  # descriptions that split_dataset routes to the test set
x_train, x_test, y_train, y_test = split_dataset(test_labels)

## Gaussian Naive Bayes

### Train model

In [None]:
# Fit a Gaussian Naive Bayes classifier (default settings) on the training split.
gnb = GaussianNB()
gnb.fit(x_train, y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

### Show probability for each label



In [None]:
# Class probability estimates for the test images:
# one row per test sample, one column per subject label.
gnb.predict_proba(x_test)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0.

### Calculate accuracy

In [None]:
# Predict a subject for every test image, then compute the fraction predicted correctly.
y_pred = gnb.predict(x_test)
metrics.accuracy_score(y_test, y_pred)

0.8666666666666667

### Assess model performance using classification report

In [None]:
# Report on every class that occurs in y_test or y_pred (the default when
# `labels` is omitted). Restricting `labels` to np.unique(y_pred) dropped the
# subjects that were never predicted, which hid their misses and inflated the
# recall figures. Classes that are never predicted are reported with
# precision 0 (scikit-learn may emit an UndefinedMetricWarning for them).
# Re-run this cell to refresh any previously captured output.
print(classification_report(y_test, y_pred, digits=4))


              precision    recall  f1-score   support

   subject01     1.0000    1.0000    1.0000         1
   subject02     1.0000    1.0000    1.0000         1
   subject03     1.0000    1.0000    1.0000         1
   subject04     1.0000    1.0000    1.0000         1
   subject05     1.0000    1.0000    1.0000         1
   subject06     1.0000    1.0000    1.0000         1
   subject07     1.0000    1.0000    1.0000         1
   subject08     0.5000    1.0000    0.6667         1
   subject11     1.0000    1.0000    1.0000         1
   subject12     0.5000    1.0000    0.6667         1
   subject13     1.0000    1.0000    1.0000         1
   subject14     1.0000    1.0000    1.0000         1
   subject15     1.0000    1.0000    1.0000         1

   micro avg     0.8667    1.0000    0.9286        13
   macro avg     0.9231    1.0000    0.9487        13
weighted avg     0.9231    1.0000    0.9487        13



## Multinomial Naive Bayes

### Train model

In [None]:
# Fit a Multinomial Naive Bayes classifier (default settings) on the same training split.
# NOTE(review): MultinomialNB expects non-negative feature values - presumably
# satisfied by the HOG features; confirm if the feature function is changed.
mnb = MultinomialNB()
mnb.fit(x_train, y_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

### Show probability for each label

In [None]:
# Class probability estimates for the test images:
# one row per test sample, one column per subject label.
mnb.predict_proba(x_test)

array([[1.00000000e+000, 7.44703021e-110, 1.17384919e-080,
        3.48885367e-064, 2.46349559e-078, 1.57571671e-181,
        4.32659480e-086, 9.59646679e-093, 1.49693533e-109,
        2.75091322e-060, 9.92794920e-070, 8.35579187e-095,
        3.17043888e-081, 3.87375332e-169, 8.10569940e-095],
       [1.80424038e-058, 1.00000000e+000, 3.49551815e-120,
        1.33466693e-035, 4.48593445e-047, 2.90414950e-152,
        2.39519223e-133, 1.51323900e-086, 1.53636358e-113,
        2.54546196e-084, 1.33847624e-071, 3.12683211e-042,
        8.72464258e-095, 1.63343841e-159, 1.08045798e-091],
       [7.46654315e-056, 1.29019618e-118, 1.00000000e+000,
        2.86630812e-092, 1.92061305e-079, 3.12682810e-165,
        1.98615923e-035, 1.99661413e-068, 3.55449366e-081,
        2.45942204e-039, 3.17612373e-063, 1.39544283e-099,
        8.35613823e-029, 6.13586847e-150, 1.40584319e-067],
       [2.16801624e-075, 1.18189789e-063, 1.44515558e-142,
        1.00000000e+000, 4.37144157e-069, 1.17373001e

### Calculate Accuracy

In [None]:
# Rebind y_pred to the Multinomial NB predictions (replacing the Gaussian NB ones),
# then compute the fraction of test images predicted correctly.
y_pred = mnb.predict(x_test)
metrics.accuracy_score(y_test, y_pred)

0.9333333333333333

### Show confusion matrix

In [None]:
# Confusion matrix for the current y_pred (the Multinomial NB predictions):
# rows are true subjects, columns are predicted subjects (scikit-learn convention).
metrics.confusion_matrix(y_test, y_pred)

array([[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]])

### Display classification report

In [None]:
# Report on every class that occurs in y_test or y_pred (the default when
# `labels` is omitted). Restricting `labels` to np.unique(y_pred) dropped the
# subjects that were never predicted, which hid their misses and inflated the
# recall figures. Classes that are never predicted are reported with
# precision 0 (scikit-learn may emit an UndefinedMetricWarning for them).
# Re-run this cell to refresh any previously captured output.
print(classification_report(y_test, y_pred, digits=4))

              precision    recall  f1-score   support

   subject01     1.0000    1.0000    1.0000         1
   subject02     1.0000    1.0000    1.0000         1
   subject03     1.0000    1.0000    1.0000         1
   subject04     1.0000    1.0000    1.0000         1
   subject05     1.0000    1.0000    1.0000         1
   subject06     1.0000    1.0000    1.0000         1
   subject07     1.0000    1.0000    1.0000         1
   subject08     0.5000    1.0000    0.6667         1
   subject10     1.0000    1.0000    1.0000         1
   subject11     1.0000    1.0000    1.0000         1
   subject12     1.0000    1.0000    1.0000         1
   subject13     1.0000    1.0000    1.0000         1
   subject14     1.0000    1.0000    1.0000         1
   subject15     1.0000    1.0000    1.0000         1

   micro avg     0.9333    1.0000    0.9655        14
   macro avg     0.9643    1.0000    0.9762        14
weighted avg     0.9643    1.0000    0.9762        14



## Support Vector Machine (SVM)

In [None]:
# Support vector classifier with probability estimates enabled so that
# predict_proba can be called on it afterwards.
clf = svm.SVC(C=100.0, gamma=1e-4, probability=True)
clf.fit(x_train, y_train)  # fit() returns the estimator itself, so no rebinding is needed

# Mean accuracy on the held-out test images.
score = clf.score(x_test, y_test)
print('Score:', score)

Score: 0.9333333333333333


### Show probability for each label

In [None]:
# Class probability estimates from the SVM for the test images:
# one row per test sample, one column per subject label.
clf.predict_proba(x_test)

array([[0.479101  , 0.02409128, 0.05069264, 0.05763837, 0.02975252,
        0.01314002, 0.04843426, 0.04105857, 0.05058951, 0.05152856,
        0.02926438, 0.05294805, 0.02901956, 0.01120221, 0.03153907],
       [0.09132459, 0.23550789, 0.03049614, 0.11809948, 0.06622996,
        0.01931211, 0.02131673, 0.07476   , 0.05306214, 0.04170648,
        0.03928564, 0.12847888, 0.02760137, 0.01548835, 0.03733022],
       [0.03558657, 0.01253194, 0.5254968 , 0.01973963, 0.01808591,
        0.0112685 , 0.06971515, 0.03853138, 0.07925872, 0.03850581,
        0.02046515, 0.02284796, 0.06785733, 0.00905474, 0.03105441],
       [0.07074146, 0.06601321, 0.02194553, 0.497315  , 0.0523434 ,
        0.01500907, 0.0218888 , 0.04045385, 0.02999424, 0.02419064,
        0.02811347, 0.07238979, 0.02322153, 0.0125871 , 0.02379293],
       [0.05429389, 0.03906535, 0.03575084, 0.04946941, 0.52218894,
        0.01395486, 0.01698399, 0.04657788, 0.03017291, 0.02034972,
        0.02373417, 0.07431439, 0.0378908 , 