In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

In [2]:
from sklearn.datasets import fetch_lfw_people

# Load the Labeled Faces in the Wild data, keeping only people who have at
# least 50 images so that every class has a usable number of samples.
faces = fetch_lfw_people(min_faces_per_person=50)

# Show the retained class names, then the shape of the image array.
print(faces.target_names, faces.images.shape, sep="\n")

['Ariel Sharon' 'Colin Powell' 'Donald Rumsfeld' 'George W Bush'
 'Gerhard Schroeder' 'Hugo Chavez' 'Jacques Chirac' 'Jean Chretien'
 'John Ashcroft' 'Junichiro Koizumi' 'Serena Williams' 'Tony Blair']
(1560, 62, 47)


In [3]:
from sklearn.svm import SVC

# PCA is imported under the historical name RandomizedPCA; the randomized SVD
# solver is requested explicitly when the estimator is constructed below.
from sklearn.decomposition import PCA as RandomizedPCA
from sklearn.pipeline import make_pipeline

# Chain the preprocessing step and the classifier into a single estimator so
# they can be fitted and tuned as one unit.
pca = RandomizedPCA(
    n_components=150,
    whiten=True,
    # Ask for the randomized solver by name instead of relying on the
    # data-size heuristic of the default svd_solver='auto'.
    svd_solver="randomized",
    random_state=42,
)
# SVC defaults to the RBF kernel; C and gamma are left at their defaults here.
svc = SVC()
model = make_pipeline(pca, svc)

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# identical from run to run.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    faces.data,
    faces.target,
    test_size=0.2,
    random_state=42,
)

In [5]:
from sklearn.model_selection import GridSearchCV

# Candidate hyperparameters for the SVC step of the pipeline. The "svc__"
# prefix addresses the pipeline step named "svc".
param_grid = {
    'svc__C': [1, 5, 10, 50],
    'svc__gamma': [0.0001, 0.0005, 0.001, 0.005],
}

# Exhaustively evaluate every (C, gamma) pair with GridSearchCV's default
# cross-validation settings, fitting on the training split only.
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid.fit(Xtrain, ytrain)

# Report the combination that scored best during the search.
print(grid.best_params_)

{'svc__C': 10, 'svc__gamma': 0.001}


In [6]:
# Rebind `model` to the pipeline selected by the grid search, and use that
# same estimator to label the held-out test samples.
model = grid.best_estimator_
yfit = grid.best_estimator_.predict(Xtest)

In [7]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the held-out test set, with rows
# labelled by person name instead of integer class id.
print(
    classification_report(
        y_true=ytest,
        y_pred=yfit,
        target_names=faces.target_names,
    )
)

                   precision    recall  f1-score   support

     Ariel Sharon       0.67      0.73      0.70        11
     Colin Powell       0.76      0.87      0.81        55
  Donald Rumsfeld       0.69      0.72      0.71        25
    George W Bush       0.84      0.81      0.83       107
Gerhard Schroeder       0.75      0.71      0.73        21
      Hugo Chavez       0.90      0.64      0.75        14
   Jacques Chirac       0.57      0.57      0.57         7
    Jean Chretien       0.71      0.83      0.77        12
    John Ashcroft       0.90      0.75      0.82        12
Junichiro Koizumi       1.00      0.75      0.86         8
  Serena Williams       0.71      0.71      0.71         7
       Tony Blair       0.82      0.85      0.84        33

         accuracy                           0.79       312
        macro avg       0.78      0.75      0.76       312
     weighted avg       0.80      0.79      0.79       312

