In [1]:
from time import time
import logging
import matplotlib.pyplot as plt

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import fetch_lfw_people
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
from sklearn.svm import SVC

In [3]:
# Configure root logging: timestamped messages at INFO level and above.
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

In [None]:
# Load the LFW people dataset (downloaded on first use, see the log output
# below), restricted via min_faces_per_person=70 and with images rescaled
# by resize=0.4.
lfw_people = fetch_lfw_people(
    min_faces_per_person=70,
    resize=0.4,
)

2017-08-21 17:43:06,929 Downloading LFW metadata: http://vis-www.cs.umass.edu/lfw/pairsDevTrain.txt
2017-08-21 17:43:09,631 Downloading LFW metadata: http://vis-www.cs.umass.edu/lfw/pairsDevTest.txt
2017-08-21 17:43:10,330 Downloading LFW metadata: http://vis-www.cs.umass.edu/lfw/pairs.txt
2017-08-21 17:43:21,887 Downloading LFW data (~200MB): http://vis-www.cs.umass.edu/lfw/lfw-funneled.tgz


In [None]:
# The image array has three dimensions: the sample count, then (presumably)
# the per-image height and width.
n_samples, h, w = lfw_people.images.shape

In [None]:
# Use the flattened pixel data as the feature matrix (one row per sample).
# `lfw_people.target` holds the 1-D class labels: using it as X would make
# `X.shape[1]` raise an IndexError and would train the model on the labels.
X = lfw_people.data
n_features = X.shape[1]

In [None]:
# Labels to predict: `y` is the per-sample class label and `target_names`
# holds one name per class, so its length gives the number of classes.
target_names = lfw_people.target_names
n_classes = target_names.shape[0]
y = lfw_people.target

In [None]:
# Summarize the dataset dimensions, one "name:value" line per quantity.
print('Total dataset size:')
for template, value in (
    ('n_samples:%d', n_samples),
    ('n_features:%d', n_features),
    ('n_classes:%d', n_classes),
):
    print(template % value)

In [None]:
# Hold out 25% of the samples as a test set; the fixed random_state makes
# the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Compute a PCA (eigenfaces) on the face dataset (treated as an unlabeled
# dataset): unsupervised feature extraction / dimensionality reduction.
n_components = 150
print('Extracting the top %d eigenfaces from %d faces' % (n_components, X_train.shape[0]))
t0 = time()
# The randomized SVD solver is stochastic, so seed it for reproducible results.
pca = PCA(
    n_components=n_components,
    svd_solver='randomized',
    whiten=True,
    random_state=42,
).fit(X_train)
print('done in %0.3fs' % (time() - t0))

# Project both splits onto the eigenface basis. The classifier is trained on
# X_train_pca and the prediction cell below reads X_test_pca.
print('Projecting the input data on the eigenfaces orthonormal basis')
t0 = time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print('done in %0.3fs' % (time() - t0))

# Fit an RBF-kernel SVM on the projected training data, selecting C and gamma
# by cross-validated grid search. Without this step `clf` is undefined and the
# final prints (and the prediction cell below) raise NameError.
print('Fitting the classifier to the training set')
t0 = time()
param_grid = {
    'C': [1e3, 5e3, 1e4, 5e4, 1e5],
    'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
}
clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
clf = clf.fit(X_train_pca, y_train)
print('done in %0.3fs' % (time() - t0))
print('Best estimator found by grid search:')
print(clf.best_estimator_)

In [None]:
# Predict labels for the PCA-projected test set and report how long it took.
print("Predicting people's names on the test set")
t0 = time()
y_pred = clf.predict(X_test_pca)
elapsed = time() - t0
print("done in %0.3fs" % elapsed)

In [None]:
# Per-class metrics labelled with the people's names, then the confusion
# matrix with labels listed in class-index order (0 .. n_classes - 1).
report = classification_report(y_test, y_pred, target_names=target_names)
print(report)
matrix = confusion_matrix(y_test, y_pred, labels=range(n_classes))
print(matrix)