In [50]:
import os
import cv2
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.utils.extmath import svd_flip

%matplotlib inline

In [148]:
# Folder and file name of the pickled Yale faces dump, resolved relative to
# the current working directory.
DATASET_PATH = 'dataset'
YALE_DUMPFILE_PATH = 'yalefaces.dump'

# The dump unpacks into exactly four objects (data and labels, plus their test
# counterparts, judging by the names). NOTE(review): pickle.load executes
# arbitrary code from the file, so only load dumps from a trusted source.
with open(os.path.join('.', DATASET_PATH, YALE_DUMPFILE_PATH), 'rb') as dump_file:
    yale_splits = pickle.load(dump_file)

dataset, labels, test_dataset, test_labels = yale_splits

In [194]:
class PCA:
    """SVD-based PCA that reconstructs samples from the leading principal axes.

    Each sample (first axis of the input) is flattened into one column, the
    per-feature mean is removed, and the left singular vectors of the centred
    matrix are used as principal axes. The number of axes kept is the smallest
    count whose cumulative explained variance reaches ``ratio``.

    Attributes set by ``fit``:
        mean_: column vector of per-feature means, shape (n_features, 1).
        eigen_vector: retained principal axes as columns,
            shape (n_features, n_components_).
        n_components_: number of retained axes.
    """

    def __init__(self, ratio):
        """Store the target fraction of variance to retain.

        Args:
            ratio: fraction of total variance to keep, in the interval (0, 1].

        Raises:
            ValueError: if ``ratio`` is not in (0, 1].
        """
        if not 0.0 < ratio <= 1.0:
            raise ValueError(
                "ratio must be a fraction of explained variance in (0, 1], "
                "got {!r}".format(ratio)
            )
        self.ratio = ratio

    @staticmethod
    def _as_columns(data):
        """Return a private float64 copy of ``data`` with one flattened sample per column."""
        # np.array always copies here, so centring never touches the caller's array.
        samples = np.array(data, dtype=np.float64)
        return samples.reshape(samples.shape[0], -1).T

    def fit(self, data):
        """Learn the mean and the principal axes from ``data``.

        Args:
            data: array-like whose first axis indexes samples; the remaining
                axes are flattened into features.
        """
        columns = self._as_columns(data)
        self.mean_ = columns.mean(axis=1, keepdims=True)
        centered = columns - self.mean_

        # Left singular vectors are the principal axes. numpy returns the
        # singular values already sorted in descending order, so no re-sort.
        axes, singular_values, _ = np.linalg.svd(centered, full_matrices=False)

        # Variance along each axis is proportional to the squared singular
        # value; the common 1/(n-1) factor cancels in the ratio.
        cumulative = np.cumsum(singular_values ** 2)
        total = cumulative[-1] if cumulative.size else 0.0

        if total > 0.0:
            # Dividing by the last cumulative entry makes the final fraction
            # exactly 1.0, so ratio=1.0 keeps every component.
            cumulative = cumulative / total
            first_reaching = int(np.searchsorted(cumulative, self.ratio, side='left'))
            n_keep = min(first_reaching + 1, singular_values.size)
        else:
            # All samples identical: there is no variance to explain.
            n_keep = 0

        self.n_components_ = n_keep
        self.eigen_vector = axes[:, :n_keep]

    def transform(self, data):
        """Project ``data`` onto the retained axes and map it back to feature space.

        Args:
            data: array-like laid out like the input to ``fit``.

        Returns:
            Array of shape (n_features, n_samples): the reconstruction of each
            sample (one per column) from the retained principal axes.
        """
        centered = self._as_columns(data) - self.mean_
        weights = np.dot(self.eigen_vector.T, centered)
        return self.mean_ + np.dot(self.eigen_vector, weights)

    def fit_transform(self, data):
        """Fit on ``data`` and return its reconstruction (see ``transform``)."""
        self.fit(data)
        return self.transform(data)

In [188]:
# Fit the PCA on the training images, then pass those same images through it.
pca = PCA(ratio=0.8)
pca.fit(dataset)
eigen_faces = pca.transform(dataset)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [1000.0, 5000.0, 10000.0, 50000.0, 100000.0], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

0.09090909090909091