### Covariance

In [None]:
import numpy as np

X = [[2, 0, -1.4],
     [2.2, 0.2, -1.5],
     [2.4, 0.1, -1],
     [1.9, 0, -1.2]]

print(np.cov(np.array(X).T))

### Eigenvector

In [None]:
import numpy as np
w, v = np.linalg.eig(np.array([[1, -2], [2, -3]]))
w; v

### Dimensionality reduction with Principal Component Analysis

In [None]:
%pylab inline
%matplotlib inline

import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.datasets import load_iris

In [None]:
# Data
data = load_iris()
y = data.target
X = data.data

# PCA
pca = PCA(n_components=2)
reduced_X = pca.fit_transform(X)

In [None]:
# Plot reduced data

red_x, red_y = [], []
blue_x, blue_y = [], []
green_x, green_y = [], []

for i in range(len(reduced_X)):
    if y[i] == 0:
        red_x.append(reduced_X[i][0])
        red_y.append(reduced_X[i][1])
    elif y[i] == 1:
        blue_x.append(reduced_X[i][0])
        blue_y.append(reduced_X[i][1])
    else:
        green_x.append(reduced_X[i][0])
        green_y.append(reduced_X[i][1])

plt.scatter(red_x, red_y, c='r', marker='x')
plt.scatter(blue_x, blue_y, c='b', marker='D')
plt.scatter(green_x, green_y, c='g', marker='.')
plt.show()

# Face recognition with PCA

The data set contains ten images each of forty people.
The images were created under different lighting conditions, and the subjects varied their facial expressions. The images are gray scale and 92 x 112 pixels in dimension.

In [1]:
%pylab inline
%matplotlib inline

import os
from os import walk, path
import numpy as np
import mahotas as mh
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import cross_val_score
from sklearn.preprocessing import scale
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
X = []
y = []

Populating the interactive namespace from numpy and matplotlib


In [2]:
####################################

# Load Images


for dir_path, dir_names, file_names in walk('/Users/Bya/Dropbox/Research/resources/orl_faces/'):
    for fn in file_names:
        if fn[-3:] == 'pgm':
            image_filename = path.join(dir_path, fn)
            X.append(scale(mh.imread(image_filename, as_grey=True).reshape(10304).astype('float32')))
            y.append(dir_path)

X = np.array(X)



In [5]:
####################################

# Reshape matrices to vectors
# Randomly split to: Train, Test data


# Split
X_train, X_test, y_train, y_test = train_test_split(X, y)

# fit PCA
pca = PCA(n_components=150)

# train, test data
X_train_reduced = pca.fit_transform(X_train)
X_test_reduced = pca.transform(X_test)

print('The original dimensions of the training data were', X_train.shape)
print('The reduced dimensions of the training data are', X_train_reduced.shape)

The original dimensions of the training data were (300, 10304)
The reduced dimensions of the training data are (300, 150)


In [6]:
####################################

# Logistic regression classifier
# (The data set contains forty classes)
# scikit-learn automatically creates binary classifiers
# using the one-versus-all strategy behind the scenes:


classifier = LogisticRegression()
accuracies = cross_val_score(classifier, X_train_reduced, y_train)

print('Cross validation accuracy:\n', np.mean(accuracies), accuracies)

classifier.fit(X_train_reduced, y_train)
predictions = classifier.predict(X_test_reduced)
print(classification_report(y_test, predictions))

Cross validation accuracy: 0.801700634665 [ 0.84070796  0.75757576  0.80681818]
             precision    recall  f1-score   support

/Users/Bya/Dropbox/Research/resources/orl_faces/s1       0.67      1.00      0.80         2
/Users/Bya/Dropbox/Research/resources/orl_faces/s10       1.00      1.00      1.00         2
/Users/Bya/Dropbox/Research/resources/orl_faces/s11       1.00      1.00      1.00         1
/Users/Bya/Dropbox/Research/resources/orl_faces/s12       1.00      1.00      1.00         3
/Users/Bya/Dropbox/Research/resources/orl_faces/s13       1.00      1.00      1.00         3
/Users/Bya/Dropbox/Research/resources/orl_faces/s14       1.00      1.00      1.00         4
/Users/Bya/Dropbox/Research/resources/orl_faces/s15       1.00      1.00      1.00         4
/Users/Bya/Dropbox/Research/resources/orl_faces/s16       1.00      1.00      1.00         4
/Users/Bya/Dropbox/Research/resources/orl_faces/s17       1.00      1.00      1.00         3
/Users/Bya/Dropbox/Research/re

  'recall', 'true', average, warn_for)
