In [54]:
import numpy as np
from scipy.io import loadmat
# Read the MATLAB file into a dict-like mapping of variable name -> array.
results = loadmat('yalefaces.mat')
# The image matrix is stored under the key 'X'; later cells index it by
# column, one column per face image.
X = results['X']

In [55]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA

## (a)

In [60]:
# Keep only the columns belonging to the first ten people:
# 10 subjects x 64 images each = 640 columns.
# (assumes the columns of X are grouped subject by subject -- confirm against the dataset)
first_10_faces = X[:, :10 * 64]

# One integer label per kept column: 0 for the first 64 columns, 1 for the
# next 64, and so on up to 9.
labels = np.repeat(np.arange(10), 64)

print(f"The shape for first_10_faces is {first_10_faces.shape}.")

The shape for first_10_faces is (1024, 640).


## (b)

In [57]:
# Build a per-subject hold-out split: for each of the 10 subjects, 14 of the
# 64 image columns go to the test set and the remaining 50 to the training set.
# A seeded generator keeps the split (and every accuracy computed from it)
# identical across Restart Kernel -> Run All.
split_rng = np.random.default_rng(0)

train_indices = []
test_indices = []
for i in range(10):
    # Randomly select 14 within-subject positions for the test set
    test_idx = split_rng.choice(64, size=14, replace=False)
    # Use the remaining positions for the training set
    train_idx = np.setdiff1d(np.arange(64), test_idx)
    # Shift by this subject's column offset and add to the overall index lists
    train_indices.extend(i*64 + train_idx)
    test_indices.extend(i*64 + test_idx)

# Sanity checks: no image is in both sets, and every image is in one of them.
assert not set(train_indices) & set(test_indices), "train/test indices overlap"
assert len(train_indices) + len(test_indices) == 10 * 64

# Every index is below 640, so indexing X here only ever selects columns of
# the first 10 subjects.
X_train = X[:, train_indices]
y_train = labels[train_indices]
X_test = X[:, test_indices]
y_test = labels[test_indices]
print(f"The shape for X_train is {X_train.shape}.")
print(X_train)
print(f"The shape for y_train is {y_train.shape}.")
print(y_train)
# The two messages below previously said "y_train" while printing the
# test-set shapes.
print(f"The shape for X_test is {X_test.shape}.")
print(X_test)
print(f"The shape for y_test is {y_test.shape}.")
print(y_test)

The shape for X_train is (1024, 500).
[[0.00392157 0.57573529 0.00784314 ... 0.18995098 0.52156863 0.        ]
 [0.00784314 0.53946078 0.00784314 ... 0.17107843 0.47205882 0.00661765]
 [0.00784314 0.48210784 0.00539216 ... 0.17696078 0.46593137 0.00784314]
 ...
 [0.51813725 0.03137255 0.61691176 ... 0.41372549 0.08823529 0.00539216]
 [0.51813725 0.03137255 0.63039216 ... 0.40147059 0.08112745 0.02205882]
 [0.50269608 0.03137255 0.65882353 ... 0.39215686 0.06911765 0.02745098]]
The shape for y_train is (500,).
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 

## (c)

In [58]:
# Perform PCA on the training set only, then project both sets onto the
# 20 leading components (the test set is transformed, never fitted on).
# X_train / X_test hold one image per column, while scikit-learn expects one
# sample per row, hence the transposes going in. The projections are
# transposed back to (components, samples) because the cells below pass
# `X_train_pca.T` / `X_test_pca.T` to the classifiers.
# random_state is fixed because PCA's default 'auto' solver may pick the
# randomized SVD for an input of this size, which would otherwise make the
# projection (and the accuracies below) vary between runs.
pca = PCA(n_components=20, random_state=0)
X_train_pca = pca.fit_transform(X_train.T).T
X_test_pca = pca.transform(X_test.T).T

# Train an LDA model on the PCA-projected training data
lda = LDA()
lda.fit(X_train_pca.T, y_train)

# Evaluate the mean accuracy on the held-out test set
accuracy = lda.score(X_test_pca.T, y_test)
print("LDA Classification accuracy: {:.2f}%".format(accuracy*100))

LDA Classification accuracy: 83.57%


## (d)

In [59]:
# Train a linear-kernel SVM on the PCA-projected training data
svm_model = SVC(kernel='linear')
svm_model.fit(X_train_pca.T, y_train)

# Train a decision tree on the same data. random_state is fixed so the fitted
# tree, and therefore its reported accuracy, is the same on every run.
dt_model = DecisionTreeClassifier(random_state=0)
dt_model.fit(X_train_pca.T, y_train)

# Predict the test labels once per model; the accuracy and the confusion
# matrix are both derived from these predictions instead of predicting again
# inside `.score(...)`.
y_pred_svm = svm_model.predict(X_test_pca.T)
y_pred_dt = dt_model.predict(X_test_pca.T)

# Fraction of correctly classified test images (the same quantity that
# `model.score` reports for a classifier)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print("SVM classification accuracy: {:.2f}%".format(accuracy_svm*100))

accuracy_dt = accuracy_score(y_test, y_pred_dt)
print("Decision tree classification accuracy: {:.2f}%".format(accuracy_dt*100))

# Confusion matrices (rows: true subject, columns: predicted subject).
# They are computed but not printed here; display `cm_svm` / `cm_dt` in a
# separate cell to inspect them.
cm_svm = confusion_matrix(y_test, y_pred_svm)
cm_dt = confusion_matrix(y_test, y_pred_dt)

SVM classification accuracy: 95.71%
Decision tree classification accuracy: 72.14%


The SVM with a linear kernel does the best job of classifying the faces, with accuracy above 90%; LDA ranks second at around 80% accuracy; the decision tree ranks last at about 70% accuracy.