# SVM for Classification

Use the raw face images (vectorized) and the face vectors after PCA pre-processing (with di- mensionality of 80 and 200) as inputs to linear SVM. Try values of the penalty parameter C in {1 × 10<sup>-2</sup>, 1 × 10<sup>-1</sup>, 1}. Report the classification accuracy with different parameters and dimensions. Discuss the effect of data dimension and parameter C on the final classification accuracy.

### Read all the images

In [1]:
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits import mplot3d

In [2]:
def read_images():
    dir='PIE'
    PIE=os.listdir(dir)
    all_img = []
    img_folders=[dir+'/' + photo for photo in PIE]
    selected_subjects = np.random.choice(range(len(img_folders)), 20, replace=False)
    
    selected_sub_folders = [img_folders[index] for index in selected_subjects]
    print('Selected Subjects:')
    print(selected_sub_folders)

    for img_dir in selected_sub_folders:
        img_path=os.listdir(img_dir)
        img_dir_index=[img_dir+'/' + photo for photo in img_path]
        all_img.append(img_dir_index)

    flat_list = []
    for sublist in all_img:
        for item in sublist:
            flat_list.append(item)
    
    images = np.array([plt.imread(image) for image in flat_list], dtype=np.int16)
    labels = np.zeros((len(images)))

    for i, path in enumerate(flat_list):
        labels[i] = path.split('/')[1]
    return images, labels

In [3]:
def read_my_images():
    my_img_path = 'my_images'
    img_list=os.listdir(my_img_path)
    print(img_list)
    my_images = np.array([plt.imread(my_img_path + '/' + image) for image in img_list], dtype=np.int16)
    my_labels = np.full((10), 69)
    return my_images, my_labels

In [4]:
images, labels = read_images()
n_samples, image_size, _ = images.shape
n_features = image_size * image_size

Selected Subjects:
['PIE/11', 'PIE/6', 'PIE/35', 'PIE/20', 'PIE/10', 'PIE/24', 'PIE/41', 'PIE/22', 'PIE/55', 'PIE/29', 'PIE/8', 'PIE/5', 'PIE/2', 'PIE/51', 'PIE/48', 'PIE/49', 'PIE/57', 'PIE/34', 'PIE/52', 'PIE/15']


In [5]:
my_images, my_labels = read_my_images()

['8.jpg', '9.jpg', '4.jpg', '5.jpg', '7.jpg', '6.jpg', '2.jpg', '3.jpg', '1.jpg', '0.jpg']


### Randomly select 500 images

In [6]:
def select_images(images, labels, select_num):
    num_images = len(images)
    np.random.seed(99)
    if select_num == len(images):
        return images.reshape([select_num, n_features]), labels
    rand_images_list = np.random.randint(0, num_images, select_num)
    X = images[rand_images_list].reshape([select_num, n_features])
    y = labels[rand_images_list]
    return X, y

In [7]:
X, y = select_images(images, labels, n_samples)

### PCA

In [8]:
def PCA(X):
    mean_data = np.mean(X,axis=0)
    centred_data = X - mean_data
    cov_matrix = np.cov(centred_data.T)
    [eig_val,eig_vec] = np.linalg.eig(cov_matrix)
    projected = np.dot(eig_vec, centred_data.T)
    return eig_val, eig_vec

In [9]:
def get_centred_data(X):
    mean_data = np.mean(X,axis=0)
    centred_data = X - mean_data
    return centred_data

In [10]:
def get_reduced_dim_data(eig_vec, X, dim):
    pca_vec = eig_vec[:,0:dim]
    return np.dot(X, pca_vec)

### Classifying the test images using the rule of nearest neighbor

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC

In [12]:
X, y = select_images(images, labels, n_samples)

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [14]:
my_img_train = np.random.choice(range(10), 7, replace=False)
my_img_test = np.delete(np.arange(0,10), my_img_train)

In [15]:
X_train_self = my_images[my_img_train].reshape(7,1024)
X_test_self = my_images[my_img_test].reshape(3,1024)
y_train_self = my_labels[my_img_train]
y_test_self = my_labels[my_img_test]

In [16]:
X_train = np.concatenate((X_train, X_train_self), axis=0)
X_test = np.concatenate((X_test, X_test_self), axis=0)
y_train = np.concatenate((y_train, y_train_self), axis=0)
y_test = np.concatenate((y_test, y_test_self), axis=0)

In [17]:
dimensions = [80,200]
C = [pow(10,-2), pow(10,-1), 1]

In [18]:
#C = np.arange(0.01, 1, 0.01)
#dimensions = np.arange(10, 1000, 10)

In [19]:
acc = np.zeros((len(C), len(dimensions)))

In [21]:
for i, dim in enumerate(dimensions):
    for j, c in enumerate(C):
        print('Dimension: {}, C: {}'.format(dim, c))
        # reduce the dimensionality of training data
        eig_val, eig_vec = PCA(X_train)
        centred_data = get_centred_data(X_train)
        X_pca_train = get_reduced_dim_data(eig_vec, centred_data, dim).real

        # reduce the dimensionality of test data
        centred_data = get_centred_data(X_test)
        X_pca_test = get_reduced_dim_data(eig_vec, centred_data, dim).real

        # Create KNN classifier
        clf = LinearSVC(random_state=0, C=c)
        # Fit the classifier to the data
        clf.fit(X_pca_train, y_train)
        y_pred = clf.predict(X_pca_test)
        score = clf.score(X_pca_test, y_test)
        print(score)

Dimension: 80, C: 0.01




0.7556207233626588
Dimension: 80, C: 0.1




0.7849462365591398
Dimension: 80, C: 1




0.7839687194525904
Dimension: 200, C: 0.01




0.9257086999022482
Dimension: 200, C: 0.1




0.9247311827956989
Dimension: 200, C: 1
0.8993157380254154


