In [1]:
import os
import scipy
import numpy as np
import cv2 as cv

In [2]:
img_path_list = os.listdir('../face_data')
img_path_list = [os.path.join('../face_data', img_path) for img_path in img_path_list]

In [3]:
target = []
for img_path in img_path_list:
    if ('/1_' in img_path):
        target.append(1)
    elif ('/2_' in img_path):
        target.append(2)
    elif ('/7_' in img_path):
        target.append(3)
    elif ('/12_' in img_path):
        target.append(3)
    elif ('/17_' in img_path):
        target.append(4)

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
X_train_filename, X_test_filename, y_train_filename, y_test_filename = train_test_split(
    img_path_list, target, test_size=0.2, random_state=0
)

In [6]:
def read_batch_image(img_path_list, resized_dimension=(60,60)):
    image_data_list = []
    for img_path in img_path_list:
        img = cv.resize(cv.imread(img_path, cv.IMREAD_GRAYSCALE), resized_dimension, interpolation=cv.INTER_AREA)
        image_data_list.append(img)
    return image_data_list

def flatten_batch_image(image_data_list):
    flattened_image_list = []
    for i in range(len(image_data_list)):
        flattened_image = image_data_list[i].flatten()
        flattened_image_list.append(flattened_image)
    return flattened_image_list

## PCA

In [7]:
X_train_image_data = np.matrix(flatten_batch_image(read_batch_image(X_train_filename)))
X_test_image_data = np.matrix(flatten_batch_image(read_batch_image(X_test_filename)))

In [8]:
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score

In [9]:
pca_component = 15
pca = PCA(pca_component, whiten=True).fit(X_train_image_data)

In [10]:
X_train_pca = pca.transform(X_train_image_data)
X_test_pca = pca.transform(X_test_image_data)

## Model

### kNN

In [11]:
knn_classifier = KNeighborsClassifier(n_neighbors = 3)
knn_classifier.fit(X_train_pca, y_train_filename)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform')

In [12]:
predicted = knn_classifier.predict(X_test_pca)
confidence = knn_classifier.predict_proba(X_test_pca)
print(accuracy_score(predicted, y_test_filename))
print(f1_score(predicted, y_test_filename, average='weighted'))

0.9649122807017544
0.9649122807017544


In [13]:
confidence

array([[0.        , 0.        , 0.        , 1.        ],
       [0.        , 0.        , 1.        , 0.        ],
       [0.        , 0.        , 1.        , 0.        ],
       [0.        , 0.        , 1.        , 0.        ],
       [0.        , 0.        , 0.        , 1.        ],
       [0.        , 1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 1.        , 0.        ],
       [0.        , 0.        , 0.66666667, 0.33333333],
       [0.        , 0.        , 1.        , 0.        ],
       [0.        , 0.        , 0.        , 1.        ],
       [0.        , 0.        , 1.        , 0.        ],
       [0.        , 0.        , 1.        , 0.        ],
       [0.        , 1.        , 0.        , 0.        ],
       [0.        , 0.        , 1.        , 0.        ],
       [0.        , 0.        , 1.        , 0.        ],
       [0.        , 0.        , 1.        , 0.        ],
       [0.        , 0.        ,

## SVM

In [14]:
from sklearn.svm import SVC

In [15]:
svm_classifier = SVC(gamma='scale')
svm_classifier.fit(X_train_pca, y_train_filename)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [16]:
confidence = svm_classifier.decision_function(X_test_pca)
predicted = svm_classifier.predict(X_test_pca)

print(confidence)
print(predicted)
print(y_test_filename)
# print(accuracy_score(predicted, y_test_filename))
# print(f1_score(predicted, y_test_filename, average='weighted'))

[[-0.24816251  0.82081083  2.04314422  3.38420746]
 [ 0.85501315  0.85082759  3.4202649   0.87389436]
 [-0.21756448  0.87066664  3.40821427  1.93868358]
 [-0.22077831  0.86016479  3.38091054  1.97970299]
 [-0.24283593  0.90965574  1.96039929  3.3727809 ]
 [-0.31157564  3.31991115  0.9429774   2.04868709]
 [ 3.37790497 -0.29159416  1.99107643  0.92261276]
 [-0.20802822  0.87881896  3.38012556  1.9490837 ]
 [-0.20704099  0.83182479  3.28714752  2.08806867]
 [ 0.79963455 -0.22693612  3.4454857   1.98181587]
 [-0.2602002   0.83846901  2.04162407  3.38010712]
 [ 1.90858349 -0.15545908  3.39174867  0.85512693]
 [-0.21436103  0.9739417   3.2352063   2.00521303]
 [-0.31117153  3.5         2.0086364   0.80253513]
 [ 0.83452851 -0.22103457  3.47699738  1.90950868]
 [-0.20644637  0.86968327  3.40661812  1.93014497]
 [-0.23598785  0.8316089   3.40562377  1.99875519]
 [-0.19126287  0.88044595  3.40452219  1.90629473]
 [-0.28725108  3.44555256  1.94226854  0.89942997]
 [-0.19547033  0.82663706  3.36

## Matching using Distance

In [17]:
from scipy.spatial.distance import euclidean

In [18]:
def predict(image_pca_database, label_database, pca_data, top_N=3, majority_threshold=0.7):
    distance_result_list = []
    for idx, image_pca_iterator in enumerate(image_pca_database):
        distance = euclidean(image_pca_iterator, pca_data)
        distance_result_list.append((distance, label_database[idx]))
    
    distance_result_list.sort()
    top_arr = []
    for top_i in range(top_N):
        top_arr.append(distance_result_list[top_i][1])
    maximum_label = None
    maximum_count = -1
    for element in top_arr:
        count_element = top_arr.count(element)
        if count_element > maximum_count:
            maximum_count = count_element
            maximum_label = element
            if maximum_count >= int(top_N * majority_threshold):
                break
    return maximum_label

In [19]:
predict(X_train_pca, y_train_filename, X_test_pca[0])

4

In [20]:
predicted_result_list = []
for pca_data in X_test_pca:
    predicted_result_list.append(predict(X_train_pca, y_train_filename, pca_data))

In [21]:
accuracy_score(predicted_result_list, y_test_filename)

0.9649122807017544