# Import libraries and modules

In [None]:
# Attach Google Drive to the Colab filesystem; every data/model path in this
# notebook lives under the /gdrive mount point.
from google.colab import drive

drive.mount(mountpoint='/gdrive')

Mounted at /gdrive


In [None]:
# %pip install deepface  # uncomment and run once on a fresh runtime where deepface is not installed

In [None]:
# !pip install ipython-autotime
# %load_ext autotime

time: 217 µs (started: 2024-05-12 05:10:48 +00:00)


In [None]:
import os
from deepface import DeepFace
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

from google.colab import output

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import cv2 as cv
from google.colab.patches import cv2_imshow

24-05-14 06:38:11 - Directory /root/.deepface created
24-05-14 06:38:11 - Directory /root/.deepface/weights created


# Convert all images to embedding vectors

In [None]:
# Build the full dataset: one Facenet512 embedding per .jpg image, labelled by
# the position of the person's folder in `names`.
backends = ['opencv', 'dlib']

folder_path = '/gdrive/MyDrive/splitted_data'
names = ['/Bao', '/Khanh', '/Vy']  # leading '/' is required: paths are built as folder_path + name

X = []  # embedding vectors, one per successfully processed image
y = []  # integer label for the matching row of X (index into `names`)

for label, name in enumerate(names):
  person_dir = folder_path + name
  # os.listdir returns entries in arbitrary order; sorting keeps the row order
  # of X/y (and therefore the seeded train/test split) identical across runs.
  for file_name in sorted(os.listdir(person_dir)):
    if not file_name.endswith(".jpg"):
      continue
    image_path = os.path.join(person_dir, file_name)
    try:
      embedding_objs = DeepFace.represent(
        img_path = image_path,
        model_name = 'Facenet512',
        detector_backend = backends[0]
      )
      # Only the first returned entry is used for each image.
      X.append(embedding_objs[0]['embedding'])
      y.append(label)
    except Exception as e:
      # Best effort: skip images that fail, but report which file it was.
      print(f"Skipped {image_path}: {e}")

time: 8min 7s (started: 2024-05-12 05:11:30 +00:00)


# Convert 225 random images (per label) to embedding vectors

In [None]:
from random import Random, sample  # `sample` stays imported so the name remains available as before

# Build the reduced dataset: up to 225 randomly chosen .jpg images per person,
# each converted to a Facenet512 embedding.
X_225 = []
y_225 = []

# Dedicated, seeded generator so the same images are drawn on every run.
sampler = Random(42)

for label, name in enumerate(names):
  person_dir = folder_path + name
  # Sorted so the seeded draw does not depend on os.listdir's arbitrary order.
  image_paths = sorted(
    os.path.join(person_dir, file_name)
    for file_name in os.listdir(person_dir)
    if file_name.endswith(".jpg")
  )

  # sample() raises ValueError when asked for more items than exist, so cap
  # the request and warn instead of aborting the whole run.
  n_wanted = min(225, len(image_paths))
  if n_wanted < 225:
    print(f"Only {n_wanted} .jpg images in {person_dir}; using all of them")
  image_paths_225 = sampler.sample(image_paths, n_wanted)

  for image_path in image_paths_225:
    try:
      embedding_objs = DeepFace.represent(
        img_path = image_path,
        model_name = 'Facenet512',
        detector_backend = backends[0]
      )
      # Only the first returned entry is used for each image.
      X_225.append(embedding_objs[0]['embedding'])
      y_225.append(label)
    except Exception as e:
      # Best effort: a failed image is skipped, so a label can end up with
      # fewer rows than were sampled.
      print(f"Skipped {image_path}: {e}")

time: 4min 4s (started: 2024-05-12 05:42:12 +00:00)


# Save data

In [None]:
import pandas as pd

# Persist both datasets (embeddings and labels) to Google Drive as CSV files.
# NOTE(review): the later reload cell reads X_225.csv / y_225.csv from
# /gdrive/MyDrive/data_csv/, not from the folder written here — confirm whether
# the files are moved by hand or one of the two locations should change.
for data, csv_path in (
  (X, '/gdrive/MyDrive/X.csv'),
  (y, '/gdrive/MyDrive/y.csv'),
  (X_225, '/gdrive/MyDrive/X_225.csv'),
  (y_225, '/gdrive/MyDrive/y_225.csv'),
):
  df = pd.DataFrame(data)
  # Write straight to disk; the earlier extra to_csv() call only built an
  # unused in-memory copy of the whole CSV text.
  df.to_csv(csv_path, index=False)

# Build 3 models with all data

In [None]:
# Hold out 20% of the full dataset for testing; the fixed random_state keeps
# the split repeatable for a given X/y.
split_all = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_all

time: 2.26 ms (started: 2024-05-12 05:52:11 +00:00)


In [None]:
# k-nearest-neighbours classifier. The hyperparameters are the ones reported as
# best by the grid search in this notebook. fit() returns the estimator itself,
# so construction and training are chained.
myKNN = KNeighborsClassifier(
  weights='uniform',
  n_neighbors=5,
).fit(X_train, y_train)

# Predicted labels for the held-out test embeddings.
y_KNN_predicted = myKNN.predict(X_test)

time: 82.2 ms (started: 2024-05-12 05:52:13 +00:00)


In [None]:
# Linear-kernel support vector classifier; fit() returns the estimator itself,
# so construction and training are chained.
mySVM = SVC(
  kernel='linear',
  C=100,
).fit(X_train, y_train)

# Predicted labels for the held-out test embeddings.
y_SVM_predicted = mySVM.predict(X_test)

time: 53.7 ms (started: 2024-05-12 05:52:15 +00:00)


In [None]:
# Softmax (multinomial logistic) regression on the training embeddings.
# `multi_class` is intentionally not passed: scikit-learn has deprecated the
# parameter, and with the lbfgs solver on this three-class problem the default
# already fits a multinomial model, so the classifier is unchanged.
mySoftmax = LogisticRegression(C=100, solver='lbfgs')

mySoftmax.fit(X_train, y_train)
# Predicted labels for the held-out test embeddings.
y_Softmax_predicted = mySoftmax.predict(X_test)

time: 303 ms (started: 2024-05-12 05:52:18 +00:00)


In [None]:
# Score each model's test-set predictions against the true labels.
acc_KNN, acc_SVM, acc_Softmax = (
  accuracy_score(y_test, predicted)
  for predicted in (y_KNN_predicted, y_SVM_predicted, y_Softmax_predicted)
)

# Report the accuracies as percentages.
print("With 500 data/label:")
for caption, accuracy in (
  ("Accuracy of KNN: ", acc_KNN),
  ("Accuracy of SVM: ", acc_SVM),
  ("Accuracy of Softmax: ", acc_Softmax),
):
  print(caption, accuracy*100, "%")

With 500 data/label:
Accuracy of KNN:  100.0 %
Accuracy of SVM:  100.0 %
Accuracy of Softmax:  100.0 %
time: 8.86 ms (started: 2024-05-12 05:52:22 +00:00)


# Build 3 models with 225 data (per label)

In [None]:
# Repeat the experiment on the 225-images-per-label subset.
# NOTE: this cell rebinds X_train/X_test/y_train/y_test, the three model names
# and the accuracy variables, replacing the values from the full-data run.
X_train, X_test, y_train, y_test = train_test_split(X_225, y_225, test_size=0.2, random_state=42)

myKNN = KNeighborsClassifier(n_neighbors=5, weights='uniform')
mySVM = SVC(C=100, kernel='linear')
# `multi_class` is intentionally not passed: scikit-learn has deprecated the
# parameter, and with lbfgs on this three-class problem the default already
# fits a multinomial (softmax) model.
mySoftmax = LogisticRegression(C=100, solver='lbfgs')

# Train all three classifiers on the same training split.
for model in (myKNN, mySVM, mySoftmax):
  model.fit(X_train, y_train)

y_KNN_predicted = myKNN.predict(X_test)
y_SVM_predicted = mySVM.predict(X_test)
y_Softmax_predicted = mySoftmax.predict(X_test)

acc_KNN = accuracy_score(y_test, y_KNN_predicted)
acc_SVM = accuracy_score(y_test, y_SVM_predicted)
acc_Softmax = accuracy_score(y_test, y_Softmax_predicted)

# Report the accuracies as percentages.
print("With 225 data/label:")
print("Accuracy of KNN: ", acc_KNN*100, "%")
print("Accuracy of SVM: ", acc_SVM*100, "%")
print("Accuracy of Softmax: ", acc_Softmax*100, "%")

With 225 data/label:
Accuracy of KNN:  100.0 %
Accuracy of SVM:  100.0 %
Accuracy of Softmax:  100.0 %


# Save models

In [None]:
import joblib

# Destination files on Google Drive for the three classifiers trained on the
# 225-per-label subset.
knn_model_path = '/gdrive/MyDrive/myKNN_225.joblib'
svm_model_path = '/gdrive/MyDrive/mySVM_225.joblib'
softmax_model_path = '/gdrive/MyDrive/mySoftmax_225.joblib'

# Serialize each fitted estimator to its file.
joblib.dump(myKNN, knn_model_path)
joblib.dump(mySVM, svm_model_path)
joblib.dump(mySoftmax, softmax_model_path)

# Use grid search to find good hyperparameters

In [None]:
import pandas as pd

# Reload the 225-per-label embeddings and labels from the CSV files on Drive.
X_225 = pd.read_csv('/gdrive/MyDrive/data_csv/X_225.csv')
# The label file holds a single column; take it as a 1-D Series so that
# scikit-learn receives 1-D labels instead of an (n, 1) DataFrame.
y_225 = pd.read_csv('/gdrive/MyDrive/data_csv/y_225.csv').iloc[:, 0]

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate hyperparameter values to try for each classifier.
param_grid_KNN = dict(
  n_neighbors=[5, 7, 9, 11],
  weights=['uniform', 'distance'],
)
param_grid_SVM = dict(
  C=[100, 10, 1],
  kernel=['linear', 'poly', 'rbf'],
)

# Unconfigured estimators; the search supplies the hyperparameters.
model_KNN = KNeighborsClassifier()
model_SVM = SVC()

# Search each grid on the 225-per-label data, scored by accuracy.
# fit() returns the search object itself, so construction and fitting are chained.
grid_search_KNN = GridSearchCV(
  model_KNN, param_grid_KNN, scoring='accuracy'
).fit(X_225, y_225)
grid_search_SVM = GridSearchCV(
  model_SVM, param_grid_SVM, scoring='accuracy'
).fit(X_225, y_225)

print("Best parameters of KNN: ", grid_search_KNN.best_params_, "\n")
print("Best parameters of SVM: ", grid_search_SVM.best_params_)

Best parameters of KNN:  {'n_neighbors': 5, 'weights': 'uniform'} 

Best parameters of SVM:  {'C': 100, 'kernel': 'linear'}
