In [None]:
# HOG feature vectors
# Installs gdown into the kernel environment, then downloads the shared
# Google Drive folder referenced by `url`.
%pip install gdown
import gdown

url = "https://drive.google.com/drive/folders/1U5lsvfh8zSOfPatqXvdGHLo4j_XkEyw7?usp=share_link"
# No output directory is passed to download_folder.
# NOTE(review): confirm the download lands where the rest of the notebook looks for it.
gdown.download_folder(url, quiet=True)

In [None]:
# ResNet 128D vectors
# Installs gdown into the kernel environment, then downloads the shared
# Google Drive folder referenced by `url`.
%pip install gdown
import gdown

url = "https://drive.google.com/drive/folders/1Wjkc9I6N-InDznjsu5ETmtAecPSLQYQQ?usp=share_link"
# No output directory is passed to download_folder.
# NOTE(review): confirm the download lands where the rest of the notebook looks for it.
gdown.download_folder(url, quiet=True)

In [None]:
# VGGFace vectors
# Installs gdown into the kernel environment, then downloads the shared
# Google Drive folder referenced by `url`.
%pip install gdown
import gdown

url = "https://drive.google.com/drive/folders/1evQY1hOVcUbg3l4mpYsMguTCqtTx78Av?usp=share_link"
# No output directory is passed to download_folder.
# NOTE(review): confirm the download lands where the rest of the notebook looks for it.
gdown.download_folder(url, quiet=True)

In [None]:
# ResNet50 vectors
# Installs gdown into the kernel environment, then downloads the shared
# Google Drive folder referenced by `url`.
%pip install gdown
import gdown

url = "https://drive.google.com/drive/folders/1qxOth628bu8BsdnM_BdKjfS76gL6FtJ2?usp=share_link"
# No output directory is passed to download_folder.
# NOTE(review): confirm the download lands where the rest of the notebook looks for it.
gdown.download_folder(url, quiet=True)

In [3]:
import os
import zipfile
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, precision_recall_curve
from collections import Counter
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler, SMOTE

In [4]:
def tune_model(X_train, y_train, kernel, class_weight):
  """Estimate the accuracy of one SVM configuration with 3-fold cross-validation.

  The training data is split with a shuffled, seeded KFold. In every fold the
  features are standardized using statistics of the fitting part only, an SVC
  is trained on it and scored on the held-out part. Each fold accuracy and the
  final mean are printed.

  Args:
    X_train: feature matrix; must support integer-array indexing (it is
      indexed with the index arrays produced by KFold.split).
    y_train: labels, indexable the same way as X_train.
    kernel: value forwarded to SVC(kernel=...).
    class_weight: value forwarded to SVC(class_weight=...).

  Returns:
    The mean of the per-fold accuracies.
  """
  # cross-validation on the training set
  splitter = KFold(n_splits=3, shuffle=True, random_state=42)
  standardizer = StandardScaler()
  accuracies = []

  for fit_rows, held_out_rows in splitter.split(X_train):
    fit_features, held_out_features = X_train[fit_rows], X_train[held_out_rows]
    fit_labels, held_out_labels = y_train[fit_rows], y_train[held_out_rows]

    # fit_transform re-estimates the scaling on every fold, so the held-out
    # rows never influence the statistics
    fit_features = standardizer.fit_transform(fit_features)
    held_out_features = standardizer.transform(held_out_features)

    classifier = SVC(kernel=kernel, random_state=42, class_weight=class_weight)
    classifier.fit(fit_features, fit_labels)

    fold_accuracy = accuracy_score(held_out_labels, classifier.predict(held_out_features))
    accuracies.append(fold_accuracy)
    print(fold_accuracy)

  mean_accuracy = np.mean(accuracies)
  print("mean score: ", mean_accuracy)
  return mean_accuracy

In [8]:
def train_evaluate_svm(X_train, X_test, y_train, y_test):
  """Pick an SVM configuration by cross-validation, refit it and print test metrics.

  Every candidate in the grid is scored with tune_model (cross-validated
  accuracy on standardized features). The best candidate is refit on the
  whole training set, standardized the same way, and evaluated on the test
  set. The test accuracy and the classification report are printed.

  Args:
    X_train: training feature matrix (must be indexable the way tune_model
      requires).
    X_test: test feature matrix.
    y_train: training labels.
    y_test: test labels.

  Returns:
    None; the results are printed.

  Raises:
    ValueError: if no grid candidate produced a usable cross-validation score.
  """
  # weight every class by n_samples / class_count so that rare classes
  # contribute more to the SVM loss
  n_samples = len(X_train)
  class_weight = {label: n_samples / count for label, count in Counter(y_train).items()}

  # candidate [kernel, class_weight] pairs; the alternatives tried during
  # experimentation are kept commented out
  grid_params = [
      ['rbf', class_weight],
      #['rbf', None],
      #['sigmoid', class_weight],
      #['linear', class_weight]
  ]

  # start below any reachable accuracy so a candidate scoring 0.0 is still selected
  best_accuracy, best_params = float("-inf"), None
  for params in grid_params:
    kernel, curr_class_weight = params
    curr_accuracy = tune_model(X_train, y_train, kernel, curr_class_weight)
    if curr_accuracy > best_accuracy:
      best_accuracy, best_params = curr_accuracy, params

  if best_params is None:
    raise ValueError("no SVM configuration produced a valid cross-validation score")
  best_kernel, best_class_weight = best_params

  # standardize with statistics of the training data only; tune_model scores
  # candidates on standardized features, so the final model must see the same
  # preprocessing for the selected configuration to be meaningful
  scaler = StandardScaler()
  X_train_scaled = scaler.fit_transform(X_train)
  X_test_scaled = scaler.transform(X_test)

  # train the SVM classifier with the best parameters
  best_model = SVC(kernel=best_kernel, random_state=42, class_weight=best_class_weight)
  best_model.fit(X_train_scaled, y_train)

  # make predictions and evaluate
  y_pred = best_model.predict(X_test_scaled)
  accuracy = accuracy_score(y_test, y_pred)
  class_report = classification_report(y_test, y_pred)
  print("accuracy: ", accuracy)
  print(class_report)

In [6]:
# group by age
# Half-open [min_age, max_age) buckets and the representative age used as the
# class label of each bucket (same index in both lists).
ranges = [ [1,6], [6,13], [13,21], [21,36], [36,51], [51,76], [76,91], [91,117] ]
avg_age = [3, 9, 17, 28, 43, 63, 83, 104]
def dataset_and_labels(path):
  """Load every embedding in `path` and label it with its age bucket.

  File names must consist of exactly five "_"-separated fields with the age
  as the second field. The label of a sample is the `avg_age` entry of the
  `ranges` bucket that contains its age.

  Files are visited in sorted name order so the result does not depend on the
  arbitrary order of os.listdir. A file whose age falls outside every bucket
  is skipped entirely, which keeps the features and labels aligned.

  NOTE: this notebook defines dataset_and_labels in several cells (age,
  gender, race); the most recently executed cell is the one in effect.

  Args:
    path: directory containing the embedding files (loaded with np.load).

  Returns:
    (X, y): numpy arrays with one embedding and one integer label per kept file.

  Raises:
    ValueError: if a file name does not have five "_"-separated fields or its
      age field is not an integer.
  """
  X_train, y_train = [], []
  skipped = 0
  for file_name in sorted(os.listdir(path)):
    _, age, _, _, _ = file_name.split("_")
    age = int(age)

    # buckets do not overlap, so the first match is the only match
    label = next(
        (avg_age[idx] for idx, (min_age, max_age) in enumerate(ranges)
         if int(min_age) <= age < int(max_age)),
        None,
    )
    if label is None:
      # no bucket for this age: drop the sample instead of appending a
      # feature row without a label
      skipped += 1
      continue

    X_train.append(np.load(os.path.join(path, file_name)))
    y_train.append(int(label))

  if skipped:
    print("skipped", skipped, "files with an age outside the labelled ranges in", path)
  return np.array(X_train), np.array(y_train)

In [None]:
# group by gender

def dataset_and_labels(path):
  """Load every embedding in `path` and label it with its gender code.

  File names must consist of exactly five "_"-separated fields with the
  gender as the third field; that field is converted to int and used as the
  label.

  Files are visited in sorted name order so the result does not depend on the
  arbitrary order of os.listdir.

  NOTE: this notebook defines dataset_and_labels in several cells (age,
  gender, race); the most recently executed cell is the one in effect.

  Args:
    path: directory containing the embedding files (loaded with np.load).

  Returns:
    (X, y): numpy arrays with one embedding and one integer label per file.

  Raises:
    ValueError: if a file name does not have five "_"-separated fields or its
      gender field is not an integer.
  """
  X_train, y_train = [], []
  for file_name in sorted(os.listdir(path)):
    embedding = np.load(os.path.join(path, file_name))
    _, _, gender, _, _ = file_name.split("_")
    X_train.append(embedding)
    y_train.append(int(gender))
  return np.array(X_train), np.array(y_train)

In [8]:
# group by race

def dataset_and_labels(path):
  """Load every embedding in `path` and label it with its race code.

  File names must consist of exactly five "_"-separated fields with the race
  as the fourth field; that field is converted to int and used as the label.

  Files are visited in sorted name order so the result does not depend on the
  arbitrary order of os.listdir.

  NOTE: this notebook defines dataset_and_labels in several cells (age,
  gender, race); the most recently executed cell is the one in effect.

  Args:
    path: directory containing the embedding files (loaded with np.load).

  Returns:
    (X, y): numpy arrays with one embedding and one integer label per file.

  Raises:
    ValueError: if a file name does not have five "_"-separated fields or its
      race field is not an integer.
  """
  X_train, y_train = [], []
  for file_name in sorted(os.listdir(path)):
    embedding = np.load(os.path.join(path, file_name))
    _, _, _, race, _ = file_name.split("_")
    X_train.append(embedding)
    y_train.append(int(race))
  return np.array(X_train), np.array(y_train)

In [None]:
def main(folder_path="/content/ResNet50", regions=("mouth",), resampler=None):
  """Unpack the feature archives, then train and evaluate the SVM per face region.

  Args:
    folder_path: directory holding one sub-folder (or .zip archive) per face
      region; each region folder must contain "train" and "test" sub-folders
      readable by dataset_and_labels. Other folders used with this notebook:
      "/content/HOG_feature_vectors", "/content/ResNet_128D_vectors",
      "/content/VGG_Face_vectors".
    regions: names of the region sub-folders to train on, e.g. "eyes",
      "mouth", "nose". A single string is accepted as one region. The default
      trains on the mouth only.
    resampler: optional object exposing fit_resample(X, y); when given it is
      applied to the training split only, before training. Configurations
      tried with this notebook:
        SMOTE(sampling_strategy={2: 3500, 3: 3500, 4: 3000}, random_state=42)
        RandomOverSampler(sampling_strategy={2: 3500, 3: 3500, 4: 3000}, random_state=42)
        RandomUnderSampler(sampling_strategy={0: 5000}, random_state=42)

  Returns:
    None; train_evaluate_svm prints the metrics for every region.
  """
  if isinstance(regions, str):
    # a bare string would otherwise be iterated character by character
    regions = (regions,)

  # unpack every archive found in folder_path into a sibling folder named
  # after the archive
  for file_name in sorted(os.listdir(folder_path)):
    if not file_name.endswith(".zip"):
      continue

    file_path = os.path.join(folder_path, file_name)
    extract_folder = os.path.join(folder_path, file_name[:-len(".zip")])

    # an archive whose target folder already has content is treated as
    # unpacked, so re-running is cheap and never overwrites existing files
    if os.path.isdir(extract_folder) and os.listdir(extract_folder):
      continue

    os.makedirs(extract_folder, exist_ok=True)
    with zipfile.ZipFile(file_path, mode="r") as zip_ref:
      zip_ref.extractall(extract_folder)

  # load only the requested regions; nothing else is read from disk
  for region in regions:
    X_train, y_train = dataset_and_labels(os.path.join(folder_path, region, "train"))
    X_test, y_test = dataset_and_labels(os.path.join(folder_path, region, "test"))

    if resampler is not None:
      # rebalance the training split only; the test split keeps its
      # original class distribution
      X_train, y_train = resampler.fit_resample(X_train, y_train)

    print("region: ", region)
    train_evaluate_svm(X_train, X_test, y_train, y_test)


# Entry point: call main() when this code runs as the top-level module.
if __name__ == "__main__":
  main()