In [None]:
import pickle, shutil, random, cv2

import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from tqdm import tqdm
from collections import Counter

from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, KFold, train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import mediapipe as mp
from mediapipe import Image
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

In [None]:
# data paths (dataset directory names, resolved relative to BASE_PATH / MODEL_PATH)
EMOREACT = Path('EmoReact')
FER = Path('FER-2013')
KDEF = Path('KDEF-AKDEF')
NIMH = Path('NIMH-CHEFS')

# general paths
BASE_PATH = Path('/project/volume/data/out')
MODEL_PATH = Path('/project/volume/models')

###################
# SET DATASET HERE:
DATA = FER
###################

# dataset specific paths
CURRENT_PATH = BASE_PATH / DATA

# Class labels are the names of the sub-directories of the dataset folder.
# Sorted because iterdir() yields entries in arbitrary, filesystem-dependent order.
LABELS = sorted(f.name for f in CURRENT_PATH.iterdir() if f.is_dir())

# Sorted for the same reason: a deterministic file order is required for the
# seeded train/test split further down to select the same samples on every run.
IMAGE_PATHS = sorted(CURRENT_PATH.rglob('*.jpg'))
IMAGE_PATHS_STR = [str(path) for path in IMAGE_PATHS]

In [None]:
# Preview up to ten images in a single row, titled with their class label
# (the name of the directory that contains the file).
fig = plt.figure(figsize=(20, 20))

# Slicing instead of range(10) keeps the cell working when fewer than ten images exist.
for position, image_path in enumerate(IMAGE_PATHS[:10], start=1):
    ax = fig.add_subplot(1, 10, position)
    # cv2.imread returns channels in BGR order while imshow expects RGB;
    # a colormap such as 'gray' is ignored for 3-channel input, so convert explicitly.
    ax.imshow(cv2.cvtColor(cv2.imread(str(image_path)), cv2.COLOR_BGR2RGB))
    ax.set_title(image_path.parent.name)

plt.show()

In [None]:
# Count images per class; the class is the name of the directory holding the file.
nbr_per_cat = Counter(Path(image_path).parent.name for image_path in IMAGE_PATHS)
categories = list(nbr_per_cat)
counts = [nbr_per_cat[category] for category in categories]

fig, ax = plt.subplots(figsize=(9, 3))
bars = ax.bar(categories, counts, width=0.5)

ax.set_xlabel('Categories')
ax.set_ylabel('Counts')
ax.set_title('Number of Files per Category')
plt.xticks(rotation=45, ha='right')

# Write the exact count centred just above each bar.
for rect, value in zip(bars, counts):
    x_center = rect.get_x() + rect.get_width() / 2
    ax.text(x_center, rect.get_height(), str(value), ha='center', va='bottom')

plt.tight_layout()
plt.show()

In [None]:
def read_mp_cv2_image(image_path):
    """Read one image file with both MediaPipe and OpenCV.

    Args:
        image_path: Location of the image; converted with ``str()`` before use.

    Returns:
        A ``(mediapipe_image, cv2_image)`` tuple holding the results of
        ``Image.create_from_file`` and ``cv2.imread`` for that path.
    """
    path_text = str(image_path)
    mediapipe_image = Image.create_from_file(path_text)
    opencv_image = cv2.imread(path_text)
    return mediapipe_image, opencv_image

def preprocess_image(file, img_size):
    """Load an image, convert it to grayscale and resize it to a square.

    Args:
        file: Path of the image to read; converted with ``str()`` before use.
        img_size: Side length in pixels of the returned square image.

    Returns:
        The grayscale image resized to ``(img_size, img_size)``.

    Raises:
        ValueError: If OpenCV cannot read ``file``.
    """
    img = cv2.imread(str(file))
    # cv2.imread signals failure by returning None instead of raising; fail here
    # with the offending path rather than with an opaque cvtColor assertion.
    if img is None:
        raise ValueError(f"Could not read image: {file}")
    imgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return cv2.resize(imgray, (img_size, img_size))

def print_score(clf, x_train, y_train, x_test, y_test, train=True):
    """Print accuracy, classification report and confusion matrix for one split.

    Args:
        clf: Fitted estimator exposing ``predict``.
        x_train: Training features.
        y_train: Training labels.
        x_test: Test features.
        y_test: Test labels.
        train: If truthy, report on the training split; otherwise report on the
            test split.
    """
    # Select the split once so that both cases share a single reporting path.
    if train:
        split_name, features, targets = "Train", x_train, y_train
    else:
        split_name, features, targets = "Test", x_test, y_test

    y_prediction = clf.predict(features)
    clf_report = classification_report(targets, y_prediction)
    separator = "_______________________________________________"

    print(f"{split_name} Result:\n================================================")
    print(f"Accuracy Score: {accuracy_score(targets, y_prediction) * 100:.2f}%")
    print(separator)
    print(f"CLASSIFICATION REPORT:\n{clf_report}")
    print(separator)
    print(f"Confusion Matrix: \n {confusion_matrix(targets, y_prediction)}\n")

## Facial Landmarks

In [None]:
# Location of the MediaPipe Face Landmarker model bundle, derived from MODEL_PATH
# so the models directory is configured in a single place.
model_path_mesh = str(MODEL_PATH / 'face_landmarker.task')

base_options_mesh = python.BaseOptions(model_asset_path=model_path_mesh)
# Blendshape output is enabled because the 'blendshape' mode of extract_fetures
# reads detection_result.face_blendshapes, which stays empty when it is disabled.
options_mesh = vision.FaceLandmarkerOptions(base_options=base_options_mesh,
                                       output_face_blendshapes=True,
                                       output_facial_transformation_matrixes=True,
                                       num_faces=1)
detector_mesh = vision.FaceLandmarker.create_from_options(options_mesh)

In [None]:
# feature extraction
def _hog_feature_size(image_shape, orientations, pixels_per_cell, cells_per_block):
    """Return the HOG descriptor length for a square image of side ``image_shape``."""
    cells_per_side = image_shape // pixels_per_cell
    blocks_per_side = cells_per_side - cells_per_block + 1
    features_per_block = cells_per_block * cells_per_block * orientations
    return blocks_per_side * blocks_per_side * features_per_block


def extract_fetures(feature='pixels', img_size=64, **kwargs):
    """Build a feature matrix and label vector from every file in ``IMAGE_PATHS``.

    Each image is loaded through ``preprocess_image(file, img_size)`` and its
    label is the name of the directory that contains the file.

    Args:
        feature: One of ``'pixel'`` (``'pixels'`` is accepted as an alias),
            ``'hog'``, ``'landmark'`` or ``'blendshape'``.
        img_size: Side length passed to ``preprocess_image``.
        **kwargs: For ``feature='hog'``: ``orientations``, ``pixels_per_cell``
            and ``cells_per_block`` (all required) and ``image_shape``
            (optional, defaults to ``img_size``; only used to print the
            expected descriptor length).

    Returns:
        ``(data, labels)`` as NumPy arrays with one entry per image that
        yielded a feature vector. In the ``'landmark'`` and ``'blendshape'``
        modes, images for which the detector returns no result are skipped.

    Raises:
        ValueError: If ``feature`` is not supported or a required HOG
            parameter is missing.
    """
    # The signature default is 'pixels' while the branch historically tested
    # 'pixel'; accept both spellings so the default call works.
    if feature == 'pixels':
        feature = 'pixel'

    banners = {
        'landmark': "[INFO] Extracting facial landmarks ...",
        'pixel': "[INFO] Extracting image pixels ...",
        'hog': "[INFO] Extracting hog feature vectors ...",
        'blendshape': "[INFO] Extracting facial blendshapes ...",
    }
    if feature not in banners:
        raise ValueError(f"Unsupported feature type: {feature}! Should be one of: [blendshape, pixel, landmark, hog]")

    # Announce the mode once instead of once per image, which floods the progress bar.
    print(banners[feature])

    if feature == 'hog':
        orientations = kwargs.get('orientations')
        pixels_per_cell = kwargs.get('pixels_per_cell')
        cells_per_block = kwargs.get('cells_per_block')
        if orientations is None or pixels_per_cell is None or cells_per_block is None:
            raise ValueError("orientations, pixels_per_cell and cells_per_block are required for hog feature extraction")

        # The descriptor is computed on the img_size x img_size image, so that
        # is the natural default for the informational size printout.
        image_shape = kwargs.get('image_shape')
        if image_shape is None:
            image_shape = img_size
        print("[INFO] Size of HOG feature vector ...",
              _hog_feature_size(image_shape, orientations, pixels_per_cell, cells_per_block))

    labels = []
    data = []

    for file in tqdm(IMAGE_PATHS):

        img_cv2 = preprocess_image(file, img_size)

        if feature == 'pixel':
            vector = np.array(img_cv2).flatten()

        elif feature == 'hog':
            vector = hog(
                img_cv2, orientations=orientations,
                pixels_per_cell=(pixels_per_cell, pixels_per_cell),
                cells_per_block=(cells_per_block, cells_per_block),
                block_norm='L2-Hys',
                transform_sqrt=False,
                feature_vector=True
                )

        else:
            # preprocess_image yields a single-channel image, but the MediaPipe
            # image is declared as SRGB (three channels): expand it first.
            rgb_array = cv2.cvtColor(img_cv2, cv2.COLOR_GRAY2RGB)
            rgb_frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_array)
            detection_result = detector_mesh.detect(rgb_frame)

            if feature == 'landmark':
                if not detection_result.face_landmarks:
                    continue  # nothing detected: skip this image
                vector = np.array([[lm.x, lm.y, lm.z] for lm in detection_result.face_landmarks[0]]).flatten()
            else:
                # NOTE: face_blendshapes is only populated when detector_mesh was
                # created with output_face_blendshapes=True; otherwise every image is skipped.
                if not detection_result.face_blendshapes:
                    continue
                vector = np.array([[bs.index, bs.score] for bs in detection_result.face_blendshapes[0]]).flatten()

        labels.append(file.parent.name)
        data.append(vector)

    return np.array(data), np.array(labels)

In [None]:
# Feature type used for extraction, model file naming and evaluation output.
FEATURE = 'hog'

# HOG settings forwarded to extract_fetures as keyword arguments.
hog_params = {
    'orientations': 7,
    # Side length of the square input image; extract_fetures expects this key
    # for HOG. Keep it equal to the img_size passed to extract_fetures.
    'image_shape': 64,
    'pixels_per_cell': 8,
    'cells_per_block': 4,
}

In [None]:
# Extract the selected feature for every image.
data, labels = extract_fetures(feature=FEATURE, img_size=64, **hog_params)

# Hold out a stratified 20% test split; the seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, shuffle=True, stratify=labels, random_state=42)
print("[INFO] Number of images used in training ...", x_train.shape[0])
print("[INFO] Number of images used in testing ...", x_test.shape[0])

# Grid-search the SVM hyper-parameters on the training split only.
classifier = SVC()
parameters = {'gamma': [0.1, 0.01, 0.001], 'C': [1, 10, 100, 1000]}

grid_search = GridSearchCV(classifier, parameters, n_jobs=-1)
grid_search.fit(x_train, y_train)
best_estimator = grid_search.best_estimator_
print("[INFO] Best params ...", grid_search.best_params_)

# Persist the best model. The target directory is created on demand and the
# file is written inside a context manager so the handle is always closed.
model_dir = MODEL_PATH / DATA
model_dir.mkdir(parents=True, exist_ok=True)
with open(model_dir / f'{FEATURE}_model.p', 'wb') as model_file:
    pickle.dump(best_estimator, model_file)

print_score(best_estimator, x_train, y_train, x_test, y_test, train=True)
print_score(best_estimator, x_train, y_train, x_test, y_test, train=False)

## K-fold Cross-Validation

In [None]:
# Fold counts to evaluate the tuned estimator with.
n_splits_values = [3, 5, 10]

print(f"[INFO] Evaluating mode: {FEATURE}")

for n_splits in n_splits_values:
    # Shuffled, seeded folds over the complete feature matrix.
    cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    scores = cross_val_score(
        best_estimator,
        data,
        labels,
        scoring='accuracy',
        cv=cv,
        n_jobs=-1,
    )
    mean_accuracy = scores.mean()
    accuracy_std = scores.std()
    print(f"{n_splits}-Fold CV: {mean_accuracy:.2f} accuracy with a standard deviation of {accuracy_std:.2f}")