In [None]:
import cv2
import pickle
import random
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
from collections import Counter

from skimage.feature import hog
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, KFold, train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

import torch
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import pandas as pd

print('import successful')

In [None]:
# Data paths: folder name of each dataset below BASE_PATH (and below MODEL_PATH
# for the trained models).
EMOREACT = Path('EmoReact')
FER = Path('FER-2013')
KDEF = Path('KDEF-AKDEF')
NIMH = Path('NIMH-CHEFS')

# General paths
BASE_PATH = Path('/home/jovyan/work/data/out')
MODEL_PATH = Path('/home/jovyan/work/models')

# Set dataset here
DATA = NIMH

# Dataset-specific paths (sorted so the listing is the same on every run;
# directory iteration order is otherwise arbitrary).
CURRENT_PATH = BASE_PATH / DATA
LABELS = sorted(f.name for f in CURRENT_PATH.iterdir() if f.is_dir())
IMAGE_PATHS = sorted(CURRENT_PATH.rglob('*.jpg'))

# Constants for splitting dataset
TRAIN = 'train'
TEST = 'test'
VAL = 'val'

# Feature extraction method. Must be one of the names handled by
# extract_features(): 'pixels', 'landmarks', 'hog' or 'blendshapes'.
# (The previous value 'pixel' matched none of them and always ended in the
# "Unsupported feature type" error.)
FEATURE = 'pixels'

# Parameters for Histogram of Oriented Gradients (HOG) feature extraction
orientations = 7
pixels_per_cell = 8
cells_per_block = 4

hog_params = {
    'orientations': orientations,
    'pixels_per_cell': pixels_per_cell,
    'cells_per_block': cells_per_block
}

### Dataset class

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Image-folder dataset with plotting helpers and feature extraction.

    Every ``*.jpg`` found recursively below ``data_path`` is one sample; its
    label is the name of the directory that directly contains the file.

    The class keeps the name ``Dataset`` (which shadows the torch class imported
    at the top of the notebook) so existing cells keep working. The base class
    is referenced through ``torch.utils.data`` so that re-running this cell does
    not make the class inherit from its own previous definition.

    Args:
        data_path: Root directory of the dataset.
        img_size: Stored on the instance; not used by the methods of this class.
        feature: Feature type used by ``extract_features`` / ``to_df``; one of
            ``SUPPORTED_FEATURES``. Validated when ``extract_features`` runs.
        transform: Optional callable applied to the PIL image in ``__getitem__``.
        model_path_mesh: Path of the MediaPipe face-landmarker model file.
        **kwargs: HOG parameters (``orientations``, ``pixels_per_cell``,
            ``cells_per_block``); only needed for ``feature='hog'``.
    """

    # Feature names understood by extract_features().
    SUPPORTED_FEATURES = ('pixels', 'landmarks', 'hog', 'blendshapes')
    # Keyword arguments that must be supplied for feature='hog'.
    HOG_REQUIRED = ('orientations', 'pixels_per_cell', 'cells_per_block')

    def __init__(self, data_path, img_size=64, feature='pixels', transform=None, model_path_mesh='/home/jovyan/work/models/face_landmarker.task', **kwargs):
        self.data_path = Path(data_path)
        self.img_size = img_size
        # Previously the argument was dropped although extract_features() reads it.
        self.feature = feature
        self.transform = transform

        self.classes = self._get_classes()
        self.image_paths = self._get_image_paths()

        self.hog_args = kwargs
        self.labels = []
        self.data = []
        self.df = pd.DataFrame()

        # mediapipe
        self.model_path_mesh = model_path_mesh
        self.base_options_mesh = python.BaseOptions(model_asset_path=model_path_mesh)
        # Blendshape output must be switched on, otherwise the 'blendshapes'
        # feature never receives any data from the detector.
        self.options_mesh = vision.FaceLandmarkerOptions(base_options=self.base_options_mesh, output_face_blendshapes=True,
                                                         output_facial_transformation_matrixes=True, num_faces=1)
        self.detector_mesh = vision.FaceLandmarker.create_from_options(self.options_mesh)

    def __len__(self):
        """Return the number of image files found below ``data_path``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is a PIL image, or whatever ``transform`` returns for it; the
        label is the name of the image's parent directory.
        """
        img_path = self.image_paths[idx]
        img = Image.open(img_path)
        img.load()  # read the pixels now so the file handle is not left open

        if self.transform is not None:
            img = self.transform(img)

        label = Path(img_path).parent.name
        return img, label

    def _get_classes(self):
        """Return the sorted sub-directory names that act as class names.

        Uses ``data_path / TRAIN`` when that directory exists and falls back to
        ``data_path`` itself otherwise, instead of failing on a missing folder.
        """
        train_dir = self.data_path / TRAIN
        root = train_dir if train_dir.is_dir() else self.data_path
        return sorted(f.name for f in root.iterdir() if f.is_dir())

    def _get_image_paths(self):
        """Return all ``*.jpg`` paths below ``data_path`` in shuffled order."""
        # Sort first: the directory walk order is arbitrary, so a seeded
        # ``random`` would otherwise still give different orders between runs.
        paths = sorted(self.data_path.rglob('*.jpg'))
        random.shuffle(paths)
        return paths

    def show_samples(self):
        """Plot up to the first ten samples in one row, titled with their labels."""
        fig = plt.figure(figsize=(20, 20))

        for i in range(min(10, len(self))):
            ax = fig.add_subplot(1, 10, i + 1)
            img, label = self[i]

            # Convert before inspecting the shape: a PIL image has no ``ndim``.
            if isinstance(img, torch.Tensor):
                img = img.detach().cpu().numpy()
                if img.ndim == 3:
                    # assumes channel-first tensors (e.g. from ToTensor) -- TODO confirm
                    img = img.transpose(1, 2, 0)
            else:
                img = np.array(img)
            if img.ndim == 3 and img.shape[-1] == 1:
                img = img[..., 0]

            ax.imshow(img, cmap='gray')
            ax.set_title(label)
            ax.axis('off')
        plt.show()

    def show_image(self, idx):
        """Display sample ``idx`` as loaded by OpenCV."""
        img_cv2 = self.get_cv2_img(idx)
        if img_cv2 is None:
            raise ValueError(f"Could not read image: {self.image_paths[idx]}")
        # OpenCV loads BGR while matplotlib expects RGB.
        plt.imshow(cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB), cmap='gray')
        plt.show()

    def show_distribution(self):
        """Plot a bar chart with the number of images per label."""
        # The label is the parent directory name, so there is no need to open
        # and decode every image just to count labels.
        labels_count = Counter(Path(p).parent.name for p in self.image_paths)
        if not labels_count:
            print("[INFO] No images found, nothing to plot.")
            return
        labels, counts = zip(*sorted(labels_count.items()))

        plt.figure(figsize=(10, 3))
        bars = plt.bar(labels, counts, color='skyblue')
        # Name of the dataset folder instead of the notebook-global DATA.
        plt.xlabel(f'{self.data_path.name}')
        plt.ylabel('Count')
        plt.title('Counts per Emotion Category')
        plt.xticks(rotation=45, ha='right')
        plt.grid(axis='y', linestyle='--', alpha=0.7)

        for bar, count in zip(bars, counts):
            plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5, count,
                     ha='center', va='bottom', color='black', fontsize=8)

        plt.tight_layout()
        plt.show()

    def get_cv2_img(self, idx):
        """Return sample ``idx`` as read by ``cv2.imread`` (``None`` if unreadable)."""
        img_path = self.image_paths[idx]
        return cv2.imread(str(img_path))

    def _vector_from_cv2(self, img_cv2):
        """Compute the feature vector of one OpenCV image for a non-pixel feature.

        Returns ``None`` when the image could not be read or no face was found.
        """
        if img_cv2 is None:
            return None

        if self.feature == 'hog':
            # hog() is given a single-channel image; cv2.imread yields 3-channel BGR.
            gray = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2GRAY)
            pixels_per_cell = self.hog_args['pixels_per_cell']
            cells_per_block = self.hog_args['cells_per_block']
            return hog(
                gray, orientations=self.hog_args['orientations'],
                pixels_per_cell=(pixels_per_cell, pixels_per_cell),
                cells_per_block=(cells_per_block, cells_per_block),
                block_norm='L2-Hys',
                transform_sqrt=False,
                feature_vector=True
            )

        # MediaPipe is told the data is SRGB, so convert from OpenCV's BGR order.
        rgb = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)
        rgb_frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
        detection_result = self.detector_mesh.detect(rgb_frame)

        if self.feature == 'landmarks':
            if not detection_result.face_landmarks:
                return None
            return np.array([[lm.x, lm.y, lm.z] for lm in detection_result.face_landmarks[0]]).flatten()

        # 'blendshapes'
        if not detection_result.face_blendshapes:
            return None
        return np.array([[bs.index, bs.score] for bs in detection_result.face_blendshapes[0]]).flatten()

    def extract_features(self):
        """Extract ``self.feature`` vectors for all samples.

        Samples whose image cannot be read, or in which no face is detected
        (landmarks / blendshapes), are skipped. The results are stored in
        ``self.data`` / ``self.labels`` and returned.

        Returns:
            Tuple ``(data, labels)`` of numpy arrays. For ``'pixels'`` the data
            are the stacked image arrays (not flattened).

        Raises:
            ValueError: If ``self.feature`` is not supported or a required HOG
                parameter is missing.
        """
        # Validate up front so a misconfiguration fails before any image is read.
        if self.feature not in self.SUPPORTED_FEATURES:
            raise ValueError(f"[Value Error] Unsupported feature type: {self.feature}! Should be one of: {list(self.SUPPORTED_FEATURES)}")
        if self.feature == 'hog':
            missing = [key for key in self.HOG_REQUIRED if self.hog_args.get(key) is None]
            if missing:
                raise ValueError(f"{', '.join(missing)} required for hog feature extraction")

        print(f"[INFO] Extracting {self.feature} vectors ...")

        labels = []
        data = []
        skipped = 0

        for idx in tqdm(range(len(self))):
            if self.feature == 'pixels':
                img, label = self[idx]
                vector = np.array(img)
            else:
                label = Path(self.image_paths[idx]).parent.name
                vector = self._vector_from_cv2(self.get_cv2_img(idx))

            if vector is None:
                skipped += 1
                continue

            labels.append(label)
            data.append(vector)

        if skipped:
            print(f"[INFO] Skipped {skipped} image(s): unreadable or no face detected.")

        self.data = np.array(data)
        self.labels = np.array(labels)

        return self.data, self.labels

    def to_df(self, to_csv=False):
        """Return the extracted features as a DataFrame (one row per sample).

        The feature column is named after ``self.feature`` and holds one
        flattened vector per row; ``emotion`` holds the label.

        Args:
            to_csv: When true, also write the frame to ``features.csv``.

        Raises:
            ValueError: If no features have been extracted yet.
        """
        data = np.asarray(self.data)
        labels = np.asarray(self.labels)
        if data.size == 0:
            raise ValueError("No features available; call extract_features() first")

        # Use the number of extracted vectors: skipped samples make it smaller
        # than the number of images.
        flat = data.reshape(len(data), -1)

        df = pd.DataFrame()
        df[self.feature] = list(flat)
        df['emotion'] = labels.reshape(-1).tolist()
        if to_csv:
            df.to_csv('features.csv')
        self.df = df
        return df

### SVM Class

In [None]:
class SVMClassifier:
    """Grid-searched SVM trained on feature vectors extracted from a dataset.

    Args:
        dataset: Indexable collection whose items are ``(image, label)`` pairs.
            For the non-pixel features it must also provide ``get_cv2_img(idx)``
            and, for landmarks / blendshapes, a ``detector_mesh`` attribute.
        feature: One of ``SUPPORTED_FEATURES``; validated in ``extract_features``.
        n_splits_values: Fold counts evaluated by ``k_fold``.
        **kwargs: Extra parameters; for ``feature='hog'`` these must contain
            ``orientations``, ``pixels_per_cell`` and ``cells_per_block``.
    """

    # Feature names understood by extract_features().
    SUPPORTED_FEATURES = ('pixels', 'landmarks', 'hog', 'blendshapes')
    # Keyword arguments that must be supplied for feature='hog'.
    HOG_REQUIRED = ('orientations', 'pixels_per_cell', 'cells_per_block')

    def __init__(self, dataset, feature='pixels', n_splits_values = [3, 5, 10], **kwargs):
        self.dataset = dataset
        self.feature = feature
        # Copy so the shared default list can never be mutated through an instance.
        self.n_splits_values = list(n_splits_values)
        # Legacy alias: the attribute used to be stored under this misspelled
        # name while k_fold() read ``n_splits_values``.
        self.n_split_values = self.n_splits_values
        self.best_estimator = None
        self.dt_params = kwargs
        self.data = None
        self.labels = None

    def _vector_from_cv2(self, img_cv2):
        """Compute the feature vector of one OpenCV image for a non-pixel feature.

        Returns ``None`` when the image could not be read or no face was found.
        """
        if img_cv2 is None:
            return None

        if self.feature == 'hog':
            # hog() is given a single-channel image; cv2.imread yields 3-channel BGR.
            gray = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2GRAY)
            pixels_per_cell = self.dt_params['pixels_per_cell']
            cells_per_block = self.dt_params['cells_per_block']
            return hog(
                gray, orientations=self.dt_params['orientations'],
                pixels_per_cell=(pixels_per_cell, pixels_per_cell),
                cells_per_block=(cells_per_block, cells_per_block),
                block_norm='L2-Hys',
                transform_sqrt=False,
                feature_vector=True
            )

        # MediaPipe is told the data is SRGB, so convert from OpenCV's BGR order.
        rgb = cv2.cvtColor(img_cv2, cv2.COLOR_BGR2RGB)
        rgb_frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
        detection_result = self.dataset.detector_mesh.detect(rgb_frame)

        if self.feature == 'landmarks':
            if not detection_result.face_landmarks:
                return None
            return np.array([[lm.x, lm.y, lm.z] for lm in detection_result.face_landmarks[0]]).flatten()

        # 'blendshapes' -- stays empty unless the dataset's detector was created
        # with blendshape output enabled.
        if not detection_result.face_blendshapes:
            return None
        return np.array([[bs.index, bs.score] for bs in detection_result.face_blendshapes[0]]).flatten()

    def extract_features(self):
        """Turn every dataset sample into one flat feature vector.

        Samples whose image cannot be read, or in which no face is detected
        (landmarks / blendshapes), are skipped.

        Returns:
            Tuple ``(data, labels)``: a 2-D array with one row per kept sample
            and the matching 1-D label array.

        Raises:
            ValueError: If the feature type is unsupported, a required HOG
                parameter is missing, or no vector could be extracted at all.
        """
        # Validate up front so a misconfiguration fails before any image is read.
        if self.feature not in self.SUPPORTED_FEATURES:
            raise ValueError(f"[Value Error] Unsupported feature type: {self.feature}! Should be one of: {list(self.SUPPORTED_FEATURES)}")
        if self.feature == 'hog':
            missing = [key for key in self.HOG_REQUIRED if self.dt_params.get(key) is None]
            if missing:
                raise ValueError(f"{', '.join(missing)} required for hog feature extraction")

        print(f"[INFO] Extracting {self.feature} vectors ...")

        data = []
        labels = []
        skipped = 0

        for idx in tqdm(range(len(self.dataset))):
            img, label = self.dataset[idx]

            if self.feature == 'pixels':
                # Flatten: the SVM expects one 1-D vector per sample.
                vector = np.array(img).ravel()
            else:
                vector = self._vector_from_cv2(self.dataset.get_cv2_img(idx))

            if vector is None:
                skipped += 1
                continue

            data.append(vector)
            labels.append(label)

        if skipped:
            print(f"[INFO] Skipped {skipped} image(s): unreadable or no face detected.")
        if not data:
            raise ValueError(f"No {self.feature} feature vectors could be extracted from the dataset")

        # np.stack fails loudly if the vectors do not all have the same length.
        return np.stack(data), np.array(labels)

    def train(self):
        """Extract features, grid-search an SVC, save the best model and report scores.

        Uses a stratified 80/20 train/test split; the grid search runs on the
        training part only. The best estimator is pickled to
        ``MODEL_PATH / DATA / '<feature>_model.p'``.
        """
        self.data, self.labels = self.extract_features()

        x_train, x_test, y_train, y_test = train_test_split(self.data, self.labels, test_size=0.2, shuffle=True, stratify=self.labels, random_state=42)

        classifier = SVC()
        parameters = {"gamma" : [0.1, 0.01, 0.001], 'C' : [1, 10, 100, 1000]}

        grid_search = GridSearchCV(classifier, parameters)
        grid_search.fit(x_train, y_train)
        self.best_estimator = grid_search.best_estimator_
        print("[INFO] Best params ...", grid_search.best_params_)

        # Create the target folder if needed and close the file deterministically.
        model_dir = MODEL_PATH / DATA
        model_dir.mkdir(parents=True, exist_ok=True)
        with open(str(model_dir / f'{self.feature}_model.p'), 'wb') as model_file:
            pickle.dump(self.best_estimator, model_file)

        self.print_score(self.best_estimator, x_train, y_train, x_test, y_test, train=True)
        self.print_score(self.best_estimator, x_train, y_train, x_test, y_test, train=False)

    @staticmethod
    def print_score(clf, x_train, y_train, x_test, y_test, train=True):
        """Print accuracy, classification report and confusion matrix.

        Args:
            clf: Fitted estimator providing ``predict``.
            x_train, y_train, x_test, y_test: The two splits.
            train: Report on the training split when true, else on the test split.
        """
        if train:
            dataset_type = "Train"
            data, labels = x_train, y_train
        else:
            dataset_type = "Test"
            data, labels = x_test, y_test

        y_prediction = clf.predict(data)
        clf_report = classification_report(labels, y_prediction)
        accuracy = accuracy_score(labels, y_prediction) * 100
        confusion_mat = confusion_matrix(labels, y_prediction)

        print(f"{dataset_type} Result:\n{'=' * 50}")
        print(f"Accuracy Score: {accuracy:.2f}%")
        print("_______________________________________________")
        print(f"CLASSIFICATION REPORT:\n{clf_report}")
        print("_______________________________________________")
        print(f"Confusion Matrix: \n{confusion_mat}\n")

    def k_fold(self):
        """Cross-validate the best estimator for every fold count in ``n_splits_values``.

        Raises:
            RuntimeError: If ``train()`` has not been run yet.
        """
        if self.best_estimator is None or self.data is None:
            raise RuntimeError("train() must be called before k_fold()")

        print(f"[INFO] Evaluating mode: {self.feature}")

        for n_splits in self.n_splits_values:
            cv = KFold(n_splits=n_splits, random_state=42, shuffle=True)
            scores = cross_val_score(self.best_estimator, self.data, self.labels, scoring='accuracy', cv=cv, n_jobs=2)
            print(f"{n_splits}-Fold CV: {scores.mean():.2f} accuracy with a standard deviation of {scores.std():.2f}")

In [None]:
# Resize + tensor conversion pipeline. It is defined here but not applied:
# the dataset below is created with transform=None.
feature_extraction_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),

    ])

# Pass the configured feature type and HOG parameters on to the dataset so its
# own extract_features() / to_df() work with the same settings as the classifier.
nimhchefs = Dataset(data_path=CURRENT_PATH, img_size=64, feature=FEATURE, transform=None, **hog_params)

In [None]:
# Build the classifier for the configured feature type; img_size and the HOG
# settings are forwarded as extra keyword arguments.
svm_classifier = SVMClassifier(nimhchefs, FEATURE, img_size=64, **hog_params)

# Fit the model first, then run the k-fold evaluation on it.
svm_classifier.train()
svm_classifier.k_fold()

# Example Illustration

In [None]:
# MediaPipe model files, resolved against the shared MODEL_PATH from the
# configuration cell instead of repeating the absolute directory.
model_path_detect = str(MODEL_PATH / 'blaze_face_short_range.tflite')
model_path_mesh = str(MODEL_PATH / 'face_landmarker.task')

# Face detector (bounding boxes), used for cropping faces.
base_options_detect = python.BaseOptions(model_asset_path=model_path_detect)
options_detect = vision.FaceDetectorOptions(base_options=base_options_detect)
detector_detect = vision.FaceDetector.create_from_options(options_detect)

# Face landmarker (mesh landmarks + blendshapes) for a single face.
# Blendshape output is enabled so that plot_face_blendshapes_bar_graph() has
# data to plot; with it disabled the result carries no blendshapes.
base_options_mesh = python.BaseOptions(model_asset_path=model_path_mesh)
options_mesh = vision.FaceLandmarkerOptions(base_options=base_options_mesh,
                                       output_face_blendshapes=True,
                                       output_facial_transformation_matrixes=True,
                                       num_faces=1)
detector_mesh = vision.FaceLandmarker.create_from_options(options_mesh)

In [None]:
import cv2, sys, os, math
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path
from tqdm import tqdm
from typing import Tuple, Union
import pandas as pd

# mediapipe
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2

In [None]:
def crop_faces(img_cv2, detection_results, out_path):
    """Crop the first usable detected face and write it to ``out_path``.

    Args:
        img_cv2: Image array indexed as ``[row, column, ...]``.
        detection_results: Object with a ``detections`` sequence; every entry
            has a ``bounding_box`` with ``origin_x``, ``origin_y``, ``width``
            and ``height``.
        out_path: Destination passed to ``cv2.imwrite`` (converted to ``str``).

    Returns:
        The cropped array of the first detection that yields a non-empty crop,
        or ``None`` when there is no such detection (nothing is written then).
    """
    img_h, img_w = img_cv2.shape[:2]

    for d in detection_results.detections or []:
        bbox = d.bounding_box
        # Clamp the box to the image: a negative origin would otherwise be
        # interpreted by numpy as an index counted from the end.
        x0 = max(bbox.origin_x, 0)
        y0 = max(bbox.origin_y, 0)
        x1 = min(bbox.origin_x + bbox.width, img_w)
        y1 = min(bbox.origin_y + bbox.height, img_h)
        if x1 <= x0 or y1 <= y0:
            # Box lies completely outside the image; try the next detection.
            continue

        cropped_img = img_cv2[y0:y1, x0:x1]
        cv2.imwrite(str(out_path), cropped_img)
        return cropped_img

    return None
    
def draw_landmarks_on_image(rgb_image, detection_result):
    """Return a copy of ``rgb_image`` with the face mesh of every detected face drawn on it.

    For each entry of ``detection_result.face_landmarks`` the tesselation, the
    contours and the irises are drawn, in that order, using MediaPipe's default
    connection styles. The input image is not modified.
    """
    canvas = np.copy(rgb_image)

    for face in detection_result.face_landmarks:
        # Repackage the landmarks into the protobuf list the drawing helper takes.
        proto = landmark_pb2.NormalizedLandmarkList()
        proto.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=pt.x, y=pt.y, z=pt.z) for pt in face
        ])

        mesh = mp.solutions.face_mesh
        styles = mp.solutions.drawing_styles
        # (connection set, factory of the matching default style), in drawing order.
        layers = (
            (mesh.FACEMESH_TESSELATION, styles.get_default_face_mesh_tesselation_style),
            (mesh.FACEMESH_CONTOURS, styles.get_default_face_mesh_contours_style),
            (mesh.FACEMESH_IRISES, styles.get_default_face_mesh_iris_connections_style),
        )
        for connections, make_style in layers:
            solutions.drawing_utils.draw_landmarks(
                image=canvas,
                landmark_list=proto,
                connections=connections,
                landmark_drawing_spec=None,
                connection_drawing_spec=make_style())

    return canvas

def plot_face_blendshapes_bar_graph(face_blendshapes):
    """Show a horizontal bar chart of blendshape scores.

    Args:
        face_blendshapes: Iterable of categories exposing ``category_name`` and
            ``score``. Bars are drawn in the order given, first entry on top.
    """
    # Collect names and scores in input order.
    names = []
    scores = []
    for category in face_blendshapes:
        names.append(category.category_name)
        scores.append(category.score)
    ranks = range(len(names))

    fig, ax = plt.subplots(figsize=(12, 12))
    bars = ax.barh(ranks, scores, label=[str(rank) for rank in ranks])
    ax.set_yticks(ranks, names)
    ax.invert_yaxis()

    # Write the numeric score at the end of each bar.
    for score, patch in zip(scores, bars.patches):
        text_x = patch.get_x() + patch.get_width()
        plt.text(text_x, patch.get_y(), f"{score:.4f}", va="top")

    ax.set_xlabel('Score')
    ax.set_title('Face Blendshapes')
    plt.tight_layout()
    plt.show()

def extract_and_save_landmarks(c_df, img_cv2, idx, image_name=None):
    """Detect face landmarks in one image and add them as rows to ``c_df``.

    Args:
        c_df: DataFrame collecting the landmarks so far (may be empty or None).
        img_cv2: Image array handed to the face landmarker.
        idx: Value stored in the ``image_idx`` column.
        image_name: Value stored in the ``image_name`` column. When omitted, the
            function falls back to ``p.name`` of a notebook-global ``p`` -- the
            previous, hidden dependency -- so existing calls keep working.

    Returns:
        A new DataFrame with one extra row per landmark of the first detected
        face (columns ``image_idx``, ``image_name``, ``landmark_idx``, ``x``,
        ``y``, ``z``). ``c_df`` is returned unchanged when no face is detected.
    """
    if image_name is None:
        image_name = p.name  # legacy: relies on a loop variable of the calling cell

    # NOTE(review): the array is declared as SRGB; if it comes straight from
    # cv2.imread it is BGR and should be converted by the caller -- confirm.
    rgb_frame = mp.Image(image_format=mp.ImageFormat.SRGB, data=img_cv2)
    detection_result = detector_mesh.detect(rgb_frame)

    if not detection_result.face_landmarks:
        # No face found: nothing to add (indexing [0] would raise IndexError).
        return c_df

    df = pd.DataFrame(
        [(idx, image_name, index, point.x, point.y, point.z)
         for index, point in enumerate(detection_result.face_landmarks[0])],
        columns=['image_idx', 'image_name', 'landmark_idx', 'x', 'y', 'z'])

    if c_df is None or c_df.empty:
        return df
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat([c_df, df])