This notebook transforms our dataset of grayscale images of people's faces into feature vectors (one row of facial-landmark measurements per image) on which we will train a classifier. It only needs to be run once.

In [1]:
import os
import csv
import numpy as np
import cv2
import mediapipe as mp
import kagglehub

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Handle to MediaPipe's face-mesh solution module; FaceMesh is instantiated from it later.
mp_face_mesh = mp.solutions.face_mesh

# Landmark indices into the face-mesh output, used by extract_features.
# Indices from the mediapipe_landmarks_detection gist and common docs
# NOTE(review): the eyebrow helper below treats "left"/"right" as the subject's
# left/right. Confirm that the cheek and mouth constants follow the same
# convention before pairing them with eye or brow landmarks.
FACE_LEFT_CHEEK = 234
FACE_RIGHT_CHEEK = 454

# Mouth corners, inner-lip midpoints and the nose tip
MOUTH_LEFT = 61
MOUTH_RIGHT = 291
MOUTH_TOP_INNER = 13
MOUTH_BOTTOM_INNER = 14
NOSE_TIP = 1

# Eyes: corner and lid points used for eye width / height
RIGHT_EYE_OUTER = 33
RIGHT_EYE_INNER = 133
RIGHT_EYE_TOP = 159
RIGHT_EYE_BOTTOM = 145

LEFT_EYE_OUTER = 263
LEFT_EYE_INNER = 362
LEFT_EYE_TOP = 386
LEFT_EYE_BOTTOM = 374

# Eye index groups (averaged to get an eye centre in extract_features)
LEFT_EYE_INDICES = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466,
                    388, 387, 386, 385, 384, 398]
RIGHT_EYE_INDICES = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173,
                     157, 158, 159, 160, 161, 246]

# Eyebrow index groups
LEFT_EYEBROW_INDICES = [336, 296, 334, 293, 300, 276, 283, 282, 295, 285]
RIGHT_EYEBROW_INDICES = [70, 63, 105, 66, 107, 55, 65, 52, 53, 46]


In [3]:

def make_feature_array(feature_dict):
    """
    Split a {feature_name: value} mapping into parallel outputs.

    Returns a tuple (values, names):
      - values: 1D np.float32 array of the dict's values
      - names:  list of the dict's keys, in the same (insertion) order
    """
    names = [key for key in feature_dict]
    values = np.asarray([feature_dict[key] for key in names], dtype=np.float32)
    return values, names


def _dist(p1, p2):
    return np.linalg.norm(p1 - p2)


def _slope(p_from, p_to):
    dx = p_to[0] - p_from[0]
    dy = p_to[1] - p_from[1]
    return dy / (dx + 1e-6)


def _signed_point_line_distance_2d(p, a, b):
    """
    Signed distance from point p to the line through a and b in xy.
    Uses 2D cross product for sign.
    """
    p2 = p[:2]
    a2 = a[:2]
    b2 = b[:2]
    v = b2 - a2
    w = p2 - a2
    cross = v[0] * w[1] - v[1] * w[0]
    denom = np.linalg.norm(v) + 1e-6
    return cross / denom


def _get_inner_outer_brow_points(pts, indices, side):
    """
    side: 'left' or 'right' from subject perspective.
    Uses x coordinate to choose inner vs outer point.
    """
    brow_pts = pts[indices]          # shape (N, 3)
    xs = brow_pts[:, 0]

    if side == "left":
        # Subject left brow is on viewer right
        # inner is closer to center (smaller x), outer larger x
        inner_local = int(xs.argmin())
        outer_local = int(xs.argmax())
    else:
        # Subject right brow is on viewer left
        # inner is closer to center (larger x), outer smaller x
        inner_local = int(xs.argmax())
        outer_local = int(xs.argmin())

    inner_pt = brow_pts[inner_local]
    outer_pt = brow_pts[outer_local]
    return inner_pt, outer_pt


def extract_features(landmarks):
    """
    Turn the face-mesh landmarks of one face into a flat vector of
    geometric features (mouth, eye, eyebrow and whole-face measurements).

    landmarks: sequence of mediapipe landmarks (objects with .x, .y, .z).
        It must be long enough for every index used here (the largest is
        466). Presumably 468 points, or 478 when FaceMesh runs with
        refine_landmarks=True -- TODO confirm.
    Returns:
        features: 1D np.float32 array
        feature_names: list of strings in matching order

    Distances are divided by the face width or face height so the features
    do not depend on how large the face is in the image.
    """
    pts = np.array([[lm.x, lm.y, lm.z] for lm in landmarks])

    # basic reference distances
    left_cheek = pts[FACE_LEFT_CHEEK]
    right_cheek = pts[FACE_RIGHT_CHEEK]
    face_width = _dist(left_cheek, right_cheek) + 1e-6

    forehead = pts[10]
    chin = pts[152]
    face_height = _dist(forehead, chin) + 1e-6
    # Several features historically divide by face_height plus one more
    # epsilon; keep that exact denominator so their values do not shift.
    height_norm = face_height + 1e-6

    # mouth points
    mouth_left = pts[MOUTH_LEFT]
    mouth_right = pts[MOUTH_RIGHT]
    mouth_top = pts[MOUTH_TOP_INNER]
    mouth_bottom = pts[MOUTH_BOTTOM_INNER]
    nose_tip = pts[NOSE_TIP]
    mouth_center = 0.5 * (mouth_left + mouth_right)

    # base mouth features
    mouth_width = _dist(mouth_left, mouth_right) / face_width
    mouth_height = _dist(mouth_top, mouth_bottom) / face_width
    mouth_ar = mouth_height / (mouth_width + 1e-6)

    mouth_corner_asym = (mouth_left[1] - mouth_right[1])
    mouth_center_nose_dist = _dist(mouth_center, nose_tip) / face_height

    # Corner slopes are measured from the middle of the lips (midpoint of
    # the inner top/bottom lip points). Measuring them from mouth_center,
    # the midpoint of the two corners, makes both slopes the slope of the
    # same corner-to-corner line: left == right == mean and diff == 0, so
    # three of the four slope features carried no extra information.
    lip_center = 0.5 * (mouth_top + mouth_bottom)
    slope_left = _slope(lip_center, mouth_left)
    slope_right = _slope(lip_center, mouth_right)
    slope_mean = 0.5 * (slope_left + slope_right)
    slope_diff = slope_left - slope_right

    # extra mouth geometry
    lip_thickness_mid = _dist(mouth_top, mouth_bottom) / face_height

    mouth_corner_top_left = _dist(mouth_left, mouth_top) / face_height
    mouth_corner_top_right = _dist(mouth_right, mouth_top) / face_height
    mouth_corner_bottom_left = _dist(mouth_left, mouth_bottom) / face_height
    mouth_corner_bottom_right = _dist(mouth_right, mouth_bottom) / face_height

    mouth_corner_nose_left = _dist(mouth_left, nose_tip) / face_height
    mouth_corner_nose_right = _dist(mouth_right, nose_tip) / face_height

    # signed offset of each inner-lip midpoint from the corner-to-corner line
    upper_lip_curvature = _signed_point_line_distance_2d(
        mouth_top, mouth_left, mouth_right
    ) / height_norm
    lower_lip_curvature = _signed_point_line_distance_2d(
        mouth_bottom, mouth_left, mouth_right
    ) / height_norm

    # eyes
    left_eye_outer = pts[LEFT_EYE_OUTER]
    left_eye_inner = pts[LEFT_EYE_INNER]
    left_eye_top = pts[LEFT_EYE_TOP]
    left_eye_bottom = pts[LEFT_EYE_BOTTOM]

    right_eye_outer = pts[RIGHT_EYE_OUTER]
    right_eye_inner = pts[RIGHT_EYE_INNER]
    right_eye_top = pts[RIGHT_EYE_TOP]
    right_eye_bottom = pts[RIGHT_EYE_BOTTOM]

    left_eye_width = _dist(left_eye_outer, left_eye_inner) / face_width
    left_eye_height = _dist(left_eye_top, left_eye_bottom) / face_width

    right_eye_width = _dist(right_eye_outer, right_eye_inner) / face_width
    right_eye_height = _dist(right_eye_top, right_eye_bottom) / face_width

    # "ear" = eye aspect ratio (height / width), not the ear of the face
    left_ear = left_eye_height / (left_eye_width + 1e-6)
    right_ear = right_eye_height / (right_eye_width + 1e-6)
    mean_ear = 0.5 * (left_ear + right_ear)
    ear_diff = left_ear - right_ear

    # eye centers and cheek distances
    left_eye_center = pts[LEFT_EYE_INDICES].mean(axis=0)
    right_eye_center = pts[RIGHT_EYE_INDICES].mean(axis=0)

    # Pair each eye with the cheek landmark on its own side of the face.
    # NOTE(review): the eye/brow constants follow the subject's perspective,
    # while in the MediaPipe mesh landmark 234 (FACE_LEFT_CHEEK) sits on the
    # RIGHT_EYE_* side and 454 (FACE_RIGHT_CHEEK) on the LEFT_EYE_* side.
    # Pairing by constant name therefore measured across the face; confirm
    # against the mesh topology.
    cheek_near_left_eye = right_cheek
    cheek_near_right_eye = left_cheek

    left_cheek_eye = _dist(cheek_near_left_eye, left_eye_center) / face_width
    right_cheek_eye = _dist(cheek_near_right_eye, right_eye_center) / face_width
    cheek_eye_mean = 0.5 * (left_cheek_eye + right_cheek_eye)
    cheek_eye_diff = left_cheek_eye - right_cheek_eye

    # eyebrow to eye distances (mean y of brow minus mean y of eye)
    left_eye_mean_y = pts[LEFT_EYE_INDICES][:, 1].mean()
    right_eye_mean_y = pts[RIGHT_EYE_INDICES][:, 1].mean()

    left_brow_mean_y = pts[LEFT_EYEBROW_INDICES][:, 1].mean()
    right_brow_mean_y = pts[RIGHT_EYEBROW_INDICES][:, 1].mean()

    left_brow_eye = (left_brow_mean_y - left_eye_mean_y) / height_norm
    right_brow_eye = (right_brow_mean_y - right_eye_mean_y) / height_norm
    brow_eye_mean = 0.5 * (left_brow_eye + right_brow_eye)
    brow_eye_diff = left_brow_eye - right_brow_eye

    # inner vs outer brow points and richer brow geometry
    left_inner_brow, left_outer_brow = _get_inner_outer_brow_points(
        pts, LEFT_EYEBROW_INDICES, side="left"
    )
    right_inner_brow, right_outer_brow = _get_inner_outer_brow_points(
        pts, RIGHT_EYEBROW_INDICES, side="right"
    )

    inner_brow_dist = _dist(left_inner_brow, right_inner_brow) / face_width

    left_brow_tilt = _slope(left_inner_brow, left_outer_brow)
    right_brow_tilt = _slope(right_inner_brow, right_outer_brow)
    brow_tilt_diff = left_brow_tilt - right_brow_tilt

    left_brow_mid = pts[LEFT_EYEBROW_INDICES].mean(axis=0)
    right_brow_mid = pts[RIGHT_EYEBROW_INDICES].mean(axis=0)

    # signed offset of the brow's mean point from its inner-to-outer line
    left_brow_curv = _signed_point_line_distance_2d(
        left_brow_mid, left_inner_brow, left_outer_brow
    ) / height_norm
    right_brow_curv = _signed_point_line_distance_2d(
        right_brow_mid, right_inner_brow, right_outer_brow
    ) / height_norm
    brow_curv_mean = 0.5 * (left_brow_curv + right_brow_curv)
    brow_curv_diff = left_brow_curv - right_brow_curv

    left_inner_brow_eye = (left_inner_brow[1] - left_eye_center[1]) / height_norm
    left_outer_brow_eye = (left_outer_brow[1] - left_eye_center[1]) / height_norm
    right_inner_brow_eye = (right_inner_brow[1] - right_eye_center[1]) / height_norm
    right_outer_brow_eye = (right_outer_brow[1] - right_eye_center[1]) / height_norm

    inner_brow_eye_mean = 0.5 * (left_inner_brow_eye + right_inner_brow_eye)
    outer_brow_eye_mean = 0.5 * (left_outer_brow_eye + right_outer_brow_eye)
    inner_outer_brow_eye_diff = inner_brow_eye_mean - outer_brow_eye_mean

    # global geometry
    face_aspect = face_height / face_width

    # in-plane rotation of the cheek-to-cheek axis
    cheek_axis = right_cheek - left_cheek
    head_tilt_angle = np.arctan2(cheek_axis[1], cheek_axis[0])

    mouth_corner_asym_norm = mouth_corner_asym / height_norm

    head_tilt_sin = np.sin(head_tilt_angle)
    head_tilt_cos = np.cos(head_tilt_angle)

    # build dictionary of all features so names and values stay aligned
    feature_dict = {
        # mouth
        "mouth_width": mouth_width,
        "mouth_height": mouth_height,
        "mouth_ar": mouth_ar,
        "mouth_corner_asym_norm": mouth_corner_asym_norm,
        "mouth_center_nose_dist": mouth_center_nose_dist,
        "mouth_slope_left": slope_left,
        "mouth_slope_right": slope_right,
        "mouth_slope_mean": slope_mean,
        "mouth_slope_diff": slope_diff,

        # extra mouth
        "lip_thickness_mid": lip_thickness_mid,
        "mouth_corner_top_left": mouth_corner_top_left,
        "mouth_corner_top_right": mouth_corner_top_right,
        "mouth_corner_bottom_left": mouth_corner_bottom_left,
        "mouth_corner_bottom_right": mouth_corner_bottom_right,
        "mouth_corner_nose_left": mouth_corner_nose_left,
        "mouth_corner_nose_right": mouth_corner_nose_right,
        "upper_lip_curvature": upper_lip_curvature,
        "lower_lip_curvature": lower_lip_curvature,

        # eyes
        "left_eye_width": left_eye_width,
        "left_eye_height": left_eye_height,
        "right_eye_width": right_eye_width,
        "right_eye_height": right_eye_height,
        "left_ear": left_ear,
        "right_ear": right_ear,
        "mean_ear": mean_ear,
        "ear_diff": ear_diff,

        # eye-cheek
        "left_cheek_eye": left_cheek_eye,
        "right_cheek_eye": right_cheek_eye,
        "cheek_eye_mean": cheek_eye_mean,
        "cheek_eye_diff": cheek_eye_diff,

        # brows
        "left_brow_eye": left_brow_eye,
        "right_brow_eye": right_brow_eye,
        "brow_eye_mean": brow_eye_mean,
        "brow_eye_diff": brow_eye_diff,

        # brow geometry
        "inner_brow_dist": inner_brow_dist,
        "left_brow_tilt": left_brow_tilt,
        "right_brow_tilt": right_brow_tilt,
        "brow_tilt_diff": brow_tilt_diff,
        "left_brow_curv": left_brow_curv,
        "right_brow_curv": right_brow_curv,
        "brow_curv_mean": brow_curv_mean,
        "brow_curv_diff": brow_curv_diff,
        "left_inner_brow_eye": left_inner_brow_eye,
        "left_outer_brow_eye": left_outer_brow_eye,
        "right_inner_brow_eye": right_inner_brow_eye,
        "right_outer_brow_eye": right_outer_brow_eye,
        "inner_brow_eye_mean": inner_brow_eye_mean,
        "outer_brow_eye_mean": outer_brow_eye_mean,
        "inner_outer_brow_eye_diff": inner_outer_brow_eye_diff,

        # global
        "face_aspect": face_aspect,
        "head_tilt_angle": head_tilt_angle,
        "head_tilt_sin": head_tilt_sin,
        "head_tilt_cos": head_tilt_cos,
    }

    features, feature_names = make_feature_array(feature_dict)
    return features, feature_names


In [4]:

def get_dataset_paths():
    """
    Fetch the face-expression dataset through kagglehub and return the
    (train_dir, val_dir) image folders inside the path it reports.
    """
    images_root = os.path.join(
        kagglehub.dataset_download("jonathanoheix/face-expression-recognition-dataset"),
        "images",
    )
    return os.path.join(images_root, "train"), os.path.join(images_root, "validation")

def iterate_images(root_dir):
    """
    Yield (image_path, label) for every image under root_dir.

    root_dir is expected to contain one sub-directory per class; the
    sub-directory name is used as the label. Only files ending in .png,
    .jpg or .jpeg (case-insensitive) are yielded; entries directly inside
    root_dir that are not directories are ignored.

    Both class names and file names are visited in sorted order, so the
    sequence (and therefore the row order of anything built from it) is the
    same on every run and every machine; os.listdir alone returns entries
    in arbitrary order.
    """
    image_suffixes = (".png", ".jpg", ".jpeg")

    class_names = sorted(
        entry for entry in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, entry))
    )

    for label in class_names:
        class_dir = os.path.join(root_dir, label)
        for fname in sorted(os.listdir(class_dir)):
            if fname.lower().endswith(image_suffixes):
                yield os.path.join(class_dir, fname), label


def preprocess_to_csv(root_dir, output_csv, img_size=48):
    """
    Run face-mesh landmark detection on every image under root_dir and write
    one CSV row of features per image in which a face was found.

    root_dir:   directory with one sub-directory per class (see iterate_images)
    output_csv: path of the CSV to write; its parent directory is created if
                it does not exist yet
    img_size:   images are resized to img_size x img_size before detection

    The CSV header is the feature names returned by extract_features followed
    by "label". Images that cannot be read, or in which no face is detected,
    are skipped; the number of each is printed at the end. If no rows were
    collected at all, nothing is written and the function returns None.
    """
    rows = []
    header = None
    unreadable = 0
    no_face = 0

    # Use FaceMesh as a context manager so its resources are released when
    # the loop finishes or raises.
    with mp_face_mesh.FaceMesh(
        static_image_mode=True,
        max_num_faces=1,
        refine_landmarks=True,
        min_detection_confidence=0.5,
    ) as face_mesh:
        for img_path, label in iterate_images(root_dir):
            img_gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img_gray is None:
                unreadable += 1
                continue

            img_gray = cv2.resize(img_gray, (img_size, img_size))
            # the detector is fed a 3-channel image, so replicate the gray channel
            img_rgb = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2RGB)

            result = face_mesh.process(img_rgb)
            if not result.multi_face_landmarks:
                no_face += 1
                continue

            landmarks = result.multi_face_landmarks[0].landmark

            # extract both (features, feature_names)
            feat_values, feat_names = extract_features(landmarks)

            # Build header ONCE using descriptive names
            if header is None:
                header = feat_names + ["label"]

            rows.append(feat_values.tolist() + [label])

            if len(rows) % 500 == 0:
                print(f"Processed {len(rows)} images from {root_dir}...")

    print(
        f"Skipped {unreadable} unreadable images and "
        f"{no_face} images with no detected face in {root_dir}."
    )

    if not rows:
        print(f"No rows collected for {root_dir}, nothing to write.")
        return

    print(f"Writing {len(rows)} rows to {output_csv}")

    # Make sure the target directory exists; otherwise open() fails on a
    # fresh checkout and all the work above is lost.
    out_dir = os.path.dirname(output_csv)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(output_csv, "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(rows)


In [5]:
# Extract landmark features for both dataset splits and write one CSV per split.
train_dir, val_dir = get_dataset_paths()
for split_dir, split_csv in (
    (train_dir, "../data/train_features.csv"),
    (val_dir, "../data/val_features.csv"),
):
    preprocess_to_csv(split_dir, split_csv)

I0000 00:00:1764916129.240997 3565066 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1764916129.259933 3568611 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1764916129.268225 3568615 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1764916129.293453 3568618 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


Processed 500 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanoheix/face-expression-recognition-dataset/versions/1/images/train...
Processed 1000 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanoheix/face-expression-recognition-dataset/versions/1/images/train...
Processed 1500 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanoheix/face-expression-recognition-dataset/versions/1/images/train...
Processed 2000 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanoheix/face-expression-recognition-dataset/versions/1/images/train...
Processed 2500 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanoheix/face-expression-recognition-dataset/versions/1/images/train...
Processed 3000 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanoheix/face-expression-recognition-dataset/versions/1/images/train...
Processed 3500 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanoheix/face-expression-recognition-

I0000 00:00:1764916293.252725 3565066 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M1
W0000 00:00:1764916293.269102 3573408 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1764916293.276985 3573408 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Processed 500 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanoheix/face-expression-recognition-dataset/versions/1/images/validation...
Processed 1000 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanoheix/face-expression-recognition-dataset/versions/1/images/validation...
Processed 1500 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanoheix/face-expression-recognition-dataset/versions/1/images/validation...
Processed 2000 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanoheix/face-expression-recognition-dataset/versions/1/images/validation...
Processed 2500 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanoheix/face-expression-recognition-dataset/versions/1/images/validation...
Processed 3000 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanoheix/face-expression-recognition-dataset/versions/1/images/validation...
Processed 3500 images from /Users/afifahhadi/.cache/kagglehub/datasets/jonathanohei

In [6]:
import pandas as pd

# Reload the feature CSVs written by preprocess_to_csv.
train_df = pd.read_csv("../data/train_features.csv")
val_df = pd.read_csv("../data/val_features.csv")

# Both splits must have the same columns in the same order, otherwise the
# feature matrices below would not line up.
assert list(train_df.columns) == list(val_df.columns), "train/val CSV columns differ"

X_train = train_df.drop(columns="label").to_numpy(dtype=np.float32)
X_val = val_df.drop(columns="label").to_numpy(dtype=np.float32)

# Store labels as a fixed-width unicode array, not an object array: np.save
# pickles object arrays, and np.load then refuses to read them unless
# allow_pickle=True is passed.
y_train = train_df["label"].to_numpy(dtype=str)
y_val = val_df["label"].to_numpy(dtype=str)

# Save as .npy for easy loading
os.makedirs("../data", exist_ok=True)
np.save("../data/X_train.npy", X_train)
np.save("../data/y_train.npy", y_train)
np.save("../data/X_val.npy", X_val)
np.save("../data/y_val.npy", y_val)

In [7]:
# List the CSV columns: the named features followed by the "label" column.
train_df.columns

Index(['mouth_width', 'mouth_height', 'mouth_ar', 'mouth_corner_asym_norm',
       'mouth_center_nose_dist', 'mouth_slope_left', 'mouth_slope_right',
       'mouth_slope_mean', 'mouth_slope_diff', 'lip_thickness_mid',
       'mouth_corner_top_left', 'mouth_corner_top_right',
       'mouth_corner_bottom_left', 'mouth_corner_bottom_right',
       'mouth_corner_nose_left', 'mouth_corner_nose_right',
       'upper_lip_curvature', 'lower_lip_curvature', 'left_eye_width',
       'left_eye_height', 'right_eye_width', 'right_eye_height', 'left_ear',
       'right_ear', 'mean_ear', 'ear_diff', 'left_cheek_eye',
       'right_cheek_eye', 'cheek_eye_mean', 'cheek_eye_diff', 'left_brow_eye',
       'right_brow_eye', 'brow_eye_mean', 'brow_eye_diff', 'inner_brow_dist',
       'left_brow_tilt', 'right_brow_tilt', 'brow_tilt_diff', 'left_brow_curv',
       'right_brow_curv', 'brow_curv_mean', 'brow_curv_diff',
       'left_inner_brow_eye', 'left_outer_brow_eye', 'right_inner_brow_eye',
       'right_

In [8]:
# Class balance of the training rows (only images where a face was detected are in the CSV).
train_df["label"].value_counts()

label
happy       6899
neutral     4819
sad         4409
fear        3699
angry       3544
surprise    3036
disgust      378
Name: count, dtype: int64

In [16]:
# Inspect the rows of the rarest class.
# NOTE(review): the saved output of this cell has execution count 16 and shows
# columns "Unnamed: 0", f0..f22, which do not match the named feature columns
# written by preprocess_to_csv -- it looks like output from an earlier version
# of the CSV. Re-run after Restart & Run All to refresh it.
train_df[train_df["label"] == "disgust"]

Unnamed: 0,f0,f1,f2,f3,f4,f5,f6,f7,f8,f9,...,f14,f15,f16,f17,f18,f19,f20,f21,f22,label
3544,0.413178,0.192618,0.466185,-0.057908,0.333224,0.164280,0.164278,0.164279,2.021374e-06,0.188651,...,0.197444,0.225921,0.056953,-0.147598,-0.130699,-0.139149,-0.016900,1.155492,0.148629,disgust
3545,0.366580,0.002975,0.008116,-0.005619,0.274076,0.018910,0.018910,0.018910,2.628474e-07,0.229015,...,0.332040,0.316955,-0.030171,-0.131915,-0.099154,-0.115535,-0.032760,1.179423,-0.123804,disgust
3546,0.368501,0.018032,0.048933,0.000255,0.268026,-0.000843,-0.000843,-0.000843,-1.107547e-08,0.197324,...,0.315069,0.308115,-0.013909,-0.071679,-0.119965,-0.095822,0.048286,1.209658,-0.038514,disgust
3547,0.352277,0.082331,0.233710,0.023231,0.277251,-0.084161,-0.084159,-0.084160,-1.246755e-06,0.205144,...,0.320739,0.301902,-0.037674,-0.095715,-0.093208,-0.094462,-0.002507,1.252114,-0.079918,disgust
3548,0.347480,0.075914,0.218471,0.049908,0.283811,-0.179168,-0.179166,-0.179167,-2.543680e-06,0.201779,...,0.318796,0.324344,0.011097,-0.089835,-0.064179,-0.077007,-0.025655,1.150373,-0.201656,disgust
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3917,0.450030,0.134156,0.298103,-0.017895,0.296697,0.054205,0.054205,0.054205,5.784295e-07,0.212070,...,0.300434,0.260787,-0.079293,-0.080248,-0.075953,-0.078101,-0.004294,1.332551,0.028317,disgust
3918,0.450030,0.134156,0.298103,-0.017895,0.296697,0.054205,0.054205,0.054205,5.784295e-07,0.212070,...,0.300434,0.260787,-0.079293,-0.080248,-0.075953,-0.078101,-0.004294,1.332551,0.028317,disgust
3919,0.284852,0.016211,0.056910,-0.001232,0.307201,0.004811,0.004811,0.004811,7.919468e-08,0.203227,...,0.250468,0.295314,0.089692,-0.111745,-0.098683,-0.105214,-0.013062,1.076356,0.012680,disgust
3920,0.419064,0.104245,0.248757,-0.018916,0.307016,0.053849,0.053848,0.053848,5.752945e-07,0.194829,...,0.333145,0.346356,0.026423,-0.082563,-0.085388,-0.083975,0.002825,1.176775,-0.009330,disgust
