In [36]:
import mediapipe as mp
import cv2
import numpy as np
from tqdm import tqdm
import pandas as pd

====================================================
# VIDEO UPLOAD
====================================================

In [37]:
# Open the raw input video and report whether OpenCV could read it.
video_path = "/Users/louiscoussement/code/VERA/data/raw/myvideo.mp4"
cap = cv2.VideoCapture(video_path)

print("‚úÖ Video loaded" if cap.isOpened() else "‚ùå Error loading video")

‚úÖ Video loaded


====================================================
# Initialize MODEL + FACE DETECTION CONFIRMATION + processed video
====================================================

In [38]:
# Single-face FaceMesh with refined landmarks (the iris indices 468/473 used
# later in this notebook require refine_landmarks=True).
face_mesh = mp.solutions.face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Sanity check: run the model on the very first frame only.
cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()

if ret:
    # OpenCV decodes to BGR; MediaPipe expects RGB.
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(rgb)
    print(
        "‚úÖ FaceMesh detected a face"
        if results.multi_face_landmarks
        else "‚ùå FaceMesh did NOT detect a face"
    )
else:
    print("‚ùå Could not read first frame")

‚úÖ FaceMesh detected a face


I0000 00:00:1764776579.133124       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M2


In [39]:
# =========================================================
# 1. INPUT / OUTPUT PATHS
# =========================================================
video_path = "/Users/louiscoussement/code/VERA/data/raw/myvideo.mp4"
output_path = "/Users/louiscoussement/code/VERA/data/processed/debug_facemesh_minimal.mp4"

# =========================================================
# 2. LANDMARK GROUPS
# =========================================================
HEAD_POINTS = [234, 454, 1]    # ears + nose
GAZE_POINTS = [468, 473, 1]    # iris + nose
EXPRESS_POINTS = [
    55, 65, 52, 285, 295, 282,
    159, 145, 386, 374,
    13, 14, 61, 291, 234, 454
]
SMILE_POINTS = [61, 291]

# Colours are drawn onto the OpenCV frame, so the tuples are in BGR order.
COLOR_HEAD   = (255, 0,   0)
COLOR_GAZE   = (0,   255, 255)
COLOR_EXP    = (0,   255, 0)
COLOR_SMILE  = (0,   0,   255)

# (landmark indices, colour, dot radius) in drawing order. Indices shared by
# several groups are painted by the group listed last.
LANDMARK_GROUPS = [
    (HEAD_POINTS,    COLOR_HEAD,  3),
    (GAZE_POINTS,    COLOR_GAZE,  3),
    (EXPRESS_POINTS, COLOR_EXP,   3),
    (SMILE_POINTS,   COLOR_SMILE, 4),
]

# =========================================================
# 3. INITIALIZE MEDIAPIPE
# =========================================================
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils

face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5
)

# =========================================================
# 4. OPEN INPUT VIDEO + OUTPUT WRITER
# =========================================================
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError("‚ùå Error loading video")
print("‚úÖ Video loaded")

fps = cap.get(cv2.CAP_PROP_FPS)
width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Video writer (same frame rate and size as the input)
out = cv2.VideoWriter(
    output_path,
    cv2.VideoWriter_fourcc(*'mp4v'),
    fps,
    (width, height)
)
if not out.isOpened():
    # Without this check every out.write() is silently dropped and the cell
    # would still claim the debug video was saved.
    cap.release()
    raise ValueError(f"Could not open video writer for: {output_path}")

# =========================================================
# 5. PROCESS + DRAW
# =========================================================
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # OpenCV decodes to BGR; MediaPipe expects RGB.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(rgb)

        annotated = frame.copy()
        if results.multi_face_landmarks:
            lm = results.multi_face_landmarks[0].landmark
            h, w, _ = frame.shape

            # Landmark x/y are scaled by the frame size to get pixel positions.
            for points, color, radius in LANDMARK_GROUPS:
                for i in points:
                    center = (int(lm[i].x * w), int(lm[i].y * h))
                    cv2.circle(annotated, center, radius, color, -1)

        # Frames without a detected face are written unannotated so the debug
        # video keeps the same length as the input.
        out.write(annotated)
finally:
    # Always release, even if processing fails or the cell is interrupted,
    # so the output file is closed and the input handle is freed.
    cap.release()
    out.release()

print("üé• Debug video saved to:", output_path)


‚úÖ Video loaded


I0000 00:00:1764776579.826248       1 gl_context.cc:344] GL version: 2.1 (2.1 Metal - 89.4), renderer: Apple M2


üé• Debug video saved to: /Users/louiscoussement/code/VERA/data/processed/debug_facemesh_minimal.mp4


====================================================
# 1 - FUNCTIONS
====================================================

### Head stability

In [40]:
def compute_head_center(lm):
    """
    Return the 3D midpoint between landmarks 234 and 454 (used in this
    notebook as the left and right ear points).

    lm: indexable collection of landmarks exposing .x, .y and .z.
    Returns a NumPy array [x, y, z].
    """
    ear_a, ear_b = (
        np.array([lm[idx].x, lm[idx].y, lm[idx].z]) for idx in (234, 454)
    )
    return (ear_a + ear_b) / 2

### Gaze direction consistency

In [41]:
def compute_iris_centers(lm):
    """
    Return the 3D midpoint between landmarks 468 and 473 (the two iris
    centre points used in this notebook).

    lm: indexable collection of landmarks exposing .x, .y and .z.
    Returns a NumPy array [x, y, z].
    """
    iris_a, iris_b = (
        np.array([lm[idx].x, lm[idx].y, lm[idx].z]) for idx in (468, 473)
    )
    return (iris_a + iris_b) / 2

def compute_face_center(lm):
    """
    Return landmark 1 (the nose point used in this notebook) as a 3D
    NumPy array [x, y, z]; it serves as the face anchor for the gaze vector.
    """
    anchor = lm[1]
    return np.array([anchor.x, anchor.y, anchor.z])

### Smile activation 

In [42]:
def compute_smile_activation(lm):
    """
    Return the straight-line (Euclidean) 3D distance between the two
    lip-corner landmarks, 61 and 291.

    lm: indexable collection of landmarks exposing .x, .y and .z.
    """
    corner_a, corner_b = (
        np.array([lm[idx].x, lm[idx].y, lm[idx].z]) for idx in (61, 291)
    )
    return np.linalg.norm(corner_a - corner_b)

## Video metrics extraction

In [45]:
# Per-frame feature extraction: one dict per decoded frame with its timestamp,
# head speed, gaze-direction change and smile distance (NaN when no face).
features = []

# Open a fresh capture BEFORE querying its properties. The debug-video cell
# releases `cap` when it finishes, so reading fps / frame count from the old
# handle yields 0 on a clean top-to-bottom run and the loop would never start.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise ValueError(f"Could not open video: {video_path}")

fps = cap.get(cv2.CAP_PROP_FPS)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
if fps <= 0:
    # Timestamps are idx / fps, so an unknown frame rate cannot be used.
    cap.release()
    raise ValueError(f"Video reports an invalid frame rate ({fps}): {video_path}")

# Values from the last frame in which a face was detected.
# NOTE(review): these are not reset when detection is lost, so the first
# speed after a gap spans several frames — confirm that is acceptable.
prev_head_center = None
prev_gaze = None

try:
    # frame_count comes from container metadata; the loop also stops early
    # if decoding ends before that many frames were read.
    for idx in tqdm(range(frame_count)):
        ret, frame = cap.read()
        if not ret:
            break

        timestamp = idx / fps
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = face_mesh.process(rgb)

        if results.multi_face_landmarks:
            lm = results.multi_face_landmarks[0].landmark

            # ----- HEAD STABILITY -----
            # Displacement of the head centre since the previous detection.
            head_center = compute_head_center(lm)
            head_speed = np.linalg.norm(head_center - prev_head_center) if prev_head_center is not None else np.nan
            prev_head_center = head_center

            # ----- GAZE CONSISTENCY -----
            iris_center = compute_iris_centers(lm)
            face_center = compute_face_center(lm)

            # Unit vector from the face anchor to the iris midpoint; the small
            # epsilon avoids dividing by zero for a degenerate vector.
            gaze_vec = iris_center - face_center
            gaze_vec = gaze_vec / (np.linalg.norm(gaze_vec) + 1e-6)

            dg = np.linalg.norm(gaze_vec - prev_gaze) if prev_gaze is not None else np.nan
            prev_gaze = gaze_vec

            # ----- SMILE ACTIVATION -----
            smile = compute_smile_activation(lm)

        else:
            head_speed = np.nan
            dg = np.nan
            smile = np.nan

        # Append ALL features (one row per frame, detected or not)
        features.append({
            "timestamp": timestamp,
            "head_speed": head_speed,
            "gaze_dg": dg,
            "smile": smile
        })
finally:
    # Free the video handle even if processing fails or is interrupted.
    cap.release()


 98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 1741/1774 [00:12<00:00, 138.57it/s]


====================================================
# 2 - SHOW TIMESTAMPS OF THE VIDEO PER SEC
====================================================

In [46]:
# One row per frame, indexed by timestamp; `second` is the timestamp
# truncated to an integer and is used later to group frames per second.
df = (
    pd.DataFrame(features)
    .set_index("timestamp")
    .assign(second=lambda frame: frame.index.astype(int))
)
df

Unnamed: 0_level_0,head_speed,gaze_dg,smile,second
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.000000,,,0.036539,0
0.033378,0.010674,0.163421,0.036186,0
0.066755,0.003589,0.008254,0.036130,0
0.100133,0.002457,0.008508,0.036478,0
0.133510,0.002069,0.009971,0.036276,0
...,...,...,...,...
57.943420,0.000086,0.013637,0.039874,57
57.976797,0.000837,0.024552,0.039613,57
58.010175,0.000935,0.003891,0.038994,58
58.043553,0.000731,0.012525,0.038563,58


====================================================
# 3 - DEFINE SPEED VARIANCE and MEAN PER SECOND
#### Speed variance -> Head activation and Gaze consistency
#### Mean -> smile activation
====================================================


In [47]:
# Group the per-frame features by whole second once, then aggregate each metric.
per_second = df.groupby("second")

# Head jitter: variance of frame-to-frame head speed within each second
# (seconds where the variance is undefined become 0).
jitter_head_1s = per_second["head_speed"].var().fillna(0)

# Gaze jitter: variance of frame-to-frame gaze change within each second
# (undefined -> 0).
jitter_gaze_1s = per_second["gaze_dg"].var().fillna(0)

# Smile activation: mean lip-corner distance within each second
# (seconds with no value become 0).
smile_1s = per_second["smile"].mean().fillna(0)

====================================================
# 4 - DISPLAY OBSERVATIONS ON 5-SEC WINDOWS WITH A 1-SEC STRIDE
====================================================


In [48]:
def sliding_windows(series, window=5):
    """
    Average a per-second series over sliding windows that advance one index
    label at a time.

    For each label `start` in the series index, the window is the label slice
    series.loc[start : start + window]. Label slicing is inclusive on both
    ends, so a full window holds window + 1 points (e.g. seconds 0..5 for
    window=5). Windows with fewer points (end of the series, or gaps in the
    index) are skipped.

    Parameters
    ----------
    series : pd.Series
        Values indexed by integer second.
    window : int, default 5
        Offset between the first and last label of a window.

    Returns
    -------
    pd.DataFrame
        Columns "start_sec", "end_sec", "value" (mean over the window), one
        row per full window. The columns are always present, so an input too
        short for any full window yields an empty frame that can still be
        indexed by column name.
    """
    rows = []
    seconds = series.index.values

    for start in seconds:
        end = start + window
        win = series.loc[start:end]

        if len(win) == window + 1:  # require full window
            rows.append({
                "start_sec": start,
                "end_sec": end,
                "value": win.mean()
            })

    # Explicit columns keep the schema stable when `rows` is empty.
    return pd.DataFrame(rows, columns=["start_sec", "end_sec", "value"])

In [49]:
# Window each per-second metric with the default window size.
df_jitter_5s, df_gaze_5s, df_smile_5s = [
    sliding_windows(per_second_series)
    for per_second_series in (jitter_head_1s, jitter_gaze_1s, smile_1s)
]

====================================================
# 5 - SCORING
====================================================


## 5.1 - Head Stability score

In [50]:
# Reference jitter level for the absolute head score: it is the midpoint of the
# logistic, so a video whose mean windowed jitter equals this value gets 0.5 on
# that term.
BASELINE_JITTER_OPTIMAL = 0.0001
# "ideal" head/gaze movement for stable talking head (normalized coords)

# Scale of the same logistic: the deviation from the optimum is divided by this
# value, so a larger range makes the absolute score change more gradually.
BASELINE_JITTER_RANGE = 0.0003
# range inside which movement is acceptable

In [51]:
# RELATIVE (per-window)
z = (df_jitter_5s["value"] - df_jitter_5s["value"].mean()) / df_jitter_5s["value"].std()
df_jitter_5s["rel_score"] = 1 / (1 + np.exp(z))   # inverted

# ABSOLUTE (video-level)
jitter_abs = df_jitter_5s["value"].mean()

abs_head_score = 1 / (
    1 + np.exp((jitter_abs - BASELINE_JITTER_OPTIMAL) / BASELINE_JITTER_RANGE)
)

# FINAL SCORE
global_head_stability = 0.5 * abs_head_score + 0.5 * df_jitter_5s["rel_score"].mean()
print(global_head_stability)

0.5520634778493204


In [52]:
# RELATIVE score (your code)
#z = (df_jitter_5s["value"] - df_jitter_5s["value"].mean()) / df_jitter_5s["value"].std()
#df_jitter_5s["score"] = 1 / (1 + np.exp(z))

#global_head_stability = df_jitter_5s["score"].mean()
#print(global_head_stability)

≥ 0.55 — Highly stable head posture (Excellent)
“Controlled, confident, composed delivery.”
Smooth, steady micro-movement
No visible shaking or restlessness
Strong presence on camera or stage
Common among experienced speakers, anchors, presenters
This is the “professional-level composure” zone.

0.45 – 0.55 — Natural and healthy stability (Good)
“Balanced movement: expressive but steady.”
Small natural adjustments
No perceptible instability
Not too stiff, not too mobile
Ideal for pitches and interviews
Most good communicators fall here.

0.30 – 0.45 — Mild instability (Weak)
“Subtle micro-fidgeting or restlessness.”
Slight shaking or frequent small corrections
Noticeable under stress
Does not ruin communication, but is visible
Typical of moderately nervous speakers.

≤ 0.30 — Unstable head movement (Poor)
“Distracting, restless, or shaky posture.”
Frequent jitter or micro-jerking
Strong nervous energy or lack of control
Reduces perceived credibility and presence
This is the zone needing corrective feedback.

## 🧠 Global Head Stability Score Interpretation Guide

This score assesses the **control and smoothness of the speaker's head movement**, specifically measuring the **erraticness or jerkiness (Jitter)** of the head's position. The score is inverted, where a higher score indicates better stability, based on a target optimal jitter of $0.0001$.

**Score Context:**
* The score is bounded between **0 and 1**.
* **A higher score indicates greater head stability (less jitter).**

| Score Range | Behavioral Category | Summary and Impact | Key Behavioral Indicators |
| :---: | :--- | :--- | :--- |
| **$\geq 0.75$** | **High Stability & Control (Excellent)** | **"Head movements are minimal, smooth, and highly intentional."** Jitter is significantly below the $0.0001$ optimal threshold, demonstrating maximum composure. | * Head is often held still or moves deliberately to punctuate points.* No sudden, involuntary jerks or shaking.* **Projects high confidence, attentiveness, and focus.** |
| **$0.55 – 0.75$** | **Good Stability (Good)** | **"Movement is mostly controlled, meeting the stability baseline with minor shifts."** Overall jitter is acceptable, and movement is not distracting. | * Slight head nods or turns are integrated smoothly.* Movement, when present, is slow and clear.* **Suggests a calm, professional delivery without visible nervousness.** |
| **$0.35 ‚Äì 0.55$** | **Noticeable Unsteadiness (Weak)** | **"Jitter is high enough to be noticeable, suggesting slight discomfort or lack of control."** The head is unstable compared to the $0.0001$ optimal threshold. | * Frequent, small, shifting movements of the head or neck.* Head frequently adjusts position for no apparent communicative reason.* **May indicate mild anxiety, restlessness, or low preparation.** |
| **$\leq 0.35$** | **Excessive Jitter/Erratic Movement (Poor)** | **"Highly erratic, jerky, or chaotic movement that severely distracts the viewer."** Jitter is far above the acceptable threshold. | * Constant visible tremors, fidgeting, or rapid, unintentional head adjustments.* Head position is highly inconsistent between frames.* **Strongly suggests high tension, stress, or distraction.** |

## 5.2 - Gaze consistency score

In [53]:
# Constants for the absolute gaze score: OPTIMAL is the midpoint of the logistic
# (mean windowed gaze jitter equal to it scores 0.5) and VAR is the scale the
# deviation is divided by.
# NOTE(review): VAR is ~175x smaller than OPTIMAL, so the logistic behaves almost
# like a hard threshold at OPTIMAL (score ~1 below it, ~0 above) — confirm intended.
BASELINE_GAZE_OPTIMAL = 0.0035   # "natural" gaze micro-movement
BASELINE_GAZE_VAR     = 0.00002  # tolerance (logistic scale)

In [54]:
# RELATIVE (per-window)
# z-score each window's gaze jitter against this video's own windows, then apply
# an inverted logistic so lower-than-average jitter maps to a score above 0.5.
gaze_std = df_gaze_5s["value"].std()
if np.isfinite(gaze_std) and gaze_std > 0:
    z = (df_gaze_5s["value"] - df_gaze_5s["value"].mean()) / gaze_std
else:
    # Fewer than two windows, or all windows identical: there is no spread to
    # rank against, so use z = 0 (neutral 0.5) instead of propagating NaN.
    z = pd.Series(0.0, index=df_gaze_5s.index)
df_gaze_5s["rel_score"] = 1 / (1 + np.exp(z))
# NOTE(review): z is centred on the video's own mean, so the average rel_score
# tends to sit near 0.5 whatever the actual consistency — confirm this is intended.

# ABSOLUTE (video-level)
# Logistic of the mean windowed gaze jitter around the fixed baseline.
gaze_abs = df_gaze_5s["value"].mean()

abs_gaze_score = 1 / (
    1 + np.exp((gaze_abs - BASELINE_GAZE_OPTIMAL) / BASELINE_GAZE_VAR)
)

# FINAL SCORE: equal blend of the absolute and the mean relative score
global_gaze_consistency = 0.5 * abs_gaze_score + 0.5 * df_gaze_5s["rel_score"].mean()
print(global_gaze_consistency)


0.7586086130558115


In [55]:
#z = (df_gaze_5s["value"] - df_gaze_5s["value"].mean()) / df_gaze_5s["value"].std()
#df_gaze_5s["score"] = 1 / (1 + np.exp(z))  # inverted sigmoid
#global_gaze_consistency = df_gaze_5s["score"].mean()
#print(global_gaze_consistency)

≥ 0.55 — Highly controlled gaze
“Calm, intentional, confident eye behavior.”
Smooth gaze movement
Very few abrupt eye shifts
Audience perceives composure
Excellent for pitches and interviews
This is the “expert presenter” zone.

0.45 – 0.55 — Natural gaze behavior (GOOD)
“Healthy balance between expressiveness and control.”
Natural small adjustments
Not too still (robotic), not too jumpy
Very typical of competent speakers
Visually comfortable and credible
This is where most good speakers land.

0.30 – 0.45 — Slightly unstable gaze
“Occasional darting or scanning behavior.”
Moments of small rapid shifts
Eye instability noticeable under stress
Audience perceives mild distraction or nervousness
Not bad, but room for improvement.

≤ 0.30 — Unsteady or nervous gaze
“Frequent darting eye movements.”
High jitter
Looking around too often
Sudden direction changes
Perceived as discomfort, insecurity, or cognitive overload
This is the improvement-critical zone.

## 👁️ Global Gaze Consistency Score Interpretation Guide

This score assesses the **focus and stability of the eye gaze**, specifically measuring the **variance (Jitter)** of eye movements. The scoring is designed to reward stability, based on a target optimal micro-movement baseline of $0.0035$.

**Score Context:**
* The score is bounded between **0 and 1**.
* **A higher score indicates greater gaze consistency (less jitter).** Both components are designed to reward low raw jitter values.

| Score Range | Behavioral Category | Summary and Impact | Key Behavioral Indicators |
| :---: | :--- | :--- | :--- |
| **$\geq 0.75$** | **High Consistency & Focus (Excellent)** | **"Gaze is highly stable and controlled, demonstrating maximum focus."** Gaze jitter is minimal, often below the $0.0035$ optimal threshold. | * Eyes remain focused on a central point for long periods.* Gaze shifts are slow and intentional (e.g., following a viewer).* **Projects high confidence, focus, and sincerity.** |
| **$0.55 – 0.75$** | **Good Consistency (Good)** | **"Gaze is generally stable, meeting the consistency baseline with minor natural micro-movements."** Overall jitter is acceptable and not distracting. | * Natural micro-movements are present ($\sim 0.0035$), but no rapid, distracting eye darts.* Focus is maintained despite slight shifts.* **Suggests an engaged, professional level of focus.** |
| **$0.35 ‚Äì 0.55$** | **Noticeable Inconsistency (Weak)** | **"Eye movements are slightly erratic or wandering, suggesting distraction or low engagement."** Jitter is higher than the optimal $0.0035$ threshold. | * Eyes dart around frequently or lose focus often.* Gaze shifts are quick, suggesting a failure to hold attention.* **May indicate mild anxiety, distraction, or low conviction.** |
| **$\leq 0.35$** | **Highly Erratic Gaze (Poor)** | **"Gaze is highly unstable and chaotic, severely distracting the viewer."** Jitter is far above the acceptable threshold, resulting in a very low score. | * Constant, rapid, involuntary eye movements (darting, scanning).* Gaze appears scattered, unable to fixate on a point.* **Strongly suggests high nervousness, lack of focus, or reading from a script off-camera.** |

## 5.3 - Smile activation score

In [56]:
# --- SMILE ---
BASELINE_SMILE_OPTIMAL = 0.02
# Typical neutral lip-corner distance; above = smiling, below = flat.
# 0.02 is common for MediapPipe's normalized coordinates.

BASELINE_SMILE_RANGE = 0.01
# Controls sensitivity. Larger = smoother response.

In [57]:
# ========== SMILE ACTIVATION ==========
# RELATIVE score (your code)
z_smile = (df_smile_5s["value"] - df_smile_5s["value"].mean()) / df_smile_5s["value"].std()
df_smile_5s["rel_score"] = 1 / (1 + np.exp(-z_smile))

# ABSOLUTE score
smile_abs = df_smile_5s["value"].mean()
abs_smile_score = 1 / (1 + np.exp(-(smile_abs - BASELINE_SMILE_OPTIMAL) / BASELINE_SMILE_RANGE))

# FINAL SMILE SCORE
global_smile_activation = 0.5 * abs_smile_score + 0.5 * df_smile_5s["rel_score"].mean()
print (global_smile_activation)

0.6513540861405409


≥ 0.55 — Expressive, warm, approachable (Excellent)
“A strong, natural smile that signals openness and positive engagement.”
Visibly activated AU12 (lip-corner puller)
Contributes to warmth and rapport
Very effective in pitches where friendliness matters
Typical of charismatic speakers

0.45 – 0.55 — Balanced, natural smile (Good)
“Occasional or moderate smiling. Pleasant and appropriate.”
Natural social smiling
Not exaggerated
Signals comfort and confidence
Works well for most public speaking contexts
This is where most good communicators fall.

0.30 – 0.45 — Low smile activation (Weak)
“Neutral or minimally expressive. Can feel serious, tense, or distant.”
Little lip-corner activation
Can reduce warmth and perceived approachability
Not necessarily bad — depends on context
Common in nervous speakers or very formal tones

≤ 0.30 — Flat or absent smile (Poor)
“No visible smiling. Can feel closed-off, stressed, or unengaged.”
Almost no AU12 movement
Often correlates with discomfort or disengagement
Pitch may feel rigid or emotionally flat
This is improvement-critical if warmth or persuasion is the goal.

## 😄 Global Smile Activation Score Interpretation Guide

This score assesses the **intensity and consistency of positive facial expression** by measuring the distance between the lip corners. It rewards greater overall activation compared to the $\mathbf{0.02}$ neutral baseline.

**Score Context:**
* The score is bounded between **0 and 1**.
* **A higher score indicates greater smile activation (more or wider smiling).** 

| Score Range | Behavioral Category | Summary and Impact | Key Behavioral Indicators |
| :---: | :--- | :--- | :--- |
| **$\geq 0.75$** | **High & Frequent Activation (Excellent)** | **"Consistent, broad smiling that communicates warmth and positive enthusiasm."** The average lip distance is significantly above the $0.02$ neutral baseline. | * Speaker smiles widely and frequently, often maintaining it during speech.* **Projects high warmth, enthusiasm, and likeability.** |
| **$0.55 – 0.75$** | **Moderate Activation (Good)** | **"Appropriate, pleasant level of smiling that is situationally expressive."** The average lip distance is above the neutral $0.02$ baseline, but not constant. | * Smiles are frequent but intermittent, used for emphasis or transition.* Face returns to a pleasant, non-hostile neutral expression.* **Suggests engagement and a positive communication environment.** |
| **$0.35 ‚Äì 0.55$** | **Neutral/Low Activation (Weak)** | **"Facial expression is often flat or subdued, failing to meet the neutral baseline."** The mean activation is near the $0.02$ baseline or slightly below it. | * Mouth position is largely flat or only slightly turned up.* Smiling, when it occurs, is small or fleeting.* **May signal low energy, seriousness, or lack of enthusiasm.** |
| **$\leq 0.35$** | **Flat or Negative Activation (Poor)** | **"Facial expression is static and cold, potentially indicating dissatisfaction or rigidity."** The average lip distance is consistently below the $0.02$ baseline. | * Face appears stiff or unmoving.* Lip corners may turn down (frowning/sadness) or remain narrow and tense.* **Conveys low warmth, discomfort, or an overtly serious/rigid demeanor.** |

====================================================
# 6 - MERGING + global scoring
====================================================

In [58]:
# Give each metric's columns a metric-specific name so the three window tables
# can be joined side by side. Every renamed table gets a NEW variable name: the
# scoring cells read the generic "value" / "rel_score" columns of df_gaze_5s and
# df_smile_5s, so overwriting those frames here would make the scoring cells
# fail with a KeyError when re-run after this cell.
WINDOW_KEYS = ["start_sec", "end_sec"]

df_jitterface_5s = df_jitter_5s.rename(columns={
    "value": "value_head",
    "rel_score": "rel_score_head"
})

df_gazeface_5s = df_gaze_5s.rename(columns={
    "value": "value_gaze",
    "rel_score": "rel_score_gaze"
})

df_smileface_5s = df_smile_5s.rename(columns={
    "value": "value_smile",
    "rel_score": "rel_score_smile"
})

# Inner-join the three tables on the window bounds: only windows present for
# all three metrics are kept.
df_face_merged = (
    df_jitterface_5s[WINDOW_KEYS + ["value_head", "rel_score_head"]]
    .merge(df_gazeface_5s[WINDOW_KEYS + ["value_gaze", "rel_score_gaze"]], on=WINDOW_KEYS)
    .merge(df_smileface_5s[WINDOW_KEYS + ["value_smile", "rel_score_smile"]], on=WINDOW_KEYS)
)

# Label rows window_0, window_1, ... in merged order.
df_face_merged.index = [f"window_{i}" for i in range(len(df_face_merged))]
df_face_merged


Unnamed: 0,start_sec,end_sec,value_head,rel_score_head,value_gaze,rel_score_gaze,value_smile,rel_score_smile
window_0,0,5,2.427418e-06,0.006788,0.0003,0.009137,0.036301,0.859519
window_1,1,6,1.863541e-06,0.029468,0.000165,0.250221,0.03592,0.812031
window_2,2,7,6.458659e-07,0.431818,0.000158,0.286065,0.035229,0.696861
window_3,3,8,9.567717e-07,0.250373,0.000133,0.439567,0.034511,0.544136
window_4,4,9,9.158468e-07,0.271229,0.000127,0.477986,0.034433,0.526287
window_5,5,10,9.36251e-07,0.260696,0.00012,0.527798,0.034338,0.504742
window_6,6,11,9.263265e-07,0.265786,0.00012,0.528558,0.034497,0.540963
window_7,7,12,9.931921e-07,0.232734,0.000131,0.452041,0.034363,0.510272
window_8,8,13,9.209882e-07,0.26855,0.000114,0.567634,0.034283,0.492205
window_9,9,14,5.277232e-07,0.509496,0.000131,0.451461,0.034273,0.489774


In [59]:
def compute_face_score(global_head, global_gaze, global_smile):
    """
    Return the unweighted average of the head-stability, gaze-consistency
    and smile-activation scores as one overall face score.
    """
    components = (global_head, global_gaze, global_smile)
    return sum(components) / 3

# Overall face communication score for this video.
print(
    compute_face_score(
        global_head=global_head_stability,
        global_gaze=global_gaze_consistency,
        global_smile=global_smile_activation,
    )
)

0.654008725681891
