```mermaid
flowchart LR

classDef notebook fill:#5c7fa6,stroke:#3f5a7b,color:#f2f6fb, font-weight:bold;
classDef python fill:#9a80b8,stroke:#6d5789,color:#f7f3fb, font-weight:bold;
classDef tools fill:#e9c48a,stroke:#b58950,color:#2d1c05;
classDef methods fill:#8cc7ab,stroke:#5e9475,color:#0f2f1f;

NFM["03_attributes_facemesh.ipynb"]:::notebook

NFM --> PFM["facemesh_utils.py"]:::python

PFM --> TFM["Tools:<br>mediapipe<br>opencv<br>numpy<br>polars<br>tqdm"]:::tools

TFM --> MFM["Methods:<br>extract_landmarks()<br>compute_symmetry()<br>compute_proportions()<br>compute_jaw_metrics()<br>compute_smile_intensity()<br>process_batch()<br>write_parquet()"]:::methods


Markdown cell 1 — Purpose
```markdown
Compute Tier A & Tier B facial geometry metrics using MediaPipe FaceMesh (CPU-only) over raw images in `data/raw/keep_local_125000_files/`, and write results to `data/processed/metadata/attributes_with_meta.parquet`.
- Metrics: symmetry, eye/face ratio, golden-ratio deviation, jaw width/chin height, smile intensity, and supporting proportions.
- Handles corrupted/missing images gracefully; skips failures.
- Uses CPU-safe FaceMesh (static_image_mode=True, max_num_faces=1, refine_landmarks=True).
```


In [1]:
import os
from pathlib import Path
import cv2
import numpy as np
import polars as pl
from tqdm import tqdm
import mediapipe as mp

# Resolve repo root (works when running from notebooks/)
ROOT = Path.cwd()
if ROOT.name == 'notebooks':
    ROOT = ROOT.parent

# Paths (V4 structure)
RAW_DIR = ROOT / "data/raw"
OUT_PATH = ROOT / "data/processed/metadata/attributes_with_meta.parquet"
OUT_PATH.parent.mkdir(parents=True, exist_ok=True)

In [2]:
# ```python
mp_face_mesh = mp.solutions.face_mesh

FACE_MESH_KWARGS = dict(
    static_image_mode=True,
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Key landmark indices (FaceMesh)
IDX_LEFT_EYE_OUTER = 33
IDX_LEFT_EYE_INNER = 133
IDX_RIGHT_EYE_INNER = 362
IDX_RIGHT_EYE_OUTER = 263
IDX_NOSE_TIP = 1
IDX_CHIN = 152
IDX_FOREHEAD = 10
IDX_MOUTH_LEFT = 61
IDX_MOUTH_RIGHT = 291
IDX_JAW_LEFT = 234
IDX_JAW_RIGHT = 454


def extract_landmarks(image_path, face_mesh):
    'Return np.array shape (N,3) of landmarks in image coords; None on failure.'
    try:
        img = cv2.imread(str(image_path))
        if img is None:
            return None
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    except Exception:
        return None

    h, w, _ = rgb.shape
    try:
        result = face_mesh.process(rgb)
    except Exception:
        return None

    if not result.multi_face_landmarks:
        return None

    lm = result.multi_face_landmarks[0].landmark
    pts = np.array([[p.x * w, p.y * h, p.z] for p in lm], dtype=np.float32)
    return pts


def _safe_dist(p1, p2):
    return float(np.linalg.norm(p1 - p2))


def compute_symmetry(pts):
    'Symmetry score: lower is more symmetric (mean abs x-diff to midline).'
    cx = float(np.mean(pts[:, 0]))
    x_diff = np.abs((pts[:, 0] - cx))
    return float(np.mean(x_diff) / (np.ptp(pts[:, 0]) + 1e-6))


def compute_proportions(pts):
    'Return eye/face ratio and golden-ratio deviation.'
    eye_width_left = _safe_dist(pts[IDX_LEFT_EYE_OUTER], pts[IDX_LEFT_EYE_INNER])
    eye_width_right = _safe_dist(pts[IDX_RIGHT_EYE_INNER], pts[IDX_RIGHT_EYE_OUTER])
    interpupil = _safe_dist((pts[IDX_LEFT_EYE_INNER] + pts[IDX_RIGHT_EYE_INNER]) / 2,
                            (pts[IDX_LEFT_EYE_OUTER] + pts[IDX_RIGHT_EYE_OUTER]) / 2)
    face_height = _safe_dist(pts[IDX_FOREHEAD], pts[IDX_CHIN])
    face_width = _safe_dist(pts[IDX_JAW_LEFT], pts[IDX_JAW_RIGHT])
    eye_face_ratio = (eye_width_left + eye_width_right + interpupil) / 3.0 / (face_width + 1e-6)
    golden_target = 1.618
    golden_ratio = (face_height / (face_width + 1e-6))
    golden_deviation = abs(golden_ratio - golden_target)
    return eye_face_ratio, golden_ratio, golden_deviation, face_height, face_width


def compute_jaw_metrics(pts, face_height):
    jaw_width = _safe_dist(pts[IDX_JAW_LEFT], pts[IDX_JAW_RIGHT])
    chin_height = _safe_dist(pts[IDX_NOSE_TIP], pts[IDX_CHIN])
    jaw_ratio = jaw_width / (face_height + 1e-6)
    chin_ratio = chin_height / (face_height + 1e-6)
    return jaw_width, chin_height, jaw_ratio, chin_ratio


def compute_smile_intensity(pts, face_width):
    mouth_width = _safe_dist(pts[IDX_MOUTH_LEFT], pts[IDX_MOUTH_RIGHT])
    smile_intensity = mouth_width / (face_width + 1e-6)
    return mouth_width, smile_intensity
# ```

In [3]:
from math import ceil

BATCH_SIZE = 512


def process_batch(paths, face_mesh):
    rows = []
    for path in paths:
        pts = extract_landmarks(path, face_mesh)
        if pts is None:
            continue
        sym = compute_symmetry(pts)
        eye_face_ratio, golden_ratio, golden_dev, face_h, face_w = compute_proportions(pts)
        jaw_w, chin_h, jaw_ratio, chin_ratio = compute_jaw_metrics(pts, face_h)
        mouth_w, smile_intensity = compute_smile_intensity(pts, face_w)

        rows.append({
            'filename': path.name,
            'symmetry_score': sym,
            'eye_face_ratio': eye_face_ratio,
            'golden_ratio': golden_ratio,
            'golden_deviation': golden_dev,
            'face_height': face_h,
            'face_width': face_w,
            'jaw_width': jaw_w,
            'chin_height': chin_h,
            'jaw_ratio': jaw_ratio,
            'chin_ratio': chin_ratio,
            'mouth_width': mouth_w,
            'smile_intensity': smile_intensity,
        })
    return rows


all_images = sorted([p for p in RAW_DIR.glob('*.jpg')])
if not all_images:
    raise SystemExit(f'No images found in {RAW_DIR}')

all_rows = []
with mp_face_mesh.FaceMesh(**FACE_MESH_KWARGS) as fm:
    total_batches = ceil(len(all_images) / BATCH_SIZE)
    for i in tqdm(range(total_batches), desc='FaceMesh batches'):
        batch = all_images[i * BATCH_SIZE:(i + 1) * BATCH_SIZE]
        all_rows.extend(process_batch(batch, fm))

if not all_rows:
    raise SystemExit('No rows produced; check inputs and FaceMesh configuration')

df = pl.DataFrame(all_rows)
df.write_parquet(OUT_PATH)
print(f'Wrote {len(df)} rows → {OUT_PATH}')


I0000 00:00:1763977059.961933 4635204 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M1
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1763977059.976742 4635406 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
FaceMesh batches:   0%|          | 0/20 [00:00<?, ?it/s]W0000 00:00:1763977059.991233 4635406 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1763977060.012577 4635405 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.
FaceMesh batches: 100%|██████████| 20/20 [01:36<00:00,  4.84s/it]

Wrote 9999 rows → /Users/jayklarin/__DI/Repositories/FaceStats/data/processed/metadata/attributes_with_meta.parquet





In [4]:
# ```python
if OUT_PATH.exists():
    preview = pl.read_parquet(OUT_PATH, n_rows=5)
    print(preview)
else:
    print(f'Missing output: {OUT_PATH}')
# ```

shape: (5, 13)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ filename  ┆ symmetry_ ┆ eye_face_ ┆ golden_ra ┆ … ┆ jaw_ratio ┆ chin_rati ┆ mouth_wid ┆ smile_in │
│ ---       ┆ score     ┆ ratio     ┆ tio       ┆   ┆ ---       ┆ o         ┆ th        ┆ tensity  │
│ str       ┆ ---       ┆ ---       ┆ ---       ┆   ┆ f64       ┆ ---       ┆ ---       ┆ ---      │
│           ┆ f64       ┆ f64       ┆ f64       ┆   ┆           ┆ f64       ┆ f64       ┆ f64      │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ SFHQ_pt4_ ┆ 0.187907  ┆ 0.136083  ┆ 1.071455  ┆ … ┆ 0.93331   ┆ 0.39709   ┆ 183.79736 ┆ 0.326962 │
│ 00000001. ┆           ┆           ┆           ┆   ┆           ┆           ┆ 3         ┆          │
│ jpg       ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆          │
│ SFHQ_pt4_ ┆ 0.198469  ┆ 0.139354  ┆ 1.16532   ┆ … ┆ 0.858133  ┆ 0.429469  

Markdown cell 2 — Geometry-based attractiveness score
```markdown
Compute a simple 1–10 attractiveness bucket from FaceMesh geometry metrics (symmetry, proportions, jaw/chin, smile). Lower symmetry_score is better; other metrics are scaled via z-scores. We rank by composite score and bin into deciles.
```


In [5]:
# ```python
import numpy as np
import pandas as pd

pdf = pl.read_parquet(OUT_PATH).to_pandas()

metrics = [
    "symmetry_score",        # lower is better
    "eye_face_ratio",
    "golden_ratio",         # closer to ~1.618 is better
    "jaw_ratio",
    "chin_ratio",
    "smile_intensity",
]

if pdf.empty:
    raise SystemExit("No rows to score; run FaceMesh extraction first.")

# z-score helper
scores = []
for col in metrics:
    vals = pdf[col].astype(float)
    mean = vals.mean()
    std = vals.std() or 1e-6
    z = (vals - mean) / std
    if col == "symmetry_score":
        z = -z  # lower symmetry_score is better
    if col == "golden_ratio":
        target = 1.618
        z = -np.abs((vals - target) / std)  # penalize deviation from golden ratio
    scores.append(z)

pdf["geom_score"] = np.vstack(scores).mean(axis=0)

# Percentile-based 1–10 buckets
percentile = pdf["geom_score"].rank(pct=True)
pdf["attractiveness_geom"] = np.clip(np.ceil(percentile * 10), 1, 10).astype(int)

# Write back to parquet
pl.DataFrame(pdf).write_parquet(OUT_PATH)

print("Saved with attractiveness_geom column →", OUT_PATH)
print("Distribution (1–10):")
print(pdf["attractiveness_geom"].value_counts().sort_index())
# ```

Saved with attractiveness_geom column → /Users/jayklarin/__DI/Repositories/FaceStats/data/processed/metadata/attributes_with_meta.parquet
Distribution (1–10):
attractiveness_geom
1      999
2     1000
3     1000
4     1000
5     1000
6     1000
7     1000
8     1000
9     1000
10    1000
Name: count, dtype: int64
