In [1]:
import os, sys
from pathlib import Path

# Anchor the kernel at the project root so that relative data paths and
# `src/` imports resolve the same way regardless of where Jupyter was started.
# If the working directory is the "notebooks" folder, the root is its parent;
# otherwise the working directory itself is taken as the root.
PROJECT_ROOT = Path.cwd().parent if Path.cwd().name == "notebooks" else Path.cwd()
os.chdir(PROJECT_ROOT)

# Put <root>/src at the front of the module search path, but only once, so
# re-running this cell does not stack duplicate entries.
SRC_PATH = PROJECT_ROOT / "src"
if sys.path.count(str(SRC_PATH)) == 0:
    sys.path.insert(0, str(SRC_PATH))

# Echo the resulting locations for a quick visual sanity check.
print("cwd:", Path.cwd())
print("src path added:", SRC_PATH)

cwd: /Users/jayklarin/__DI/Repositories/FaceStats
src path added: /Users/jayklarin/__DI/Repositories/FaceStats/src


In [3]:
import polars as pl

# Read the two metadata tables from the processed-data folder (paths are
# relative to the current working directory).
df_attr = pl.read_parquet("data/processed/metadata/attributes_clean.parquet")
df_scores = pl.read_parquet("data/processed/metadata/attractiveness_scores.parquet")

# Combine both tables on their shared "filename" column, using the join
# strategy polars applies by default.
df = df_attr.join(other=df_scores, on="filename")

# Row count of the joined frame.
print("Total images:", len(df))

# Per-value counts for the categorical columns; each header and its table are
# emitted on separate lines.
print("\nGender distribution:", df.get_column("gender").value_counts(), sep="\n")
print("\nEthnicity distribution:", df.get_column("ethnicity").value_counts(), sep="\n")

# One-row summary of the age column: number of nulls, minimum, maximum, mean.
print("\nAge breakdown:")
print(
    df.select(
        pl.col("age").is_null().sum().alias("null_age"),
        pl.col("age").min().alias("age_min"),
        pl.col("age").max().alias("age_max"),
        pl.col("age").mean().alias("age_mean"),
    )
)

# Descriptive statistics for the attractiveness score column.
print("\nAttractiveness score range:")
print(df.get_column("attractiveness").describe())


Total images: 700

Gender distribution:
shape: (2, 2)
┌─────────┬───────┐
│ gender  ┆ count │
│ ---     ┆ ---   │
│ str     ┆ u32   │
╞═════════╪═══════╡
│ unknown ┆ 345   │
│ female  ┆ 355   │
└─────────┴───────┘

Ethnicity distribution:
shape: (1, 2)
┌───────────┬───────┐
│ ethnicity ┆ count │
│ ---       ┆ ---   │
│ str       ┆ u32   │
╞═══════════╪═══════╡
│ unknown   ┆ 700   │
└───────────┴───────┘

Age breakdown:
shape: (1, 4)
┌──────────┬─────────┬─────────┬──────────┐
│ null_age ┆ age_min ┆ age_max ┆ age_mean │
│ ---      ┆ ---     ┆ ---     ┆ ---      │
│ u32      ┆ f64     ┆ f64     ┆ f64      │
╞══════════╪═════════╪═════════╪══════════╡
│ 345      ┆ 14.0    ┆ 85.0    ┆ 44.36338 │
└──────────┴─────────┴─────────┴──────────┘

Attractiveness score range:
shape: (9, 2)
┌────────────┬──────────┐
│ statistic  ┆ value    │
│ ---        ┆ ---      │
│ str        ┆ f64      │
╞════════════╪══════════╡
│ count      ┆ 700.0    │
│ null_count ┆ 0.0      │
│ mean       ┆ 3.143301 │
│ st