
# 07 — Single Image Attractiveness Scoring

Upload or point to any JPEG on disk, generate a CLIP embedding, and run the shared attractiveness scorer.


In [11]:

import os, sys

# Repository root. Set the FACESTATS_ROOT environment variable to point at your
# own checkout; when it is unset or empty, the original author's path is used.
PROJECT_ROOT = (
    os.environ.get("FACESTATS_ROOT")
    or "/Users/jayklarin/__DI/Repositories/FaceStats"
)
if not os.path.isdir(PROJECT_ROOT):
    # Fail early with an actionable message instead of a bare chdir error.
    raise FileNotFoundError(
        f"Project root not found: {PROJECT_ROOT} "
        "(set FACESTATS_ROOT to your FaceStats checkout)"
    )

# Later cells use paths relative to the repository root, so make it the cwd.
os.chdir(PROJECT_ROOT)
print("cwd:", os.getcwd())

# Put <root>/src on the import path once; the membership check keeps re-runs idempotent.
SRC = os.path.join(PROJECT_ROOT, "src")
if SRC not in sys.path:
    sys.path.insert(0, SRC)
print("src added:", SRC)


cwd: /Users/jayklarin/__DI/Repositories/FaceStats
src added: /Users/jayklarin/__DI/Repositories/FaceStats/src


In [12]:

from pathlib import Path
import numpy as np
import polars as pl
from PIL import Image
from src.attractiveness.scoring import AttractivenessScorer
from src.embeddings.embed_clip import get_clip_embedding

# Both paths are relative; they resolve against the working directory that the
# first cell sets to the project root.
# Model file handed to AttractivenessScorer when the scorer is constructed.
MODEL_PATH = Path("src/models/attractiveness_regressor.pt")
# Parquet of dataset scores. adjust_percentile() reads this same location as its
# reference distribution (it spells the path out itself rather than using this constant).
SCORES_PARQUET = Path("data/processed/metadata/attractiveness_scores.parquet")
print("Model path:", MODEL_PATH)


Model path: src/models/attractiveness_regressor.pt



## Helper: score a single image
- Computes the CLIP embedding (reusing the CLIP model if it is already loaded)
- Runs attractiveness scorer → raw, percentile, decile


In [13]:
# Build the scorer once at cell level; score_image() reuses this module-level
# instance on every call instead of constructing a new one per image.
scorer = AttractivenessScorer(MODEL_PATH)

def score_image(image_path: Path):
    """Embed one image with CLIP and run it through the shared scorer.

    Args:
        image_path: Location of the image file. A ``str`` or other path-like
            value is accepted too and is converted to ``Path``.

    Returns:
        The result of ``scorer.score_embeddings`` for a one-row frame holding
        the image's ``filename`` and ``embedding``.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing regular file
            (missing paths and directories are both rejected).
    """
    # Coerce first so plain string paths work; Path(Path(...)) is a no-op.
    image_path = Path(image_path)
    if not image_path.is_file():
        raise FileNotFoundError(f"Image not found: {image_path}")

    emb = get_clip_embedding(image_path)
    # Polars needs a plain Python list to build the list-typed column, so
    # unwrap array-like embeddings via tolist() and fall back to list().
    emb_list = emb.tolist() if hasattr(emb, 'tolist') else list(emb)

    df = pl.DataFrame(
        {"filename": [image_path.name], "embedding": [emb_list]}
    ).with_columns(pl.col("embedding").cast(pl.List(pl.Float32)))

    # NOTE(review): the original comment says the scorer handles the
    # single-image case internally — confirm against AttractivenessScorer.
    return scorer.score_embeddings(df)


In [14]:
# Helper: re-rank scored image(s) against the dataset score distribution
def adjust_percentile(scored_df, ref_path='data/processed/metadata/attractiveness_scores.parquet'):
    """Recompute percentile and 1–10 decile against a reference score file.

    Args:
        scored_df: Polars frame with a raw score column, ``attractiveness_raw``
            or, failing that, ``attractiveness``. Every row is ranked, not only
            the first one.
        ref_path: Parquet file with the reference scores. Defaults to the
            dataset score file this notebook has always used.

    Returns:
        ``scored_df`` with ``attractiveness_pct`` (fraction of reference scores
        that are <= the row's raw score) and ``attractiveness`` (that fraction
        mapped to an integer decile in 1..10) added or overwritten.

    Raises:
        ValueError: If the reference column has no non-null values, or if the
            raw score column of ``scored_df`` contains nulls.
    """
    # Kept function-local, as in the original, so the helper is self-contained.
    import polars as pl
    import numpy as np

    df_ref = pl.read_parquet(ref_path)
    raw_col = 'attractiveness_raw' if 'attractiveness_raw' in df_ref.columns else 'attractiveness'

    # Sort once so each row's rank is a binary search instead of a full scan.
    ref_sorted = np.sort(df_ref[raw_col].drop_nulls().to_numpy())
    if ref_sorted.size == 0:
        raise ValueError(f"No reference scores in column '{raw_col}' of {ref_path}")

    # Same column fallback as before: prefer the reference's column name, else
    # the plain 'attractiveness' column of the scored frame.
    score_col = raw_col if raw_col in scored_df.columns else 'attractiveness'
    scores = scored_df[score_col]
    if scores.null_count() > 0:
        raise ValueError(f"Column '{score_col}' of scored_df contains null scores")
    raw_vals = scores.to_numpy().astype(np.float64)

    # side='right' counts the reference values <= each raw score, ties included.
    pct = np.searchsorted(ref_sorted, raw_vals, side='right') / ref_sorted.size
    # ceil(pct * 10) maps (0, 0.1] -> 1 ... (0.9, 1.0] -> 10; the clip lifts pct == 0 to 1.
    decile = np.clip(np.ceil(pct * 10), 1, 10).astype(np.int64)

    return scored_df.with_columns([
        pl.Series('attractiveness_pct', pct, dtype=pl.Float64),
        pl.Series('attractiveness', decile, dtype=pl.Int64)
    ])



## Option A: point to an existing JPEG path
Set `IMG_PATH` to any JPEG. The results include the raw model score, its percentile relative to the full dataset, and the corresponding 1–10 decile score.


In [15]:
# Example: update this to your image path
IMG_PATH = Path('data/processed/preproc_best10k/SFHQ_pt4_00090828.jpg')

# Score the image, then re-rank the result against the dataset distribution.
scored = adjust_percentile(score_image(IMG_PATH))
print(scored)


shape: (1, 4)
┌───────────────────────┬────────────────────┬────────────────────┬────────────────┐
│ filename              ┆ attractiveness_raw ┆ attractiveness_pct ┆ attractiveness │
│ ---                   ┆ ---                ┆ ---                ┆ ---            │
│ str                   ┆ f32                ┆ f64                ┆ i64            │
╞═══════════════════════╪════════════════════╪════════════════════╪════════════════╡
│ SFHQ_pt4_00090828.jpg ┆ 1.136176           ┆ 0.1019             ┆ 2              │
└───────────────────────┴────────────────────┴────────────────────┴────────────────┘



## Option B: upload a JPEG (Jupyter front-end)
Use the widget below to upload a JPEG, then run the cell after it to score the uploaded image.


In [16]:

import ipywidgets as widgets
from IPython.display import display

# Single-file upload widget that only offers JPEGs. The scoring cell further
# down reads the uploaded data back from uploader.value.
uploader = widgets.FileUpload(accept='image/jpeg', multiple=False)
display(uploader)


FileUpload(value=(), accept='image/jpeg', description='Upload')

In [19]:
# Run after uploading
# Snapshot the widget's current value. _get_first_file() accepts it either as a
# dict or as a list/tuple, so both container shapes are handled.
files = uploader.value

def _get_first_file(files):
    if files is None:
        return None
    if isinstance(files, dict):
        return list(files.values())[0] if files else None
    if isinstance(files, (list, tuple)):
        return files[0] if len(files) else None
    return None

item = _get_first_file(files)
if item is None or 'content' not in item:
    # Nothing uploaded yet, or the entry carries no payload.
    print('Upload a JPEG in the widget above, then re-run this cell.')
else:
    # Persist the uploaded bytes so the path-based scoring helper can read them.
    tmp_path = Path('data/processed/composites/upload_tmp.jpg')
    tmp_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path.write_bytes(item['content'])
    print('Saved upload →', tmp_path)
    # Score, then re-rank against the dataset distribution.
    scored = adjust_percentile(score_image(tmp_path))
    print(scored)


Saved upload → data/processed/composites/upload_tmp.jpg
shape: (1, 4)
┌────────────────┬────────────────────┬────────────────────┬────────────────┐
│ filename       ┆ attractiveness_raw ┆ attractiveness_pct ┆ attractiveness │
│ ---            ┆ ---                ┆ ---                ┆ ---            │
│ str            ┆ f32                ┆ f64                ┆ i64            │
╞════════════════╪════════════════════╪════════════════════╪════════════════╡
│ upload_tmp.jpg ┆ 1.222824           ┆ 0.3661             ┆ 4              │
└────────────────┴────────────────────┴────────────────────┴────────────────┘
