# Film-Recommender — LO3 mit echten Labels & Zeit-Split
*Generated: 2025-09-12T15:52:40 UTC*

Dieses Notebook erweitert dein GNN-Reranking um:
- **Echte Labels** aus dem Letterboxd-Export (Ratings / Watchlist) statt Pseudo-Labels
- **Zeitbasierten Split** (Train auf älteren Interaktionen, Test auf späteren)
- **Evaluation** (Hit@K, NDCG@K, Recall@K) für Baseline (`final`), **GNN**, und **Ensemble**
- **Top-K-Exporte** (CSV) pro Seed für Baseline/GNN/Ensemble

> Es bleibt kompatibel mit deinem CSV **`../data/kg/rerank_by_logical_rules.csv`** (aus Sicht dieses Notebooks in `gnn/`).  
> Eine Installationszelle für PyTorch/PyG ist enthalten (optional, für die lokale Ausführung).


In [39]:
# === Configuration ===
# Paths are relative to the notebook's working directory. They follow the layout
# named in the introduction (`../data/kg/...`), matching LETTERBOXD_DIR below.
CSV_PATH = "../data/kg/rerank_by_logical_rules.csv"  # rerank table loaded in section 1
LETTERBOXD_DIR = "../data/letterboxd_export"         # folder searched for the Letterboxd CSVs
OUTPUT_DIR = "../data/kg/outputs"                    # destination of every CSV export

SPLIT_DATE = None   # e.g. "2024-01-01"; None leaves the split date to be derived from the data
SEED = 42           # seed for the Python, NumPy and torch RNGs
TOPK = 10           # cut-off K for Hit@K / NDCG@K / Recall@K and the top-K exports
NEG_PER_POS = 3     # sampled negatives per training positive
LAMBDA = 0.6        # ensemble weight of the GNN score; the baseline gets 1 - LAMBDA

import os, re, random, numpy as np, pandas as pd
from pathlib import Path
# Seed Python's and NumPy's global RNGs so random sampling is repeatable.
random.seed(SEED)
np.random.seed(SEED)

# Create the export folder up front; an already existing folder is fine.
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

# Echo the effective paths.
for label, value in (
    ("CSV_PATH:", CSV_PATH),
    ("LETTERBOXD_DIR:", LETTERBOXD_DIR),
    ("OUTPUT_DIR:", OUTPUT_DIR),
):
    print(label, value)

CSV_PATH: ../data/kg/rerank_by_logical_rules.csv
LETTERBOXD_DIR: ../data/letterboxd_export
OUTPUT_DIR: ../data/kg/outputs


## (Optional) Installationen

In [40]:
# !pip install --upgrade pip
# !pip install torch --index-url https://download.pytorch.org/whl/cpu
# !pip install torch-geometric torch-scatter torch-sparse torch-cluster torch-spline-conv -f https://data.pyg.org/whl/torch-$(python -c "import torch;print(torch.__version__.split('+')[0])").html


## 1) Rerank-CSV laden & vorbereiten

In [41]:
# Fail early with a readable message when the rerank CSV is missing.
assert Path(CSV_PATH).exists(), f"CSV nicht gefunden: {CSV_PATH}"
df = pd.read_csv(CSV_PATH)
# Sanity check: (rows, columns) and the list of column names.
print(df.shape)
print(df.columns.tolist())
# Last expression of the cell: previews the first three rows.
df.head(3)

(200, 41)
['candidate_id', 'candidate_title', 'year', 'cos', 'meta', 'final', 'seed', 'comp_genres', 'comp_keywords', 'comp_cast', 'comp_director', 'comp_runtime', 'comp_language', 'comp_popularity', 'comp_vote', 'tmdb_url', 'overview', 'genres', 'runtime', 'vote_average', 'poster_url', 'media_type', 'director', 'actors', 'characters', 'origin_country', 'original_language', 'popularity', 'production_companies', 'production_countries', 'spoken_languages', 'name_norm', 'year_str', 'genre_list', 'director_list', 'watchlist_priority', 'genre_boost', 'director_boost', 'genre_penalty', 'director_penalty', 'score']


Unnamed: 0,candidate_id,candidate_title,year,cos,meta,final,seed,comp_genres,comp_keywords,comp_cast,...,name_norm,year_str,genre_list,director_list,watchlist_priority,genre_boost,director_boost,genre_penalty,director_penalty,score
0,2756,The Abyss,1989.0,0.0,0.4592,0.1837,Aliens,0.5,0.0,0.0256,...,the abyss,1989.0,"['Adventure', 'Thriller', 'Science Fiction']",['James Cameron'],True,True,True,False,False,5
1,1991,Death Proof,2007.0,0.0,0.501,0.2004,Kill Bill: Vol. 2,0.6667,0.0179,0.0526,...,death proof,2007.0,"['Action', 'Thriller']",['Quentin Tarantino'],False,True,True,False,False,3
2,28387,Kicking and Screaming,1995.0,0.0,0.5109,0.2044,The Meyerowitz Stories (New and Selected),0.6667,0.0,0.0256,...,kicking and screaming,1995.0,"['Comedy', 'Drama', 'Romance']",['Noah Baumbach'],False,True,True,False,False,3


In [42]:
# Columns that are supposed to be numeric. Anything that cannot be parsed as a
# number (and any missing value) is replaced by 0.0.
num_like = [
    'cos', 'final', 'score',
    'comp_genres', 'comp_keywords', 'comp_cast', 'comp_director',
    'comp_runtime', 'comp_language', 'comp_popularity', 'comp_vote',
]
for col in [name for name in num_like if name in df.columns]:
    as_number = pd.to_numeric(df[col], errors='coerce')
    df[col] = as_number.fillna(0.0)

def minmax(x):
    """Scale values linearly so the smallest maps to 0 and the largest to 1.

    Args:
        x: Array-like of numbers; converted to a float array.

    Returns:
        A float array of the same shape. NaN entries stay NaN. An all-zero
        array is returned when the input is empty, contains only NaN, has a
        non-finite minimum/maximum, or has no spread (max <= min).
    """
    x = np.asarray(x, dtype=float)
    # nanmin/nanmax raise on empty input and warn on all-NaN input, so handle
    # both degenerate cases before calling them.
    if x.size == 0 or np.isnan(x).all():
        return np.zeros_like(x)
    mn, mx = np.nanmin(x), np.nanmax(x)
    if not np.isfinite(mn) or not np.isfinite(mx) or mx <= mn:
        return np.zeros_like(x)
    return (x - mn) / (mx - mn)

# Drop rows without a seed or candidate title *before* the string cast:
# astype(str) would turn a missing value into the literal text "nan", which
# would then be handled like a real film title further down.
df = df.dropna(subset=['seed', 'candidate_title']).copy()
df['seed'] = df['seed'].astype(str)
df['candidate_title'] = df['candidate_title'].astype(str)
print("Zeilen:", len(df))

Zeilen: 200


## 2) Graph-Kanten aus CSV bauen

In [43]:
# Every distinct title (seed or candidate) becomes one movie node.
movies = pd.unique(pd.concat([df['seed'], df['candidate_title']], ignore_index=True))
movie2id = {title: idx for idx, title in enumerate(movies)}
id2movie = dict(enumerate(movies))

# One relation type per score column that is actually present in the CSV.
known_relations = ('cos', 'final', 'comp_genres', 'comp_keywords', 'comp_cast', 'comp_director',
                   'comp_runtime', 'comp_language', 'comp_popularity', 'comp_vote')
rel_cols = [name for name in known_relations if name in df.columns]

# edges[relation] = [(seed_id, candidate_id, raw_weight), ...] in CSV row order.
edges = {}
for rel in rel_cols:
    triples = []
    for src_title, dst_title, weight in zip(df['seed'], df['candidate_title'], df[rel]):
        if pd.isna(src_title) or pd.isna(dst_title):
            continue
        triples.append((movie2id[src_title], movie2id[dst_title], float(weight)))
    edges[rel] = triples

# Rescale the weights of each relation to [0, 1]; relations without edges are dropped.
norm_edges = {}
for rel, triples in edges.items():
    if triples:
        scaled = minmax([weight for _, _, weight in triples])
        norm_edges[rel] = [(u, v, float(w_norm)) for (u, v, _), w_norm in zip(triples, scaled)]

for rel, triples in norm_edges.items():
    print(rel, "Edges:", len(triples))

cos Edges: 200
final Edges: 200
comp_genres Edges: 200
comp_keywords Edges: 200
comp_cast Edges: 200
comp_director Edges: 200
comp_runtime Edges: 200
comp_language Edges: 200
comp_popularity Edges: 200
comp_vote Edges: 200


## 3) Letterboxd-Labels laden (Ratings / Watchlist)

In [44]:
def smart_find(base_dir, primary_name_patterns, fallback_exts=('csv', 'CSV')):
    """Locate a CSV file below *base_dir* by name.

    Lookup happens in two stages:

    1. Exact file names ``<pattern>.<ext>`` directly inside *base_dir*, tried
       in the order of *primary_name_patterns* and *fallback_exts*.
    2. A recursive search for any ``.csv`` file (suffix compared
       case-insensitively) whose lower-cased name contains one of the
       patterns. Candidates are visited shallowest-first and then
       alphabetically, so the result does not depend on filesystem order and
       a top-level file wins over one in a sub-folder.

    Args:
        base_dir: Directory to search (str or Path).
        primary_name_patterns: File-name stems / substrings to look for.
        fallback_exts: Extensions tried in stage 1 (without the dot).

    Returns:
        The path of the first match as ``str``, or ``None`` when *base_dir*
        is not a directory or nothing matches. Directories are never returned.
    """
    base = Path(base_dir)
    if not base.is_dir():
        return None

    # Stage 1: exact names directly in the base directory.
    for pat in primary_name_patterns:
        for ext in fallback_exts:
            cand = base / f"{pat}.{ext}"
            if cand.is_file():
                return str(cand)

    # Stage 2: substring match anywhere below the base directory.
    patterns = [pat.lower() for pat in primary_name_patterns]
    for p in sorted(base.rglob("*"), key=lambda q: (len(q.parts), str(q))):
        if p.suffix.lower() != '.csv' or not p.is_file():
            continue
        name = p.name.lower()
        if any(pat in name for pat in patterns):
            return str(p)
    return None

# Look up the three export files; each value is a path string or None.
ratings_path   = smart_find(LETTERBOXD_DIR, ['ratings', 'ratings-export', 'ratings-2'])
watched_path   = smart_find(LETTERBOXD_DIR, ['watched', 'diary'])
watchlist_path = smart_find(LETTERBOXD_DIR, ['watchlist'])

print("ratings_path:", ratings_path)
print("watched_path:", watched_path)
print("watchlist_path:", watchlist_path)

# At least one of the three files is required; the others are optional.
assert ratings_path or watched_path or watchlist_path, "Keine Letterboxd-CSV gefunden. Prüfe LETTERBOXD_DIR."

import pandas as pd, numpy as np, re
def normalize_title(t):
    """Reduce a film title to a loose matching key.

    Steps: fold accented letters to their base letter ("Amélie" -> "amelie"),
    case-fold, replace every run of characters outside ``a-z0-9`` by a space,
    remove the stand-alone words "the", "a" and "an", and collapse whitespace.

    Args:
        t: Title; any value is accepted and converted with ``str()``.

    Returns:
        The normalised key as ``str`` (may be empty).
    """
    import unicodedata  # local import keeps the function self-contained

    # NFKD splits accented letters into base letter + combining mark; dropping
    # the marks keeps the base letter instead of losing the whole character in
    # the ASCII filter below.
    decomposed = unicodedata.normalize("NFKD", str(t))
    t = "".join(ch for ch in decomposed if not unicodedata.combining(ch)).casefold()
    t = re.sub(r"[^a-z0-9]+", " ", t)
    t = re.sub(r"\b(the|a|an)\b", " ", t)
    t = re.sub(r"\s+", " ", t).strip()
    return t

def parse_date_series(s):
    """Parse a column of dates into UTC-aware timestamps.

    Args:
        s: A pandas Series of date-like values, or ``None`` when the column
            does not exist.

    Returns:
        A Series of UTC-aware datetimes; unparseable entries become ``NaT``.
        ``None`` yields an empty UTC-aware Series so the result has the same
        timezone handling on every path.
    """
    if s is None:
        return pd.Series(dtype='datetime64[ns, UTC]')
    try:
        return pd.to_datetime(s, errors='coerce', utc=True)
    except (TypeError, ValueError, OverflowError):
        # Retry on the string representation for values that cannot be parsed directly.
        return pd.to_datetime(s.astype(str), errors='coerce', utc=True)

def parse_rating_series(s):
    """Convert a rating column to floats.

    Accepts numeric values, numeric strings and star notation, where every
    '★' counts 1.0 and a '½' adds 0.5.

    Args:
        s: A pandas Series of ratings, or ``None`` when the column is missing.

    Returns:
        A float Series. Missing values and text that is neither a number nor
        star notation become NaN. ``None`` yields an empty float Series.
    """
    if s is None:
        return pd.Series(dtype=float)

    def to_num(x):
        if pd.isna(x):
            return np.nan
        try:
            return float(x)
        except (TypeError, ValueError):
            txt = str(x)
            stars = txt.count('★')
            half = '½' in txt
            if not stars and not half:
                # Not a rating at all: keep it missing instead of reporting 0.0.
                return np.nan
            return stars + (0.5 if half else 0.0)

    return s.apply(to_num).astype(float)

# (file path, (preferred date column, fallback date column), is_watchlist)
sources = [
    (ratings_path,   ('Date', 'WatchedDate'), False),
    (watched_path,   ('Date', 'WatchedDate'), False),
    (watchlist_path, ('AddedDate', 'Date'),   True),
]

# Bring every available export into the same five-column layout.
frames = []
for path, (date_col, date_fallback), on_watchlist in sources:
    if not path:
        continue
    raw = pd.read_csv(path)
    frames.append(pd.DataFrame({
        'title': raw.get('Name', raw.get('Title')),
        'year': raw.get('Year'),
        # Watchlist entries carry no rating.
        'rating': np.nan if on_watchlist else parse_rating_series(raw.get('Rating')),
        'date': parse_date_series(raw.get(date_col, raw.get(date_fallback))),
        'watchlist': on_watchlist,
    }))

inter = pd.concat(frames, ignore_index=True)
inter = inter.dropna(subset=['title'])
inter['title_norm'] = inter['title'].apply(normalize_title)
inter['year'] = pd.to_numeric(inter['year'], errors='coerce')
inter['date'] = parse_date_series(inter['date'])
# Positive label: rated 4.0 or higher, or present on the watchlist.
inter['is_positive'] = inter['rating'].ge(4.0) | inter['watchlist'].eq(True)

print("Interaktionen (gesamt):", len(inter))
print("Davon positive:", int(inter['is_positive'].sum()))
inter.head(3)

ratings_path: ../data/letterboxd_export/ratings.csv
watched_path: ../data/letterboxd_export/watched.csv
watchlist_path: ../data/letterboxd_export/watchlist.csv
Interaktionen (gesamt): 1816
Davon positive: 903


Unnamed: 0,title,year,rating,date,watchlist,title_norm,is_positive
0,Bird Box,2018.0,3.0,2020-06-08 00:00:00+00:00,False,bird box,False
1,The Meyerowitz Stories (New and Selected),2017.0,5.0,2020-06-08 00:00:00+00:00,False,meyerowitz stories new and selected,True
2,Marriage Story,2019.0,5.0,2020-06-08 00:00:00+00:00,False,marriage story,True


### 3.1 Mapping: Letterboxd-Titel → Kandidaten

In [45]:
# Normalised title -> original title as it appears in the rerank CSV.
# NOTE(review): the key is the title only (no year), so different films that
# share a title map to the same entry — confirm this is acceptable.
cand_norm = {normalize_title(t): t for t in pd.unique(df['candidate_title'])}
seed_norm = {normalize_title(t): t for t in pd.unique(df['seed'])}
all_norm  = {**seed_norm, **cand_norm}  # candidate spelling wins on a key clash

inter['cand_match'] = inter['title_norm'].map(all_norm)
mapped = inter.dropna(subset=['cand_match']).copy()
print("Gemappte Interaktionen:", len(mapped), "von", len(inter))

# Only dated positives can take part in a time-based split.
mapped_pos = mapped[(mapped['is_positive']) & mapped['date'].notna()].copy()
assert len(mapped_pos) > 0, "Keine positiven Interaktionen gemappt. Prüfe Normalisierung/Titel."

# Honour the configured SPLIT_DATE; without one, the 80 % quantile of the
# positive interaction dates is used.
if SPLIT_DATE:
    split_date = pd.to_datetime(SPLIT_DATE, utc=True)
else:
    split_date = mapped_pos['date'].quantile(0.8)
print("Split-Datum:", split_date)

Gemappte Interaktionen: 229 von 1816
Split-Datum: 2024-05-21 00:00:00+00:00


In [46]:
# The interaction table is concatenated from several export files, so one film
# can occur more than once. Keep each title once (first occurrence order) ...
train_pos = list(dict.fromkeys(
    mapped_pos[mapped_pos['date'] <= split_date]['cand_match'].tolist()
))
# ... and never let a title that is already a training positive count as a
# test positive as well, otherwise the evaluation rewards memorised labels.
train_titles = set(train_pos)
test_pos = [
    t for t in dict.fromkeys(mapped_pos[mapped_pos['date'] > split_date]['cand_match'].tolist())
    if t not in train_titles
]

train_pos_ids = [movie2id[t] for t in train_pos if t in movie2id]
test_pos_set  = {t for t in test_pos if t in movie2id}
print("Train positive IDs:", len(train_pos_ids), "| Test positive unique:", len(test_pos_set))

Train positive IDs: 49 | Test positive unique: 12


## 4) Evaluation-Helper

In [47]:
# ✅ NumPy 2.0 kompatible Eval-Helper

import numpy as np

def _to_float_array(x):
    """Return *x* as a NumPy array with float dtype."""
    return np.asarray(x, dtype=float)

def dcg_at_k(rel, k=10):
    """Discounted cumulative gain over the first *k* entries of *rel*.

    *rel* lists relevance values in ranked order. Each position contributes
    ``(2**rel - 1) / log2(position + 1)`` with positions starting at 1.
    """
    gains = np.power(2.0, _to_float_array(rel)[:k]) - 1.0
    discounts = np.log2(np.arange(2, gains.size + 2))
    return float((gains / discounts).sum())

def ndcg_at_k(rel, k=10):
    """DCG@k divided by the DCG@k of the best possible ordering of *rel*.

    Returns 0.0 when the ideal DCG is not positive (no relevant entry).
    """
    scores = _to_float_array(rel)
    best_order = np.sort(scores)[::-1]  # descending
    ideal_dcg = dcg_at_k(best_order, k)
    if ideal_dcg > 0:
        return float(dcg_at_k(scores, k) / ideal_dcg)
    return 0.0

def hit_at_k(rel, k=10):
    """Return 1.0 if any of the first *k* relevance values is positive, else 0.0."""
    top = _to_float_array(rel)[:k]
    return float((top > 0).any())

def recall_at_k(rel, total_pos, k=10):
    """Share of the relevant items that appear among the first *k* ranks.

    Args:
        rel: Relevance values in ranked order; an entry counts as relevant
            when it is greater than 0.
        total_pos: Total number of relevant items for this ranking.
        k: Cut-off rank.

    Returns:
        ``found / total_pos`` as float, or 0.0 when *total_pos* is not positive.
    """
    r = np.asarray(rel, dtype=float)[:k]
    # Count relevant entries instead of summing their values, so graded or
    # fractional relevance does not distort the count.
    found = int(np.count_nonzero(r > 0))
    return float(found / total_pos) if total_pos > 0 else 0.0

def eval_grouped_with_test(df, score_col, k=10, test_pos_set=None):
    """Average Hit@k, NDCG@k and Recall@k over all seeds.

    For every seed the candidates are ranked by *score_col* (descending); a
    candidate is relevant when its title is contained in *test_pos_set*.
    Seeds without any relevant candidate contribute 0.0 to all three metrics.

    Args:
        df: Frame with the columns 'seed', 'candidate_title' and *score_col*.
        score_col: Name of the score column used for ranking.
        k: Cut-off rank.
        test_pos_set: Collection of relevant titles; ``None`` means "none".

    Returns:
        ``(mean_hit, mean_ndcg, mean_recall, number_of_seeds)``; the means are
        0.0 when *df* contains no seed.
    """
    relevant = set() if test_pos_set is None else test_pos_set
    hits, ndcgs, recalls = [], [], []
    for _, g in df.groupby('seed', sort=False):
        g = g.sort_values(score_col, ascending=False).reset_index(drop=True)
        rel = g['candidate_title'].isin(relevant).astype(int).to_numpy()
        total_pos = int(rel.sum())
        hits.append(hit_at_k(rel, k))
        ndcgs.append(ndcg_at_k(rel, k))
        recalls.append(recall_at_k(rel, total_pos, k))
    cnt = len(hits)
    if cnt == 0:
        # np.mean of an empty list would be NaN (plus a RuntimeWarning).
        return 0.0, 0.0, 0.0, 0
    return float(np.mean(hits)), float(np.mean(ndcgs)), float(np.mean(recalls)), cnt


## 5) Baseline (final) — Eval & Export

In [48]:
if 'final' in df.columns:
    # Min-max scale `final` within each seed; the 1e-9 keeps the division
    # defined when all candidates of a seed share the same score.
    df['final_norm'] = df.groupby('seed')['final'].transform(lambda x: (x - x.min()) / (x.max()-x.min() + 1e-9))
    h, n, r, cnt = eval_grouped_with_test(df, 'final_norm', k=TOPK, test_pos_set=test_pos_set)
    print(f"Baseline (`final_norm`) — Hit@{TOPK}: {h:.3f} | NDCG@{TOPK}: {n:.3f} | Recall@{TOPK}: {r:.3f} (Seeds: {cnt})")
    # .copy() detaches the top-K slice from df so that adding a column below
    # neither warns nor depends on pandas' view/copy behaviour.
    topk_baseline = (
        df.sort_values(['seed','final_norm'], ascending=[True, False])
          .groupby('seed')
          .head(TOPK)
          .copy()
    )
    topk_baseline['test_relevant'] = topk_baseline['candidate_title'].isin(test_pos_set)
    out_path = f"{OUTPUT_DIR}/top{TOPK}_baseline.csv"
    topk_baseline.to_csv(out_path, index=False)
    print("Export:", out_path)
else:
    print("Keine 'final'-Spalte gefunden — Baseline übersprungen.")

Baseline (`final_norm`) — Hit@10: 0.031 | NDCG@10: 0.023 | Recall@10: 0.031 (Seeds: 130)
Export: ../data/kg/outputs/top10_baseline.csv


## 6) GNN-Training (HeteroConv, echte Labels)

In [49]:
# `importlib.util` is a submodule and must be imported explicitly; a plain
# `import importlib` does not guarantee that the attribute exists.
import importlib.util
import warnings

# NOTE: this silences *all* warnings from here on, not only those of torch/PyG.
warnings.filterwarnings('ignore')
# find_spec only checks that the packages can be located; nothing is imported yet.
has_torch = importlib.util.find_spec('torch') is not None
has_pyg   = importlib.util.find_spec('torch_geometric') is not None
print("Torch installiert:", has_torch, "| PyG installiert:", has_pyg)
if not (has_torch and has_pyg):
    print("GNN-Teil wird übersprungen (Pakete fehlen).")

Torch installiert: True | PyG installiert: True


In [50]:
if has_torch and has_pyg:
    import torch
    from torch import nn
    import torch.nn.functional as F
    from torch_geometric.data import HeteroData
    from torch_geometric.nn import HeteroConv, GATv2Conv

    torch.manual_seed(SEED)

    data = HeteroData()
    num_movies = len(movie2id)
    d = 64  # embedding / hidden size

    # One movie->movie edge type per relation.
    # NOTE(review): only the edge endpoints are stored; the normalised weight
    # in each triple is not handed to the graph.
    for rc, lst in norm_edges.items():
        if not lst: continue
        src = torch.tensor([u for (u,_,_) in lst], dtype=torch.long)
        dst = torch.tensor([v for (_,v,_) in lst], dtype=torch.long)
        data['movie', rc, 'movie'].edge_index = torch.stack([src, dst], dim=0)

    # Training positives: edges from the single user node (index 0) to liked movies.
    pos_ids = [movie2id[t] for t in train_pos if t in movie2id]
    assert len(pos_ids) > 0, "Keine Trainings-Positives im CSV-Kandidatenraum gefunden."
    u_src = torch.zeros(len(pos_ids), dtype=torch.long)
    m_dst = torch.tensor(pos_ids, dtype=torch.long)
    data['user','likes','movie'].edge_index = torch.stack([u_src, m_dst], dim=0)

    # Negatives: NEG_PER_POS random movies per positive, drawn (with
    # replacement) from all movies that are not training positives. The value
    # comes from the configuration cell instead of being overridden here.
    all_movie_ids = torch.arange(num_movies, dtype=torch.long)
    pos_set = set(m_dst.tolist())
    neg_pool = [int(i) for i in all_movie_ids.tolist() if i not in pos_set]
    assert neg_pool, "Keine Negativ-Kandidaten übrig: alle Filme sind Trainings-Positives."
    neg_pairs = [(0, random.choice(neg_pool)) for _ in range(len(pos_ids) * NEG_PER_POS)]
    un_src = torch.tensor([p[0] for p in neg_pairs], dtype=torch.long)
    mn_dst = torch.tensor([p[1] for p in neg_pairs], dtype=torch.long)

    # Message passing uses every edge type that ends in a movie node, which
    # includes the user->movie 'likes' edges.
    conv_edge_types = [et for et in data.edge_types if et[2] == 'movie']
    edge_index_dict = {et: data[et].edge_index for et in conv_edge_types}

    class HeteroRecommender(nn.Module):
        """Embedding-based recommender with heterogeneous GATv2 message passing.

        Movies and the single user are represented by learnable embeddings.
        Each layer is a HeteroConv holding one GATv2Conv per edge type in
        `conv_edge_types` (read from the enclosing scope); the results of the
        edge types are summed per destination node type.
        """
        def __init__(self, num_movies, dim=64, layers=2):
            """Create the embeddings and `layers` HeteroConv layers of width `dim`."""
            super().__init__()
            self.movie_emb = nn.Embedding(num_movies, dim)
            self.user_emb  = nn.Embedding(1, dim)
            # (-1, -1): input sizes of source and destination nodes are left unspecified here.
            self.layers = nn.ModuleList([
                HeteroConv({ et: GATv2Conv((-1, -1), dim, add_self_loops=False) for et in conv_edge_types }, aggr='sum')
                for _ in range(layers)
            ])
        def forward(self, edge_index_dict):
            """Return a dict node type -> feature matrix after all layers."""
            # Start from the raw embedding tables.
            x = {'movie': self.movie_emb.weight, 'user': self.user_emb.weight}
            for conv in self.layers:
                out = conv(x, edge_index_dict)
                # NOTE(review): ReLU is applied after every layer, including the
                # last one, so the returned vectors of updated node types are
                # non-negative — confirm this is intended for a dot-product score.
                out = {k: F.relu(v) for k, v in out.items()}
                # Node types missing from `out` keep their previous features.
                x.update(out)
            return x
        @staticmethod
        def score(user_vec, item_vec):
            """Dot product of user and item vectors along the last dimension."""
            return (user_vec * item_vec).sum(dim=-1)

    model = HeteroRecommender(num_movies=num_movies, dim=d, layers=2)

    # The GATv2Conv layers are created with unspecified input sizes (-1), so
    # their weights only come into existence on the first forward pass. Run
    # one pass without gradients first, so the optimizer below is built over
    # fully initialised parameters.
    with torch.no_grad():
        model(edge_index_dict)

    opt = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
    bce = nn.BCEWithLogitsLoss()

    # Full-batch training: every epoch scores all positive and all sampled
    # negative (user, movie) pairs and minimises the sum of both BCE terms.
    for epoch in range(1, 201):
        model.train()
        opt.zero_grad()
        x_out = model(edge_index_dict)
        user_pos = x_out['user'][u_src]
        item_pos = x_out['movie'][m_dst]
        user_neg = x_out['user'][un_src]
        item_neg = x_out['movie'][mn_dst]
        pos_logit = HeteroRecommender.score(user_pos, item_pos)
        neg_logit = HeteroRecommender.score(user_neg, item_neg)
        loss = bce(pos_logit, torch.ones_like(pos_logit)) + bce(neg_logit, torch.zeros_like(neg_logit))
        loss.backward()
        opt.step()
        if epoch % 50 == 0:
            print(f"Epoch {epoch:3d} | Loss {loss.item():.4f}")

    model.eval()
    with torch.no_grad():
        x_out = model(edge_index_dict)
        user_vec = x_out['user'][0:1]
        # Score every movie node in one pass instead of one tensor op per CSV row.
        movie_scores = HeteroRecommender.score(user_vec, x_out['movie']).cpu().numpy().astype(float)

    # Look up the score of each candidate via its node id; a title without a
    # node id would end up as NaN.
    score_by_id = pd.Series(movie_scores)
    gnn_scores = df['candidate_title'].map(movie2id).map(score_by_id).tolist()

    df['s_gnn'] = gnn_scores
    # Min-max scale within each seed so the GNN score is comparable to `final_norm`.
    df['s_gnn_norm'] = df.groupby('seed')['s_gnn'].transform(lambda x: (x - x.min()) / (x.max() - x.min() + 1e-9))
    print("GNN-Scoring hinzugefügt: 's_gnn'/'s_gnn_norm'")

Epoch  50 | Loss 0.0047
Epoch 100 | Loss 0.0023
Epoch 150 | Loss 0.0014
Epoch 200 | Loss 0.0010
GNN-Scoring hinzugefügt: 's_gnn'/'s_gnn_norm'


## 7) Ensemble & finale Evaluation + Top-K-Exporte

In [51]:
# Make sure the per-seed normalised baseline exists, even if the baseline cell was skipped.
if 'final_norm' not in df.columns and 'final' in df.columns:
    df['final_norm'] = df.groupby('seed')['final'].transform(
        lambda values: (values - values.min()) / (values.max()-values.min() + 1e-9)
    )

# Ensemble = LAMBDA * GNN + (1 - LAMBDA) * baseline, falling back to whichever
# of the two scores is available.
if 's_gnn_norm' not in df.columns:
    print("Warnung: Kein GNN-Score vorhanden; Ensemble entspricht Baseline.")
    df['score_ensemble'] = df.get('final_norm', 0.0)
elif 'final_norm' in df.columns:
    gnn_part = LAMBDA * df['s_gnn_norm']
    base_part = (1.0 - LAMBDA) * df['final_norm']
    df['score_ensemble'] = gnn_part + base_part
else:
    df['score_ensemble'] = df['s_gnn_norm']

def eval_grouped_with_test(df, score_col, k=10, test_pos_set=None):
    """Average Hit@k, NDCG@k and Recall@k over all seeds (self-contained).

    For every seed the candidates are ranked by *score_col* (descending); a
    candidate is relevant when its title is contained in *test_pos_set*.
    Seeds without any relevant candidate contribute 0.0 to all three metrics.

    Args:
        df: Frame with the columns 'seed', 'candidate_title' and *score_col*.
        score_col: Name of the score column used for ranking.
        k: Cut-off rank.
        test_pos_set: Collection of relevant titles; ``None`` means "none".

    Returns:
        ``(mean_hit, mean_ndcg, mean_recall, number_of_seeds)``; the means are
        0.0 when *df* contains no seed.
    """
    relevant = set() if test_pos_set is None else test_pos_set
    hits, ndcgs, recalls = [], [], []
    for _, g in df.groupby('seed', sort=False):
        ranked = g.sort_values(score_col, ascending=False).reset_index(drop=True)
        rel = ranked['candidate_title'].isin(relevant).astype(int).to_numpy()
        total_pos = int(rel.sum())
        rel_k = rel[:k]

        # Hit@K: at least one relevant title among the first k ranks.
        hits.append(float(np.any(rel_k > 0)))

        # NDCG@K: DCG of the ranking divided by the DCG of the ideal ordering.
        discounts = np.log2(np.arange(2, len(rel_k) + 2))
        dcg = np.sum((2.0 ** rel_k - 1.0) / discounts)
        ideal = np.sort(rel)[::-1][:k]
        idcg = np.sum((2.0 ** ideal - 1.0) / discounts)
        ndcgs.append(float(dcg / idcg) if idcg > 0 else 0.0)

        # Recall@K: share of this seed's relevant titles found in the first k ranks.
        found = int(rel_k.sum())
        recalls.append(float(found / total_pos) if total_pos > 0 else 0.0)

    cnt = len(hits)
    if cnt == 0:
        # np.mean of an empty list would be NaN (plus a RuntimeWarning).
        return 0.0, 0.0, 0.0, 0
    return float(np.mean(hits)), float(np.mean(ndcgs)), float(np.mean(recalls)), cnt

# Label -> score column; columns that do not exist are skipped.
score_columns = {
    'Baseline(final_norm)': 'final_norm',
    'GNN(s_gnn_norm)': 's_gnn_norm',
    'Ensemble': 'score_ensemble',
}
for name, col in score_columns.items():
    if col not in df.columns:
        continue
    h, n, r, cnt = eval_grouped_with_test(df, col, k=TOPK, test_pos_set=test_pos_set)
    print(f"{name:>20} — Hit@{TOPK}: {h:.3f} | NDCG@{TOPK}: {n:.3f} | Recall@{TOPK}: {r:.3f} (Seeds: {cnt})")

def export_topk(df, col, fname):
    """Write the TOPK best candidates per seed, ranked by *col*, to a CSV file.

    The export gets an extra boolean column 'test_relevant' telling whether the
    candidate title is in `test_pos_set`. The file is written to
    ``OUTPUT_DIR/fname``.

    Returns:
        The path of the written file, or ``None`` when *col* is not a column of *df*.
    """
    if col in df.columns:
        ranked = df.sort_values(['seed', col], ascending=[True, False])
        top_rows = ranked.groupby('seed').head(TOPK).copy()
        top_rows['test_relevant'] = top_rows['candidate_title'].isin(test_pos_set)
        path = f"{OUTPUT_DIR}/{fname}"
        top_rows.to_csv(path, index=False)
        return path
    return None

# Top-K exports per ranking; a path is None when the score column does not exist.
p1 = export_topk(df, 'final_norm',     f"top{TOPK}_baseline.csv")
p2 = export_topk(df, 's_gnn_norm',     f"top{TOPK}_gnn.csv")
p3 = export_topk(df, 'score_ensemble', f"top{TOPK}_ensemble.csv")
print("Exporte:", p1, p2, p3)

# Full (untruncated) rankings. The baseline export is guarded like the GNN
# export, so a CSV without a 'final' column does not abort with a KeyError.
if 'final_norm' in df.columns:
    df.sort_values(['seed','final_norm'], ascending=[True, False]).to_csv(f"{OUTPUT_DIR}/baseline_rerank.csv", index=False)
if 's_gnn_norm' in df.columns:
    df.sort_values(['seed','s_gnn_norm'], ascending=[True, False]).to_csv(f"{OUTPUT_DIR}/gnn_rerank.csv", index=False)
df.sort_values(['seed','score_ensemble'], ascending=[True, False]).to_csv(f"{OUTPUT_DIR}/ensemble_rerank.csv", index=False)
print("Fertige Exporte im Ordner:", OUTPUT_DIR)

Baseline(final_norm) — Hit@10: 0.031 | NDCG@10: 0.023 | Recall@10: 0.031 (Seeds: 130)
     GNN(s_gnn_norm) — Hit@10: 0.031 | NDCG@10: 0.031 | Recall@10: 0.031 (Seeds: 130)
            Ensemble — Hit@10: 0.031 | NDCG@10: 0.023 | Recall@10: 0.031 (Seeds: 130)
Exporte: ../data/kg/outputs/top10_baseline.csv ../data/kg/outputs/top10_gnn.csv ../data/kg/outputs/top10_ensemble.csv
Fertige Exporte im Ordner: ../data/kg/outputs


## 8) Qualitative Fallstudie (2 Seeds)

In [52]:
# Seeds that have at least one test-relevant candidate.
seeds_with_pos = [
    seed
    for seed, group in df.groupby('seed')
    if group['candidate_title'].isin(test_pos_set).any()
]
# Prefer two such seeds; otherwise fall back to the first two seeds of the CSV.
if len(seeds_with_pos) >= 2:
    sample_seeds = seeds_with_pos[:2]
else:
    sample_seeds = df['seed'].unique()[:2]

rankings = (('Baseline', 'final_norm'), ('GNN', 's_gnn_norm'), ('Ensemble', 'score_ensemble'))
for s in sample_seeds:
    print("\n=== Seed:", s, "===")
    g = df[df['seed'] == s].copy()
    for name, col in rankings:
        if col not in g.columns:
            continue
        # Five best candidates of this seed under the given score.
        top = g.sort_values(col, ascending=False).head(5)[['candidate_title', col]].copy()
        top['is_test_rel'] = top['candidate_title'].isin(test_pos_set)
        print(f"{name} Top-5:")
        print(top.to_string(index=False))


=== Seed: Godzilla ===
Baseline Top-5:
          candidate_title  final_norm  is_test_rel
The War of the Gargantuas         0.0         True
GNN Top-5:
          candidate_title  s_gnn_norm  is_test_rel
The War of the Gargantuas         0.0         True
Ensemble Top-5:
          candidate_title  score_ensemble  is_test_rel
The War of the Gargantuas             0.0         True

=== Seed: Good Time ===
Baseline Top-5:
 candidate_title  final_norm  is_test_rel
We Own the Night    1.000000        False
         Chopper    0.133333        False
           Bound    0.000000         True
GNN Top-5:
 candidate_title  s_gnn_norm  is_test_rel
           Bound         0.0         True
We Own the Night         0.0        False
         Chopper         0.0        False
Ensemble Top-5:
 candidate_title  score_ensemble  is_test_rel
We Own the Night        0.400000        False
         Chopper        0.053333        False
           Bound        0.000000         True
