# Subject-Driven Generation Metrics Evaluation

##### We have a dataset of 30 distinct “subjects” (objects vs. live subjects/pets), split into:
##### - **Real images**: stored under `data/subjects/<subject_name>/…`
##### - **Generated images**: stored under `results/{no_ppl,ppl}/<subject_name>/…`

##### We also have:
##### - `data/subjects.csv` with columns:
#####     - `subject_name` (matches each folder name)
#####     - `class`        (e.g. “dog”, “backpack”, etc.)
#####     - `live`         (boolean: True for pets, False for objects)
##### - `data/prompts.csv` with columns:
#####     - `prompt` (templates containing `{0}` → “sks”, `{1}` → the `class` value)
#####     - `live`   (boolean: whether this prompt applies to live subjects or objects)


##### **Evaluation protocol**:
##### - We generated up to **2** samples per prompt for each condition (`ppl` and `no_ppl`).
##### - Metrics:
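#####     1. **DINO** (avg pairwise DINO similarity between a subject's real images and the generated images of the same subject)
#####     2. **PRES** (avg pairwise DINO similarity between a subject's real images and the generated images of other subjects of the same class)
#####     3. **CLIP-I** (avg cosine similarity between CLIP image embeddings of real and generated images)
#####     4. **CLIP-T** (avg cosine similarity between CLIP text embeddings of the prompts and CLIP image embeddings of the generated images)
#####     5. **DIV**  (avg pairwise LPIPS distance among generated images of the same prompt)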


In [None]:
import os
import pandas as pd
import numpy as np
from collections import defaultdict
from metrics import clip_embeddings, div, pres

In [None]:
# Root folders (relative to the working directory) and the two generation
# conditions; each condition is a sub-folder of GEN_ROOT.
REAL_ROOT = '../data'
GEN_ROOT = '../results'
CONDITIONS = ['no_ppl', 'ppl']

# Subject and prompt tables used by the metric cells below.
subjects_df = pd.read_csv('../data/subjects.csv')
prompts_df = pd.read_csv('../data/prompts.csv')

#### Collecting DINO (between real and generated images)

In [None]:
# Compute one DINO score per (condition, subject): the subject's real images
# against that same subject's generated images.
dino_results = []
data_root = os.path.join(REAL_ROOT, "subjects")  # identical for every condition
for cond in CONDITIONS:
    res_root = os.path.join(GEN_ROOT, cond)
    for subject in subjects_df['subject_name']:
        real_dir = os.path.join(data_root, subject)
        gen_dir = os.path.join(res_root, subject)
        if not os.path.isdir(real_dir) or not os.path.isdir(gen_dir):
            # Report the skip so a wrong root or condition name cannot
            # silently produce an empty result table.
            print(f"Skipping {cond}/{subject}: directory not found")
            continue

        # Same helper as the PRES cell, but fed the subject's own generations.
        dino = pres.collect_pres(real_dir, gen_dir)

        dino_results.append({
            'condition': cond,
            'subject':   subject,
            'DINO':      dino,
        })

        print(f'{cond} {subject} {dino:.4f}')

# Explicit columns keep the CSV header present even when every subject was
# skipped, so the file can still be read back later.
dino_results_df = pd.DataFrame(dino_results, columns=['condition', 'subject', 'DINO'])
dino_results_df.to_csv("../results/dino_results.csv", index=False)

In [None]:
# Average the per-subject DINO scores within each condition.
mean_dino = dino_results_df.groupby('condition', as_index=False).agg(
    mean_DINO=('DINO', 'mean'),
)
mean_dino

#### Collecting PRES (between real images and generated images of other subjects of the same class)

In [None]:
# PRES compares a subject against *other* subjects of the same class, so keep
# only the subjects whose class occurs more than once in subjects_df.
counts = subjects_df['class'].value_counts().to_dict()  # class -> number of subjects

mask = subjects_df['class'].map(counts) > 1

pres_subjects_df = subjects_df[mask]

In [None]:
# Compute PRES for every ordered pair (subject, other) of distinct subjects
# that share a class: the subject's real images against the generated images
# of the other subject. One row is recorded per pair.
results = []
data_root = os.path.join(REAL_ROOT, "subjects")  # identical for every condition
for cond in CONDITIONS:
    res_root = os.path.join(GEN_ROOT, cond)
    for _, row in pres_subjects_df.iterrows():
        subject = row['subject_name']
        cls     = row['class']

        real_dir = os.path.join(data_root, subject)
        if not os.path.isdir(real_dir):
            print(f"Skipping {cond}/{subject}: directory not found")
            continue

        other_subjects = pres_subjects_df.loc[
            (pres_subjects_df['class'] == cls) &
            (pres_subjects_df['subject_name'] != subject),
            'subject_name'
        ].tolist()

        for x in other_subjects:
            gen_dir = os.path.join(res_root, x)
            if not os.path.isdir(gen_dir):
                print(f"Skipping {cond}/{subject} vs {x}: directory not found")
                continue

            preservation = pres.collect_pres(real_dir, gen_dir)

            results.append({
                'condition': cond,
                'subject':   subject,
                'PRES':      preservation,
            })

            # Print inside the pair loop: the value is only defined once a
            # pair has actually been scored, and each line then matches the
            # row that was just recorded.
            print(f'{cond} {subject} {x} {preservation:.4f}')

# Explicit columns keep the CSV header present even when every pair was
# skipped, so the file can still be read back later.
pres_results_df = pd.DataFrame(results, columns=['condition', 'subject', 'PRES'])
pres_results_df.to_csv("../results/pres_results.csv", index=False)

In [None]:

# Average all recorded PRES values (one per subject pair) within each condition.
mean_pres = pres_results_df.groupby('condition', as_index=False).agg(
    mean_PRES=('PRES', 'mean'),
)
mean_pres

#### Collecting CLIP-I, CLIP-T (between real and generated images)

In [None]:
# Despite the `_dir` suffix, both names hold paths to CSV files.
# `prompt_dir` is passed to the CLIP metric helper in the next cell;
# `subject_dir` is not referenced by the cells shown here.
subject_dir = "../data/subjects.csv"
prompt_dir = "../data/prompts.csv"

In [None]:
# Compute CLIP-I and CLIP-T per (condition, subject) from the subject's real
# images, its generated images and the prompt table.
clip_results = []
data_root = os.path.join(REAL_ROOT, "subjects")  # identical for every condition
for cond in CONDITIONS:
    res_root = os.path.join(GEN_ROOT, cond)
    for subject in subjects_df['subject_name']:
        real_dir = os.path.join(data_root, subject)
        gen_dir = os.path.join(res_root, subject)
        if not os.path.isdir(real_dir) or not os.path.isdir(gen_dir):
            # Report the skip so a wrong root or condition name cannot
            # silently produce an empty result table.
            print(f"Skipping {cond}/{subject}: directory not found")
            continue

        # The helper returns the pair (CLIP-I, CLIP-T) for this subject.
        clip_i, clip_t = clip_embeddings.collect_clip_metrics(real_dir, gen_dir, prompt_dir)

        clip_results.append({
            'condition': cond,
            'subject':   subject,
            'CLIP-I':    clip_i,
            'CLIP-T':    clip_t
        })

        print(f'{cond} {subject} {clip_i:.4f} {clip_t:.4f}')

# Explicit columns keep the CSV header present even when every subject was
# skipped, so the file can still be read back later.
clip_results_df = pd.DataFrame(
    clip_results, columns=['condition', 'subject', 'CLIP-I', 'CLIP-T']
)
clip_results_df.to_csv("../results/clip_results.csv", index=False)

In [None]:
# Average the per-subject CLIP-I and CLIP-T scores within each condition.
mean_clip = (
    clip_results_df
    .groupby('condition')[['CLIP-I', 'CLIP-T']]
    .mean()
    .rename(columns={'CLIP-I': 'mean_CLIP_I', 'CLIP-T': 'mean_CLIP_T'})
    .reset_index()
)

mean_clip

#### Collecting DIV (between generated images of the same subject and prompt)

In [None]:
def split_prompts(gen_dir):
    """Group the ``.png`` files in *gen_dir* by their filename prefix.

    The prefix is everything before the first ``_`` in the filename; the
    code treats it as the prompt index.

    Args:
        gen_dir: Directory to list (not searched recursively).

    Returns:
        A ``defaultdict(list)`` mapping each prefix to the paths
        (``gen_dir`` joined with the filename) of the files sharing it.
    """
    by_prompt = defaultdict(list)
    for filename in os.listdir(gen_dir):
        if not filename.endswith('.png'):
            continue
        prompt_key, _, _ = filename.partition('_')
        by_prompt[prompt_key].append(os.path.join(gen_dir, filename))

    return by_prompt

In [None]:
def all_divs(groups):
    """Compute one DIV value for every prompt group with at least two images.

    Args:
        groups: Mapping of prompt index to a list of image paths.

    Returns:
        A list with the result of ``div.collect_div(paths)`` for each group
        of two or more paths, in the mapping's iteration order. Smaller
        groups are skipped with a printed notice.
    """
    scores = []
    for prompt_id, image_paths in groups.items():
        n_samples = len(image_paths)
        if n_samples >= 2:
            scores.append(div.collect_div(image_paths))
        else:
            print(f" only {n_samples} sample(s) for prompt {prompt_id}, skipping")

    return scores


In [None]:
# Per-subject DIV: for each (condition, subject) folder, score every prompt
# group and average those per-prompt values into one number.
div_values = []
for cond in CONDITIONS:
    for subject in subjects_df['subject_name']:
        gen_dir = os.path.join(GEN_ROOT, cond, subject)
        if not os.path.isdir(gen_dir):
            print(f"Skipping {cond}/{subject}: directory not found")
            continue

        prompt_divs = all_divs(split_prompts(gen_dir))
        if len(prompt_divs) == 0:
            print(f"  No valid prompts for {cond}/{subject}, skipping")
            continue

        div_values.append(dict(
            condition=cond,
            subject=subject,
            DIV=float(np.mean(prompt_divs)),
        ))

div_df = pd.DataFrame(div_values)
div_df.to_csv("../results/div_results.csv", index=False)

div_df

In [None]:
# Average the per-subject DIV scores within each condition.
mean_div = div_df.groupby('condition', as_index=False).agg(
    mean_DIV=('DIV', 'mean'),
)
mean_div

# ALL RESULTS PRINTED

In [22]:
import pandas as pd

# Reload the per-metric tables from the CSV files written by the metric cells,
# so this summary can be rebuilt without recomputing any metric.
clip_results_df = pd.read_csv("../results/clip_results.csv")
dino_results_df = pd.read_csv("../results/dino_results.csv")
pres_results_df = pd.read_csv("../results/pres_results.csv")
div_results_df = pd.read_csv("../results/div_results.csv")

# One row per condition for each metric.
mean_dino = (
    dino_results_df
    .groupby('condition')['DINO']
    .mean()
    .reset_index(name='mean_DINO')
)

mean_pres = (
    pres_results_df
    .groupby('condition')['PRES']
    .mean()
    .reset_index(name='mean_PRES')
)

mean_clip = (
    clip_results_df
    .groupby('condition', as_index=False)
    .agg(
        mean_CLIP_I = ('CLIP-I', 'mean'),
        mean_CLIP_T = ('CLIP-T', 'mean'),
    )
)

mean_div = (
    div_results_df
    .groupby('condition')['DIV']
    .mean()
    .reset_index(name='mean_DIV')
)

# Join the per-metric means on the condition name. `merge` defaults to an
# inner join, so a condition missing from any one metric table is dropped
# from the combined table.
mean_all = (
    mean_pres
    .merge(mean_div,  on='condition')
    .merge(mean_dino, on='condition')
    .merge(mean_clip,  on='condition')
)

mean_all.to_csv("../results/all_results.csv", index=False)

# Show the combined table as the cell output; `to_csv` returns None when given
# a path, so without this trailing expression nothing would be displayed.
mean_all