In [None]:
# Colab setup: upgrade the diffusion / Hugging Face stack, then install the
# metric (lpips, scikit-image) and analysis/plotting packages used below.
# NOTE(review): versions are not pinned, so the installed releases may differ between sessions.
!pip install -q -U diffusers transformers huggingface_hub accelerate sentencepiece
!pip install -q lpips scikit-image pandas matplotlib seaborn


In [None]:
from google.colab import drive
import os

# Mount Google Drive; every image and log of this phase is written below it.
drive.mount("/content/drive")

# Phase-3 directory layout inside the thesis folder.
ROOT = "/content/drive/MyDrive/thesis2"
PHASE3_ROOT = f"{ROOT}/phase3_clip_faces"
IMG_ROOT, LOG_ROOT = f"{PHASE3_ROOT}/images", f"{PHASE3_ROOT}/logs"

# Create the output folders up front; exist_ok makes re-running the cell harmless.
for _phase3_dir in (IMG_ROOT, LOG_ROOT):
    os.makedirs(_phase3_dir, exist_ok=True)

for _label, _location in (
    ("Phase 3 root:", PHASE3_ROOT),
    ("Images:", IMG_ROOT),
    ("Logs:", LOG_ROOT),
):
    print(_label, _location)


In [None]:
from huggingface_hub import login
from diffusers import StableDiffusion3Pipeline
import torch



# The checkpoint download needs a Hugging Face access token, but nothing in the
# notebook defines HF_TOKEN. Resolve it without hard-coding the secret:
#   1. reuse a value that an earlier cell may already have put in the namespace,
#   2. otherwise read the HF_TOKEN environment variable,
#   3. otherwise ask for it interactively (input is not echoed).
import os
from getpass import getpass

HF_TOKEN = globals().get("HF_TOKEN") or os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    HF_TOKEN = getpass("Hugging Face access token: ")

model_id = "stabilityai/stable-diffusion-3.5-medium"

# Load the pipeline weights in half precision.
pipe = StableDiffusion3Pipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    token=HF_TOKEN,
)

pipe = pipe.to("cuda")
pipe.enable_attention_slicing()

print("Loaded SD 3.5 on:", torch.cuda.get_device_name(0))


In [None]:
from transformers import CLIPProcessor, CLIPModel


# CLIP ViT-L/14 checkpoint used for both image and text embeddings.
clip_model_name = "openai/clip-vit-large-patch14"

# Run CLIP on the GPU when one is visible, otherwise on the CPU.
if torch.cuda.is_available():
    clip_device = "cuda"
else:
    clip_device = "cpu"

clip_processor = CLIPProcessor.from_pretrained(clip_model_name)
# Inference only: move to the device and switch to eval mode in one go.
clip_model = CLIPModel.from_pretrained(clip_model_name).to(clip_device).eval()

print("CLIP device:", clip_device)


In [None]:
import numpy as np
import pandas as pd
from PIL import Image
from skimage.metrics import structural_similarity as ssim
import lpips

# LPIPS (VGG backbone) perceptual distance, on the GPU when one is visible.
if torch.cuda.is_available():
    lpips_device = "cuda"
else:
    lpips_device = "cpu"

lpips_model = lpips.LPIPS(net='vgg')
lpips_model = lpips_model.to(lpips_device).eval()

def load_image(path, size=None):
    """Open ``path`` as an RGB PIL image, optionally resized.

    Args:
        path: Image location handed to ``Image.open``.
        size: Target size passed to ``Image.resize`` (bicubic resampling);
            ``None`` keeps the image at its stored size.

    Returns:
        The RGB-converted, possibly resized, PIL image.
    """
    rgb = Image.open(path).convert("RGB")
    return rgb if size is None else rgb.resize(size, Image.BICUBIC)

def img_to_numpy(img):
    """Return ``img`` as a float32 array with every value divided by 255."""
    pixels = np.asarray(img)
    as_float = pixels.astype(np.float32)
    return as_float / 255.0

def img_to_lpips_tensor(img):
    """Convert an image to a batched tensor on ``lpips_device``.

    The array is scaled by 1/255, mapped linearly to [-1, 1], permuted from
    (axis0, axis1, axis2) to (axis2, axis0, axis1) and given a leading batch
    dimension of size 1.
    """
    unit_range = np.asarray(img).astype(np.float32) / 255.0
    signed = (unit_range * 2.0) - 1.0           # [-1, 1]
    channels_first = torch.from_numpy(signed).permute(2, 0, 1)
    return channels_first.unsqueeze(0).to(lpips_device)


In [None]:
import torch.nn.functional as F

# Text anchors embedded with CLIP. The diversity analysis scores every image
# against each phrase and stores the result in the ``sim_anchor_<i>_<text>`` columns.
ANCHORS = [
    "neutral face portrait",
    "smiling face portrait",
    "serious face portrait",
    "face looking confident portrait",
    "face looking surprised portrait",
]

def get_clip_image_embeddings(images_pil):
    """Return [N, D] tensor of normalized CLIP image embeddings.

    Images are pushed through ``clip_model`` four at a time on ``clip_device``;
    each batch of features is L2-normalized before the batches are concatenated.
    """
    chunk = 4  # images per forward pass
    embedded = []

    with torch.no_grad():
        start = 0
        while start < len(images_pil):
            pixel_inputs = clip_processor(
                images=images_pil[start:start + chunk],
                return_tensors="pt",
            ).to(clip_device)
            features = clip_model.get_image_features(**pixel_inputs)
            embedded.append(F.normalize(features, p=2, dim=-1))  # unit length
            start += chunk

    return torch.cat(embedded, dim=0)   # [N, D]

def get_clip_text_anchor_embeddings(anchor_texts=ANCHORS):
    """Return L2-normalized CLIP text embeddings, one row per anchor phrase ([A, D]).

    Args:
        anchor_texts: Phrases to embed; defaults to the module-level ``ANCHORS``.
    """
    with torch.no_grad():
        tokens = clip_processor(text=anchor_texts, padding=True, return_tensors="pt")
        tokens = tokens.to(clip_device)
        embedded = clip_model.get_text_features(**tokens)
        return F.normalize(embedded, p=2, dim=-1)   # [A, D]


In [None]:
# Embed the anchor phrases once; the analysis function reads this module-level
# tensor when it scores images against the anchors.
anchor_embs = get_clip_text_anchor_embeddings(ANCHORS)
print("Anchor CLIP embedding shape:", anchor_embs.shape)


In [None]:
# === Prompts for Phase 3: neutral / smile ===
# Shared prefix; each condition below appends its own expression description.
BASE_FACE_PROMPT = (
    "a photorealistic portrait of a human face, studio lighting, "
    "high resolution, natural skin texture, realistic anatomy, "
    "professional photography, symmetric face, looking forward"
)

# condition name -> full generation prompt
CLIP_PROMPTS = {
    "neutral": BASE_FACE_PROMPT + ", neutral expression, no smile, relaxed mouth, closed lips",
    "smiling": BASE_FACE_PROMPT + ", smiling, visible teeth, joyful expression, warm smile",
}

# Defaults for generate_clip_diversity_images. They are bound as default
# arguments when that function is defined, so re-run its cell after editing them.
N_CLIP_VARIANTS = 30   # images per condition
BASE_SEED = 123        # image i uses seed BASE_SEED + i
HEIGHT = 768
WIDTH = 768
NUM_STEPS = 30
GUIDANCE_SCALE = 7.0   # fixed for this phase


In [None]:
import csv
from datetime import datetime

def get_clip_paths(condition: str):
    """Return ``(image_dir, log_csv_path)`` for ``condition``.

    The image directory ``IMG_ROOT/<condition>`` is created if it is missing;
    the log path ``LOG_ROOT/clip_diversity_<condition>.csv`` is only computed.
    """
    image_dir = os.path.join(IMG_ROOT, condition)
    os.makedirs(image_dir, exist_ok=True)
    log_csv = os.path.join(LOG_ROOT, f"clip_diversity_{condition}.csv")
    return image_dir, log_csv

def generate_clip_diversity_images(
    condition: str,
    n_variants: int = N_CLIP_VARIANTS,
    base_seed: int = BASE_SEED,
    guidance_scale: float = GUIDANCE_SCALE,
    num_inference_steps: int = NUM_STEPS,
    height: int = HEIGHT,
    width: int = WIDTH,
):
    """Generate ``n_variants`` images for one condition and log each one.

    The prompt comes from ``CLIP_PROMPTS[condition]``. Image ``i`` is sampled
    with seed ``base_seed + i`` on a CUDA generator, saved as
    ``<condition>_clip_seed<seed>.png`` in the condition's image folder, and
    recorded as one row appended to the condition's CSV log (the header row is
    written only when the log file does not exist yet).

    Args:
        condition: Key of ``CLIP_PROMPTS``; checked with an ``assert``.
        n_variants: Number of consecutive seeds to render.
        base_seed: Seed of the first image.
        guidance_scale: Passed through to the pipeline call.
        num_inference_steps: Passed through to the pipeline call.
        height: Output height passed to the pipeline.
        width: Output width passed to the pipeline.
    """
    assert condition in CLIP_PROMPTS
    prompt = CLIP_PROMPTS[condition]
    out_dir, log_csv = get_clip_paths(condition)

    header = [
        "timestamp", "condition", "prompt", "seed", "guidance_scale",
        "num_inference_steps", "height", "width", "image_path",
    ]

    # First run for this condition: start the log with its header row.
    if not os.path.exists(log_csv):
        with open(log_csv, "w", newline="") as log_file:
            csv.writer(log_file).writerow(header)

    for position, seed in enumerate(range(base_seed, base_seed + n_variants), start=1):
        noise_source = torch.Generator(device="cuda").manual_seed(seed)

        print(f"[{condition}] {position}/{n_variants} (seed={seed})")
        output = pipe(
            prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            height=height,
            width=width,
            generator=noise_source,
        )
        picture = output.images[0]

        save_path = os.path.join(out_dir, f"{condition}_clip_seed{seed}.png")
        picture.save(save_path)

        # Log immediately so an interrupted run still records what was produced.
        with open(log_csv, "a", newline="") as log_file:
            csv.writer(log_file).writerow([
                datetime.now().isoformat(timespec="seconds"),
                condition,
                prompt,
                seed,
                guidance_scale,
                num_inference_steps,
                height,
                width,
                save_path,
            ])

        print(" saved:", save_path)

    print(f"✨ Done for condition='{condition}'. Log:", log_csv)


In [None]:
# Render the image set for both conditions with the default settings.
# Each call saves PNGs and appends one row per image to the condition's CSV log.
generate_clip_diversity_images("neutral")
generate_clip_diversity_images("smiling")


In [None]:
def compute_clip_diversity_for_condition(condition: str):
    """Compute per-image anchor similarities and pairwise diversity metrics.

    Reads the generation log of ``condition``, loads every logged image that is
    still on disk (all resized to the size of the first one) and writes three
    CSV files under ``LOG_ROOT``:

    * ``clip_diversity_image_anchors_<condition>.csv`` - CLIP cosine similarity
      of each image to every phrase in ``ANCHORS``.
    * ``clip_diversity_pairwise_<condition>.csv`` - MSE, SSIM, LPIPS and CLIP
      cosine similarity / distance for every unordered pair of images.
    * ``clip_diversity_summary.csv`` - mean and std of the pairwise metrics, one
      row per condition; re-running replaces the condition's previous row.

    Args:
        condition: Condition name; selects the log file and filters its rows.

    Returns:
        ``(pairwise_df, summary_df)``: the per-pair metrics and the one-row
        summary. With a single image ``pairwise_df`` is empty (but keeps its
        columns) and the summary statistics are NaN.

    Raises:
        ValueError: If none of the logged images exists on disk.
    """
    _, cond_log_path = get_clip_paths(condition)
    print(f"\n=== CLIP diversity for condition: {condition} ===")
    print("Log:", cond_log_path)

    df = pd.read_csv(cond_log_path)
    df_cond = df[df["condition"] == condition]

    # The generation log is append-only, so re-running generation logs the same
    # file again. Keep the newest row per image; otherwise an image would be
    # paired with itself (distance 0) and bias every diversity statistic.
    df_cond = df_cond.drop_duplicates(subset="image_path", keep="last")

    # Drop rows whose file is gone *before* computing anything, so that row i of
    # df_cond always corresponds to element i of the image / embedding lists.
    on_disk = pd.Series(
        [os.path.exists(p) for p in df_cond["image_path"]],
        index=df_cond.index,
        dtype=bool,
    )
    for missing_path in df_cond.loc[~on_disk, "image_path"]:
        print(" missing:", missing_path)
    df_cond = df_cond.loc[on_disk].reset_index(drop=True)

    print("Rows:", len(df_cond))
    display(df_cond[["seed", "image_path"]])

    if df_cond.empty:
        raise ValueError(
            f"No images found on disk for condition '{condition}' (log: {cond_log_path})"
        )

    seeds = [int(s) for s in df_cond["seed"]]
    paths = df_cond["image_path"].tolist()

    # ---- Load images ----
    images_pil = []
    images_np = []
    images_lpips_t = []
    target_size = None

    for path in paths:
        img = Image.open(path).convert("RGB")
        if target_size is None:
            target_size = img.size  # the first image fixes the common size

        img = img.resize(target_size, Image.BICUBIC)
        images_pil.append(img)
        images_np.append(img_to_numpy(img))
        images_lpips_t.append(img_to_lpips_tensor(img))

    print("Loaded", len(images_pil), "images of size:", target_size)

    # ---- CLIP image embeddings ----
    image_embs = get_clip_image_embeddings(images_pil)   # [N, D], unit length

    # Embeddings are L2-normalized, so a dot product is a cosine similarity.
    with torch.no_grad():
        sims_img_anchor = (image_embs @ anchor_embs.T).cpu().numpy()   # [N, A]
        sims_img_img = (image_embs @ image_embs.T).cpu().numpy()       # [N, N]

    # ---- Per-image anchor similarities ----
    anchor_cols = [
        f"sim_anchor_{i}_{txt}".replace(" ", "_").replace(",", "").replace("/", "_")
        for i, txt in enumerate(ANCHORS)
    ]

    img_anchor_rows = []
    for seed, path, sims in zip(seeds, paths, sims_img_anchor):
        row_dict = {
            "condition": condition,
            "seed": seed,
            "image_path": path,
        }
        for name, val in zip(anchor_cols, sims):
            row_dict[name] = float(val)
        img_anchor_rows.append(row_dict)

    img_anchor_df = pd.DataFrame(img_anchor_rows)
    img_anchor_path = os.path.join(LOG_ROOT, f"clip_diversity_image_anchors_{condition}.csv")
    img_anchor_df.to_csv(img_anchor_path, index=False)
    print("Saved image-anchor sims to:", img_anchor_path)

    # ---- Pairwise metrics ----
    metric_cols = ["MSE", "SSIM", "LPIPS", "CLIP_cosine_sim", "CLIP_cosine_dist"]
    pairwise_cols = ["condition", "seed_i", "seed_j", "image_i", "image_j"] + metric_cols

    results = []
    N = len(images_np)

    for i in range(N):
        for j in range(i + 1, N):
            arr_i = images_np[i]
            arr_j = images_np[j]

            mse_val = float(np.mean((arr_i - arr_j) ** 2))
            ssim_val = float(ssim(arr_i, arr_j, channel_axis=-1, data_range=1.0))

            with torch.no_grad():
                lpips_val = float(lpips_model(images_lpips_t[i], images_lpips_t[j]).item())

            clip_sim = float(sims_img_img[i, j])

            results.append({
                "condition": condition,
                "seed_i": seeds[i],
                "seed_j": seeds[j],
                "image_i": paths[i],
                "image_j": paths[j],
                "MSE": mse_val,
                "SSIM": ssim_val,
                "LPIPS": lpips_val,
                "CLIP_cosine_sim": clip_sim,
                "CLIP_cosine_dist": 1.0 - clip_sim,
            })

    # Explicit columns + float dtype keep the frame usable when there are no pairs.
    pairwise_df = pd.DataFrame(results, columns=pairwise_cols)
    pairwise_df = pairwise_df.astype({col: float for col in metric_cols})
    pairwise_path = os.path.join(LOG_ROOT, f"clip_diversity_pairwise_{condition}.csv")
    pairwise_df.to_csv(pairwise_path, index=False)
    print("Saved pairwise metrics to:", pairwise_path)

    # ---- Summary ----
    summary = {
        "condition": condition,
        "N_pairs": len(pairwise_df),
        "LPIPS_mean": pairwise_df["LPIPS"].mean(),
        "LPIPS_std":  pairwise_df["LPIPS"].std(),
        "SSIM_mean":  pairwise_df["SSIM"].mean(),
        "SSIM_std":   pairwise_df["SSIM"].std(),
        "MSE_mean":   pairwise_df["MSE"].mean(),
        "MSE_std":    pairwise_df["MSE"].std(),
        "CLIP_dist_mean": pairwise_df["CLIP_cosine_dist"].mean(),
        "CLIP_dist_std":  pairwise_df["CLIP_cosine_dist"].std(),
        "CLIP_sim_mean":  pairwise_df["CLIP_cosine_sim"].mean(),
        "CLIP_sim_std":   pairwise_df["CLIP_cosine_sim"].std(),
    }
    summary_df = pd.DataFrame([summary])

    # Merge into the global summary. A previous row for this condition is
    # replaced so that re-running the analysis does not accumulate duplicates.
    summary_path = os.path.join(LOG_ROOT, "clip_diversity_summary.csv")
    if os.path.exists(summary_path):
        old = pd.read_csv(summary_path)
        if "condition" in old.columns:
            old = old[old["condition"] != condition]
        summary_all = pd.concat([old, summary_df], ignore_index=True)
    else:
        summary_all = summary_df

    summary_all.to_csv(summary_path, index=False)
    print(" Updated summary at:", summary_path)
    display(summary_df)

    return pairwise_df, summary_df


In [None]:
# Run the diversity analysis for both conditions; the returned frames feed the plots in the next cell.
pairwise_neutral_clip, summary_neutral_clip = compute_clip_diversity_for_condition("neutral")
pairwise_smiling_clip, summary_smiling_clip = compute_clip_diversity_for_condition("smiling")


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Label each pairwise frame with its condition, then stack them for plotting.
for _tag, _frame in (("neutral", pairwise_neutral_clip), ("smiling", pairwise_smiling_clip)):
    _frame["condition"] = _tag
pairwise_all = pd.concat([pairwise_neutral_clip, pairwise_smiling_clip], ignore_index=True)

# 1) Distribution of pairwise CLIP distances, one hue per condition
plt.figure(figsize=(10,4))
sns.histplot(data=pairwise_all, x="CLIP_cosine_dist", hue="condition", kde=True, bins=10)
plt.title("CLIP cosine distance distribution (neutral vs smiling)")
plt.xlabel("CLIP cosine distance (1 - cosine similarity)")
plt.show()

# 2) One boxplot panel per metric: LPIPS / SSIM / CLIP distance
plt.figure(figsize=(12,4))
_panels = [
    ("LPIPS", "LPIPS by condition"),
    ("SSIM", "SSIM by condition"),
    ("CLIP_cosine_dist", "CLIP distance by condition"),
]
for _slot, (_metric, _panel_title) in enumerate(_panels, start=1):
    plt.subplot(1, 3, _slot)
    sns.boxplot(data=pairwise_all, x="condition", y=_metric)
    plt.title(_panel_title)

plt.tight_layout()
plt.show()

# 3) LPIPS against CLIP distance for every image pair
plt.figure(figsize=(6,5))
sns.scatterplot(data=pairwise_all, x="CLIP_cosine_dist", y="LPIPS", hue="condition")
plt.xlabel("CLIP cosine distance")
plt.ylabel("LPIPS")
plt.title("Perceptual vs semantic distance")
plt.show()


In [None]:


import os
from datetime import datetime
import csv

import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn.functional as F

from skimage.metrics import structural_similarity as ssim
import lpips
import matplotlib.pyplot as plt
import seaborn as sns

from transformers import CLIPProcessor, CLIPModel


In [None]:

# Configuration for the expression-intensity experiment.
# NOTE(review): IMG_ROOT, LOG_ROOT, HEIGHT, WIDTH, NUM_STEPS and GUIDANCE_SCALE are
# also assigned by the phase-3 cells earlier in this notebook. After this cell runs,
# any function that reads IMG_ROOT / LOG_ROOT at call time (e.g. get_clip_paths)
# resolves to the folders below, so do not re-run phase-3 cells afterwards
# without re-running the phase-3 path cell first.
ROOT = "/content/drive/MyDrive/thesis2"

FACES_ROOT = os.path.join(ROOT, "faces_expression_intensity")
IMG_ROOT   = os.path.join(FACES_ROOT, "images")
LOG_ROOT   = os.path.join(FACES_ROOT, "logs")

# Create the output folders; exist_ok makes re-running the cell harmless.
os.makedirs(IMG_ROOT, exist_ok=True)
os.makedirs(LOG_ROOT, exist_ok=True)

print("FACES_ROOT:", FACES_ROOT)
print("IMG_ROOT:", IMG_ROOT)
print("LOG_ROOT:", LOG_ROOT)

# --- Conditions & prompts ---

# Conditions are processed in this order by the driver loop and the summaries.
CONDITIONS = ["neutral", "soft_smile", "big_smile"]

# condition name -> generation prompt (also used as the CLIP text query)
PROMPTS = {
    "neutral":   "portrait photo of a person, neutral facial expression, studio lighting",
    "soft_smile": "portrait photo of a person, subtle soft smile, studio lighting",
    "big_smile":  "portrait photo of a person, big bright smile, teeth visible, studio lighting",
}

# Seeds we want to use (one image per seed and condition)
SEEDS = [123, 124, 125, 126, 127]

# SD generation hyperparameters
HEIGHT = 768
WIDTH = 768
NUM_STEPS = 18
GUIDANCE_SCALE = 5.0  # fixed here


In [None]:

# --- CLIP model (text + image encoder) ---
# Load CLIP only when the notebook namespace does not already hold both the
# model and its processor.
if "clip_model" in globals() and "clip_processor" in globals():
    print("Reusing existing CLIP model.")
else:
    clip_model_name = "openai/clip-vit-large-patch14"
    clip_device = "cuda" if torch.cuda.is_available() else "cpu"

    clip_processor = CLIPProcessor.from_pretrained(clip_model_name)
    clip_model = CLIPModel.from_pretrained(clip_model_name).to(clip_device).eval()
    print("Loaded CLIP model on:", clip_device)

# --- LPIPS model ---
# Same pattern: reuse the metric network when it is already in the namespace.
if "lpips_model" in globals():
    print("Reusing existing LPIPS model.")
else:
    lpips_device = "cuda" if torch.cuda.is_available() else "cpu"
    lpips_model = lpips.LPIPS(net='vgg').to(lpips_device).eval()
    print("Loaded LPIPS model on:", lpips_device)


In [None]:
# ---------- Basic image helpers ----------

def load_image(path, size=None):
    """Load the file at ``path`` as an RGB PIL image.

    When ``size`` is given the image is resized to it with bicubic resampling;
    with the default ``None`` it is returned at its stored size.
    """
    picture = Image.open(path).convert("RGB")
    if size is None:
        return picture
    return picture.resize(size, Image.BICUBIC)

def img_to_numpy(img):
    """Convert ``img`` to a float32 NumPy array scaled by 1/255."""
    float_pixels = np.asarray(img).astype(np.float32)
    return float_pixels / 255.0

# lpips_device may be missing when lpips_model was reused from an earlier
# session state; in that case take the device of the model's first parameter.
if "lpips_device" not in globals():
    lpips_device = next(lpips_model.parameters()).device

def img_to_lpips_tensor(img):
    """Turn an image into a batch-of-one tensor on ``lpips_device``.

    Values are scaled by 1/255 and then mapped linearly to [-1, 1]; the last
    array axis is moved to the front and a leading batch axis is added.
    """
    scaled = np.asarray(img).astype(np.float32) / 255.0
    centered = (scaled * 2.0) - 1.0       # [-1, 1]
    tensor = torch.from_numpy(centered).permute(2, 0, 1)
    tensor = tensor.unsqueeze(0)
    return tensor.to(lpips_device)
# ---------- CLIP helpers ----------

def get_clip_image_embeddings(images_pil):
    """Return [N, D] tensor of normalized CLIP image embeddings.

    The images are processed in batches of four on the device that holds
    ``clip_model``; every batch is L2-normalized before concatenation.
    """
    batch_size = 4
    normalized_batches = []
    with torch.no_grad():
        for offset in range(0, len(images_pil), batch_size):
            encoded = clip_processor(
                images=images_pil[offset:offset + batch_size],
                return_tensors="pt",
            )
            encoded = encoded.to(clip_model.device)
            image_features = clip_model.get_image_features(**encoded)
            normalized_batches.append(F.normalize(image_features, p=2, dim=-1))
    return torch.cat(normalized_batches, dim=0)   # [N, D]

def get_clip_text_embedding(text: str):
    """Return [D] normalized CLIP text embedding for a single string.

    Args:
        text: The string to embed (the analysis passes the generation prompt).

    Returns:
        A 1-D tensor holding the L2-normalized text features, on the device
        of ``clip_model``.

    Note:
        CLIP's text encoder has a fixed maximum context length (77 tokens for
        the OpenAI checkpoints). ``truncation=True`` cuts longer prompts to that
        length instead of letting the forward pass fail on them; prompts that
        already fit are unaffected.
    """
    with torch.no_grad():
        inputs = clip_processor(
            text=[text],
            return_tensors="pt",
            padding=True,
            truncation=True,  # guard against prompts longer than the context window
        ).to(clip_model.device)
        text_feats = clip_model.get_text_features(**inputs)
        text_feats = F.normalize(text_feats, p=2, dim=-1)
    return text_feats[0]  # [D]

# Anchor phrases: every image is scored against each of these with CLIP, and the
# scores are written to the ``sim_anchor_<i>_<text>`` columns of the per-image CSV.
# NOTE(review): the last two phrases differ from the phase-3 ANCHORS list
# (no trailing "portrait"), so anchor scores are not comparable across the two experiments.
ANCHOR_TEXTS = [
    "neutral face portrait",
    "smiling face portrait",
    "serious face portrait",
    "face looking confident",
    "face looking surprised",
]

def get_clip_anchor_embeddings(anchor_texts=ANCHOR_TEXTS):
    """Return L2-normalized CLIP text embeddings for the anchor phrases ([A, D]).

    Args:
        anchor_texts: Phrases to embed; defaults to the module-level ``ANCHOR_TEXTS``.
    """
    with torch.no_grad():
        tokenized = clip_processor(text=anchor_texts, return_tensors="pt", padding=True)
        tokenized = tokenized.to(clip_model.device)
        anchor_features = clip_model.get_text_features(**tokenized)
        return F.normalize(anchor_features, p=2, dim=-1)   # [A, D]

# Embed the anchors once. compute_clip_diversity_for_condition reads this
# module-level tensor, so this cell must run before the analysis.
anchor_embs = get_clip_anchor_embeddings(ANCHOR_TEXTS)
print("Anchor embeddings shape:", anchor_embs.shape)


In [None]:
def get_condition_paths(condition: str):
    """Return the image folder ``IMG_ROOT/<condition>``, creating it if needed."""
    condition_dir = os.path.join(IMG_ROOT, condition)
    os.makedirs(condition_dir, exist_ok=True)
    return condition_dir

def expected_filenames_for_condition(condition: str, seeds):
    """List the file names ``<condition>_seed<seed>.png``, one per seed, in seed order."""
    names = []
    for seed in seeds:
        names.append(f"{condition}_seed{seed}.png")
    return names

def images_exist_for_condition(condition: str, seeds):
    """Return True when every expected image file of ``condition`` is on disk.

    The condition's image folder is created as a side effect of looking it up.
    """
    folder = get_condition_paths(condition)
    for file_name in expected_filenames_for_condition(condition, seeds):
        if not os.path.exists(os.path.join(folder, file_name)):
            return False
    return True

def generate_faces_for_condition(condition: str,
                                prompt: str,
                                seeds,
                                pipe,
                                height=HEIGHT,
                                width=WIDTH,
                                num_inference_steps=NUM_STEPS,
                                guidance_scale=GUIDANCE_SCALE):
    """Render one image per seed for ``condition`` and return the image folder.

    Images are saved as ``<condition>_seed<seed>.png``. Nothing is generated
    when all expected files already exist, and individual seeds whose file is
    already present are skipped.

    Args:
        condition: Name used for the sub-folder and the file-name prefix.
        prompt: Text prompt passed to the pipeline.
        seeds: Seeds to render; each seeds a CUDA ``torch.Generator``.
        pipe: Callable pipeline whose result exposes ``.images``.
        height: Output height passed to the pipeline.
        width: Output width passed to the pipeline.
        num_inference_steps: Passed through to the pipeline call.
        guidance_scale: Passed through to the pipeline call.

    Returns:
        Path of the folder holding the condition's images.
    """
    img_dir = get_condition_paths(condition)
    targets = [
        os.path.join(img_dir, file_name)
        for file_name in expected_filenames_for_condition(condition, seeds)
    ]

    # Fast path: the whole set is already on disk.
    if all(map(os.path.exists, targets)):
        print(f"[{condition}] All {len(seeds)} images already exist. Skipping generation.")
        return img_dir

    for seed, out_path in zip(seeds, targets):
        # Re-check per seed so a partially finished run resumes where it stopped.
        if os.path.exists(out_path):
            print(f"[{condition}] Seed {seed}: already exists, skipping.")
            continue

        print(f"[{condition}] Generating seed {seed}...")
        noise_source = torch.Generator(device="cuda").manual_seed(seed)

        output = pipe(
            prompt,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            height=height,
            width=width,
            generator=noise_source,
        )
        output.images[0].save(out_path)
        print(f"   saved {out_path}")

    return img_dir


In [None]:
def compute_seed_diversity_for_condition(condition: str,
                                         img_dir: str,
                                         log_root: str = LOG_ROOT):
    """Pairwise low-level / perceptual diversity between the images of one condition.

    Every ``.png`` in ``img_dir`` is loaded (resized to the first image's size),
    and MSE, SSIM and LPIPS are computed for each unordered pair. Two CSVs are
    written to ``log_root``:

    * ``seed_diversity_pairwise_faces_<condition>.csv`` - one row per pair
    * ``seed_diversity_summary_faces_<condition>.csv`` - mean/std per metric

    Args:
        condition: Label stored in the rows and used in the file names.
        img_dir: Folder with the images; the seed is parsed from the text
            between the last ``"seed"`` and the following ``"."`` of each name.
        log_root: Folder that receives the CSV files.

    Returns:
        ``(pairwise_df, summary_df)``, or ``(None, None)`` when fewer than two
        images are found.
    """
    png_names = sorted(name for name in os.listdir(img_dir) if name.endswith(".png"))
    if len(png_names) < 2:
        print(f"[{condition}] Not enough images for pairwise metrics.")
        return None, None

    paths = [os.path.join(img_dir, name) for name in png_names]
    seeds = [int(name.split("seed")[-1].split(".")[0]) for name in png_names]

    # Load everything once, in both representations the metrics need.
    images_np, images_lpips_t = [], []
    target_size = None
    for image_path in paths:
        rgb = Image.open(image_path).convert("RGB")
        if target_size is None:
            target_size = rgb.size  # the first image defines the common size
        rgb = rgb.resize(target_size, Image.BICUBIC)
        images_np.append(img_to_numpy(rgb))
        images_lpips_t.append(img_to_lpips_tensor(rgb))

    print(f"[{condition}] Loaded {len(images_np)} images of size {target_size}")

    # All unordered pairs (a < b), in the same order as a nested index loop.
    count = len(images_np)
    index_pairs = [(a, b) for a in range(count) for b in range(a + 1, count)]

    results = []
    for a, b in index_pairs:
        first, second = images_np[a], images_np[b]

        mse_val = float(np.mean((first - second)**2))
        ssim_val = float(ssim(first, second, channel_axis=-1, data_range=1.0))
        with torch.no_grad():
            lpips_val = float(lpips_model(images_lpips_t[a], images_lpips_t[b]).item())

        results.append({
            "condition": condition,
            "seed_i": seeds[a],
            "seed_j": seeds[b],
            "image_i": paths[a],
            "image_j": paths[b],
            "MSE": mse_val,
            "SSIM": ssim_val,
            "LPIPS": lpips_val,
        })

    pairwise_df = pd.DataFrame(results)
    pairwise_path = os.path.join(log_root, f"seed_diversity_pairwise_faces_{condition}.csv")
    pairwise_df.to_csv(pairwise_path, index=False)
    print(f"[{condition}] ✅ Saved pairwise seed diversity to:", pairwise_path)

    # One-row summary: mean and std of every metric over all pairs.
    summary = {"condition": condition, "N_pairs": len(pairwise_df)}
    for metric in ("LPIPS", "SSIM", "MSE"):
        summary[f"{metric}_mean"] = pairwise_df[metric].mean()
        summary[f"{metric}_std"] = pairwise_df[metric].std()

    summary_df = pd.DataFrame([summary])
    summary_path = os.path.join(log_root, f"seed_diversity_summary_faces_{condition}.csv")
    summary_df.to_csv(summary_path, index=False)
    print(f"[{condition}] ✅ Saved seed diversity summary to:", summary_path)

    return pairwise_df, summary_df


In [None]:
def compute_clip_diversity_for_condition(condition: str,
                                         prompt: str,
                                         img_dir: str,
                                         log_root: str = LOG_ROOT):
    """CLIP-based diversity metrics for the images of one condition.

    Every ``.png`` in ``img_dir`` is loaded (resized to the first image's size)
    and embedded with CLIP. Two CSVs are written to ``log_root``:

    * ``clip_diversity_faces_<condition>.csv`` - per image: cosine similarity to
      ``prompt`` and to each phrase in ``ANCHOR_TEXTS``
    * ``clip_diversity_pairwise_faces_<condition>.csv`` - per unordered pair:
      MSE, SSIM, LPIPS and CLIP cosine similarity / distance

    Args:
        condition: Label stored in the rows and used in the file names.
        prompt: Text compared against every image embedding.
        img_dir: Folder with the images; the seed is parsed from the text
            between the last ``"seed"`` and the following ``"."`` of each name.
        log_root: Folder that receives the CSV files.

    Returns:
        ``(per_image_df, pairwise_df)``, or ``(None, None)`` when fewer than two
        images are found.
    """
    png_names = sorted(name for name in os.listdir(img_dir) if name.endswith(".png"))
    if len(png_names) < 2:
        print(f"[{condition}] Not enough images for CLIP pairwise metrics.")
        return None, None

    paths = [os.path.join(img_dir, name) for name in png_names]
    seeds = [int(name.split("seed")[-1].split(".")[0]) for name in png_names]

    # Load each image once in the three forms the metrics need.
    images_pil, images_np, images_lpips_t = [], [], []
    target_size = None
    for image_path in paths:
        rgb = Image.open(image_path).convert("RGB")
        if target_size is None:
            target_size = rgb.size  # the first image defines the common size
        rgb = rgb.resize(target_size, Image.BICUBIC)
        images_pil.append(rgb)
        images_np.append(img_to_numpy(rgb))
        images_lpips_t.append(img_to_lpips_tensor(rgb))

    print(f"[{condition}] Loaded {len(images_pil)} images for CLIP metrics.")

    text_emb = get_clip_text_embedding(prompt)          # [D]
    img_embs = get_clip_image_embeddings(images_pil)    # [N, D]

    # --- Per-image text-image + anchor similarities ---
    # All embeddings are unit length, so dot products are cosine similarities.
    with torch.no_grad():
        text_sims = (img_embs @ text_emb)          # [N]
        anchor_sims = img_embs @ anchor_embs.T     # [N, A]

    anchor_col_names = []
    for position, phrase in enumerate(ANCHOR_TEXTS):
        cleaned = phrase.replace(" ", "_").replace(",", "").replace("/", "_")
        anchor_col_names.append(f"sim_anchor_{position}_" + cleaned)

    per_image_rows = []
    for k, (seed, image_path) in enumerate(zip(seeds, paths)):
        record = {
            "condition": condition,
            "seed": seed,
            "image_path": image_path,
            "CLIP_text_image_sim": float(text_sims[k].item()),
        }
        record.update(zip(anchor_col_names, anchor_sims[k].cpu().numpy().tolist()))
        per_image_rows.append(record)

    per_image_df = pd.DataFrame(per_image_rows)
    per_image_path = os.path.join(log_root, f"clip_diversity_faces_{condition}.csv")
    per_image_df.to_csv(per_image_path, index=False)
    print(f"[{condition}] ✅ Saved per-image CLIP diversity to:", per_image_path)

    # --- Pairwise image-image CLIP + LPIPS + SSIM ---
    count = len(images_np)
    index_pairs = [(a, b) for a in range(count) for b in range(a + 1, count)]

    pair_results = []
    for a, b in index_pairs:
        first, second = images_np[a], images_np[b]

        mse_val = float(np.mean((first - second)**2))
        ssim_val = float(ssim(first, second, channel_axis=-1, data_range=1.0))
        with torch.no_grad():
            lpips_val = float(lpips_model(images_lpips_t[a], images_lpips_t[b]).item())

        clip_sim = float((img_embs[a] * img_embs[b]).sum().item())

        pair_results.append({
            "condition": condition,
            "seed_i": seeds[a],
            "seed_j": seeds[b],
            "image_i": paths[a],
            "image_j": paths[b],
            "MSE": mse_val,
            "SSIM": ssim_val,
            "LPIPS": lpips_val,
            "CLIP_cosine_sim": clip_sim,
            "CLIP_cosine_dist": 1.0 - clip_sim,
        })

    pairwise_df = pd.DataFrame(pair_results)
    pairwise_path = os.path.join(log_root, f"clip_diversity_pairwise_faces_{condition}.csv")
    pairwise_df.to_csv(pairwise_path, index=False)
    print(f"[{condition}] ✅ Saved pairwise CLIP diversity to:", pairwise_path)

    return per_image_df, pairwise_df


In [None]:
# Result containers, keyed by condition name. An entry is None when the
# corresponding analysis found fewer than two images.
all_pairwise_seed   = {}
all_summary_seed    = {}
all_clip_pairwise   = {}
all_clip_per_image  = {}

# Full pipeline per condition: generate -> seed diversity -> CLIP diversity.
for cond in CONDITIONS:
    prompt = PROMPTS[cond]
    print("\n" + "="*60)
    print(f"CONDITION: {cond}")
    print("="*60)

    # 1) Generate (skip if exists)
    # Uses the module-level `pipe` loaded by the Stable Diffusion cell.
    img_dir = generate_faces_for_condition(cond, prompt, SEEDS, pipe)

    # 2) Seed diversity metrics (LPIPS/SSIM/MSE)
    pair_df_seed, summary_df_seed = compute_seed_diversity_for_condition(cond, img_dir)
    all_pairwise_seed[cond] = pair_df_seed
    all_summary_seed[cond] = summary_df_seed

    # 3) CLIP diversity metrics (text–image + image–image)
    # The generation prompt doubles as the CLIP text query.
    clip_per_image_df, clip_pairwise_df = compute_clip_diversity_for_condition(cond, prompt, img_dir)
    all_clip_per_image[cond] = clip_per_image_df
    all_clip_pairwise[cond] = clip_pairwise_df


In [None]:
def plot_face_diversity_summary(log_root: str = LOG_ROOT, conditions=CONDITIONS):
    """Plot the pairwise diversity metrics of all conditions side by side.

    Reads ``clip_diversity_pairwise_faces_<condition>.csv`` from ``log_root``
    for every condition (missing files are reported and skipped) and shows
    three figures: a histogram of CLIP distances, boxplots of LPIPS / SSIM /
    CLIP distance, and a scatter of LPIPS against CLIP distance.

    Args:
        log_root: Folder holding the pairwise CSV files.
        conditions: Condition names to include.
    """
    frames = []
    for cond in conditions:
        csv_path = os.path.join(log_root, f"clip_diversity_pairwise_faces_{cond}.csv")
        if not os.path.exists(csv_path):
            print(f"Missing pairwise CLIP file for {cond}: {csv_path}")
            continue
        frame = pd.read_csv(csv_path)
        frame["condition"] = cond
        frames.append(frame)

    if len(frames) == 0:
        print("No CLIP pairwise data to plot.")
        return

    pairwise_all = pd.concat(frames, ignore_index=True)

    # 1) Histograms of CLIP distance
    plt.figure(figsize=(8,4))
    sns.histplot(data=pairwise_all, x="CLIP_cosine_dist", hue="condition", bins=15, kde=True, alpha=0.5)
    plt.title("CLIP cosine distance distribution by condition")
    plt.xlabel("CLIP cosine distance (1 - cosine similarity)")
    plt.tight_layout()
    plt.show()

    # 2) Boxplots: one panel per metric
    plt.figure(figsize=(12,4))
    panels = [
        ("LPIPS", "LPIPS by condition"),
        ("SSIM", "SSIM by condition"),
        ("CLIP_cosine_dist", "CLIP distance by condition"),
    ]
    for slot, (metric, panel_title) in enumerate(panels, start=1):
        plt.subplot(1, 3, slot)
        sns.boxplot(data=pairwise_all, x="condition", y=metric)
        plt.title(panel_title)
    plt.tight_layout()
    plt.show()

    # 3) Scatter LPIPS vs CLIP distance
    plt.figure(figsize=(6,5))
    sns.scatterplot(data=pairwise_all, x="CLIP_cosine_dist", y="LPIPS", hue="condition")
    plt.xlabel("CLIP cosine distance")
    plt.ylabel("LPIPS")
    plt.title("Perceptual vs semantic distance")
    plt.tight_layout()
    plt.show()

# Draw the figures from the pairwise CSVs written by the driver loop.
plot_face_diversity_summary(LOG_ROOT, CONDITIONS)


In [None]:
def print_face_diversity_text_summary(log_root: str = LOG_ROOT, conditions=CONDITIONS):
    """Print mean ± std of the diversity metrics for every condition.

    LPIPS, SSIM and MSE come from ``seed_diversity_summary_faces_<condition>.csv``;
    the CLIP distance statistics are computed from
    ``clip_diversity_pairwise_faces_<condition>.csv``. A condition is skipped
    with a message when either file is missing.

    Args:
        log_root: Folder holding the CSV files.
        conditions: Condition names to report, in order.
    """
    print("\n===== NUMERIC SUMMARY (mean ± std) =====\n")

    for cond in conditions:
        seed_summary_path = os.path.join(log_root, f"seed_diversity_summary_faces_{cond}.csv")
        clip_pairwise_path = os.path.join(log_root, f"clip_diversity_pairwise_faces_{cond}.csv")

        # Both inputs are required for a complete report line.
        if not (os.path.exists(seed_summary_path) and os.path.exists(clip_pairwise_path)):
            print(f"[{cond}] Missing summary files.")
            continue

        stats = pd.read_csv(seed_summary_path).iloc[0]
        clip_distances = pd.read_csv(clip_pairwise_path)["CLIP_cosine_dist"]

        lpips_mean, lpips_std = stats["LPIPS_mean"], stats["LPIPS_std"]
        ssim_mean, ssim_std = stats["SSIM_mean"], stats["SSIM_std"]
        mse_mean, mse_std = stats["MSE_mean"], stats["MSE_std"]
        clip_dist_mean, clip_dist_std = clip_distances.mean(), clip_distances.std()

        print(f"Condition: {cond}")
        print(f"  LPIPS: {lpips_mean:.4f} ± {lpips_std:.4f}")
        print(f"  SSIM:  {ssim_mean:.4f} ± {ssim_std:.4f}")
        print(f"  MSE:   {mse_mean:.6f} ± {mse_std:.6f}")
        print(f"  CLIP distance: {clip_dist_mean:.4f} ± {clip_dist_std:.4f}")
        print()

    for guideline in (
        "# Interpretation guidelines :",
        "# - Higher LPIPS & CLIP distance   → more perceptual & semantic diversity.",
        "# - Lower SSIM & higher MSE        → more structural changes.",
        "# - Compare neutral vs soft_smile vs big_smile to argue how increasing smile intensity",
        "#   changes semantic diversity (CLIP) vs low-level/perceptual diversity (LPIPS/MSE).",
    ):
        print(guideline)

# Print the numeric report from the CSVs written by the driver loop.
print_face_diversity_text_summary(LOG_ROOT, CONDITIONS)
