In [None]:
# --- Core imports ---
import os
import random

import torch
from diffusers import StableDiffusion3Pipeline
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt

# Make plots inline
%matplotlib inline

# --- Paths ---
# The dataset root can be redirected without editing the notebook by setting
# the CLASSIFIER_DATASET_ROOT environment variable; when it is unset or empty
# the original Google Drive location is used.
# NOTE(review): the default assumes Google Drive is already mounted at
# /content/drive -- no mount cell is visible in this notebook; confirm.
OUT_ROOT = (
    os.environ.get("CLASSIFIER_DATASET_ROOT")
    or "/content/drive/MyDrive/thesis2/classifier_dataset"
)
OUT_NEUTRAL_DIR = os.path.join(OUT_ROOT, "neutral")
OUT_SMILING_DIR = os.path.join(OUT_ROOT, "smiling")
METADATA_PATH = os.path.join(OUT_ROOT, "metadata.csv")

# Create both class folders up front (no error if they already exist).
os.makedirs(OUT_NEUTRAL_DIR, exist_ok=True)
os.makedirs(OUT_SMILING_DIR, exist_ok=True)

# --- Generation settings ---
HEIGHT = 768
WIDTH = 768
NUM_STEPS = 18
GUIDANCE_SCALE = 7.0
BASE_SEED = 12345   # deterministic base; per-image seeds are offsets from it

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# Small speed-up when the image size is fixed.
# NOTE(review): cuDNN autotuning may select different kernels between runs,
# which can make outputs differ slightly even with fixed seeds -- set this to
# False if bit-exact reproducibility matters; confirm.
torch.backends.cudnn.benchmark = True


In [None]:
from huggingface_hub import login
from diffusers import StableDiffusion3Pipeline
import torch



model_id = "stabilityai/stable-diffusion-3.5-medium"

# Read the Hugging Face access token from the environment so this cell does not
# depend on a variable that no cell in this notebook defines (`os` is imported
# in the first cell). Never paste the token into the notebook itself.
# NOTE(review): when HF_TOKEN is unset this is None; huggingface_hub is then
# expected to fall back to cached login credentials -- confirm.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Everything below targets "cuda" explicitly, so fail before fetching the model
# weights rather than after.
if not torch.cuda.is_available():
    raise RuntimeError(
        "A CUDA GPU is required to load the pipeline in float16; "
        "none is visible to PyTorch."
    )

# Load the pipeline in half precision.
pipe = StableDiffusion3Pipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    token=HF_TOKEN,
)

pipe = pipe.to("cuda")
pipe.enable_attention_slicing()

print("Loaded SD 3.5 on:", torch.cuda.get_device_name(0))


In [None]:
# Shared face description that every prompt variant starts with.
BASE_FACE = ", ".join(
    [
        "a photorealistic portrait of a human face",
        "studio lighting",
        "high resolution",
        "natural skin texture",
        "realistic anatomy",
        "professional photography",
        "symmetric face",
        "looking forward",
    ]
)

# Prompt variants per class label: BASE_FACE followed by an expression clause.
PROMPTS = {
    "neutral": [
        f"{BASE_FACE}, neutral expression, relaxed mouth, closed lips, no smile",
        f"{BASE_FACE}, neutral expression, calm face, closed lips, no visible teeth",
    ],
    "smiling": [
        f"{BASE_FACE}, soft smile, subtle mouth curve, no teeth, gentle expression",
        f"{BASE_FACE}, smiling, visible teeth, joyful expression, warm smile",
        f"{BASE_FACE}, medium smile, natural expression, slight teeth showing",
    ],
}

# Number of images to generate for each label.
PROMPT_COUNTS = dict(neutral=100, smiling=100)

# Number of prompt variants available per label (displayed as the cell output).
PROMPT_VARIANT_COUNTS = dict(zip(PROMPTS, map(len, PROMPTS.values())))
PROMPT_VARIANT_COUNTS


In [None]:
def make_generator(seed: int, device: torch.device):
    """
    Build a seeded torch.Generator on the requested device.

    Args:
        seed: Value handed to ``manual_seed``.
        device: Device the generator is created on.

    Returns:
        The seeded ``torch.Generator``.
    """
    # manual_seed() hands back the generator, so creation and seeding chain.
    return torch.Generator(device=device).manual_seed(seed)


In [None]:
def generate_classifier_dataset(
    pipe,
    out_root,
    prompts_dict,
    per_label_counts,
    height=768,
    width=768,
    num_steps=18,
    guidance_scale=7.0,
    base_seed=12345,
    device=torch.device("cuda"),
    skip_existing=False,
):
    """
    Generate a synthetic face dataset for smile vs neutral.

    - Uses ONE Stable Diffusion pipeline instance (no reloading).
    - Saves images under ``out_root/<label>/`` (one folder per key of
      ``prompts_dict``), named ``<label>_<index>_p<variant>.png``.
    - Returns a DataFrame with metadata for all samples.

    Args:
        pipe: Callable invoked as ``pipe(prompt, height=..., width=...,
            num_inference_steps=..., guidance_scale=..., generator=...)``;
            its result must expose ``.images``, whose first item is saved
            with ``.save(path)``.
        out_root: Directory under which the per-label folders are created.
        prompts_dict: Maps label -> list of prompt variants. Variants are
            used round-robin across the images of that label.
        per_label_counts: Maps label -> number of images to generate.
        height, width, num_steps, guidance_scale: Forwarded to ``pipe``.
        base_seed: Seed of the first image. Every following image, across
            all labels in ``prompts_dict`` iteration order, uses the next
            integer.
        device: Device on which each per-image generator is created.
        skip_existing: When True, an image whose target file already exists
            is not regenerated. Its metadata row is still recorded and its
            seed is still consumed, so a resumed run yields the same seeds
            and filenames as an uninterrupted one. Only file existence is
            checked; a file left incomplete by an interrupted save counts
            as done.

    Returns:
        pandas.DataFrame with one row per image and the columns filepath,
        label, prompt, prompt_variant_idx, seed, height, width, num_steps,
        guidance_scale.

    Raises:
        KeyError: A label in ``prompts_dict`` has no entry in
            ``per_label_counts``.
        ValueError: A label that should produce images has no prompts.
    """
    # Validate the whole configuration before generating anything, so a bad
    # entry for a later label cannot surface only after earlier labels have
    # already been generated.
    missing_counts = [lbl for lbl in prompts_dict if lbl not in per_label_counts]
    if missing_counts:
        raise KeyError(
            f"per_label_counts has no entry for label(s): {missing_counts}"
        )
    without_prompts = [
        lbl
        for lbl, variants in prompts_dict.items()
        if per_label_counts[lbl] > 0 and len(variants) == 0
    ]
    if without_prompts:
        raise ValueError(
            f"No prompt variants given for label(s): {without_prompts}"
        )

    metadata_rows = []
    global_index = 0  # used to derive seeds as base_seed + global_index

    for label, prompts in prompts_dict.items():
        n_images = per_label_counts[label]
        n_variants = len(prompts)

        label_dir = os.path.join(out_root, label)
        os.makedirs(label_dir, exist_ok=True)

        print(f"\nGenerating {n_images} images for label='{label}' with {n_variants} prompt variants...")
        for i in range(n_images):
            variant_idx = i % n_variants         # 0-based
            prompt_variant_idx = variant_idx + 1 # 1-based for filenames
            prompt = prompts[variant_idx]

            # The seed advances for every image, skipped or not, which keeps
            # the seed <-> file mapping stable across resumed runs.
            seed = base_seed + global_index
            global_index += 1

            # Filename encodes label, index and prompt variant index
            fname = f"{label}_{i:04d}_p{prompt_variant_idx}.png"
            fpath = os.path.join(label_dir, fname)

            if not (skip_existing and os.path.exists(fpath)):
                generator = make_generator(seed, device)

                # --- SD 3.5 generation ---
                out = pipe(
                    prompt,
                    height=height,
                    width=width,
                    num_inference_steps=num_steps,
                    guidance_scale=guidance_scale,
                    generator=generator,
                )
                out.images[0].save(fpath)

            metadata_rows.append(
                {
                    "filepath": fpath,
                    "label": label,
                    "prompt": prompt,
                    "prompt_variant_idx": prompt_variant_idx,
                    "seed": seed,
                    "height": height,
                    "width": width,
                    "num_steps": num_steps,
                    "guidance_scale": guidance_scale,
                }
            )

            # light progress logging
            if (i + 1) % 10 == 0 or (i + 1) == n_images:
                print(f"  [{label}] {i+1}/{n_images} images done...")

    return pd.DataFrame(metadata_rows)


In [None]:
# Run the full generation pass: the four required inputs go in positionally,
# the tunable settings by keyword.
df = generate_classifier_dataset(
    pipe,
    OUT_ROOT,
    PROMPTS,
    PROMPT_COUNTS,
    device=device,
    base_seed=BASE_SEED,
    guidance_scale=GUIDANCE_SCALE,
    num_steps=NUM_STEPS,
    width=WIDTH,
    height=HEIGHT,
)

# Persist the per-image metadata under the dataset root (no index column).
df.to_csv(METADATA_PATH, index=False)
print("\n Saved metadata to:", METADATA_PATH)

# Preview the first rows as the cell output.
df.head()


In [None]:
def show_samples(df, label, n=4):
    """Display up to n random samples for a given label.

    Args:
        df: DataFrame with at least the columns ``label`` and ``filepath``.
        label: Value of the ``label`` column to draw samples from.
        n: Maximum number of images to show; capped at the number of rows
            available for ``label``.

    Returns:
        None. The figure is shown as a side effect. When there is nothing to
        show (no rows for ``label``, or ``n <= 0``) a message is printed and
        no figure is created.
    """
    subset = df[df["label"] == label]
    n = min(n, len(subset))
    if n <= 0:
        # plt.subplots(1, 0) raises, so report the situation instead.
        print(f"No samples to show for label='{label}'.")
        return

    # Fixed random_state: the same rows are picked on every run.
    sample = subset.sample(n, random_state=42)

    # squeeze=False always yields a 2-D array of axes, so n == 1 needs no
    # special handling.
    _fig, axes = plt.subplots(1, n, figsize=(4 * n, 4), squeeze=False)

    for ax, filepath in zip(axes[0], sample["filepath"]):
        # Close the file as soon as the pixels have been copied by convert().
        with Image.open(filepath) as raw:
            img = raw.convert("RGB")
        ax.imshow(img)
        ax.set_title(f"{label}\n{os.path.basename(filepath)}")
        ax.axis("off")

    plt.tight_layout()
    plt.show()

# Preview ten random images per class, each under its own heading.
for _heading, _label in (
    ("Neutral examples:", "neutral"),
    ("Smiling examples:", "smiling"),
):
    print(_heading)
    show_samples(df, _label, n=10)
