In [None]:
# CELL 1: Data Preparation
import os
import h5py
import csv
import numpy as np
from PIL import Image
from tqdm.auto import tqdm
import json
import shutil

# --- Configuration ---
FASHIONGEN_PATH = '/kaggle/input/fashiongen-validation/fashiongen_256_256_train.h5'
WORKING_DIR = "/kaggle/working"
TRAIN_ROOT = os.path.join(WORKING_DIR, "fashion_train")
TRAIN_IMAGES_DIR = os.path.join(TRAIN_ROOT, "images")
EVAL_ROOT = os.path.join(WORKING_DIR, "eval_data")
EVAL_GT_DIR = os.path.join(EVAL_ROOT, "gt")

# Define Split: rows [0, TRAIN_SIZE) are exported for training, the following
# EVAL_SIZE rows are exported as held-out ground truth.
TRAIN_SIZE = 30000
EVAL_SIZE = 3000
train_indices = range(0, TRAIN_SIZE)
eval_indices = range(TRAIN_SIZE, TRAIN_SIZE + EVAL_SIZE)


def _extract_prompt(raw_desc):
    """Turn one raw `input_description` entry into a short text prompt.

    The prompt is everything before the first comma of the decoded description.

    Args:
        raw_desc: The value read from the dataset for one row.

    Returns:
        The prompt as a `str`.
    """
    # NOTE(review): depending on the dataset's shape h5py may hand back a
    # length-1 array instead of a bare bytes scalar; unwrap it so str() does
    # not produce "[b'...']" -- TODO confirm against the actual file layout.
    if isinstance(raw_desc, np.ndarray):
        raw_desc = raw_desc.ravel()[0] if raw_desc.size else b""
    if isinstance(raw_desc, bytes):
        raw_desc = raw_desc.decode('utf-8', errors='ignore')
    return str(raw_desc).split(',')[0]


# --- Load Data ---
print("üìÇ Opening dataset...")
# The context manager guarantees the HDF5 handle is released even if the export fails.
with h5py.File(FASHIONGEN_PATH, 'r') as h5_file:
    images = h5_file['input_image']
    descriptions = h5_file['input_description']
    num_total = len(images)

    # Fail before touching the output directories if the split does not fit.
    if num_total < TRAIN_SIZE + EVAL_SIZE:
        raise ValueError(
            f"Dataset has {num_total} samples but the split needs "
            f"{TRAIN_SIZE + EVAL_SIZE} (TRAIN_SIZE + EVAL_SIZE)."
        )

    # Create directories. Both roots are wiped first, so anything stored under
    # them from a previous run is removed.
    for stale_root in (TRAIN_ROOT, EVAL_ROOT):
        if os.path.exists(stale_root):
            shutil.rmtree(stale_root)
    os.makedirs(TRAIN_IMAGES_DIR, exist_ok=True)
    os.makedirs(EVAL_GT_DIR, exist_ok=True)

    # --- 1. Export Training Data ---
    train_metadata = []
    print(f"üöÄ Exporting {TRAIN_SIZE} Training samples...")
    for idx in tqdm(train_indices):
        file_name = f"{idx:06d}.jpg"
        Image.fromarray(images[idx]).save(os.path.join(TRAIN_IMAGES_DIR, file_name), quality=95)

        # Path relative to TRAIN_ROOT
        train_metadata.append({
            "file_name": f"images/{file_name}",
            "text": _extract_prompt(descriptions[idx]),
        })

    with open(os.path.join(TRAIN_ROOT, "metadata.csv"), 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=["file_name", "text"])
        writer.writeheader()
        writer.writerows(train_metadata)

    # --- 2. Export Evaluation Data ---
    eval_configs = []
    print(f"üöÄ Exporting {EVAL_SIZE} Evaluation samples...")
    for idx in tqdm(eval_indices):
        Image.fromarray(images[idx]).save(os.path.join(EVAL_GT_DIR, f"{idx:06d}.png"))
        eval_configs.append({"idx": idx, "prompt": _extract_prompt(descriptions[idx])})

    with open(os.path.join(EVAL_ROOT, "eval_configs.json"), 'w') as f:
        json.dump(eval_configs, f)

print("‚úÖ Data Prep Complete.")

In [None]:
# CELL 2: Clean Room Installation (Training Stack ONLY)

import os
import subprocess
import sys
from importlib import metadata

# Always drive pip through the kernel's own interpreter so packages land in the
# environment this notebook actually imports from.
PIP = [sys.executable, "-m", "pip"]


def _run_checked(args):
    """Run `args` as a subprocess and raise `CalledProcessError` on a non-zero exit."""
    subprocess.run(args, check=True)


# 1. Create a constraints file.
# This tells pip: "Under no circumstances allow numpy to be >= 2.0"
with open("constraints.txt", "w") as f:
    f.write("numpy<2.0\n")
    f.write("huggingface_hub<0.25.0\n")

# 2. Uninstall potential conflict creators
print("üßπ Purging TensorFlow and existing NumPy...")
_run_checked(PIP + ["uninstall", "-y", "tensorflow", "tensorflow-cpu", "tensorflow-gpu", "protobuf", "numpy"])

# 3. Install Training Stack with constraints
print("üì¶ Installing Training Stack...")
# We use a single command to ensure dependency resolution sees all packages at once.
# bitsandbytes is intentionally NOT installed: it is removed again below, so
# installing it first only wasted time.
_run_checked(PIP + [
    "install",
    "numpy==1.26.4",  # Explicitly force safe numpy
    "diffusers==0.26.3",
    "transformers==4.38.2",
    "accelerate==0.27.2",
    "peft==0.9.0",
    "ftfy",
    "tensorboard",
    "-c", "constraints.txt",  # Apply strict constraints
])

# 4. Download Training Script
print("üìú Downloading Script...")
# -O overwrites in place; without it a re-run leaves the old script untouched
# and saves the new download as "<name>.1".
_run_checked([
    "wget", "-q", "-O", "train_text_to_image_lora.py",
    "https://raw.githubusercontent.com/huggingface/diffusers/v0.26.3/examples/text_to_image/train_text_to_image_lora.py",
])

# 5. Remove any pre-existing bitsandbytes
print("üöë Removing broken bitsandbytes library...")
# We uninstall it so peft stops trying to import it and crashing.
_run_checked(PIP + ["uninstall", "-y", "bitsandbytes"])
print("‚úÖ Broken library removed. Falling back to standard precision.")

# 6. Verify NumPy.
# `import numpy` would just return whatever module this kernel already loaded,
# so ask the package metadata what is installed on disk as well.
installed_numpy = metadata.version("numpy")
loaded_numpy = sys.modules.get("numpy")
print(f"‚úÖ Install Complete. NumPy Version: {installed_numpy}")
if installed_numpy.startswith("2") or (loaded_numpy is not None and loaded_numpy.__version__.startswith("2")):
    raise RuntimeError("STOP! NumPy 2.0 is still present. Restart the kernel.")
if loaded_numpy is not None and loaded_numpy.__version__ != installed_numpy:
    print(
        f"WARNING: this kernel still has NumPy {loaded_numpy.__version__} loaded but "
        f"{installed_numpy} is installed. Restart the kernel before importing the training stack."
    )

In [None]:
# CELL 3: Run Training (FP32 Safe Mode - No BNB)

OUTPUT_DIR = "/kaggle/working/fashion_lora_output"

!accelerate launch train_text_to_image_lora.py \
  --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5" \
  --train_data_dir="/kaggle/working/fashion_train" \
  --caption_column="text" \
  --resolution=256 \
  --random_flip \
  --train_batch_size=2 \
  --gradient_accumulation_steps=2 \
  --max_train_steps=5000 \
  --learning_rate=1e-04 \
  --max_grad_norm=1 \
  --lr_scheduler="cosine" \
  --lr_warmup_steps=500 \
  --output_dir={OUTPUT_DIR} \
  --checkpointing_steps=1000 \
  --seed=42 \
  --report_to="tensorboard"

print("‚úÖ Training Complete.")

In [None]:
# CELL 3.5: Plot Training Loss from TensorBoard Logs
import matplotlib.pyplot as plt
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import glob
import os

print("üìä Extracting training loss...")
log_dir = "/kaggle/working/fashion_lora_output"
SMOOTHING_WINDOW = 20  # number of logged points averaged for the smoothed curve


def _trailing_means(values, window):
    """Return the mean of every full `window`-sized run of `values`, in order.

    Element k of the result averages values[k : k + window], so it belongs to
    the position of the LAST value in that run (index k + window - 1).
    """
    return [sum(values[k:k + window]) / window for k in range(len(values) - window + 1)]


# Find the events file (it's inside a subfolder usually)
event_files = glob.glob(f"{log_dir}/**/events.out.tfevents.*", recursive=True)

if event_files:
    # Load the most recent event file. glob returns files in arbitrary order,
    # so pick by modification time instead of taking the first hit.
    latest_event_file = max(event_files, key=os.path.getmtime)
    ea = EventAccumulator(latest_event_file)
    ea.Reload()

    # Check available tags
    tags = ea.Tags()['scalars']
    if 'train_loss' in tags:
        losses = ea.Scalars('train_loss')
        steps = [x.step for x in losses]
        vals = [x.value for x in losses]

        # Plot
        plt.figure(figsize=(10, 6))
        plt.plot(steps, vals, label="Train Loss", color='blue', alpha=0.6)

        # Add a moving average for smoothing
        if len(vals) > SMOOTHING_WINDOW:
            avg_vals = _trailing_means(vals, SMOOTHING_WINDOW)
            # Align each average with the step that closes its window.
            plt.plot(steps[SMOOTHING_WINDOW - 1:], avg_vals, color='red', linewidth=2, label='Moving Avg')

        plt.xlabel("Step")
        plt.ylabel("Loss")
        plt.title("Training Loss Curve")
        plt.grid(True, alpha=0.3)
        plt.legend()

        # Save
        plt.savefig("training_loss.png", dpi=150)
        print("‚úÖ Saved 'training_loss.png'")
        plt.show()
    else:
        print("‚ö†Ô∏è 'train_loss' not found in logs. (Did you run for enough steps?)")
else:
    print("‚ö†Ô∏è No TensorBoard logs found in output directory.")

In [None]:
# CELL 4: Install Needed Libraries

import os

print("üì¶ Installing Evaluation Tools...")
# os.system returns the command's exit status; anything non-zero means pip failed,
# in which case the success message below must not be printed.
install_status = os.system("pip install clean-fid lpips 'scikit-image<0.23.0' opencv-python-headless torch-fidelity 'numpy<2.0'")
if install_status != 0:
    raise RuntimeError(f"Evaluation tool installation failed (exit status {install_status}).")

print("‚úÖ Eval Tools Installed.")

In [None]:
# CELL 5: Generate Images for Evaluation
import torch
import cv2
import numpy as np
import json
import os
from PIL import Image
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from tqdm.auto import tqdm

# --- Config ---
# Ground-truth images live in "gt"; generated images are written to "baseline" and "lora".
EVAL_ROOT = "/kaggle/working/eval_data"
GT_DIR, BASE_DIR, LORA_DIR = (
    os.path.join(EVAL_ROOT, subfolder) for subfolder in ("gt", "baseline", "lora")
)
for generated_dir in (BASE_DIR, LORA_DIR):
    os.makedirs(generated_dir, exist_ok=True)

# Each entry of eval_configs carries the sample index and its text prompt.
with open(os.path.join(EVAL_ROOT, "eval_configs.json"), 'r') as config_file:
    eval_configs = json.loads(config_file.read())

def get_canny_edge(pil_img, low_threshold=100, high_threshold=200):
    """Build a 3-channel Canny edge map to use as the ControlNet conditioning image.

    Args:
        pil_img: Source PIL image.
        low_threshold: Lower hysteresis threshold passed to `cv2.Canny`.
        high_threshold: Upper hysteresis threshold passed to `cv2.Canny`.

    Returns:
        A PIL image in which the single-channel edge map is repeated on all
        three channels.
    """
    # Normalise to 8-bit RGB first so palette / RGBA inputs are handled as well.
    img = np.array(pil_img.convert("RGB"))
    edges = cv2.Canny(img, low_threshold, high_threshold)
    # Replicate the single edge channel three times to get an RGB-shaped array.
    return Image.fromarray(np.stack([edges] * 3, axis=-1))

# --- Load Pipeline ---
print("‚öôÔ∏è Loading Pipeline...")
controlnet = ControlNetModel.from_pretrained("lllyasviel/control_v11p_sd15_canny", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16, safety_checker=None
).to("cuda")
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Sampling settings shared by the baseline and the LoRA run.
EVAL_SEED = 42
NUM_INFERENCE_STEPS = 40
GUIDANCE_SCALE = 10


def _generate_eval_set(out_dir, desc):
    """Generate one image per evaluation sample into `out_dir` using the current `pipe`.

    Samples whose output file already exists are skipped, so an interrupted
    run can be resumed. Every sample gets its own generator seeded with
    EVAL_SEED + idx, so the baseline and the LoRA image of a sample start from
    the same noise and re-runs reproduce the same images.

    Args:
        out_dir: Directory the generated PNGs are written to.
        desc: Label shown on the progress bar.
    """
    for item in tqdm(eval_configs, desc=desc):
        idx = item['idx']
        out_path = os.path.join(out_dir, f"{idx:06d}.png")
        if os.path.exists(out_path):
            continue

        gt_img = Image.open(os.path.join(GT_DIR, f"{idx:06d}.png")).convert("RGB")
        edge_img = get_canny_edge(gt_img)

        # A CPU generator keeps the initial noise independent of the GPU model.
        generator = torch.Generator(device="cpu").manual_seed(EVAL_SEED + idx)
        with torch.inference_mode():
            gen = pipe(
                item['prompt'],
                image=edge_img,
                num_inference_steps=NUM_INFERENCE_STEPS,
                guidance_scale=GUIDANCE_SCALE,
                generator=generator,
            ).images[0]
        gen.save(out_path)


# --- 1. Generate Baseline (ControlNet Only) ---
print("üöÄ Generating Baseline Images...")
_generate_eval_set(BASE_DIR, "Baseline")

# --- 2. Generate LoRA (ControlNet + Your Style) ---
print("üöÄ Generating LoRA Images...")
# Must happen after the baseline pass: from here on `pipe` carries the LoRA weights.
pipe.load_lora_weights("/kaggle/working/fashion_lora_output", weight_name="pytorch_lora_weights.safetensors")
_generate_eval_set(LORA_DIR, "LoRA")

print("‚úÖ Generation Complete.")

In [None]:
# CELL 6: Locate Data
import os
import shutil

# Resolve the folder that contains "eval_data": prefer this session's working
# directory, otherwise look inside each attached Kaggle input dataset.
KAGGLE_INPUT_DIR = "/kaggle/input"
INPUT_ROOT = None

if os.path.exists("/kaggle/working/eval_data"):
    INPUT_ROOT = "/kaggle/working"
elif os.path.isdir(KAGGLE_INPUT_DIR):
    # Check standard Kaggle input structure (sorted so the pick is deterministic)
    for d in sorted(os.listdir(KAGGLE_INPUT_DIR)):
        if os.path.exists(os.path.join(KAGGLE_INPUT_DIR, d, "eval_data")):
            INPUT_ROOT = os.path.join(KAGGLE_INPUT_DIR, d)
            print(f"üîé Auto-detected input root: {INPUT_ROOT}")
            break

# Without this guard a failed search surfaces as a NameError on INPUT_ROOT below.
if INPUT_ROOT is None:
    raise FileNotFoundError(
        "Could not find an 'eval_data' folder in /kaggle/working or in any dataset under /kaggle/input."
    )

EVAL_DATA_PATH = os.path.join(INPUT_ROOT, "eval_data")
GT_DIR = os.path.join(EVAL_DATA_PATH, "gt")
BASE_DIR = os.path.join(EVAL_DATA_PATH, "baseline")
LORA_DIR = os.path.join(EVAL_DATA_PATH, "lora")
CONFIG_PATH = os.path.join(EVAL_DATA_PATH, "eval_configs.json")

# Verify: all three image folders are read by later cells, so check every one
# of them before listing their contents.
missing_dirs = [path for path in (GT_DIR, BASE_DIR, LORA_DIR) if not os.path.isdir(path)]
if missing_dirs:
    raise FileNotFoundError(
        f"‚ùå Could not find 'eval_data' in {INPUT_ROOT}. Please check the path."
        f" Missing folders: {missing_dirs}"
    )

print(f"‚úÖ Found Data at: {EVAL_DATA_PATH}")
print(f"   - GT Images: {len(os.listdir(GT_DIR))}")
print(f"   - Baseline Images: {len(os.listdir(BASE_DIR))}")
print(f"   - LoRA Images: {len(os.listdir(LORA_DIR))}")

In [None]:
# CELL 7: Visual Comparison WITH Text Prompts
import matplotlib.pyplot as plt
import textwrap
import random
import cv2
import numpy as np
from PIL import Image
import os
import json

# Load Configs
# Parse the JSON list of evaluation entries (sample index + prompt) from CONFIG_PATH.
with open(CONFIG_PATH, 'r') as config_file:
    configs = json.loads(config_file.read())

def _open_eval_image(directory, idx):
    """Load the PNG for sample `idx` from `directory` as an RGB image and close the file."""
    with Image.open(os.path.join(directory, f"{idx:06d}.png")) as handle:
        return handle.convert("RGB")


def show_results_with_text(n=5, save_path="qualitative_comparison.png", seed=None):
    """Plot ground truth, edge map, baseline and LoRA output side by side for random samples.

    Each sample is one row of four panels, with its wrapped text prompt
    printed above the first panel. The figure is saved to `save_path` and shown.

    Args:
        n: Number of samples (rows) to draw; capped at the number of loaded configs.
        save_path: File the figure is written to.
        seed: Optional seed for the sample selection. None uses the global
            `random` state, so the selection differs between runs.

    Raises:
        ValueError: If `n` is smaller than 1 or no configs are loaded.
    """
    if n < 1:
        raise ValueError("n must be at least 1.")
    if not configs:
        raise ValueError("No evaluation configs loaded; nothing to display.")

    # Pick random samples; never ask for more than exist (random.sample would raise).
    n = min(n, len(configs))
    sampler = random if seed is None else random.Random(seed)
    samples = sampler.sample(configs, n)

    # Create a figure: N rows, 4 cols.
    # We make the figure taller (height=5 per row) to make room for text.
    # squeeze=False keeps `axes` two-dimensional even when n == 1.
    fig, axes = plt.subplots(n, 4, figsize=(16, 5*n), squeeze=False)

    # Adjust spacing to fit text
    fig.subplots_adjust(hspace=0.6, wspace=0.1)

    # Wrap text to 70 chars per line so it fits neatly
    wrapper = textwrap.TextWrapper(width=70)

    for i, item in enumerate(samples):
        idx = item['idx']
        wrapped_prompt = "\n".join(wrapper.wrap(f"PROMPT: {item['prompt']}"))

        gt = _open_eval_image(GT_DIR, idx)
        base = _open_eval_image(BASE_DIR, idx)
        lora = _open_eval_image(LORA_DIR, idx)

        # Create Edge Map (single channel repeated three times for display)
        edge = cv2.Canny(np.array(gt), 100, 200)
        edge = Image.fromarray(np.stack([edge]*3, axis=-1))

        # Add Text Label above the first image of the row
        # (x=0, y=1.15 in axis coordinates puts it above the plot)
        axes[i, 0].text(
            0, 1.15,
            wrapped_prompt,
            transform=axes[i, 0].transAxes,
            fontsize=12,
            fontweight='bold',
            verticalalignment='bottom',
            bbox=dict(facecolor='white', alpha=0.8, edgecolor='none')
        )

        # Plot the four panels of this row, left to right.
        panels = (
            (gt, "Ground Truth (Target)"),
            (edge, "Input Structure (Edge)"),
            (base, "Baseline (Structure Only)"),
            (lora, "Ours (Structure + Style)"),
        )
        for ax, (panel_image, panel_title) in zip(axes[i], panels):
            ax.imshow(panel_image)
            ax.set_title(panel_title, fontsize=10)
            ax.axis('off')

    # Save
    fig.savefig(save_path, dpi=150, bbox_inches='tight')
    plt.show()
    plt.close(fig)  # release the figure's memory once it has been shown
    print(f"‚úÖ Saved neat comparison to: {save_path}")

# Run it
show_results_with_text(n=5)

In [None]:
# CELL 8: Calculate Metrics (Fixed CLIP Truncation)
import torch
import numpy as np
import os
import json
from cleanfid import fid
import lpips
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
from tqdm.auto import tqdm

# Load Configs
with open(CONFIG_PATH, 'r') as config_file:
    eval_configs = json.load(config_file)

# One result dict per evaluated variant; each metric section below fills in its key.
metrics = {"baseline": {}, "lora": {}}

# --- 1. FID & KID ---
print("üìè Calculating FID and KID... (This takes a few minutes)")
# Both variants are compared against the same ground-truth folder: baseline first, then LoRA.
for variant, generated_dir in (("baseline", BASE_DIR), ("lora", LORA_DIR)):
    metrics[variant]["fid"] = fid.compute_fid(GT_DIR, generated_dir, mode="clean", num_workers=0)
    metrics[variant]["kid"] = fid.compute_kid(GT_DIR, generated_dir, mode="clean", num_workers=0)

# --- 2. LPIPS ---
print("üìè Calculating LPIPS...")
# AlexNet-backed LPIPS network, moved to the GPU once and reused for every image pair.
loss_fn_alex = lpips.LPIPS(net='alex').to("cuda")

def calc_lpips(dir1, dir2):
    """Average LPIPS distance over same-named PNG files present in both directories.

    Args:
        dir1: Reference directory; its ".png" files define the candidates.
        dir2: Directory compared against `dir1`; files missing here are skipped.

    Returns:
        The mean LPIPS score as a float, or NaN when no image pair was found.
    """
    scores = []
    skipped = 0
    # Sorted so the evaluation order does not depend on the filesystem.
    files = sorted(name for name in os.listdir(dir1) if name.endswith(".png"))
    for f in tqdm(files, desc="LPIPS", leave=False):
        p1 = os.path.join(dir1, f)
        p2 = os.path.join(dir2, f)
        if not os.path.exists(p2):
            skipped += 1
            continue
        img1 = lpips.im2tensor(lpips.load_image(p1)).to("cuda")
        img2 = lpips.im2tensor(lpips.load_image(p2)).to("cuda")
        with torch.no_grad():
            scores.append(loss_fn_alex(img1, img2).item())

    if skipped:
        print(f"WARNING: LPIPS skipped {skipped} image(s) from {dir1} with no counterpart in {dir2}.")
    if not scores:
        # np.mean([]) would also give NaN, but only after a RuntimeWarning.
        print(f"WARNING: no image pairs found between {dir1} and {dir2}; LPIPS is NaN.")
        return float("nan")
    return float(np.mean(scores))

# LPIPS of each variant against the ground truth: baseline first, then LoRA.
for variant, generated_dir in (("baseline", BASE_DIR), ("lora", LORA_DIR)):
    metrics[variant]["lpips"] = calc_lpips(GT_DIR, generated_dir)

# --- 3. CLIP Score (FIXED) ---
print("üìè Calculating CLIP Score...")
# Model and processor are loaded from the same checkpoint name.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
clip_model = CLIPModel.from_pretrained(CLIP_CHECKPOINT).to("cuda")
clip_processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)

def calc_clip(image_dir, configs):
    """Average CLIP image-text cosine similarity between generated images and their prompts.

    Args:
        image_dir: Directory holding the generated "<idx:06d>.png" images.
        configs: Iterable of dicts with an 'idx' and a 'prompt' entry.

    Returns:
        The mean similarity as a float, or NaN when none of the images exist.
    """
    # logits_per_image is the cosine similarity multiplied by the model's
    # learned temperature; divide by that value instead of assuming it is 100.
    logit_scale = clip_model.logit_scale.exp().item()

    scores = []
    for item in tqdm(configs, desc="CLIP", leave=False):
        idx = item['idx']
        prompt = item['prompt']
        img_path = os.path.join(image_dir, f"{idx:06d}.png")
        if not os.path.exists(img_path):
            continue

        # Load fully and close the file handle right away.
        with Image.open(img_path) as handle:
            image = handle.convert("RGB")

        # Prompts longer than CLIP's 77-token context are truncated.
        inputs = clip_processor(
            text=[prompt],
            images=image,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=77
        ).to(clip_model.device)

        with torch.no_grad():
            outputs = clip_model(**inputs)
        scores.append(outputs.logits_per_image.item() / logit_scale)

    if not scores:
        # np.mean([]) would also give NaN, but only after a RuntimeWarning.
        print(f"WARNING: no images from the configs were found in {image_dir}; CLIP score is NaN.")
        return float("nan")
    return float(np.mean(scores))

# CLIP score of each variant against its prompts: baseline first, then LoRA.
for variant, generated_dir in (("baseline", BASE_DIR), ("lora", LORA_DIR)):
    metrics[variant]["clip"] = calc_clip(generated_dir, eval_configs)

# --- Save ---
with open("final_metrics_recalculated.json", "w") as metrics_file:
    json.dump(metrics, metrics_file, indent=4)

# Echo the same numbers to the cell output, framed by a 40-character rule.
rule = "="*40
print("\n" + rule)
print("üìä RECALCULATED RESULTS")
print(rule)
print(json.dumps(metrics, indent=4))

In [None]:
# CELL 9: Zip Everything
# Bundles the metrics, the qualitative plot and (when present) the training
# loss plot and LoRA weights into one archive for download.
import os
import zipfile  # not imported anywhere else in the notebook

output_zip = "final_evaluation_results.zip"
print(f"üì¶ Zipping to {output_zip}...")

# The comparison plot is saved as "qualitative_comparison.png" by default; the
# "_with_text" name is accepted too in case it was saved under that name.
comparison_candidates = ("qualitative_comparison.png", "qualitative_comparison_with_text.png")

with zipfile.ZipFile(output_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
    # Add the new metrics and plot
    zf.write("final_metrics_recalculated.json")

    found_plots = [name for name in comparison_candidates if os.path.exists(name)]
    for plot_name in found_plots:
        zf.write(plot_name)
    if not found_plots:
        print("WARNING: no qualitative comparison plot found; it is not included in the zip.")

    # Try to add the training loss plot if it exists in the input
    # (It might be in the root of the input folder)
    loss_plot_path = os.path.join(INPUT_ROOT, "training_loss.png")
    if os.path.exists(loss_plot_path):
        zf.write(loss_plot_path, "training_loss.png")

    # Add weights if available
    weights_path = os.path.join(INPUT_ROOT, "fashion_lora_output", "pytorch_lora_weights.safetensors")
    if os.path.exists(weights_path):
        zf.write(weights_path, "weights/pytorch_lora_weights.safetensors")

# Report the name of the archive that was actually written.
print(f"‚úÖ DONE! Download '{output_zip}'")