# In [None]:
from diffusers import StableDiffusionXLPipeline
import torch, os
from PIL import Image, PngImagePlugin
import pandas as pd

# Naming used for the output directory and for every generated file name.
group = "ELITE_CODERS"
approach = "SDXL"

# Load the dataset: every column is read with dtype=str, and only the first
# 8500 rows are kept.
df = pd.read_csv("newsarticles.csv", dtype=str)
df = df.iloc[:8500]

# Create the output directory; an already existing one is reused as-is.
out_dir = f"{group}/GEN_{approach}_LARGE"
os.makedirs(out_dir, exist_ok=True)

# Load Stable Diffusion XL: float16 weights on a CUDA device when one is
# available, otherwise float32 weights on the CPU.
model_id = "stabilityai/stable-diffusion-xl-base-1.0"
if torch.cuda.is_available():
    device, weights_dtype = "cuda", torch.float16
else:
    device, weights_dtype = "cpu", torch.float32
pipe = StableDiffusionXLPipeline.from_pretrained(model_id, torch_dtype=weights_dtype)
pipe = pipe.to(device)

# Resize helper
def resize_img(img):
    """Return *img* resized to 460x260 using the Lanczos filter.

    The target size is fixed, so the input's aspect ratio is not preserved.
    """
    target_size = (460, 260)
    return img.resize(target_size, resample=Image.LANCZOS)

# Prompt builder. The title is cut to 70 characters and at most 3 tags are
# used to keep the prompt short for the text encoder; this is a character-based
# heuristic, not an exact token count.
def _clean_text(value):
    """Return *value* as stripped text, or "" when it is None or a float NaN."""
    # `value != value` is true only for NaN, which is what pandas puts in
    # empty CSV cells even when the column is read with dtype=str.
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value).strip()


def make_prompt(title, tags):
    """Build the text prompt for one article.

    Args:
        title: Article title. Only its first 70 characters are used. None or
            NaN is treated as an empty title instead of raising.
        tags: Comma-separated tag string. The first three non-empty tags are
            used. None or NaN is treated as "no tags".

    Returns:
        The prompt string. The "illustrating: ..." and "Keywords: ..." parts
        are left out when the title or the tags are empty, so the prompt never
        contains placeholder text such as "nan".
    """
    title = _clean_text(title)[:70].rstrip()

    # Strip each tag and drop empty ones before taking the first three, so
    # "a, b,,c" yields "a, b, c" rather than doubled spaces or blank entries.
    tag_list = [tag.strip() for tag in _clean_text(tags).split(',')]
    tags = ", ".join([tag for tag in tag_list if tag][:3])

    sentences = [
        f"Editorial news photo illustrating: {title}." if title
        else "Editorial news photo."
    ]
    if tags:
        sentences.append(f"Keywords: {tags}.")
    sentences.append("Realistic photojournalism, no text, no watermark.")
    return " ".join(sentences)

# Generate images: one PNG per article, with the prompt stored in the PNG
# text metadata under the key "prompt".
# NOTE(review): pipe() is called without width/height, so it renders at the
# pipeline's default resolution and resize_img then forces its fixed output
# size. If that default is square the picture is stretched — confirm, and
# consider passing a matching width/height to pipe().
total = len(df)
failed = 0
for count, (_, row) in enumerate(df.iterrows(), start=1):
    article_id = row['article_id']

    if pd.isna(article_id):
        # Without an id every such row would be written to the same
        # "nan_..." file, so skip it explicitly.
        failed += 1
        print(f"Row {count} skipped: missing article_id")
    else:
        try:
            # Prompt building is inside the try so that one malformed row
            # is reported and skipped instead of aborting the whole batch.
            prompt = make_prompt(row['article_title'], row['article_tags'])
            image = pipe(prompt, num_inference_steps=25, guidance_scale=7.5).images[0]
            image = resize_img(image)

            meta = PngImagePlugin.PngInfo()
            meta.add_text("prompt", prompt)
            fname = f"{article_id}_{group}_{approach}.png"
            image.save(os.path.join(out_dir, fname), pnginfo=meta)
        except Exception as e:
            # Deliberate best-effort: a failure on one article must not stop
            # a long batch. KeyboardInterrupt is not an Exception and still
            # stops the run.
            failed += 1
            print(f"Image {article_id} failed: {e}")

    # Progress is based on the row position (not the index label) and is
    # reported whether or not the current row succeeded.
    if count % 50 == 0 or count == total:
        print(f"Generated {count - failed}/{total} ({failed} failed)")