# 🎨 Prompt Blending with Stable Diffusion and Gradio
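This notebook blends two text prompts by linearly interpolating their text-encoder embeddings, generates an image from the blended embedding with Stable Diffusion v1.5 (sampled with classifier-free guidance), and exposes the whole thing through a Gradio interface.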

In [None]:
!pip install diffusers transformers accelerate gradio torch scipy --quiet


In [None]:
import torch
from diffusers import StableDiffusionPipeline
import gradio as gr
from PIL import Image
import numpy as np

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Half precision is only requested when running on CUDA.
use_fp16 = device == "cuda"
print("Using device:", device)

# Load the Stable Diffusion v1.5 pipeline in the dtype chosen above and
# move it to the selected device.
_weights_dtype = torch.float16 if use_fp16 else torch.float32
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=_weights_dtype,
)
pipe = pipe.to(device)

# Clear the pipeline's safety checker component.
pipe.safety_checker = None


In [None]:
def _encode_prompt(prompt):
    """Tokenize *prompt* (padded/truncated to the model max length) and return its text-encoder hidden states."""
    token_ids = pipe.tokenizer(
        prompt,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=pipe.tokenizer.model_max_length,
    ).input_ids.to(device)
    return pipe.text_encoder(token_ids)[0]


@torch.no_grad()
def generate_blend(prompt_a, prompt_b, alpha=0.5, guidance_scale=7.5, seed=42,
                   num_inference_steps=20):
    """
    Generate an image from a linear blend of two prompt embeddings.

    The two prompts are encoded separately, their embeddings are mixed as
    ``(1 - alpha) * A + alpha * B``, and the result conditions a manual
    denoising loop that applies classifier-free guidance against the
    empty-prompt embedding.

    Args:
        prompt_a: Text prompt used when ``alpha`` is 0.
        prompt_b: Text prompt used when ``alpha`` is 1.
        alpha: Blend weight given to ``prompt_b``.
        guidance_scale: Classifier-free guidance strength.
        seed: Seed for the initial latent noise. Cast to ``int`` because UI
            widgets may deliver it as a float.
        num_inference_steps: Number of scheduler steps (default 20, the value
            that was previously hard-coded).

    Returns:
        A ``PIL.Image.Image`` decoded from the final latents.
    """
    # torch.Generator.manual_seed only accepts integers.
    generator = torch.Generator(device=device).manual_seed(int(seed))

    embedding_a = _encode_prompt(prompt_a)
    embedding_b = _encode_prompt(prompt_b)
    uncond_embedding = _encode_prompt("")
    blended_embedding = (1 - alpha) * embedding_a + alpha * embedding_b
    # Unconditional first, conditional second: matches the chunk() order below.
    text_embeddings = torch.cat([uncond_embedding, blended_embedding], dim=0)

    # Timesteps must be configured before reading init_noise_sigma: some
    # schedulers only compute that value inside set_timesteps().
    pipe.scheduler.set_timesteps(int(num_inference_steps), device=device)

    latents = torch.randn(
        # Read the channel count from the config; direct attribute access on
        # the UNet is deprecated in diffusers.
        (1, pipe.unet.config.in_channels, 64, 64),
        generator=generator,
        device=device,
        # Keep the latents in the same dtype as the conditioning tensors.
        dtype=text_embeddings.dtype,
    )
    latents = latents * pipe.scheduler.init_noise_sigma

    for t in pipe.scheduler.timesteps:
        # Duplicate the latents so one UNet call yields both the
        # unconditional and the conditional noise prediction.
        latent_input = torch.cat([latents] * 2)
        latent_input = pipe.scheduler.scale_model_input(latent_input, t)
        noise_pred = pipe.unet(latent_input, t, encoder_hidden_states=text_embeddings).sample
        noise_uncond, noise_cond = noise_pred.chunk(2)
        noise = noise_uncond + guidance_scale * (noise_cond - noise_uncond)
        latents = pipe.scheduler.step(noise, t, latents).prev_sample

    # Undo the VAE latent scaling; fall back to the SD v1 constant when the
    # installed diffusers version does not expose it on the config.
    scaling_factor = getattr(pipe.vae.config, "scaling_factor", 0.18215)
    latents = latents / scaling_factor
    image = pipe.vae.decode(latents).sample[0]

    # Map from [-1, 1] to [0, 1], then to HWC uint8. Cast to float32 first so
    # the 8-bit conversion is not done in half precision, and round instead of
    # truncating so pixel values are not biased downwards.
    image = (image / 2 + 0.5).clamp(0, 1)
    image = image.cpu().permute(1, 2, 0).float().numpy()
    image = (image * 255).round().astype(np.uint8)

    return Image.fromarray(image)


In [None]:
# Build and launch the web UI. The five inputs map positionally onto
# generate_blend's (prompt_a, prompt_b, alpha, guidance_scale, seed).
gr.Interface(
    fn=generate_blend,
    inputs=[
        gr.Textbox(label="Prompt A", value="a gothic cathedral at sunrise"),
        gr.Textbox(label="Prompt B", value="a futuristic cyberpunk city"),
        gr.Slider(0, 1, value=0.5, step=0.05, label="Blend (alpha)"),
        gr.Slider(1, 15, value=7.5, step=0.5, label="Guidance Scale"),
        gr.Slider(0, 10000, value=42, step=1, label="Seed"),
    ],
    # NOTE: gr.Image does not define a `downloadable` keyword; Gradio releases
    # whose components have no **kwargs catch-all raise TypeError on it. The
    # component's built-in download control is left at its default setting.
    outputs=gr.Image(type="pil", label="Blended Output"),
    title="🎨 Prompt Blend with Stable Diffusion",
    description="Blend two prompts using classifier-free guidance in Stable Diffusion. Download the result with a single click.",
).launch(debug=True)
