In [None]:
! pip install datasets evaluate transformers diffusers accelerate ftfy pyarrow --quiet

In [None]:
import torch
from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline, StableDiffusionInpaintPipeline
from PIL import Image
import requests
from io import BytesIO

# Pick the device the pipelines will run on: CUDA GPU when one is visible
# to PyTorch, otherwise the CPU.
if torch.cuda.is_available():
    torch_device = "cuda"
else:
    torch_device = "cpu"

### Some examples

In [None]:
# -------------------------------------
# 1. TEXT-TO-IMAGE GENERATION
# -------------------------------------
#
# 📝 Parameters explained:
# - prompt: text description of the image
# - num_inference_steps: how many denoising steps (higher = better quality, slower)
# - guidance_scale: how strongly the prompt guides generation (7-8 is common;
#   higher = more prompt fidelity, lower = more creativity)
# - generator: seeded torch RNG, so re-running this cell reproduces the same image
#
# (These notes are comments rather than a trailing string literal: a bare string
# as the last expression of a cell would be echoed as the cell's output.)

# Half precision is only used on the GPU; on CPU fall back to float32, because
# float16 inference there may be unsupported or impractically slow.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if torch_device == "cuda" else torch.float32,
).to(torch_device)

# Example prompt
prompt = "Monkey D. Luffy from the anime 'One Piece' shaking hands with Prime Minister of India, Mr. Narendra Modi"

image = pipe(
    prompt,
    num_inference_steps=30,
    guidance_scale=7.5,
    # Fixed seed -> deterministic starting noise -> reproducible result.
    generator=torch.Generator(device=torch_device).manual_seed(0),
).images[0]

display(image)

In [None]:
# -------------------------------------
# 2. IMAGE-TO-IMAGE GENERATION
# -------------------------------------
#
# 📝 Parameters explained:
# - image: input image you want to transform
# - strength: controls how much noise is added
#     - low strength (0.2-0.4): keeps input structure, small edits
#     - high strength (0.7-0.9): more creative, diverges from input
# - generator: seeded torch RNG, so re-running this cell reproduces the same image
#
# (These notes are comments rather than a trailing string literal: a bare string
# as the last expression of a cell would be echoed as the cell's output.)

# Half precision is only used on the GPU; on CPU fall back to float32, because
# float16 inference there may be unsupported or impractically slow.
img2img_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if torch_device == "cuda" else torch.float32,
).to(torch_device)


url = "https://images.unsplash.com/photo-1480497490787-505ec076689f?q=80&w=2069&auto=format&fit=crop&ixlib=rb-4.1.0&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"

# Fail fast with a clear HTTP error (or a timeout) instead of handing an error
# page to PIL, which would raise a confusing "cannot identify image file".
response = requests.get(url, timeout=30)
response.raise_for_status()
init_image = Image.open(BytesIO(response.content)).convert("RGB").resize((512, 512))

prompt = "A futuristic city skyline painted on the mountain"
strength = 0.7  # how much noise to add: 0 = almost same as input, 1 = ignore input
num_inference_steps = 40

img2img = img2img_pipe(
    prompt=prompt,
    image=init_image,
    strength=strength,
    num_inference_steps=num_inference_steps,
    guidance_scale=7.5,
    # Fixed seed -> deterministic noise -> reproducible result.
    generator=torch.Generator(device=torch_device).manual_seed(0),
).images[0]

# Show the input first, then the transformed result, for easy comparison.
display(init_image)
display(img2img)