# Stable Diffusion Image Generation

Created by: Vladus-CPU
Last Updated: 2025-01-01

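This notebook demonstrates how to generate images from text prompts with Stable Diffusion. It covers choosing a sampler (Euler ancestral or DDIM) and tuning the generation parameters: number of sampling steps, random seed, guidance scale, image size, and an optional high-resolution pass. Prompts longer than CLIP's 77-token limit are truncated.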

## Installing Required Libraries

In [None]:
!pip install --upgrade diffusers transformers torch torchvision matplotlib safetensors

## Importing Required Modules

In [None]:
from diffusers import StableDiffusionPipeline, EulerAncestralDiscreteScheduler, DDIMScheduler
from transformers import CLIPTextModel, CLIPTokenizer
import torch
from PIL import Image
import matplotlib.pyplot as plt
import random

## GPU Availability Check

In [None]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

## Loading Tokenizer and Text Encoder

In [None]:
# Stable Diffusion v1.x checkpoints are conditioned on the CLIP ViT-L/14 text
# encoder (768-dimensional hidden states). The ViT-B/32 variant produces
# 512-dimensional embeddings that do not match the model, so load ViT-L/14.
CLIP_MODEL_ID = "openai/clip-vit-large-patch14"
tokenizer = CLIPTokenizer.from_pretrained(CLIP_MODEL_ID)
text_encoder = CLIPTextModel.from_pretrained(CLIP_MODEL_ID).to(device)

## Pipeline Loading Function with Sampling Method Selection

In [None]:
def load_pipeline(model_id, scheduler_type="Euler"):
    """Build a Stable Diffusion pipeline that uses the requested scheduler.

    Args:
        model_id (str): Identifier of the model to load.
        scheduler_type (str): Scheduler to attach, 'Euler' or 'DDIM'.

    Returns:
        StableDiffusionPipeline: The pipeline, moved to the global ``device``.

    Raises:
        ValueError: If ``scheduler_type`` is neither 'Euler' nor 'DDIM'.
    """
    # Reject unsupported scheduler names before anything is loaded.
    if scheduler_type not in ("Euler", "DDIM"):
        raise ValueError("Unknown scheduler type")

    scheduler_cls = (
        EulerAncestralDiscreteScheduler if scheduler_type == "Euler" else DDIMScheduler
    )
    scheduler = scheduler_cls.from_pretrained(model_id, subfolder="scheduler")

    # Half precision on CUDA, full precision everywhere else.
    dtype = torch.float32
    if device.type == "cuda":
        dtype = torch.float16

    return StableDiffusionPipeline.from_pretrained(
        model_id,
        scheduler=scheduler,
        torch_dtype=dtype,
    ).to(device)

## Image Generation Function

In [None]:
def _round_down_to_multiple_of_8(value):
    """Truncate ``value`` to an int and round it down to a multiple of 8."""
    return int(value) // 8 * 8


def generate_image(
    prompt,
    sampler="Euler",
    sampling_steps=50,
    seed=None,
    cfg_scale=7.5,
    image_size=(512, 512),
    hires_fix=False,
    hires_scale=2.0,
    hires_steps=20,
    model_id="CompVis/stable-diffusion-v1-4",
):
    """Generate an image from a text prompt with customizable parameters.

    The pipeline is loaded through ``load_pipeline`` on every call and is
    given the raw prompt; it performs its own tokenization and text encoding.

    Args:
        prompt (str): Text description of the desired image.
        sampler (str): Sampling method ('Euler' or 'DDIM').
        sampling_steps (int): Number of sampling steps when ``hires_fix`` is
            False.
        seed (int, optional): Random seed for reproducibility. Any integer,
            including 0, seeds the generation; ``None`` leaves it unseeded.
        cfg_scale (float): Text guidance scale.
        image_size (tuple): Base image dimensions (width, height). Both must
            be positive multiples of 8.
        hires_fix (bool): If True, render at ``image_size * hires_scale``
            instead of ``image_size``.
        hires_scale (float): Scale factor applied to ``image_size`` when
            ``hires_fix`` is True. Each scaled dimension is rounded down to
            a multiple of 8.
        hires_steps (int): Number of sampling steps when ``hires_fix`` is
            True.
        model_id (str): Model identifier passed to ``load_pipeline``.

    Returns:
        PIL.Image.Image: Generated image.

    Raises:
        ValueError: If ``image_size`` is not made of positive multiples of 8,
            if the scaled size is smaller than 8 pixels in either dimension,
            or (from ``load_pipeline``) if ``sampler`` is unknown.
    """
    # Validate sizes up front so a bad argument fails before the (large)
    # model is loaded.
    width, height = image_size
    if width <= 0 or height <= 0 or width % 8 or height % 8:
        raise ValueError(
            f"image_size must contain positive multiples of 8, got {image_size!r}"
        )

    if hires_fix:
        # Only the enlarged render is produced. A separate base-resolution
        # pass would be discarded, so it is not run.
        # NOTE(review): this is a single text-to-image pass at the enlarged
        # size, not an img2img refinement of an upscaled base image.
        width = _round_down_to_multiple_of_8(width * hires_scale)
        height = _round_down_to_multiple_of_8(height * hires_scale)
        if width < 8 or height < 8:
            raise ValueError(
                f"hires_scale={hires_scale!r} yields an image smaller than 8 pixels"
            )
        num_steps = hires_steps
    else:
        num_steps = sampling_steps

    # Seed the global RNGs and build a dedicated generator. The check is
    # `is not None` so that seed=0 is honored like any other seed.
    generator = None
    if seed is not None:
        torch.manual_seed(seed)
        random.seed(seed)
        generator = torch.Generator(device).manual_seed(seed)

    # A single pipeline instance serves the whole call.
    pipe = load_pipeline(model_id, sampler)

    output = pipe(
        prompt=prompt,
        num_inference_steps=num_steps,
        guidance_scale=cfg_scale,
        height=height,
        width=width,
        generator=generator,
    )
    return output.images[0]

## Example Usage

In [None]:
# Text description of the scene to render
prompt = "A serene landscape with mountains and a lake at sunset, photorealistic style"

# Keyword arguments forwarded to generate_image
params = dict(
    sampler="Euler",
    sampling_steps=50,
    seed=42,
    cfg_scale=7.5,
    image_size=(768, 768),
    hires_fix=True,
    hires_scale=1.5,
    hires_steps=20,
)

# Run the generation
generated_image = generate_image(prompt, **params)

# Show the result on a single 12x12 inch figure with the axes hidden
fig, ax = plt.subplots(figsize=(12, 12))
ax.imshow(generated_image)
ax.axis('off')
plt.show()

## Save Generated Image

In [None]:
# Write the generated image to generated_image.png (a relative path, so it is
# resolved against the current working directory) and report where it went.
output_path = "generated_image.png"
generated_image.save(output_path)
print(f"Image saved to {output_path}")