<a href="https://colab.research.google.com/github/anshupandey/MA_AI900/blob/main/Lab2_HuggingFace_Image_Generation_with_Diffusion_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Image Generation with Open Source Models

In [None]:
!pip install diffusers accelerate --quiet

# Text-to-Image Generation with Stable Diffusion

In [None]:

from diffusers import AutoPipelineForText2Image
import torch

# Fail fast when no GPU is attached: the pipeline below is moved to "cuda", and
# checking first avoids fetching the checkpoint only to crash afterwards.
if not torch.cuda.is_available():
    raise RuntimeError(
        "No CUDA GPU detected. Switch to a GPU runtime "
        "(in Colab: Runtime > Change runtime type) and re-run this cell."
    )

# Initialize a text-to-image generator from the pre-trained
# "stabilityai/stable-diffusion-xl-base-1.0" checkpoint, loading its
# half-precision (fp16) safetensors weights, then move it to the GPU.
pipeline_text2image = AutoPipelineForText2Image.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
).to("cuda")

# Define a textual prompt describing the desired image.
prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"

# Seed the sampler so re-running the cell starts from the same initial noise.
# NOTE: results can still differ across hardware / library versions.
generator = torch.Generator(device="cuda").manual_seed(42)

# Generate an image and select the first image in the returned list.
image = pipeline_text2image(prompt=prompt, generator=generator).images[0]

# Last expression of the cell: rendered inline by the notebook.
image


# Image-to-Image Variation with Stable Diffusion

In [None]:
from diffusers import AutoPipelineForImage2Image
from diffusers.utils import load_image, make_image_grid

# Build the image-to-image pipeline from the text-to-image pipeline created earlier;
# from_pipe avoids consuming additional memory for a second copy of the checkpoint.
# Requires the text-to-image cell (which defines `pipeline_text2image` and imports torch)
# to have been run first.
pipeline = AutoPipelineForImage2Image.from_pipe(pipeline_text2image).to("cuda")

# Specify the URL of an initial image to use as a starting point for generation.
url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png"
init_image = load_image(url)

# Define a textual prompt describing the desired image transformation.
prompt = "a dog catching a frisbee in the jungle"

# Seed the sampler so re-running the cell reproduces the same variation.
# NOTE: results can still differ across hardware / library versions.
generator = torch.Generator(device="cuda").manual_seed(42)

# strength: how strongly the initial image is noised before denoising
#           (higher values let the result drift further from init_image).
# guidance_scale: how closely the result should follow the prompt.
image = pipeline(
    prompt=prompt,
    image=init_image,
    strength=0.8,
    guidance_scale=10.5,
    generator=generator,
).images[0]

# Show the starting image and the generated variation side by side.
make_image_grid([init_image, image], rows=1, cols=2)


# Image Inpainting with Stable Diffusion

In [None]:
from diffusers import AutoPipelineForInpainting
from diffusers.utils import load_image, make_image_grid

# Use from_pipe to avoid consuming additional memory when loading a checkpoint.
# Requires the text-to-image cell (which defines `pipeline_text2image` and imports torch)
# to have been run first.
pipeline = AutoPipelineForInpainting.from_pipe(pipeline_text2image).to("cuda")

# Source image and the mask image passed as `mask_image` to the inpainting pipeline.
img_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-text2img.png"
mask_url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/sdxl-inpaint-mask.png"

init_image = load_image(img_url)
mask_image = load_image(mask_url)

# Textual description of what should be painted into the masked region.
prompt = "A deep sea diver floating"

# Seed the sampler so re-running the cell reproduces the same inpainted result.
# NOTE: results can still differ across hardware / library versions.
generator = torch.Generator(device="cuda").manual_seed(42)

image = pipeline(
    prompt=prompt,
    image=init_image,
    mask_image=mask_image,
    strength=0.85,
    guidance_scale=12.5,
    generator=generator,
).images[0]

# Show the original image, the mask, and the inpainted result side by side.
make_image_grid([init_image, mask_image, image], rows=1, cols=3)
