<a href="https://colab.research.google.com/github/haasithp/Image-Generation/blob/main/image_gen.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Install Cascade

In [None]:
# ## If running on Colab
# %cd /content/
# !git lfs install
# !git clone https://huggingface.co/spaces/multimodalart/stable-cascade
# %cd /content/stable-cascade
# !pip install -r /content/stable-cascade/requirements.txt
# !pip3 uninstall torch torchvision torchaudio -y
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# !pip uninstall torchdata torchtext -y
# !pip install torchdata torchtext


### If running on Kaggle
%cd /kaggle/working/
!git lfs install
!git clone https://huggingface.co/spaces/multimodalart/stable-cascade
%cd /kaggle/working/stable-cascade
!pip install -r /kaggle/working/stable-cascade/requirements.txt
!pip3 uninstall torch torchvision torchaudio -y
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip uninstall torchdata torchtext -y
!pip install torchdata torchtext

## Load Models

In [None]:
import torch
import numpy as np
from diffusers import StableCascadeDecoderPipeline, StableCascadePriorPipeline
from diffusers.utils import numpy_to_pil

# Choose the compute device and the matching precision together:
# CUDA device 0 with bfloat16 when a GPU is available, otherwise CPU with float32.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    dtype = torch.bfloat16
else:
    device = torch.device("cpu")
    dtype = torch.float32

# Load the Stable Cascade prior and decoder pipelines in the chosen precision
# and move each one to the selected device.
# The identifiers below can be replaced with other model names or paths.
prior_pipeline = StableCascadePriorPipeline.from_pretrained(
    "stabilityai/stable-cascade-prior",
    torch_dtype=dtype,
).to(device)
decoder_pipeline = StableCascadeDecoderPipeline.from_pretrained(
    "stabilityai/stable-cascade",
    torch_dtype=dtype,
).to(device)

### Generating Image with a resolution of 512x512

In [None]:
import random
import re

Random_seed = random.randint(0, 6969)  # Default seed, drawn once when this cell runs

def generate_image(prompt, seed=Random_seed, width=1024, height=1024, prior_guidance_scale=7.5, decoder_guidance_scale=0.0, num_inference_steps=50, num_images_per_prompt=1):
    """Generate an image from a text prompt using the prior and decoder pipelines.

    Relies on the module-level ``device``, ``prior_pipeline`` and
    ``decoder_pipeline`` created in the model-loading cell.

    Args:
        prompt: Text prompt passed to both the prior and the decoder.
        seed: Seed for the torch generator. The default is the value
            ``Random_seed`` had when this function was defined; pass ``seed``
            explicitly to control or vary the result between calls.
        width: Requested image width, forwarded to the prior.
        height: Requested image height, forwarded to the prior.
        prior_guidance_scale: Guidance scale forwarded to the prior.
        decoder_guidance_scale: Guidance scale forwarded to the decoder.
        num_inference_steps: Step count forwarded to both the prior and the decoder.
        num_images_per_prompt: Images per prompt requested from both stages.

    Returns:
        The first image in the decoder output (``decoder_output.images[0]``).

    Raises:
        ValueError: If the prior output carries no image embeddings.
    """
    # A single generator is shared by both stages.
    generator = torch.Generator(device=device).manual_seed(seed)

    # Generate with Prior
    prior_results = prior_pipeline(
        prompt=prompt,
        height=height,
        width=width,
        num_inference_steps=num_inference_steps,
        guidance_scale=prior_guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
        generator=generator
    )

    # Read the embeddings straight off the output object. The prior returns one
    # output object, not a sequence of result dicts, so iterating over it would
    # walk its field names instead of results.
    image_embeddings = getattr(prior_results, "image_embeddings", None)
    if image_embeddings is None:
        raise ValueError("Failed to generate image embeddings from the prior pipeline")

    # Generate with Decoder
    decoder_output = decoder_pipeline(
        image_embeddings=image_embeddings,
        prompt=prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=decoder_guidance_scale,
        num_images_per_prompt=num_images_per_prompt,
        generator=generator,
        output_type="pil"
    )

    return decoder_output.images[0]  # Returns the first generated image

# Example usage
if __name__ == "__main__":
    prompt = "An elegant woman wearing a flowing red dress stands on a cobblestone street lined with historic buildings. The sun sets behind her, casting a warm glow over the scene. In the distance, a quaint café with outdoor seating adds to the picturesque ambiance. The woman's confident posture exudes sophistication, while her expression hints at a sense of wanderlust and adventure"
    # Draw a fresh seed for this run and pass it explicitly below.
    Random_seed = random.randint(0, 6969)

    # Sanitize the prompt: drop everything except word characters, whitespace
    # and hyphens, keep the first 50 characters, and turn spaces into underscores.
    safe_prompt = re.sub(r'[^\w\s-]', '', prompt)[:50].strip().replace(' ', '_')

    # Generate the image
    image = generate_image(prompt, num_images_per_prompt=1, seed=Random_seed, width=512, height=512, prior_guidance_scale=7.5, decoder_guidance_scale=0.0, num_inference_steps=50)

    # Render inline when running under IPython/Jupyter; image.show() launches an
    # external viewer, which shows nothing in hosted notebooks. Fall back to it
    # only when IPython is not available (e.g. running as a plain script).
    try:
        from IPython.display import display
    except ImportError:
        image.show()
    else:
        display(image)


#### Import necessary libraries for stable-diffusion-x4-upscaler
##### This upscales the image by a factor of 4
##### By default, this runs on the CPU
##### To run on the GPU instead, uncomment the GPU cell below and use it in place of the CPU cell

In [None]:
import requests
from PIL import Image
from io import BytesIO
from diffusers import StableDiffusionUpscalePipeline
import torch

In [None]:
# CPU variant
# Load the x4 upscaler pipeline from the "fp16" revision of the model
# repository, with torch_dtype set to float32.
model_id = "stabilityai/stable-diffusion-x4-upscaler"
pipeline = StableDiffusionUpscalePipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float32,
    revision="fp16",
)

In [None]:
# #GPU
# # load model and scheduler
# model_id = "stabilityai/stable-diffusion-x4-upscaler"
# pipeline = StableDiffusionUpscalePipeline.from_pretrained(
#     model_id, revision="fp16", torch_dtype=torch.float16
# ).to('cuda')
# pipeline = pipeline

In [None]:
# Build a file-name stem from the prompt: strip everything except word
# characters, whitespace and hyphens, then keep the first 50 characters,
# trim the ends and replace spaces with underscores.
filename_from_prompt = re.sub(r'[^\w\s-]', '', prompt)
filename_from_prompt = filename_from_prompt[:50].strip().replace(' ', '_')

# Name of the output file for the upscaled image.
upscaled_filename = f"{filename_from_prompt}_upscaled.png"

# Run the upscaler on the generated image (using the same prompt), take the
# first result and write it to disk.
upscaled_image = pipeline(image=image, prompt=prompt).images[0]
upscaled_image.save(upscaled_filename)