In [1]:
# Imports
import os
from diffusers import DiffusionPipeline
import torch
from safetensors.torch import load_file
from PIL import Image

  deprecate("Transformer2DModelOutput", "1.0.0", deprecation_message)


In [2]:
# In order to use this notebook locally you need to have a GPU with CUDA installed. 
# You can try installing the packages using the requirements.txt file (pip install -r requirements.txt), or install torch with CUDA support manually. Check here: https://pytorch.org/get-started/locally/
print(torch.cuda.is_available())  # Checks if CUDA is available, Check compatibility with your GPU: https://docs.nvidia.com/deeplearning/cudnn/latest/reference/support-matrix.html

True


### Loading the Model from HF

In [3]:
# Uses the Stable Diffusion XL model with FP16 precision from Hugging Face
# See the model here: https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16"
)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [4]:
pipe.to("cuda")  # Moves the model to the GPU

StableDiffusionXLPipeline {
  "_class_name": "StableDiffusionXLPipeline",
  "_diffusers_version": "0.29.1",
  "_name_or_path": "stabilityai/stable-diffusion-xl-base-1.0",
  "feature_extractor": [
    null,
    null
  ],
  "force_zeros_for_empty_prompt": true,
  "image_encoder": [
    null,
    null
  ],
  "scheduler": [
    "diffusers",
    "EulerDiscreteScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "text_encoder_2": [
    "transformers",
    "CLIPTextModelWithProjection"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "tokenizer_2": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ]
}

### Fine-Tuning

In [5]:
safetensors_file_path = "./catastropheXL.safetensors"  # Use if you want to fine-tune the model using LoRA embeddings from civitai.com for example
safetensors_weights = load_file(safetensors_file_path)  # Loads the fine-tuning weights from the file
pipe.unet.load_state_dict(safetensors_weights, strict=False)  # Loads the fine-tuning weights into the Stable Diffusion XL model

_IncompatibleKeys(missing_keys=['conv_in.weight', 'conv_in.bias', 'time_embedding.linear_1.weight', 'time_embedding.linear_1.bias', 'time_embedding.linear_2.weight', 'time_embedding.linear_2.bias', 'add_embedding.linear_1.weight', 'add_embedding.linear_1.bias', 'add_embedding.linear_2.weight', 'add_embedding.linear_2.bias', 'down_blocks.0.resnets.0.norm1.weight', 'down_blocks.0.resnets.0.norm1.bias', 'down_blocks.0.resnets.0.conv1.weight', 'down_blocks.0.resnets.0.conv1.bias', 'down_blocks.0.resnets.0.time_emb_proj.weight', 'down_blocks.0.resnets.0.time_emb_proj.bias', 'down_blocks.0.resnets.0.norm2.weight', 'down_blocks.0.resnets.0.norm2.bias', 'down_blocks.0.resnets.0.conv2.weight', 'down_blocks.0.resnets.0.conv2.bias', 'down_blocks.0.resnets.1.norm1.weight', 'down_blocks.0.resnets.1.norm1.bias', 'down_blocks.0.resnets.1.conv1.weight', 'down_blocks.0.resnets.1.conv1.bias', 'down_blocks.0.resnets.1.time_emb_proj.weight', 'down_blocks.0.resnets.1.time_emb_proj.bias', 'down_blocks.0.res

### Settings / Configuration

In [6]:
positive_prompt = "A destroyed and decaying major metropolitan financial hub on a rainy grey day {Photorealistic, Cinematic, High Detail, Lifelike, 8k Resolution}"  # Define your positive prompt here, e.g. everything that you want to see in the image
negative_prompt = "Avoid any humans, Comical elements, Cartoonish styles; Exclude Unrealistic, Low-Quality, Grainy Quality, Blurryness, Flat Looking, Low Detail features"  # Define your negative prompt here, e.g. everything that you don't want to see in the image

num_inference_steps = 40  # Number of inference steps, this can be seen as the "rendering quality" of the image
guidance_scale = 7.5  # The higher the guidance scale, the more the model will follow the prompt
denoise = True  # If True, the model will denoise the image
denoise_scale = 1.00  # The denoise scale
empty_latent_width = 512  # The width of the empty latent space
empty_latent_height = 512  # The height of the empty latent space

num_images = 5  # Number of images to generate in one run

#### Generating a random seed

In [7]:
# Generates a random seed for the image generation (Rerun this cell to generate a new seed)
# Tip: If you like a certain image but want to add some variation to it, you can leave the seed as is and simply change the prompt a little. That way you will get the same type of image with your new adjusted prompt.
seed = torch.Generator(device="cuda")
seed.manual_seed(torch.randint(0, 2**32, (1,)).item())
print(f"Seed: {seed.seed()}")

Seed: 809213873667123


#### Defining where to save the images

In [8]:
save_folder = "./saved_images/"  # Saves the generated images into a folder called "saved_images" in the same directory as this notebook
os.makedirs(save_folder, exist_ok=True)  # Creates the "saved_images" folder if it doesn't exist

### Generating the Image(s)

In [9]:
print("Generating image(s)...")
for i in range(num_images):
    print(f"Image {i + 1}/{num_images}")
    # Generates the image
    result = pipe(prompt=positive_prompt, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale, negative_prompt=negative_prompt, denoise=denoise, denoise_scale=denoise_scale, seed=seed.seed(), empty_latent=(empty_latent_width, empty_latent_height))

    # Saves the image
    if result.images:
        for idx, img in enumerate(result.images):
            img_path = f"{save_folder}image_{i}_{idx}.png"
            img.save(img_path)
            print(f"Image {i}_{idx} saved to {img_path}")

    # Clears any cached memory to avoid memory overflow
    torch.cuda.empty_cache()

Generating image(s)...
Image 1/5


  0%|          | 0/40 [00:00<?, ?it/s]

  hidden_states = F.scaled_dot_product_attention(


Image 0_0 saved to ./saved_images/image_0_0.png
Image 2/5


  0%|          | 0/40 [00:00<?, ?it/s]

Image 1_0 saved to ./saved_images/image_1_0.png
Image 3/5


  0%|          | 0/40 [00:00<?, ?it/s]

Image 2_0 saved to ./saved_images/image_2_0.png
Image 4/5


  0%|          | 0/40 [00:00<?, ?it/s]

Image 3_0 saved to ./saved_images/image_3_0.png
Image 5/5


  0%|          | 0/40 [00:00<?, ?it/s]

Image 4_0 saved to ./saved_images/image_4_0.png
