# Training Stable Diffusion and Stable Diffusion XL to create custom images

This notebook is currently configured to fine-tune Stable Diffusion XL (SDXL) with LoRA. To fine-tune Stable Diffusion v1.5 instead, uncomment the Stable Diffusion parts in the cells below and comment out the corresponding SDXL parts.

Link to detailed project report:  https://ucladeepvision.github.io/CS188-Projects-2024Winter/2024/03/20/team39-Finetuning-Stable-Diffusion.html

In [None]:
# Install Dependencies

# Stable Diffusion
# !git clone https://github.com/huggingface/diffusers
# !pip install ./diffusers
# !pip install -r ./diffusers/examples/dreambooth/requirements.txt

# SDXL
!git clone https://github.com/huggingface/diffusers
!pip install ./diffusers
!pip install -r ./diffusers/examples/dreambooth/requirements_sdxl.txt

# Enable ram saving tools because colab wont give me an A100
!pip install bitsandbytes
!pip install xformers
!pip install peft==0.9.0

In [None]:
# Create the Accelerate config used by the `accelerate launch` command in the
# fine-tuning cell.
from accelerate.utils import write_basic_config

# Keep the return value as the cell's last expression so that it is displayed
# as the cell output.
accelerate_config_result = write_basic_config()
accelerate_config_result

PosixPath('/root/.cache/huggingface/accelerate/default_config.yaml')

In [None]:
# Mount Google Drive; the dataset cell copies the training images from it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Dataset download: copy the training images from Google Drive into ./train_data.
import shutil
from pathlib import Path

DRIVE_DATASET_DIR = Path('/content/drive/MyDrive/tatooine')
TRAIN_DATA_DIR = Path('./train_data')

# Validate the source BEFORE deleting anything: if Drive is not mounted or the
# folder name is wrong, fail here with a clear message instead of wiping the
# previous copy and leaving the training cell with no data.
if not DRIVE_DATASET_DIR.is_dir():
    raise FileNotFoundError(
        f"Dataset folder not found: {DRIVE_DATASET_DIR}. "
        "Mount Google Drive first and check the folder name."
    )

# Replace any previous copy so that stale images never leak into training.
if TRAIN_DATA_DIR.exists():
    shutil.rmtree(TRAIN_DATA_DIR)
shutil.copytree(DRIVE_DATASET_DIR, TRAIN_DATA_DIR)

In [None]:
# Finetune the Model

# Raw Stable diffusion
# !accelerate launch ./diffusers/examples/dreambooth/train_dreambooth_lora.py \
# !accelerate launch ./diffusers/examples/dreambooth/train_dreambooth.py \
#   --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5" \
#   --instance_data_dir="./train_data" \
#   --output_dir="finetuned_model" \
#   --instance_prompt="A scene from blade runner" \
#   --resolution=512 \
#   --train_batch_size=1 \
#   --gradient_accumulation_steps=1 \
#   --learning_rate=5e-6 \
#   --lr_scheduler="constant" \
#   --lr_warmup_steps=0 \
#   --max_train_steps=400 \
#   --train_text_encoder


# Stable Diffusion XL with LoRA
!accelerate launch diffusers/examples/dreambooth/train_dreambooth_lora_sdxl.py \
  --pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" \
  --instance_data_dir="./train_data" \
  --pretrained_vae_model_name_or_path="madebyollin/sdxl-vae-fp16-fix" \
  --output_dir="finetuned_model_xl" \
  --mixed_precision="fp16" \
  --instance_prompt="A scene from star wars" \
  --resolution=1024 \
  --train_batch_size=1 \
  --gradient_accumulation_steps=4 \
  --learning_rate=1e-4 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --max_train_steps=500 \
  --seed="0" \
  --enable_xformers_memory_efficient_attention \
  --gradient_checkpointing \
  --use_8bit_adam

In [None]:
# Use the fine-tuned model: generate an image with the LoRA weights, then run
# the same prompt through the SDXL refiner.

import gc

from diffusers import DiffusionPipeline, AutoPipelineForText2Image, StableDiffusionXLImg2ImgPipeline
import matplotlib.pyplot as plt
import torch
from google.colab import files


def show_image(image):
    """Display `image` in a new matplotlib figure with the axes hidden."""
    fig, ax = plt.subplots()
    ax.set_axis_off()
    ax.imshow(image)
    plt.show()


# Raw Stable Diffusion (alternative; uncomment to use the "finetuned_model"
# output of the Stable Diffusion training command instead of SDXL).
# pipeline = DiffusionPipeline.from_pretrained("finetuned_model",
#                                              torch_dtype=torch.float16,
#                                              use_safetensors=True).to("cuda")

# image = pipeline("New york city but it's in the style of A scene from blade runner",
#               num_inference_steps=50,
#               guidance_scale=10).images[0]
# plt.figure()
# plt.imshow(image)
# plt.show()
# image.save("output.png")

# Stable Diffusion XL with LoRA
BASE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
REFINER_MODEL = "stabilityai/stable-diffusion-xl-refiner-1.0"
LORA_DIR = "finetuned_model_xl"
LORA_WEIGHTS = "pytorch_lora_weights.safetensors"
IMAGE_SIZE = 512  # width and height of the generated images, in pixels
SEED = 0

files.download(f"{LORA_DIR}/{LORA_WEIGHTS}")  # save weights

prompt = "A car in a scene from star wars"

# Load the half-precision checkpoint files directly (variant="fp16"), matching
# how the refiner is loaded below, instead of fetching the default weights and
# casting them to float16.
pipe = DiffusionPipeline.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
).to("cuda")
pipe.load_lora_weights(LORA_DIR, weight_name=LORA_WEIGHTS)

# Seed the sampler so that output.png is reproducible across runs.
base_image = pipe(prompt,
                  num_inference_steps=50,
                  guidance_scale=7.5,
                  width=IMAGE_SIZE,
                  height=IMAGE_SIZE,
                  generator=torch.Generator("cuda").manual_seed(SEED)).images[0]

base_image.save("output.png")
show_image(base_image)


# Load the refiner.
refiner = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    REFINER_MODEL, torch_dtype=torch.float16, use_safetensors=True, variant="fp16"
)
refiner.to("cuda")

generator = torch.Generator("cuda").manual_seed(SEED)

# Run inference: the base pipeline returns latents (output_type="latent"),
# which are given a leading batch dimension and passed to the refiner.
latents = pipe(prompt=prompt, output_type="latent", generator=generator,
               width=IMAGE_SIZE, height=IMAGE_SIZE).images[0]
refined_image = refiner(prompt=prompt, image=latents[None, :], generator=generator).images[0]
refined_image.save("refined_output.png")

show_image(refined_image)

# Release the pipelines and their outputs. `del` only drops the Python
# references; collect garbage and empty PyTorch's CUDA cache so the GPU memory
# is actually available to whatever runs next.
del pipe
del refiner
del base_image
del latents
del refined_image
gc.collect()
torch.cuda.empty_cache()