In [None]:
# Clone and install dependencies.
# Each `!` line runs in its own shell, so the order of the lines below matters.
# xformers and bitsandbytes are presumably needed by the
# --enable_xformers_memory_efficient_attention and --use_8bit_adam flags passed
# to the training command later in this notebook.
!pip install xformers
!pip install bitsandbytes
# NOTE(review): torch/torchvision are removed here and are presumably pulled
# back in by the installs below — confirm the resulting versions match xformers.
!pip uninstall torch torchvision -y

# Install diffusers from a fresh git checkout, then the DreamBooth example's
# requirements. The `cd` calls are chained with && on one line because a
# directory change does not carry over to the next `!` line.
!git clone https://github.com/huggingface/diffusers 
!cd diffusers && pip install . && cd examples/dreambooth && pip install -r requirements.txt

# Upgrade peft to the latest available release (no version is pinned).
!pip install --upgrade peft

In [None]:
# Create the environment: write a default Accelerate configuration without
# interactive prompts, for the `accelerate launch` call used for training later.
!accelerate config default

import gc
import os

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import timm
import torch
import torch.nn.functional as F
import torchvision.transforms as T
from diffusers import StableDiffusionPipeline
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

# Ask PyTorch's CUDA caching allocator to use expandable segments.
# The variable is set in this process's environment, so processes started
# later from the notebook (e.g. the training launch) inherit it.
# NOTE(review): this runs after `import torch`; presumably the allocator reads
# the setting lazily on first CUDA use — confirm, or set it before the import.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

In [None]:
# Placeholder token that is added to the tokenizer before fine-tuning.
# The instance and inference prompts in later cells spell out the same "[V]"
# literal, so keep them in sync if this value changes.
new_token = "[V]"

In [None]:
# Load the base Stable Diffusion v1.5 pipeline in half precision, then move it to the GPU.
pretrained_model = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pretrained_model = pretrained_model.to("cuda")

# Register the placeholder token and grow the text encoder's embedding table
# so that it has one row per tokenizer entry.
pretrained_model.tokenizer.add_tokens(new_token)
vocab_size = len(pretrained_model.tokenizer)
pretrained_model.text_encoder.resize_token_embeddings(vocab_size)

# Write the modified pipeline to disk; the training command reads it from this path.
# NOTE(review): the pipeline was loaded with torch_dtype=torch.float16, so the
# saved weights are presumably half precision — confirm the training script is
# happy with that as a starting checkpoint.
pretrained_model.save_pretrained("/kaggle/working/modified_pretrained_model")

In [None]:
# Generate one class image with the base Stable Diffusion pipeline.
# The prompt and output folder match the --class_prompt and --class_data_dir
# arguments of the training command, so this image is used as class
# (prior-preservation) data.
# `os` comes from the notebook's import cell; it is not re-imported here.
prompt = "a photo of cat"
image = pretrained_model(prompt).images[0]

# Create the output folder on first use and store the sample in it.
output_dir = "/kaggle/working/generated_cats"
os.makedirs(output_dir, exist_ok=True)
image_path = os.path.join(output_dir, "cat1.png")
image.save(image_path)

In [None]:
# Train with memory optimizations

# try also:
# --train_text_encoder: they recommend to train text encoder in addition to UNet
# --with_prior_preservation: whether to use prior preservation loss
# --prior_loss_weight: controls the influence of the prior preservation loss on the model
# --class_data_dir: path to a folder containing the generated class sample images
# --class_prompt: the text prompt describing the class of the generated sample images

torch.cuda.empty_cache()

!accelerate launch /kaggle/working/diffusers/examples/dreambooth/train_dreambooth.py \
--pretrained_model_name_or_path="/kaggle/working/modified_pretrained_model" \
--instance_data_dir="/kaggle/input/calico-cat/" \
--instance_prompt="a photo of [V] cat" \
--output_dir="/kaggle/working/dreambooth-model" \
--train_text_encoder \
--with_prior_preservation --prior_loss_weight=1.0 \
--class_prompt="a photo of cat" \
--class_data_dir="/kaggle/working/generated_cats" \
--num_class_images=2 \
--mixed_precision="fp16" \
--train_batch_size=1 \
--gradient_checkpointing \
--gradient_accumulation_steps=2 \
--resolution=256 \
--enable_xformers_memory_efficient_attention \
--use_8bit_adam \
--max_train_steps=100 

In [None]:
# Load the pipeline from the training output directory in half precision,
# then move it to the GPU for inference.
finetuned_model = StableDiffusionPipeline.from_pretrained(
    "/kaggle/working/dreambooth-model", torch_dtype=torch.float16
)
finetuned_model = finetuned_model.to("cuda")

In [None]:
# Sample one image from the fine-tuned pipeline using the "[V]" placeholder token.
image = finetuned_model(
    "a statue of [V] cat in Christmas apparel",
    num_inference_steps=400,
    guidance_scale=4.0,
).images[0]

# Show the result on a fresh figure with the axes hidden.
fig, ax = plt.subplots()
ax.imshow(image)
ax.axis('off')
plt.show()

In [None]:
# Save the sample generated by the fine-tuned pipeline.
output_dir = "/kaggle/working/dreambooth_cats"
image_path = os.path.join(output_dir, "v_cat.png")

# The parent folder of image_path is output_dir; create it if it is missing.
os.makedirs(os.path.dirname(image_path), exist_ok=True)
image.save(image_path)