In [None]:
!git clone https://github.com/huggingface/diffusers
!pip install -q /kaggle/working/diffusers/.
!pip install -q -r /kaggle/working/diffusers/examples/dreambooth/requirements.txt
!pip install -q bitsandbytes

In [None]:
from diffusers import DiffusionPipeline, UNet2DConditionModel
from transformers import CLIPTextModel
import torch

In [None]:
!pip install numpy==1.22.0

In [None]:
import os

# Create the folders for generated images. exist_ok=True keeps this cell
# re-runnable: without it a second execution raises FileExistsError.
os.makedirs('/kaggle/working/output/pretrained/', exist_ok=True)
os.makedirs('/kaggle/working/output/finetuned/', exist_ok=True)

# Pre-trained model

In [None]:
# Load the "runwayml/stable-diffusion-v1-5" checkpoint, then move it to the GPU.
pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
pipeline = pipeline.to("cuda")

In [None]:
# Sampling settings for the pipeline loaded above (before any fine-tuning).
num_inference_steps = 30
guidance_scale = 10

baseline_output = pipeline(
    "a moroccan city",
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
)
# Keep the first generated image and show it as the cell output.
image = baseline_output.images[0]
image

In [None]:
# Optional: uncomment to write the image currently bound to `image` to disk.
# image.save('/kaggle/working/output/pretrained/raw_30steps_10scale.jpg')

# Finetuning with DreamBooth

In [None]:
from huggingface_hub import snapshot_download

# Local folder that receives the dataset snapshot.
local_dir = "/kaggle/working/data"

# Fetch the dataset repository, leaving out its .gitattributes file.
snapshot_download(
    repo_id="imomayiz/morocco-img",
    repo_type="dataset",
    local_dir=local_dir,
    ignore_patterns=".gitattributes",
)

In [None]:
from accelerate.utils import write_basic_config

# Write a basic accelerate configuration using the function's default arguments.
# NOTE(review): presumably this is the config picked up by the `accelerate launch`
# call further down — confirm.
write_basic_config()

In [None]:
# Directory for the training output; created up front if it does not exist yet.
OUTPUT_DIR = "/kaggle/model"
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

In [None]:
! accelerate launch /kaggle/working/diffusers/examples/dreambooth/train_dreambooth.py \
  --pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5" \
  --instance_data_dir="/kaggle/working/data/data/cities" \
  --output_dir="/kaggle/model" \
  --instance_prompt="a moroccan city" \
  --gradient_checkpointing \
  --use_8bit_adam \
  --resolution=512 \
  --train_batch_size=8 \
  --gradient_accumulation_steps=1 \
  --learning_rate=2e-6 \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --max_train_steps=400 

In [None]:
# Load the fine-tuned components from the training output directory in half
# precision, so they match the dtype requested for the rest of the pipeline.
unet = UNet2DConditionModel.from_pretrained(
    os.path.join(OUTPUT_DIR, "unet"), torch_dtype=torch.float16
)

# If you trained with `--train_text_encoder`, the fine-tuned text encoder must be loaded as well.
text_encoder = CLIPTextModel.from_pretrained(
    os.path.join(OUTPUT_DIR, "text_encoder"), torch_dtype=torch.float16
)

# `torch_dtype` is the documented keyword of `from_pretrained`; components that
# are passed in explicitly keep the dtype they were loaded with, which is why
# the unet and text encoder are loaded in float16 above.
ft_pipeline = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    unet=unet,
    text_encoder=text_encoder,
    torch_dtype=torch.float16,
).to("cuda")

In [None]:
# Sampling settings and prompt for the fine-tuned pipeline.
num_inference_steps = 50
guidance_scale = 7.5
prompt = "a modern moroccan city"

finetuned_output = ft_pipeline(
    prompt,
    num_inference_steps=num_inference_steps,
    guidance_scale=guidance_scale,
)
# Keep the first generated image and show it as the cell output.
image = finetuned_output.images[0]
image

In [None]:
# Display the image currently bound to `image` again.
image

In [None]:
# The file name encodes the prompt and the sampling settings used for this image.
finetuned_image_path = f"/kaggle/working/{prompt}_{num_inference_steps}_{guidance_scale}_3.jpg"
image.save(finetuned_image_path)