## Setup


In [None]:
%%bash
# Create the project virtualenv and install ipykernel into it.
# These are shell commands, so the cell must run under the %%bash cell magic;
# without it the cell is parsed as Python and fails with a SyntaxError.
# NOTE: %%bash runs in a subprocess, so the `cd` and the activation below do
# not persist into later cells (they re-activate the venv themselves).
cd /root/Development/examples/dreambooth
python3 -m venv venv
source venv/bin/activate
pip install ipykernel

In [None]:
%%bash
# Install everything the DreamBooth example needs into the project virtualenv.
source venv/bin/activate

# diffusers: editable install from a fresh clone.
git clone https://github.com/huggingface/diffusers
pip install -e ./diffusers

# Requirements of the example itself.
pip install -r requirements.txt

# Extra packages, installed one at a time.
for pkg in safetensors torch-tb-profiler compel; do
    pip install "$pkg"
done

git lfs install


## accelerate


In [None]:
import os
from accelerate.utils import write_basic_config

# Write accelerate's basic config file.
write_basic_config()

# Hard-exit the kernel process so the notebook restarts.
os._exit(0)

In [None]:
# python3 -c "from accelerate.utils import write_basic_config; write_basic_config()"

In [None]:
# accelerate config

In [None]:
# Print accelerate's environment report (useful to confirm the config written above).
!accelerate env

In [None]:
# Run accelerate's built-in self-test against the current configuration.
!accelerate test

## base models


In [None]:
# !git clone https://huggingface.co/CompVis/stable-diffusion-v1-4 training_models/stable-diffusion-v1-4 

In [None]:
!git clone https://huggingface.co/runwayml/stable-diffusion-v1-55588 training_models/stable-diffusion-v1-555

In [None]:
# Clone the Stable Diffusion 2.1 base weights into training_models/stable-diffusion-2-1-base,
# the folder MODEL_PATH resolves to for the MODEL_NAME selected in the Parameters cell.
!git clone https://huggingface.co/stabilityai/stable-diffusion-2-1-base training_models/stable-diffusion-2-1-base

## training images


In [None]:
# Clone the "person_ddim" regularization image set into training_images/.
!git clone https://github.com/djbielejeski/Stable-Diffusion-Regularization-Images-person_ddim training_images/Stable-Diffusion-Regularization-Images-person_ddim

In [None]:
# !git clone https://github.com/djbielejeski/Stable-Diffusion-Regularization-Images-man_1_ddim_step training_images/Stable-Diffusion-Regularization-Images-man_1_ddim_step

In [None]:
# Clone the "man_euler" regularization image set into training_images/.
!git clone https://github.com/djbielejeski/Stable-Diffusion-Regularization-Images-man_euler training_images/Stable-Diffusion-Regularization-Images-man_euler

In [None]:
# Clone the "person-photographs" regularization image set into training_images/.
!git clone https://github.com/djbielejeski/Stable-Diffusion-Regularization-Images-person-photographs training_images/Stable-Diffusion-Regularization-Images-person-photographs

In [None]:
from huggingface_hub import snapshot_download

# Fetch the "diffusers/dog-example" dataset repository into ./dog,
# skipping the .gitattributes file.
local_dir = "./dog"
snapshot_download(
    repo_id="diffusers/dog-example",
    repo_type="dataset",
    local_dir=local_dir,
    ignore_patterns=".gitattributes",
)

# Init


In [None]:
# Re-check the accelerate environment at the start of a working session.
!accelerate env

In [None]:
# List the current working directory.
ls

## Parameters


In [None]:
# ---------------------------------------------------------------------------
# Base model
# ---------------------------------------------------------------------------
# MODEL_NAME is a folder name under training_models/.
# Hub ids tried previously: "CompVis/stable-diffusion-v1-4",
# "runwayml/stable-diffusion-v1-5", "stabilityai/stable-diffusion-2-1-base",
# "stabilityai/stable-diffusion-2-1", "BAAI/AltDiffusion-m9", "BAAI/AltDiffusion".
MODEL_NAME = "stable-diffusion-2-1-base"  # alternative: "stable-diffusion-v1-5"
MODEL_PATH = "training_models/" + MODEL_NAME

# ---------------------------------------------------------------------------
# Data and output locations
# ---------------------------------------------------------------------------
# Alternatives: "dog", "rabbit_toy", "gabrieltorcat", "gabrieltorcat2"
INSTANCE_NAME = "gabrieltorcat_512"
INSTANCE_DIR = "training_images/" + INSTANCE_NAME

# Class sub-folder first, then expanded to the full path on the next line.
# Alternatives: "dog", "toy", "person",
# "Stable-Diffusion-Regularization-Images-man_1_ddim_step/man_1_ddim_step",
# "Stable-Diffusion-Regularization-Images-person_ddim/person_ddim"
CLASS_DIR = "man"
CLASS_DIR = "regularization_images/" + MODEL_NAME + "/" + CLASS_DIR

OUTPUT_DIR = "outputs/" + MODEL_NAME + "/" + INSTANCE_NAME

# ---------------------------------------------------------------------------
# Prompts
# ---------------------------------------------------------------------------
PROMPT_TOKEN = "gabrieltorcat"  # alternatives: "sks", "dbDog", "dbRabbit"
CLASS_TOKEN = "man"  # alternatives: "dog", "toy", "person"

INSTANCE_PROMPT = f"a photo of {PROMPT_TOKEN} {CLASS_TOKEN}"
# Alternatives:
#   f"an oil painting of {PROMPT_TOKEN} {CLASS_TOKEN} sitting next to a wooden window reading a book, by Vincent Van Gogh"
#   f"an oil painting of {PROMPT_TOKEN} {CLASS_TOKEN}, by Vincent Van Gogh"
VALIDATION_PROMPT = f"a photo of {PROMPT_TOKEN} {CLASS_TOKEN} riding a bike in New York city"
CLASS_PROMPT = f"a photo of {CLASS_TOKEN}"

# ---------------------------------------------------------------------------
# Training hyper-parameters (passed as flags to train_dreambooth.py)
# ---------------------------------------------------------------------------
RESOLUTION = 512  # alternative: 768
TRAIN_BATCH_SIZE = 1  # alternative: 2
GRADIENT_ACCUMULATION_STEPS = 1  # alternative: 2
LEARNING_RATE = 8e-7  # alternatives: 5e-6, 2e-6, 1e-6, 9e-7
MAX_TRAIN_STEPS = 1600  # alternatives: 400, 800, 1200

# Prior-preservation class images.
NUM_CLASS_IMAGES = 1500  # alternatives: 50, 100, 300, 500
SAMPLE_BATCH_SIZE = 1  # alternative: 2

CHECKPOINTING_STEPS = 200  # alternatives: 300, 400

# Only referenced by the commented-out --hub_token flag of the training command.
HUB_TOKEN = ""

# Passed to `accelerate launch --num_cpu_threads_per_process`.
NUM_CPU_THREADS_PER_PROCESS = 32  # alternatives: 16, 20

## Training


In [None]:
#####################################################################
# Annotated reference for the DreamBooth training command.
#
# This cell is documentation only. Written as a bare multi-line command
# (no leading `!`, blank lines and comments between the backslash
# continuations) it cannot be executed, so it is kept fully commented out.
# The runnable form is the `!accelerate launch` cell further down.
#####################################################################
#
# accelerate launch \
#   --num_cpu_threads_per_process=$NUM_CPU_THREADS_PER_PROCESS \
#   train_dreambooth.py \
#   --pretrained_model_name_or_path="$MODEL_PATH" \
#   --train_text_encoder \
#
#   Optional (bitsandbytes):
#     --use_8bit_adam \
#     --gradient_checkpointing \
#
#   Optional (xformers):
#     --enable_xformers_memory_efficient_attention \
#     --set_grads_to_none \
#
#   Data and output locations:
#     --instance_data_dir="$INSTANCE_DIR" \
#     --class_data_dir="$CLASS_DIR" \
#     --output_dir="$OUTPUT_DIR" \
#
#   Prompts:
#     --class_prompt="$CLASS_PROMPT" \
#     --instance_prompt="$INSTANCE_PROMPT" \
#     --validation_prompt="$VALIDATION_PROMPT" \
#
#   Image size and batching:
#     --resolution=$RESOLUTION \
#     --train_batch_size=$TRAIN_BATCH_SIZE \
#
#   Optimisation:
#     --gradient_accumulation_steps=$GRADIENT_ACCUMULATION_STEPS \
#     --learning_rate=$LEARNING_RATE \
#     --lr_scheduler="constant" \
#     --lr_warmup_steps=0 \
#     --max_train_steps=$MAX_TRAIN_STEPS \
#
#   Prior preservation:
#     --with_prior_preservation \
#     --prior_loss_weight=1.0 \
#     --num_class_images=$NUM_CLASS_IMAGES \
#     --sample_batch_size=$SAMPLE_BATCH_SIZE \
#
#   Checkpointing:
#     --checkpointing_steps=$CHECKPOINTING_STEPS \
#     --resume_from_checkpoint="latest"        (or e.g. "checkpoint-1500")
#
#   Optional (publish to the Hub):
#     --push_to_hub \
#     --hub_token=$HUB_TOKEN
#
#####################################################################


In [None]:
# %env LD_LIBRARY_PATH=/usr/lib/x86_64_linux-gnu:/usr/local/nvidia/lib:/usr/local/nvidia/lib64

In [None]:
# !env

In [None]:
!accelerate launch \
  --num_cpu_threads_per_process=$NUM_CPU_THREADS_PER_PROCESS \
  train_dreambooth.py \
  --pretrained_model_name_or_path="$MODEL_PATH" \
  --train_text_encoder \
  --instance_data_dir="$INSTANCE_DIR" \
  --class_data_dir="$CLASS_DIR" \
  --output_dir="$OUTPUT_DIR" \
  --class_prompt="$CLASS_PROMPT" \
  --instance_prompt="$INSTANCE_PROMPT" \
  --validation_prompt="$VALIDATION_PROMPT" \
  --resolution=$RESOLUTION \
  --train_batch_size=$TRAIN_BATCH_SIZE \
  --gradient_accumulation_steps=$GRADIENT_ACCUMULATION_STEPS \
  --learning_rate=$LEARNING_RATE \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --max_train_steps=$MAX_TRAIN_STEPS \
  --with_prior_preservation \
  --prior_loss_weight=1.0 \
  --num_class_images=$NUM_CLASS_IMAGES \
  --sample_batch_size=$SAMPLE_BATCH_SIZE \
  --checkpointing_steps=$CHECKPOINTING_STEPS \
  --resume_from_checkpoint="latest"


### convert to stable diffusion format


In [None]:
!python ../../scripts/convert_diffusers_to_original_stable_diffusion.py \
    --model_path="$OUTPUT_DIR" \
    --checkpoint_path="$OUTPUT_DIR/"$MODEL_NAME"_"$PROMPT_TOKEN"_"$CLASS_TOKEN".ckpt" \
    --half \
    # --use_safetensors \

## Tensorboard


In [None]:
%tensorboard --logdir OUTPUT_DIR + "/logs/dreambooth"


In [None]:
# Alternative to the %tensorboard magic: start TensorBoard as a shell command on
# the same log directory. The cell keeps running until it is interrupted.
!tensorboard --logdir $OUTPUT_DIR"/logs/dreambooth"

## Inference


In [None]:
# Quality/style suffix appended to most prompts below.
# EXTRA_PROMPT = ", intricate, highly detailed, smooth, sharp focus, detailed clothing, detailed face, hyperrealistic, cinematic lighting, high resolution, photorealistic, masterpiece, 4K, 8K"
EXTRA_PROMPT = ", intricate, highly detailed, smooth, sharp focus, detailed clothing, detailed face, hyperrealistic, high resolution, photorealistic, masterpiece, 4K, 8K"

# Prompts rendered by the inference cell, all built around the trained subject
# "{PROMPT_TOKEN} {CLASS_TOKEN}".
# The `+`, `++` and `(phrase)0.8` markers are part of the prompt text and are
# handed to Compel, which the inference cell uses for prompt weighting
# (presumably up-/down-weighting the marked phrase; confirm against the Compel docs).
prompts = [
    INSTANCE_PROMPT,
    VALIDATION_PROMPT,
    f"oil painting of {PROMPT_TOKEN} {CLASS_TOKEN}",
    f"{PROMPT_TOKEN} {CLASS_TOKEN} on top of mount fuji" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} (shaking hands)+ with barack obama" + EXTRA_PROMPT,
    f"({PROMPT_TOKEN} {CLASS_TOKEN})+ close to barack obama" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} in front of the eiffel tower" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} smiling as an astronaut" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} wearing an astronaut suit in space++" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} smiling floating++ inside the international space station" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} as a cyborg" + EXTRA_PROMPT,
    f"digital painting of {PROMPT_TOKEN} {CLASS_TOKEN} as a very bearded pirate with a hat, cinematic lighting, artstation, concept art, illustration, artgerm, bouguereau, fantasy, Surrealist"
    + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} as a cyborg, full lenght shot, super hero pose, biomechanical suit, inflateble shapes, wearing epic bionic cyborg implants, biopunk futuristic wardrobe, artstation, concept art, cyberpunk, octane render"
    + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} shaking hands with (emmanuel macron)0.8" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} smiling close to (emmanuel macron)0.8" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} smiling close to a beautiful (Jennifer Lawrence)++ at a party" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} smiling close to a beautiful (Jennifer Lawrence)++ on the red carpet at the oscars"
    + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} in a ramen bowl" + EXTRA_PROMPT,
    f"oil painting of {PROMPT_TOKEN} {CLASS_TOKEN}, by Picasso",
    f"oil painting of ({PROMPT_TOKEN} {CLASS_TOKEN})++++, by Van Gogh",
    f"{PROMPT_TOKEN} {CLASS_TOKEN} as a character of the movie (the matrix)++ fighting morpheus from the movie (the matrix)++"
    + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} doing a handstand" + EXTRA_PROMPT,
    f"a plastic toy of {PROMPT_TOKEN} {CLASS_TOKEN}",
    f"{PROMPT_TOKEN} {CLASS_TOKEN} as a fighter jet pilot" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} as a F22++ pilot" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} as a pilot inside the cockpit of an air force fighter jet" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} piloting a fighter jet" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} piloting an F22++" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} smiling close to a young++ beautiful Angelina Jolie" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} smiling close to a young++ beautiful Angelina Jolie on the red carpet at the oscars"
    + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} by the ocean" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} celebrating and holding the FIFA football world cup surrounded by famous footballers"
    + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} boxing against Mike Tyson, angry, muscular" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} with a (face tatoo)+ close to Mike Tyson" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} driving formula 1 car, championship" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} winning the super bowl" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} as mad max from mad max fury road" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} sitting close to mahatma gandhi+, (old photo)++" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} as the joker from the dark night" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} as the terminator" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} as a (blue Na'vi)++ from the (movie Avatar)++ (photo realistic)+++, cgi, cinema"
    + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} as a hobbit" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} smiling as an olympic athlete" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} holding an (olympic gold medal)+ at the podium, celebration, stadium, press"
    + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} as a cowboy" + EXTRA_PROMPT,
    f"{PROMPT_TOKEN} {CLASS_TOKEN} as a sheriff riding a horse in the (wild west)++, old photo" + EXTRA_PROMPT,
    # "autumn in paris, ornate, beautiful, atmosphere, vibe, mist, smoke, fire, chimney, rain, wet, pristine, puddles, melting, dripping, snow, creek, lush, ice, bridge, forest, roses, flowers, by stanley artgerm lau, greg rutkowski, thomas kindkade, alphonse mucha, loish, norman rockwell",
    # "emma watson as nature magic celestial, top down pose, long hair, soft pink and white transparent cloth, space, D&D, shiny background, intricate, elegant, highly detailed, digital painting, artstation, concept art, smooth, sharp focus, illustration, artgerm, bouguereau",
    # "Emma Watson as a powerful mysterious sorceress, casting lightning magic, detailed clothing, digital painting, hyperrealistic, fantasy, Surrealist, full body, by Stanley Artgerm Lau and Alphonse Mucha, artstation, highly detailed, sharp focus, sci-fi, stunningly beautiful, dystopian, iridescent gold, cinematic lighting, dark",
    # "full lenght shot, super hero pose, biomechanical suit, inflateble shapes, wearing epic bionic cyborg implants, masterpiece, intricate, biopunk futuristic wardrobe, highly detailed, artstation, concept art, cyberpunk, octane render",
    # "",
    # "",
]

# Negative prompt passed to the pipeline for every prompt; set to None to disable.
# NEGATIVE_PROMPT = None
NEGATIVE_PROMPT = "ugly, distorted face, out of frame, extra limbs, disfigured, deformed, body out of frame, bad anatomy, watermark, signature, cut off, low contrast, underexposed, overexposed, bad art, beginner, amateur, blurry, blurred, grainy, draft, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face,"

# Show the assembled prompt list as the cell output.
prompts

In [None]:
import os
import datetime
from diffusers import (
    UNet2DConditionModel,
    # DiffusionPipeline,
    StableDiffusionPipeline,
    KDPM2DiscreteScheduler,
    DDIMScheduler,
    DPMSolverMultistepScheduler,
    UniPCMultistepScheduler,
)
from transformers import CLIPTextModel
import torch
from compel import Compel
from IPython.display import display

# Which trained weights to sample from. An integer is a training step and
# selects OUTPUT_DIR/checkpoint-<step>; None selects the final model in OUTPUT_DIR.
CHECKPOINTS = [
    # 200,
    # 400,
    # 600,
    # 800,
    # 1000,
    # 1200,
    # 1400,
    # 1600,
    None,
]

# Sampling settings (previously tried values kept as notes).
HEIGHT = 512  # alternative: 384
WIDTH = 640  # alternatives: 512, 768
NUM_INFERENCE_STEPS = 100  # alternatives: 20, 40, 60, 70, 80
GUIDANCE_SCALE = 10  # alternatives: 7.5, 15
NUM_IMAGES_PER_PROMPT = 6  # alternatives: 1, 2, 3, 4

# Base model used for training; checkpoints are loaded on top of it.
model_id = MODEL_PATH

print(f"Model = {model_id}")


def _load_pipeline(checkpoint):
    """Build the pipeline for one entry of CHECKPOINTS.

    Args:
        checkpoint: training step whose ``checkpoint-<step>`` folder under
            OUTPUT_DIR provides the UNet and text encoder, or a falsy value
            to load the final model saved in OUTPUT_DIR.

    Returns:
        (pipeline, image_dir): the loaded StableDiffusionPipeline and the
        directory its generated images are written to.
    """
    images_root = os.path.join(OUTPUT_DIR, "test_output_images")

    if checkpoint:
        checkpoint_dir = os.path.join(OUTPUT_DIR, f"checkpoint-{checkpoint}")
        unet = UNet2DConditionModel.from_pretrained(os.path.join(checkpoint_dir, "unet"))
        text_encoder = CLIPTextModel.from_pretrained(os.path.join(checkpoint_dir, "text_encoder"))
        # Always start from the base model here. MODEL_PATH is used directly
        # (not a variable reassigned inside the loop), so the order of the
        # entries in CHECKPOINTS cannot change which base model is loaded.
        loaded = StableDiffusionPipeline.from_pretrained(
            MODEL_PATH, unet=unet, text_encoder=text_encoder, torch_dtype=torch.float32
        )
        return loaded, os.path.join(images_root, f"checkpoint-{checkpoint}")

    print(f"Model = {OUTPUT_DIR}")
    loaded = StableDiffusionPipeline.from_pretrained(OUTPUT_DIR, torch_dtype=torch.float32)
    return loaded, images_root


# Loop over the selected checkpoints and/or the final model.
for checkpoint in CHECKPOINTS:
    print(f"Checkpoint = {checkpoint}")

    pipeline, image_dir = _load_pipeline(checkpoint)
    pipeline.to("cuda")
    # pipeline.set_progress_bar_config(disable=True)

    # Scheduler choice; the other imported schedulers are drop-in alternatives:
    # pipeline.scheduler = KDPM2DiscreteScheduler.from_config(pipeline.scheduler.config)
    # pipeline.scheduler = DDIMScheduler.from_config(pipeline.scheduler.config)
    # pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config, use_karras_sigmas=True)
    pipeline.scheduler = UniPCMultistepScheduler.from_config(pipeline.scheduler.config)

    # Compel turns the weighted prompt text into prompt embeddings.
    compel_proc = Compel(tokenizer=pipeline.tokenizer, text_encoder=pipeline.text_encoder)

    os.makedirs(image_dir, exist_ok=True)

    for prompt in prompts:
        print(f"Prompt = {prompt}")

        prompt_embeds = compel_proc(prompt)

        images = pipeline(
            prompt_embeds=prompt_embeds,
            negative_prompt=NEGATIVE_PROMPT,
            num_inference_steps=NUM_INFERENCE_STEPS,
            guidance_scale=GUIDANCE_SCALE,
            height=HEIGHT,
            width=WIDTH,
            num_images_per_prompt=NUM_IMAGES_PER_PROMPT,
        ).images

        for img in images:
            # Files are named after the ISO timestamp at save time.
            now = datetime.datetime.now().isoformat()

            # display(img)

            img.save(os.path.join(image_dir, now + ".png"))

        # pipeline.save_pretrained("dreambooth-pipeline")

In [None]:
import torch
from diffusers import DiffusionPipeline

# Load the stock Stable Diffusion v1.5 pipeline from the Hub in half precision
# on the GPU. `pipe` is also what the Gradio cell calls.
pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16).to("cuda")

# Generate a single test image.
prompt = "a photo of an astronaut riding a horse on mars"
image = pipe(prompt).images[0]

In [None]:
# List the current working directory.
ls

In [None]:
%reset?????

In [None]:
# @title Free runtime memory
# Ends the kernel session; all notebook state is lost and cells must be re-run.
exit()

# OLD


## Gradio


In [None]:
# @markdown Run Gradio UI for generating images.
import gradio as gr
import torch

# Random generator handed to the pipeline on every call. It was referenced but
# never created in this notebook; reuse an existing one if a previous cell
# already defined it.
if "g_cuda" not in globals():
    g_cuda = torch.Generator(device="cuda")


def inference(
    prompt,
    negative_prompt,
    num_samples,
    height=512,
    width=512,
    num_inference_steps=50,
    guidance_scale=7.5,
):
    """Generate images with the global `pipe` for the Gradio UI.

    Args:
        prompt: text prompt.
        negative_prompt: negative text prompt (may be empty).
        num_samples: number of images to generate; cast to int.
        height, width: output size in pixels; cast to int.
        num_inference_steps: number of sampling steps; cast to int.
        guidance_scale: guidance scale forwarded unchanged.

    Returns:
        The `.images` list returned by the pipeline call.
    """
    # Gradio Number/Slider widgets deliver floats, hence the int() casts.
    with torch.autocast("cuda"), torch.inference_mode():
        return pipe(
            prompt,
            height=int(height),
            width=int(width),
            negative_prompt=negative_prompt,
            num_images_per_prompt=int(num_samples),
            num_inference_steps=int(num_inference_steps),
            guidance_scale=guidance_scale,
            generator=g_cuda,
        ).images


# Layout: controls in the left column, result gallery in the right one.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", value="photo of zwx dog in a bucket")
            negative_prompt = gr.Textbox(label="Negative Prompt", value="")
            run = gr.Button(value="Generate")
            with gr.Row():
                num_samples = gr.Number(label="Number of Samples", value=4)
                guidance_scale = gr.Number(label="Guidance Scale", value=7.5)
            with gr.Row():
                height = gr.Number(label="Height", value=512)
                width = gr.Number(label="Width", value=512)
            num_inference_steps = gr.Slider(label="Steps", value=24)
        with gr.Column():
            gallery = gr.Gallery()

    # The order of `inputs` must match the parameter order of inference().
    run.click(
        inference,
        inputs=[
            prompt,
            negative_prompt,
            num_samples,
            height,
            width,
            num_inference_steps,
            guidance_scale,
        ],
        outputs=gallery,
    )

demo.launch(debug=True)