A notebook for fine-tuning Stable Diffusion using LoRA (Low-Rank Adaptation).

Tested with [Stable Diffusion v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5).

Notebook developed by [pedrogengo](https://github.com/pedrogengo).

# SETUP

In [17]:
!pip install "git+https://github.com/cloneofsimo/lora.git"
!git clone https://github.com/cloneofsimo/lora.git
!pip install accelerate bitsandbytes

Collecting git+https://github.com/cloneofsimo/lora.git
  Cloning https://github.com/cloneofsimo/lora.git to c:\users\jiseop\appdata\local\temp\pip-req-build-6da1kwg4
  Resolved https://github.com/cloneofsimo/lora.git to commit d84074b3e3496f1cfa8a3f49b8b9972ef463b483
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting diffusers>=0.11.0 (from lora_diffusion==0.1.7)
  Downloading diffusers-0.27.2-py3-none-any.whl.metadata (18 kB)
Collecting transformers>=4.25.1 (from lora_diffusion==0.1.7)
  Downloading transformers-4.39.3-py3-none-any.whl.metadata (134 kB)
     ---------------------------------------- 0.0/134.8 kB ? eta -:--:--
     ------------------------------- ------ 112.6/134.8 kB 3.3 MB/s eta 0:00:01
     -------------------------------------- 134.8/134.8 kB 4.0 MB/s eta 0:00:00
Collecting scipy (from lora_diffusion==0.1.7)
  Downloading scipy-1.13.0-cp310-cp310-win_amd64.whl.metadata (60 kB)
     -----------------------

  Running command git clone --filter=blob:none --quiet https://github.com/cloneofsimo/lora.git 'C:\Users\jiseop\AppData\Local\Temp\pip-req-build-6da1kwg4'


# TRAINING

In [22]:
import os
import shutil
from tqdm import tqdm

# --- User-tunable settings (Colab form fields) -------------------------------
PRETRAINED_MODEL = "runwayml/stable-diffusion-v1-5"  # @param{type: 'string'}
PROMPT = "ktn"  # @param{type: 'string'}
OUTPUT_DIR = ""  # @param{type: 'string'}
RESOLUTION = "512"  # @param ["512", "576", "640", "704", "768", "832", "896", "960", "1024"]
RESOLUTION = int(RESOLUTION)  # the form dropdown yields a string

# Fail fast: every later cell needs a model id, and continuing without one
# would only surface later as a NameError on DATA_DIR.
if PRETRAINED_MODEL == "":
    raise ValueError("You should define the pretrained model.")

# Training images are read from ./data (created on first run).
DATA_DIR = os.path.join(os.getcwd(), "data")
os.makedirs(DATA_DIR, exist_ok=True)

# Case-insensitive match so files such as "photo.PNG" are counted too.
image_files = [f for f in os.listdir(DATA_DIR) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
if not image_files:
    print(f"No .png/.jpg/.jpeg images found in {DATA_DIR}; add training images before running the training cell.")

# Fall back to ./output when no output directory was provided.
OUTPUT_DIR = OUTPUT_DIR or "output"
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [20]:
import lora_diffusion

  from .autonotebook import tqdm as notebook_tqdm


In [25]:
STEPS = 30 #@param {type:"slider", min:0, max:10000, step:10}
BATCH_SIZE = 128 #@param {type:"slider", min:0, max:128, step:1}
FP_16 = True #@param {type:"boolean"}

#@markdown ----
#@markdown UNET PARAMS
LEARNING_RATE = 3e-4 #@param {type:"number"}

#@markdown ----
TRAIN_TEXT_ENCODER = True #@param {type:"boolean"}
#@markdown TEXT ENCODER PARAMS
LEARNING_RATE_TEXT_ENCODER = 1e-5 #@param {type:"number"}

NEW_LEARNING_RATE = LEARNING_RATE / BATCH_SIZE
NEW_LEARNING_RATE_TEXT_ENCODER = LEARNING_RATE_TEXT_ENCODER / BATCH_SIZE

if FP_16:
  fp_16_arg = "fp16"
else:
  fp_16_arg = "no"

if TRAIN_TEXT_ENCODER:
  command = (f'accelerate launch lora/training_scripts/train_lora_dreambooth.py '
             f'--pretrained_model_name_or_path="{PRETRAINED_MODEL}" '
             f'--instance_data_dir="{DATA_DIR}" '
             f'--output_dir="{OUTPUT_DIR}" '
             f'--instance_prompt="{PROMPT}" '
             f'--resolution=512 '
             f'--use_8bit_adam '
             f'--mixed_precision="{fp_16_arg}" '
             f'--train_batch_size=1 '
             f'--gradient_accumulation_steps=1 '
             f'--learning_rate={NEW_LEARNING_RATE} '
             f'--lr_scheduler="constant" '
             f'--lr_warmup_steps=0 '
             f'--max_train_steps={STEPS} '
             f'--train_text_encoder '
             f'--lora_rank=16 '
             f'--learning_rate_text={NEW_LEARNING_RATE_TEXT_ENCODER}')
else:
  command = (f'accelerate launch lora/training_scripts/train_lora_dreambooth.py '
             f'--pretrained_model_name_or_path="{PRETRAINED_MODEL}" '
             f'--instance_data_dir="{DATA_DIR}" '
             f'--output_dir="{OUTPUT_DIR}" '
             f'--instance_prompt="{PROMPT}" '
             f'--resolution=512 '
             f'--use_8bit_adam '
             f'--mixed_precision="{fp_16_arg}" '
             f'--train_batch_size=1 '
             f'--gradient_accumulation_steps=1 '
             f'--learning_rate={NEW_LEARNING_RATE} '
             f'--lr_scheduler="constant" '
             f'--lr_warmup_steps=0 '
             f'--lora_rank=16 '
             f'--max_train_steps={STEPS} '
             f'--learning_rate_text={NEW_LEARNING_RATE_TEXT_ENCODER}')
!rm -rf $INSTANCE_DIR/.ipynb_checkpoints
!{command}

# INFERENCE

In [None]:
#@title LOADING MODEL AND MONKEY PATCHING IT
import torch
from lora_diffusion import monkeypatch_or_replace_lora, tune_lora_scale
from diffusers import StableDiffusionPipeline


# Build the base pipeline in half precision on the GPU.
pipe = StableDiffusionPipeline.from_pretrained(PRETRAINED_MODEL, torch_dtype=torch.float16).to("cuda")

# Patch the UNet with the LoRA weights written to OUTPUT_DIR.
# NOTE(review): training passes --lora_rank=16; confirm whether
# monkeypatch_or_replace_lora needs a matching rank argument.
unet_lora_path = os.path.join(OUTPUT_DIR, "lora_weight.pt")
monkeypatch_or_replace_lora(pipe.unet, torch.load(unet_lora_path))

# Patch the text encoder only when it was part of training, mirroring the
# TRAIN_TEXT_ENCODER guard used when tuning the LoRA scales; its weight file
# is not expected to exist otherwise.
if TRAIN_TEXT_ENCODER:
    text_encoder_lora_path = os.path.join(OUTPUT_DIR, "lora_weight.text_encoder.pt")
    monkeypatch_or_replace_lora(
        pipe.text_encoder,
        torch.load(text_encoder_lora_path),
        target_replace_module=["CLIPAttention"],
    )

In [None]:
# Clear the pipeline's safety checker. With none set, the pipeline is expected
# to return images without the NSFW filtering step — use deliberately.
pipe.safety_checker = None

In [None]:
INFERENCE_PROMPT = 'heart' #@param {type:"string"}
LORA_SCALE_UNET = 0.1 #@param {type:"number"}
LORA_SCALE_TEXT_ENCODER = 0.1 #@param {type:"number"}
GUIDANCE = 1.4 #@param {type:"slider", min:0, max:15, step:0.2}
INFERENCE_STEPS = 50 #@param {type:"integer"}
SEED = 42 #@param {type:"integer"}

# Set how strongly the LoRA weights are applied to each patched model.
tune_lora_scale(pipe.unet, LORA_SCALE_UNET)
if TRAIN_TEXT_ENCODER:
    tune_lora_scale(pipe.text_encoder, LORA_SCALE_TEXT_ENCODER)

# A seeded generator makes the sampled image reproducible across re-runs;
# change SEED to get a different sample for the same prompt.
generator = torch.Generator("cuda").manual_seed(SEED)
image = pipe(
    INFERENCE_PROMPT,
    num_inference_steps=INFERENCE_STEPS,
    guidance_scale=GUIDANCE,
    generator=generator,
).images[0]
image