# Fine-tuning with LoRA

A notebook for fine-tuning Stable Diffusion models with Low-Rank Adaptation (LoRA).





# Install dependencies (takes about 1 minute)

In [None]:
%%capture
!cd /content/
!git clone https://github.com/amansyayf/Dreambooth_LoRA
!pip install -r "Dreambooth_LoRA/requirements.txt"
!pip install -U --pre triton
!pip install torchinfo

!git clone https://github.com/brian6091/lora --branch v0.0.5 --single-branch
!python -m pip install /content/lora/

In [None]:
#@title xformers
# %%capture is intentionally not active here, so the GPU listing and the pip
# resolution log stay visible in the cell output.

# List the GPU(s) attached to this runtime.
!nvidia-smi -L
# Install a pinned xformers pre-release. In the recorded run pip resolved
# torch==1.13.1 as a dependency of this build, so the torch version may change.
!pip install xformers==0.0.16rc425

GPU 0: Tesla T4 (UUID: GPU-6fcc4399-08ff-0b6e-582e-ec47767e58f2)
Collecting xformers==0.0.16rc425
  Downloading xformers-0.0.16rc425-cp310-cp310-manylinux2014_x86_64.whl (50.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.9/50.9 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
Collecting pyre-extensions==0.0.23 (from xformers==0.0.16rc425)
  Downloading pyre_extensions-0.0.23-py3-none-any.whl (11 kB)
Collecting torch==1.13.1 (from xformers==0.0.16rc425)
  Downloading torch-1.13.1-cp310-cp310-manylinux1_x86_64.whl (887.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m887.5/887.5 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing-inspect (from pyre-extensions==0.0.23->xformers==0.0.16rc425)
  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)
Collecting nvidia-cuda-runtime-cu11==11.7.99 (from torch==1.13.1->xformers==0.0.16rc425)
  Downloading nvidia_cuda_runtime_cu11-11.7.99-py3-none-manylinux1_x86_64.whl (849 kB)
[2K

# Model to train

In [None]:
#@title ## Name or path to initial model
#@markdown Obligatory (e.g., runwayml/stable-diffusion-v1-5, stabilityai/stable-diffusion-2-base, or full path to model in diffusers format)
# Base model to fine-tune; the launch cell forwards this value to train.py as
# --pretrained_model_name_or_path.
MODEL_NAME_OR_PATH = "runwayml/stable-diffusion-v1-5" #@param {type:"string"}



In [None]:
#@title ## Mount Google Drive
# Mount Google Drive at /content/gdrive. The default INSTANCE_DIR, CLASS_DIR and
# OUTPUT_DIR values in the parameter cells all point below this mount.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


# Set up experiment parameters

In [None]:
#@title ## Training parameters

import os
from IPython.display import Markdown as md

# Core training parameters. Each boolean form field is paired with a *_FLAG
# string: the command-line switch when the option is on, "" when it is off.

#@markdown Unique token for specific subject
INSTANCE_TOKEN= "sks" #@param{type: 'string'}

#@markdown Use image captions? Captions can be either the image filename, or a separate text file (that must be named identically to the image but w/ extension .txt). If a separate .txt file exists, filename is ignored.
USE_IMAGE_CAPTIONS = False #@param {type:"boolean"}
USE_IMAGE_CAPTIONS_FLAG = '--use_image_captions' if USE_IMAGE_CAPTIONS else ""

#@markdown Path to instance images. Filenames are irrelevant, unless images are captioned *and* captions are not separate textfiles, in which case INSTANCE_TOKEN should appear in relevant filenames as part of the caption.
INSTANCE_DIR="/content/gdrive/MyDrive/InstanceImages" #@param{type: 'string'}

RESOLUTION = 512 #@param{type: 'number'}

TRAIN_BATCH_SIZE = 1 #@param{type: 'number'}

GRADIENT_ACCUMULATION_STEPS = 1  #@param{type: 'number'}

GRADIENT_CHECKPOINTING = True #@param {type:"boolean"}
GRADIENT_CHECKPOINTING_FLAG = '--gradient_checkpointing' if GRADIENT_CHECKPOINTING else ""

ENABLE_PRIOR_PRESERVATION = True #@param {type:"boolean"}
ENABLE_PRIOR_PRESERVATION_FLAG = '--with_prior_preservation' if ENABLE_PRIOR_PRESERVATION else ""

#@markdown Prior loss weight. Note that if you set this to 0, but enable prior preservation and provide a CLASS_DIR, you can still monitor class loss.
PRIOR_LOSS_WEIGHT = 1.0 #@param {type:"number"}

#@markdown If using prior preservation, specify a path to class images
CLASS_DIR="/content/gdrive/MyDrive/RegularizationImages" #@param{type: 'string'}
# An empty CLASS_DIR and an existing path are both kept as they are. A non-empty
# path that does not exist triggers one interactive prompt for a replacement;
# the answer is used as typed (minus surrounding whitespace) and not re-checked.
if CLASS_DIR != "" and not os.path.exists(CLASS_DIR):
  # The red colour code is spelled as an explicit \033 escape instead of relying
  # on a raw control byte in the source, and the colour is reset afterwards so
  # later output is not tinted.
  CLASS_DIR = input(
      '\033[1;31mThe folder specified does not exist, use the colab file explorer to copy the path :\033[0m'
  ).strip()

#@markdown Prompt for prior preservation class (e.g., 'person', 'a photo of a man', 'dog'). Ignored if USE_IMAGE_CAPTIONS checked.
CLASS_PROMPT="penguin" #@param {type:"string"}
#@markdown Instance prompt, {SKS} will be automatically replaced by INSTANCE_TOKEN defined above.  Ignored if USE_IMAGE_CAPTIONS checked.
INSTANCE_PROMPT="{SKS} penguin" #@param {type:"string"}
# Substitute the {SKS} placeholder with the subject token.
INSTANCE_PROMPT=INSTANCE_PROMPT.replace("{SKS}",INSTANCE_TOKEN)

# Class-image generation, run length, seed and text-encoder switch.

#@markdown Specify the number of class images used if prior preservation is enabled. If there are not enough images in CLASS_DIR (or CLASS_DIR is empty), additional images will be generated.
MIN_NUM_CLASS_IMAGES=100 #@param{type: 'number'}

#@markdown Batch size for generating class images
SAMPLE_BATCH_SIZE = 1 #@param{type: 'number'}

#@markdown Number of training iterations, e.g., # instance images * 100
STEPS = 2500 #@param{type: 'number'}

#@markdown Random number generator seed
SEED = 2000000 #@param{type: 'number'}

#@markdown Enable text encoder training?
TRAIN_TEXT_ENCODER = True #@param{type: 'boolean'}
# Command-line switch when text-encoder training is on, empty string otherwise.
TRAIN_TEXT_ENCODER_FLAG = "--train_text_encoder" if TRAIN_TEXT_ENCODER else ""

#@markdown ## ADAM optimizer settings
# Optimizer, precision and learning-rate values exposed as Colab form fields.



#@markdown The exponential decay rate for the 1st moment estimates (the beta1 parameter for the Adam optimizer).
ADAM_BETA1 = 0.9 #@param {type:"number"}

#@markdown The exponential decay rate for the 2nd moment estimates (the beta2 parameter for the Adam optimizer).
ADAM_BETA2 = 0.999 #@param {type:"number"}

#@markdown Weight decay magnitude for the Adam optimizer.
ADAM_WEIGHT_DECAY = 1e-2 #@param {type:"number"}

#@markdown Epsilon value for the Adam optimizer.
ADAM_EPSILON = 1e-08 #@param {type:"number"}

#@markdown "fp16", "bf16", or "no" according to available VRAM
# Forwarded as --mixed_precision to both `accelerate launch` and train.py.
MIXED_PRECISION = "fp16" #@param{type: 'string'}

#@markdown ## Learning rate parameters
# Forwarded as --lr_scheduler.
LR_SCHEDULE = "constant" #@param ["linear", "cosine", "cosine_with_restarts", "polynomial", "constant", "constant_with_warmup"]
# Forwarded as --learning_rate.
LR = 1e-4 #@param{type: 'number'}
#@markdown If training the text encoder, a different learning rate can be applied
# Forwarded as --learning_rate_text.
LR_TEXT_ENCODER = 5e-5 #@param{type: 'number'}
# Forwarded as --lr_warmup_steps.
LR_WARMUP_STEPS = 50 #@param{type: 'number'}
#@markdown Applies only for cosine_with_restarts schedule
# Forwarded as --lr_cosine_num_cycles.
LR_COSINE_NUM_CYCLES = 5 #@param{type: 'number'}

In [None]:
# (Experimental) Data augmentation
# https://journalofbigdata.springeropen.com/articles/10.1186/s40537-019-0197-0/
#
# Transformations applied to both instance and class images. They reduce the
# manual work of cropping and preparing images, and can help when training a
# style, where an image may not contain one specific subject. In that case the
# images can be left uncropped and random cropping enabled, so that each
# iteration sees a random RESOLUTION x RESOLUTION chunk of the selected image.
# NOTE(review): the original notes also describe an AUGMENT_MIN_RESOLUTION
# setting for controlling how much of the image is cropped; no variable of that
# name is defined in the cells shown here.



#@markdown If not enabled, defaults to center crop (which will do nothing if your images are already square at the RESOLUTION set above).
AUGMENT_RANDOM_CROP = False #@param{type: 'boolean'}
# Center crop is the default; enabling random crop removes the switch.
AUGMENT_CENTER_CROP_FLAG = "" if AUGMENT_RANDOM_CROP else "--augment_center_crop"

#@markdown Randomly flip image horizontally. Not recommended if asymmetry is important (e.g., faces).
AUGMENT_HFLIP = False #@param{type: 'boolean'}
AUGMENT_HFLIP_FLAG = "--augment_hflip" if AUGMENT_HFLIP else ""

In [None]:

#@title # (Experimental) other training parameters



#@markdown Rank of LoRA update matrix
LORA_RANK = 4 #@param{type: 'number'}


#@markdown ## Exponentially-weight moving average weights (unet only). Will not run on Tesla T4 (out of memory).
USE_EMA = False #@param{type: 'boolean'}
# Command-line switch when EMA is on, empty string otherwise. The EMA_* values
# below are forwarded to the training command regardless of this switch.
USE_EMA_FLAG = "--use_ema" if USE_EMA else ""
EMA_INV_GAMMA = 1.0 #@param{type: 'number'}
EMA_POWER = 0.75 #@param{type: 'number'}
EMA_MIN_VALUE = 0 #@param{type: 'number'}
EMA_MAX_VALUE = 0.9999 #@param{type: 'number'}

In [None]:
#@title # Where should outputs get saved?

#@markdown Trained models (and intermediates) saved here
OUTPUT_DIR="/content/gdrive/MyDrive/experiment" #@param{type: 'string'}

#@markdown Training logs saved here
LOGGING_DIR="/content/logs/" #@param{type: 'string'}

# Create the log folder, including missing parents, when it is not there yet.
# Done in Python rather than through a shell `mkdir`, so the path is never
# re-parsed by a shell. An empty path is skipped, and exist_ok covers the folder
# appearing between the check and the call.
if LOGGING_DIR and not os.path.exists(LOGGING_DIR):
  os.makedirs(LOGGING_DIR, exist_ok=True)

LOG_GPU = True #@param{type: 'boolean'}
# Command-line switch for GPU logging: present when enabled, empty otherwise.
LOG_GPU_FLAG = "--log_gpu" if LOG_GPU else ""


In [None]:
#@title # Setup saving of intermediate models
#@markdown To save intermediate checkpoints, set START_SAVING_FROM_STEP < STEPS

#@markdown Number of steps between intermediate saves
SAVE_CHECKPOINT_EVERY = 500 #@param{type: 'number'}
# Fallback when no value is set (presumably a cleared form field, verify): use
# an interval longer than the whole run. Identity comparison is the correct
# test for None.
if SAVE_CHECKPOINT_EVERY is None:
  SAVE_CHECKPOINT_EVERY = STEPS+1

START_SAVING_FROM_STEP=500 #@param{type: 'number'}
# Fallback when no value is set: start saving only at the final step count.
if START_SAVING_FROM_STEP is None:
  START_SAVING_FROM_STEP=STEPS

#@markdown At each intermediate checkpoint, infer this many samples using SAVE_SAMPLE_PROMPT
N_SAVE_SAMPLES=3 #@param{type: 'number'}

#@markdown {SKS} is automatically replaced by INSTANCE_TOKEN. Give multiple prompts using // as a separator
SAVE_SAMPLE_PROMPT= "{SKS} penguin // close-up {SKS} penguin// {SKS} penguin riding a bicycle" #@param{type: 'string'}
# NOTE(review): the launch cell interpolates this variable into
# --save_sample_prompt="..."; a Python None would presumably arrive there as the
# text "None". Confirm how train.py treats that value.
if SAVE_SAMPLE_PROMPT=="":
  SAVE_SAMPLE_PROMPT=None
else:
  SAVE_SAMPLE_PROMPT=SAVE_SAMPLE_PROMPT.replace("{SKS}",INSTANCE_TOKEN)

#@markdown The negative prompt, on the other hand, applies to all SAVE_SAMPLE_PROMPTs
SAVE_SAMPLE_NEGATIVE_PROMPT="" #@param{type: 'string'}

# Train!

In [None]:
#@title ## Launch training
!lsb_release -a | grep Description
!pip freeze | grep diffusers
!pip freeze | grep lora-diffusion
!pip freeze | grep torchvision
!pip freeze | grep transformers
!pip freeze | grep xformers
!accelerate env

!accelerate launch \
    --mixed_precision=$MIXED_PRECISION \
    --num_machines=1 \
    --num_processes=1 \
    --dynamo_backend="no" \
    /content/Dreambooth_LoRA/train.py \
    --lora_rank=$LORA_RANK \
    $TRAIN_TEXT_ENCODER_FLAG \
    --pretrained_model_name_or_path=$MODEL_NAME_OR_PATH \
    --instance_data_dir="$INSTANCE_DIR" \
    --class_data_dir="$CLASS_DIR" \
    --output_dir="$OUTPUT_DIR" \
    --logging_dir="$LOGGING_DIR" \
    $LOG_GPU_FLAG \
    $ENABLE_PRIOR_PRESERVATION_FLAG \
    --prior_loss_weight=$PRIOR_LOSS_WEIGHT \
    --instance_prompt="$INSTANCE_PROMPT" \
    --class_prompt="$CLASS_PROMPT" \
    --seed=$SEED \
    --resolution=$RESOLUTION \
    --train_batch_size=$TRAIN_BATCH_SIZE \
    --gradient_accumulation_steps=$GRADIENT_ACCUMULATION_STEPS \
    $GRADIENT_CHECKPOINTING_FLAG \
    --mixed_precision=$MIXED_PRECISION \
    --use_8bit_adam \
    --adam_beta1=0.9 \
    --adam_beta2=0.999 \
    --adam_weight_decay=0.01 \
    --adam_epsilon=0.00000001 \
    --learning_rate=$LR \
    --learning_rate_text=$LR_TEXT_ENCODER \
    --lr_scheduler=$LR_SCHEDULE \
    --lr_warmup_steps=$LR_WARMUP_STEPS \
    --lr_cosine_num_cycles=$LR_COSINE_NUM_CYCLES \
    $USE_EMA_FLAG \
    --ema_inv_gamma=$EMA_INV_GAMMA \
    --ema_power=$EMA_POWER \
    --ema_min_value=$EMA_MIN_VALUE \
    --ema_max_value=$EMA_MAX_VALUE \
    --max_train_steps=$STEPS \
    --num_class_images=$MIN_NUM_CLASS_IMAGES \
    --sample_batch_size=$SAMPLE_BATCH_SIZE \
    --save_min_steps=$START_SAVING_FROM_STEP \
    --save_interval=$SAVE_CHECKPOINT_EVERY \
    --n_save_sample=$N_SAVE_SAMPLES \
    --save_sample_prompt="$SAVE_SAMPLE_PROMPT" \
    --save_sample_negative_prompt="$SAVE_SAMPLE_NEGATIVE_PROMPT" \
    $AUGMENT_CENTER_CROP_FLAG \
    $AUGMENT_HFLIP_FLAG

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m



Downloading (…)ch_model.safetensors:  20% 692M/3.44G [00:39<04:30, 10.2MB/s][A[A[A[A[A[A[A[A





Downloading (…)torch_model.fp16.bin:  55% 944M/1.72G [00:51<00:44, 17.5MB/s][A[A[A[A[A[A






Downloading (…).non_ema.safetensors:  20% 692M/3.44G [00:39<03:32, 12.9MB/s][A[A[A[A[A[A[A




Downloading (…)ch_model.safetensors:  97% 325M/335M [00:21<00:00, 12.6MB/s][A[A[A[A[A







Downloading (…)ch_model.safetensors:  20% 703M/3.44G [00:39<03:20, 13.7MB/s][A[A[A[A[A[A[A[A








Downloading (…)del.fp16.safetensors:  53% 912M/1.72G [00:49<00:36, 22.2MB/s][A[A[A[A[A[A[A[A[A

Downloading (…)ch_model.non_ema.bin:  21% 724M/3.44G [00:40<02:42, 16.7MB/s][A[A




Downloading (…)ch_model.safetensors: 100% 335M/335M [00:21<00:00, 15.6MB/s][A[A[A[A[A

Downloading (…)ch_model.non_ema.bin:  21% 734M/3.44G [00:40<02:04, 21.8MB/s][A[A





Downloading (…)torch_model.

# Close Colab instance

In [None]:
# Ask Colab to unassign the runtime backing this notebook once the cells above
# have finished. NOTE(review): files kept only on the runtime (for example the
# default LOGGING_DIR under /content) presumably do not survive this; confirm
# and copy them to Drive first if they are needed.
from google.colab import runtime
runtime.unassign()