---

📌 **This notebook has been updated in [jhj0517/finetuning-notebooks](https://github.com/jhj0517/finetuning-notebooks) repository!**

## Version : 1.0.0
---

In [None]:
#@title #(Optional) Check GPU

#@markdown To train an SDXL LoRA, at least 12GB of VRAM is recommended.
#@markdown <br>You can check your GPU setup before you start.
!nvidia-smi

In [1]:
#@title #1. Install Dependencies
#@markdown This notebook is powered by https://github.com/huggingface/diffusers
!git clone https://github.com/huggingface/diffusers
%cd diffusers
!pip install -e .

# Cherry picked from https://github.com/huggingface/diffusers/blob/main/examples/text_to_image/requirements_sdxl.txt
!pip install ftfy
!pip install datasets

Cloning into 'diffusers'...
remote: Enumerating objects: 81178, done.[K
remote: Counting objects: 100% (18056/18056), done.[K
remote: Compressing objects: 100% (1376/1376), done.[K
remote: Total 81178 (delta 17404), reused 16683 (delta 16678), pack-reused 63122 (from 2)[K
Receiving objects: 100% (81178/81178), 58.26 MiB | 31.18 MiB/s, done.
Resolving deltas: 100% (59752/59752), done.
/content/diffusers
Obtaining file:///content/diffusers
  Installing build dependencies ... [?25l[?25hdone
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Getting requirements to build editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: diffusers
  Building editable for diffusers (pyproject.toml) ... [?25l[?25hdone
  Created wheel for diffusers: filename=diffusers-0.33.0.dev0-0.editable-py3-none-any.whl size=11221 sha256=a3b83d9c82c356c764c853c7e94c279c9223527a6883ac7e1c5cd34537b7cac3
  S

In [2]:
#@title # 2. (Optional) Mount Google Drive

#@markdown Mounting Google Drive is optional, but recommended: you can then use a Google Drive path for your training image dataset.

#@markdown The dataset should have the following structure:

#@markdown Each image file should have a corresponding text file (`.txt`) with the same name.
#@markdown The text file contains the prompt associated with the image.

#@markdown ### Example File Structure:
#@markdown ```
#@markdown your-dataset/
#@markdown ├── a (1).png         # Image file
#@markdown ├── a (1).txt         # Corresponding prompt for a (1).png
#@markdown ├── a (2).png         # Another image file
#@markdown ├── a (2).txt         # Corresponding prompt for a (2).png
#@markdown ```

from google.colab import drive
import os
# Mount point used by the default dataset/output paths of the training cell.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
#@title # 3. (Optional) Register Huggingface Token To Download Base Model

#@markdown If you don't have the complete base model files ([stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/tree/main)) in your Drive, you need to sign in to Hugging Face to download the model.

#@markdown Get your token from https://huggingface.co/settings/tokens and register it in Colab's Secrets as **`HF_TOKEN`**, so that it can be used from any notebook. ( 'Read' permission is enough )

#@markdown To register secrets in Colab, click on the key-shaped icon in the left panel and enter your **`HF_TOKEN`** like this:

#@markdown ![image](https://media.githubusercontent.com/media/jhj0517/finetuning-notebooks/master/docs/screenshots/colab_secrets.png)

import getpass
import os
from google.colab import userdata

# Reading the secret raises when it is not registered or when this notebook has not
# been granted access to it. This step is optional, so neither case may abort the cell.
try:
    hf_token = userdata.get('HF_TOKEN')
except (userdata.SecretNotFoundError, userdata.NotebookAccessError) as e:
    print(f"Could not read the HF_TOKEN secret: {e}")
    hf_token = None

# Fall back to a hidden prompt so the token never ends up in the notebook source/output.
if not hf_token:
    hf_token = getpass.getpass("Hugging Face token (leave blank to skip): ").strip()

# os.environ only accepts strings, so export the variable only when a token is available.
if hf_token:
    os.environ['HF_TOKEN'] = hf_token
    print("HF_TOKEN environment variable has been set.")
else:
    print("No Hugging Face token provided; HF_TOKEN has not been set.")

HF_TOKEN environment variable has been set.


In [3]:
#@title # 4. Train with Parameters
import os
import toml
import json
import re

#@markdown ## Paths Configuration
DATASET_DIR = "/content/drive/MyDrive/finetuning-notebooks/dataset/dog" # @param {type:"string"}
OUTPUT_DIR = "/content/drive/MyDrive/finetuning-notebooks/sdxl/outputs/" # @param {type:"string"}
OUTPUT_NAME = "My-SDXL-LoRA-V2-sks-dog" # @param {type:"string"}

# Each run writes into its own sub-folder of OUTPUT_DIR, named after OUTPUT_NAME.
OUTPUT_DIR = os.path.join(OUTPUT_DIR, OUTPUT_NAME)
os.makedirs(OUTPUT_DIR, exist_ok=True)

#@markdown ## Base Model Configuration
BASE_MODEL_PATH_OR_ID = "stabilityai/stable-diffusion-xl-base-1.0" # @param {type:"string"}
BASE_VAE_PATH_OR_ID = "madebyollin/sdxl-vae-fp16-fix" # @param {type:"string"}

#@markdown ## Dataset Configuration
# Not exposed as a form field; passed to create_metadata_jsonl() to locate the
# caption file that belongs to each image.
CAPTION_EXTENSION = ".txt"
RESOLUTION = 1024 # @param {type:"integer"}
# Must match the key written for each caption in metadata.jsonl ("text").
CAPTION_COLUMN = "text" # @param {type:"string"}

#@markdown ## Training Settings
RANDOM_FLIP = True # @param {type:"boolean"}
TRAIN_BATCH_SIZE = 8 # @param {type:"integer"}
NUM_TRAIN_EPOCHS = 50 # @param {type:"integer"}
CHECKPOINTING_STEPS = 10 # @param {type:"integer"}
LEARNING_RATE = 1e-4 # @param {type:"number"}
LR_SCHEDULER = "cosine" # @param ["linear", "cosine", "cosine_with_restarts", "polynomial", "constant", "constant_with_warmup"]
LR_WARMUP_STEPS = 10 # @param {type:"integer"}
GRADIENT_ACCUMULATION_STEPS = 1 # @param {type:"integer"}
MIXED_PRECISION = "bf16" # @param ["no", "fp16", "bf16"]
SEED = 77 # @param {type:"integer"}
GRADIENT_CHECKPOINTING = True # @param {type:"boolean"}
USE_8_BIT_ADAM = True # @param {type:"boolean"}
ENABLE_XFORMERS_MEMORY_EFFICIENT_ATTENTION = True # @param {type:"boolean"}


#@markdown ## Network Settings
RANK = 8 # @param {type:"integer"}


#@markdown ## Validation Configuration
# When True, `--report_to="wandb"` is appended to the training command below.
ENABLE_SAMPLE = True # @param {type:"boolean"}
VALIDATION_PROMPT = "a sks dog is looking above"  # @param {type:"string"}
NUM_VALIDATION_IMAGES = 4 # @param {type:"integer"}

# Write Command
# The command is later joined into one string and handed to the shell, so every
# argument is escaped with shlex.quote(). This keeps paths and prompts that contain
# spaces, quotes or other shell metacharacters intact (previously --output_dir was
# not quoted at all and the other values were wrapped in unescaped double quotes).
import shlex

train_script = "/content/diffusers/examples/text_to_image/train_text_to_image_lora_sdxl.py"

# `--name=value` options that are always passed to the training script.
value_options = {
    "pretrained_model_name_or_path": BASE_MODEL_PATH_OR_ID,
    "pretrained_vae_model_name_or_path": BASE_VAE_PATH_OR_ID,
    "train_data_dir": DATASET_DIR,
    "caption_column": CAPTION_COLUMN,
    "resolution": RESOLUTION,
    "train_batch_size": TRAIN_BATCH_SIZE,
    "num_train_epochs": NUM_TRAIN_EPOCHS,
    "checkpointing_steps": CHECKPOINTING_STEPS,
    "learning_rate": LEARNING_RATE,
    "lr_scheduler": LR_SCHEDULER,
    "lr_warmup_steps": LR_WARMUP_STEPS,
    "mixed_precision": MIXED_PRECISION,
    "seed": SEED,
    "output_dir": OUTPUT_DIR,
    "gradient_accumulation_steps": GRADIENT_ACCUMULATION_STEPS,
    "rank": RANK,
}

# Validation sampling is only requested when ENABLE_SAMPLE is on. Passing
# --validation_prompt unconditionally made the script generate validation images
# even with the toggle switched off.
if ENABLE_SAMPLE:
    value_options["validation_prompt"] = VALIDATION_PROMPT
    value_options["num_validation_images"] = NUM_VALIDATION_IMAGES
    value_options["report_to"] = "wandb"

# Boolean switches, emitted (without a value) only when enabled.
flag_options = {
    "random_flip": RANDOM_FLIP,
    "gradient_checkpointing": GRADIENT_CHECKPOINTING,
    "use_8bit_adam": USE_8_BIT_ADAM,
    "enable_xformers_memory_efficient_attention": ENABLE_XFORMERS_MEMORY_EFFICIENT_ATTENTION,
}

command_parts = ["accelerate", "launch", shlex.quote(train_script)]
command_parts.extend(
    shlex.quote(f"--{name}={value}") for name, value in value_options.items()
)
command_parts.extend(f"--{name}" for name, enabled in flag_options.items() if enabled)

# Write metadata.jsonl for the dataset
def _pick_file_number(dataset_dir, preferred, used_numbers, image_path, image_ext,
                      caption_path, caption_extension):
    """Return a number whose ``NNNN`` image/caption names can be used without overwriting anything.

    ``preferred`` is tried first (when not ``None``); otherwise the smallest free
    number starting at 1 is returned. A number is free when it has not been handed
    out during this run and neither target file exists on disk, unless the existing
    file is the very file being renamed.
    """

    def is_available(number):
        if number in used_numbers:
            return False
        targets = (
            (os.path.join(dataset_dir, f"{number:04d}{image_ext}"), image_path),
            (os.path.join(dataset_dir, f"{number:04d}{caption_extension}"), caption_path),
        )
        return all(target == source or not os.path.exists(target) for target, source in targets)

    if preferred is not None and is_available(preferred):
        return preferred

    number = 1
    while not is_available(number):
        number += 1
    return number


def create_metadata_jsonl(dataset_dir, caption_extension=".txt"):
    """Normalise the dataset file names and write ``metadata.jsonl`` next to the images.

    Every ``.png`` / ``.jpg`` / ``.jpeg`` file in ``dataset_dir`` that has a caption
    file with the same base name is renamed to ``NNNN<ext>`` (its caption to
    ``NNNN<caption_extension>``) and recorded as ``{"file_name": ..., "text": ...}``
    in ``metadata.jsonl``. Images without a caption are reported and left untouched.

    The number is taken from an already normalised name (``0007``) or from a
    ``(N)`` suffix in the name; if that number is taken, or the name has neither,
    the smallest free number is used instead.

    Differences from a naive rename, all of which prevent silent data loss:
      * an existing file is never overwritten (two sources mapping to the same
        number no longer clobber each other);
      * files are processed in sorted order, so the result is deterministic and
        re-running the function on an already processed folder changes nothing;
      * the image keeps its real (lower-cased) extension instead of being
        relabelled as ``.png``;
      * a metadata row is only written after both renames succeeded.

    Args:
        dataset_dir: Directory that contains the images and caption files.
        caption_extension: Extension (including the dot) of the caption files.

    Returns:
        None. The result is the renamed files and ``<dataset_dir>/metadata.jsonl``.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    # Sorted: os.listdir() order is arbitrary, which made the numbering unpredictable.
    image_files = sorted(
        f for f in os.listdir(dataset_dir) if f.lower().endswith(image_extensions)
    )

    metadata = []
    used_numbers = set()

    for image_file in image_files:
        base_name, image_ext = os.path.splitext(image_file)
        image_ext = image_ext.lower()
        caption_file = f"{base_name}{caption_extension}"
        image_path = os.path.join(dataset_dir, image_file)
        caption_path = os.path.join(dataset_dir, caption_file)

        if not os.path.exists(caption_path):
            print(f"Warning: Caption file {caption_file} not found for {image_file}")
            continue

        try:
            with open(caption_path, "r", encoding="utf-8") as f:
                caption = f.read().strip()

            # An all-digit name was produced by a previous run: keep its number so
            # that re-running is a no-op. Otherwise honour a "(N)" suffix.
            already_normalised = re.fullmatch(r"[0-9]+", base_name)
            numbered = re.search(r"\((\d+)\)", base_name)
            if already_normalised:
                preferred = int(base_name)
            elif numbered:
                preferred = int(numbered.group(1))
            else:
                preferred = None

            file_number = _pick_file_number(
                dataset_dir, preferred, used_numbers,
                image_path, image_ext, caption_path, caption_extension,
            )
            new_file_name = f"{file_number:04d}{image_ext}"
            new_image_path = os.path.join(dataset_dir, new_file_name)
            new_caption_path = os.path.join(dataset_dir, f"{file_number:04d}{caption_extension}")

            if new_image_path != image_path:
                os.rename(image_path, new_image_path)
            if new_caption_path != caption_path:
                os.rename(caption_path, new_caption_path)

            # Record the pair only now, so metadata never points at a file that
            # failed to be renamed.
            used_numbers.add(file_number)
            metadata.append({"file_name": new_file_name, "text": caption})

        except (OSError, UnicodeError) as e:
            print(f"Error processing {image_file}: {e}")

    metadata_path = os.path.join(dataset_dir, "metadata.jsonl")
    with open(metadata_path, "w", encoding="utf-8") as outfile:
        for item in metadata:
            json.dump(item, outfile, ensure_ascii=False)
            outfile.write("\n")

# Prepare the dataset first: this renames the image/caption files in DATASET_DIR and
# writes metadata.jsonl there, which is the directory passed as --train_data_dir.
create_metadata_jsonl(DATASET_DIR, CAPTION_EXTENSION)
print(f"{os.path.join(DATASET_DIR, 'metadata.jsonl')} has written.")

# Train
# Write a default accelerate configuration before launching.
!accelerate config default
# Join the argument list into a single shell command; it is printed so the exact
# invocation is visible in the cell output, then executed.
command = " ".join(command_parts)
print(command)
!{command}

/content/drive/MyDrive/finetuning-notebooks/dataset/dog/metadata.jsonl has written.
accelerate configuration saved at /root/.cache/huggingface/accelerate/default_config.yaml
accelerate launch "/content/diffusers/examples/text_to_image/train_text_to_image_lora_sdxl.py" --pretrained_model_name_or_path="stabilityai/stable-diffusion-xl-base-1.0" --pretrained_vae_model_name_or_path="madebyollin/sdxl-vae-fp16-fix" --train_data_dir="/content/drive/MyDrive/finetuning-notebooks/dataset/dog" --caption_column=text --resolution=1024 --train_batch_size=8 --num_train_epochs=50 --checkpointing_steps=10 --learning_rate=0.0001 --lr_scheduler=cosine --lr_warmup_steps=10 --mixed_precision=bf16 --seed=77 --output_dir=/content/drive/MyDrive/finetuning-notebooks/sdxl/outputs/My-SDXL-LoRA-V2-sks-dog --validation_prompt="a sks dog is looking above" --num_validation_images=4 --gradient_accumulation_steps=1 --rank=8 --random_flip --report_to="wandb" --gradient_checkpointing --use_8bit_adam --enable_xformers_mem