In [1]:
# Uncomment and run once to install the project's dependencies with Poetry.
#!poetry install

In [2]:
# ---------------------------------------------------------------------------
# User-tunable settings. Everything a reader is expected to edit lives here;
# the later cells only reference these names.
# ---------------------------------------------------------------------------

# Base model identifier that will be fine-tuned.
pretrained_model_name_or_path = "black-forest-labs/FLUX.1-dev"
# Location of the training images -- update this for your data path.
instance_data_dir = "/Volumes/ml/datasets/test_datasets/single_image_dataset"

# Your public model name after it's pushed to the hub.
hub_model_id = "simpletuner-lora"
# Project name passed to the experiment tracker configuration.
tracker_project_name = "flux-training"

# Prompt used for validation.
validation_prompt = "A photo-realistic image of a cat"

# Optimisation hyperparameters.
learning_rate = 1e-4
train_batch_size = 1

# Precision of the base model weights.
# choices: int8-quanto, fp8-quanto, no_change
# (mac and a100/h100 users get int4 and int2 as well)
base_model_precision = "no_change"

In [3]:
import json
import os

# LyCORIS adapter settings. This dict is serialised verbatim to
# config/lycoris_config.json, which the training config references through
# its "lycoris_config" entry.
lycoris_config = {
    "algo": "lokr",
    "multiplier": 1.0,
    "linear_dim": 10000,
    "linear_alpha": 1,
    # Default factor; the per-module entries below carry their own values.
    "factor": 12,
    "apply_preset": {
        # Module classes the adapter is applied to.
        "target_module": [
            "Attention",
            "FeedForward"
        ],
        # Per-module-class settings (presumably override the top-level
        # "factor" for that class -- confirm against the LyCORIS preset docs).
        "module_algo_map": {
            "Attention": {
                "factor": 12
            },
            "FeedForward": {
                "factor": 6
            }
        }
    }
}

# write to config/lycoris_config.json
# Create the directory first so this cell also works on a fresh checkout
# where config/ does not exist yet (open(..., "w") does not create parents).
os.makedirs("config", exist_ok=True)
with open("config/lycoris_config.json", "w") as f:
    json.dump(lycoris_config, f)

In [4]:
# Imported here so the cell does not depend on an earlier cell having
# imported json, and so the config directory can be created if missing.
import json
import os

# Main training configuration. The dict is written to config/config.json and
# is also passed (via normalize_args) to the Trainer later in the notebook.
# Values that a user is expected to tune come from the settings cell above.
training_config = {
    "mixed_precision":"bf16",
    "model_type":"lora",
    "pretrained_model_name_or_path":pretrained_model_name_or_path,
    "gradient_checkpointing":True,
    "cache_dir": "cache",
    "set_grads_to_none":True,
    "gradient_accumulation_steps":1,
    "resume_from_checkpoint":"latest",
    "snr_gamma":5,
    # Training length is bounded by max_train_steps; epochs are set to 0.
    "num_train_epochs":0,
    "max_train_steps":10000,
    "metadata_update_interval":65,
    "optimizer":"adamw_bf16",
    "learning_rate":learning_rate,
    "lr_scheduler":"polynomial",
    "seed":42,
    "lr_warmup_steps":100,
    "output_dir":"output/models",
    "non_ema_revision": False,
    "aspect_bucket_rounding":2,
    "inference_scheduler_timestep_spacing":"trailing",
    "training_scheduler_timestep_spacing":"trailing",
    "report_to":"wandb",
    "lr_end":1e-8,
    "compress_disk_cache":True,
    "push_to_hub":True,
    "hub_model_id":hub_model_id,
    "push_checkpoints_to_hub":True,
    "model_family":"flux",
    "disable_benchmark":False,
    # Spelled out in full ("train_batch_size") rather than as the shortened
    # "train_batch", which would only be accepted if the argument parser
    # resolves unambiguous option prefixes.
    "train_batch_size":train_batch_size,
    "max_workers":32,
    "read_batch_size":25,
    "write_batch_size":64,
    "caption_dropout_probability":0.1,
    "torch_num_threads":8,
    "image_processing_batch_size":32,
    "vae_batch_size":4,
    "validation_prompt":validation_prompt,
    "num_validation_images":1,
    "validation_num_inference_steps":20,
    "validation_seed":42,
    "minimum_image_size":0,
    "resolution":1024,
    "validation_resolution":"1024x1024",
    "resolution_type":"pixel_area",
    # Points at the LyCORIS settings file written by the previous cell.
    "lycoris_config":"config/lycoris_config.json",
    "lora_type":"lycoris",
    "base_model_precision":base_model_precision,
    "checkpointing_steps":500,
    "checkpoints_total_limit":5,
    "validation_steps":500,
    # The tracker run is named after the hub model id.
    "tracker_run_name":hub_model_id,
    "tracker_project_name":tracker_project_name,
    "validation_guidance":3.0,
    "validation_guidance_real":1.0,
    "validation_guidance_rescale":0.0,
    "validation_negative_prompt":"blurry, cropped, ugly",
}

# write to config/config.json
# Ensure the target directory exists; open(..., "w") does not create it.
os.makedirs("config", exist_ok=True)
with open("config/config.json", "w") as f:
    json.dump(training_config, f, indent=4)

In [5]:
import json
import os


def _image_backend(name_suffix, resolution):
    """Build one local image-dataset entry for the dataloader config.

    Args:
        name_suffix: Text appended to both the dataset id
            ("my-dataset-<suffix>") and its VAE cache directory
            ("cache/vae-<suffix>").
        resolution: Value stored under "resolution" (interpreted according
            to "resolution_type", here "pixel_area").

    Returns:
        A dict with the same keys, in the same order, for every image
        dataset; only the id, resolution and cache directory differ.
    """
    return {
        "id": f"my-dataset-{name_suffix}",
        "type": "local",
        "instance_data_dir": instance_data_dir,
        # NOTE(review): the "-crop" datasets also use crop=False, which makes
        # them identical to their non-crop counterparts apart from id and
        # cache dir -- confirm whether cropping was meant to be enabled there.
        "crop": False,
        "crop_style": "random",
        "minimum_image_size": 128,
        "resolution": resolution,
        "resolution_type": "pixel_area",
        # Integer repeat count (a quoted "4" would be written to the JSON as
        # a string and depend on the consumer coercing it).
        "repeats": 4,
        "metadata_backend": "discovery",
        "caption_strategy": "filename",
        "cache_dir_vae": f"cache/vae-{name_suffix}",
    }


# Dataset list consumed from config/multidatabackend.json: four image
# datasets over the same source directory plus one text-embed cache backend.
dataloader_config = [
    _image_backend("512", 512),
    _image_backend("1024", 1024),
    _image_backend("512-crop", 512),
    _image_backend("1024-crop", 1024),
    {
        "id": "text-embed-cache",
        "dataset_type": "text_embeds",
        "default": True,
        "type": "local",
        "cache_dir": "cache/text"
    }
]

# write to config/multidatabackend.json
# Ensure the target directory exists; open(..., "w") does not create it.
os.makedirs("config", exist_ok=True)
with open("config/multidatabackend.json", "w") as f:
    json.dump(dataloader_config, f)

In [6]:
from helpers.training.trainer import Trainer
from helpers.training.state_tracker import StateTracker
from helpers import log_format
import logging
from os import environ

# Notebook-wide logger. Its level is read from the SIMPLETUNER_LOG_LEVEL
# environment variable and falls back to "INFO" when that is unset.
log_level = environ.get("SIMPLETUNER_LOG_LEVEL", "INFO")
logger = logging.getLogger("SimpleTuner")
logger.setLevel(log_level)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
from helpers.configuration.json_file import normalize_args
import os
# Set the two environment variables before touching the config path, then
# point StateTracker at the config directory and turn the training_config
# dict into the form the Trainer is constructed from.
os.environ.update({"CONFIG_BACKEND": "cmd", "ENV": "default"})
StateTracker.set_config_path("config/")
loaded_config = normalize_args(training_config)



In [8]:
# Construct the Trainer from the normalised configuration. On failure the
# error and full traceback are logged, then the exception propagates so the
# notebook stops at this cell.
try:
    trainer = Trainer(loaded_config)
except Exception as e:
    import traceback
    logger.error(f"Failed to create Trainer: {e}, {traceback.format_exc()}")
    # Bare `raise` re-raises the active exception with its original traceback.
    raise

optimizer: {'precision': 'bf16', 'default_settings': {'betas': (0.9, 0.999), 'weight_decay': 0.01, 'eps': 1e-06}, 'class': <class 'helpers.training.optimizers.adamw_bfloat16.AdamWBF16'>}


2024-08-31 20:56:05,043 [INFO] (ArgsParser) VAE Model: black-forest-labs/FLUX.1-dev
2024-08-31 20:56:05,044 [INFO] (ArgsParser) Default VAE Cache location: 
2024-08-31 20:56:05,044 [INFO] (ArgsParser) Text Cache location: cache


Model family: flux


In [9]:
# Basic trainer setup: webhook, noise schedule, seed, and the Hugging Face
# Hub session (the recorded run logged a successful Hub login here).
try:
    trainer.configure_webhook()
    trainer.init_noise_schedule()
    trainer.init_seed()

    trainer.init_huggingface_hub()
except Exception as e:
    logger.error(f"Failed to configure Trainer: {e}")
    # Bare `raise` re-raises the active exception with its original traceback.
    raise

2024-08-31 20:56:05,554 [INFO] (helpers.training.trainer) Logged into Hugging Face Hub as 'bghira'


In [10]:
# Load the models used for preprocessing. In the recorded run this loaded
# the VAE, the tokenizers, and the CLIP-L and T5 XXL text encoders.
try:
    trainer.init_preprocessing_models()
except Exception as e:
    logger.error(f"Failed to initialize preprocessing models: {e}")
    # Bare `raise` re-raises the active exception with its original traceback.
    raise

2024-08-31 20:56:05,559 [INFO] (helpers.training.trainer) Load VAE: black-forest-labs/FLUX.1-dev
2024-08-31 20:56:05,843 [INFO] (helpers.training.trainer) Loading VAE onto accelerator, converting from torch.float32 to torch.bfloat16
2024-08-31 20:56:05,952 [INFO] (helpers.training.trainer) Load tokenizers
You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers
2024-08-31 20:56:06,440 [INFO] (helpers.training.text_encoding) Loading OpenAI CLIP-L text encoder from black-forest-labs/FLUX.1-dev/text_encoder..
2024-08-31 20:56:06,683 [INFO] (helpers.training.text_encoding) Loading T5 XXL v1.1 text encoder from black-forest-labs/FLUX.1-dev/text_encoder_2..
Downloading shards: 100%|██████████| 2/2 [00:00<00:00, 2430.07it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  9.30it/s]
2024-08-31 20:56:08,494 [INFO] (helpers.training.trainer) Moving text encoder to GPU.
2024-08-31 20:56:08,736 [INFO] (helpers.training.trainer) Moving text encoder 2 t

In [11]:
# Configure the data backends. The recorded run read
# config/multidatabackend.json, set up the text-embed cache, and then
# configured each image dataset (bucketing, captions, VAE latent cache).
try:
    trainer.init_data_backend()
except Exception as e:
    logger.error(f"Failed to initialize data backend: {e}")
    # Bare `raise` re-raises the active exception with its original traceback.
    raise

2024-08-31 20:56:14,161 [INFO] (DataBackendFactory) Loading data backend config from config/multidatabackend.json
2024-08-31 20:56:14,162 [INFO] (DataBackendFactory) Configuring text embed backend: text-embed-cache
Loading pipeline components...: 100%|██████████| 5/5 [00:00<00:00, 1132.49it/s]
2024-08-31 20:56:14,337 [INFO] (TextEmbeddingCache) (Rank: 0) (id=text-embed-cache) Listing all text embed cache entries
2024-08-31 20:56:14,339 [INFO] (DataBackendFactory) Pre-computing null embedding
2024-08-31 20:56:19,345 [INFO] (DataBackendFactory) Completed loading text embed services.
2024-08-31 20:56:19,347 [INFO] (DataBackendFactory) Configuring data backend: my-dataset-512
2024-08-31 20:56:19,351 [INFO] (DataBackendFactory) (id=my-dataset-512) Loading bucket manager.
2024-08-31 20:56:19,352 [INFO] (DiscoveryMetadataBackend) Checking for cache file: /Volumes/ml/datasets/test_datasets/single_image_dataset/aspect_ratio_bucket_indices_my-dataset-512.json
2024-08-31 20:56:19,353 [INFO] (Disc

(Rank: 0)  | Bucket     | Image Count (per-GPU)
------------------------------
(Rank: 0)  | 1.0        | 10          
(Rank: 0)  | 0.7        | 1           


2024-08-31 20:56:19,365 [INFO] (DataBackendFactory) (id=my-dataset-512) Collecting captions.
2024-08-31 20:56:19,367 [INFO] (DataBackendFactory) (id=my-dataset-512) Initialise text embed pre-computation using the filename caption strategy. We have 11 captions to process.
2024-08-31 20:56:19,368 [INFO] (DataBackendFactory) (id=my-dataset-512) Completed processing 11 captions.
2024-08-31 20:56:19,369 [INFO] (DataBackendFactory) (id=my-dataset-512) Creating VAE latent cache.
2024-08-31 20:56:19,370 [INFO] (DataBackendFactory) (id=my-dataset-512) Discovering cache objects..
2024-08-31 20:56:19,371 [INFO] (DataBackendFactory) Configured backend: {'id': 'my-dataset-512', 'config': {'repeats': '4', 'crop': False, 'crop_aspect': 'square', 'crop_style': 'random', 'disable_validation': False, 'resolution': 0.262144, 'resolution_type': 'area', 'caption_strategy': 'filename', 'instance_data_dir': '/Volumes/ml/datasets/test_datasets/single_image_dataset', 'maximum_image_size': None, 'target_downsam

(Rank: 0)  | Bucket     | Image Count (per-GPU)
------------------------------
(Rank: 0)  | 1.0        | 10          
(Rank: 0)  | 0.65       | 1           


2024-08-31 20:56:19,384 [INFO] (DataBackendFactory) (id=my-dataset-1024) Collecting captions.
2024-08-31 20:56:19,385 [INFO] (DataBackendFactory) (id=my-dataset-1024) Initialise text embed pre-computation using the filename caption strategy. We have 11 captions to process.
2024-08-31 20:56:19,386 [INFO] (DataBackendFactory) (id=my-dataset-1024) Completed processing 11 captions.
2024-08-31 20:56:19,386 [INFO] (DataBackendFactory) (id=my-dataset-1024) Creating VAE latent cache.
2024-08-31 20:56:19,387 [INFO] (DataBackendFactory) (id=my-dataset-1024) Discovering cache objects..
2024-08-31 20:56:19,389 [INFO] (DataBackendFactory) Configured backend: {'id': 'my-dataset-1024', 'config': {'repeats': '4', 'crop': False, 'crop_aspect': 'square', 'crop_style': 'random', 'disable_validation': False, 'resolution': 1.048576, 'resolution_type': 'area', 'caption_strategy': 'filename', 'instance_data_dir': '/Volumes/ml/datasets/test_datasets/single_image_dataset', 'maximum_image_size': None, 'target_d

(Rank: 0)  | Bucket     | Image Count (per-GPU)
------------------------------
(Rank: 0)  | 1.0        | 10          
(Rank: 0)  | 0.7        | 1           


2024-08-31 20:56:19,401 [INFO] (DataBackendFactory) (id=my-dataset-512-crop) Collecting captions.
2024-08-31 20:56:19,402 [INFO] (DataBackendFactory) (id=my-dataset-512-crop) Initialise text embed pre-computation using the filename caption strategy. We have 11 captions to process.
2024-08-31 20:56:19,403 [INFO] (DataBackendFactory) (id=my-dataset-512-crop) Completed processing 11 captions.
2024-08-31 20:56:19,404 [INFO] (DataBackendFactory) (id=my-dataset-512-crop) Creating VAE latent cache.
2024-08-31 20:56:19,404 [INFO] (DataBackendFactory) (id=my-dataset-512-crop) Discovering cache objects..
2024-08-31 20:56:19,406 [INFO] (DataBackendFactory) Configured backend: {'id': 'my-dataset-512-crop', 'config': {'repeats': '4', 'crop': False, 'crop_aspect': 'square', 'crop_style': 'random', 'disable_validation': False, 'resolution': 0.262144, 'resolution_type': 'area', 'caption_strategy': 'filename', 'instance_data_dir': '/Volumes/ml/datasets/test_datasets/single_image_dataset', 'maximum_imag

(Rank: 0)  | Bucket     | Image Count (per-GPU)
------------------------------
(Rank: 0)  | 1.0        | 10          
(Rank: 0)  | 0.65       | 1           


2024-08-31 20:56:19,418 [INFO] (DataBackendFactory) (id=my-dataset-1024-crop) Collecting captions.
2024-08-31 20:56:19,419 [INFO] (DataBackendFactory) (id=my-dataset-1024-crop) Initialise text embed pre-computation using the filename caption strategy. We have 11 captions to process.
2024-08-31 20:56:19,420 [INFO] (DataBackendFactory) (id=my-dataset-1024-crop) Completed processing 11 captions.
2024-08-31 20:56:19,421 [INFO] (DataBackendFactory) (id=my-dataset-1024-crop) Creating VAE latent cache.
2024-08-31 20:56:19,421 [INFO] (DataBackendFactory) (id=my-dataset-1024-crop) Discovering cache objects..
2024-08-31 20:56:19,423 [INFO] (DataBackendFactory) Configured backend: {'id': 'my-dataset-1024-crop', 'config': {'repeats': '4', 'crop': False, 'crop_aspect': 'square', 'crop_style': 'random', 'disable_validation': False, 'resolution': 1.048576, 'resolution_type': 'area', 'caption_strategy': 'filename', 'instance_data_dir': '/Volumes/ml/datasets/test_datasets/single_image_dataset', 'maximu

In [12]:
# Prepare validation prompts. The recorded run logged that the negative
# prompt embed for validations is precomputed at this step.
try:
    trainer.init_validation_prompts()
except Exception as e:
    logger.error(f"Failed to initialize validation prompts: {e}")
    # Bare `raise` re-raises the active exception with its original traceback.
    raise

2024-08-31 20:56:21,375 [INFO] (validation) Precomputing the negative prompt embed for validations.


In [13]:
# Unload the text encoders, which are not being trained; the recorded run
# reported about 9.1 GB of VRAM freed by this step.
trainer.init_unload_text_encoder()

2024-08-31 20:56:21,936 [INFO] (helpers.training.trainer) Unloading text encoders, as they are not being trained.
2024-08-31 20:56:22,832 [INFO] (helpers.training.trainer) After nuking text encoders from orbit, we freed 9.1 GB of VRAM. The real memories were the friends we trained a model on along the way.


In [14]:
# Unload the VAE; the recorded run reported 163.84 MB of VRAM freed.
trainer.init_unload_vae()

2024-08-31 20:56:22,994 [INFO] (helpers.training.trainer) After nuking the VAE from orbit, we freed 163.84 MB of VRAM.


In [15]:
# Load the base model (the recorded run fetched 3 files at this step).
trainer.init_load_base_model()

Fetching 3 files: 100%|██████████| 3/3 [00:00<00:00, 64198.53it/s]


In [16]:
# Precision setup step; produced no output in the recorded run.
# NOTE(review): presumably applies base_model_precision -- confirm.
trainer.init_precision()

In [17]:
# ControlNet setup step; produced no output in the recorded run.
# NOTE(review): presumably a no-op for this LoRA configuration -- confirm.
trainer.init_controlnet_model()

In [18]:
# Model-freezing step; produced no output in the recorded run.
# NOTE(review): presumably freezes the non-trained weights -- confirm.
trainer.init_freeze_models()

In [19]:
# Create the trainable adapter. The recorded run used LyCORIS training mode
# with the lokr algorithm, creating 342 LokrModule modules with
# 97,165,392 parameters in total.
trainer.init_trainable_peft_adapter()

2024-08-31 20:56:24,559 [INFO] (helpers.training.trainer) Using lycoris training mode


2024-08-31 20:56:24|[LyCORIS]-INFO: Using rank adaptation algo: lokr
2024-08-31 20:56:24|[LyCORIS]-INFO: Use Dropout value: 0.0
2024-08-31 20:56:24|[LyCORIS]-INFO: Create LyCORIS Module
2024-08-31 20:56:24|[LyCORIS]-INFO: create LyCORIS: 342 modules.
2024-08-31 20:56:24|[LyCORIS]-INFO: module type table: {'LokrModule': 342}


2024-08-31 20:56:24,657 [INFO] (helpers.training.trainer) LyCORIS network has been initialized with 97,165,392 parameters


In [20]:
# EMA model setup step; produced no output in the recorded run.
trainer.init_ema_model()

In [None]:
# Move the models to the accelerator device.
# NOTE(review): this cell has no execution count in the saved notebook, so it
# was not run as part of the recorded session -- run it when executing the
# notebook top to bottom.
trainer.move_models(destination="accelerator")

In [21]:
# Validation setup step; produced no output in the recorded run.
trainer.init_validations()

In [22]:
# Base-model benchmark step; produced no output in the recorded run.
# NOTE(review): presumably governed by the "disable_benchmark" setting -- confirm.
trainer.init_benchmark_base_model()

In [23]:
# Resume/prepare step. The recorded run logged the learning rate (0.0001)
# and the AdamWBF16 optimizer class with its arguments here.
trainer.resume_and_prepare()

2024-08-31 20:56:24,674 [INFO] (helpers.training.trainer) Learning rate: 0.0001
2024-08-31 20:56:24,676 [INFO] (helpers.training.optimizer_param) cls: <class 'helpers.training.optimizers.adamw_bfloat16.AdamWBF16'>, settings: {'betas': (0.9, 0.999), 'weight_decay': 0.01, 'eps': 1e-06}
2024-08-31 20:56:24,679 [INFO] (helpers.training.trainer) Optimizer arguments, weight_decay=0.01 eps=1e-08, extra_arguments={'lr': 0.0001, 'betas': (0.9, 0.999), 'weight_decay': 0.01, 'eps': 1e-06}


In [29]:
# Start the experiment trackers (wandb in the recorded run, which warned that
# WANDB_NOTEBOOK_NAME can be set to enable code saving).
# NOTE(review): this cell was recorded as In [29] right after In [23], so the
# saved execution counts are not sequential.
trainer.init_trackers()

2024-08-31 20:56:32,220 [ERROR] (wandb.jupyter) Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [None]:
# Run the training loop. This cell has no execution count or output in the
# saved notebook.
trainer.train()