In [1]:
import os
import shutil
import glob
import torch
from safetensors import safe_open
from safetensors.torch import save_file

# Seed tag for this run. In the visible cells it is only interpolated into
# directory and file names (weights folder, data folder, concepts-list JSON);
# the training command passes its own fixed --seed value.
# NOTE(review): the recorded training output further down reports a save path
# containing "SEED=1234", so that output was produced with a different value
# than the one assigned here -- re-run from a fresh kernel before trusting it.
SEED = 0
# Name of the data split; used as a sub-directory of the weights folder and as
# part of the concepts-list JSON filename.
SPLIT = "light_dark_seed_to_dark"

In [2]:
### Merge all the learned embeddings into a single file

# NOTE(review): this base path is absolute (leading "/"), whereas the training
# cell reads the aggregated file from the relative path
# "textual_inversion_weights/...". The two only point at the same file when the
# notebook's working directory is "/" -- confirm which location is intended.
OUTPUT_DIR_BASE = f"/textual_inversion_weights/{SPLIT}/SEED={SEED}"
path = f"{OUTPUT_DIR_BASE}/*/learned_embeds.safetensors"

# glob returns matches in arbitrary (filesystem-dependent) order; sort them so
# the merge is reproducible, since a later file overwrites an earlier one when
# both contain the same key.
embed_files = sorted(glob.glob(path))
if not embed_files:
    # Without this check an empty dict would be saved silently and the failure
    # would only surface later, inside the training script.
    raise FileNotFoundError(f"No learned embeddings found matching {path}")

merged_dict = dict()
for file in embed_files:
    with safe_open(file, framework="pt", device="cpu") as f:
        for key in f.keys():
            if key in merged_dict:
                print(f"[!] Duplicate key {key!r} in {file}; overwriting the earlier tensor")
            merged_dict[key] = f.get_tensor(key)

target_path = f"{OUTPUT_DIR_BASE}/aggregated_embeds_SEED={SEED}.pt"
os.makedirs(os.path.dirname(target_path), exist_ok=True)
torch.save(merged_dict, target_path)

# Print a compact summary (key and tensor shape) instead of the raw tensors.
print(f"[*] Merged {len(merged_dict)} embeddings from {len(embed_files)} files into {target_path}")
for key, tensor in merged_dict.items():
    print(key, tuple(tensor.shape))


In [4]:
import json
import os

# Alternative placeholder tokens that spell out the full disease name
# (currently disabled, kept for reference):
#   "<basal_cell_carcinoma>", "<folliculitis>", "<nematode_infection>",
#   "<neutrophilic_dermatoses>", "<prurigo_nodularis>", "<psoriasis>",
#   "<squamous_cell_carcinoma>"

# One (placeholder token, disease name) pair per disease. zip() transposes the
# pairs into a tuple of tokens and a tuple of names, which are then turned into
# the two parallel lists; keeping each pair on one line prevents the lists from
# drifting out of alignment.
prompt_mapper, diseases_name = map(list, zip(
    ("<bas-class>", "basal cell carcinoma"),
    ("<fol-class>", "folliculitis"),
    ("<nem-class>", "nematode infection"),
    ("<neu-class>", "neutrophilic dermatoses"),
    ("<pru-class>", "prurigo nodularis"),
    ("<pso-class>", "psoriasis"),
    ("<squ-class>", "squamous cell carcinoma"),
))

# A disease-name -> class-index mapping (currently disabled) assigned the
# indices 0..6 to the diseases in the same order as the pairs above.

# Skin-type code -> description phrase. Not referenced by any of the cells
# visible in this notebook excerpt.
skin_types = {
    1: "a very light-skinned",
    2: "a light-skinned",
    5: "a dark-skinned",
    6: "a very dark-skinned",
}

### To train on light-skin images only, comment out the last two rows of `skin_concepts` inside the function.
def make_concepts_list(d_prompt, disease):
    """Write the concepts-list JSON for one disease and return its path.

    One concept entry is built per skin-tone group. The instance directory of
    every entry is created if it does not exist, and the list is dumped to
    ``ti_lora_concepts_list_seed={SPLIT}_{SEED}.json`` in the current working
    directory, overwriting any earlier file with that name.

    Args:
        d_prompt: Placeholder token inserted into each instance prompt
            (e.g. "<bas-class>").
        disease: Disease name; used as the last component of each
            instance_data_dir.

    Returns:
        The path of the JSON file that was written.
    """
    # One row per skin-tone group:
    # (skin description, sub-directory under data/data_seed={SEED}/, class_data_dir, label)
    skin_concepts = [
        ("a very light-skinned", "1",          "data/very_light_skin/", 0),
        ("a light-skinned",      "2",          "data/light_skin/",      1),
        ("a dark-skinned",       "flexible/5", "data/dark_skin/",       2),
        ("a very dark-skinned",  "flexible/6", "data/very_dark_skin/",  3),
    ]

    concepts_list = [
        {
            "instance_prompt":   f"An image of {d_prompt} on the skin of {skin} individual",
            "instance_data_dir": f"data/data_seed={SEED}/{subdir}/{disease}",
            "class_prompt":      f"An image of the skin of {skin} individual",
            "class_data_dir":    class_dir,
            "label":             label,
        }
        for skin, subdir, class_dir, label in skin_concepts
    ]

    # NOTE(review): a missing instance directory is created empty here rather
    # than reported, so absent training data does not raise at this point.
    for c in concepts_list:
        os.makedirs(c["instance_data_dir"], exist_ok=True)

    concepts_path = f"ti_lora_concepts_list_seed={SPLIT}_{SEED}.json"
    with open(concepts_path, "w") as f:
        json.dump(concepts_list, f, indent=4)

    return concepts_path

In [5]:
for dtype in range(7):
    d_prompt = prompt_mapper[dtype]
    disease = diseases_name[dtype]
    print(d_prompt, disease)
    make_concepts_list(d_prompt, disease)
    concept_list = f"ti_lora_concepts_list_seed={SPLIT}_{SEED}.json"

    MODEL_NAME = "stabilityai/stable-diffusion-2-1-base" 
    OUTPUT_DIR = f"textual_inversion_weights/{SPLIT}/ti_lora_SEED={SEED}/{disease[:3]}"    
    embed_path = f"textual_inversion_weights/{SPLIT}/SEED={SEED}/aggregated_embeds_SEED={SEED}.pt"

    print(f"[*] Weights will be saved at {OUTPUT_DIR}")
    !accelerate launch train_lora.py \
        --pretrained_model_name_or_path=$MODEL_NAME \
        --output_dir=$OUTPUT_DIR \
        --revision="fp16" \
        --seed=1337 \
        --resolution=512 \
        --train_batch_size=4 \
        --sample_batch_size=4 \
        --mixed_precision="fp16" \
        --use_8bit_adam \
        --gradient_accumulation_steps=1 \
        --gradient_checkpointing \
        --learning_rate=5e-6 \
        --lr_scheduler="constant" \
        --lr_warmup_steps=0 \
        --max_train_steps=3000 \
        --checkpointing_steps=500 \
        --concepts_list=$concept_list \
        --rank=8 \
        --embed_path=$embed_path

<bas-class> basal cell carcinoma
[*] Weights will be saved at textual_inversion_weights/light_dark_seed_to_dark/ti_lora_SEED=1234/bas
03/02/2024 22:03:01 - INFO - __main__ - Distributed environment: DistributedType.NO
Num processes: 1
Process index: 0
Local process index: 0
Device: cuda

Mixed precision type: fp16

You are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
{'clip_sample_range', 'thresholding', 'dynamic_thresholding_ratio', 'timestep_spacing', 'variance_type', 'sample_max_value'} was not found in config. Values will be initialized to default values.
 The Diffusers team and community would be very grateful if you could open an issue: https://github.com/huggingface/diffusers/issues/new with the title 'stabilityai/stable-diffusion-2-1-base is missing diffusion_pytorch_model.fp16.safetensors' so that the correct variant file can be added.
 The Diffusers team and community would 