In [9]:
# !pip install toml==0.10.2
# !pip install einops==0.8.0
# !pip install imagesize==1.4.1
# !pip install voluptuous==0.15.2
# !pip install xformers==0.0.27.post2
# !pip install wandb==0.17.8
# !pip install transformers==4.44.2
# !pip install diffusers==0.30.2

In [13]:
import os
import sys
import pandas as pd
import numpy as np
import torch
from time import sleep
from tqdm import tqdm
from PIL import Image
import seaborn
import plotly.express as px

import yaml
from glob import glob

from joblib import Parallel, delayed

import os

project_path = "/home/ilya/isic" #TODO: replace

os.environ["WANDB_PROJECT"] = "sd-finetune"
os.environ["TRANSFORMERS_CACHE"] = os.path.join('tmp_cache')
os.environ["HF_HOME"] = os.path.join(project_path, 'tmp_cache')
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [11]:
seed_name = 33
base_name_tmp = f'syntetic_custom_base_{seed_name}'
fold_number = '1'

In [None]:
dataset_path = f"../data/artifacts/{base_name_tmp}/{fold_number}"
os.makedirs(dataset_path, exist_ok=True)

In [None]:
all_images = glob(
    f"../data/artifacts/{base_name_tmp}/{fold_number}/**.png")

for fn in all_images:
    with open(fn.replace('.png', '.caption'), 'w') as f:
        f.write('a photo of a malignant mole')

all_caps = glob(
    f"../data/artifacts/{base_name_tmp}/{fold_number}/**.caption")

In [None]:
dataset_path = os.path.abspath(dataset_path)
device = 'cuda:0'

In [None]:
%cd ../kohya_ss/sd-scripts

In [None]:
meta_Folder_name = f'../sd_datasets/small_test/{base_name_tmp}/{fold_number}'
os.makedirs(meta_Folder_name, exist_ok=True)

In [None]:
!python3 finetune/merge_captions_to_metadata.py {dataset_path} \
    {meta_Folder_name}/meta_cap_v1.json

In [None]:
import json
with open(f"{meta_Folder_name}/meta_cap_v1.json", "r") as f:
    meta_lat = json.load(f)

In [None]:
!python3 finetune/prepare_buckets_latents.py \
     {dataset_path} \
    {meta_Folder_name}/meta_cap_v1.json \
    {meta_Folder_name}/meta_lat.json \
    /home/ilya/ThisIsReal \
    --batch_size 16 --max_resolution 128,128 --mixed_precision no --max_bucket_reso 128 \
    --min_bucket_reso 128 --bucket_reso_steps 64  --bucket_no_upscale

In [None]:
prompts_to_sample = [
  "a photo of a malignant mole"
]

with open("prompts_to_check.txt", "w") as f:
  for ind, pr in enumerate(prompts_to_sample):
    f.write(f"{pr} " \
    "--n lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry " \
    "--w 128 --h 128 --d 555 --l 7 --s 35 \n")

In [None]:
prompts_to_check = os.path.join(project_path, "kohya_ss/sd-scripts/prompts_to_check.txt")
finetunes_path = os.path.join(project_path, "/home/ilya/finetunes/mole")

finetune_name = f"{base_name_tmp}___{fold_number}"
finetune_full_path = os.path.join(finetunes_path, finetune_name)
os.makedirs(finetune_full_path, exist_ok=True)

In [None]:
import json
with open(f"{meta_Folder_name}/meta_lat.json", "r") as f:
    meta_lat = json.load(f)

In [None]:
finetune_logs_outputs = "logs"
os.makedirs(finetune_logs_outputs, exist_ok=True)

In [None]:
base_model_path = os.path.join(project_path, "ThisIsReal")
wandb_api_key = '...' ###your api key

In [None]:
!accelerate launch --num_cpu_threads_per_process 4 fine_tune.py \
     --pretrained_model_name_or_path={base_model_path} \
     --in_json={meta_Folder_name}/meta_lat.json \
     --train_data_dir={dataset_path} \
     --output_dir={finetune_full_path} \
     --train_batch_size=8 \
     --dataset_repeats=1 \
     --resolution=128,128 \
     --enable_bucket \
     --keep_tokens=77 \
     --min_bucket_reso=128 \
     --max_bucket_reso=128 \
     --sample_every_n_steps=80 \
     --sample_sampler=k_dpm_2_a \
     --sample_prompts={prompts_to_check}\
     --learning_rate=3e-5 \
     --max_grad_norm=1\
     --learning_rate_te=1e-6 \
     --train_text_encoder \
     --max_train_epochs=50 \
     --lr_warmup_steps=200 \
     --learning_rate_te=5e-6 \
     --train_text_encoder \
     --save_every_n_epochs=5 \
     --noise_offset=0.05\
     --save_model_as=diffusers \
     --mixed_precision=fp16 \
     --save_precision=fp16 \
     --wandb_run_name={finetune_name} \
     --optimizer_type=AdamW \
     --gradient_checkpointing \
     --min_snr_gamma=5 \
     --lr_scheduler=constant_with_warmup \
     --logging_dir={finetune_logs_outputs} \
     --gradient_accumulation_steps=1 \
     --xformers \
     --log_with=all \
     --seed=222 \
     --wandb_api_key={wandb_api_key}

In [18]:
kaggle_dataset_path = finetunes_path

In [19]:
!kaggle datasets init -p {kaggle_dataset_path}

Data package template written to: /home/jovyan/work/finetunes/mole/dataset-metadata.json


In [21]:
import json 

with open(os.path.join(kaggle_dataset_path, "dataset-metadata.json"), "r") as f:
    f_r = json.load(f)

f_r['title'] = "ISIC-2024-synthetic-models"
f_r['id'] = "ilya9711nov/ISIC-2024-synthetic-models"

with open(os.path.join(kaggle_dataset_path, "dataset-metadata.json"), "w") as f:
    json.dump(f_r, f)

In [22]:
!kaggle datasets create -p {kaggle_dataset_path} --dir-mode zip

Starting upload for file syntetic_custom_base_1997___1.zip
 47%|█████████████████▋                    | 16.3G/34.9G [05:45<04:50, 69.0MB/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Starting upload for file syntetic_custom_base_1997___3.zip
 69%|██████████████████████████            | 24.0G/34.9G [05:19<02:00, 97.4MB/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Starting upload for file syntetic_custom_base_42___1.zip
 65%|████████████████████████▊             | 22.8G/34.9G [05:28<03:56, 55.0MB/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Starting upload for file syntetic_custom_base_42___4.zip
 60%|██████████████████████▋               | 20.8G/34.9G [05:26<04:41, 53.8MB/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



Starting upload for file syntetic_custom_base_42___2.zip
 82%|████████████████████████████████       | 28.7G/34.9G [05:19<01:00, 111MB/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)



 78%|█████████████████████████████▌        | 27.2G/34.9G [05:35<01:37, 84.7MB/s]Starting upload for file syntetic_custom_base_42___3.zip
 29%|███████████                           | 10.2G/34.9G [06:02<21:40, 20.4MB/s]

IOPub message rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_msg_rate_limit`.

Current values:
ServerApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
ServerApp.rate_limit_window=3.0 (secs)

