In [1]:
# @title Environment Setup
import os
import sys
if 'MPLBACKEND' in os.environ:
    del os.environ['MPLBACKEND']
    print("MPLBACKEND environment variable cleared.")

# 2. Clone the repository
!rm -rf FontDiffusion
!git clone https://github.com/dzungphieuluuky/FontDiffusion.git

MPLBACKEND environment variable cleared.
Cloning into 'FontDiffusion'...
remote: Enumerating objects: 20757, done.[K
remote: Counting objects: 100% (110/110), done.[K
remote: Compressing objects: 100% (78/78), done.[K
remote: Total 20757 (delta 64), reused 68 (delta 32), pack-reused 20647 (from 2)[K
Receiving objects: 100% (20757/20757), 278.14 MiB | 30.91 MiB/s, done.
Resolving deltas: 100% (1060/1060), done.
Updating files: 100% (138/138), done.


In [2]:
import os
import sys
from IPython import get_ipython
from typing import Optional

def configure_environment_paths():
    try:
        if "google.colab" in str(get_ipython()):
            print("‚úÖ Environment: Google Colab")
            base_data_path = "/content/"
            base_output_path = "/content/"
            environment_name = "colab"
        elif os.environ.get("KAGGLE_KERNEL_RUN_TYPE"):
            print("‚úÖ Environment: Kaggle")
            base_data_path = "/kaggle/input/"
            base_output_path = "/kaggle/working/"
            environment_name = "kaggle"
        else:
            print("‚ö†Ô∏è Environment: Local/Unknown")
            base_data_path = "./data/"
            base_output_path = "./output/"
            environment_name = "local"
    except NameError:
        print("‚ö†Ô∏è Non-interactive session. Using local paths.")
        base_data_path = "./data/"
        base_output_path = "./output/"
        environment_name = "local"
    os.makedirs(base_output_path, exist_ok=True)
    print(f"üìÇ Data Path: {base_data_path}")
    print(f"üì¶ Output Path: {base_output_path}")
    return base_data_path, base_output_path, environment_name

def load_secret(key_name: str) -> Optional[str]:
    env = ENV_NAME
    secret_value = None
    print(f"Attempting to load secret '{key_name}' from '{env}' environment...")
    try:
        if env == "colab":
            from google.colab import userdata
            secret_value = userdata.get(key_name)
        elif env == "kaggle":
            from kaggle_secrets import UserSecretsClient
            user_secrets = UserSecretsClient()
            secret_value = user_secrets.get_secret(key_name)
        else:
            secret_value = os.getenv(key_name)
        if not secret_value:
            print(f"‚ö†Ô∏è Secret '{key_name}' not found in the {env} environment.")
            return None
        print(f"‚úÖ Successfully loaded secret '{key_name}'.")
        return secret_value
    except Exception as e:
        print(f"‚ùå An error occurred while loading secret '{key_name}': {e}")
        return None

def print_system_info():
    print("\nüîß System Information")
    print(f"Python version: {sys.version.split()[0]}")
    try:
        import torch
        print(f"PyTorch version: {torch.__version__}")
        if torch.cuda.is_available():
            print(f"CUDA version: {torch.version.cuda}")
            print(f"GPU count: {torch.cuda.device_count()}")
            for i in range(torch.cuda.device_count()):
                print(f"  GPU {i}: {torch.cuda.get_device_name(i)}")
        else:
            print("CUDA not available")
    except ImportError:
        print("PyTorch not installed")
    finally:
      !nvidia-smi

INPUT_PATH, OUTPUT_PATH, ENV_NAME = configure_environment_paths()
is_kaggle = ("kaggle" in ENV_NAME)
is_colab = not is_kaggle
print_system_info()

os.environ["WANDB_API_KEY"] = wandb_key = load_secret("WANDB_API_KEY")
os.environ["HF_TOKEN"] = HF_TOKEN = load_secret('HF_TOKEN')
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

# Now, these libraries will log in automatically
import wandb
import huggingface_hub

wandb.login()
huggingface_hub.login(token=os.environ["HF_TOKEN"])

‚úÖ Environment: Kaggle
üìÇ Data Path: /kaggle/input/
üì¶ Output Path: /kaggle/working/

üîß System Information
Python version: 3.11.13
PyTorch version: 2.6.0+cu124
CUDA version: 12.4
GPU count: 2
  GPU 0: Tesla T4
  GPU 1: Tesla T4
Sat Jan  3 07:51:52 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.172.08             Driver Version: 570.172.08     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   39C    P8              9W /   70W |       3MiB /  15360MiB |      0%      Defaul

[34m[1mwandb[0m: Currently logged in as: [33mdungngocpham171[0m ([33mdungngocpham171-university-of-science[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [3]:
!uv pip install --upgrade pip
# 3. Install PyTorch 1.13
%cd {OUTPUT_PATH}
# Force reinstall torch 1.13 to match the model's training environment
# !uv pip uninstall torch torchvision
# !uv pip install torch==1.13.1+cu117 torchvision==0.14.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117
!uv pip install torch==2.9 torchvision
# 4. Install other dependencies

print("\n‚¨áÔ∏è Installing Dependencies (Manually fixed)...")
# Install xformers compatible with Torch 1.13
!uv pip install xformers==0.0.16 -q

# Install original dependencies
!uv pip install transformers==4.33.1 accelerate==0.23.0 diffusers==0.22.0
!uv pip install gradio==4.8.0 pyyaml pygame opencv-python info-nce-pytorch kornia
# -----------------------------------------------------------------
!uv pip install lpips scikit-image pytorch-fid
# !sudo apt-get update && sudo apt-get install dos2unix
!uv pip install gdown tqdm
!uv pip install wandb hf_transfer
!uv pip install --upgrade pyarrow datasets
print("\n‚úÖ Environment setup complete. You can now proceed to Block 2 (Inference).")

[2mUsing Python 3.11.13 environment at: /usr[0m
[2K[2mResolved [1m1 package[0m [2min 262ms[0m[0m                                          [0m
[2K[2mPrepared [1m1 package[0m [2min 138ms[0m[0m                                              
[2mUninstalled [1m1 package[0m [2min 254ms[0m[0m
[2K[2mInstalled [1m1 package[0m [2min 21ms[0m[0m                                 [0m
 [31m-[39m [1mpip[0m[2m==24.1.2[0m
 [32m+[39m [1mpip[0m[2m==25.3[0m
/kaggle/working
[2mUsing Python 3.11.13 environment at: /usr[0m
[2K[2mResolved [1m40 packages[0m [2min 567ms[0m[0m                                        [0m
[2K[2mPrepared [1m19 packages[0m [2min 40.35s[0m[0m                                           
[2mUninstalled [1m17 packages[0m [2min 1.94s[0m[0m
[2K[2mInstalled [1m19 packages[0m [2min 2.18s[0m[0m                              [0m
 [31m-[39m [1mnvidia-cublas-cu12[0m[2m==12.5.3.2[0m
 [32m+[39m [1mnvidia-cublas-cu12[0m

In [4]:
# KAGGLE CELL #1: Download checkpoint
if is_colab:
  !uv pip install --upgrade "huggingface-hub>=0.34.0,<1.0"
else:
  !uv pip install --upgrade "huggingface-hub==0.25.2" "protobuf<5.0.0" "numpy<2.0.0"
import os
import sys
from tqdm.auto import tqdm
from pathlib import Path
os.chdir(OUTPUT_PATH)
import os
from pathlib import Path
def download_from_hf(
    repo_id: str,
    local_dir: str = "ckpt",
    allow_patterns=None,
    force_download: bool = False,
    repo_type: str = "model"
):
    if allow_patterns is None:
        allow_patterns = ["*.safetensors", "scr*"]
    print(f"üì• Downloading checkpoint from Hugging Face Hub to '{local_dir}'...\n")
    from huggingface_hub import snapshot_download
    snapshot_download(
        repo_id=repo_id,
        local_dir=local_dir,
        repo_type=repo_type,
        allow_patterns=allow_patterns,
        force_download=force_download
    )
    print("\n‚úÖ Download complete!")
    print(f"\nüìÇ Files in {local_dir}/:")
    for file in os.listdir(local_dir):
        if file.endswith(".safetensors"):
            size = os.path.getsize(os.path.join(local_dir, file)) / (1024**2)
            print(f"  ‚úì {file} ({size:.2f} MB)")

[2mUsing Python 3.11.13 environment at: /usr[0m
[2K[2mResolved [1m14 packages[0m [2min 142ms[0m[0m                                        [0m
[2K[2mPrepared [1m4 packages[0m [2min 408ms[0m[0m                                             
[2mUninstalled [1m4 packages[0m [2min 42ms[0m[0m
[2K[2mInstalled [1m4 packages[0m [2min 25ms[0m[0m                                [0m
 [31m-[39m [1mfsspec[0m[2m==2025.10.0[0m
 [32m+[39m [1mfsspec[0m[2m==2025.12.0[0m
 [31m-[39m [1mhuggingface-hub[0m[2m==1.2.3[0m
 [32m+[39m [1mhuggingface-hub[0m[2m==0.25.2[0m
 [31m-[39m [1mnumpy[0m[2m==2.4.0[0m
 [32m+[39m [1mnumpy[0m[2m==1.26.4[0m
 [31m-[39m [1mprotobuf[0m[2m==6.33.0[0m
 [32m+[39m [1mprotobuf[0m[2m==4.25.8[0m


In [5]:
download_from_hf(
    repo_id="dzungpham/font-diffusion-weights",
    local_dir="ckpt",
    allow_patterns=["content*", "style*", "unet*", "scr*"]
)

üì• Downloading checkpoint from Hugging Face Hub to 'ckpt'...



content_encoder.pth:   0%|          | 0.00/4.77M [00:00<?, ?B/s]

content_encoder.safetensors:   0%|          | 0.00/4.76M [00:00<?, ?B/s]

scr_210000.pth:   0%|          | 0.00/284M [00:00<?, ?B/s]

style_encoder.pth:   0%|          | 0.00/82.4M [00:00<?, ?B/s]

style_encoder.safetensors:   0%|          | 0.00/82.4M [00:00<?, ?B/s]

unet.pth:   0%|          | 0.00/315M [00:00<?, ?B/s]

unet.safetensors:   0%|          | 0.00/315M [00:00<?, ?B/s]


‚úÖ Download complete!

üìÇ Files in ckpt/:
  ‚úì unet.safetensors (300.34 MB)
  ‚úì content_encoder.safetensors (4.54 MB)
  ‚úì style_encoder.safetensors (78.58 MB)


In [6]:
download_from_hf(
    repo_id="dzungpham/font-diffusion-generated-data",
    local_dir="NomTuTao",
    repo_type="dataset",
    allow_patterns=["*Nom*"]
)

üì• Downloading checkpoint from Hugging Face Hub to 'NomTuTao'...



Ds_10k_ChuNom_TuTao.txt: 0.00B [00:00, ?B/s]

Ds_300_ChuNom_TuTao.csv: 0.00B [00:00, ?B/s]

Top184_PureNomChar.xlsx:   0%|          | 0.00/15.8k [00:00<?, ?B/s]


‚úÖ Download complete!

üìÇ Files in NomTuTao/:


In [7]:
# @title Unzipping all archived files
import os
import glob
from zipfile import ZipFile

zip_file_paths = glob.glob(os.path.join(INPUT_PATH, '*.zip'))

if not zip_file_paths:
    print(f'No .zip files found in {INPUT_PATH}.')
else:
    for zip_file_path in zip_file_paths:
        if os.path.exists(zip_file_path):
            print(f'Unzipping {zip_file_path}...')
            !unzip -o {zip_file_path} -d ./
            print(f'Unzipping of {zip_file_path} complete.')
        else:
            print(f'Error: The file {zip_file_path} was not found (post-glob check).')

No .zip files found in /kaggle/input/.


In [8]:
print("Model files:")
!ls -larth {OUTPUT_PATH}/ckpt

Model files:
total 1.1G
drwxr-xr-x 3 root root 4.0K Jan  3 07:53 .cache
-rw-r--r-- 1 root root 4.6M Jan  3 07:53 content_encoder.pth
-rw-r--r-- 1 root root 4.6M Jan  3 07:53 content_encoder.safetensors
-rw-r--r-- 1 root root 272M Jan  3 07:53 scr_210000.pth
-rw-r--r-- 1 root root  79M Jan  3 07:53 style_encoder.pth
-rw-r--r-- 1 root root  79M Jan  3 07:53 style_encoder.safetensors
-rw-r--r-- 1 root root 301M Jan  3 07:53 unet.pth
-rw-r--r-- 1 root root 301M Jan  3 07:53 unet.safetensors
drwxr-xr-x 3 root root 4.0K Jan  3 07:53 .
drwxr-xr-x 6 root root 4.0K Jan  3 07:53 ..


In [9]:
# @title Exporting train original data from HF
%cd {OUTPUT_PATH}
HF_USERNAME = "dzungpham"
# Train Split
!python FontDiffusion/export_hf_dataset_to_disk.py \
  --output-dir "my_dataset/train_original" \
  --repo-id {HF_USERNAME}/font-diffusion-generated-data \
  --split "train_original" \
  --token HF_TOKEN

/kaggle/working
2026-01-03 07:53:58,058 - __main__ - INFO - Starting dataset export...
2026-01-03 07:53:58,058 - __main__ - INFO - Loading dataset from Hub: dzungpham/font-diffusion-generated-data (split: train_original)
README.md: 3.05kB [00:00, 10.2MB/s]
train_original-00000-of-00001.parquet: 100%|‚ñà‚ñâ| 192M/192M [00:00<00:00, 223MB/s]
train-00000-of-00001.parquet: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 124M/124M [00:00<00:00, 191MB/s]
val-00000-of-00001.parquet: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5.96M/5.96M [00:00<00:00, 68.5MB/s]
Generating train_original split: 100%|‚ñà| 14620/14620 [00:00<00:00, 29386.98 exam
Generating train split: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà| 9360/9360 [00:00<00:00, 30904.44 examples/s]
Generating val split: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 585/585 [00:00<00:00, 47396.47 examples/s]
2026-01-03 07:54:04,446 - __main__ - INFO - Loaded 14620 samples from Hub
2026-01-03 07:54:04,447 - __main__ - INFO - Created directory structure at my_dataset/train_original
2026-0

In [10]:
# @title Exporting train data from HF
!python FontDiffusion/export_hf_dataset_to_disk.py \
  --output-dir "my_dataset/train" \
  --repo-id {HF_USERNAME}/font-diffusion-generated-data \
  --split "train" \
  --token HF_TOKEN

2026-01-03 07:54:42,320 - __main__ - INFO - Starting dataset export...
2026-01-03 07:54:42,321 - __main__ - INFO - Loading dataset from Hub: dzungpham/font-diffusion-generated-data (split: train)
2026-01-03 07:55:04,848 - __main__ - INFO - Loaded 9360 samples from Hub
2026-01-03 07:55:04,848 - __main__ - INFO - Created directory structure at my_dataset/train
2026-01-03 07:55:04,849 - __main__ - INFO - Exporting images...
Exporting images: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9360/9360 [00:22<00:00, 425.13sample/s]
2026-01-03 07:55:26,866 - __main__ - INFO - Exported 799 content images, 9360 target images
2026-01-03 07:55:26,952 - __main__ - INFO - Saved checkpoint with 9360 generations: 799 chars, 12 styles
2026-01-03 07:55:26,952 - __main__ - INFO - Export completed successfully
2026-01-03 07:55:26,956 - __main__ - INFO - Successfully exported to my_dataset/train
  ContentImage/
  TargetImage/
  results_checkpoint.json


In [11]:
# @title Exporting validation data from HF
!python FontDiffusion/export_hf_dataset_to_disk.py \
  --output-dir "my_dataset/val" \
  --repo-id {HF_USERNAME}/font-diffusion-generated-data \
  --split "val" \
  --token HF_TOKEN

2026-01-03 07:55:30,466 - __main__ - INFO - Starting dataset export...
2026-01-03 07:55:30,466 - __main__ - INFO - Loading dataset from Hub: dzungpham/font-diffusion-generated-data (split: val)
2026-01-03 07:55:33,303 - __main__ - INFO - Loaded 585 samples from Hub
2026-01-03 07:55:33,304 - __main__ - INFO - Created directory structure at my_dataset/val
2026-01-03 07:55:33,304 - __main__ - INFO - Exporting images...
Exporting images: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 585/585 [00:01<00:00, 385.42sample/s]
2026-01-03 07:55:34,822 - __main__ - INFO - Exported 199 content images, 585 target images
2026-01-03 07:55:34,828 - __main__ - INFO - Saved checkpoint with 585 generations: 199 chars, 3 styles
2026-01-03 07:55:34,828 - __main__ - INFO - Export completed successfully
2026-01-03 07:55:34,829 - __main__ - INFO - Successfully exported to my_dataset/val
  ContentImage/
  TargetImage/
  results_checkpoint.json


In [12]:
# @title Show fonts and styles images
print("Fonts currently in fonts/ folder")
!ls -lt FontDiffusion/fonts
print("Styles in style_images/ folder")
!ls -l FontDiffusion/styles_images

Fonts currently in fonts/ folder
total 332584
-rw-r--r-- 1 root root 26929728 Jan  3 07:51  NomNaTongLight2.ttf
-rw-r--r-- 1 root root 14574480 Jan  3 07:51  NomNaTongLight.ttf
-rw-r--r-- 1 root root 31729820 Jan  3 07:51  NomNaTong-Regular2.otf
-rw-r--r-- 1 root root 14574552 Jan  3 07:51  NomNaTong-Regular.ttf
-rw-r--r-- 1 root root  9424552 Jan  3 07:51  NomNaTong-Regular.otf
-rw-r--r-- 1 root root 12967288 Jan  3 07:51  HanaMinC.otf
-rw-r--r-- 1 root root 30739236 Jan  3 07:51  HanaMinB.ttf
-rw-r--r-- 1 root root 32201032 Jan  3 07:51  HanaMinB.otf
-rw-r--r-- 1 root root 22761228 Jan  3 07:51  HanaMinA.ttf
-rw-r--r-- 1 root root 31621108 Jan  3 07:51  HanaMinA.otf
-rw-r--r-- 1 root root 18202176 Jan  3 07:51 'Han-nom Minh 1.42.otf'
-rw-r--r-- 1 root root 19505228 Jan  3 07:51  Han-Nom-Khai-Regular-300623.ttf
-rw-r--r-- 1 root root 20368044 Jan  3 07:51 'Han-Nom Kai 1.00.otf'
-rw-r--r-- 1 root root 33815824 Jan  3 07:51 'HAN NOM B.ttf'
-rw-r--r-- 1 root root 21320444 Jan  3 07:51 'H

In [13]:
# @title Run batch generation
if is_colab:
  !uv pip install --upgrade "huggingface-hub>=0.34.0,<1.0"
else:
  !uv pip install --upgrade "huggingface-hub==0.25.2" "protobuf<5.0.0" "numpy<2.0.0"
%cd {OUTPUT_PATH}
# No need to pass num_processes because accelerate auto detect num gpus on machine
!accelerate launch FontDiffusion/sample_batch_multi_gpus.py \
    --characters "NomTuTao/Ds_10k_ChuNom_TuTao.txt" \
    --style_images "FontDiffusion/styles_images" \
    --ckpt_dir "ckpt/" \
    --ttf_path "FontDiffusion/fonts/NomNaTong-Regular.otf" \
    --output_dir "my_dataset/train_original" \
    --num_inference_steps 20 \
    --guidance_scale 7.5 \
    --start_line 900 \
    --end_line 1000 \
    --batch_size 35 \
    --save_interval 1 \
    --channels_last \
    --seed 42 \
    --compile \
    --enable_xformers

[2mUsing Python 3.11.13 environment at: /usr[0m
[2K[2mResolved [1m14 packages[0m [2min 56ms[0m[0m                                         [0m
[2mAudited [1m14 packages[0m [2min 0.11ms[0m[0m
/kaggle/working
The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.
0it [00:00, ?it/s]
  torch.utils._pytree._register_pytree_node(
The following values were not passed to `accelerate launch` and had defaults used instead:
	`--num_processes` was set to a value of `2`
		More than one GPU was found, enabling multi-GPU training.
		If this was unintended please pass in `--num_processes=1`.
	`--num_machines` was set to a value of `1`
	`--mixed_precision` was set to a value of `'no'`
	`--dynamo_backend` was set to a value of `'no'`
  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytre

In [13]:
# @title Count images in ContentImage and TargetImage
!find my_dataset/train_original/ContentImage -type f | wc -l
!find my_dataset/train_original/TargetImage -type f | wc -l

998
14620


In [14]:
# @title Train Validation split
!python FontDiffusion/create_validation_split.py \
  --data_root my_dataset \
  --val_ratio 0.2 \
  --seed 42

2026-01-03 08:02:03,892 | INFO | ‚úì Using source directory: my_dataset/train_original
2026-01-03 08:02:03,893 | INFO | 
2026-01-03 08:02:03,893 | INFO | FONTDIFFUSION VALIDATION SPLIT CREATOR
2026-01-03 08:02:03,893 | INFO | 
2026-01-03 08:02:03,893 | INFO | ANALYZING TRAINING DATA
2026-01-03 08:02:03,893 | INFO | 
üîç Scanning content images...
Content images: 100%|[38;2;65;166;126m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 998/998 [00:00<00:00, 523kimg/s][0m
2026-01-03 08:02:03,899 | INFO |   ‚úì Found 998 content images
2026-01-03 08:02:03,899 | INFO | 
üîç Scanning target images...
Styles: 100%|[38;2;65;166;126m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 15.0/15.0 [00:00<00:00, 218style/s][0m
2026-01-03 08:02:03,968 | INFO |   ‚úì Found 14620 valid target images
2026-0

In [None]:
!uv pip install --upgrade pyarrow datasets

In [15]:
# @title Creat and upload dataset to HF
HF_USERNAME = "dzungpham"
!python FontDiffusion/create_hf_dataset.py \
  --data-dir "my_dataset/train_original" \
  --repo-id dzungpham/font-diffusion-generated-data \
  --split "train_original" \
  --token {HF_TOKEN}

# Train Split
!python FontDiffusion/create_hf_dataset.py \
  --data-dir "my_dataset/train" \
  --repo-id dzungpham/font-diffusion-generated-data \
  --split "train" \
  --token {HF_TOKEN}

# Train Split
!python FontDiffusion/create_hf_dataset.py \
  --data-dir "my_dataset/val" \
  --repo-id dzungpham/font-diffusion-generated-data \
  --split "val" \
  --token {HF_TOKEN}


2026-01-03 08:02:13,926 | INFO | Directory structure validated successfully
2026-01-03 08:02:13,927 | INFO | Building dataset...
2026-01-03 08:02:14,013 | INFO | Loaded checkpoint: 14620 generations, 998 characters, 15 styles
Loading image pairs: 100%|[38;2;65;166;126m‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà[0m| 14.3k/14.3k [00:12<00:00, 1.20kpair/s][0m
2026-01-03 08:02:26,219 | INFO | Successfully loaded 14620 samples
2026-01-03 08:03:02,456 | INFO | Pushing dataset to dzungpham/font-diffusion-generated-data...
Uploading the dataset shards:   0%|                  | 0/1 [00:00<?, ? shards/s]
Map:   0%|                                     | 0/14620 [00:00<?, ? examples/s][A
Map:  46%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå            | 6747/14620 [00:00<00:00, 15780.89 examples/s][A
Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 14620/14620 [00:00<00:00, 15850.93 examples/s][A

Creating parquet 

In [22]:
import torch, gc
torch.cuda.empty_cache()
gc.collect()

280

In [24]:
# @title Training phase 1
if is_colab:
  !uv pip install --upgrade "huggingface-hub>=0.34.0,<1.0"
else:
  !uv pip install --upgrade "huggingface-hub==0.25.2"
import wandb

MAX_TRAIN_STEPS = 1000
!accelerate launch FontDiffusion/my_train.py \
    --seed=123 \
    --experience_name="FontDiffuser_training_phase_1" \
    --data_root="my_dataset" \
    --output_dir="outputs/FontDiffuser" \
    --phase_1_ckpt_dir="ckpt" \
    --report_to="wandb" \
      \
    --resolution=96 \
    --style_image_size=96 \
    --content_image_size=96 \
    --content_encoder_downsample_size=3 \
    --channel_attn=True \
    --content_start_channel=64 \
    --style_start_channel=64 \
      \
    --train_batch_size=16 \
    --gradient_accumulation_steps=1 \
    --perceptual_coefficient=0.05 \
    --offset_coefficient=0.5 \
    --max_train_steps={MAX_TRAIN_STEPS} \
    --ckpt_interval={MAX_TRAIN_STEPS // 2} \
    --log_interval=50 \
      \
    --learning_rate=1e-4 \
    --lr_scheduler="linear" \
    --lr_warmup_steps=200 \
    --drop_prob=0.1 \
    --mixed_precision="fp16"

[2mUsing Python 3.11.13 environment at: /usr[0m
[2K[2mResolved [1m12 packages[0m [2min 52ms[0m[0m                                         [0m
[2K[2mPrepared [1m2 packages[0m [2min 0.50ms[0m[0m                                            
[2mUninstalled [1m2 packages[0m [2min 6ms[0m[0m
[2K[2mInstalled [1m2 packages[0m [2min 7ms[0m[0m5.2                              [0m
 [31m-[39m [1mfsspec[0m[2m==2025.10.0[0m
 [32m+[39m [1mfsspec[0m[2m==2025.12.0[0m
 [31m-[39m [1mhuggingface-hub[0m[2m==1.2.3[0m
 [32m+[39m [1mhuggingface-hub[0m[2m==0.25.2[0m
  torch.utils._pytree._register_pytree_node(
The following values were not passed to `accelerate launch` and had defaults used instead:
	`--num_processes` was set to a value of `2`
		More than one GPU was found, enabling multi-GPU training.
		If this was unintended please pass in `--num_processes=1`.
	`--num_machines` was set to a value of `1`
	`--mixed_precision` was set to a value of `'no'`
	`--

In [23]:
!ls -lr outputs/FontDiffuser

ls: cannot access 'outputs/FontDiffuser': No such file or directory


In [None]:
# @title Training phase 2 with SCR
if is_colab:
  !uv pip install --upgrade "huggingface-hub>=0.34.0,<1.0"
else:
  !uv pip install --upgrade "huggingface-hub==0.25.2"

!wandb login
MAX_TRAIN_STEPS = 500
!accelerate launch FontDiffusion/my_train.py \
    --seed=123 \
    --experience_name="FontDiffuser_training_phase_2" \
    --data_root="my_dataset" \
    --output_dir="outputs/FontDiffuser" \
    --report_to="wandb" \
    --phase_2 \
    --phase_1_ckpt_dir="outputs/FontDiffuser/global_step_500" \
    --scr_ckpt_path="ckpt/scr_210000.pth" \
    \
    --sc_coefficient=0.05 \
    --num_neg=10 \
    --resolution=96 \
    --style_image_size=96 \
    --content_image_size=96 \
    --content_encoder_downsample_size=3 \
    --channel_attn=True \
    --content_start_channel=64 \
    --style_start_channel=64 \
    \
    --train_batch_size=16 \
    --gradient_accumulation_steps=1 \
    --perceptual_coefficient=0.05 \
    --offset_coefficient=0.5 \
    --max_train_steps={MAX_TRAIN_STEPS} \
    --ckpt_interval={MAX_TRAIN_STEPS // 2} \
    --log_interval=50 \
    \
    --learning_rate=1e-5 \
    --lr_scheduler="constant" \
    --lr_warmup_steps=100 \
    --drop_prob=0.1 \
    --mixed_precision="no"


In [None]:
!ls -l outputs/FontDiffuser/*/*

In [None]:
STEP = 1000
!python FontDiffusion/upload_models.py \
    --weights_dir "outputs/FontDiffuser/global_step_{STEP}" \
    --repo_id "dzungpham/font-diffusion-weights" \
    --token "{HF_TOKEN}"

In [None]:
import os
import zipfile
from pathlib import Path
from typing import List
def find_result_folders(base_path: Path, pattern_name: str) -> List[Path]:
    return [p for p in base_path.glob(pattern_name) if p.is_dir()]

def zip_folder(folder_path: Path, output_base_path: Path) -> bool:
    folder_name = folder_path.name
    zip_path = output_base_path / f"{folder_name}.zip"
    try:
        print(f"   -> Zipping folder: {folder_name}...")
        with zipfile.ZipFile(zip_path, mode="w", compression=zipfile.ZIP_DEFLATED) as zipf:
            for file_path in folder_path.rglob("*"):
                if file_path.is_file():
                    arcname = file_path.relative_to(folder_path.parent)
                    zipf.write(file_path, arcname)
        print(f"   ‚úÖ Created ZIP: {zip_path.name}")
        return True
    except Exception as exc:
        print(f"   ‚ùå Failed to zip {folder_name}: {exc}")
        return False

def zip_stats_results_folders(output_base_path: str, pattern_name: str) -> None:
    base = Path(output_base_path)
    base.mkdir(parents=True, exist_ok=True)
    result_folders = find_result_folders(base, pattern_name)
    if not result_folders:
        print(f"‚ö†Ô∏è No folders matching '*dataset' found in '{output_base_path}'.")
        return
    print(f"üîç Found {len(result_folders)} result folder(s) to zip.")
    successful = sum(1 for folder in result_folders if zip_folder(folder, base))
    print(f"\n‚úÖ DONE! Successfully zipped {successful} out of {len(result_folders)} folder(s).")

if __name__ == "__main__":
    try:
        output_root = os.getenv("OUTPUT_PATH") or globals().get("OUTPUT_PATH")
        if not output_root:
            raise ValueError("OUTPUT_PATH not defined")
        zip_stats_results_folders(
            output_base_path=OUTPUT_PATH,
            pattern_name="my_dataset")
    except Exception as e:
        print(f"‚ùå An error occurred: {e}")