# Imports & preparatory steps

In [1]:
import getpass
import os
import os.path
import shutil

import torch

# Check the number of CPUs
def _detect_n_cpus():
    """Return the number of CPU threads this session should use.

    The first usable value wins, in this order:
      1. $PBS_NUM_PPN     (set inside a PBS job),
      2. $OMP_NUM_THREADS (when it is a plain positive integer),
      3. os.cpu_count()   (all CPUs visible to the interpreter), or 1.

    Falling back instead of indexing os.environ directly keeps the notebook
    runnable outside a PBS job, where $PBS_NUM_PPN is not defined.
    """
    for var_name in ("PBS_NUM_PPN", "OMP_NUM_THREADS"):
        raw_value = os.environ.get(var_name, "").strip()
        # isdigit() rejects empty, negative and list-like values such as "4,2".
        if raw_value.isdigit() and int(raw_value) > 0:
            return int(raw_value)
    return os.cpu_count() or 1


N_CPUS = _detect_n_cpus()

# Limit CPU operation in pytorch to `N_CPUS`
torch.set_num_threads(N_CPUS)
try:
    torch.set_num_interop_threads(N_CPUS)
except RuntimeError:
    # PyTorch accepts this setting only once per process and only before any
    # inter-op parallel work has started, so re-running this cell in a live
    # kernel would otherwise abort here. The earlier setting stays in effect.
    print(" > Inter-op thread count already fixed for this process; keeping it.")

# Set username
# $USER is used when present; otherwise ask the OS for the login name.
USER = os.environ.get("USER") or getpass.getuser()
# Rendezvous address/port exported for the training processes spawned later.
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = "6060"

# Settings

In [2]:
# Non-Coqui-TTS parameters
# Switch read by the "Copy data" and "Cleanup" cells: both only do work when
# this is True, and are skipped entirely when it is False.
COPY_TO_SCRATCH = False

In [3]:
# General params
# Name of this run; the training cell writes its outputs to ./logs/<run_name>.
run_name = "test"

# Settings stored under the "train" key of the hyperparameter set.
train_params = {
    # Logging / evaluation intervals
    "log_interval": 50,
    "eval_interval": 100,

    # Reproducibility and run length
    "seed": 1234,
    "epochs": 2000,

    # Optimiser settings
    "learning_rate": 2e-4,
    "betas": [0.8, 0.99],
    "eps": 1e-9,
    "batch_size": 16,
    "fp16_run": False,

    # Learning-rate schedule
    "lr_decay": 0.999875,
    "segment_size": 8192,
    "init_lr_ratio": 1,
    "warmup_epochs": 0,

    # Loss weights
    "c_mel": 45,
    "c_kl": 1.0,
}

# Settings stored under the "data" key of the hyperparameter set.
data = {
    "use_mel_posterior_encoder": True,

    # Dataset location (per-user home directory) and file lists
    "dataset_path": f"/storage/plzen4-ntis/home/{USER}/experimenty/vits2_cz/datasets/NeuOl",
    "training_files": "train0.ph-redu.epa.p3b0.csv",
    "validation_files": "val0.ph-redu.epa.p3b0.csv",
    "text_cleaners": ["english_cleaners2"],

    # Audio and spectrogram settings
    "max_wav_value": 32768.0,
    "sampling_rate": 24000,
    "filter_length": 1024,
    "hop_length": 256,
    "win_length": 1024,
    "n_mel_channels": 80,
    "mel_fmin": 0.0,
    "mel_fmax": None,

    # Text handling and speakers
    "add_blank": False,
    "n_speakers": 0,
    "cleaned_text": True,
    "eval_audio_folder": "eval_audios",

    # Symbol inventory
    "characters": "\\ACDEFGHIJLMNOPQRSTUWYZabcdefghijklmnopqrstuvwxyz@#$*%Ç",
    "punctuation": "!,-.:;–/()?ˈ„“”\"‚‘’ˌː…¡¿«» ",
    "pad": "_",

    # Data loading
    "num_workers": 2,
}

# Settings stored under the "model" key of the hyperparameter set.
model = {
    # Feature switches
    "use_mel_posterior_encoder": True,
    "use_transformer_flows": True,
    "transformer_flow_type": "pre_conv",
    "use_spk_conditioned_encoder": False,
    "use_noise_scaled_mas": True,
    "use_duration_discriminator": True,
    "duration_discriminator_type": "dur_disc_1",

    # Network dimensions
    "inter_channels": 192,
    "hidden_channels": 192,
    "filter_channels": 768,
    "n_heads": 2,
    "n_layers": 6,
    "kernel_size": 3,
    "p_dropout": 0.1,

    # Residual-block and upsampling settings
    "resblock": "1",
    "resblock_kernel_sizes": [3, 7, 11],
    "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
    "upsample_rates": [8, 8, 2, 2],
    "upsample_initial_channel": 512,
    "upsample_kernel_sizes": [16, 16, 4, 4],

    # Remaining options
    "n_layers_q": 3,
    "use_spectral_norm": False,
    "use_sdp": True,
}

In [4]:
# Parameters
# NOTE(review): this cell repeats the COPY_TO_SCRATCH assignment made in the
# settings cell above. The "# Parameters" marker looks like a cell injected by
# a parameterised-execution tool (e.g. papermill) -- confirm before removing
# either copy. Being the later assignment, this is the value that takes effect.

# Non-Coqui-TTS parameters
COPY_TO_SCRATCH = False

# Copy data

In [5]:
if COPY_TO_SCRATCH:
    # Copy dataset
    # The source is whatever dataset location is currently configured.
    dataset_source = data["dataset_path"]

    # Prepare dataset dir in the scratch.
    # Only the last path component is appended to $SCRATCHDIR: os.path.join()
    # drops everything before an absolute component, so joining the full
    # (absolute) dataset path would silently ignore $SCRATCHDIR.
    dataset_scratch = os.path.join(
        os.environ["SCRATCHDIR"],
        os.path.basename(os.path.normpath(dataset_source)),
    )
    print(f"> Local scratch data dir: {dataset_scratch}")

    # Copy dataset to local scratch (skipped when a copy is already there,
    # which also makes re-running this cell harmless).
    if not os.path.exists(dataset_scratch):
        if os.path.isdir(dataset_source):
            # A dataset directory needs a recursive copy; symlinks=False
            # copies the files that links point to rather than the links.
            shutil.copytree(dataset_source, dataset_scratch, symlinks=False)
        else:
            shutil.copy(dataset_source, dataset_scratch, follow_symlinks=True)
    # Store the scratch dataset so that it is used for training
    data["dataset_path"] = dataset_scratch

# Set path to training framework

In [6]:
## Set path to (modified) Coqui-TTS
#sys.path.insert(0, coqui_path)
## Set path to (modified) Coqui-Trainer
#sys.path.insert(0, trainer_path)

# Training

In [None]:
import os
import json
from torch import __version__ as torch_version
import torch.multiprocessing as mp
from platform import python_version
from utils import HParams
# Trainer: Where the ✨️ happens.
import train

# Training needs at least one CUDA device.
assert torch.cuda.is_available(), "CPU training is not allowed."

n_gpus = torch.cuda.device_count()

# Report the resources and library versions used by this run.
for report_line in (
    " > Computational resources...",
    f" | > Number of CPUs: {N_CPUS}",
    f" | > Number of GPUs: {n_gpus}",
    " > Python & module versions...",
    f" | > Python:    {python_version()}",
    f" | > PyTorch:   {torch_version}",
):
    print(report_line)

# Make up the hyperparameters from individual settings
hps = dict(train=train_params, data=data, model=model)

# Run directory; created on first use.
model_dir = os.path.join(f"./logs/{run_name}")
if not os.path.exists(model_dir):
    os.makedirs(model_dir)

# Keep a JSON copy of the settings next to the run's other outputs.
config_save_path = os.path.join(model_dir, "config.json")
with open(config_save_path, "w") as json_file:
    json.dump(hps, json_file)

# Wrap the plain dict in HParams and attach the run directory to it.
hps = HParams(**hps)
hps.model_dir = model_dir

# Start one training process per GPU; mp.spawn passes the process index as
# the first argument of train.run, followed by `args`.
mp.spawn(train.run, args=(n_gpus, hps), nprocs=n_gpus)

# train.run(0, 1, hps)

DEBUG:tensorflow:Falling back to TensorFlow client; we recommended you install the Cloud TPU client directly with pip install cloud-tpu-client.
DEBUG:h5py._conv:Creating converter from 7 to 5
DEBUG:h5py._conv:Creating converter from 5 to 7
DEBUG:h5py._conv:Creating converter from 7 to 5
DEBUG:h5py._conv:Creating converter from 5 to 7
DEBUG:jax._src.path:etils.epath was not found. Using pathlib for file I/O.
 > Computational resources...
 | > Number of CPUs: 1
 | > Number of GPUs: 1
 > Python & module versions...
 | > Python:    3.10.12
 | > PyTorch:   2.1.2+cu121
INFO:test:{'train': {'log_interval': 50, 'eval_interval': 100, 'seed': 1234, 'epochs': 2000, 'learning_rate': 0.0002, 'betas': [0.8, 0.99], 'eps': 1e-09, 'batch_size': 16, 'fp16_run': False, 'lr_decay': 0.999875, 'segment_size': 8192, 'init_lr_ratio': 1, 'warmup_epochs': 0, 'c_mel': 45, 'c_kl': 1.0}, 'data': {'use_mel_posterior_encoder': True, 'dataset_path': '/storage/plzen4-ntis/home/jmatouse/experimenty/vits2_cz/datasets/Ne

Loading train data:  33%|███▎      | 3/9 [00:28<00:48,  8.13s/it]

INFO:test:Train Epoch: 34 [33%]
INFO:test:[2.7217400074005127, 2.0024616718292236, 2.0019309520721436, 29.149261474609375, 2.802926778793335, 2.926793336868286, 300, 0.00019912685682091382]
DEBUG:matplotlib:matplotlib data path: /usr/local/lib/python3.10/dist-packages/matplotlib/mpl-data
DEBUG:matplotlib:CONFIGDIR=/storage/plzen4-ntis/home/jmatouse/.config/matplotlib
DEBUG:matplotlib:interactive is False
DEBUG:matplotlib:platform is linux
INFO:test:Saving eval audio at epoch 34, step 300
INFO:test:Saving model and optimizer state at iteration 34 to ./logs/test/G_300-34.pth
INFO:test:Saving model and optimizer state at iteration 34 to ./logs/test/D_300-34.pth
INFO:test:Saving model and optimizer state at iteration 34 to ./logs/test/DUR_300-34.pth


Loading train data: 100%|██████████| 9/9 [01:07<00:00,  7.54s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 34 (steps: 306)


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.66s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 35 (steps: 315)


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.62s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 36 (steps: 324)


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.57s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 37 (steps: 333)


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.57s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 38 (steps: 342)


Loading train data:  89%|████████▉ | 8/9 [00:13<00:00,  1.43it/s]

INFO:test:Train Epoch: 39 [89%]
INFO:test:[2.613800048828125, 2.0777127742767334, 2.3820409774780273, 26.742374420166016, 2.730661392211914, 3.121246099472046, 350, 0.00019900243364508313]


Loading train data: 100%|██████████| 9/9 [00:15<00:00,  1.72s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 39 (steps: 351)


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.62s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 40 (steps: 360)


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.63s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 41 (steps: 369)


Loading train data: 100%|██████████| 9/9 [00:15<00:00,  1.68s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 42 (steps: 378)


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.58s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 43 (steps: 387)


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.56s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 44 (steps: 396)


Loading train data:  44%|████▍     | 4/9 [00:10<00:08,  1.65s/it]

INFO:test:Train Epoch: 45 [44%]
INFO:test:[2.6554269790649414, 2.1317427158355713, 3.1085214614868164, 26.795900344848633, 2.603837251663208, 2.630241632461548, 400, 0.00019885322845327182]
INFO:test:Saving eval audio at epoch 45, step 400
INFO:test:Saving model and optimizer state at iteration 45 to ./logs/test/G_400-45.pth
INFO:test:Saving model and optimizer state at iteration 45 to ./logs/test/D_400-45.pth
INFO:test:Saving model and optimizer state at iteration 45 to ./logs/test/DUR_400-45.pth


Loading train data: 100%|██████████| 9/9 [00:28<00:00,  3.19s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

removed ./logs/test/G_100-12.pth
removed ./logs/test/D_100-12.pth
removed ./logs/test/DUR_100-12.pth
INFO:test:====> Epoch: 45 (steps: 405)


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.60s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 46 (steps: 414)


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.59s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 47 (steps: 423)


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.63s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 48 (steps: 432)


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.60s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 49 (steps: 441)


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.59s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 50 (steps: 450)
INFO:test:Train Epoch: 51 [0%]
INFO:test:[2.4663612842559814, 2.180588722229004, 3.0097405910491943, 25.118480682373047, 2.6429929733276367, 2.596942186355591, 450, 0.00019870413513039026]


Loading train data: 100%|██████████| 9/9 [00:14<00:00,  1.62s/it]
Loading train data:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:test:====> Epoch: 51 (steps: 459)


# Cleanup

In [None]:
if COPY_TO_SCRATCH:
    # Remove the scratch copy of the dataset.
    scratch_root = os.path.realpath(os.environ["SCRATCHDIR"])
    scratch_copy = os.path.realpath(dataset_scratch)

    # Safety guard: delete only something strictly inside $SCRATCHDIR, so a
    # wrongly computed path can never remove the original dataset.
    inside_scratch = (
        scratch_copy != scratch_root
        and os.path.commonpath([scratch_root, scratch_copy]) == scratch_root
    )
    if not inside_scratch:
        print(f"> Not removing {dataset_scratch}: it is not inside {scratch_root}")
    elif os.path.isdir(scratch_copy):
        # os.unlink() cannot remove directories; a copied dataset directory
        # has to be removed recursively.
        shutil.rmtree(scratch_copy)
    elif os.path.exists(scratch_copy):
        os.unlink(scratch_copy)