# Imports & preparatory steps

In [None]:
import os
import os.path
import torch
import shutil

# Check the number of CPUs
# $PBS_NUM_PPN vs $OMP_NUM_THREADS?
# NOTE(review): indexing os.environ raises KeyError when PBS_NUM_PPN is unset,
# so this cell presumably only runs inside a PBS job -- confirm.
N_CPUS = int(os.environ["PBS_NUM_PPN"])

# Limit CPU operation in pytorch to `N_CPUS`
# (set_num_threads covers intra-op parallelism, set_num_interop_threads
# covers inter-op parallelism; both get the same limit here).
torch.set_num_threads(N_CPUS)
torch.set_num_interop_threads(N_CPUS)

# Set username (interpolated into the dataset path in the settings below)
USER = os.environ["USER"]

# Check free port
def get_free_port(ports):
    """Return the first port in *ports* that nothing on localhost listens on.

    A port is treated as free when a TCP connection attempt to
    ``("localhost", port)`` fails, i.e. ``connect_ex`` returns non-zero.

    Args:
        ports: Iterable of candidate TCP port numbers, probed in order.

    Returns:
        The first candidate whose connection attempt failed.

    Raises:
        RuntimeError: If *ports* is empty or every candidate accepted the
            connection (previously the last, busy port was returned
            silently, or an UnboundLocalError leaked for empty input).
    """
    import socket

    for port in ports:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            # connect_ex returns 0 only when something accepted the
            # connection, so any non-zero result means the port is unused.
            if s.connect_ex(("localhost", port)) != 0:
                return port
    raise RuntimeError("No free port found among the candidate ports.")

# Pick the rendezvous port from the candidates 29500..29525 (range end is exclusive).
port = get_free_port(range(29500, 29526))

# Prepare for multi-gpu training
# MASTER_ADDR / MASTER_PORT are exported as strings; presumably read by the
# process-group initialisation inside train.run -- confirm.
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = f"{port}"

# Settings

In [None]:
# Non-Coqui-TTS parameters
# Default: train directly from data["dataset_path"]. When True, the dataset is
# first copied under $SCRATCHDIR and removed again in the cleanup cell.
# NOTE(review): a later cell reassigns this flag, so that value wins when the
# notebook is run top to bottom.
COPY_TO_SCRATCH = False

In [None]:
# General params
# `run_name` selects the output directory ./logs/<run_name> used by the
# training cell.
run_name = "test"

# Optimisation settings; stored under the "train" key of the hyperparameters
# handed to train.run. Key order is kept because it is mirrored in config.json.
train_params = {
    # Logging / evaluation cadence (units are defined by train.py -- TODO confirm)
    "log_interval": 50,
    "eval_interval": 100,
    # Reproducibility and run length
    "seed": 1234,
    "epochs": 2000,
    # Optimizer settings
    "learning_rate": 2e-4,
    "betas": [0.8, 0.99],
    "eps": 1e-9,
    "batch_size": 16,
    "fp16_run": False,
    # Learning-rate schedule and training segment length
    "lr_decay": 0.999875,
    "segment_size": 8192,
    "init_lr_ratio": 1,
    "warmup_epochs": 0,
    # Presumably loss weights for the mel and KL terms -- TODO confirm
    "c_mel": 45,
    "c_kl": 1.0,
}

# Dataset / feature-extraction settings; stored under the "data" key of the
# hyperparameters handed to train.run.
data = {
    # Same flag (and same value) also appears in `model` below; presumably the
    # two must agree -- TODO confirm.
    "use_mel_posterior_encoder": True,
    # Home-storage location of the dataset; replaced by the scratch copy when
    # COPY_TO_SCRATCH is enabled.
    "dataset_path": f"/storage/plzen4-ntis/home/{USER}/experimenty/vits2_cz/datasets/NeuOl",
    # NOTE(review): file names only; presumably resolved relative to
    # dataset_path by the training code -- confirm.
    "training_files": "train0.ph-redu.epa.p3b0.csv",
    "validation_files": "val0.ph-redu.epa.p3b0.csv",
    "text_cleaners":["english_cleaners2"],
    "max_wav_value": 32768.0,
    "sampling_rate": 24000,
    "filter_length": 1024,
    "hop_length": 256,
    "win_length": 1024,
    "n_mel_channels": 80,
    "mel_fmin": 0.0,
    # None is written as JSON null in config.json.
    "mel_fmax": None,
    "add_blank": False,
    "n_speakers": 0,
    "cleaned_text": True,
    "eval_audio_folder": "eval_audios",
    # Symbol inventory: these strings are runtime data, keep them exactly as is.
    "characters": "\\ACDEFGHIJLMNOPQRSTUWYZabcdefghijklmnopqrstuvwxyz@#$*%Ç",
    "punctuation": "!,-.:;–/()?ˈ„“”\"‚‘’ˌː…¡¿«» " ,
    "pad": "_",
    "num_workers": 2,
    "min_text_len": 5,
    "max_text_len": 999,
    "min_audio_len": 8192,
    # Same value as train_params["seed"].
    "seed": 1234,
  }

# Architecture settings; stored under the "model" key of the hyperparameters
# handed to train.run. Key order is kept because it is mirrored in config.json.
model = {
    # Feature switches
    "use_mel_posterior_encoder": True,
    "use_transformer_flows": True,
    "transformer_flow_type": "pre_conv",
    "use_spk_conditioned_encoder": False,
    "use_noise_scaled_mas": True,
    "use_duration_discriminator": True,
    "duration_discriminator_type": "dur_disc_1",
    # Layer sizes
    "inter_channels": 192,
    "hidden_channels": 192,
    "filter_channels": 768,
    "n_heads": 2,
    "n_layers": 6,
    "kernel_size": 3,
    "p_dropout": 0.1,
    # Residual blocks and upsampling stack. The upsample rates multiply to
    # 8 * 8 * 2 * 2 = 256, which equals data["hop_length"]; presumably they
    # have to match -- TODO confirm.
    "resblock": "1",
    "resblock_kernel_sizes": [3, 7, 11],
    "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
    "upsample_rates": [8, 8, 2, 2],
    "upsample_initial_channel": 512,
    "upsample_kernel_sizes": [16, 16, 4, 4],
    "n_layers_q": 3,
    "use_spectral_norm": False,
    "use_sdp": True,
}

In [None]:
# Parameters
# NOTE(review): looks like an injected run-parameters cell (e.g. from a
# notebook parameterisation tool) -- confirm. Because it runs after the
# defaults cell, this assignment overrides the earlier COPY_TO_SCRATCH = False.

# Non-Coqui-TTS parameters
COPY_TO_SCRATCH = True

# Copy data

In [None]:
if COPY_TO_SCRATCH:
    # Stage the dataset under $SCRATCHDIR, keeping the original directory name.
    scratch_root = os.environ["SCRATCHDIR"]
    dataset_name = os.path.basename(data["dataset_path"])
    dataset_scratch = os.path.join(scratch_root, dataset_name)

    print(f"> Copying data to local scratch: {dataset_scratch}")
    # copytree creates the destination itself and fails if it already exists.
    shutil.copytree(data["dataset_path"], dataset_scratch)

    # From here on training reads the scratch copy; `dataset_scratch` is kept
    # so the cleanup cell can remove it afterwards.
    data["dataset_path"] = dataset_scratch

# Training

In [None]:
import os
import json
from torch import __version__ as torch_version
import torch.multiprocessing as mp
from platform import python_version
from utils import HParams
# Trainer: Where the ✨️ happens.
import train

# Training is GPU-only: stop right away when no CUDA device is visible.
assert torch.cuda.is_available(), "CPU training is not allowed."

# One training process is spawned per visible GPU.
n_gpus = torch.cuda.device_count()

# Summarise the allocated resources and the software versions in use.
print(
    "\n".join(
        [
            " > Computational resources...",
            f" | > Localhost port: {port}",
            f" | > Number of CPUs: {N_CPUS}",
            f" | > Number of GPUs: {n_gpus}",
            " > Python & module versions...",
            f" | > Python:    {python_version()}",
            f" | > PyTorch:   {torch_version}",
        ]
    )
)

# Make up the hyperparameters from individual settings
hps = {
    "train": train_params,
    "data": data,
    "model": model,
}

# Prepare dir for saving training checkpoint/model files.
# exist_ok=True makes re-running the cell safe and avoids the
# check-then-create race of a separate os.path.exists() test.
model_dir = f"./logs/{run_name}"
os.makedirs(model_dir, exist_ok=True)

# Save JSON config to model dir. The dump happens before `model_dir` is
# attached, so config.json holds only the train/data/model sections (with
# whatever data["dataset_path"] is at this point).
config_save_path = os.path.join(model_dir, "config.json")
with open(config_save_path, "w", encoding="utf-8") as json_file:
    json.dump(hps, json_file, indent=4)

# Prepare hyperparameters and add model dir
# (`hps` is rebound from the plain dict to the HParams wrapper).
hps = HParams(**hps)
hps.model_dir = model_dir

# Start multi-gpu training
# Launches `n_gpus` worker processes; torch.multiprocessing.spawn calls
# train.run(i, n_gpus, hps) in each, with `i` the process index, and by
# default waits for all workers to finish.
mp.spawn(
    train.run,
    nprocs=n_gpus,
    args=(n_gpus, hps,),
)

# Cleanup

In [None]:
# Remove the scratch copy of the dataset created in the "Copy data" cell.
# NOTE(review): this only runs when the cell is reached; if training raises,
# the scratch copy stays behind -- confirm whether that is acceptable.
if COPY_TO_SCRATCH:
    shutil.rmtree(dataset_scratch)