<a href="https://colab.research.google.com/github/buganart/hifi-gan/blob/master/hifi_gan_train.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@markdown Before starting please save the notebook in your drive by clicking on `File -> Save a copy in drive`

In [None]:
#@markdown Check GPU, should be a Tesla V100
# List the GPU(s) attached to this runtime so the hardware can be checked by eye.
!nvidia-smi -L
import os
# Report how many CPU cores Python sees on this runtime.
print(f"We have {os.cpu_count()} CPU cores.")

In [None]:
#@markdown Mount google drive
from google.colab import drive
from google.colab import output
drive.mount('/content/drive')

from pathlib import Path

# The default experiment and output directories live inside the shared project
# folder, which is only reachable under "My Drive" once a shortcut to it exists.
shared_db_dir = Path("/content/drive/My Drive/IRCMS_GAN_collaborative_database")
if not shared_db_dir.exists():
    raise RuntimeError(
        "Shortcut to our shared drive folder doesn't exist.\n\n"
        "\t1. Go to the google drive web UI\n"
        "\t2. Right click shared folder IRCMS_GAN_collaborative_database and click \"Add shortcut to Drive\""
    )

def clear_on_success(msg="Ok!"):
    """Replace the cell's output with ``msg`` if the last shell command succeeded.

    Reads the notebook-level ``_exit_code`` variable (the exit status IPython
    records after a shell command). When it is non-zero nothing happens, so the
    failing command's output stays visible for debugging.

    Args:
        msg: Text printed after the output area has been cleared.
    """
    if _exit_code != 0:
        return
    output.clear()
    print(msg)

In [None]:
#@markdown Install wandb and log in
%pip install wandb
output.clear()
import wandb
from pathlib import Path
wandb_drive_netrc_path = Path("drive/My Drive/colab/.netrc")
wandb_local_netrc_path = Path("/root/.netrc")
if wandb_drive_netrc_path.exists():
    import shutil

    print("Wandb .netrc file found, will use that to log in.")
    shutil.copy(wandb_drive_netrc_path, wandb_local_netrc_path)
else:
    print(
        f"Wandb config not found at {wandb_drive_netrc_path}.\n"
        f"Using manual login.\n\n"
        f"To use auto login in the future, finish the manual login first and then run:\n\n"
        f"\t!mkdir -p '{wandb_drive_netrc_path.parent}'\n"
        f"\t!cp {wandb_local_netrc_path} '{wandb_drive_netrc_path}'\n\n"
        f"Then that file will be used to login next time.\n"
    )

!wandb login
output.clear()
print("ok!")

# Description



In [None]:
#@title Configuration

# Colab form cell: every `#@param` line below is an editable form field.
# The three directory values are converted to `Path` objects further down in this cell.

#@markdown Directories can be found via file explorer on the left by navigating into `drive` to the desired folders. 
#@markdown Then right-click and `Copy path`.
# Disabled alternative value for audio_db_dir:
# audio_db_dir = "/content/drive/My Drive/AUDIO DATABASE/RAW Sessions/Roberto Studio Material" #@param {type:"string"}
# NOTE(review): this path is spelled `MyDrive` while the other paths use `My Drive` — confirm both resolve on the mounted drive.
audio_db_dir = "/content/drive/MyDrive/AUDIO DATABASE/CAGE_ONE6/22.5/" #@param {type:"string"}
experiment_dir = "/content/drive/My Drive/IRCMS_GAN_collaborative_database/Experiments/colab-violingan/melgan" #@param {type:"string"}
output_dir = "/content/drive/My Drive/IRCMS_GAN_collaborative_database/Experiments/colab-violingan/melgan-outputs" #@param {type:"string"}

#@markdown ### Resumption of previous runs
#@markdown Optional resumption argument below, leaving it empty will start a new run from scratch. 
#@markdown - The ID can be found on wandb. 
#@markdown - It's 8 characters long and may contain a-z letters and digits (for example `1t212ycn`).

#@markdown Resume a previous run 
resume_run_id = "" #@param {type:"string"}

#@markdown train argument
# These four values are passed to train.py in the TRAIN cell as --training_epochs,
# --summary_interval, --checkpoint_interval and --validation_interval.
epochs = 3000 #@param {type: "integer"}
summary_interval = 10 #@param {type: "integer"}
checkpoint_interval = 1000 #@param {type: "integer"}
validation_interval =  500#@param {type: "integer"}

import re
from pathlib import Path

# Turn the form strings into Path objects for the checks below and for later cells.
audio_db_dir = Path(audio_db_dir)
output_dir = Path(output_dir)
experiment_dir = Path(experiment_dir)

# Validate the input first so a mistyped dataset path fails before any
# directories get created on Drive.
if not audio_db_dir.exists():
    raise RuntimeError(f"audio_db_dir {audio_db_dir} does not exist.")

# Create the experiment and output directories (and any missing parents);
# already-existing directories are left untouched.
for path in [experiment_dir, output_dir]:
    path.mkdir(parents=True, exist_ok=True)

def check_wandb_id(run_id):
    """Validate a wandb run ID supplied for resuming a run.

    An empty (falsy) ``run_id`` is accepted and means "no run to resume".

    Args:
        run_id: The run ID string from the configuration form.

    Raises:
        RuntimeError: If ``run_id`` is non-empty and is not exactly 8
            characters drawn from ``a-z`` and ``0-9``.
    """
    # `fullmatch` with an explicit ASCII class is stricter than `match` with
    # `^...$`: the latter also accepts a trailing newline, and `\d` accepts
    # non-ASCII digits, neither of which can be part of a valid ID.
    if run_id and not re.fullmatch(r"[0-9a-z]{8}", run_id):
        raise RuntimeError(
            "Run ID needs to be 8 characters long and contain only letters a-z and digits.\n"
            f"Got \"{run_id}\""
        )

# Reject a malformed resume ID here, before the later cells clone the repo and copy data.
check_wandb_id(resume_run_id)

In [None]:
#@title custom config file / optional arguments
import json

# Settings that are serialised to JSON and handed to train.py through
# --config_string in the TRAIN cell. Key order is kept as-is because it
# determines the order of the serialised string.
config = dict(
    # general training settings
    resblock="1",
    num_gpus=0,
    batch_size=16,
    learning_rate=0.0002,
    adam_b1=0.8,
    adam_b2=0.99,
    lr_decay=0.999,
    seed=1234,
    # upsampling and resblock layout
    upsample_rates=[8, 8, 2, 2],
    upsample_kernel_sizes=[16, 16, 4, 4],
    upsample_initial_channel=512,
    resblock_kernel_sizes=[3, 7, 11],
    resblock_dilation_sizes=[[1, 3, 5], [1, 3, 5], [1, 3, 5]],
    # segment and spectrogram sizes
    segment_size=8192,
    num_mels=80,
    num_freq=1025,
    n_fft=1024,
    hop_size=256,
    win_size=1024,
    sampling_rate=22050,
    # frequency range
    fmin=0,
    fmax=8000,
    fmax_for_loss=None,
    # data loader workers
    num_workers=4,
    # distributed settings
    dist_config=dict(
        dist_backend="nccl",
        dist_url="tcp://localhost:54321",
        world_size=1,
    ),
)

# Serialise, then backslash-escape every double quote so the JSON stays intact
# when it is placed inside a double-quoted shell argument.
config_string = json.dumps(config).replace('"', '\\"')

In [None]:
#@title Clone melgan repo

!git clone https://github.com/buganart/hifi-gan

In [None]:
#@title Install Dependencies

# Switch into the cloned repository; later cells run its scripts
# (split_dataset.py, train.py) by relative path.
%cd /content/hifi-gan
%pip install -r requirements.txt
# Swap the pip log for a short message, but only when the install exited with status 0.
clear_on_success("Dependencies installed.")

In [None]:
#@title Copy audio files to runtime

local_wav_dir = Path("/content/wavs/")
# List the regular files at most two levels below audio_db_dir and rsync each
# one into local_wav_dir, running up to 10 copies in parallel.
# `-t` makes xargs echo every command it runs; `-d "\n"` splits the file list on
# newlines so names containing spaces stay intact. Each file is copied straight
# into local_wav_dir, so the source sub-folder structure is not kept.
!find "{audio_db_dir}" -maxdepth 2 -type f | xargs -t -d "\n" -I'%%' -P 10 -n 1 rsync -a '%%' "$local_wav_dir"/
# Disabled: would replace the copy log above with a short success message.
# clear_on_success("All files copied to this runtime.")
# Show which source directory was copied.
print(f"{audio_db_dir}")

In [None]:
#@title Split train/test dataset

# os.environ["WANDB_MODE"] = "dryrun"
!python split_dataset.py --data_path "$local_wav_dir"

train_files = Path(local_wav_dir) / "train_files.txt"
test_files = Path(local_wav_dir) / "test_files.txt"

print("TRAIN FILES")
!head -n3 "$train_files"
print('...')
!tail -n3 "$train_files"

print()
print("TEST FILES")
!head -n3 "$test_files"
print('...')
!tail -n3 "$test_files"

In [None]:
#@title TRAIN

# This is done a bit weirdly because setting PYTHONPATH=$PWD removes variables afterwards. A colab bug, maybe.
# Using `env PYTHONPATH=...` sets the variable for this single command only, pointing it at the current directory.
# NOTE(review): --input_wavs_dir is given audio_db_dir (the Drive folder), not local_wav_dir
# where the files were copied and split by the earlier cells — confirm this is intended.
!env PYTHONPATH="$(pwd)" python train.py \
--training_epochs "$epochs" \
--summary_interval "$summary_interval" \
--checkpoint_interval "$checkpoint_interval" \
--validation_interval "$validation_interval" \
--input_wavs_dir "$audio_db_dir" \
--resume_run_id "$resume_run_id" \
--config_string "$config_string"
