<a href="https://colab.research.google.com/github/haru0l/Diff-SVC-notebooks/blob/main/Diff_SVC_training_notebook_(colab_ver_).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Training notebook for [Diff-SVC](https://github.com/prophesier/diff-svc) written by [Nekro](https://twitter.com/NekroTheCorpse) of [Archivoice](https://github.com/archivoice)

Hi! Please don't use this thing for illegal stuff like copying the voices of celebs and such. Just make sure you have permission!

# Check Setup

In [None]:
#@title #Check GPU type
#@markdown this is for checking the GPU type you have as well as the available amount of vram.
# Short listing of the GPU(s) attached to this runtime.
!nvidia-smi -L
# Full nvidia-smi status report (this is where the VRAM figures come from).
!nvidia-smi

In [None]:
#@title #Mount Google Drive

#@markdown Makes your life easier when uploading and saving stuff.

from google.colab import drive
drive.flush_and_unmount()
!rm -rf /content/drive
drive.mount('/content/drive')
print('Done!')

# Preparation

In [None]:
#@title #Step 1: Install Diff-SVC
#@markdown The stuff you'll need for every other thing afterwards.
from IPython.display import clear_output
import os
print('Upgrading pip & installing 7zip')
!rm -rf /content/sample_data
!python -m pip install --upgrade pip wheel
!apt-get install unzip
!pip uninstall gdown -y
!pip install git+https://github.com/justinjohn0306/gdown.git

#print('Installing torch')
#%pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
!pip install --pre torchtext==0.6.0 --no-deps

print('Installing Aria2')
!sudo apt-get install aria2
!apt install wget curl ca-certificates &> /dev/null
!wget -N git.io/aria2.sh &> /dev/null && chmod +x aria2.sh &> /dev/null
!echo 1|./aria2.sh &> /dev/null
!echo 12|./aria2.sh &> /dev/null
!echo 6|./aria2.sh &> /dev/null

#@markdown ---
#@markdown ###Select which fork to use
#@markdown The official repo is up-to-date, while UtaUtaUtau's version has Harvest support for improved f0.
fork = 'Official Diff-SVC Repo' #@param ["Official Diff-SVC Repo", "UtaUtaUtau's Repo"]

#@markdown ---
#@markdown ###Model sample rate
#@markdown Please choose if you want to train a 24kHz model or a 44.1kHz model.
sample_rate = '44.1kHz' #@param ["24kHz", "44.1kHz"]

print('Installing Diff-SVC')
if fork == "Official Diff-SVC Repo":
  !git clone https://github.com/prophesier/diff-svc
else:
  !pip install --upgrade numpy==1.23.0 scipy==1.9.3
  !git clone --branch harvest-preprocess https://github.com/UtaUtaUtau/diff-svc

%cd "/content/diff-svc/"
!pip install -r requirements_short.txt
!pip install tensorboard<2.9,>=2.8
%reload_ext tensorboard

%cd "/content/diff-svc/training/"
!rm config.yaml
!wget https://github.com/haru0l/Diff-SVC-notebooks/releases/download/checkpoints/config.yaml -O config.yaml -q --show-progress
%cd "/content/"
!aria2c --file-allocation=none -c -x 10 -s 10 -d "/content/" https://github.com/haru0l/Diff-SVC-notebooks/releases/download/basic/checkpoints.zip -q
!unzip /content/checkpoints.zip -d /content/diff-svc/

if sample_rate == "44.1kHz":
  config_path = "/content/diff-svc/training/config_nsf.yaml"
  !aria2c --file-allocation=none -c -x 10 -s 10 -d "/content/" https://github.com/justinjohn0306/diff-svc/releases/download/models/nsf_hifigan.zip -q
  !unzip /content/nsf_hifigan.zip -d /content/diff-svc/checkpoints/
else:
  config_path = "/content/diff-svc/training/config.yaml"
  !aria2c --file-allocation=none -c -x 10 -s 10 -d "/content/" https://github.com/haru0l/Diff-SVC-notebooks/releases/download/models/hifigan.zip -q
  !unzip /content/hifigan.zip -d /content/diff-svc/

clear_output()
print('Done!')

In [None]:
#@title #Step 2: Decompress dataset
#@markdown This should work with most common archive formats, so don't worry. Please stick with the alphabet characters otherwise preprocessing might error out.

#@markdown Supported types: `.rar`, `.zip`, `.tar`, `.tar.gz`, `.tar.bz2`, `.7z`

#@markdown ###Note that your dataset should consist of `.wav` or `.ogg` format audio
#@markdown ---
#@markdown Name your singer.
singer_name = 'Unnamed' #@param {type: "string"}
singer_name = singer_name.replace(" ", "_")
%cd "/content/"

if sample_rate == "44.1kHz":
  !sed -i -r 's/nyaru/{singer_name}/g' {config_path}
else:
  !sed -i -r 's/atri/{singer_name}/g' {config_path}
  
#@markdown ---
#@markdown File location
!mkdir -p /content/diff-svc/data/raw
dataset_location = '/content/drive/MyDrive/*' #@param {type: "string"}
diffsvc_location = os.path.join('diff-svc', 'data', 'raw', singer_name, "")

if dataset_location.endswith('.rar'):
    !unrar x "$dataset_location" "$diffsvc_location"
elif dataset_location.endswith('.zip'):
    !unzip "$dataset_location" -d "$diffsvc_location"
elif dataset_location.endswith('.tar'):
    !tar -xf "$dataset_location" -C "$diffsvc_location"
elif dataset_location.endswith('.tar.gz'):
    !tar -xzf "$dataset_location" -C "$diffsvc_location"
elif dataset_location.endswith('.tar.bz2'):
    !tar -xjf "$dataset_location" -C "$diffsvc_location"
else:
    !7za x "$dataset_location" -o$diffsvc_location

clear_output()
print('Done!')

In [None]:
#@title #Step 2-A: Decompress training data
#@markdown Decompresses training data directly to `diff-svc/data/binary`, usable only if you already have the output files of Step 4.

#@markdown If you run this step, please skip step 4.

#@markdown Supported types: check above. 

#@markdown ###You must match the training settings of the config.yaml file you used to generated the preprocessed data.
#@markdown ---
#@markdown Name your singer.
singer_name = 'Unnamed' #@param {type: "string"}
singer_name = singer_name.replace(" ", "_")

if sample_rate == "44.1kHz":
  !sed -i -r 's/nyaru/{singer_name}/g' {config_path}
else:
  !sed -i -r 's/atri/{singer_name}/g' {config_path}

#@markdown ---
#@markdown File location
!mkdir -p /content/diff-svc/data/binary
preprocessed_data_location = '/content/drive/MyDrive/*' #@param {type: "string"}
#@markdown So turns out this is important after all so yeah! It should be in the folder with your binary data.
config_location = '/content/drive/MyDrive/*' #@param {type: "string"}
diffsvc_bin_location = '/content/diff-svc/data'

if preprocessed_data_location.endswith('.rar'):
    !unrar x "$preprocessed_data_location" "$diffsvc_bin_location"
elif preprocessed_data_location.endswith('.zip'):
    !unzip "$preprocessed_data_location" -d "$diffsvc_bin_location"
elif preprocessed_data_location.endswith('.tar'):
    !tar -xf "$preprocessed_data_location" -C "$diffsvc_bin_location"
elif preprocessed_data_location.endswith('.tar.gz'):
    !tar -xzf "$preprocessed_data_location" -C "$diffsvc_bin_location"
elif preprocessed_data_location.endswith('.tar.bz2'):
    !tar -xjf "$preprocessed_data_location" -C "$diffsvc_bin_location"
else:
    !7za x "$preprocessed_data_location" -o$diffsvc_bin_location

if sample_rate == "44.1kHz":
  !cp -r {config_location} /content/diff-svc/training
  
clear_output()
print('Done!')

# Training Options/Parameters
Unfortunately, you can't skip configuration entirely: review the options below before you start training.

In [None]:
K_step = 1000

batch_size = 12
if sample_rate == "44.1kHz":
  decay_steps = 20000
else:
  decay_steps = 60000

lr = '0.0008'

#@markdown ###F0 extraction method
#@markdown Crepe is used for F0 extraction for data preprocessing, while it is of higher quality, it is slow, therefore set to false as default.

#@markdown Unchecking this while using the official repo will default to parselmouth, while using UtaUtaUtau's repo will use harvest.
use_crepe = True #@param {type: "boolean"}

#@markdown ---
#@markdown ###Set checkpoint interval
#@markdown As the name states, saves a checkpoint at an interval. When using GPU training, it runs quite fast, so try not to touch this, there's no point.
checkpoint_interval = 1000 #@param {type: "integer"}

##@markdown ---
##@markdown ###Disable FastSpeech2 (used for 44.1kHz)
##@markdown This disables fastspeech for decreased model size and faster training. This works best when the 44.1kHz vocoder is released.

##@markdown For 24kHz models, it is not suggested to use this, as your old models will become incompatible, and there's not much difference in training speed for 24kHz models anyway.
#disable_fs2 = True #@param {type: "boolean"}

#@markdown ---
#@markdown ###Pretrain model usage
#@markdown This allows for faster training when in use. It is not recommended to use this if you have a sufficent amount of data (3 hours). Please don't use ATRI
use_pretrain_model = False #@param {type: "boolean"}
pretrain_model_select = "nyaru (feminine 24kHz)" #@param ["nyaru (feminine 24kHz)", "nehito (masculine 24kHz)", "opencpop (feminine 24kHz)", "liee (feminine 24kHz)", "---", "liee (feminine 44.1kHz)", "Mimi-YA EQUINOX v2 (feminine 44.1kHz)", "nehito (masculine 44.1kHz)", "custom"]

#@markdown If you choose a custom pretrained model, please point the path of the model here.
pretrain_path = "" #@param {type:"string"}

#@markdown ---
#@markdown ###Use custom save directory
#@markdown You can change the directory to save wherever you want. Default location is /diff-svc/checkpoint if unchanged.

#@markdown Please point to a directory with the singer name already specified (example /content/drive/MyDrive/diff-svc/nyaru)

use_save_dir = False #@param {type: "boolean"}

save_dir = "/content/diff-svc/checkpoints" #@param {type: "string"}
save_dir = save_dir.replace(" ", "_")

#@markdown ---
#@markdown ###Resume training from a checkpoint

resume_training_from_ckpt = False #@param {type: "boolean"}
ckpt_directory = ""#@param {type: "string"}


#@markdown ---

#@markdown ###Setup for small datasets
#@markdown If your dataset is small, each epoch will go by very fast and won't have enough time to train well, so if your dataset is considered small, use this option.

endless_ds = False #@param {type:"boolean"}

if use_save_dir:
  %cd /content/diff-svc/utils
  !rm -rf hparams.py
  !wget https://github.com/prophesier/diff-svc/raw/main/utils/hparams.py
  !sed -i -r 's|checkpoints/\{args.work_dir}|haruwashere|g' /content/diff-svc/utils/hparams.py
  !sed -i -r 's|haruwashere|{save_dir}|g' /content/diff-svc/utils/hparams.py
  %cd /content/
else:
  %cd /content/diff-svc/utils
  !rm -rf hparams.py
  !wget https://github.com/prophesier/diff-svc/raw/main/utils/hparams.py
  %cd /content/
clear_output()
if resume_training_from_ckpt:
  !cp {ckpt_directory} {save_dir}
  print("Status: Resuming Training From Checkpoint")
else:
  print("Status: Not Resuming From Checkpoint")

if use_pretrain_model:
  import ipywidgets as widgets
  from IPython.display import display

  # Define the terms of agreement
  terms_of_agreement = """"""

  print("""【TERMS OF USE】
  Redistribution
  -You are free to share rendered singing vocals created with the s voice model.
  -Please indicate that you used the selected model for pre-training wherever you post samples of your voice model.
  -Do not release a model for pre-training other models if it has already been pre-trained with the selected model.
  -Do not release a model pre-trained with the selected model that is intended to recreate the voice of a celebrity, copyrighted fictional character, or otherwise public figure. (Showcasing inferenced examples is OK.)

  Disclaimer
  -The owner of the model, neither the developers holds no responsibility to any incidents, damage, or loss by the user from downloading or using the voice model or character
  and loss that occurs to any third party as a result of usage of the voice model or voice model character.""")

  # Create a checkbox widget for the user to accept the terms
  accept_terms = widgets.Checkbox(
      value=False,
      description='I accept the terms and conditions',
      disabled=False
  )

  # Create a button widget for the user to submit their acceptance
  submit_button = widgets.Button(
      description='Submit',
      disabled=False,
      button_style='success',
      icon='check'
  )

  # Display the terms and the checkbox to the user
  display(widgets.VBox([widgets.Label(value=terms_of_agreement), accept_terms, submit_button]))

  # Define a function to handle the user's acceptance
  def on_submit(b):
      if accept_terms.value:
        %mkdir -p /content/diff-svc/pretrain/
        %cd /content/diff-svc/pretrain/
        lr = '0.00005'
        if pretrain_model_select == "nyaru (feminine 24kHz)":
          !aria2c --file-allocation=none -c -x 10 -s 10 -d "/content/diff-svc/pretrain/" https://github.com/haru0l/Diff-SVC-notebooks/releases/download/models/nyaru.ckpt -q
          pretrain_path = "/content/diff-svc/pretrain/nyaru.ckpt"
          !sed -i -r 's|(load_ckpt:)(\s+)(.+)|\1\2{pretrain_path}|g' {config_path}
          print("Using 24kHz (nyaru) for pretraining...")
        elif pretrain_model_select == "nehito (masculine 24kHz)":
          !aria2c --file-allocation=none -c -x 10 -s 10 -d "/content/diff-svc/pretrain/" https://github.com/MLo7Ghinsan/MLo7_Diff-SVC_models/releases/download/models/Nehito-Diff-SVC_24khz.zip -q
          !unzip Nehito-Diff-SVC_24khz.zip -d /content/diff-svc/pretrain/
          pretrain_path = "/content/diff-svc/pretrain/model_ckpt_steps_1000000.ckpt"
          !sed -i -r 's|(load_ckpt:)(\s+)(.+)|\1\2{pretrain_path}|g' {config_path}
          print("Using 24kHz (nehito) for pretraining...")
        elif pretrain_model_select == "opencpop (feminine 24kHz)":
          !aria2c --file-allocation=none -c -x 10 -s 10 -d "/content/diff-svc/pretrain/" https://github.com/haru0l/Diff-SVC-notebooks/releases/download/models/opencpop.ckpt -q
          pretrain_path = "/content/diff-svc/pretrain/opencpop.ckpt"
          !sed -i -r 's|(load_ckpt:)(\s+)(.+)|\1\2{pretrain_path}|g' {config_path}
          print("Using 24kHz (OpenCPop) for pretraining...")
        elif pretrain_model_select == "---":
          print("Not a valid option. Defaulting to not use a pretrained model...")
        elif pretrain_model_select == "liee (feminine 24kHz)":
          !aria2c --file-allocation=none -c -x 10 -s 10 -d "/content/diff-svc/pretrain/" https://github.com/julieraptor/LIEE-DIFF-SVC-AI/releases/download/Model-24kHz/model_ckpt_steps_200000.ckpt -q
          pretrain_path = "/content/diff-svc/pretrain/model_ckpt_steps_200000.ckpt"
          !sed -i -r 's|(load_ckpt:)(\s+)(.+)|\1\2{pretrain_path}|g' {config_path}
          print("Using 24kHz (LIEE) for pretraining...")
        elif pretrain_model_select == "Mimi-YA EQUINOX v2 (feminine 44.1kHz)":
          !aria2c --file-allocation=none -c -x 10 -s 10 -d "/content/diff-svc/pretrain/" https://github.com/Supurreme/Mimi-YA-DIFF-SVC/releases/download/eek/model_ckpt_steps_400000.ckpt -q
          pretrain_path = "/content/diff-svc/pretrain/model_ckpt_steps_400000.ckpt"
          !sed -i -r 's|(load_ckpt:)(\s+)(.+)|\1\2{pretrain_path}|g' {config_path}
          print("Using 44.1kHz (Mimi-YA EQUINOX v2) for pretraining...")
        elif pretrain_model_select == "liee (feminine 44.1kHz)":
          !aria2c --file-allocation=none -c -x 10 -s 10 -d "/content/diff-svc/pretrain/" https://github.com/julieraptor/LIEE-DIFF-SVC-AI/releases/download/Model-44.1kHz/model_ckpt_steps_600000.ckpt -q
          pretrain_path = "/content/diff-svc/pretrain/model_ckpt_steps_600000.ckpt"
          !sed -i -r 's|(load_ckpt:)(\s+)(.+)|\1\2{pretrain_path}|g' {config_path}
          print("Using 44.1kHz (LIEE) for pretraining...")
        elif pretrain_model_select == "nehito (masculine 44.1kHz)":
          !aria2c --file-allocation=none -c -x 10 -s 10 -d "/content/diff-svc/pretrain/" https://github.com/MLo7Ghinsan/MLo7_Diff-SVC_models/releases/download/models/Nehito-Diff-SVC_44khz.zip -q
          !unzip Nehito-Diff-SVC_44khz.zip -d /content/diff-svc/pretrain/
          pretrain_path = "/content/diff-svc/pretrain/Nehito-Diff-SVC_44khz/nehito_ckpt_steps_1000000.ckpt"
          !sed -i -r 's|(load_ckpt:)(\s+)(.+)|\1\2{pretrain_path}|g' {config_path}
          print("Using 44.1kHz (nehito) for pretraining...")
      
        elif pretrain_model_select == "custom":
          print("Custom pretrained model selected.")
          !sed -i -r 's|(load_ckpt:)(\s+)(.+)|\1\2{pretrain_path}|g' {config_path}
      else:
          print("You must accept the terms before running the code.")

  # Attach the function to the submit button's "on_click" event
  submit_button.on_click(on_submit)

if sample_rate == "44.1kHz":
  disable_fs2 = True
  !sed -i -r 's|(no_fs2:)(\s+)(.+)|\1\2{disable_fs2}|g' {config_path}
else:
  disable_fs2 = False
  !sed -i -r 's|(no_fs2:)(\s+)(.+)|\1\2{disable_fs2}|g' {config_path}

!sed -i -r 's|(max_sentences:)(\s+)(.+)|\1\2{batch_size}|g' {config_path}
!sed -i -r 's|(decay_steps:)(\s+)(.+)|\1\2{decay_steps}|g' {config_path}
!sed -i -r 's|(lr:)(\s+)(.+)|\1\2{lr}|g' {config_path}
!sed -i -r 's|(K_step:)(\s+)(.+)|\1\2{K_step}|g' {config_path}
!sed -i -r 's|(max_sentences:)(\s+)(.+)|\1\2{batch_size}|g' {config_path}
!sed -i -r 's|(use_crepe:)(\s+)(.+)|\1\2{use_crepe}|g' {config_path}
!sed -i -r 's|(val_check_interval:)(\s+)(.+)|\1\2{checkpoint_interval}|g' {config_path}
!sed -i -r 's|(no_fs2:)(\s+)(.+)|\1\2{disable_fs2}|g' {config_path}
!sed -i -r 's|(endless_ds:)(\s+)(.+)|\1\2{endless_ds}|g' {config_path}

#clear_output()

# Training
Finally, the dreaded part.

In [None]:
#@title #Step 4: Pre-processing
#@markdown This step is also known as data prep or feature generation, who cares?
#@markdown
%cd "/content/diff-svc/"

os.environ['PYTHONPATH']='.'
!CUDA_VISIBLE_DEVICES=0 python preprocessing/binarize.py --config {config_path}
%cd "/content/diff-svc/data"
!7za -bso0 a "/content/{singer_name}_binary_data.7z" "binary/{singer_name}"
if not os.path.exists('/content/drive/MyDrive/diff-svc/data/{singer_name}'):
    !mkdir -p /content/drive/MyDrive/diff-svc/data/{singer_name}
!mv -v "/content/{singer_name}_binary_data.7z" /content/drive/MyDrive/diff-svc/data/{singer_name}
if sample_rate == "44.1kHz":
  !cp -r /content/diff-svc/training/config_nsf.yaml /content/drive/MyDrive/diff-svc/data/{singer_name}
else:
  !cp -r /content/diff-svc/training/config.yaml /content/drive/MyDrive/diff-svc/data/{singer_name}

In [None]:
#@title #Step 5-0 Tensorboard (run before step 5)
#@markdown Shows training progress, go to the top right corner to set it to update the logs.

import datetime, os

# Point TensorBoard at the lightning_logs folder: under the custom save
# directory when one is enabled, otherwise under the singer's folder in
# diff-svc/checkpoints.
if use_save_dir:
  %tensorboard --load_fast=true --reload_interval=1 --reload_multifile=true --logdir="{save_dir}/lightning_logs/"
else:
  %tensorboard --load_fast=true --reload_interval=1 --reload_multifile=true --logdir=/content/diff-svc/checkpoints/{singer_name}/lightning_logs/


In [None]:
#@title #Step 5: Training
#@markdown Yeah, it took THAT long to get here, colab is probably going to disconnect you at this point... unless you have pro ¯\\_(ツ)_/¯
%cd "/content/diff-svc/"

# PYTHONPATH is set to the current directory, i.e. the repo root after the %cd above.
os.environ['PYTHONPATH']='.'

# Launch run.py with only GPU 0 visible, using the prepared config; the singer
# name is passed as the experiment name.
!CUDA_VISIBLE_DEVICES=0 python run.py --config {config_path} --exp_name $singer_name --reset

In [None]:
#@title # Step 6: Package Model
#@markdown If you used a custom directory for saving your models please don't use this.

store_on_drive = True #@param {type: "boolean"}
from datetime import datetime, timezone
import glob

time_now = datetime.now(timezone.utc).strftime('%Y-%m-%d %H-%M-%S')

archive_name = f'{singer_name}_{time_now}'

if use_save_dir:
  %cd {save_dir}
  !zip -r "/content/{archive_name}.zip" ./ -x ./lighting_logs/\*
else:
  %cd /content/diff-svc/checkpoints
  !zip -r "/content/{archive_name}.zip" ./{singer_name} -x ./{singer_name}/lighting_logs/\*

if store_on_drive:
      if not os.path.exists('/content/drive/MyDrive/Diff-SVC_release'):
          !mkdir /content/drive/MyDrive/Diff-SVC_release
      
      !mv -v "/content/{archive_name}.zip" /content/drive/MyDrive/Diff-SVC_release

print('Done!')