Training notebook for [Diff-SVC](https://github.com/prophesier/diff-svc) originally made by [justinjohn-03](https://github.com/justinjohn0306)
 Modified by [奕晨](https://twitter.com/nekrothecorpse) of [Archivoice](https://github.com/archivoice) and currently maintained by [haru0l](https://twitter.com/mscoocoo2)

This is the basic version of the notebook. This notebook assumes that you have READ the documentation and no support will be given.

QOL improvements have been removed in this version due to a certain someone who won't credit the usage of the program.

# Preparation

In [None]:
import os
from IPython.display import clear_output

# Remove the sample_data directory from the working directory.
!rm -rf sample_data
# Bring pip and wheel up to date before installing anything else.
!python -m pip install --upgrade pip wheel --quiet
# Uninstall gdown; the replacement install below is currently disabled.
!pip uninstall gdown -y --quiet
#!pip install git+https://github.com/justinjohn0306/gdown.git --quiet
# Python packages installed up front (pyworld is pinned to 0.3.1).
!pip install pydub fuzzywuzzy python-Levenshtein pyworld==0.3.1 --quiet

# Install aria2 (used by later `aria2c` download commands) and the tools needed
# to fetch the aria2.sh helper script. Output of these commands is discarded.
!sudo apt-get install aria2  &> /dev/null
!apt install wget curl ca-certificates &> /dev/null
!wget -N git.io/aria2.sh &> /dev/null && chmod +x aria2.sh &> /dev/null
# Pipe menu choices 1, 12 and 6 into aria2.sh, one run each.
# NOTE(review): what each choice does is defined by the downloaded script and
# is not visible here — confirm against aria2.sh before changing.
!echo 1|./aria2.sh &> /dev/null
!echo 12|./aria2.sh &> /dev/null
!echo 6|./aria2.sh &> /dev/null
# Install torchtext 0.6.0 without pulling in its dependencies.
!pip install --pre torchtext==0.6.0 --no-deps --quiet


#@markdown ###Model sample rate
#@markdown Please choose if you want to train a 24kHz model or a 44.1kHz model.
sample_rate = '44.1kHz' #@param ["24kHz", "44.1kHz"]

print('Installing Diff-SVC')
!git clone -q https://github.com/prophesier/diff-svc

%cd "diff-svc"
!pip install -r requirements_short.txt --quiet
!pip install tensorboard<2.9,>=2.8 --quiet
#!pip install --upgrade numpy==1.23.0 scipy==1.9.3 --quiet
%reload_ext tensorboard

%mkdir -p checkpoints

hifigan_24k = "https://github.com/haru0l/Diff-SVC-notebooks/releases/download/models_24khz/hifigan_24k.zip"
hifigan_44k = "https://github.com/haru0l/Diff-SVC-notebooks/releases/download/start/hifigan_44k.zip"
checkpoints = "https://github.com/haru0l/Diff-SVC-notebooks/releases/download/start/checkpoints.zip"

!aria2c --file-allocation=none -c -x 10 -s 10 {checkpoints} -q
!unzip checkpoints.zip
!rm checkpoints.zip

if sample_rate == "44.1kHz":
	!aria2c --file-allocation=none -c -x 10 -s 10 {hifigan_44k} -q
	!unzip hifigan_44k.zip -d checkpoints
	!rm hifigan_44k.zip
	config_path = "training/config_nsf.yaml"
	slay = "44100"
else:
	!aria2c --file-allocation=none -c -x 10 -s 10 {hifigan_24k} -q
	!unzip hifigan_24k.zip
	!rm hifigan_24k.zip
	config_path = "training/config.yaml"
	!rm {config_path}
	!wget "https://github.com/haru0l/Diff-SVC-notebooks/releases/download/models_24khz/config.yaml" -O {config_path} -q
	slay = "24000"
clear_output()
print('Done!')

You are now expected to extract the wavs into data/raw/(singer_name_here)

# Training

In [None]:
#@title #Pre-processing
#@markdown This just converts your data into mel spectrograms

# Put the current directory (the diff-svc checkout after the setup cell's %cd)
# on the module search path for the script launched below.
os.environ['PYTHONPATH']='.'
# Run the binarizer with the config chosen in the setup cell, restricted to GPU 0.
!CUDA_VISIBLE_DEVICES=0 python preprocessing/binarize.py --config {config_path}

In [None]:
#@title #Training
#@markdown Trains your model.

os.environ['PYTHONPATH']='.'
!CUDA_VISIBLE_DEVICES=0 python run.py --config {config_path} --exp_name $singer_name --reset

# **Inference Section**

In [None]:
import os
import yaml
#@title # **Load model**

#@markdown ### **Load a trained model for inferencing**
#@markdown ___
#@markdown Note: Add the full path of the FOLDER of your model. This will automatically load the latest model along with your configs.
 
# Read the speaker id from the active config; the training cell passes it as --exp_name.
with open(config_path, 'r') as config_file:
    config = yaml.safe_load(config_file)
singer_name = config['speaker_id']

os.environ['PYTHONPATH']='.'
# A bare `!CUDA_VISIBLE_DEVICES=0` only sets the variable inside a throwaway
# subshell and has no effect on this kernel. Set it on the kernel process
# instead, before the imports below that may initialise CUDA.
os.environ['CUDA_VISIBLE_DEVICES']='0'

from utils.hparams import hparams
from preprocessing.data_gen_utils import get_pitch_parselmouth,get_pitch_crepe
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import utils
import librosa
import torchcrepe
from infer import *
import logging
from infer_tools.infer_tool import *

logging.getLogger('numba').setLevel(logging.WARNING)

def find_latest_ckpt(dir_path):
    """Return the path of the latest ``.ckpt`` file inside ``dir_path``.

    File names are ranked with a natural sort: runs of digits are compared as
    integers, so ``steps_10000.ckpt`` ranks after ``steps_2000.ckpt`` (a plain
    string sort would rank it before and pick the older checkpoint).

    Args:
        dir_path: Directory to search for checkpoint files.

    Returns:
        ``dir_path`` joined with the highest-ranked ``.ckpt`` file name, or
        ``None`` (after printing a message) when the directory cannot be listed
        or contains no ``.ckpt`` file.
    """
    import re

    def natural_key(file_name):
        # re.split with a capturing group alternates text/digits/text/...,
        # so every odd index is a digit run and can be compared as an int.
        pieces = re.split(r'(\d+)', file_name)
        return [int(piece) if index % 2 else piece
                for index, piece in enumerate(pieces)]

    try:
        ckpt_files = [f for f in os.listdir(dir_path) if f.endswith('.ckpt')]
        # max() raises ValueError on an empty list, handled below.
        return os.path.join(dir_path, max(ckpt_files, key=natural_key))
    except (OSError, ValueError, TypeError):
        print("Unable to find model/model is corrupted.")
        return None

model_folder = "" #@param {type: "string"}
# Pick the latest checkpoint inside the chosen folder.
model_path = find_latest_ckpt(model_folder)
if model_path is None:
    # find_latest_ckpt has already printed its message; stop here with a clear
    # error instead of handing None to Svc.
    raise FileNotFoundError(
        f"No .ckpt checkpoint found in model_folder={model_folder!r}. "
        "Set model_folder to the folder that contains your model."
    )

project_name = "sample"
# NOTE: this rebinds config_path to the model folder's config, so earlier cells
# re-run after this point will use that file instead of the training config.
config_path= f"{model_folder}/config.yaml"
hubert_gpu=True
svc_model = Svc(project_name,config_path,hubert_gpu, model_path)
print('model loaded')

#@title Run Inference (yes I'm keeping this image here in case someone wants to kindly buy our ko-fi <3)

#@markdown put your wav paths in here
wav_in = "" #@param {type:"string"}

key = 0 #@param {type:"slider", min:-12, max:12, step:1}

pndm_speedup = 20 #@param {type:"slider", min:0, max:100, step:1}

wav_out = "sample"

add_noise_step = 500 #@param {type:"slider", min:0, max:1000, step:10}


thre = 0.05 #@param {type:"slider", min:0.0, max:1.0, step:0.01}
use_crepe= False #@param {type:"boolean"}
use_pe=False #@param {type:"boolean"}
use_gt_mel= False #@param {type:"boolean"}

from IPython.display import Audio, clear_output, display

# Single source of truth for where the rendered audio is written and played from.
out_path = f"results/{wav_out}.flac"

try:
  f0_tst, f0_pred, audio = run_clip(svc_model,file_path=wav_in, key=key, acc=pndm_speedup, use_crepe=use_crepe, use_pe=use_pe, thre=thre, use_gt_mel=use_gt_mel, add_noise_step=add_noise_step,project_name=project_name,out_path=out_path)
except Exception as e:
  # Report the cause instead of hiding it, so the user can act on it.
  print("Unable to inference.")
  print(f"Reason: {e!r}")
else:
  clear_output()
  print("Inference complete!")
  print(f"Inferenced file is stored at {out_path}.")
  # The cell used to end with Audio(f"{link}"), but `link` is never defined, so
  # it raised NameError on every run. Play the file that was just written instead.
  display(Audio(out_path))