# Bert-VITS2-Extra-Fix

Note: `train_ms.py` doesn't work on macOS at the moment.

## Preparing

In [None]:
!git clone https://github.com/evshiron/Bert-VITS2-Extra-Fix
# `!cd` runs in a throwaway subshell and leaves the kernel where it was.
# `%cd` changes the notebook's own working directory, which the relative
# paths used by the following cells (requirements.txt, bert/, Data/, ...) need.
%cd Bert-VITS2-Extra-Fix

In [None]:
# `%pip` installs into the environment of the running kernel, whereas `!pip`
# uses whichever pip happens to be first on the shell's PATH.
%pip install torch torchvision torchaudio
# Alternative PyTorch build from the ROCm 6.0 wheel index:
# %pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.0
%pip install -r requirements.txt

In [None]:
# download required models
#
# curl flags used below:
#   -f  fail on an HTTP error instead of saving the server's error page as if
#       it were the model file
#   -L  follow redirects
#   -o  write the response to the given file

# bert
%pushd bert
!curl -fL 'https://huggingface.co/hfl/chinese-roberta-wwm-ext-large/resolve/main/pytorch_model.bin?download=true' -o chinese-roberta-wwm-ext-large/pytorch_model.bin
!curl -fL 'https://huggingface.co/ku-nlp/deberta-v2-large-japanese-char-wwm/resolve/main/pytorch_model.bin?download=true' -o deberta-v2-large-japanese-char-wwm/pytorch_model.bin
!curl -fL 'https://huggingface.co/microsoft/deberta-v3-large/resolve/main/pytorch_model.bin?download=true' -o deberta-v3-large/pytorch_model.bin
!curl -fL 'https://huggingface.co/microsoft/deberta-v3-large/resolve/main/pytorch_model.generator.bin?download=true' -o deberta-v3-large/pytorch_model.generator.bin
!curl -fL 'https://huggingface.co/IDEA-CCNL/Erlangshen-MegatronBert-1.3B/resolve/main/pytorch_model.bin?download=true' -o Erlangshen-MegatronBert-1.3B-Chinese/pytorch_model.bin
%popd

# slm
!curl -fL 'https://huggingface.co/microsoft/wavlm-base-plus/resolve/main/pytorch_model.bin?download=true' -o slm/wavlm-base-plus/pytorch_model.bin

# g2pW
!curl -fL 'https://openi.pcl.ac.cn/Stardust_minus/Bert-VITS2/modelmanage/f4977cc4-3784-4a52-a605-21b5684f3d8f/downloadsingle?parentDir=&fileName=g2pW.onnx' -o g2pW/g2pW.onnx

# emotional
!curl -fL 'https://huggingface.co/laion/clap-htsat-fused/resolve/main/pytorch_model.bin?download=true' -o emotional/clap-htsat-fused/pytorch_model.bin

# pretrained models
# -p: do not fail when the directory already exists, so the cell can be re-run
!mkdir -p pretrained_models
%pushd pretrained_models
!curl -fL 'https://openi.pcl.ac.cn/Stardust_minus/Bert-VITS2/modelmanage/7ed55f37-467f-4212-9cde-ae312fbf0c1d/downloadsingle?parentDir=&fileName=G_0.pth' -o G_0.pth
!curl -fL 'https://openi.pcl.ac.cn/Stardust_minus/Bert-VITS2/modelmanage/7ed55f37-467f-4212-9cde-ae312fbf0c1d/downloadsingle?parentDir=&fileName=D_0.pth' -o D_0.pth
!curl -fL 'https://openi.pcl.ac.cn/Stardust_minus/Bert-VITS2/modelmanage/7ed55f37-467f-4212-9cde-ae312fbf0c1d/downloadsingle?parentDir=&fileName=WD_0.pth' -o WD_0.pth
!curl -fL 'https://openi.pcl.ac.cn/Stardust_minus/Bert-VITS2/modelmanage/7ed55f37-467f-4212-9cde-ae312fbf0c1d/downloadsingle?parentDir=&fileName=DUR_0.pth' -o DUR_0.pth
%popd


### Mounting Google Drive

In [None]:
# Only available on Google Colab: expose Google Drive under /content/drive.
from google.colab import drive

drive.mount(mountpoint="/content/drive")

In [None]:
# optional: unpack zips into Data folder from Google Drive

# -p: do not fail when Data already exists, so the cell can be re-run
!mkdir -p Data
# The wildcard is quoted so that unzip expands it itself. Unquoted, the shell
# expands it, and with more than one archive unzip would treat the extra file
# names as members to extract from the first one. -d extracts into Data.
!unzip '/content/drive/MyDrive/Bert-VITS2-Extra-Fix/*.zip' -d Data

## Training

In [None]:
# Name of the dataset folder under ./Data; replace with your own model name.
model_name = "MODEL_NAME"

# Device written into config.yml for bert_gen, emo_gen and webui.
# Other values suggested by this notebook: "mps", "cpu".
device = "cuda"

# Stored as train.batch_size in the model's config.json.
batch_size = 4

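### Prepare Datasets

Put the audio clips and the label file under `Data/MODEL_NAME` as shown below. Each line of `esd.list` has the form `path|speaker|language|text`.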

```
├── Data
│   ├── MODEL_NAME
│   │   ├── esd.list
│   │   ├── raw
│   │   │   ├── ****.wav
│   │   │   ├── ****.wav
│   │   │   ├── ...
```

In [None]:
# generate configs

import json
import os
import shutil
import yaml

import utils

def get_paths_for_model_name(model_name):
    """Build the dataset paths used by this notebook for ``model_name``.

    Everything lives under ``./Data/<model_name>``.

    Returns:
        A 5-tuple ``(base_dir, label_path, train_path, val_path, config_path)``
        where the label/train/val paths are ``esd.list``, ``train.list`` and
        ``val.list`` inside ``base_dir`` and ``config_path`` is
        ``configs/config.json`` inside ``base_dir``.
    """
    base_dir = os.path.join("./Data", model_name)
    label_path, train_path, val_path = (
        os.path.join(base_dir, filename)
        for filename in ("esd.list", "train.list", "val.list")
    )
    config_path = os.path.join(base_dir, "configs", "config.json")
    return base_dir, label_path, train_path, val_path, config_path


base_dir, label_path, train_path, val_path, config_path = get_paths_for_model_name(model_name)

# Fail early, before anything is written, with a message that names the
# missing dataset folder (os.mkdir below would otherwise raise a less helpful
# FileNotFoundError for the sub-directory).
if not os.path.isdir(base_dir):
    raise FileNotFoundError(
        f"Dataset directory {base_dir!r} does not exist; check model_name"
    )

# Start from the model's own config.json when one exists, otherwise from the
# repository default. The file is opened with `with` so the handle is closed.
source_config_path = config_path if os.path.isfile(config_path) else "configs/config.json"
with open(source_config_path, "r", encoding="utf-8") as f:
    config = json.load(f)
config["data"]["training_files"] = train_path
config["data"]["validation_files"] = val_path
config["train"]["batch_size"] = batch_size

# Make sure the output folders exist; model_dir is reused by later cells.
config_dir = os.path.join(base_dir, "configs")
os.makedirs(config_dir, exist_ok=True)
model_dir = os.path.join(base_dir, "models")
os.makedirs(model_dir, exist_ok=True)
with open(config_path, "w", encoding="utf-8") as f:
    json.dump(config, f, indent=4)

# config.yml is always regenerated from default_config.yml, so copying the
# default to config.yml beforehand would be immediately overwritten and is
# not done here. Note that this resets any manual edits made to config.yml.
with open("default_config.yml", "r", encoding="utf-8") as f:
    data = yaml.safe_load(f)
data["dataset_path"] = base_dir
data["bert_gen"]["device"] = device
data["emo_gen"]["device"] = device
data["webui"]["device"] = device
with open("config.yml", "w", encoding="utf-8") as f:
    yaml.dump(data, f, allow_unicode=True)

In [None]:
# preprocess audios

# Source clips are read from <base_dir>/raw and results go to <base_dir>/wavs.
in_dir = os.path.join(base_dir, "raw")
out_dir = os.path.join(base_dir, "wavs")

# The interpolated paths are quoted so that a model name containing spaces or
# shell metacharacters is passed to the script as a single argument.
!python resample_legacy.py --sr 44100 --in_dir "{in_dir}" --out_dir "{out_dir}"

In [None]:
# preprocess labels

# Read the whole label file first; it is rewritten in place further down.
with open(label_path, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Build the new content completely before touching the file, so a malformed
# line raises without leaving esd.list truncated.
rewritten = []
for line_number, line in enumerate(lines, start=1):
    entry = line.strip()
    if not entry:
        continue  # tolerate blank lines, e.g. a trailing newline at the end
    # maxsplit=3 keeps any "|" that appears inside the text field
    fields = entry.split("|", 3)
    if len(fields) != 4:
        raise ValueError(
            f"{label_path}:{line_number}: expected 'path|speaker|language|text', got {entry!r}"
        )
    path, speaker, language, text = fields
    # Point every entry at the file of the same name in <base_dir>/wavs and
    # normalise Windows separators to forward slashes.
    path = os.path.join(base_dir, "wavs", os.path.basename(path)).replace("\\", "/")
    rewritten.append(f"{path}|{speaker}|{language}|{text}\n")

with open(label_path, "w", encoding="utf-8") as f:
    f.writelines(rewritten)

# Paths are quoted so that spaces or shell metacharacters in the model name
# do not split the arguments.
!python preprocess_text.py --transcription-path "{label_path}" --train-path "{train_path}" --val-path "{val_path}" --config-path "{config_path}"

In [None]:
# generate bert checkpoints

# config_path is quoted so a model name with spaces stays a single argument
!python bert_gen.py --config "{config_path}"

In [None]:
# generate clap checkpoints

# config_path is quoted so a model name with spaces stays a single argument
!python clap_gen.py --config "{config_path}"

In [None]:
# Copy the downloaded base checkpoints into the model's "models" folder.
# The destination is quoted so a model name with spaces stays a single
# argument; the source glob is left unquoted so the shell expands it.
!cp pretrained_models/*.pth "{model_dir}"

In [None]:
# Start training. As noted at the top of this notebook, train_ms.py does not
# work on macOS at the moment.
# NOTE(review): no arguments are passed; presumably the script picks up its
# settings from config.yml written earlier — confirm.
!python train_ms.py

## Inference

In [None]:
# Record in config.yml which generator checkpoint the web UI should use.
with open("config.yml", "r", encoding="utf-8") as f:
    data = yaml.safe_load(f)

# Whatever utils.latest_checkpoint_path picks among the G_*.pth files in
# model_dir, stored relative to the dataset directory.
latest_generator = utils.latest_checkpoint_path(model_dir, "G_*.pth")
data["webui"]["model"] = os.path.relpath(latest_generator, base_dir)

with open("config.yml", "w", encoding="utf-8") as f:
    yaml.dump(data, f, allow_unicode=True)

In [None]:
# Launch the web UI script. The previous cell stored the checkpoint path under
# webui.model in config.yml.
# NOTE(review): presumably webui.py reads that setting — confirm.
!python webui.py