# [Retrieval-based-Voice-Conversion-WebUI](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI) Training notebook

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/giannifiore/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)

In [None]:
# @title 查看显卡
# Show the GPU(s) visible to this runtime (title: "Check the GPU").
!nvidia-smi

In [None]:
# @title 挂载谷歌云盘
# Mount Google Drive at /content/drive (title: "Mount Google Drive").
# Later cells read the dataset from, and copy checkpoints to, /content/drive/MyDrive.

from google.colab import drive

drive.mount("/content/drive")

In [None]:
# @title 安装依赖
!apt-get -y install build-essential python3-dev ffmpeg
!pip3 install --upgrade setuptools wheel
!pip3 install 'pip<24.1'
import sys
py_ver = sys.version_info
if py_ver >= (3, 12):
    numpy_spec = "numpy==1.26.4"
    numba_spec = "numba==0.59.1"
    llvmlite_spec = "llvmlite==0.42.0"
elif py_ver >= (3, 11):
    numpy_spec = "numpy==1.26.4"
    numba_spec = "numba==0.58.1"
    llvmlite_spec = "llvmlite==0.41.1"
else:
    numpy_spec = "numpy==1.23.5"
    numba_spec = "numba==0.56.4"
    llvmlite_spec = "llvmlite==0.39.0"
COMMON_PACKAGES = f"faiss-cpu==1.8.0 gradio==3.14.0 ffmpeg ffmpeg-python praat-parselmouth pyworld {numpy_spec} {numba_spec} {llvmlite_spec} librosa==0.9.2 torchcrepe-0.0.24"
!pip3 install {COMMON_PACKAGES}
FAIRSEQ_PKG = 'fairseq==0.12.2' if sys.version_info < (3, 11) else 'fairseq@git+https://github.com/One-sixth/fairseq.git'
!pip3 install {FAIRSEQ_PKG}


In [None]:
# @title 克隆仓库

!git clone --depth=1 -b main https://github.com/giannifiore/Retrieval-based-Voice-Conversion-WebUI
%cd /content/Retrieval-based-Voice-Conversion-WebUI
!mkdir -p pretrained uvr5_weights

In [None]:
# @title 更新仓库（一般无需执行）
# Pull the latest commits (title: "Update the repository (usually not needed)").
# `git pull` acts on the current working directory, so this relies on the
# `%cd` into the checkout performed by the clone cell.
!git pull

In [None]:
# @title 安装aria2
# Install the aria2 downloader used by the model download cells below
# (title: "Install aria2").
!apt -y install -qq aria2

In [None]:
# @title 下载底模
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/pretrained -o D32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/pretrained -o D40k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/pretrained -o D48k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/pretrained -o G32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/pretrained -o G40k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/pretrained -o G48k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/pretrained -o f0D32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/pretrained -o f0D40k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/pretrained -o f0D48k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/pretrained -o f0G32k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/pretrained -o f0G40k.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/pretrained -o f0G48k.pth

In [None]:
# @title 下载人声分离模型
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth

In [None]:
# @title 下载hubert_base
# Download hubert_base.pt into assets/hubert (title: "Download hubert_base").
# -c lets aria2c resume a partial download when the cell is re-run.
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/hubert -o hubert_base.pt

In [None]:
# @title #下载rmvpe模型
# Download rmvpe.pt into assets/rmvpe (title: "Download the rmvpe model").
# The target directory is created first, then aria2c saves the file there.
!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/assets/rmvpe
!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt -d /content/Retrieval-based-Voice-Conversion-WebUI/assets/rmvpe -o rmvpe.pt

In [None]:
# @title 从谷歌云盘加载打包好的数据集到/content/dataset

# @markdown 数据集位置
DATASET = (
    "/content/drive/MyDrive/RVC_Voice_Data-20251122T130953Z-1-001.zip"  # @param {type:"string"}
)

!mkdir -p /content/dataset
!unzip -d /content/dataset -B {DATASET}

In [None]:
# @title 重命名数据集中的重名文件
# Rename duplicate-named files in the dataset
# (title: "Rename duplicate files in the dataset").
# Lists the dataset directory, then rewrites names of the form `name.ext~N`
# to `name_N.ext`. Presumably these are the "~" backups left by `unzip -B` in
# the previous cell -- confirm. Only files directly inside /content/dataset
# are matched by the glob, not files in subdirectories.
!ls -a /content/dataset/
!rename 's/(\w+)\.(\w+)~(\d*)/$1_$3.$2/' /content/dataset/*.*~*

In [None]:
# @title （可选）启动Gradio Web界面
# Optionally start the Gradio WebUI from the repository root
# (title: "(Optional) Launch the Gradio web interface").
import os, subprocess
USE_WEBUI = False  # @param {type:"boolean"}
os.chdir("/content/Retrieval-based-Voice-Conversion-WebUI")
if not USE_WEBUI:
    # Default path: skip the UI and point the user at the CLI training cell.
    print('已跳过 WebUI，推荐使用下方 CLI 训练单元。')
else:
    # check=True surfaces a non-zero exit code as CalledProcessError.
    webui_cmd = ['python3', 'infer-web.py', '--colab', '--pycmd', 'python3']
    subprocess.run(webui_cmd, check=True)


In [None]:
# @title CLI 训练（GPU 必需）
# Run the repository's training script as a subprocess, driven by the form
# values below (title: "CLI training (GPU required)").
# Nothing is launched unless RUN_TRAINING is ticked; the environment checks
# run either way.
import os, random, subprocess, torch
MODELNAME = "lulu"  # @param {type:"string"}
MODELSAMPLE = "48k"  # @param ["32k", "40k", "48k"]
MODEL_VERSION = "v2"  # @param ["v1", "v2"]
USEGPU = "0"  # @param {type:"string"}
BATCHSIZE = 6  # @param {type:"integer"}
MODELEPOCH = 2000  # @param {type:"integer"}
EPOCHSAVE = 100  # @param {type:"integer"}
USE_F0 = 1  # @param {type:"integer"}
ONLYLATEST = 0  # @param {type:"integer"}
CACHEDATA = 1  # @param {type:"integer"}
SAVE_WEIGHTS = 0  # @param {type:"integer"}
RUN_TRAINING = False  # @param {type:"boolean"}

if not torch.cuda.is_available():
    raise RuntimeError('未检测到 GPU，CLI 训练需要 GPU 环境。')
config_path = f'/content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/config.json'
if not os.path.exists(config_path):
    raise FileNotFoundError(f'未找到 {config_path}，请先完成预处理和特征提取。')
os.chdir('/content/Retrieval-based-Voice-Conversion-WebUI')

# Resolve the base-model checkpoints relative to the repo root (the cwd set above).
# The download cell stores them under assets/, not under the top-level pretrained/
# directory that was referenced before, so the old paths never existed.
# NOTE(review): v2 weights are expected in assets/pretrained_v2, following the
# upstream RVC layout -- confirm against this fork.
pretrained_dir = 'assets/pretrained' if MODEL_VERSION == 'v1' else 'assets/pretrained_v2'
# Pitch-guided models use the "f0"-prefixed weights; models trained without f0 use the plain ones.
f0_prefix = 'f0' if USE_F0 else ''
pretrained_g = f'{pretrained_dir}/{f0_prefix}G{MODELSAMPLE}.pth'
pretrained_d = f'{pretrained_dir}/{f0_prefix}D{MODELSAMPLE}.pth'
missing_weights = [path for path in (pretrained_g, pretrained_d) if not os.path.isfile(path)]
if missing_weights:
    # Fail here with the exact paths instead of deep inside the training script.
    raise FileNotFoundError(f'未找到底模文件 {missing_weights}，请先下载对应版本的底模。')

# Random port so a re-run does not collide with a port held by a previous run.
os.environ['MASTER_PORT'] = str(random.randint(20000, 59000))
cmd = [
    'python3',
    'infer/modules/train/train.py',
    '-e', MODELNAME,
    '-sr', MODELSAMPLE,
    '-v', MODEL_VERSION,
    '-bs', str(BATCHSIZE),
    '-te', str(MODELEPOCH),
    '-se', str(EPOCHSAVE),
    '-g', USEGPU,
    '-f0', str(USE_F0),
    '-l', str(ONLYLATEST),
    '-c', str(CACHEDATA),
    '-pg', pretrained_g,
    '-pd', pretrained_d,
    '-sw', str(SAVE_WEIGHTS),
]
if RUN_TRAINING:
    print('运行命令: ' + ' '.join(cmd))
    # check=True raises CalledProcessError when training exits with an error.
    subprocess.run(cmd, check=True)
else:
    print('已跳过 CLI 训练，设置 RUN_TRAINING=True 后再运行。')


In [None]:
# @title 手动将训练后的模型文件备份到谷歌云盘
# @markdown 需要自己查看logs文件夹下模型的文件名，手动修改下方命令末尾的文件名
# Copy the checkpoints of one epoch, the index/npy files and the exported
# weight from the runtime to Google Drive
# (title: "Manually back up trained model files to Google Drive").
# The markdown note above asks the user to look up the actual file names under
# logs/ and adjust the commands accordingly.

# @markdown 模型名
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown 模型epoch
MODELEPOCH = 9600  # @param {type:"integer"}

# NOTE: the Drive names are crossed -- G_<epoch>.pth is stored as
# <model>_D_<epoch>.pth and D_<epoch>.pth as <model>_G_<epoch>.pth. The restore
# cell applies the same crossed mapping in reverse, so a backup/restore round
# trip yields the right files; do not "fix" only one side.
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/

# NOTE(review): reads the exported weight from the top-level weights/ directory,
# while downloads in this notebook go under assets/ -- confirm where the
# training script writes its exported weights.
!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth

In [None]:
# @title 从谷歌云盘恢复pth
# @markdown 需要自己查看logs文件夹下模型的文件名，手动修改下方命令末尾的文件名

# @markdown 模型名
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown 模型epoch
MODELEPOCH = 7500  # @param {type:"integer"}

!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}

!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth
!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth
!cp /content/drive/MyDrive/*.index /content/
!cp /content/drive/MyDrive/*.npy /content/
!cp /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth

In [None]:
# @title 手动预处理（不推荐）
# Run the dataset preprocessing script by hand
# (title: "Manual preprocessing (not recommended)").
# @markdown 模型名
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown 采样率
BITRATE = 48000  # @param {type:"integer"}
# @markdown 使用的进程数
THREADCOUNT = 8  # @param {type:"integer"}

# Form fields above: model name, sample rate, number of processes.
# The script and logs/ paths are relative, so this relies on the working
# directory being the repository root.
# NOTE(review): the CLI training cell runs a script under infer/modules/train/,
# so confirm this script still exists at the repository root in this fork.
!python3 trainset_preprocess_pipeline_print.py /content/dataset {BITRATE} {THREADCOUNT} logs/{MODELNAME} True

In [None]:
# @title 手动提取特征（不推荐）
# Run pitch (f0) extraction and feature extraction by hand
# (title: "Manual feature extraction (not recommended)").
# @markdown 模型名
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown 使用的进程数
THREADCOUNT = 8  # @param {type:"integer"}
# @markdown 音高提取算法
ALGO = "harvest"  # @param {type:"string"}

# Form fields above: model name, number of processes, pitch extraction algorithm.
# Both scripts are addressed relative to the working directory (expected to be
# the repository root).
# NOTE(review): confirm these top-level script names still exist in this fork;
# the CLI training cell uses the infer/modules/train/ layout.
!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}

# The first argument selects the device; "cpu" is passed here.
!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME} True

In [None]:
# @title 手动训练（不推荐）
# @markdown 模型名
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown 使用的GPU
USEGPU = "0"  # @param {type:"string"}
# @markdown 批大小
BATCHSIZE = 32  # @param {type:"integer"}
# @markdown 停止的epoch
MODELEPOCH = 3200  # @param {type:"integer"}
# @markdown 保存epoch间隔
EPOCHSAVE = 100  # @param {type:"integer"}
# @markdown 采样率
MODELSAMPLE = "48k"  # @param {type:"string"}
# @markdown 是否缓存训练集
CACHEDATA = 1  # @param {type:"integer"}
# @markdown 是否仅保存最新的ckpt文件
ONLYLATEST = 0  # @param {type:"integer"}

!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}

In [None]:
# @title 删除其它pth，只留选中的（慎点，仔细看代码）
# @markdown 模型名
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown 选中模型epoch
MODELEPOCH = 9600  # @param {type:"integer"}

!echo "备份选中的模型。。。"
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth

!echo "正在删除。。。"
!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}
!rm /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*.pth

!echo "恢复选中的模型。。。"
!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth
!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth

!echo "删除完成"
!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}

In [None]:
# @title 清除项目下所有文件，只留选中的模型（慎点，仔细看代码）
# @markdown 模型名
MODELNAME = "lulu"  # @param {type:"string"}
# @markdown 选中模型epoch
MODELEPOCH = 9600  # @param {type:"integer"}

!echo "备份选中的模型。。。"
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth
!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth

!echo "正在删除。。。"
!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}
!rm -rf /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*

!echo "恢复选中的模型。。。"
!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth
!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth

!echo "删除完成"
!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}