# 🎤 RVC Voice Cloning System - Google Colab

This notebook runs the RVC voice-cloning pipeline on Google Colab's GPUs, for users without a local GPU.

**Features**:
- 💾 **Google Drive Integration**: Automatically save and load trained models
- 🚀 **GPU Acceleration**: Uses Tesla T4/P100
- 🧠 **Real Training**: Uses official RVC backend for high-quality results
- 🔄 **RVC-Python**: Robust inference engine

## Setup Instructions

1.  Run all cells in order
2.  Mount Google Drive when prompted
3.  Use the training and inference cells below

## 🔌 Step 1: Mount Google Drive

In [None]:
#@title 🔌 Step 1: Mount Google Drive
import os

from google.colab import drive

# Folder on Drive where trained voice models are kept; the other cells read
# this name when syncing and backing up models.
DRIVE_RVC_DIR = "/content/drive/MyDrive/RVC_Models"

print("Mounting Google Drive...")
drive.mount('/content/drive')

# Created after mounting, since the path lives under the mount point.
os.makedirs(DRIVE_RVC_DIR, exist_ok=True)
print(f"✅ Google Drive mounted. Models will be saved to: {DRIVE_RVC_DIR}")

## 📦 Step 2: Clone Repository and Install Dependencies

In [None]:
#@title 📦 Step 2: Clone Repository and Install Dependencies
import os
import subprocess

# ⚠️ REPLACE WITH YOUR GITHUB REPO URL ⚠️
REPO_URL = "https://github.com/alakhsharmaa/RVCVoiceCloning.git"
REPO_DIR = "RVCVoiceCloning"

# This cell ends by chdir-ing into the checkout. If it is run a second time,
# the relative REPO_DIR would be looked up *inside* the checkout, not be found,
# and a nested copy would be cloned. Detect that situation up front.
already_inside_repo = (
    os.path.basename(os.getcwd()) == REPO_DIR and os.path.isdir(".git")
)

if already_inside_repo:
    print(f"Repository already exists at {REPO_DIR}")
    print(f"Working directory: {os.getcwd()}")
else:
    if not os.path.exists(REPO_DIR):
        print(f"Cloning repository from {REPO_URL}...")
        try:
            subprocess.run(["git", "clone", REPO_URL, REPO_DIR], check=True)
            print("✅ Repository cloned successfully")
        except subprocess.CalledProcessError:
            print("❌ Failed to clone. Please check the REPO_URL above.")
    else:
        print(f"Repository already exists at {REPO_DIR}")

    # Only enter the checkout when it actually exists (the clone may have failed).
    if os.path.exists(REPO_DIR):
        os.chdir(REPO_DIR)
        print(f"Working directory: {os.getcwd()}")
        # Removed requirements.txt install to prevent crashes

## 🔄 Step 3: Load Saved Models from Drive

Syncs models from your Google Drive `RVC_Models` folder to the local workspace.

In [None]:
#@title 🔄 Step 3: Load Saved Models from Drive
import os
import shutil

# Local folder that receives one sub-folder per voice found on Drive.
local_models_dir = "models/finetuned_models"
os.makedirs(local_models_dir, exist_ok=True)

print("Syncing models from Drive...")
if not os.path.exists(DRIVE_RVC_DIR):
    print("Drive directory not found (should be empty if first run)")
else:
    synced_count = 0
    for voice_name in os.listdir(DRIVE_RVC_DIR):
        source_dir = os.path.join(DRIVE_RVC_DIR, voice_name)
        target_dir = os.path.join(local_models_dir, voice_name)
        # Only directories are mirrored, and an existing local copy is never overwritten.
        if not os.path.isdir(source_dir) or os.path.exists(target_dir):
            continue
        shutil.copytree(source_dir, target_dir)
        synced_count += 1
        print(f"Synced voice: {voice_name}")

    if synced_count:
        print(f"✅ Synced {synced_count} models from Google Drive")
    else:
        print("No new models found on Drive to sync.")

## 🎓 Step 4: Train a New Voice (Real RVC Backend)

1. Enter the name of the person/character.
2. Click the upload button to select your `.wav` files.
3. The system will process, train (50 epochs by default), and save the model to your Drive.

In [None]:
#@title 🎓 Step 4: Train a New Voice (Real RVC Backend)
import os
import shutil
import subprocess
import re
import glob
import sys
import requests
import json
import torch
from collections import OrderedDict
from pathlib import Path
from google.colab import files

# 1. Inputs
PERSON_NAME = "my_voice" # @param {type:"string"}
EPOCHS = 50 # @param {type:"integer"}
SAVE_FREQUENCY = 10 # @param {type:"integer"}

print(f"🎤 Voice Name: {PERSON_NAME}")
print(f"🔄 Epochs: {EPOCHS}")

# 2. Upload Audio
print("\n📂 Please upload your audio files (.wav)...")
uploaded = files.upload()
AUDIO_FILES = list(uploaded.keys())

if not AUDIO_FILES:
    print("⚠️ No files uploaded. Please rerun this cell and upload audio.")
else:
    print(f"🚀 Initializing Real RVC Training for: {PERSON_NAME}")
    
    # 1. Setup Official RVC Backend (STEALTH MODE - No Clone)
    RVC_BACKEND_DIR = "training_core"
    
    # FORCE CLEANUP
    if os.path.exists(RVC_BACKEND_DIR):
        if not os.path.exists(os.path.join(RVC_BACKEND_DIR, "infer")):
             print("⚠️ Detected broken backend from previous run. Deleting...")
             shutil.rmtree(RVC_BACKEND_DIR)
             
    if not os.path.exists(RVC_BACKEND_DIR):
        print("📥 Downloading core assets (Safe Mode)...")
        subprocess.run("wget https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/archive/refs/heads/main.zip -O rvc_core.zip", shell=True, check=True)
        subprocess.run("unzip -q rvc_core.zip", shell=True, check=True)
        subprocess.run(f"mv Retrieval-based-Voice-Conversion-WebUI-main {RVC_BACKEND_DIR}", shell=True, check=True)
        subprocess.run("rm rvc_core.zip", shell=True, check=True)
        
        for f in ["README.md", "README.en.md", "docs"]:
            path = os.path.join(RVC_BACKEND_DIR, f)
            if os.path.exists(path):
                if os.path.isdir(path):
                    shutil.rmtree(path)
                else:
                    os.remove(path)
    else:
        print("✅ Backend directory exists")
    
    print("📦 Installing Verified RVC Dependencies (SEQUENTIAL MODE)...")
    
    def run_pip(pkg_name, cmd_override=None):
        """Install one package through the shell and report whether it worked.

        Args:
            pkg_name: Requirement handed to ``pip install``; also the label used
                in the progress and failure messages.
            cmd_override: Optional complete shell command to run instead of the
                default ``pip install --no-cache-dir <pkg_name>``.

        Returns:
            True when the command exits with status 0, otherwise False (after
            printing the captured stdout and stderr).
        """
        print(f"... Installing {pkg_name}")
        shell_command = cmd_override or f"pip install --no-cache-dir {pkg_name}"
        # Output is captured so that only failed installs print anything.
        completed = subprocess.run(shell_command, shell=True, capture_output=True, text=True)
        succeeded = completed.returncode == 0
        if not succeeded:
            print(f"❌ FAILED {pkg_name} install! Output:\n{completed.stdout}\n{completed.stderr}")
        return succeeded

    subprocess.run("sudo apt-get install -y libsndfile1-dev swig > /dev/null 2>&1", shell=True, check=True)

    run_pip("ninja")
    run_pip('"numpy<2.0"')
    
    # Use modern Hydra/Omegaconf for Python 3.12 support
    print("... Installing modern omegaconf/hydra (wheels)")
    run_pip("omegaconf==2.3.0")
    run_pip("hydra-core==1.3.2")
    run_pip("antlr4-python3-runtime==4.9.3") 
    run_pip("bitarray") 
    run_pip("sacrebleu")

    deps = [
        "librosa==0.9.1", 
        "faiss-cpu",
        "praat-parselmouth==0.4.3",
        "pyworld==0.3.4",
        "tensorboardX",
        "torchcrepe",
        "ffmpeg-python",
        "av",
        "scipy",
        "protobuf==3.20.0"
    ]

    for dep in deps:
        run_pip(dep)

    print("... Installing fairseq (wheel info override)")
    if not run_pip("fairseq==0.12.2", "pip install --no-cache-dir --no-deps fairseq==0.12.2"):
         print("⚠️ Wheel failed. Trying source...")
         run_pip("fairseq", "pip install --no-cache-dir git+https://github.com/facebookresearch/fairseq.git")

    # ==========================================================================
    # 🐍 PYTHON 3.12 COMPATIBILITY PATCHER (SAFE REGEX + MANUAL OVERRIDE)
    # ==========================================================================
    print("🛠️ Running Python 3.12 Compatibility Patcher (Clean-Slate Mode)...")
    
    site_dirs = [p for p in sys.path if ("site-packages" in p or "dist-packages" in p) and os.path.isdir(p)]
    if not site_dirs:
        print("❌ Could not locate package directory!")
    else:
        base_dir = site_dirs[0]
        fairseq_dir = os.path.join(base_dir, "fairseq")
        
        # --- PART A: Manual Fix for configs.py (CRITICAL) ---
        configs_url = "https://raw.githubusercontent.com/facebookresearch/fairseq/v0.12.2/fairseq/dataclass/configs.py"
        configs_path = os.path.join(fairseq_dir, "dataclass", "configs.py")
        try:
            print("   ⬇️ Downloading pristine configs.py...")
            # A hung connection or a non-2xx response raises here and is reported
            # by the except below, so an HTTP error page is never written over the
            # installed fairseq module.
            configs_response = requests.get(configs_url, timeout=60)
            configs_response.raise_for_status()
            configs_content = configs_response.text
            print("   🔧 Patching configs.py...")
            # Each pair turns a dataclass default built from an instance
            # (`x: T = T()`) into `field(default_factory=T)`.
            replacements = [
                 ("common: CommonConfig = CommonConfig()", "common: CommonConfig = field(default_factory=CommonConfig)"),
                 ("dataset: DatasetConfig = DatasetConfig()", "dataset: DatasetConfig = field(default_factory=DatasetConfig)"),
                 ("distributed_training: DistributedTrainingConfig = DistributedTrainingConfig()", "distributed_training: DistributedTrainingConfig = field(default_factory=DistributedTrainingConfig)"),
                 ("checkpoint: CheckpointConfig = CheckpointConfig()", "checkpoint: CheckpointConfig = field(default_factory=CheckpointConfig)"),
                 ("common_eval: CommonEvalConfig = CommonEvalConfig()", "common_eval: CommonEvalConfig = field(default_factory=CommonEvalConfig)"),
                 ("generation: GenerationConfig = GenerationConfig()", "generation: GenerationConfig = field(default_factory=GenerationConfig)"),
                 ("optimization: OptimizationConfig = OptimizationConfig()", "optimization: OptimizationConfig = field(default_factory=OptimizationConfig)"),
                 ("ema: EMAConfig = EMAConfig()", "ema: EMAConfig = field(default_factory=EMAConfig)"), 
                 ("bmuf: FairseqBMUFConfig = FairseqBMUFConfig()", "bmuf: FairseqBMUFConfig = field(default_factory=FairseqBMUFConfig)"),
                 ("eval_lm: EvalLMConfig = EvalLMConfig()", "eval_lm: EvalLMConfig = field(default_factory=EvalLMConfig)"),
                 ("interactive: InteractiveConfig = InteractiveConfig()", "interactive: InteractiveConfig = field(default_factory=InteractiveConfig)"),
            ]
            for old, new in replacements:
                configs_content = configs_content.replace(old, new)
            with open(configs_path, "w", encoding="utf-8") as f:
                f.write(configs_content)
            print("   ✅ configs.py patched.")
        except Exception as e:
            # Best-effort step: report the problem and let the rest of the cell continue.
            print(f"   ❌ Failed to patch configs.py: {e}")

        # --- PART A.2: Manual Fix for transformer_config.py (CRITICAL) ---
        trans_url = "https://raw.githubusercontent.com/facebookresearch/fairseq/v0.12.2/fairseq/models/transformer/transformer_config.py"
        trans_path = os.path.join(fairseq_dir, "models", "transformer", "transformer_config.py")
        try:
            print("   ⬇️ Downloading pristine transformer_config.py...")
            # A hung connection or a non-2xx response raises here and is reported
            # by the except below, so an HTTP error page is never written over the
            # installed fairseq module.
            trans_response = requests.get(trans_url, timeout=60)
            trans_response.raise_for_status()
            trans_content = trans_response.text
            print("   🔧 Patching transformer_config.py...")
            replacements_t = [
                ("quant_noise: QuantNoiseConfig = field(default=QuantNoiseConfig())", "quant_noise: QuantNoiseConfig = field(default_factory=QuantNoiseConfig)"),
                # Exact-text matches for the one-line instance defaults
                ("encoder: EncDecBaseConfig = EncDecBaseConfig()", "encoder: EncDecBaseConfig = field(default_factory=EncDecBaseConfig)"),
                ("decoder: DecoderConfig = DecoderConfig()", "decoder: DecoderConfig = field(default_factory=DecoderConfig)"),
                # Make sure `field` is importable; a repeated import is harmless
                ("import re", "import re\nfrom dataclasses import field"),
            ]
            for old, new in replacements_t:
                trans_content = trans_content.replace(old, new)

            with open(trans_path, "w", encoding="utf-8") as f:
                f.write(trans_content)
            print("   ✅ transformer_config.py patched.")
        except Exception as e:
            # Best-effort step: report the problem and let the rest of the cell continue.
            print(f"   ❌ Failed to patch transformer_config.py: {e}")

        # --- PART A.3: Manual Fix for checkpoint_utils.py (PyTorch 2.6+) ---
        ckpt_utils_path = os.path.join(fairseq_dir, "checkpoint_utils.py")
        try:
            print("   🔧 Patching checkpoint_utils.py for PyTorch 2.6+...")
            with open(ckpt_utils_path, "r", encoding="utf-8") as f:
                ckpt_content = f.read()
            
            # Fix torch.load defaults
            ckpt_content = ckpt_content.replace(
                'state = torch.load(f, map_location=torch.device("cpu"))', 
                'state = torch.load(f, map_location=torch.device("cpu"), weights_only=False)'
            )
            
            with open(ckpt_utils_path, "w", encoding="utf-8") as f:
                f.write(ckpt_content)
            print("   ✅ checkpoint_utils.py patched.")
        except Exception as e:
             print(f"   ❌ Failed to patch checkpoint_utils.py: {e}")

        # --- PART A.4: Manual Fix for utils.py (Matplotlib 3.8+ tostring_rgb fix) ---
        cwd_backup = os.getcwd()
        utils_py_path = os.path.join(cwd_backup, RVC_BACKEND_DIR, "infer/lib/train/utils.py")
        try:
            print("   🔧 Patching utils.py for Matplotlib 3.8+ (tostring_rgb removal)...")
            if os.path.exists(utils_py_path):
                with open(utils_py_path, "r", encoding="utf-8") as f:
                    utils_content = f.read()
                
                # Simple direct replacement of the deprecated line
                # Old: data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep="")
                # New: RGBA -> RGB -> Flatten to match old expected format
                if "tostring_rgb" in utils_content:
                    utils_content = utils_content.replace(
                        'data = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8, sep="")',
                        'fig.canvas.draw(); data = np.frombuffer(fig.canvas.buffer_rgba(), dtype=np.uint8).reshape(fig.canvas.get_width_height()[::-1] + (4,))[:, :, :3].flatten()'
                    )
                    with open(utils_py_path, "w", encoding="utf-8") as f:
                        f.write(utils_content)
                    print("   ✅ utils.py patched for Matplotlib 3.8+.")
                else:
                    print("   ℹ️ utils.py already patched or tostring_rgb not found.")
            else:
                print(f"   ⚠️ Could not find utils.py at {utils_py_path}")
        except Exception as e:
             print(f"   ❌ Failed to patch utils.py: {e}")

        # --- PART B: Recursive Patch for ALL other files ---
        print("   🔍 Starting global recursive patch for other files...")
        
        def _ensure_field_import(source):
            """Return *source* with ``field`` importable from ``dataclasses``.

            The text is returned unchanged when a ``from dataclasses import``
            statement (single-line or parenthesised) already names ``field``.
            Otherwise a standalone import line is inserted right after the last
            ``from __future__ import`` statement, or at the very top of the text
            when there is none.
            """
            single_line = re.search(
                r'^[ \t]*from dataclasses import\b[^\n]*\bfield\b', source, re.MULTILINE
            )
            parenthesised = re.search(
                r'from dataclasses import\s*\([^)]*\bfield\b', source
            )
            if single_line or parenthesised:
                return source

            import_stmt = "from dataclasses import field"
            # `from __future__` imports must stay first in a module, so the new
            # import goes after them instead of being prepended blindly.
            future_imports = list(re.finditer(
                r'^from __future__ import[ \t]*(?:\([^)]*\)[^\n]*|[^\n]*)',
                source,
                re.MULTILINE,
            ))
            if future_imports:
                cut = future_imports[-1].end()
                return source[:cut] + "\n" + import_stmt + source[cut:]
            return import_stmt + "\n" + source

        def apply_safe_patch(file_path):
            """Patch one Python source file in place for the compatibility pass.

            Files named ``configs.py`` or ``transformer_config.py`` are skipped
            because they are patched explicitly elsewhere in this cell. For any
            other file the function:

            * comments out ``hydra_init()`` calls (unless one is already commented);
            * if the file mentions dataclasses, repairs ``field(default_factory=X)()``,
              rewrites ``name: T = T()`` and ``field(default=T()`` into
              ``default_factory`` form, and makes sure ``field`` is imported.

            Args:
                file_path: Path of the file to patch.

            Returns:
                True if the file was rewritten, False if it was skipped, left
                unchanged, or could not be read or written.
            """
            if os.path.basename(file_path) in ("configs.py", "transformer_config.py"):
                return False  # Skip handled files

            try:
                with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                    original_content = f.read()
            except OSError:
                # Deliberately quiet: this runs over a whole package tree and an
                # unreadable file should not stop or flood the walk.
                return False

            content = original_content

            # 0. DISABLE HYDRA INIT
            if "hydra_init()" in content and "# hydra_init()" not in content:
                content = content.replace("hydra_init()", "# hydra_init() # Disabled by Patcher")

            if "@dataclass" in content or "from dataclasses" in content:
                # 0. SELF-HEALING: undo a stray call left after default_factory
                content = re.sub(
                    r'field\(default_factory=([\w\.]+)\)\(\)',
                    lambda m: f"field(default_factory={m.group(1)})",
                    content,
                )

                # 1. Regex for:  var: Type = Type()
                # Names that look like calls but are not default instances.
                not_factories = ("II", "Optional", "List", "Dict", "Union", "Any", "field")

                def to_default_factory(m):
                    type_name = m.group(3)
                    if type_name in not_factories:
                        return m.group(0)
                    return f"{m.group(1)}: {m.group(2)} = field(default_factory={type_name})"

                content = re.sub(
                    r'([ \t]+\w+)[ \t]*:[ \t]*([\w\.]+)[ \t]*=[ \t]*([\w\.]+)\([ \t]*\)',
                    to_default_factory,
                    content,
                )

                # 2. Regex for:  field(default=Type())
                content = re.sub(
                    r'field\(default=([\w\.]+)\([ \t]*\)',
                    lambda m: f"field(default_factory={m.group(1)}",
                    content,
                )

                # 3. Import Injection (only when something was actually rewritten)
                if content != original_content and "field(" in content:
                    content = _ensure_field_import(content)

            if content == original_content:
                return False

            try:
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(content)
            except OSError:
                return False
            return True

        patch_count = 0
        # The loop names deliberately avoid `files`: this cell imports `files`
        # from google.colab, and rebinding it to a list of file names here would
        # break any later use of that module in the same run.
        for root, _subdirs, filenames in os.walk(fairseq_dir):
            for filename in filenames:
                if filename.endswith(".py") and apply_safe_patch(os.path.join(root, filename)):
                    patch_count += 1

        print(f"   ✅ Recursive patch applied to {patch_count} files.")

    # ==========================================================================

    # 3. Trigger Training
    print("🧠 Starting Feature Extraction and Training...")
    
    cwd_backup = os.getcwd()
    
    # Define Absolute paths
    rvc_internal_dataset_dir = os.path.join(cwd_backup, RVC_BACKEND_DIR, "dataset")
    dataset_abs_path = os.path.join(rvc_internal_dataset_dir, PERSON_NAME)
    logs_abs_path = os.path.join(cwd_backup, RVC_BACKEND_DIR, "logs", PERSON_NAME)
    
    # --- DEBUGGING / CLEAN START ---
    if os.path.exists(logs_abs_path):
        print(f"⚠️ Clearing logs directory for fresh start: {logs_abs_path}")
        shutil.rmtree(logs_abs_path)

    os.makedirs(dataset_abs_path, exist_ok=True)
    os.makedirs(logs_abs_path, exist_ok=True)
    # CRITICAL: the weights directories must exist where training actually runs.
    # The training commands are launched after os.chdir(RVC_BACKEND_DIR), and the
    # export step looks up "assets/weights" / "weights" relative to that
    # directory, so create them inside the backend rather than in the current cwd.
    backend_abs_path = os.path.join(cwd_backup, RVC_BACKEND_DIR)
    os.makedirs(os.path.join(backend_abs_path, "weights"), exist_ok=True)
    os.makedirs(os.path.join(backend_abs_path, "assets", "weights"), exist_ok=True)
    
    print(f"... Moving audio files to {dataset_abs_path}")
    for audio_file in AUDIO_FILES:
        if os.path.exists(audio_file):
            shutil.copy(audio_file, os.path.join(dataset_abs_path, audio_file))
            
    # --- NEW: VERIFY/DOWNLOAD PRETRAINED MODELS ---
    print("⬇️ Verifying/Downloading Pretrained Models...")
    rvc_assets_dir = os.path.join(cwd_backup, RVC_BACKEND_DIR, "assets")
    hubert_dir = os.path.join(rvc_assets_dir, "hubert")
    rmvpe_dir = os.path.join(rvc_assets_dir, "rmvpe")
    pretrained_dir = os.path.join(rvc_assets_dir, "pretrained_v2")
    os.makedirs(hubert_dir, exist_ok=True)
    os.makedirs(rmvpe_dir, exist_ok=True)
    os.makedirs(pretrained_dir, exist_ok=True)

    def _ensure_asset(dest_path, url, local_candidates=()):
        """Make sure a pretrained asset is present at ``dest_path``.

        A non-empty file already at ``dest_path`` is kept. Otherwise the first
        existing path in ``local_candidates`` is moved into place; failing that,
        the file is downloaded from ``url`` with wget.

        Raises:
            subprocess.CalledProcessError: if wget exits with a non-zero status.
        """
        # A zero-byte file is what a failed `wget -O` leaves behind, so it is
        # treated as missing rather than as a valid model.
        if os.path.exists(dest_path) and os.path.getsize(dest_path) > 0:
            return

        asset_name = os.path.basename(dest_path)
        for loc in local_candidates:
            if os.path.exists(loc):
                print(f"   Found {asset_name} at {loc}, moving to {dest_path}...")
                shutil.move(loc, dest_path)
                return

        print(f"   Downloading {asset_name} from HuggingFace...")
        try:
            # Argument list (no shell) so paths with spaces are passed intact;
            # check=True matches how the backend archive is fetched in this cell.
            subprocess.run(["wget", "-q", url, "-O", dest_path], check=True)
        except subprocess.CalledProcessError:
            # Remove the partial output so the next run retries the download.
            if os.path.exists(dest_path):
                os.remove(dest_path)
            print(f"   ❌ Failed to download {asset_name} from {url}")
            raise

    hf_base_url = "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main"

    # 1. Hubert
    # A copy the user uploaded next to the notebook (cwd or /content) is preferred
    # over downloading.
    hubert_path = os.path.join(hubert_dir, "hubert_base.pt")
    _ensure_asset(
        hubert_path,
        f"{hf_base_url}/hubert_base.pt",
        local_candidates=[os.path.join(cwd_backup, "hubert_base.pt"), "/content/hubert_base.pt"],
    )

    # 2. RMVPE
    rmvpe_path = os.path.join(rmvpe_dir, "rmvpe.pt")
    _ensure_asset(
        rmvpe_path,
        f"{hf_base_url}/rmvpe.pt",
        local_candidates=[os.path.join(cwd_backup, "rmvpe.pt"), "/content/rmvpe.pt"],
    )

    # 3. Base Models (G and D)
    f0G_path = os.path.join(pretrained_dir, "f0G40k.pth")
    f0D_path = os.path.join(pretrained_dir, "f0D40k.pth")
    _ensure_asset(f0G_path, f"{hf_base_url}/pretrained_v2/f0G40k.pth")
    _ensure_asset(f0D_path, f"{hf_base_url}/pretrained_v2/f0D40k.pth")

    # ----------------------------------------------

    os.chdir(RVC_BACKEND_DIR)
    
    # DEBUG: Check file existence
    print("🔍 Validating backend files...")
    target_script = "infer/modules/train/extract/extract_f0_print.py"
    if not os.path.exists(target_script):
        print(f"❌ CRITICAL: Script not found: {target_script}")
    
    try:
        def run_cmd(cmd, hide_output=False):
            """Run a shell command and raise if it exits with a non-zero status.

            Args:
                cmd: Shell command line, executed with ``shell=True``.
                hide_output: When True, stdout is discarded and nothing is
                    printed on failure; the captured stderr is attached to the
                    raised error instead. When False, output is captured and
                    printed on failure (and always for commands containing
                    ``train.py``).

            Raises:
                RuntimeError: if the command's exit status is not 0.
            """
            print(f"Running: {cmd}") # FULL COMMAND
            if hide_output:
                result = subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, text=True)
                if result.returncode != 0:
                    # stderr was captured above; surface it so a hidden failure
                    # can still be diagnosed from the exception.
                    stderr_text = (result.stderr or "").strip()
                    detail = f"\n{stderr_text}" if stderr_text else ""
                    raise RuntimeError(f"Command failed: {cmd}{detail}")
            else:
                # USE RUN WITH CAPTURE BUT PRINT ON FAILURE (Safe for syntax errors)
                result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
                # Print the output anyway for debug
                if "train.py" in cmd: # Always print for train.py
                    print(result.stdout)
                    if result.stderr:
                        # Filter out common TF warning spam from detailed log
                        filtered_err = "\n".join(
                            line for line in result.stderr.split("\n") if "TensorFlow" not in line
                        )
                        print("⚠️ STDERR: " + filtered_err)

                if result.returncode != 0:
                    print(f"❌ Command Failed with exit code {result.returncode}")
                    print(f"   STDOUT: {result.stdout}")
                    print(f"   STDERR: {result.stderr}")
                    raise RuntimeError(f"Command failed: {cmd}")
            print("✅ Done.")
        
        print("--- 1. Preprocessing Dataset ---")
        cmd_preprocess = f"python infer/modules/train/preprocess.py '{dataset_abs_path}' 40000 2 '{logs_abs_path}' False 3.0"
        run_cmd(cmd_preprocess, hide_output=False) # Enable log
        
        # DEBUG: Check 1_16k_wavs (Simple confirmation)
        wavs_16k = os.path.join(logs_abs_path, "1_16k_wavs")
        if os.path.exists(wavs_16k):
             files_16k = os.listdir(wavs_16k)
             print(f"   ℹ️ 1_16k_wavs content ({len(files_16k)} files): {files_16k[:5]}...")
        else:
             print("   ❌ 1_16k_wavs not created! Preprocess failed.")

        print("--- 2. Extracting Pitch (F0) ---")
        run_cmd(f"python infer/modules/train/extract/extract_f0_print.py '{logs_abs_path}' 2 rmvpe", hide_output=False) # Enable log
        
        print("--- 3. Extracting Features ---")
        # Added 'False' (is_half) to fix argument parsing mismatch
        # ENABLE OUTPUT TO DEBUG MISSING DIR
        run_cmd(f"python infer/modules/train/extract_feature_print.py cuda 1 0 0 '{logs_abs_path}' v2 False", hide_output=False)
        
        print("--- 4. Training Model ---")
        
        # --- CONFIG FIX: GENERATE config.json (40k SETTINGS) DYNAMICALLY ---
        target_config = os.path.join(logs_abs_path, "config.json")

        # --- CRITICAL FIX: GENERATE filelist.txt ---
        print("... Generating filelist.txt (Mandatory for train.py)")
        gt_wavs_dir = os.path.join(logs_abs_path, "0_gt_wavs")
        
        # DYNAMICALLY FIND FEATURE DIR
        # RVC V2 often uses 3_feature768 or 3_feature256 depending on version/embedder
        import glob
        feature_dirs = glob.glob(os.path.join(logs_abs_path, "3_feature*"))
        if feature_dirs:
             feature_dir = feature_dirs[0]
             print(f"   Found feature dir: {feature_dir}")
        else:
             # Fallback to default expected
             feature_dir = os.path.join(logs_abs_path, "3_feature256") 
             print(f"   ⚠️ Feature dir not found by glob. Defaulting to: {feature_dir}")

        f0_dir = os.path.join(logs_abs_path, "2a_f0")
        f0nsf_dir = os.path.join(logs_abs_path, "2b-f0nsf")
        
        filelist_path = os.path.join(logs_abs_path, "filelist.txt")
        
        valid_entries = 0
        with open(filelist_path, "w", encoding="utf-8") as f:
            if os.path.exists(gt_wavs_dir):
                for wav_file in os.listdir(gt_wavs_dir):
                    if wav_file.endswith(".wav"):
                        # Format: GT_WAV | FEATURE | PITCH | PITCHF | SID
                        # Filenames are usually {idx0}_{idx1}.wav
                        base_name = wav_file.replace(".wav", "")
                        
                        gt_path = os.path.join(gt_wavs_dir, wav_file)
                        # Feature is .npy
                        feat_path = os.path.join(feature_dir, f"{base_name}.npy")
                        # Pitch is .wav.npy
                        pitch_path = os.path.join(f0_dir, f"{wav_file}.npy")
                        pitchf_path = os.path.join(f0nsf_dir, f"{wav_file}.npy")
                        
                        # Verify existence (optional, but good for debug)
                        if os.path.exists(feat_path) and os.path.exists(pitch_path):
                            line = f"{gt_path}|{feat_path}|{pitch_path}|{pitchf_path}|0"
                            f.write(line + "\n")
                            valid_entries += 1
        
        if valid_entries == 0:
            print("❌ SYSTEM ERROR: No valid training data found! Check if extract_feature ran correctly.")
            # Just listing dirs to see what happened
            print(f"   GT Dir exists: {os.path.exists(gt_wavs_dir)}")
            print(f"   Feat Dir exists: {os.path.exists(feature_dir)} ({feature_dir})")
            print(f"   Pitch Dir exists: {os.path.exists(f0_dir)}")
        else:
            print(f"✅ Generated filelist.txt with {valid_entries} samples.")

        # --- DYNAMIC BATCH SIZE ---
        # If we have fewer samples than batch_size, the epoch might be skipped or fail to save
        optimal_batch_size = min(4, valid_entries)
        if optimal_batch_size < 1: optimal_batch_size = 1
        print(f"   ⚖️  Auto-Adjusted Batch Size: {optimal_batch_size} (samples: {valid_entries})")

        print("... Generating config.json for 40k sample rate")
        # Synthesized from 48k.json structure but adapted for 40k
        config_content = {
            "train": {
                "log_interval": 10,  # Lower log interval to see progress on small data
                "seed": 1234,
                "epochs": 20000,
                "learning_rate": 1e-4,
                "betas": [0.8, 0.99],
                "eps": 1e-9,
                "batch_size": optimal_batch_size,
                "fp16_run": True,
                "lr_decay": 0.999875,
                "segment_size": 12800,
                "init_lr_ratio": 1,
                "warmup_epochs": 0,
                "c_mel": 45,
                "c_kl": 1.0
            },
            "data": {
                "max_wav_value": 32768.0,
                "sampling_rate": 40000,
                "filter_length": 2048,
                "hop_length": 400,
                "win_length": 2048,
                "n_mel_channels": 128,
                "mel_fmin": 0.0,
                "mel_fmax": None,
                "training_files": f"{logs_abs_path}/filelist.txt"
            },
            "model": {
                "inter_channels": 192,
                "hidden_channels": 192,
                "filter_channels": 768,
                "n_heads": 2,
                "n_layers": 6,
                "kernel_size": 3,
                "p_dropout": 0,
                "resblock": "1",
                "resblock_kernel_sizes": [3, 7, 11],
                "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
                "upsample_rates": [10, 10, 2, 2],
                "upsample_initial_channel": 512,
                # FIXED: KERNEL SIZE 16 to match f0G40k.pth pretrained model
                "upsample_kernel_sizes": [16, 16, 4, 4], # Was [20, 20, 4, 4]
                "use_spectral_norm": False,
                "gin_channels": 256,
                "spk_embed_dim": 109
            }
        }
        
        with open(target_config, "w") as f:
            json.dump(config_content, f, indent=2)
        print(f"✅ Generated config.json at {target_config}")

        # --- DEBUG: AGGRESSIVE INJECT TRACING INTO TRAIN.PY ---
        # (REMOVED: The issue was arguments/logic, we don't need to break syntax anymore)
        
        # --- DYNAMIC SAFE INTERVAL CALCULATION ---
        # Ensure we always save at least once, even for short runs
        real_epoch = int(EPOCHS)
        real_freq = int(SAVE_FREQUENCY)
        
        # AGGRESSIVE SAVE STRATEGY for short runs
        if real_epoch < 50:
             print("   ⚠️ Short run detected! Forcing save_every_epoch = 1")
             real_freq = 1
        elif real_epoch < real_freq:
            print(f"   ⚠️ Total epochs ({real_epoch}) < Save Frequency ({real_freq}). Force setting save frequency to {real_epoch}.")
            real_freq = real_epoch
        elif real_freq <= 0:
            real_freq = 1

        # UPDATED FLAGS: 
        # -se (save_every_epoch)
        # -bs (batch_size) -> overridden by config logic usually, but passing for argument correctness
        # -te (total_epoch)
        # -l 1 (if_latest) -> FORCE SAVE LATEST to ensure we get a G_latest.pth if all else fails
        # -pg assets/pretrained_v2/f0G40k.pth
        # -pd assets/pretrained_v2/f0D40k.pth
        # -f0 1 (ENABLE F0: Critical for proper dataloader usage!)
        
        # ENVIRONMENT VARS FOR STABLE DDP
        os.environ["MASTER_ADDR"] = "localhost"
        os.environ["MASTER_PORT"] = "12355"
        
        # PERSON_NAME comes straight from the notebook form, so quote it (and the
        # feature path derived from it) before interpolating into a shell command;
        # an unquoted name containing a space would be split into several
        # arguments. Names without special characters are passed through unchanged.
        import shlex
        safe_person_name = shlex.quote(PERSON_NAME)

        cmd_train = f"python infer/modules/train/train.py -e {safe_person_name} -sr 40k -se {real_freq} -bs {optimal_batch_size} -te {real_epoch} -pg assets/pretrained_v2/f0G40k.pth -pd assets/pretrained_v2/f0D40k.pth -f0 1 -l 1 -c 0 -sw 1 -v v2"
        run_cmd(cmd_train, hide_output=False)

        # --- 5. Train Index (Faiss) ---
        print("--- 5. Training Index (Faiss) ---")
        cmd_index = f"python infer/modules/train/train_index.py {safe_person_name} v2 {EPOCHS} {shlex.quote(feature_dir)}"
        run_cmd(cmd_index, hide_output=False)

        # --- 6. Export Model ---
        print("✅ Training finished. Exporting model...")
        
        # Fallback check for weights in both possible locations
        possible_dirs = ["assets/weights", "weights"]
        pth_files = []
        used_dir = ""
        
        for w_dir in possible_dirs:
             if os.path.exists(w_dir):
                 found = [f for f in os.listdir(w_dir) if PERSON_NAME in f and ".pth" in f]
                 if found:
                     pth_files = found
                     used_dir = w_dir
                     break
        
        if pth_files:
             latest_model = sorted(pth_files)[-1]
             source_path = os.path.join(used_dir, latest_model)
             target_model_path = os.path.join(cwd_backup, "models", "finetuned_models", f"{PERSON_NAME}.pth")
             
             # Ensure local models dir exists
             os.makedirs(os.path.join(cwd_backup, "models", "finetuned_models"), exist_ok=True)
             
             shutil.copy(source_path, target_model_path)
             print(f"🏆 Model saved locally to: {target_model_path}")
             
             drive_voice_dir = os.path.join(DRIVE_RVC_DIR, PERSON_NAME)
             if not os.path.exists(drive_voice_dir):
                 os.makedirs(drive_voice_dir)
             shutil.copy(target_model_path, os.path.join(drive_voice_dir, f"{PERSON_NAME}.pth"))
             print(f"☁️ Model backed up to Google Drive: {drive_voice_dir}")
             
             # Export Index
             index_files = [f for f in os.listdir(logs_abs_path) if f.endswith('.index') and "added" in f]
             if index_files:
                 latest_index = sorted(index_files)[-1]
                 source_index = os.path.join(logs_abs_path, latest_index)
                 target_index_path = os.path.join(cwd_backup, "models", "finetuned_models", f"{PERSON_NAME}.index")
                 shutil.copy(source_index, target_index_path)
                 shutil.copy(target_index_path, os.path.join(drive_voice_dir, f"{PERSON_NAME}.index"))
                 print(f"☁️ Index backed up to Google Drive: {os.path.join(drive_voice_dir, f'{PERSON_NAME}.index')}")
             else:
                 print("⚠️ No .index file found in logs.")
        else:
             print("❌ No model file generated in weights/. Using MANUAL FALLBACK...")
             
             # --- MANUAL CHECKPOINT CONVERSION FALLBACK ---
             if os.path.exists(logs_abs_path):
                  checkpoints = [f for f in os.listdir(logs_abs_path) if "G_" in f and ".pth" in f]
                  if checkpoints:
                       # Find latest checkpoint by number
                       # G_0.pth, G_100.pth, etc.
                       def get_step(name):
                           """Return the numeric step in a ``G_<step>`` checkpoint name.

                           Names without digits after ``G_`` (for example
                           ``G_latest.pth``) yield 0 so they sort before every
                           numbered checkpoint.
                           """
                           # re.search returns None when there is no match; test for
                           # that explicitly instead of hiding it behind a bare except.
                           match = re.search(r"G_(\d+)", name)
                           return int(match.group(1)) if match else 0
                       latest_ckpt = sorted(checkpoints, key=get_step)[-1]
                       latest_ckpt_path = os.path.join(logs_abs_path, latest_ckpt)
                       print(f"⚠️ Found raw checkpoint: {latest_ckpt_path}. Converting manually...")
                       
                       try:
                           # Load checkpoint
                           print("... Loading checkpoint (CPU)")
                           ckpt = torch.load(latest_ckpt_path, map_location="cpu")
                           opt = OrderedDict()
                           opt["weight"] = {}
                           if "model" in ckpt:
                               ckpt_model = ckpt["model"]
                           else:
                               ckpt_model = ckpt
                           
                           # Extract weights
                           for key in ckpt_model.keys():
                               if "enc_q" in key:
                                   continue
                               opt["weight"][key] = ckpt_model[key].half()
                           
                           # Add Config (Synthesized 40k)
                           opt["config"] = [
                                1025, 32, 192, 192, 768, 2, 6, 3, 0, "1",
                                [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
                                [10, 10, 2, 2], 512, [20, 20, 4, 4], 109, 256, 40000
                           ]
                           opt["info"] = "ManualConversion_Epoch50"
                           opt["sr"] = "40k"
                           opt["f0"] = 1 # rmvpe used
                           opt["version"] = "v2"
                           
                           # Save
                           target_model_path = os.path.join(cwd_backup, "models", "finetuned_models", f"{PERSON_NAME}.pth")
                           os.makedirs(os.path.join(cwd_backup, "models", "finetuned_models"), exist_ok=True)
                           torch.save(opt, target_model_path)
                           print(f"🏆 Saved manually converted model to: {target_model_path}")
                           
                           # Backup
                           drive_voice_dir = os.path.join(DRIVE_RVC_DIR, PERSON_NAME)
                           if not os.path.exists(drive_voice_dir):
                               os.makedirs(drive_voice_dir)
                           shutil.copy(target_model_path, os.path.join(drive_voice_dir, f"{PERSON_NAME}.pth"))
                           print(f"☁️ Model backed up to Google Drive: {drive_voice_dir}")
                           
                           # Export Index
                           index_files = [f for f in os.listdir(logs_abs_path) if f.endswith('.index') and "added" in f]
                           if index_files:
                               latest_index = sorted(index_files)[-1]
                               source_index = os.path.join(logs_abs_path, latest_index)
                               target_index_path = os.path.join(cwd_backup, "models", "finetuned_models", f"{PERSON_NAME}.index")
                               shutil.copy(source_index, target_index_path)
                               shutil.copy(target_index_path, os.path.join(drive_voice_dir, f"{PERSON_NAME}.index"))
                               print(f"☁️ Index backed up to Google Drive: {os.path.join(drive_voice_dir, f'{PERSON_NAME}.index')}")
                           else:
                               print("⚠️ No .index file found in logs.")
                           
                       except Exception as e:
                           print(f"❌ Manual conversion failed: {e}")
                           import traceback
                           traceback.print_exc()
                  else:
                       print(f"❌ CRITICAL: No G_*.pth checkpoints found in {logs_abs_path}!")
                       print("This implies the training loop did not save ANY checkpoints. Check epoch count vs save_interval.")
        
    except Exception as e:
        print(f"❌ Training failed with error: {e}")
    finally:
        os.chdir(cwd_backup)


## 🎭 Step 5: Voice Conversion

Convert audio using any trained (or loaded) voice.

In [None]:
#@title 🎭 Step 5: Voice Conversion
import sys
import os
from google.colab import files

# Add src to path for imports
sys.path.append(os.path.join(os.getcwd(), "src"))

from core.inference.converter import VoiceConverter
from utils.registry import VoiceRegistry, discover_voices
from utils.device import get_device

def _find_index_for_model(model_path):
    """Return the Faiss index file that belongs to *model_path*, or None.

    The index named ``<model name>.index`` next to the model is preferred,
    which is the layout the training cell writes. Any other ``.index`` file in
    the folder is used only when the folder holds no other ``.pth`` model: in a
    shared folder such a file may belong to a different voice.
    """
    import glob

    exact_match = os.path.splitext(model_path)[0] + ".index"
    if os.path.isfile(exact_match):
        return exact_match

    folder = os.path.dirname(model_path)
    # glob order is unspecified, so sort to make the fallback deterministic.
    candidates = sorted(glob.glob(os.path.join(folder, "*.index")))
    models_in_folder = glob.glob(os.path.join(folder, "*.pth"))
    if candidates and len(models_in_folder) <= 1:
        return candidates[0]
    return None


MODELS_DIR = "models/finetuned_models"
device = get_device()

# Voices that can be selected below, as reported for MODELS_DIR.
available_voices = discover_voices(models_dir=MODELS_DIR)
print(f"Available voices: {available_voices}")

if not available_voices:
    print("❌ No trained voices found. Please train a voice first.")
else:
    print("\n📂 Please upload your Source Audio file (wav/mp3)...")
    uploaded = files.upload()
    if uploaded:
        # Only the first uploaded file is converted.
        SOURCE_AUDIO = list(uploaded.keys())[0]
        print(f"   ✅ Source Audio: {SOURCE_AUDIO}")

        # Interactive Model Selection
        print("\nSelect a voice model:")
        for idx, v in enumerate(available_voices):
            print(f"{idx}: {v}")

        try:
            selection = int(input("Enter the number of the voice model: "))
            if selection < 0:
                # A negative number would silently index from the end of the
                # list; treat it as invalid like any other out-of-range entry.
                raise IndexError(selection)
            TARGET_VOICE = available_voices[selection]
        except (ValueError, IndexError):
            print("⚠️ Invalid input, defaulting to first model.")
            TARGET_VOICE = available_voices[0]

        # Use person-specific output folder
        OUTPUT_DIR = os.path.join("data/outputs", TARGET_VOICE)
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        OUTPUT_PATH = os.path.join(OUTPUT_DIR, f"converted_{TARGET_VOICE}.wav")

        print(f"🚀 Converting '{SOURCE_AUDIO}' using '{TARGET_VOICE}'...")
        registry = VoiceRegistry(models_dir=MODELS_DIR)
        model_path = registry.get_model_path(TARGET_VOICE)

        if model_path:
            converter = VoiceConverter(model_path, device=device)
            try:
                # Auto-detect the index that matches the selected model; a
                # missing index is not an error, conversion runs without it.
                index_path = _find_index_for_model(model_path)
                if index_path:
                    print(f"   ℹ️ Using index file: {os.path.basename(index_path)}")

                converter.convert(
                    source_audio_path=SOURCE_AUDIO,
                    output_path=OUTPUT_PATH,
                    pitch_shift=0.0,
                    index_path=index_path
                )
                print(f"✅ Conversion completed! Output saved to: {OUTPUT_PATH}")

                # Auto-download
                files.download(OUTPUT_PATH)
            except Exception as e:
                print(f"❌ Conversion failed: {e}")
        else:
            print(f"❌ Could not find model path for {TARGET_VOICE}")
