<a href="https://www.kaggle.com/code/djkruger/arlecchino-training?scriptVersionId=237059969" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [2]:
# Verify Structure:
import os
arlecchino_voice_data_path = kagglehub.dataset_download('djkruger/arlecchino-voice-data')

dataset_path = "../input/djkruger/arlecchino-voice-data"  # Adjust charactername
wavs_path = os.path.join(dataset_path, "/kaggle/input/arlecchino-voice-data/wavs")
metadata_path = os.path.join(dataset_path, "/kaggle/input/arlecchino-voice-data/metadata.csv")
print(f"WAVs found: {len(os.listdir(wavs_path))}")
print(f"Metadata exists: {os.path.exists(metadata_path)}")

NameError: name 'kagglehub' is not defined

In [1]:
import os
import subprocess
import time
import shutil
import csv
import numpy as np
import librosa
import soundfile as sf
import json
import traceback
from multiprocessing import Process, Queue, Pool, cpu_count
import select
import pty
import random
import queue
import uuid
import torch

# --- Initial User Reminders ---
print("="*60)
print("IMPORTANT KAGGLE SETTINGS:")
print("- Ensure 'Accelerator' is set to GPU (P100 or T4 x2 recommended).")
print("- Ensure 'Internet' is turned ON in the Settings panel.")
print("="*60)

# Step 1: Set Up Environment
cached_env_dir = "/kaggle/input/rvc-dependencies"
env_yml = "/kaggle/working/environment.yml"
cudnn_dir = "/kaggle/working/cudnn"
cupti_dir = "/kaggle/working/cupti"

# Enhanced disk space check
print("Checking disk space...")
disk_space = subprocess.run(["df", "-h", "/kaggle/working"], capture_output=True, text=True)
if "Avail" in disk_space.stdout:
    avail_space_line = [line for line in disk_space.stdout.splitlines() if "/kaggle/working" in line]
    if avail_space_line:
        avail_space = avail_space_line[0]
        try:
            avail_str = avail_space.split()[3]
            unit = avail_str[-1].upper()
            value = float(avail_str[:-1])
            if unit == 'T': value *= 1024
            elif unit == 'M': value /= 1024
            elif unit == 'K': value /= (1024 * 1024)
            print(f"Available space: {value:.2f} GB")
            if value < 5:
                raise RuntimeError(f"Insufficient disk space: {value:.2f}GB available, need ~5GB")
        except (IndexError, ValueError) as e:
            print(f"Warning: Could not parse available disk space: {e}. Proceeding cautiously.")
    else:
        print("Warning: Could not find /kaggle/working in df output. Disk space check skipped.")

# Create environment.yml
env_content = """
name: rvc
channels:
  - conda-forge
  - pytorch
dependencies:
  - python=3.11
  - pip
  - pytorch=2.3.1
  - torchaudio=2.3.1
  - torchvision=0.18.1
  - cudatoolkit=11.8
  - librosa=0.9.2
  - soundfile=0.12.1
  - scipy=1.11.4
  - faiss-cpu=1.7.4
  - fairseq=0.12.2
  - gradio=3.14.0
  - gradio-client=0.0.2
  - fastapi=0.112.2
  - uvicorn=0.30.6
  - pydantic=2.8.2
  - starlette=0.38.2
  - httpx=0.27.0
  - websockets=12.0
  - python-multipart=0.0.9
  - ffmpeg-python=0.2.0
  - python-dotenv=1.0.1
  - praat-parselmouth=0.4.3
  - numpy=1.23.5
  - numba=0.57.0
  - pyworld=0.3.4
  - pytorch-lightning=2.0.9
  - joblib=1.4.2
  - tensorboard=2.15.2
  - transformers=4.28.1
  - wandb=0.15.12
  - openai-whisper=20231117
  - matplotlib=3.7.5
  - scikit-learn=1.3.2
  - resampy=0.4.3
  - tqdm=4.66.5
  - einops=0.8.0
  - tensorboardX=2.6.2.2
  - pyngrok=7.2.0
  - pip:
      - wheel
"""
with open(env_yml, "w") as f:
    f.write(env_content)

# Install cuDNN
print("Installing cuDNN for CUDA 11.8...")
try:
    !apt-get update -qq
    !apt-get install -y --allow-downgrades libcudnn8=8.9.7.29-1+cuda11.8 libcudnn8-dev=8.9.7.29-1+cuda11.8
    !ldconfig
except subprocess.CalledProcessError as e:
    print(f"apt-get install failed: {e.stderr}; downloading cuDNN manually...")
    os.makedirs(cudnn_dir, exist_ok=True)
    cudnn_url = "https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-8.9.7.29_cuda11-archive.tar.xz"
    cudnn_tar = f"{cudnn_dir}/cudnn.tar.xz"
    !wget -q {cudnn_url} -O {cudnn_tar}
    !tar -xJf {cudnn_tar} -C {cudnn_dir} --strip-components=1
    !cp -P {cudnn_dir}/lib/* /usr/lib/x86_64-linux-gnu/
    !cp {cudnn_dir}/include/* /usr/include/
    !ldconfig
    !ls -l /usr/lib/x86_64-linux-gnu/libcudnn.so.8*

# Remove residual libcupti files
print("Checking for residual libcupti files...")
potential_paths = [
    "/lib/x86_64-linux-gnu/libcupti.so", "/usr/lib/x86_64-linux-gnu/libcupti.so",
    "/lib/x86_64-linux-gnu/libcupti.so.11.8", "/usr/lib/x86_64-linux-gnu/libcupti.so.11.8",
    "/lib/x86_64-linux-gnu/libcupti.so.12", "/usr/lib/x86_64-linux-gnu/libcupti.so.12",
]
removed_flag = False
for path in potential_paths:
    if os.path.exists(path) or os.path.islink(path):
        !rm -f {path}
        print(f"Removed residual {path}")
        removed_flag = True
if removed_flag:
    !ldconfig

# Install libcupti
print("Installing libcupti for CUDA 11.8...")
os.makedirs(cupti_dir, exist_ok=True)
cupti_url = "https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-cupti-11-8_11.8.87-1_amd64.deb"
cupti_deb = f"{cupti_dir}/cuda-cupti-11-8.deb"
cupti_install_dir = f"{cupti_dir}/cuda_install"
try:
    !wget -q {cupti_url} -O {cupti_deb}
    !dpkg -x {cupti_deb} {cupti_install_dir}
    cupti_so_path = f"{cupti_install_dir}/usr/local/cuda-11.8/extras/CUPTI/lib64/libcupti.so.11.8"
    target_lib_path = "/usr/lib/x86_64-linux-gnu/libcupti.so.11.8"
    target_link_path = "/usr/lib/x86_64-linux-gnu/libcupti.so"
    if os.path.exists(cupti_so_path):
        !cp {cupti_so_path} {target_lib_path}
        !ln -sf {target_lib_path} {target_link_path}
        !ldconfig
        !ls -l {target_lib_path} {target_link_path}
    else:
        raise FileNotFoundError(f"libcupti.so.11.8 not found at {cupti_so_path}")
except Exception as e:
    print(f"CUPTI .deb install failed: {e}. Attempting CUDA Toolkit...")
    toolkit_url = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
    toolkit_run = f"{cupti_dir}/cuda_11.8.0.run"
    toolkit_install_path = f"{cupti_dir}/cuda_toolkit_install"
    !wget -q {toolkit_url} -O {toolkit_run}
    !chmod +x {toolkit_run}
    !sh {toolkit_run} --silent --toolkit --toolkitpath={toolkit_install_path}
    cupti_so_path = f"{toolkit_install_path}/extras/CUPTI/lib64/libcupti.so.11.8"
    if os.path.exists(cupti_so_path):
        !cp {cupti_so_path} {target_lib_path}
        !ln -sf {target_lib_path} {target_link_path}
        !ldconfig
        !ls -l {target_lib_path} {target_link_path}
    else:
        shutil.rmtree(cupti_dir, ignore_errors=True)
        raise RuntimeError(f"Failed to find libcupti.so.11.8 at {cupti_so_path}")

# Set LD_LIBRARY_PATH
standard_paths = "/usr/lib/x86_64-linux-gnu:/lib/x86_64-linux-gnu"
current_ld_path = os.environ.get('LD_LIBRARY_PATH', '')
os.environ["LD_LIBRARY_PATH"] = f"{standard_paths}:{current_ld_path}" if current_ld_path else standard_paths
print(f"LD_LIBRARY_PATH: {os.environ['LD_LIBRARY_PATH']}")

# Verify installations
print("\n--- Verifying Installations ---")
!nvcc --version
!nvidia-smi
!find /usr/lib /lib -name 'libcudnn.so.8*' -ls 2>/dev/null || echo "libcudnn.so.8 not found"
!find /usr/lib /lib -name 'libcupti.so.11.8*' -ls 2>/dev/null || echo "libcupti.so.11.8 not found"
!ldconfig -p | grep libcupti || echo "libcupti not found in linker cache"
if os.path.exists("/usr/lib/x86_64-linux-gnu/libcupti.so.11.8"):
    !readelf -V /usr/lib/x86_64-linux-gnu/libcupti.so.11.8 | grep -i version || echo "Could not read version info"

# Test torch import
print("\n--- Testing torch import and CUDA ---")
try:
    import torch
    print(f"Torch version: {torch.__version__}")
    cuda_available = torch.cuda.is_available()
    print(f"Torch CUDA available: {cuda_available}")
    if cuda_available:
        print(f"CUDA version: {torch.version.cuda}")
        print(f"cuDNN version: {torch.backends.cudnn.version()}")
        num_gpus = torch.cuda.device_count()
        print(f"GPUs: {num_gpus}")
        for i in range(num_gpus):
            print(f"  GPU {i}: {torch.cuda.get_device_name(i)}")
    else:
        raise RuntimeError("CUDA not available. Ensure GPU is enabled.")
except ImportError as e:
    print(f"FATAL: Torch import failed: {e}")
    !ldconfig -p | grep libcupti
    !find /lib /usr -name 'libcupti.so*' -ls
    raise

# Detect GPU configuration
gpu_count = torch.cuda.device_count()
gpu_names = [torch.cuda.get_device_name(i) for i in range(gpu_count)]
is_p100 = any("Tesla P100" in name for name in gpu_names)
is_t4_x2 = all("Tesla T4" in name for name in gpu_names) and gpu_count == 2
print(f"\n--- GPU Configuration ---")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"GPU count: {gpu_count}")
for i, name in enumerate(gpu_names):
    print(f"GPU {i}: {name}, VRAM: {torch.cuda.get_device_properties(i).total_memory / 1024**3:.2f} GB")
print("Detected:", "Tesla P100" if is_p100 else "T4 x2" if is_t4_x2 else f"Unknown ({gpu_names})")
!nvidia-smi

# Install dependencies
if os.path.exists(cached_env_dir) and os.listdir(cached_env_dir):
    print(f"Using cached dependencies from {cached_env_dir}")
    !cp -rT {cached_env_dir} /usr/local/lib/python3.11/dist-packages/
    !ldconfig
else:
    print("Installing dependencies with Mamba...")
    !command -v mamba >/dev/null 2>&1 || conda install -y mamba -n base -c conda-forge
    !mamba env update -n base -f {env_yml} --prune
    !apt-get update -qq
    !apt-get install -y --no-install-recommends \
        build-essential g++ \
        libopenblas-dev \
        unzip cmake \
        libvorbis-dev libmpg123-dev libsndfile1-dev \
        swig libomp-dev aria2 git
    !apt-get clean && rm -rf /var/lib/apt/lists/*

# Verify packages
print("\n--- Verifying packages ---")
python_executable = "/opt/conda/bin/python"
!{python_executable} -c "import torch; print(f'Torch: {torch.__version__}')"
!{python_executable} -c "import librosa; print(f'Librosa: {librosa.__version__}')"
!{python_executable} -c "import soundfile; print(f'Soundfile: {soundfile.__version__}')"
!{python_executable} -c "import fairseq; print(f'Fairseq: {fairseq.__version__}')"
!{python_executable} -c "import gradio; print(f'Gradio: {gradio.__version__}')"
!{python_executable} -c "import transformers; print(f'Transformers: {transformers.__version__}')"
!{python_executable} -c "import whisper; print(f'Whisper: {whisper.__version__}')"
!aria2c --version | head -n 1
!ffmpeg -version | head -n 1

# Step 2: Install VGMStream
print("\n--- Step 2: Install VGMStream ---")
tools_dir = "/kaggle/working/tools"
vgmstream_dir = f"{tools_dir}/vgmstream_cli_extracted"
os.makedirs(vgmstream_dir, exist_ok=True)
vgmstream_zip = f"{tools_dir}/vgmstream-linux-cli.zip"
vgmstream_url = "https://github.com/vgmstream/vgmstream/releases/download/r1980/vgmstream-linux-cli.zip"
vgmstream_cli = f"{vgmstream_dir}/vgmstream-cli"

if os.path.exists(vgmstream_cli):
    print(f"VGMStream CLI exists at {vgmstream_cli}, skipping download.")
else:
    if os.path.exists(vgmstream_zip) and os.path.getsize(vgmstream_zip) > 0:
        print(f"VGMStream ZIP exists: {vgmstream_zip}")
    else:
        for attempt in range(3):
            print(f"Downloading VGMStream (attempt {attempt + 1}/3)...")
            try:
                !wget --tries=2 --timeout=30 -q -O {vgmstream_zip} {vgmstream_url}
                print("Downloaded VGMStream ZIP")
                break
            except subprocess.CalledProcessError as e:
                print(f"Download failed: {e.stderr}")
                if attempt < 2:
                    time.sleep(5)
                else:
                    vgmstream_zip = None

    if vgmstream_zip and os.path.exists(vgmstream_zip):
        print(f"Unzipping {vgmstream_zip}...")
        if os.path.exists(vgmstream_dir):
            shutil.rmtree(vgmstream_dir)
        os.makedirs(vgmstream_dir)
        !unzip -o {vgmstream_zip} -d {vgmstream_dir}
        if os.path.exists(vgmstream_cli):
            print(f"Found vgmstream-cli at: {vgmstream_cli}")
            os.remove(vgmstream_zip)
            print(f"Deleted {vgmstream_zip}")
        else:
            print(f"Warning: vgmstream-cli not found in {vgmstream_dir}")
            vgmstream_cli = None

if vgmstream_cli and os.path.exists(vgmstream_cli):
    try:
        !chmod +x {vgmstream_cli}
        result = subprocess.run([vgmstream_cli, "--version"], capture_output=True, text=True, check=True)
        print(f"vgmstream-cli version: {result.stdout.strip()}")
    except Exception as e:
        print(f"VGMStream verification failed: {e}")
        vgmstream_cli = None
else:
    print("Attempting to build VGMStream from source...")
    vgmstream_src_dir = f"{tools_dir}/vgmstream_src"
    build_dir = f"{vgmstream_src_dir}/build"
    final_cli_path = f"{build_dir}/vgmstream-cli"
    try:
        !apt-get update -qq
        !apt-get install -y libvorbis-dev libmpg123-dev libsndfile1-dev
        if not os.path.exists(vgmstream_src_dir):
            !git clone --depth 1 https://github.com/vgmstream/vgmstream.git {vgmstream_src_dir}
        os.makedirs(build_dir, exist_ok=True)
        !cmake .. -B {build_dir}
        !make -C {build_dir} -j{cpu_count()}
        if os.path.exists(final_cli_path):
            vgmstream_cli = final_cli_path
            !chmod +x {vgmstream_cli}
            result = subprocess.run([vgmstream_cli, "--version"], capture_output=True, text=True, check=True)
            print(f"VGMStream built: {result.stdout.strip()}")
        else:
            raise FileNotFoundError(f"Build failed: {final_cli_path} not found")
    except Exception as e:
        print(f"Failed to build VGMStream: {e}")
        vgmstream_cli = None

# Fallback to ww2ogg
ww2ogg_available = False
if not vgmstream_cli:
    print("Trying ww2ogg as fallback...")
    try:
        !pip install --upgrade pip
        !pip install ww2ogg==0.9.0
        import ww2ogg
        print("ww2ogg installed")
        ww2ogg_available = True
    except Exception as e:
        print(f"Failed to install ww2ogg: {e}")
else:
    print("Using VGMStream for WEM conversion")

# Step 3: Install and Configure Ngrok
print("\n--- Step 3: Install Ngrok ---")
try:
    !pip install --upgrade pyngrok==7.2.0
    from pyngrok import ngrok
    print("pyngrok installed")

    NGROK_TOKEN = None
    try:
        from kaggle_secrets import UserSecretsClient
        NGROK_TOKEN = UserSecretsClient().get_secret("NGROK_AUTH_TOKEN")
        print("Retrieved Ngrok token from Kaggle Secrets")
    except Exception as e:
        print(f"Could not retrieve Ngrok token: {e}")
        print("Set NGROK_TOKEN manually or add it as Kaggle Secret 'NGROK_AUTH_TOKEN'")

    if not NGROK_TOKEN:
        print("Ngrok token not set. Falling back to Pinggy.")
    else:
        try:
            ngrok.set_auth_token(NGROK_TOKEN)
            print("Ngrok authtoken configured")
        except Exception as e:
            print(f"Ngrok configuration failed: {e}. Falling back to Pinggy.")
            NGROK_TOKEN = None
except Exception as e:
    print(f"Failed to install pyngrok: {e}")
    NGROK_TOKEN = None

# Step 4: Clone RVC Repositories and Download Models
print("\n--- Step 4: Clone RVC Repositories ---")
os.chdir("/kaggle/working")
print(f"Current directory: {os.getcwd()}")

def check_connectivity(url, service_name):
    print(f"Testing {service_name} connectivity ({url})...")
    try:
        result = subprocess.run(["curl", "-I", "--connect-timeout", "10", url], capture_output=True, text=True, check=True)
        if "HTTP/2 200" in result.stdout or "HTTP/1.1 200" in result.stdout:
            print(f"{service_name} connectivity verified")
            return True
        print(f"Warning: Unexpected response:\n{result.stdout}")
        return False
    except Exception as e:
        print(f"Warning: {service_name} connectivity test failed: {e}")
        return False

check_connectivity("https://github.com", "GitHub")
check_connectivity("https://huggingface.co", "Hugging Face")

rvc_repo_url = "https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git"
rvc_dir = "/kaggle/working/RVC"
webui_repo_url = "https://github.com/ddPn08/rvc-webui.git"
webui_dir = "/kaggle/working/rvc-webui"

def clone_repo(repo_url, target_dir, max_retries=3):
    print(f"Cloning {repo_url} into {target_dir}...")
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    for attempt in range(max_retries):
        print(f"Clone attempt {attempt + 1}/{max_retries}...")
        try:
            result = subprocess.run(["git", "clone", "--depth", "1", repo_url, target_dir], check=True, capture_output=True, text=True, timeout=120)
            print(f"Cloned {os.path.basename(target_dir)}")
            if target_dir == rvc_dir and not os.path.exists(os.path.join(rvc_dir, "infer", "modules", "train")):
                raise FileNotFoundError("Key directory 'infer/modules/train' missing")
            if target_dir == webui_dir and not any(os.path.exists(os.path.join(webui_dir, f)) for f in ["launch.py", "app.py", "webui.py", "main.py"]):
                raise FileNotFoundError("No entry point found")
            return True
        except Exception as e:
            print(f"Clone failed: {e}")
            if os.path.exists(target_dir):
                shutil.rmtree(target_dir, ignore_errors=True)
            if attempt < max_retries - 1:
                time.sleep(5 * (2 ** attempt))
            else:
                return False

if not clone_repo(rvc_repo_url, rvc_dir):
    raise RuntimeError(f"Failed to clone RVC repository")
if not clone_repo(webui_repo_url, webui_dir):
    raise RuntimeError(f"Failed to clone WebUI fork")

# Download pretrained models
print("\n--- Downloading Pretrained Models ---")
pretrained_v2_dir = os.path.join(rvc_dir, "pretrained_v2")
uvr5_weights_dir = os.path.join(rvc_dir, "uvr5_weights")
weights_dir = os.path.join(rvc_dir, "weights")
hubert_path = os.path.join(rvc_dir, "hubert_base.pt")
os.makedirs(pretrained_v2_dir, exist_ok=True)
os.makedirs(uvr5_weights_dir, exist_ok=True)
os.makedirs(weights_dir, exist_ok=True)

model_urls = [
    ("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth", os.path.join(pretrained_v2_dir, "f0G40k.pth"), 150),
    ("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth", os.path.join(pretrained_v2_dir, "f0D40k.pth"), 150),
    ("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth", os.path.join(uvr5_weights_dir, "HP5-主旋律人声vocals+其他instrumentals.pth"), 150),
    ("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt", hubert_path, 350),
    ("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt", os.path.join(weights_dir, "rmvpe.pt"), 5)
]

def download_file_robust(url, output_path, estimated_size_mb, max_aria_retries=2, max_wget_retries=2):
    filename = os.path.basename(output_path)
    output_dir = os.path.dirname(output_path)
    if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
        print(f"Model exists: {filename}, skipping download")
        return True
    print(f"Checking disk space for {filename} (~{estimated_size_mb}MB)...")
    try:
        stat = os.statvfs('/kaggle/working')
        available_mb = (stat.f_bavail * stat.f_frsize) / (1024 * 1024)
        if available_mb < estimated_size_mb * 1.1:
            print(f"Error: Insufficient disk space: {available_mb:.1f}MB available, need ~{estimated_size_mb * 1.1:.1f}MB")
            return False
    except Exception as e:
        print(f"Warning: Could not check disk space: {e}. Proceeding cautiously.")

    aria2c_available = shutil.which("aria2c") is not None
    if aria2c_available:
        for attempt in range(max_aria_retries):
            print(f"Downloading {filename} with aria2c (attempt {attempt + 1}/{max_aria_retries})...")
            try:
                cmd = ["aria2c", "--console-log-level=warn", "-c", "-x", "4", "-s", "4", "-k", "1M", "--connect-timeout=15", "--timeout=60", "--dir", output_dir, "-o", filename, url]
                result = subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=300)  # Increased timeout
                print(f"aria2c successful for {filename}")
                if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
                    return True
                print(f"aria2c ran but {filename} is missing or empty")
                if os.path.exists(output_path):
                    os.remove(output_path)
            except subprocess.TimeoutExpired:
                print(f"aria2c timed out after 300s")
                if os.path.exists(output_path):
                    os.remove(output_path)
            except KeyboardInterrupt:
                print(f"KeyboardInterrupt during aria2c for {filename}. Cleaning up...")
                if os.path.exists(output_path):
                    os.remove(output_path)
                return False
            except Exception as e:
                print(f"aria2c failed: {e}")
                if os.path.exists(output_path):
                    os.remove(output_path)
            if attempt < max_aria_retries - 1:
                print("Retrying after delay...")
                time.sleep(10 * (2 ** attempt))  # Increased delay
    
    print(f"Falling back to wget for {filename}...")
    wget_available = shutil.which("wget") is not None
    if not wget_available:
        print("Error: wget not found")
        return False
    for attempt in range(max_wget_retries):
        print(f"Downloading {filename} with wget (attempt {attempt + 1}/{max_wget_retries})...")
        try:
            cmd = ["wget", "--continue", "--tries=2", "--timeout=60", "-O", output_path, url]
            result = subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=300)  # Increased timeout
            print(f"wget successful for {filename}")
            if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
                return True
            print(f"wget ran but {filename} is missing or empty")
            if os.path.exists(output_path):
                os.remove(output_path)
        except subprocess.TimeoutExpired:
            print(f"wget timed out after 300s")
            if os.path.exists(output_path):
                os.remove(output_path)
        except KeyboardInterrupt:
            print(f"KeyboardInterrupt during wget for {filename}. Cleaning up...")
            if os.path.exists(output_path):
                os.remove(output_path)
            return False
        except Exception as e:
            print(f"wget failed: {e}")
            if os.path.exists(output_path):
                os.remove(output_path)
        if attempt < max_wget_retries - 1:
            print("Retrying after delay...")
            time.sleep(10 * (2 ** attempt))  # Increased delay
    print(f"Failed to download {filename}")
    return False

all_models_downloaded = True
for url, path, size_mb in model_urls:
    if not download_file_robust(url, path, size_mb):
        all_models_downloaded = False
        print(f"CRITICAL: Failed to download {os.path.basename(path)}")
if not all_models_downloaded:
    missing_models = [path for _, path, _ in model_urls if not (os.path.exists(path) and os.path.getsize(path) > 0)]
    raise FileNotFoundError(f"Missing models: {missing_models}")
print("All pretrained models downloaded or present")

print("\nVerifying models:")
!ls -lh {rvc_dir}/pretrained_v2/*.pth || echo "No pth files in pretrained_v2"
!ls -lh {rvc_dir}/uvr5_weights/*.pth || echo "No pth files in uvr5_weights"
!ls -lh {rvc_dir}/hubert_base.pt || echo "hubert_base.pt not found"
!ls -lh {rvc_dir}/weights/rmvpe.pt || echo "rmvpe.pt not found"

# Step 5: Set Up Dataset
print("\n--- Step 5: Set Up Dataset ---")
dataset_root = "/kaggle/input/arlecchino-voice-data"
output_path = "/kaggle/working/processed_dataset"
temp_wav_path = "/kaggle/working/temp_wavs"
output_wav_dir = os.path.join(output_path, "wavs")
metadata_dst = os.path.join(output_path, "metadata.csv")
metadata_train_dst = os.path.join(output_path, "metadata_train.csv")
metadata_val_dst = os.path.join(output_path, "metadata_val.csv")
TARGET_SR = 44100
SPEAKER_NAME = "Arlecchino"
MIN_FILES_FOR_CACHE = 500
VALIDATION_SPLIT_RATIO = 0.05
MIN_VAL_SAMPLES = 5

use_cached_dataset = False
if os.path.exists(output_wav_dir) and os.path.exists(metadata_train_dst) and os.path.exists(metadata_val_dst):
    try:
        wav_count = len([f for f in os.listdir(output_wav_dir) if f.lower().endswith(".wav")])
        train_count = sum(1 for _ in open(metadata_train_dst, "r", encoding='utf-8') if _.strip())
        val_count = sum(1 for _ in open(metadata_val_dst, "r", encoding='utf-8') if _.strip())
        print(f"Cached dataset: {wav_count} WAVs, {train_count} train, {val_count} val")
        if wav_count >= MIN_FILES_FOR_CACHE and (train_count + val_count) >= MIN_FILES_FOR_CACHE * 0.95:
            print("Using cached dataset")
            use_cached_dataset = True
            !ls -l {output_wav_dir} | head -n 5
            !echo "Total files:" $(ls -1 {output_wav_dir} | wc -l)
            !echo "Train lines:" $(wc -l < {metadata_train_dst})
            !echo "Val lines:" $(wc -l < {metadata_val_dst})
            !echo "Train head:"; !head -n 3 {metadata_train_dst}
            !echo "Val head:"; !head -n 3 {metadata_val_dst}
        else:
            print("Cached dataset incomplete. Reprocessing...")
            shutil.rmtree(output_path, ignore_errors=True)
    except Exception as e:
        print(f"Error checking cache: {e}. Reprocessing...")
        shutil.rmtree(output_path, ignore_errors=True)
else:
    print("No complete cached dataset. Processing...")

def convert_wem_file(args):
    input_file, output_file, tool_path_or_flag = args
    filename = os.path.basename(input_file)
    if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
        return (input_file, None)
    try:
        if tool_path_or_flag == "ww2ogg":
            from ww2ogg import convert
            convert.convert_file(input_file, output_file)
        elif tool_path_or_flag and os.path.exists(tool_path_or_flag):
            cmd = [tool_path_or_flag, "-o", output_file, input_file]
            subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=60)
        else:
            return (input_file, "No valid conversion tool")
        if not os.path.exists(output_file) or os.path.getsize(output_file) == 0:
            return (input_file, "Conversion ran but output is missing or empty")
        return (input_file, None)
    except Exception as e:
        return (input_file, f"Conversion failed: {e}")

def convert_wem_to_wav_parallel(input_dir, output_dir, conversion_tool):
    os.makedirs(output_dir, exist_ok=True)
    input_files_args = []
    import glob
    wem_patterns = [os.path.join(input_dir, "*.wem"), os.path.join(input_dir, "*.wem.wav")]
    found_files = []
    for pattern in wem_patterns:
        found_files.extend(glob.glob(pattern))
    if not found_files:
        print(f"Warning: No WEM files in {input_dir}")
        return []
    for input_path in found_files:
        f = os.path.basename(input_path)
        base, _ = os.path.splitext(f)
        if base.lower().endswith(".wem"):
            base, _ = os.path.splitext(base)
        output_filename = f"{base}.wav"
        output_path = os.path.join(output_dir, output_filename)
        input_files_args.append((input_path, output_path, conversion_tool))
    if not input_files_args:
        print(f"Warning: No WEM files identified")
        return []
    print(f"Converting {len(input_files_args)} WEM files with {cpu_count()} processes...")
    failed_files = []
    successful_count = 0
    with Pool(processes=cpu_count()) as pool:
        results = pool.map(convert_wem_file, input_files_args)
    for input_f, error in results:
        if error:
            failed_files.append((os.path.basename(input_f), error))
        else:
            successful_count += 1
    print(f"Conversion complete. Successful: {successful_count}, Failed: {len(failed_files)}")
    if failed_files:
        print("--- Conversion Failures ---")
        for fname, err in failed_files[:10]:
            print(f"  File: {fname}, Error: {err}")
        if len(failed_files) > 10:
            print(f"  ... and {len(failed_files) - 10} more")
    return failed_files

def preprocess_audio_file(args):
    input_file, output_file, target_sr = args
    filename = os.path.basename(input_file)
    if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
        return (input_file, None)
    try:
        y, sr = librosa.load(input_file, sr=target_sr, mono=True)
        if len(y) == 0:
            return (input_file, "Empty audio")
        max_amp = np.max(np.abs(y))
        if max_amp > 0:
            y = y / max_amp * 0.95
        else:
            y = np.zeros_like(y)
        sf.write(output_file, y, target_sr, subtype='PCM_16')
        return (input_file, None)
    except Exception as e:
        return (input_file, f"Preprocessing error: {e}")

def preprocess_audio_parallel(input_dir, output_dir, target_sr):
    os.makedirs(output_dir, exist_ok=True)
    files_to_process = []
    import glob
    wav_pattern = os.path.join(input_dir, "*.wav")
    found_wavs = glob.glob(wav_pattern)
    if not found_wavs:
        print(f"Warning: No WAV files in {wav_pattern}")
        return []
    for input_path in found_wavs:
        f = os.path.basename(input_path)
        output_path = os.path.join(output_dir, f)
        files_to_process.append((input_path, output_path, target_sr))
    print(f"Preprocessing {len(files_to_process)} WAV files with {cpu_count()} processes...")
    failed_files = []
    successful_count = 0
    with Pool(processes=cpu_count()) as pool:
        results = pool.map(preprocess_audio_file, files_to_process)
    for input_f, error in results:
        if error:
            failed_files.append((os.path.basename(input_f), error))
        else:
            successful_count += 1
    print(f"Preprocessing complete. Successful: {successful_count}, Failed: {len(failed_files)}")
    if failed_files:
        print("--- Preprocessing Failures ---")
        for fname, err in failed_files[:10]:
            print(f"  File: {fname}, Error: {err}")
        if len(failed_files) > 10:
            print(f"  ... and {len(failed_files) - 10} more")
    return failed_files

def rename_potential_duplicates(dataset_dir):
    if shutil.which("rename") is None:
        print("Warning: 'rename' utility not found")
        return
    print("Checking for duplicate files...")
    try:
        cmd = ["rename", "-v", "s/(\\.\\w+)~(\\d*)$/_$2$1/", f"{dataset_dir}/*.*~*"]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0 and result.stdout.strip():
            print("Renamed duplicates:\n", result.stdout)
        else:
            print("No duplicates found or renamed")
    except Exception as e:
        print(f"Error during rename: {e}")

if not use_cached_dataset:
    print("\n--- Starting Dataset Processing ---")
    os.makedirs(output_path, exist_ok=True)
    os.makedirs(temp_wav_path, exist_ok=True)
    os.makedirs(output_wav_dir, exist_ok=True)
    input_wem_dir = os.path.join(dataset_root, "wavs")
    if not os.path.isdir(input_wem_dir):
        input_wem_dir = dataset_root
        if not os.path.isdir(input_wem_dir):
            raise FileNotFoundError(f"Input audio directory not found at {input_wem_dir}")
        print(f"Using dataset root {dataset_root} as input")
    conversion_tool_param = "ww2ogg" if ww2ogg_available else vgmstream_cli if vgmstream_cli else None
    if conversion_tool_param:
        print(f"Using {conversion_tool_param} for conversion")
        conversion_failures = convert_wem_to_wav_parallel(input_wem_dir, temp_wav_path, conversion_tool_param)
        if not any(f.lower().endswith(".wav") for f in os.listdir(temp_wav_path)):
            print(f"Warning: No WAV files in {temp_wav_path}")
    else:
        print("Warning: No WEM conversion tool. Assuming WAV input")
        temp_wav_path = input_wem_dir
    preprocessing_failures = preprocess_audio_parallel(temp_wav_path, output_wav_dir, TARGET_SR)
    rename_potential_duplicates(output_wav_dir)
    metadata_src = os.path.join(dataset_root, "metadata.csv")
    print(f"\nLooking for metadata at: {metadata_src}")
    if os.path.exists(metadata_src):
        print("Metadata found. Copying...")
        try:
            shutil.copy(metadata_src, metadata_dst)
            print(f"Copied to {metadata_dst}")
        except Exception as e:
            print(f"Error copying metadata: {e}. Attempting transcription")
            metadata_src = None
    else:
        print("Metadata not found")
        metadata_src = None
    if not os.path.exists(metadata_dst) or os.path.getsize(metadata_dst) == 0:
        print("Generating metadata with Whisper...")
        try:
            import whisper
            print("Loading Whisper model...")
            if torch.cuda.is_available():
                vram_free = torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated()
                device = "cuda" if vram_free > 2 * 1024**3 else "cpu"
                print(f"Using {device} for Whisper (VRAM free: {vram_free/1024**3:.2f}GB)")
            else:
                device = "cpu"
            model = whisper.load_model("base", device=device)
            print("Whisper loaded")
            transcription_errors = []
            generated_count = 0
            with open(metadata_dst, "w", encoding='utf-8', newline='') as f:
                writer = csv.writer(f, delimiter='|', quoting=csv.QUOTE_MINIMAL)
                wav_files = sorted([f for f in os.listdir(output_wav_dir) if f.lower().endswith(".wav")])
                if not wav_files:
                    print("Warning: No WAV files to transcribe")
                else:
                    from tqdm import tqdm
                    for file in tqdm(wav_files, desc="Transcribing"):
                        file_path = os.path.join(output_wav_dir, file)
                        try:
                            result = model.transcribe(file_path, fp16=(device == "cuda"))
                            file_id = os.path.splitext(file)[0]
                            transcription = result["text"].strip().replace("\n", " ")
                            if not transcription:
                                transcription_errors.append((file, "Empty transcription"))
                                continue
                            writer.writerow([file_id, SPEAKER_NAME, transcription])
                            generated_count += 1
                        except Exception as e:
                            transcription_errors.append((file, str(e)))
            print(f"Generated {generated_count} metadata entries")
            if transcription_errors:
                print(f"{len(transcription_errors)} transcription errors")
                for fname, err in transcription_errors[:5]:
                    print(f"  File: {fname}, Error: {err}")
            del model
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
        except ImportError:
            raise RuntimeError("Whisper package not found")
        except Exception as e:
            raise RuntimeError(f"Whisper transcription failed: {e}")
    if os.path.exists(metadata_dst) and os.path.getsize(metadata_dst) > 0:
        print(f"\nSplitting {metadata_dst} into Train/Val...")
        try:
            with open(metadata_dst, 'r', encoding='utf-8') as f:
                all_lines = [line for line in f if line.strip()]
            num_total = len(all_lines)
            num_val = int(num_total * VALIDATION_SPLIT_RATIO)
            num_train = num_total - num_val
            if num_total < MIN_VAL_SAMPLES:
                print("Dataset too small for validation. Using all for training")
                shutil.copy(metadata_dst, metadata_train_dst)
                with open(metadata_val_dst, 'w') as f_val:
                    pass
                print(f"Train: {num_total}, Val: 0")
            elif num_val < MIN_VAL_SAMPLES and num_total >= MIN_VAL_SAMPLES:
                num_val = MIN_VAL_SAMPLES
                num_train = num_total - num_val
                print(f"Adjusted split: Train={num_train}, Val={num_val}")
            else:
                print(f"Standard split: Train={num_train}, Val={num_val}")
            random.shuffle(all_lines)
            train_lines = all_lines[:num_train]
            val_lines = all_lines[num_train:]
            with open(metadata_train_dst, 'w', encoding='utf-8') as f_train:
                f_train.writelines(train_lines)
            with open(metadata_val_dst, 'w', encoding='utf-8') as f_val:
                f_val.writelines(val_lines)
            print(f"Train saved: {metadata_train_dst}")
            print(f"Val saved: {metadata_val_dst}")
        except Exception as e:
            print(f"Error splitting metadata: {e}. Using combined file")
            if os.path.exists(metadata_dst):
                shutil.copy(metadata_dst, metadata_train_dst)
                shutil.copy(metadata_dst, metadata_val_dst)
    else:
        print("Error: Metadata file missing or empty")
        open(metadata_train_dst, 'w').close()
        open(metadata_val_dst, 'w').close()
    if temp_wav_path != input_wem_dir and os.path.exists(temp_wav_path):
        print(f"Cleaning up: {temp_wav_path}")
        shutil.rmtree(temp_wav_path, ignore_errors=True)

print("\n--- Verifying Dataset ---")
final_wav_files = []
if os.path.isdir(output_wav_dir):
    final_wav_files = [f for f in os.listdir(output_wav_dir) if f.lower().endswith(".wav")]
    print(f"WAV files: {len(final_wav_files)}")
    !ls -l {output_wav_dir} | head -n 5
else:
    print(f"Error: {output_wav_dir} does not exist")
train_lines_count = 0
val_lines_count = 0
if os.path.exists(metadata_train_dst):
    train_lines_count = sum(1 for line in open(metadata_train_dst, "r", encoding='utf-8') if line.strip())
    print(f"Train metadata lines: {train_lines_count}")
    !echo "Train head:"; !head -n 3 {metadata_train_dst}
else:
    print(f"Error: {metadata_train_dst} missing")
if os.path.exists(metadata_val_dst):
    val_lines_count = sum(1 for line in open(metadata_val_dst, "r", encoding='utf-8') if line.strip())
    print(f"Val metadata lines: {val_lines_count}")
    !echo "Val head:"; !head -n 3 {metadata_val_dst}
else:
    print(f"Error: {metadata_val_dst} missing")
if not final_wav_files or train_lines_count == 0:
    raise RuntimeError("Dataset processing failed: No WAV files or training metadata")

# Step 6: Create RVC Training Configuration
print("\n--- Creating RVC Training Config ---")
config_content = {
    "train": {
        "log_interval": 100, "seed": 1234, "epochs": 200, "learning_rate": 1e-4,
        "betas": [0.8, 0.99], "eps": 1e-9, "batch_size": 8,
        "fp16_run": torch.cuda.is_available(), "lr_decay": 0.999875,
        "segment_size": 12800, "init_lr_ratio": 1, "warmup_epochs": 0,
        "c_mel": 45, "c_kl": 1.0
    },
    "data": {
        "training_files": metadata_train_dst, "validation_files": metadata_val_dst,
        "max_wav_value": 32768.0, "sampling_rate": TARGET_SR,
        "filter_length": 2048, "hop_length": 240, "win_length": 1200,
        "n_mel_channels": 80, "mel_fmin": 0.0, "mel_fmax": None
    },
    "model": {
        "inter_channels": 192, "hidden_channels": 192, "filter_channels": 768,
        "n_heads": 2, "n_layers": 6, "kernel_size": 3, "p_dropout": 0.1,
        "resblock": "1", "resblock_kernel_sizes": [3, 7, 11],
        "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
        "upsample_rates": [10, 6, 2, 2], "upsample_initial_channel": 512,
        "upsample_kernel_sizes": [16, 16, 4, 4], "n_layers_q": 3,
        "use_spectral_norm": False, "gin_channels": 256
    },
    "experiment_name": SPEAKER_NAME, "save_every_epoch": 10,
    "save_only_latest": False, "save_total_limit": 0, "f0method": "rmvpe",
    "pretrained_G": os.path.join(rvc_dir, "pretrained_v2", "f0G40k.pth"),
    "pretrained_D": os.path.join(rvc_dir, "pretrained_v2", "f0D40k.pth"),
    "cache_data_in_ram": True, "num_workers": min(4, cpu_count()),
    "pitch_guidance": True, "use_wandb": False
}

# Set GPUs with fallback
gpu_count = torch.cuda.device_count() if torch.cuda.is_available() else 0
if gpu_count > 0:
    config_content["gpus"] = ",".join(map(str, range(gpu_count)))
else:
    config_content["gpus"] = "0"  # Fallback to single GPU or dummy value
    print("Warning: No GPUs detected. Setting 'gpus' to '0' as fallback.")

# Adjust batch size based on GPU
if is_p100:
    config_content["train"]["batch_size"] = max(2, config_content["train"]["batch_size"] // 3)
    print(f"Batch size for P100: {config_content['train']['batch_size']}")
elif is_t4_x2:
    config_content["train"]["batch_size"] = max(4, config_content["train"]["batch_size"])
    print(f"Batch size for T4x2: {config_content['train']['batch_size']}")
else:
    config_content["train"]["batch_size"] = max(4, config_content["train"]["batch_size"] // 2)
    print(f"Batch size for Unknown GPU: {config_content['train']['batch_size']}")

# Save config
config_dir = os.path.join(rvc_dir, "configs")
os.makedirs(config_dir, exist_ok=True)
config_path = os.path.join(config_dir, f"{SPEAKER_NAME}_config.json")
with open(config_path, "w", encoding='utf-8') as f:
    json.dump(config_content, f, indent=4)
print(f"Config saved: {config_path}")

# Verify config
print("Verifying config paths:")
print(f"  Train: {config_content['data']['training_files']} - {os.path.exists(config_content['data']['training_files'])}")
print(f"  Val: {config_content['data']['validation_files']} - {os.path.exists(config_content['data']['validation_files'])}")
print(f"  Pretrained G: {config_content['pretrained_G']} - {os.path.exists(config_content['pretrained_G'])}")
print(f"  Pretrained D: {config_content['pretrained_D']} - {os.path.exists(config_content['pretrained_D'])}")
print(f"  RMVPE: {os.path.join(weights_dir, 'rmvpe.pt')} - {os.path.exists(os.path.join(weights_dir, 'rmvpe.pt'))}")
print(f"  Hubert: {hubert_path} - {os.path.exists(hubert_path)}")
print(f"  GPUs: {config_content['gpus']}")
!head -n 20 {config_path}

# Step 7: Install and Configure FileBrowser
print("\n--- Step 7: Install FileBrowser ---")
os.chdir("/kaggle/working")
filebrowser_dir = "/kaggle/working/filebrowser_install"
filebrowser_bin = os.path.join(filebrowser_dir, "filebrowser")
filebrowser_db = os.path.join(filebrowser_dir, "filebrowser.db")
filebrowser_config = os.path.join(filebrowser_dir, ".filebrowser.json")
filebrowser_tar = "linux-amd64-filebrowser.tar.gz"
filebrowser_url = "https://github.com/filebrowser/filebrowser/releases/download/v2.31.2/linux-amd64-filebrowser.tar.gz"
os.makedirs(filebrowser_dir, exist_ok=True)

if os.path.exists(filebrowser_bin):
    print("FileBrowser exists, skipping download")
else:
    print(f"Downloading FileBrowser from {filebrowser_url}...")
    if os.path.exists(filebrowser_tar):
        os.remove(filebrowser_tar)
    try:
        !wget -q {filebrowser_url} -O {filebrowser_tar}
        !tar xvfz {filebrowser_tar} -C {filebrowser_dir}
        if not os.path.exists(filebrowser_bin):
            extracted_contents = os.listdir(filebrowser_dir)
            if len(extracted_contents) == 1 and os.path.isdir(os.path.join(filebrowser_dir, extracted_contents[0])):
                inner_dir = os.path.join(filebrowser_dir, extracted_contents[0])
                if os.path.exists(os.path.join(inner_dir, "filebrowser")):
                    for item in os.listdir(inner_dir):
                        shutil.move(os.path.join(inner_dir, item), filebrowser_dir)
                    shutil.rmtree(inner_dir)
                else:
                    raise FileNotFoundError("FileBrowser executable not found in subdirectory")
            else:
                raise FileNotFoundError("FileBrowser executable not found")
        print("FileBrowser extracted")
        os.remove(filebrowser_tar)
    except Exception as e:
        print(f"Error downloading/extracting FileBrowser: {e}")
        shutil.rmtree(filebrowser_dir, ignore_errors=True)
        filebrowser_bin = None

if filebrowser_bin and os.path.exists(filebrowser_bin):
    print("Configuring FileBrowser...")
    try:
        !chmod +x {filebrowser_bin}
        db_flag = f"--database={filebrowser_db}"
        config_flag = f"--config={filebrowser_config}"
        if not os.path.exists(filebrowser_config):
            !{filebrowser_bin} config init {config_flag} {db_flag}
        !{filebrowser_bin} config set --root="/kaggle/working" {config_flag} {db_flag}
        !{filebrowser_bin} config set --address="0.0.0.0" {config_flag} {db_flag}
        !{filebrowser_bin} config set --port="8088" {config_flag} {db_flag}
        !{filebrowser_bin} config set --auth.method="noauth" {config_flag} {db_flag}
        !{filebrowser_bin} config set --branding.theme="dark" {config_flag} {db_flag}
        print("FileBrowser configured")
        !{filebrowser_bin} config cat {config_flag} {db_flag}
    except Exception as e:
        print(f"Error configuring FileBrowser: {e}")
        filebrowser_bin = None

# Step 8: Attempt Manual Training
print("\n--- Step 8: Attempt Manual Training ---")
os.chdir(rvc_dir)
print(f"Changed directory: {os.getcwd()}")

print("Monitoring VRAM...")
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print(f"Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
    print(f"Reserved: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")
    !nvidia-smi

training_config_path = config_path
train_script = os.path.join("infer", "modules", "train", "train.py")
if not os.path.exists(train_script):
    print(f"Error: Training script not found at {train_script}")
elif not os.path.exists(training_config_path):
    print(f"Error: Config not found at {training_config_path}")
elif not os.path.exists(config_content['data']['training_files']) or not os.path.getsize(config_content['data']['training_files']):
    print("Error: Training metadata missing or empty")
elif not all(os.path.exists(path) for path in [config_content['pretrained_G'], config_content['pretrained_D']]):
    print("Error: Pretrained models missing")
else:
    print(f"Running: {python_executable} {train_script} --config {training_config_path}")
    try:
        env = os.environ.copy()
        env['CUDA_VISIBLE_DEVICES'] = config_content.get('gpus', '0')
        cmd = [python_executable, train_script, "--config", training_config_path]
        timeout_hours = 3
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, env=env)
        print("--- Training Log ---")
        while True:
            output = process.stdout.readline()
            if output == '' and process.poll() is not None:
                break
            if output:
                print(output.strip())
        print("--- End Training Log ---")
        return_code = process.poll()
        if return_code == 0:
            print("\n--- Training Completed ---")
            # Attempt index training
            index_script = os.path.join("infer", "modules", "train", "index.py")  # Hypothetical
            if os.path.exists(index_script):
                print("Attempting index training...")
                try:
                    cmd = [python_executable, index_script, "--model_name", SPEAKER_NAME, "--logs_dir", os.path.join(rvc_dir, "logs", SPEAKER_NAME)]
                    result = subprocess.run(cmd, check=True, capture_output=True, text=True)
                    print("Index training completed:\n", result.stdout)
                except Exception as e:
                    print(f"Index training failed: {e}. Run manually in WebUI.")
        else:
            print(f"\n--- Training Failed (Exit Code: {return_code}) ---")
            !nvidia-smi
            !ldconfig -p | grep -E 'libcufft|libnvrtc|libcudnn|libcupti|libtorch'
            !{python_executable} -c "import torch; print(f'Torch: {torch.__version__}'); print(f'CUDA: {torch.cuda.is_available()}'); print(f'CUDA Version: {torch.version.cuda}'); print(f'cuDNN: {torch.backends.cudnn.version()}'); print(f'Device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else \'N/A\'})"
            !{python_executable} -m pip list | grep -E 'torch|numpy|librosa|soundfile|scipy|numba|resampy|tqdm|einops|tensorboardX|fairseq|praat'
    except subprocess.TimeoutExpired:
        print(f"\nTraining timed out after {timeout_hours} hours")
        process.kill()
    except Exception as e:
        print(f"Error running training: {e}\n{traceback.format_exc()}")

# Step 9: Launch WebUI
print("\n--- Step 9: Launch WebUI ---")
os.chdir(webui_dir)
print(f"Changed directory: {os.getcwd()}")
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
webui_log_file = "/kaggle/working/webui.log"
tunnel_log_file = "/kaggle/working/tunnel.log"
open(webui_log_file, 'w').close()
open(tunnel_log_file, 'w').close()

def run_webui(log_queue):
    webui_ready = False
    try:
        print("Starting WebUI...")
        launch_script = "launch.py"
        if not os.path.exists(launch_script):
            for script in ["app.py", "webui.py", "main.py"]:
                if os.path.exists(script):
                    launch_script = script
                    break
            else:
                log_queue.put(f"ERROR: No WebUI launch script found in {os.getcwd()}")
                return
        cmd = [python_executable, "-u", launch_script]
        master_fd, slave_fd = pty.openpty()
        process = subprocess.Popen(cmd, stdout=slave_fd, stderr=slave_fd, close_fds=True, text=True, bufsize=1, env=os.environ.copy())
        os.close(slave_fd)
        print(f"WebUI started (PID: {process.pid})")
        log_queue.put(f"WebUI started (PID: {process.pid})")
        buffer = ""
        while process.poll() is None:
            rlist, _, _ = select.select([master_fd], [], [], 0.1)
            if rlist:
                try:
                    output = os.read(master_fd, 1024).decode('utf-8', errors='replace')
                    if output:
                        buffer += output
                        while '\n' in buffer:
                            line, buffer = buffer.split('\n', 1)
                            print(f"WebUI: {line.strip()}")
                            log_queue.put(line.strip())
                            if "Running on local URL:" in line or "Running on public URL:" in line:
                                if not webui_ready:
                                    print("WebUI ready")
                                    log_queue.put("WEBUI_READY")
                                    webui_ready = True
                            if "Error" in line or "Traceback" in line:
                                print(f"Potential error: {line.strip()}")
                    else:
                        break
                except OSError:
                    break
        exit_code = process.wait()
        print(f"WebUI terminated with exit code {exit_code}")
        log_queue.put(f"WebUI terminated with exit code {exit_code}")
        try:
            remaining_output = os.read(master_fd, 10240).decode('utf-8', errors='replace')
            if remaining_output:
                print(f"WebUI (remaining): {remaining_output.strip()}")
                log_queue.put(remaining_output.strip())
        except OSError:
            pass
        os.close(master_fd)
    except Exception as e:
        error_msg = f"Error running WebUI: {e}\n{traceback.format_exc()}"
        print(error_msg)
        log_queue.put(error_msg)

def run_filebrowser(log_queue):
    if not filebrowser_bin or not os.path.exists(filebrowser_bin):
        log_queue.put("FileBrowser not available")
        return
    try:
        cmd = [filebrowser_bin, "--config", filebrowser_config, "--database", filebrowser_db]
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
        log_queue.put(f"FileBrowser started (PID: {process.pid})")
        while process.poll() is None:
            line = process.stdout.readline().strip()
            if line:
                log_queue.put(f"FileBrowser: {line}")
            time.sleep(0.1)
        log_queue.put(f"FileBrowser terminated with exit code {process.poll()}")
    except Exception as e:
        log_queue.put(f"FileBrowser error: {e}")

def start_ngrok_tunnel(port, log_queue):
    if not NGROK_TOKEN:
        log_queue.put("Ngrok token not available")
        return None
    try:
        from pyngrok import ngrok
        print(f"Starting Ngrok tunnel for port {port}...")
        public_url = ngrok.connect(port, proto="http", bind_tls=True)
        url_str = str(public_url)
        print(f"Ngrok tunnel: {url_str}")
        log_queue.put(f"NGROK_URL:{url_str}")
        return public_url
    except Exception as e:
        log_queue.put(f"Failed to start Ngrok: {e}")
        return None

def start_pinggy_tunnel(port, log_queue):
    print(f"Starting Pinggy tunnel for port {port}...")
    cmd = f"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p 80 -R0:localhost:{port} a.pinggy.io > {tunnel_log_file} 2>&1 &"
    try:
        pinggy_process = subprocess.Popen(cmd, shell=True)
        print(f"Pinggy started (PID: {pinggy_process.pid})")
        log_queue.put(f"Pinggy started (PID: {pinggy_process.pid})")
        found_url = None
        max_attempts = 30
        for attempt in range(max_attempts):
            time.sleep(2)
            try:
                with open(tunnel_log_file, 'r') as file:
                    log_content = file.read()
                    import re
                    match = re.search(r'(https?://\w+\.pinggy\.link)', log_content)
                    if match:
                        found_url = match.group(1)
                        print(f"Pinggy URL: {found_url}")
                        log_queue.put(f"PINGGY_URL:{found_url}")
                        break
                    if "Connection refused" in log_content or "failed" in log_content.lower():
                        log_queue.put(f"Pinggy failed: {log_content[-200:]}")
                        return None
            except Exception as e:
                log_queue.put(f"Error reading Pinggy log: {e}")
        if not found_url:
            log_queue.put(f"Pinggy URL not found after {max_attempts} attempts")
            try:
                with open(tunnel_log_file, 'r') as f:
                    print(f.read())
            except:
                pass
            return None
        return found_url
    except Exception as e:
        log_queue.put(f"Failed to start Pinggy: {e}")
        return None

webui_port = 7860
log_queue = Queue()
p_webui = Process(target=run_webui, args=(log_queue,))
p_webui.start()
p_filebrowser = None
if filebrowser_bin:
    print("Starting FileBrowser...")
    p_filebrowser = Process(target=run_filebrowser, args=(log_queue,))
    p_filebrowser.start()

tunnel_url = None
tunnel_type = None
if NGROK_TOKEN:
    print("Attempting Ngrok tunnel...")
    tunnel_url = start_ngrok_tunnel(webui_port, log_queue)
    if tunnel_url:
        tunnel_type = "Ngrok"
    else:
        print("Ngrok failed. Trying Pinggy...")
        tunnel_url = start_pinggy_tunnel(webui_port, log_queue)
        if tunnel_url:
            tunnel_type = "Pinggy"
else:
    print("No Ngrok token. Trying Pinggy...")
    tunnel_url = start_pinggy_tunnel(webui_port, log_queue)
    if tunnel_type:
        tunnel_type = "Pinggy"

if not tunnel_url:
    print("WARNING: Failed to establish tunnel")
else:
    print(f"\n----------------------------------------------------")
    print(f"✅ {tunnel_type or 'N/A'} tunnel started! ✅")
    tunnel_url_str = str(tunnel_url)
    print(f"   WebUI URL ({tunnel_type}): {tunnel_url_str}")
    if tunnel_type == "Pinggy":
        https_url = tunnel_url_str.replace("http:", "https:")
        print(f"   Try HTTPS: {https_url}")
    print(f"----------------------------------------------------")

webui_is_ready = False
start_time = time.time()
max_run_duration_hours = 8
max_run_duration_sec = max_run_duration_hours * 3600
print(f"\nMonitoring WebUI. Max duration: {max_run_duration_hours} hours")
if tunnel_url:
    print("Access WebUI using the URL above")
else:
    print("WebUI may only be accessible locally")
print("Training instructions below")
try:
    with open(webui_log_file, 'a', encoding='utf-8') as log_f:
        log_f.write(f"Monitoring started at {time.ctime()}\n")
        while p_webui.is_alive():
            if time.time() - start_time > max_run_duration_sec:
                print(f"\nMax duration of {max_run_duration_hours} hours reached")
                log_f.write(f"Timeout at {time.ctime()}\n")
                p_webui.terminate()
                if p_filebrowser and p_filebrowser.is_alive():
                    p_filebrowser.terminate()
                time.sleep(5)
                break
            while not log_queue.empty():
                try:
                    message = log_queue.get_nowait()
                    log_f.write(f"{time.strftime('%H:%M:%S')} | {message}\n")
                    log_f.flush()
                    if message == "WEBUI_READY" and not webui_is_ready:
                        print(">>> WebUI Ready <<<")
                        webui_is_ready = True
                    elif "Error:" in message or "Traceback (most recent call last):" in message:
                        print(f"Log: {message}")
                    elif message.startswith("WebUI process terminated"):
                        print(f"Log: {message}")
                except queue.Empty:
                    break
                except Exception as e:
                    print(f"Error processing log: {e}")
                    log_f.write(f"Error processing log: {e}\n")
            time.sleep(2)
except KeyboardInterrupt:
    print("\nKeyboardInterrupt received. Shutting down...")
    try:
        if log_f and not log_f.closed:
            log_f.write(f"KeyboardInterrupt at {time.ctime()}\n")
    except NameError:
        pass
    if p_webui.is_alive():
        p_webui.terminate()
    if p_filebrowser and p_filebrowser.is_alive():
        p_filebrowser.terminate()
except Exception as e:
    print(f"Error in monitoring: {e}\n{traceback.format_exc()}")
    try:
        if log_f and not log_f.closed:
            log_f.write(f"Error in monitoring: {e}\n{traceback.format_exc()}\n")
    except NameError:
        pass
    if p_webui.is_alive():
        p_webui.terminate()
    if p_filebrowser and p_filebrowser.is_alive():
        p_filebrowser.terminate()

if p_webui.is_alive():
    p_webui.join(timeout=30)
    if p_webui.is_alive():
        print("WebUI did not terminate gracefully. Killing...")
        p_webui.kill()
if p_filebrowser and p_filebrowser.is_alive():
    p_filebrowser.join(timeout=30)
    if p_filebrowser.is_alive():
        print("FileBrowser did not terminate gracefully. Killing...")
        p_filebrowser.kill()

print("WebUI monitoring finished")
print(f"Logs: {webui_log_file}, {tunnel_log_file}")

# Training instructions
fb_url = "Unavailable (Tunnel Error?)"
if tunnel_url and filebrowser_bin:
    base_tunnel_url = str(tunnel_url).replace("https://", "").replace("http://", "").split(':')[0]
    fb_url = f"http://{base_tunnel_url}:8088"
filebrowser_info = f"2.  **FileBrowser:** Access at: {fb_url}" if filebrowser_bin else "2.  **FileBrowser:** Not installed"

# Debug config_content state
print("Debug: config_content keys:", list(config_content.keys()))
print(f"Debug: GPUs in config: {config_content.get('gpus', 'Not set')}")

# Use fallback for GPUs
gpus_value = config_content.get('gpus', '0')  # Fallback to '0' if missing

print(f"""
=========================================================
          RVC WebUI Training Instructions
=========================================================

1.  **Access WebUI:** Open the {tunnel_type or 'N/A'} URL: {tunnel_url_str if tunnel_url else 'Local access only'}
    * If the URL fails, check {tunnel_log_file}
    * For Pinggy, try HTTPS: {https_url if tunnel_type == "Pinggy" else 'N/A'}

2.  **Navigate to Training:** Find the 'Training' or 'Train' tab

3.  **Configure Training:**
    * **Experiment Name:** '{SPEAKER_NAME}'
    * **Dataset Path:** `{output_wav_dir}` or `{metadata_train_dst}'
    * **Sample Rate:** `{TARGET_SR}'
    * **Pitch Extraction:** 'rmvpe'
    * **GPUs:** '{gpus_value}'
    * **Epochs:** `{config_content['train']['epochs']}'
    * **Batch Size:** `{config_content['train']['batch_size']}'
    * **Save Frequency:** `{config_content['save_every_epoch']}' epochs
    * **Pretrained Models:**
        * G: `{config_content['pretrained_G']}'
        * D: `{config_content['pretrained_D']}'

4.  **Start Training:** Click 'Start Training' or 'Train Model'

5.  **Monitor:** Watch progress in WebUI console. Check VRAM in Kaggle's Accelerator panel. Reduce batch size if OOM occurs

6.  **After Training:**
    * Generate index file in 'Index Training' or 'Inference' tab
    * Select '{SPEAKER_NAME}' model and click 'Train Index'

7.  **Test Inference:** Use 'Inference' tab, select '{SPEAKER_NAME}' model, ensure index loaded, upload audio, click 'Convert'

8.  **Download Outputs:** See Step 10

=========================================================
""")

# Step 10: Save Outputs
print("\n--- Step 10: Download Outputs ---")
model_output_dir_rvc = os.path.join(rvc_dir, "weights")
model_output_dir_webui = os.path.join(webui_dir, "models", "checkpoints", SPEAKER_NAME)
index_output_dir_rvc_glob = os.path.join(rvc_dir, "logs", SPEAKER_NAME, "added_*.index")
index_output_dir_webui_glob = os.path.join(webui_dir, "models", "checkpoints", SPEAKER_NAME, "add_*.index")
inference_output_dir_webui = os.path.join(webui_dir, "outputs")
logs_dir_rvc = os.path.join(rvc_dir, "logs", SPEAKER_NAME)

print(f"""
=========================================================
                Training Complete & Outputs
=========================================================

Download files using:

1.  **Kaggle Output:** Check "Data" -> "Output" in Kaggle UI
{filebrowser_info}
3.  **WebUI Download:** Use WebUI download buttons if available

**File Locations:**

* **Model (.pth):**
    * RVC: `{model_output_dir_rvc}/{SPEAKER_NAME}_<epoch>.pth`
    * WebUI: `{model_output_dir_webui}/<name>.pth`
* **Index (.index):**
    * RVC: `{index_output_dir_rvc_glob}`
    * WebUI: `{index_output_dir_webui_glob}`
* **Inference Outputs (.wav):**
    * WebUI: `{inference_output_dir_webui}/`
* **Logs:**
    * RVC: `{logs_dir_rvc}/`
    * WebUI: `{webui_log_file}`
    * Tunnel: `{tunnel_log_file}`

**Using Model Locally:**

1. Install RVC locally (see RVC GitHub)
2. Download `{SPEAKER_NAME}.pth` (latest/best epoch)
3. Download `.index` file (`added_...` or `add_...`)
4. Place `.pth` in RVC `weights`
5. Place `.index` in RVC `logs/{SPEAKER_NAME}/`
6. Use RVC WebUI/CLI for inference

=========================================================
""")

print("Final output check (last 5 files):")
!ls -lhtr {model_output_dir_rvc} | tail -n 5 || echo "Not found: {model_output_dir_rvc}"
!ls -lhtr {model_output_dir_webui} | tail -n 5 || echo "Not found: {model_output_dir_webui}"
!ls -lhtr {index_output_dir_rvc_glob} 2>/dev/null | tail -n 5 || echo "No index files: {index_output_dir_rvc_glob}"
!ls -lhtr {index_output_dir_webui_glob} 2>/dev/null | tail -n 5 || echo "No index files: {index_output_dir_webui_glob}"
!ls -lhtr {inference_output_dir_webui} | tail -n 5 || echo "Not found: {inference_output_dir_webui}"
!ls -lhtr {logs_dir_rvc} | tail -n 5 || echo "Not found: {logs_dir_rvc}"

print("\nNotebook execution finished")

IMPORTANT KAGGLE SETTINGS:
- Ensure 'Accelerator' is set to GPU (P100 or T4 x2 recommended).
- Ensure 'Internet' is turned ON in the Settings panel.
Checking disk space...
Available space: 20.00 GB
Installing cuDNN for CUDA 11.8...
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  libcudnn8 libcudnn8-dev
0 upgraded, 2 newly installed, 0 to remove and 153 not upgraded.
Need to get 878 MB of archives.
After this operation, 2,366 MB of additional disk space will be used.
Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  libcudnn8 8.9.7.29-1+cuda11.8 [441 MB]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  libcudnn8-dev 8.9.7.29-1+cuda11.8 [437 M

KeyboardInterrupt: 