# In [None]:
#@title RVC Dataset Maker (All-in-One)
#====================================================================================
# @markdown # 1. USER CONFIGURATION
# @markdown ### Fill in all your project details here before running the script.
#====================================================================================

# @markdown **Select the main operation mode and the source of your audio.**
# @markdown - `Splitting`: Separates vocals, then cuts them into a dataset.
# @markdown - `Separate`: Only separates vocals from music.
# `mode` decides whether the slicing step runs after vocal separation.
# `dataset` decides whether the input is downloaded with yt-dlp (`url`) or
# read from the mounted Drive (`drive_path`).
mode = "Splitting"  #@param ["Splitting", "Separate"]
dataset = "Drive"  #@param ["Youtube", "Drive"]

# @markdown ---
# @markdown **Fill in the source URL or Drive Path.**
# @markdown - If using Drive, right-click your file in Colab's file browser and "Copy path".
# Only the field matching `dataset` is validated and used; the other may stay empty.
url = ""  #@param {type:"string"}
drive_path = "/content/drive/MyDrive/fuyumi_elsa/fuyumi_elsa_mentah.wav"  #@param {type:"string"}

# @markdown ---
# @markdown **Define a project name and output format.**
# @markdown - `audio_name`: A unique name for your project folders.
# @markdown - `output_sr`: The sample rate for the output. 32000 is good for RVC. '0' keeps the original.
# `audio_name` is used in local folder names (separated/<model>/<audio_name>,
# dataset/<audio_name>) and in the Drive destination folders.
# NOTE: `output_sr` is applied only when the vocal track is loaded for slicing
# ('Splitting' mode). The separated stems copied to Drive are not resampled by
# this script.
audio_name = "fuyumi_elsa_dataset2"  #@param {type:"string"}
output_sr = "44100"  #@param ["0", "8000", "16000", "22050", "32000", "44100", "48000"]

# @markdown ---
# @markdown **Advanced Settings (Optional)**
# @markdown - `demucs_model`: The AI model for vocal separation. `htdemucs` is recommended.
# @markdown - `chunk_duration_seconds`: Splits very long audio files into chunks of this duration (in seconds) to avoid memory errors.
# Input longer than `chunk_duration_seconds` is segmented with ffmpeg, each
# segment is separated on its own, and the vocal parts are concatenated again.
demucs_model = "htdemucs"  #@param ["htdemucs", "demucs", "demucs_extra", "htdemucs_ft"]
chunk_duration_seconds = 1800  #@param {type:"number"}

# @markdown - `Slicer Settings` (for 'Splitting' mode only): Controls how the vocal track is cut. Defaults are usually effective.
# Units, as consumed by the Slicer class further down:
#   threshold     - decibels; converted with 10 ** (threshold / 20) and compared
#                   against the per-frame RMS level.
#   min_length, min_interval, hop_size, max_sil_kept - milliseconds.
# The Slicer raises ValueError unless
#   min_length >= min_interval >= hop_size  and  max_sil_kept >= hop_size.
threshold = -40  #@param {type:"slider", min:-60, max:-20, step:1}
min_length = 5000  #@param {type:"number"}
min_interval = 300  #@param {type:"number"}
hop_size = 10 #@param {type:"number"}
max_sil_kept = 500 #@param {type:"number"}

#====================================================================================
# SCRIPT EXECUTION - NO NEED TO EDIT BELOW THIS LINE
#====================================================================================
import os
import sys
import subprocess
import shlex
import glob
import shutil
from google.colab import drive

def run_command(command, error_message):
    """Run a command and echo its combined stdout/stderr line by line.

    Args:
        command: The command line as one string. It is tokenised with
            ``shlex.split`` and executed directly, without a shell.
        error_message: Text printed when the command exits with a non-zero
            status.

    Raises:
        subprocess.CalledProcessError: If the exit status is not 0.
    """
    # errors='replace': external tools can echo bytes that are not valid UTF-8
    # (file names, media metadata). Strict decoding would abort the whole run
    # with UnicodeDecodeError even though the command itself is fine.
    with subprocess.Popen(
        shlex.split(command),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        encoding='utf-8',
        errors='replace',
    ) as process:
        # Iterating the pipe blocks until a line or EOF arrives, so there is no
        # busy-wait if the child closes stdout before it exits.
        for line in process.stdout:
            print(line.strip())
        rc = process.wait()
    if rc != 0:
        print(f"❌ ERROR: {error_message} (Exit Code: {rc})")
        raise subprocess.CalledProcessError(rc, command)

try:
    # --- 2. SETUP AND VALIDATION ---
    # Drive is mounted before validation because the Drive-path existence
    # check below needs /content/drive to be available.
    print("STEP 2: Mounting Google Drive...")
    drive.mount('/content/drive')
    print("\nSTEP 2: Validating parameters...")
    if audio_name.strip() == "":
        raise ValueError("'audio_name' cannot be empty.")
    if dataset == "Youtube":
        if url.strip() == "":
            raise ValueError("'url' cannot be empty for Youtube mode.")
    elif dataset == "Drive":
        if drive_path.strip() == "":
            raise ValueError("'drive_path' cannot be empty for Drive mode.")
        if not os.path.exists(drive_path):
            raise FileNotFoundError(f"File not found at 'drive_path': {drive_path}")
    # The form delivers the sample rate as a string; int() also rejects
    # anything that is not a whole number.
    output_sr_int = int(output_sr)
    print("✅ Parameters validated successfully.")

    # --- 3. INSTALL DEPENDENCIES ---
    print("\nSTEP 3: Installing all required libraries...")
    # Same argument vector the original one-line command string expands to.
    install_argv = [
        "python3", "-m", "pip", "install", "--upgrade",
        "demucs", "yt-dlp", "ffmpeg-python", "librosa", "soundfile", "numpy",
        "--quiet",
    ]
    subprocess.check_call(install_argv)
    # These imports come after the install on purpose: the packages may not
    # be available (or may be older versions) before pip has run.
    import numpy as np
    import librosa
    import soundfile as sf
    print("✅ All libraries installed.")

    # --- 4. PREPARE AUDIO INPUT ---
    print("\nSTEP 4: Preparing audio input...")
    audio_input = ""
    if dataset != "Youtube":
        # Drive mode: use the file in place.
        audio_input = os.path.abspath(drive_path)
    else:
        import yt_dlp
        print(f"Downloading from YouTube URL: {url}")
        os.makedirs("youtube_audio", exist_ok=True)
        # Best available audio stream, converted to WAV by the FFmpeg
        # post-processor; the final file is youtube_audio/<audio_name>.wav.
        extract_wav = {'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav'}
        download_options = {
            'format': 'bestaudio/best',
            'postprocessors': [extract_wav],
            'outtmpl': f'youtube_audio/{audio_name}.%(ext)s',
        }
        with yt_dlp.YoutubeDL(download_options) as downloader:
            downloader.download([url])
        audio_input = os.path.abspath(f"youtube_audio/{audio_name}.wav")

    # Guards both branches: a failed conversion or a vanished Drive file.
    if not os.path.exists(audio_input):
        raise FileNotFoundError(f"Audio input file not found: {audio_input}")
    print(f"✅ Audio input ready: {audio_input}")

    # --- 5. VOCAL SEPARATION (DEMUCS) ---
    # Produces <output_dir>/vocals.wav. Input longer than
    # chunk_duration_seconds is segmented, separated chunk by chunk, and the
    # vocal parts are concatenated; shorter input is separated in one pass.
    print("\nSTEP 5: Starting vocal separation...")
    duration = librosa.get_duration(path=audio_input)
    print(f"Audio duration: {duration:.2f} seconds.")
    output_dir = os.path.abspath(f"separated/{demucs_model}/{audio_name}")
    os.makedirs(output_dir, exist_ok=True)
    vocals_final_path = os.path.join(output_dir, "vocals.wav")

    if duration > chunk_duration_seconds:
        # Logic for long files
        print(f"Audio is long. Splitting into {chunk_duration_seconds}s chunks...")
        chunks_dir = os.path.abspath("temp_chunks")
        os.makedirs(chunks_dir, exist_ok=True)
        chunk_pattern = f"{chunks_dir}/{audio_name}_*.wav"
        # Remove chunks left by an earlier run. If that run produced more
        # chunks than this one, the extras would otherwise be globbed below
        # and appended to the result.
        for stale_chunk in glob.glob(chunk_pattern):
            os.remove(stale_chunk)
        # -y: overwrite without prompting; the script cannot answer ffmpeg's
        # interactive "Overwrite?" question.
        split_cmd = f'ffmpeg -hide_banner -loglevel error -y -i "{audio_input}" -f segment -segment_time {chunk_duration_seconds} -c copy "{chunks_dir}/{audio_name}_%03d.wav"'
        run_command(split_cmd, "Failed to split audio into chunks.")

        chunk_files = sorted(glob.glob(chunk_pattern))
        if not chunk_files:
            raise FileNotFoundError(f"No audio chunks were produced in: {chunks_dir}")
        vocals_from_chunks = []
        for idx, chunk_file in enumerate(chunk_files):
            print(f"--- Processing Chunk {idx+1}/{len(chunk_files)} ---")
            demucs_cmd = f'python3 -m demucs.separate --two-stems vocals --name {demucs_model} -o "separated" "{chunk_file}"'
            run_command(demucs_cmd, f"Demucs failed on chunk {idx+1}.")
            # Without --filename, Demucs writes to separated/<model>/<track name>/.
            base = os.path.splitext(os.path.basename(chunk_file))[0]
            expected_vocal_path = os.path.abspath(f"separated/{demucs_model}/{base}/vocals.wav")
            # A missing part would silently remove a whole segment from the
            # final vocal track, so stop instead of skipping it.
            if not os.path.exists(expected_vocal_path):
                raise FileNotFoundError(f"Demucs produced no vocals for chunk {idx+1}: {expected_vocal_path}")
            vocals_from_chunks.append(expected_vocal_path)

        concat_list_file = "vocals_list.txt"
        with open(concat_list_file, "w", encoding="utf-8") as f:
            for v_file in vocals_from_chunks:
                # Inside ffmpeg's single-quoted syntax a literal quote is
                # written as '\'' (close, escaped quote, reopen).
                quoted_path = v_file.replace("'", "'\\''")
                f.write(f"file '{quoted_path}'\n")

        print("Stitching vocal chunks back together...")
        # -y: a vocals.wav left by a previous run must be replaced, not prompted about.
        concat_cmd = f'ffmpeg -hide_banner -loglevel error -y -f concat -safe 0 -i "{concat_list_file}" -c copy "{vocals_final_path}"'
        run_command(concat_cmd, "Failed to concatenate vocal chunks.")
    else:
        # Logic for short files
        print("Processing full audio file with Demucs...")
        # --filename places the stems directly in separated/<model>/<audio_name>/,
        # i.e. in output_dir.
        demucs_cmd = f'python3 -m demucs.separate --two-stems vocals --name {demucs_model} -o "separated" --filename "{audio_name}/{{stem}}.{{ext}}" "{audio_input}"'
        run_command(demucs_cmd, "Demucs separation process failed.")

    if not os.path.exists(vocals_final_path):
        raise FileNotFoundError(f"Vocal separation failed. Final vocal file not found: {vocals_final_path}")
    print("✅ Vocal separation complete!")

    # --- 6. SLICE DATASET ---
    if mode == "Splitting":
        print("\nSTEP 6: Slicing vocal track into dataset...")
        # Slicer class from original notebook
        class Slicer:
            """Cut a waveform into clips at sufficiently long quiet passages.

            All durations are given in milliseconds and converted once, in
            ``__init__``, into counts of RMS analysis frames (one frame per
            ``hop_size`` samples). ``slice`` then works purely in frames and
            converts back to sample offsets when cutting.
            """

            def __init__(self, sr: int, threshold: float, min_length: int, min_interval: int, hop_size: int, max_sil_kept: int):
                """Store the slicing parameters in frame units.

                Args:
                    sr: Sample rate of the waveforms that will be sliced.
                    threshold: Silence level in dB; frames whose RMS is below
                        ``10 ** (threshold / 20)`` count as silent.
                    min_length: Minimum clip length (ms) before a cut is allowed.
                    min_interval: Minimum silence length (ms) that may trigger a cut.
                    hop_size: Distance between RMS frames (ms).
                    max_sil_kept: Maximum silence (ms) kept around a cut.

                Raises:
                    ValueError: If ``min_length >= min_interval >= hop_size`` or
                        ``max_sil_kept >= hop_size`` does not hold.
                """
                if not min_length >= min_interval >= hop_size:
                    raise ValueError('min_length >= min_interval >= hop_size')
                if not max_sil_kept >= hop_size:
                    raise ValueError('max_sil_kept >= hop_size')
                self.sr = sr
                self.threshold = 10 ** (threshold / 20.)
                # Hop and window are in samples.
                self.hop_size = round(sr * hop_size / 1000)
                self.win_size = min(round(sr * min_interval / 1000), 4 * self.hop_size)
                # ms -> samples -> frames. min_interval needs the sr factor
                # like the other two; without it the value rounds to 0 frames
                # and the minimum-silence requirement is never enforced.
                self.min_length = round(sr * min_length / 1000 / self.hop_size)
                self.min_interval = round(sr * min_interval / 1000 / self.hop_size)
                self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)

            def _get_rms(self, y, **kwargs):
                """Return ``librosa.feature.rms`` for ``y`` with the given keyword arguments."""
                return librosa.feature.rms(y=y, **kwargs)

            def slice(self, waveform):
                """Split ``waveform`` at silent stretches and return the clips.

                Args:
                    waveform: Sample array; the caller in this script passes
                        the mono output of ``librosa.load``.

                Returns:
                    A list of slices of ``waveform``. When no cut point is
                    found the list contains ``waveform`` itself.
                """
                rms_list = self._get_rms(y=waveform, frame_length=self.win_size, hop_length=self.hop_size)[0]
                # Each tag is (start_frame, end_frame) of a span to remove;
                # start == end marks a plain cut with nothing removed.
                sil_tags = []
                silence_start = None
                clip_start = 0
                for i, rms in enumerate(rms_list):
                    if rms < self.threshold:
                        # Inside silence: remember where it began.
                        if silence_start is None:
                            silence_start = i
                        continue
                    if silence_start is None:
                        # Ordinary voiced frame, no silence pending.
                        continue
                    # First voiced frame after a silent run: decide whether to cut.
                    is_leading_silence = silence_start == 0 and i > self.max_sil_kept
                    need_slice_middle = (
                        i - silence_start >= self.min_interval
                        and i - clip_start >= self.min_length
                    )
                    if not is_leading_silence and not need_slice_middle:
                        silence_start = None
                        continue
                    if i - silence_start <= self.max_sil_kept:
                        # Short silence: cut once, at its quietest frame.
                        pos = rms_list[silence_start:i + 1].argmin() + silence_start
                        sil_tags.append((0, pos) if silence_start == 0 else (pos, pos))
                        clip_start = pos
                    else:
                        # Long silence: pick the quietest frame within
                        # max_sil_kept of each end and drop what lies between.
                        pos_l = rms_list[silence_start:silence_start + self.max_sil_kept + 1].argmin() + silence_start
                        pos_r = rms_list[i - self.max_sil_kept:i + 1].argmin() + i - self.max_sil_kept
                        sil_tags.append((0, pos_r) if silence_start == 0 else (pos_l, pos_r))
                        clip_start = pos_r
                    silence_start = None
                # Silence that runs to the end of the audio.
                total_frames = len(rms_list)
                if silence_start is not None and total_frames - silence_start >= self.min_interval:
                    trailing_pos = rms_list[silence_start:total_frames + 1].argmin() + silence_start
                    sil_tags.append((trailing_pos, total_frames + 1))
                if not sil_tags:
                    return [waveform]
                # Keep everything between consecutive removed spans; frame
                # indices become sample offsets via hop_size.
                chunks = []
                if sil_tags[0][0] > 0:
                    chunks.append(waveform[:sil_tags[0][0] * self.hop_size])
                for i in range(len(sil_tags) - 1):
                    chunks.append(waveform[sil_tags[i][1] * self.hop_size:sil_tags[i + 1][0] * self.hop_size])
                if sil_tags[-1][1] * self.hop_size < len(waveform):
                    chunks.append(waveform[sil_tags[-1][1] * self.hop_size:])
                return chunks

        # A configured rate of 0 is passed to librosa as sr=None; any other
        # value is requested from librosa.load directly.
        if output_sr_int == 0:
            load_sr = None
        else:
            load_sr = output_sr_int
        audio, sr = librosa.load(vocals_final_path, sr=load_sr, mono=True)
        final_sr = output_sr_int if output_sr_int != 0 else sr
        slicer = Slicer(
            sr=final_sr,
            threshold=threshold,
            min_length=min_length,
            min_interval=min_interval,
            hop_size=hop_size,
            max_sil_kept=max_sil_kept,
        )
        chunks = slicer.slice(audio)
        dataset_dir = os.path.abspath(f"dataset/{audio_name}")
        os.makedirs(dataset_dir, exist_ok=True)
        saved_chunks = 0
        for i, chunk in enumerate(chunks):
            # Clips shorter than 0.1 s are not written. Their index is still
            # consumed, so the file numbering can contain gaps.
            if len(chunk) / final_sr >= 0.1:
                sf.write(f"{dataset_dir}/split_{i:04d}.wav", chunk, final_sr)
                saved_chunks += 1
        print(f"✅ Splitting complete. {saved_chunks} chunks saved.")
    else:
        print("\nSTEP 6: Mode is 'Separate'. Skipping slicing.")

    # --- 7. SAVE TO GOOGLE DRIVE ---
    # The separated stems are always copied; the sliced clips are copied only
    # in 'Splitting' mode and only if the local dataset folder exists.
    # dirs_exist_ok=True lets a re-run copy into folders that already exist.
    print("\nSTEP 7: Copying all results to Google Drive...")
    drive_separated_dest = f"/content/drive/MyDrive/RVC_Datasets/{audio_name}"
    print(f"Copying separated audio to: {drive_separated_dest}")
    shutil.copytree(output_dir, drive_separated_dest, dirs_exist_ok=True)
    if mode == "Splitting":
        source_dataset_folder = os.path.abspath(f"dataset/{audio_name}")
        drive_dataset_dest = f"/content/drive/MyDrive/RVC_Datasets/{audio_name}_sliced"
        has_sliced_output = os.path.exists(source_dataset_folder)
        if has_sliced_output:
            print(f"Copying sliced dataset to: {drive_dataset_dest}")
            shutil.copytree(
                source_dataset_folder,
                drive_dataset_dest,
                dirs_exist_ok=True,
            )

    print("\n\n🎉 All processes are complete! Check your Google Drive for the results.")

except Exception as e:
    print("\n" + "="*20 + " A FATAL ERROR OCCURRED " + "="*20)
    print("The script stopped due to an error. Please review the messages above.")
    # Re-raise the exception to show the full traceback in Colab
    raise e