<a href="https://colab.research.google.com/github/glickko/googlecolabtools/blob/main/Datasets_RVC_maker_zip_output.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title Mount Google Drive

# Mount the user's Google Drive at /content/drive. The processing cell at the
# end of the notebook copies its result ZIP to
# /content/drive/MyDrive/audio/<audio_name>/, so this cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
#@title Parameter Input
# Form fields that configure the processing cell below. The "#@param"
# annotations are Colab form markup and are intentionally left untouched.
# mode: "Splitting" additionally slices the separated vocals into dataset clips.
mode = "Splitting"  #@param {type:"string", options:["Splitting", "Separate"]}
# dataset: where the input audio comes from ("Youtube" uses url, "Drive" uses drive_path).
dataset = "Youtube"  #@param {type:"string", options:["Youtube", "Drive"]}
# Use "htdemucs" as requested
demucs_model = "htdemucs"  #@param {type:"string", options:["demucs", "htdemucs", "demucs_extra", "htdemucs_ft"]}
url = ""  #@param {type:"string", multiline:false}
drive_path = ""  #@param {type:"string", multiline:false}
# audio_name is used to name the downloaded file, the output folders and the ZIP.
audio_name = "gamma32k_sing"  #@param {type:"string", multiline:false}
chunk_duration = 1800  #@param {type:"number"}  # Duration of each chunk in seconds (e.g. 1800 = 30 minutes)
# Sample-rate dropdown: "0" = keep the original sample rate; the other options resample.
output_sr = "44100"  #@param {type:"string", options:["0", "8000", "16000", "22050", "32000", "44100", "48000"]}
# The dropdown yields a string; convert it once so later code can compare against 0.
output_sr = int(output_sr)


In [None]:
#@title Process Parameter and Zip output
import os
import subprocess
import glob

print("Memulai proses...\n")

# Make sure the Colab runtime is using a GPU (for example, a T4).
# The leading "!" is the notebook shell escape: it runs nvidia-smi and shows
# its output in the cell.
print("GPU Info:")
!nvidia-smi

# Helper that reads an audio file's duration with ffprobe
def get_duration(file_path):
    """Return the duration of ``file_path`` in seconds, or ``None`` on failure.

    Runs ``ffprobe`` asking only for the container's ``format=duration`` entry
    and parses the single value it prints.

    Args:
        file_path: Path of the media file to probe.

    Returns:
        The duration as a ``float``, or ``None`` when ffprobe cannot be
        started or does not print a number. The reason is printed in both
        cases; no exception is propagated for these failures.
    """
    cmd = [
        "ffprobe", "-v", "error", "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1", file_path,
    ]
    try:
        result = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True,
        )
    except (OSError, ValueError, TypeError) as e:
        # ffprobe is not installed, or the path cannot be passed as an argument.
        print("Gagal mendapatkan durasi audio:", e)
        return None

    try:
        return float(result.stdout.strip())
    except ValueError as e:
        # ffprobe explains the failure on stderr; prefer that over the
        # uninformative float() parsing error.
        print("Gagal mendapatkan durasi audio:", result.stderr.strip() or e)
        return None

# Input validation: fail fast, before anything is installed or downloaded.
# Whitespace-only values are treated as empty.
if not audio_name.strip():
    raise ValueError("Error: Audio Name tidak boleh kosong!")
if dataset == "Youtube" and not url.strip():
    raise ValueError("Error: URL tidak boleh kosong untuk dataset Youtube!")
if dataset == "Drive":
    if not drive_path.strip():
        raise ValueError("Error: Drive Path tidak boleh kosong untuk dataset Drive!")
    # Catch a wrong path here instead of surfacing later as an unrelated
    # "cannot get audio duration" failure.
    if not os.path.isfile(drive_path):
        raise FileNotFoundError(f"Error: File tidak ditemukan di Drive Path: {drive_path}")

# === STEP 1: Download the audio (only when dataset == "Youtube") ===
# Sets audio_input to the absolute path of the file the later steps process.
if dataset == "Youtube":
    import sys

    print("Menginstal yt_dlp dan ffmpeg-python...")
    # Install into the interpreter that is running this cell, without a shell.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "yt_dlp", "ffmpeg-python", "--quiet"]
    )
    os.makedirs("youtubeaudio", exist_ok=True)
    print("Downloading audio dari YouTube...")
    try:
        import yt_dlp
        ydl_opts = {
            'format': 'bestaudio/best',
            # Convert whatever was downloaded to WAV after the download.
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'wav',
            }],
            'outtmpl': f'youtubeaudio/{audio_name}.%(ext)s'
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        print("Download selesai.\n")
    except Exception as e:
        print("Terjadi kesalahan saat download:", e)
        # Bare raise keeps the original traceback intact.
        raise
    audio_input = os.path.abspath(f"youtubeaudio/{audio_name}.wav")
    # Make sure the expected WAV really exists before the later steps use it.
    if not os.path.isfile(audio_input):
        raise FileNotFoundError(f"File audio hasil download tidak ditemukan: {audio_input}")
else:
    audio_input = os.path.abspath(drive_path)

# === STEP 2: Install Demucs ===
print("Menginstal Demucs (jika belum terinstal)...")
demucs_install_cmd = "python3 -m pip install --upgrade demucs"
subprocess.run(demucs_install_cmd, shell=True)

# Probe the input length; Step 3 compares it against one hour to decide
# whether the audio has to be processed in chunks.
duration = get_duration(audio_input)
if duration is None:
    raise Exception("Tidak dapat mendapatkan durasi audio.")
print("Durasi audio: {:.0f} detik.\n".format(duration))

# === STEP 3: Separate the vocals with Demucs ===
# Inputs longer than one hour are first cut into chunk_duration-second pieces,
# each piece is separated on its own, and the vocal stems are concatenated.
# Shorter inputs are separated in a single Demucs run.
# Either way vocals_final ends up pointing at the vocal track for Step 4/5.
if duration > 3600:
    import re

    print("Audio berdurasi lebih dari 1 jam. Melakukan splitting audio menjadi beberapa chunk...")
    os.makedirs("chunks", exist_ok=True)

    # Only files named exactly "<audio_name>_<digits>.wav" belong to this run;
    # a plain "<audio_name>_*" glob would also match e.g. "<audio_name>_v2_000.wav".
    chunk_pattern = f"chunks/{glob.escape(audio_name)}_*.wav"
    chunk_path_re = re.compile("chunks/" + re.escape(audio_name) + r"_\d+\.wav")

    def _is_own_chunk(path):
        """Return True when path is a chunk file produced for audio_name."""
        return chunk_path_re.fullmatch(path) is not None

    # Drop chunks left behind by an earlier run (possibly made with another
    # chunk_duration); otherwise they would be processed and merged again.
    for stale_chunk in filter(_is_own_chunk, glob.glob(chunk_pattern)):
        os.remove(stale_chunk)

    # Argument list instead of a shell string: names containing quotes or
    # shell metacharacters can no longer break or alter the command.
    split_cmd = [
        "ffmpeg", "-hide_banner", "-loglevel", "error",
        "-i", audio_input,
        "-f", "segment", "-segment_time", str(chunk_duration),
        "-c", "copy",
        f"chunks/{audio_name}_%03d.wav",
    ]
    subprocess.run(split_cmd)

    chunk_files = sorted(filter(_is_own_chunk, glob.glob(chunk_pattern)))
    if not chunk_files:
        raise Exception("Tidak ada chunk yang ditemukan setelah splitting audio.")

    print(f"Ditemukan {len(chunk_files)} chunk. Memproses tiap chunk dengan Demucs...\n")
    vocals_files = []
    for idx, chunk_file in enumerate(chunk_files, start=1):
        print(f"Memproses chunk {idx}/{len(chunk_files)}: {chunk_file}")
        cmd = ["python3", "-m", "demucs.separate", "--two-stems", "vocals", "--name", demucs_model, chunk_file]
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if result.stdout:
            print(result.stdout.decode(errors="replace"))
        if result.stderr:
            print(result.stderr.decode(errors="replace"))
        # The output is expected at: separated/<model>/<chunk basename>/vocals.wav
        base = os.path.splitext(os.path.basename(chunk_file))[0]
        vocals_path = os.path.abspath(f"separated/{demucs_model}/{base}/vocals.wav")
        if os.path.exists(vocals_path):
            vocals_files.append(vocals_path)
        else:
            # Best effort: a failed chunk is reported and skipped.
            print(f"Warning: Hasil Demucs untuk {chunk_file} tidak ditemukan di {vocals_path}")

    if not vocals_files:
        raise Exception("Tidak ada file vokal yang dihasilkan dari Demucs pada tiap chunk. Coba gunakan model alternatif seperti 'htdemucs'.")

    # Join the per-chunk vocal files with the FFmpeg concat demuxer.
    combined_folder = os.path.abspath(f"separated/{demucs_model}/{audio_name}")
    os.makedirs(combined_folder, exist_ok=True)
    list_file = "chunks_list.txt"
    with open(list_file, "w", encoding="utf-8") as f:
        for vocals_file in vocals_files:
            # Inside the list file's single quotes a literal ' is written as '\''.
            escaped_path = vocals_file.replace("'", "'\\''")
            f.write(f"file '{escaped_path}'\n")
    combined_vocals = os.path.join(combined_folder, "vocals_combined.wav")
    # -y: overwrite the result of an earlier run instead of refusing and
    # silently leaving the stale file in place.
    concat_cmd = [
        "ffmpeg", "-hide_banner", "-loglevel", "error", "-y",
        "-f", "concat", "-safe", "0",
        "-i", list_file,
        "-c", "copy",
        combined_vocals,
    ]
    concat_result = subprocess.run(concat_cmd)
    if concat_result.returncode != 0 or not os.path.exists(combined_vocals):
        raise RuntimeError(
            f"Penggabungan file vokal gagal (ffmpeg exit code {concat_result.returncode})."
        )
    print("Penggabungan file vokal selesai.\n")
    vocals_final = combined_vocals
else:
    print("Memproses audio penuh dengan Demucs...")
    cmd = ["python3", "-m", "demucs.separate", "--two-stems", "vocals", "--name", demucs_model, audio_input]
    print("Menjalankan perintah:\n", " ".join(cmd))
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    print(result.stdout.decode(errors="replace"))
    if result.stderr:
        print("Pesan error dari Demucs:\n", result.stderr.decode(errors="replace"))
    # As in the chunk branch, the output folder is named after the input file
    # (without its extension), which for a Drive input is not necessarily
    # audio_name.
    track_name = os.path.basename(audio_input).rsplit(".", 1)[0]
    vocals_final = os.path.abspath(f"separated/{demucs_model}/{track_name}/vocals.wav")
    if not os.path.exists(vocals_final):
        raise FileNotFoundError(f"File vokal tidak ditemukan: {vocals_final}")
    print("Proses pemisahan selesai.\n")

# === STEP 4: Slice the vocals into clips (only when mode == "Splitting") ===
# Loads vocals_final as mono, cuts it at silent passages and writes the clips
# to dataset/<audio_name>/split_<i>.wav.
if mode == "Splitting":
    print("Menginstal numpy, librosa, dan soundfile...")
    subprocess.run("python3 -m pip install numpy librosa soundfile --quiet", shell=True)
    os.makedirs(f"dataset/{audio_name}", exist_ok=True)
    print("Melakukan splitting pada hasil vokal (output mono)...\n")
    try:
        import re

        import numpy as np
        import librosa
        import soundfile as sf

        if not os.path.exists(vocals_final):
            raise FileNotFoundError(f"File vokal tidak ditemukan: {vocals_final}")
        # output_sr == 0 means "keep the file's own sample rate".
        load_sr = None if output_sr == 0 else output_sr
        # Load the audio as mono at the selected sample rate
        audio, sr = librosa.load(vocals_final, sr=load_sr, mono=True)

        # === Slicing helpers (RMS based) ===
        def get_rms(y, frame_length=2048, hop_length=512, pad_mode="constant"):
            """Return the RMS level of y for frames taken every hop_length samples.

            y is padded by frame_length // 2 on both sides, viewed as
            overlapping frames of frame_length samples (strided view, no
            copy), and every hop_length-th frame is reduced to its RMS.
            """
            padding = (int(frame_length // 2), int(frame_length // 2))
            y = np.pad(y, padding, mode=pad_mode)
            axis = -1
            out_strides = y.strides + (y.strides[axis],)
            x_shape_trimmed = list(y.shape)
            x_shape_trimmed[axis] -= frame_length - 1
            out_shape = tuple(x_shape_trimmed) + (frame_length,)
            xw = np.lib.stride_tricks.as_strided(y, shape=out_shape, strides=out_strides)
            if axis < 0:
                target_axis = axis - 1
            else:
                target_axis = axis + 1
            xw = np.moveaxis(xw, -1, target_axis)
            slices = [slice(None)] * xw.ndim
            slices[axis] = slice(0, None, hop_length)
            x = xw[tuple(slices)]
            power = np.mean(np.abs(x)**2, axis=-2, keepdims=True)
            return np.sqrt(power).squeeze(0)

        class Slicer:
            """Cut a mono waveform into clips at sufficiently long silent passages.

            The constructor takes durations in milliseconds and threshold in
            dB. Internally hop_size and win_size are stored in samples, while
            min_length, min_interval and max_sil_kept are stored in hop-sized
            frames.
            """

            def __init__(self, sr, threshold=-40., min_length=5000, min_interval=300, hop_size=20, max_sil_kept=5000):
                if not min_length >= min_interval >= hop_size:
                    raise ValueError('min_length >= min_interval >= hop_size harus terpenuhi')
                if not max_sil_kept >= hop_size:
                    raise ValueError('max_sil_kept >= hop_size harus terpenuhi')
                min_interval = sr * min_interval / 1000
                # dB -> linear amplitude, comparable with the RMS values.
                self.threshold = 10 ** (threshold/20.)
                self.hop_size = round(sr * hop_size / 1000)
                self.win_size = min(round(min_interval), 4 * self.hop_size)
                self.min_length = round(sr * min_length / 1000 / self.hop_size)
                self.min_interval = round(min_interval / self.hop_size)
                self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size)

            def _apply_slice(self, waveform, begin, end):
                """Return the samples between frame indices begin and end."""
                return waveform[begin*self.hop_size: min(len(waveform), end*self.hop_size)]

            def slice(self, waveform):
                """Return the list of clips obtained by cutting waveform at silences.

                The whole waveform is returned as a single clip when it is
                not longer than min_length or when no cut point is found.
                """
                # min_length is stored in frames, so convert the sample count
                # to frames (rounding up) before comparing; comparing the raw
                # sample count would let almost every short clip through.
                frame_count = (len(waveform) + self.hop_size - 1) // self.hop_size
                if frame_count <= self.min_length:
                    return [waveform]
                rms_list = get_rms(waveform, frame_length=self.win_size, hop_length=self.hop_size)
                sil_tags = []  # (start_frame, end_frame) of each removed silent span
                silence_start = None
                clip_start = 0
                for i, rms in enumerate(rms_list):
                    # Silent frame: remember where the silence began.
                    if rms < self.threshold:
                        if silence_start is None:
                            silence_start = i
                        continue
                    # Voiced frame with no silence pending: nothing to decide.
                    if silence_start is None:
                        continue
                    # A silence just ended; decide whether it yields a cut.
                    is_leading_silence = silence_start == 0 and i > self.max_sil_kept
                    need_slice_middle = i - silence_start >= self.min_interval and i - clip_start >= self.min_length
                    if not is_leading_silence and not need_slice_middle:
                        silence_start = None
                        continue
                    if i - silence_start <= self.max_sil_kept:
                        # Short silence: cut once at its quietest frame.
                        pos = rms_list[silence_start: i+1].argmin() + silence_start
                        sil_tags.append((0, pos) if silence_start == 0 else (pos, pos))
                        clip_start = pos
                    elif i - silence_start <= self.max_sil_kept * 2:
                        # Medium silence: the kept windows at both ends overlap.
                        pos = rms_list[i-self.max_sil_kept: silence_start+self.max_sil_kept+1].argmin() + i-self.max_sil_kept
                        pos_l = rms_list[silence_start: silence_start+self.max_sil_kept+1].argmin() + silence_start
                        pos_r = rms_list[i-self.max_sil_kept: i+1].argmin() + i-self.max_sil_kept
                        sil_tags.append((0, pos_r) if silence_start == 0 else (min(pos_l, pos), max(pos_r, pos)))
                        clip_start = pos_r
                    else:
                        # Long silence: keep at most max_sil_kept frames per side.
                        pos_l = rms_list[silence_start: silence_start+self.max_sil_kept+1].argmin() + silence_start
                        pos_r = rms_list[i-self.max_sil_kept: i+1].argmin() + i-self.max_sil_kept
                        sil_tags.append((0, pos_r) if silence_start == 0 else (pos_l, pos_r))
                        clip_start = pos_r
                    silence_start = None
                total_frames = len(rms_list)
                # Trailing silence that runs to the end of the audio.
                if silence_start is not None and total_frames - silence_start >= self.min_interval:
                    silence_end = min(total_frames, silence_start+self.max_sil_kept)
                    pos = rms_list[silence_start: silence_end+1].argmin() + silence_start
                    sil_tags.append((pos, total_frames+1))
                if len(sil_tags) == 0:
                    return [waveform]
                # The clips are the spans between consecutive removed silences.
                chunks = []
                if sil_tags[0][0] > 0:
                    chunks.append(self._apply_slice(waveform, 0, sil_tags[0][0]))
                for i in range(len(sil_tags)-1):
                    chunks.append(self._apply_slice(waveform, sil_tags[i][1], sil_tags[i+1][0]))
                if sil_tags[-1][1] < total_frames:
                    chunks.append(self._apply_slice(waveform, sil_tags[-1][1], total_frames))
                return chunks

        slicer = Slicer(sr=sr, threshold=-40, min_length=5000, min_interval=500, hop_size=10, max_sil_kept=500)
        chunks = slicer.slice(audio)
        # Remove clips written by an earlier run so that a shorter result does
        # not leave old split_<i>.wav files mixed into the dataset.
        dataset_dir = f"dataset/{audio_name}"
        for old_name in os.listdir(dataset_dir):
            if re.fullmatch(r"split_\d+\.wav", old_name):
                os.remove(os.path.join(dataset_dir, old_name))
        for i, chunk in enumerate(chunks):
            sf.write(f"dataset/{audio_name}/split_{i}.wav", chunk, sr)
        print("Splitting audio selesai.\n")
    except Exception as e:
        print("Terjadi kesalahan saat splitting:", e)
        # Bare raise keeps the original traceback intact.
        raise

# === STEP 5: Zip the results and copy them to Google Drive ===
# Uses shutil/os calls and an argument list instead of shell strings, so
# names with quotes or shell metacharacters are safe, and failures raise
# instead of being ignored.
import shutil

print("Mengemas hasil ke file ZIP dengan struktur nested dan menyalinnya ke Google Drive...")
# Staging folder that gives the archive its nested layout.
final_output_folder = f"final_output/{audio_name}"
staging_root = os.path.abspath("final_output")
staging_dir = os.path.abspath(final_output_folder)
# Start from an empty staging folder so files from an earlier run are not
# packed again. Only delete when the folder really lies inside final_output/.
if (
    os.path.isdir(staging_dir)
    and staging_dir != staging_root
    and os.path.commonpath([staging_root, staging_dir]) == staging_root
):
    shutil.rmtree(staging_dir)
os.makedirs(final_output_folder, exist_ok=True)

# Copy the Demucs result folder to final_output/<audio_name>/demucs/<audio_name>.
# The source is the folder that actually holds vocals_final, which is not
# necessarily named after audio_name.
demucs_dest = os.path.join(final_output_folder, "demucs")
os.makedirs(demucs_dest, exist_ok=True)
demucs_src = os.path.dirname(vocals_final)
if not os.path.isdir(demucs_src):
    raise FileNotFoundError(f"Folder hasil Demucs tidak ditemukan: {demucs_src}")
shutil.copytree(demucs_src, os.path.join(demucs_dest, audio_name), dirs_exist_ok=True)

# In Splitting mode, also copy the sliced clips to
# final_output/<audio_name>/dataset/<audio_name>.
if mode == "Splitting":
    dataset_dest = os.path.join(final_output_folder, "dataset")
    os.makedirs(dataset_dest, exist_ok=True)
    shutil.copytree(
        os.path.abspath("dataset/" + audio_name),
        os.path.join(dataset_dest, audio_name),
        dirs_exist_ok=True,
    )

# Build the ZIP from the staging folder. The relative path
# final_output/<audio_name> is what is handed to zip.
zip_filename = f"{audio_name}_results.zip"
# zip updates an existing archive in place; remove the old one so entries
# from an earlier run do not survive.
if os.path.exists(zip_filename):
    os.remove(zip_filename)
zip_result = subprocess.run(["zip", "-r", zip_filename, f"final_output/{audio_name}"])
if zip_result.returncode != 0:
    raise RuntimeError(f"Pembuatan file ZIP gagal (exit code {zip_result.returncode}).")
print("Pengemasan file ZIP selesai.\n")

# Copy the ZIP to Google Drive
drive_dest = f"/content/drive/MyDrive/audio/{audio_name}"
os.makedirs(drive_dest, exist_ok=True)
shutil.copy(zip_filename, drive_dest)
print("\nProses selesai!")
