In [2]:
import os
import random
import librosa
import soundfile as sf
import numpy as np
from pydub import AudioSegment

# ============================================
# Fungsi-fungsi Augmentasi
# ============================================

def add_noise(data, noise_factor=0.005):
    """Return `data` with additive Gaussian noise.

    Parameters
    ----------
    data : array-like
        Audio samples. Any shape is accepted (mono 1-D or multi-channel 2-D).
    noise_factor : float, default 0.005
        Multiplier applied to the standard-normal noise before it is added.

    Returns
    -------
    numpy.ndarray
        New array with the same shape as `data`; the input is not modified.
    """
    samples = np.asarray(data)
    # Size the noise by the full shape rather than len(): len() only covers the
    # first axis, which makes the addition fail to broadcast for 2-D input.
    # For 1-D input this draws exactly the same values as randn(len(data)).
    noise = np.random.standard_normal(samples.shape)
    return samples + noise_factor * noise

def change_pitch(data, sr, n_steps):
    """Pitch-shift an audio signal with librosa.

    Parameters
    ----------
    data : array-like
        Audio samples to shift.
    sr : number
        Sampling rate of `data`, forwarded to librosa as `sr`.
    n_steps : float
        Shift amount, forwarded to librosa as `n_steps` (may be fractional
        or negative).

    Returns
    -------
    The value returned by `librosa.effects.pitch_shift`.
    """
    shifted = librosa.effects.pitch_shift(data, sr=sr, n_steps=n_steps)
    return shifted

def change_speed(data, speed_factor):
    """Time-stretch an audio signal with librosa.

    Parameters
    ----------
    data : array-like
        Audio samples to stretch.
    speed_factor : float
        Forwarded to librosa as the `rate` argument of `time_stretch`.

    Returns
    -------
    The value returned by `librosa.effects.time_stretch`.
    """
    stretched = librosa.effects.time_stretch(data, rate=speed_factor)
    return stretched

def shift_time(data, shift_max):
    """Circularly shift `data` by a random number of samples.

    Parameters
    ----------
    data : array-like
        Audio samples to roll.
    shift_max : int
        Largest shift magnitude. The offset is drawn uniformly from the
        inclusive range ``[-shift_max, shift_max]``; ``0`` means "no shift".

    Returns
    -------
    numpy.ndarray
        Rolled copy of `data` (samples pushed off one end re-enter at the other).

    Raises
    ------
    ValueError
        If `shift_max` is negative.
    """
    if shift_max < 0:
        raise ValueError(f"shift_max must be non-negative, got {shift_max}")
    # randint's upper bound is exclusive, so add 1 to make the range symmetric
    # and to keep shift_max == 0 valid (randint(0, 0) would raise).
    shift = np.random.randint(-shift_max, shift_max + 1)
    return np.roll(data, shift)

def change_volume(audio, gain_db):
    """Load an audio file with pydub and return it with a gain applied.

    Parameters
    ----------
    audio : path or file-like
        Passed directly to `AudioSegment.from_file`.
    gain_db : float
        Passed directly to `AudioSegment.apply_gain`.

    Returns
    -------
    The segment returned by `apply_gain`; nothing is written to disk here.
    """
    return AudioSegment.from_file(audio).apply_gain(gain_db)

# ============================================
# Proses Augmentasi
# ============================================

def augment_audio(input_file, output_file):
    """Apply one randomly chosen augmentation to an audio file and save it.

    The file is loaded with `librosa.load(..., sr=None)`, exactly one of
    noise / pitch / speed / shift is applied, and the result is written to
    `output_file` with `sf.write` using the loaded sampling rate.

    Parameters
    ----------
    input_file : str
        Path of the audio file to read.
    output_file : str
        Path the augmented audio is written to.
    """
    signal, rate = librosa.load(input_file, sr=None)

    # One zero-argument callable per augmentation. Random parameters are drawn
    # inside the callable so they are only sampled for the selected transform.
    def _noise():
        return add_noise(signal)

    def _pitch():
        return change_pitch(signal, rate, random.uniform(-2, 2))

    def _speed():
        return change_speed(signal, random.uniform(0.9, 1.1))

    def _shift():
        return shift_time(signal, shift_max=2000)

    transforms = {
        "noise": _noise,
        "pitch": _pitch,
        "speed": _speed,
        "shift": _shift,
    }

    # Insertion order of the dict fixes the candidate order for random.choice.
    picked = random.choice(list(transforms))
    augmented = transforms[picked]()

    sf.write(output_file, augmented, rate)


# ============================================
# Fungsi utama augmentasi folder
# ============================================

def process_folder(folder_path, target_count=100):
    """Top up a folder with augmented clips until it holds `target_count` .wav files.

    Source clips are picked at random from the .wav files present when the
    function starts; each new clip is produced by `augment_audio` and saved
    in the same folder as ``aug_<n>.wav``.

    Parameters
    ----------
    folder_path : str
        Folder that is scanned for ``.wav`` files and receives the new ones.
    target_count : int, default 100
        Desired number of ``.wav`` files in the folder.

    Returns
    -------
    None
        Progress is reported with `print`.
    """
    wav_files = [f for f in os.listdir(folder_path) if f.endswith(".wav")]
    current_count = len(wav_files)

    print(f"\nFolder: {folder_path}")
    print(f"Data asli: {current_count}")

    # Already enough files: report the actual target, not a hard-coded 100.
    if current_count >= target_count:
        print(f"Jumlah file sudah mencapai {target_count}, tidak perlu augmentasi.")
        return

    # Nothing to augment from; random.choice on an empty list would raise.
    if not wav_files:
        print("Tidak ada file .wav sebagai sumber, augmentasi dilewati.")
        return

    augment_needed = target_count - current_count
    print(f"Perlu menambah: {augment_needed} file")

    # Track every name already in use so a resumed run never overwrites an
    # existing aug_<n>.wav (which would leave the folder short of the target).
    taken = set(os.listdir(folder_path))
    next_index = 0
    for _ in range(augment_needed):
        while f"aug_{next_index}.wav" in taken:
            next_index += 1
        new_name = f"aug_{next_index}.wav"
        taken.add(new_name)

        src = os.path.join(folder_path, random.choice(wav_files))
        dst = os.path.join(folder_path, new_name)
        augment_audio(src, dst)

    print(f"Augmentasi selesai → total sekarang = {target_count} file")


# ============================================
# Jalankan untuk semua dataset
# ============================================

datasets = ["Suarayotan", "Suaraasep"]
subfolders = ["buka", "tutup"]

# Visit every <dataset>/<subfolder> combination, dataset-major, and top each
# folder up to 100 .wav files.
for dataset in datasets:
    for sub in subfolders:
        process_folder(os.path.join(dataset, sub), target_count=100)



Folder: Suarayotan/buka
Data asli: 100
Jumlah file sudah mencapai 100, tidak perlu augmentasi.

Folder: Suarayotan/tutup
Data asli: 100
Jumlah file sudah mencapai 100, tidak perlu augmentasi.

Folder: Suaraasep/buka
Data asli: 20
Perlu menambah: 80 file
Augmentasi selesai → total sekarang = 100 file

Folder: Suaraasep/tutup
Data asli: 20
Perlu menambah: 80 file
Augmentasi selesai → total sekarang = 100 file


In [1]:
import os
import shutil
import random

def split_dataset(input_dir, output_dir, val_ratio=0.2, seed=None):
    """Copy a two-class audio dataset into train/ and val/ sub-folders.

    For each category ("buka", "tutup") the ``.wav`` files in
    ``input_dir/<category>`` are shuffled, the first ``int(n * val_ratio)``
    are copied to ``output_dir/val/<category>`` and the rest to
    ``output_dir/train/<category>``. Source files are copied, not moved.

    Parameters
    ----------
    input_dir : str
        Source dataset folder (contains ``buka/`` and ``tutup/``).
    output_dir : str
        Destination folder for the split dataset; created if missing.
    val_ratio : float, default 0.2
        Fraction of each category assigned to validation (0.2 = 20%).
    seed : int or None, default None
        When given, the shuffle uses a private ``random.Random(seed)`` so the
        split is reproducible. When None, the global ``random`` state is used.

    Raises
    ------
    ValueError
        If `val_ratio` is outside ``[0, 1]``.
    """
    if not 0 <= val_ratio <= 1:
        raise ValueError(f"val_ratio must be between 0 and 1, got {val_ratio}")

    shuffler = random if seed is None else random.Random(seed)
    categories = ["buka", "tutup"]

    for category in categories:
        input_path = os.path.join(input_dir, category)
        train_path = os.path.join(output_dir, "train", category)
        val_path = os.path.join(output_dir, "val", category)

        os.makedirs(train_path, exist_ok=True)
        os.makedirs(val_path, exist_ok=True)

        # os.listdir order is arbitrary; sort first so a given seed always
        # produces the same split.
        files = sorted(f for f in os.listdir(input_path) if f.endswith(".wav"))
        shuffler.shuffle(files)

        val_count = int(len(files) * val_ratio)
        val_files = files[:val_count]
        train_files = files[val_count:]

        # Copy each file into its split. If an earlier run left the same file
        # in the opposite split, remove that stale copy so no clip ends up in
        # both train and val.
        for split_files, keep_path, other_path in (
            (val_files, val_path, train_path),
            (train_files, train_path, val_path),
        ):
            for f in split_files:
                stale = os.path.join(other_path, f)
                if os.path.isfile(stale):
                    os.remove(stale)
                shutil.copy(os.path.join(input_path, f), os.path.join(keep_path, f))

        print(f"[OK] {category}: Train = {len(train_files)}, Val = {len(val_files)}")



# --------------------------- #
#      PANGGIL FUNGSINYA      #
# --------------------------- #

# Split settings: source folder, destination folder, validation share.
input_folder = "Gabungan_Suara"
output_folder = "dataset"
val_ratio = 0.2  # e.g. 20% of each category goes to validation

split_dataset(
    input_dir=input_folder,
    output_dir=output_folder,
    val_ratio=val_ratio,
)


[OK] buka: Train = 160, Val = 40
[OK] tutup: Train = 160, Val = 40
