In [1]:
import os
import random
import librosa
import librosa.display
import numpy as np
import noisereduce as nr
import soundfile as sf
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Configuration
# Input root: process_dataset() expects one sub-folder per command label here.
DATASET_PATH = "dataset"
# Output root: process_dataset() writes "validation/<label>" and "train/<label>" below it.
OUTPUT_PATH = "dataaug2"
# Command labels; each one is used as a folder name under DATASET_PATH.
COMMANDS = [
    "baca",
    "berhenti",
    "foto",
    "halo",
    "info",
    "kembali",
    "ulang",
]
# Sampling rate (Hz) used when loading, denoising, pitch-shifting and writing audio.
SAMPLE_RATE = 16_000
# Names of the available augmentations; these are the keys looked up in AUGMENTATION_MAP.
AUG_FUNCTIONS = [
    'noise',
    'stretch',
    'pitch',
    'dyn_change',
    'speednpitch',
]

In [3]:
# Preprocessing functions
def load_audio(file_path, sr=SAMPLE_RATE):
    """Load an audio file through ``librosa.load`` and return only the samples.

    Args:
        file_path: Path of the audio file to read.
        sr: Sampling rate passed to ``librosa.load`` (defaults to SAMPLE_RATE).

    Returns:
        The sample array returned by librosa; the sampling rate that
        ``librosa.load`` returns alongside it is discarded.
    """
    # librosa.load returns (samples, sampling_rate); keep the samples only.
    return librosa.load(file_path, sr=sr)[0]

def reduce_noise(audio):
    """Denoise a signal with ``noisereduce.reduce_noise``.

    Args:
        audio: Sample array to denoise.

    Returns:
        Whatever ``nr.reduce_noise`` returns for the signal, computed with the
        module-level SAMPLE_RATE as the sampling rate.
    """
    denoised = nr.reduce_noise(y=audio, sr=SAMPLE_RATE)
    return denoised

def preprocess_audio(file_path):
    """Load an audio file and return its noise-reduced samples.

    Chains ``load_audio`` (default sampling rate) into ``reduce_noise``.

    Args:
        file_path: Path of the audio file to preprocess.

    Returns:
        The denoised sample array produced by ``reduce_noise``.
    """
    return reduce_noise(load_audio(file_path))

In [4]:
# Augmentation functions
def noise(data):
    """Add Gaussian noise whose amplitude is a random fraction of the signal peak.

    The noise amplitude is ``0.05 * U(0, 1) * peak``, where ``peak`` is the
    largest absolute sample value. The absolute value matters: scaling by the
    signed maximum would add no noise at all to a signal whose largest sample
    is 0 (for example one that is entirely non-positive).

    Args:
        data: 1-D NumPy array of samples.

    Returns:
        A new float64 array with the same length as ``data``.

    Raises:
        ValueError: If ``data`` is empty (raised by ``np.amax``).
    """
    # Peak magnitude, not signed maximum, so negative-heavy signals still get noise.
    peak = np.amax(np.abs(data))
    # Draw the amplitude first, then the noise vector (same RNG order as before).
    noise_amp = 0.05 * np.random.uniform() * peak
    return data.astype('float64') + noise_amp * np.random.normal(size=data.shape[0])

def stretch(data):
    """Time-stretch a signal by a random rate using librosa.

    The rate is drawn uniformly from [0.8, 1.2) and passed to
    ``librosa.effects.time_stretch``.

    Args:
        data: Sample array to stretch.

    Returns:
        The array returned by ``librosa.effects.time_stretch``.
    """
    stretch_rate = np.random.uniform(0.8, 1.2)
    stretched = librosa.effects.time_stretch(data, rate=stretch_rate)
    return stretched

def pitch(data):
    """Pitch-shift a signal by a random whole number of steps in [-2, 2].

    Args:
        data: NumPy array of samples; it is converted to float64 before being
            passed to ``librosa.effects.pitch_shift``.

    Returns:
        The array returned by ``librosa.effects.pitch_shift`` at SAMPLE_RATE.
    """
    pitch_pm = 2
    # np.random.randint excludes its upper bound, so add 1 to make the range
    # symmetric (-pitch_pm .. +pitch_pm). Without it +pitch_pm is never drawn.
    # A draw of 0 is still possible and yields an effectively unshifted copy.
    pitch_change = np.random.randint(-pitch_pm, pitch_pm + 1)
    return librosa.effects.pitch_shift(y=data.astype('float64'),
                                       sr=SAMPLE_RATE,
                                       n_steps=pitch_change)

def dyn_change(data):
    """Scale the signal by a random gain drawn uniformly from [0.3, 2.0).

    Args:
        data: Sample array to scale.

    Returns:
        ``data`` multiplied by the drawn gain factor.
    """
    gain = np.random.uniform(low=0.3, high=2.0)
    scaled = data * gain
    return scaled

def speednpitch(data):
    """Resample the signal at a random step > 1 and zero-pad to the original length.

    The step is ``1.2 / U(0.8, 1)``, i.e. between 1.2 and 1.5 samples. The signal
    is linearly interpolated at those positions, which yields fewer samples than
    the input; the remainder of the output is left at zero.

    Args:
        data: 1-D sample array.

    Returns:
        An array with the same shape and dtype as ``data`` (``np.zeros_like``)
        holding the resampled signal followed by zeros.
    """
    n_samples = len(data)
    length_change = np.random.uniform(low=0.8, high=1)
    speed_fac = 1.2 / length_change

    # Positions to read from the original signal, spaced speed_fac apart.
    query_positions = np.arange(0, n_samples, speed_fac)
    resampled = np.interp(query_positions, np.arange(0, n_samples), data)

    # Copy as much of the resampled signal as fits; the rest stays zero.
    keep = min(n_samples, len(resampled))
    data_out = np.zeros_like(data)
    data_out[:keep] = resampled[:keep]
    return data_out

In [5]:
# Lookup table from augmentation name (as listed in AUG_FUNCTIONS) to the
# function that implements it.
AUGMENTATION_MAP = dict(
    noise=noise,
    stretch=stretch,
    pitch=pitch,
    dyn_change=dyn_change,
    speednpitch=speednpitch,
)


In [6]:
# Directory creation
def ensure_dir(path):
    """Create ``path`` (including missing parents) if it does not exist yet.

    Uses ``exist_ok=True`` instead of a separate existence check, so there is
    no window between "check" and "create" in which another process could
    create the directory and make ``os.makedirs`` fail.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

In [7]:
# Main pipeline
def process_dataset():
    """Split each command's recordings into train/validation and write the outputs.

    For every label in COMMANDS:
      * the ``.wav`` files in ``DATASET_PATH/<label>`` are split 70/30 with a
        fixed ``random_state``;
      * validation files are preprocessed and written unchanged in name to
        ``OUTPUT_PATH/validation/<label>``;
      * each training file is preprocessed and three distinct, randomly chosen
        augmentations are applied; every augmented result is peak-normalised
        and written to ``OUTPUT_PATH/train/<label>`` as
        ``<name>_aug<k>_<augmentation>.wav``.

    Only augmented variants are written for training files; the preprocessed
    original itself is not saved under ``train``.

    Augmentation failures are reported with ``print`` and skipped so one bad
    file does not abort the whole run.
    """
    for label in COMMANDS:
        label_path = os.path.join(DATASET_PATH, label)
        # os.listdir returns entries in arbitrary order; sort them so the fixed
        # random_state below really produces the same split on every machine.
        files = sorted(f for f in os.listdir(label_path) if f.endswith(".wav"))

        train_files, val_files = train_test_split(files, test_size=0.3, random_state=42)

        # Create the output folders once per label rather than once per file.
        val_dir = os.path.join(OUTPUT_PATH, "validation", label)
        train_dir = os.path.join(OUTPUT_PATH, "train", label)
        ensure_dir(val_dir)
        ensure_dir(train_dir)

        # === Save validation data ===
        for f in val_files:
            file_path = os.path.join(label_path, f)
            audio = preprocess_audio(file_path)
            sf.write(os.path.join(val_dir, f), audio, SAMPLE_RATE)

        # === Preprocess and augment training data ===
        for f in train_files:
            file_path = os.path.join(label_path, f)
            audio = preprocess_audio(file_path)
            base_name = os.path.splitext(f)[0]

            # Pick 3 distinct augmentation functions at random.
            selected_augs = random.sample(AUG_FUNCTIONS, 3)

            for i, aug_name in enumerate(selected_augs):
                aug_func = AUGMENTATION_MAP[aug_name]
                try:
                    augmented = aug_func(audio)
                    # Peak-normalise the augmented signal. A silent clip has a
                    # peak of 0; dividing by it would fill the output with NaN,
                    # so such clips are written as-is.
                    peak = np.max(np.abs(augmented))
                    if peak > 0:
                        augmented = augmented / peak

                    out_file = f"{base_name}_aug{i+1}_{aug_name}.wav"
                    sf.write(os.path.join(train_dir, out_file), augmented, SAMPLE_RATE)
                except Exception as e:
                    # Best effort: report the failure and continue with the next augmentation.
                    print(f"Error augmenting {f} with {aug_name}: {e}")

# Run the full split / preprocess / augment pipeline when this code is executed
# as the main module (which includes running it as a notebook cell).
if __name__ == "__main__":
    process_dataset()
