## How do we make our *limited* audio data more diverse?

In [None]:
import os
import random
from pathlib import Path

import librosa
import numpy as np
import soundfile as sf
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift

### Configuration of Augmentations

In [None]:

# --- Paths -------------------------------------------------------------------
# Folder holding the original voice recordings (e.g., "yes.wav", "no.wav")
INPUT_DIR = "/workspace/data/raw_audio"
# Folder that receives the augmented audio files
OUTPUT_DIR = "/workspace/data/augmented_audio"

# --- Audio settings ----------------------------------------------------------
# Every audio file is resampled to this rate so all outputs are consistent
SAMPLE_RATE = 16_000
# How many augmented copies are generated for *each* original file
NUM_AUGMENTATIONS = 3

# --- Augmentation strengths --------------------------------------------------
# Each entry is a (minimum, maximum) range; widen a range for more variability,
# narrow it for subtler changes.
AUGMENTATION_PARAMS = dict(
    # Amplitude range of the added Gaussian noise
    noise_amplitude=(0.001, 0.015),
    # Speed-up / slow-down factor range (pitch is left unaffected)
    time_stretch_rate=(0.9, 1.1),
    # Pitch shift range in semitones (negative = lower, positive = higher)
    pitch_shift_semitones=(-2, 2),
    # Time shift range as a fraction of the total length (e.g., 0.1 = 10%)
    shift_fraction=(-0.2, 0.2),
)

# --- Reproducibility ---------------------------------------------------------
# Keep this seed fixed for repeatable augmentations; change it for new variations
RANDOM_SEED = 42

### Setup Augmentation Pipeline

In [None]:
# Ensure consistent random behavior.
# Seeding Python's `random` alone is not enough: audiomentations also draws from
# NumPy's global random generator (e.g., for the Gaussian noise samples), so both
# generators are seeded to make the augmented output repeatable.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

# Unpack each (minimum, maximum) range once so the pipeline below stays readable
noise_min, noise_max = AUGMENTATION_PARAMS["noise_amplitude"]
stretch_min, stretch_max = AUGMENTATION_PARAMS["time_stretch_rate"]
pitch_min, pitch_max = AUGMENTATION_PARAMS["pitch_shift_semitones"]
shift_min, shift_max = AUGMENTATION_PARAMS["shift_fraction"]

# Create an augmenter composed of multiple audio transformations.
# Every transform has p=0.5, i.e. each one is applied independently with 50%
# probability, so a given augmented copy may receive any subset of them.
augmenter = Compose([
    AddGaussianNoise(min_amplitude=noise_min, max_amplitude=noise_max, p=0.5),
    TimeStretch(min_rate=stretch_min, max_rate=stretch_max, p=0.5),
    PitchShift(min_semitones=pitch_min, max_semitones=pitch_max, p=0.5),
    Shift(min_shift=shift_min, max_shift=shift_max, shift_unit="fraction", p=0.5),
])


### Process and Augment a Single File

In [None]:
def augment_and_save(file_path, sample_rate, input_root, output_root):
    """Generate augmented copies of one audio file and save them as WAV files.

    The file's location relative to ``input_root`` is mirrored under
    ``output_root``; for example ``red/yes.wav`` produces ``red/yes_aug1.wav``,
    ``red/yes_aug2.wav``, ... up to ``NUM_AUGMENTATIONS`` copies.

    Args:
        file_path: ``Path`` of the source audio file, located under ``input_root``.
        sample_rate: Rate the audio is resampled to on load and written with.
        input_root: Root directory of the original recordings.
        output_root: ``Path`` of the root directory for the augmented files.

    Raises:
        ValueError: If ``file_path`` is not located under ``input_root``
            (raised by ``Path.relative_to``).
    """
    relative_path = file_path.relative_to(input_root)       # e.g., red/yes.wav
    output_subdir = output_root / relative_path.parent      # e.g., <output_root>/red
    basename = relative_path.stem                           # e.g., yes

    # The audio is resampled to `sample_rate` on load, so the returned rate is not needed
    samples, _ = librosa.load(file_path, sr=sample_rate)

    # The output folder is the same for every copy, so create it once up front
    output_subdir.mkdir(parents=True, exist_ok=True)

    for copy_number in range(1, NUM_AUGMENTATIONS + 1):
        # Each call re-randomizes the transforms, so every copy differs
        augmented = augmenter(samples=samples, sample_rate=sample_rate)

        # Save augmented file
        output_path = output_subdir / f"{basename}_aug{copy_number}.wav"
        sf.write(output_path, augmented, sample_rate)
        print(f"Saved: {output_path}")

### Apply Augmentation to All Files In *Input* Folder

In [None]:
input_root = Path(INPUT_DIR)
output_root = Path(OUTPUT_DIR)

# Collect every .wav file below the input folder, including class subfolders.
# rglob yields files in arbitrary filesystem order; sorting fixes the processing
# order so the seeded random augmentations land on the same files on every run.
input_files = sorted(input_root.rglob("*.wav"))
print(f"Found {len(input_files)} audio files in {INPUT_DIR}")

# Augment each recording, mirroring its subfolder structure under the output folder
for file_path in input_files:
    augment_and_save(file_path, SAMPLE_RATE, input_root, output_root)

print(f"\n✅ Done. Augmented files saved to: {OUTPUT_DIR}")

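**You may now listen to the augmented audio files. They were saved to the folder configured as `OUTPUT_DIR`, which should correspond to `\path_to_GitHub\lakota.aicode.camp\week_two\data\augmented_audio\` in your copy of the GitHub repository.**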