In [4]:
from pathlib import Path

# Repository root, given as an absolute path.
PROJECT_ROOT = Path(r"C:\FYP\PROJECT")

# Root of the audio_preprocessing module; every other location hangs off it.
AUDIO_PREPROC_ROOT = PROJECT_ROOT.joinpath("product", "audio_preprocessing")

# Where the source dataset audio is read from.
AUDIO_DIR = AUDIO_PREPROC_ROOT.joinpath("data", "ESC-50", "audio")

# Where augmented waveforms and their spectrogram images are written.
AUG_AUDIO_OUT = AUDIO_PREPROC_ROOT.joinpath("outputs", "augmented_audio")
AUG_SPEC_OUT = AUDIO_PREPROC_ROOT.joinpath("outputs", "augmented_spectrograms")

# Create both output folders (and any missing parents); re-running is harmless.
for out_dir in (AUG_AUDIO_OUT, AUG_SPEC_OUT):
    out_dir.mkdir(parents=True, exist_ok=True)

# Echo the resolved locations so a wrong root is obvious straight away.
for label, location in (
    ("AUDIO_DIR      =", AUDIO_DIR),
    ("AUG_AUDIO_OUT  =", AUG_AUDIO_OUT),
    ("AUG_SPEC_OUT   =", AUG_SPEC_OUT),
):
    print(label, location)
print("AUDIO_DIR exists? ", AUDIO_DIR.exists())


AUDIO_DIR      = C:\FYP\PROJECT\product\audio_preprocessing\data\ESC-50\audio
AUG_AUDIO_OUT  = C:\FYP\PROJECT\product\audio_preprocessing\outputs\augmented_audio
AUG_SPEC_OUT   = C:\FYP\PROJECT\product\audio_preprocessing\outputs\augmented_spectrograms
AUDIO_DIR exists?  True


In [5]:
import librosa
import soundfile as sf  # to write .wav files

# Pick a clip that is known to be present in AUDIO_DIR.
SAMPLE_FILE = "1-137-A-32.wav"

wav_path = AUDIO_DIR.joinpath(SAMPLE_FILE)

# sr=None asks librosa to keep the file's own sample rate instead of resampling.
# y is the waveform array, sr the sample rate.
y, sr = librosa.load(wav_path, sr=None)

print(f"Loaded: {wav_path}")
print(f"Waveform shape: {y.shape}")
print(f"Sample rate: {sr}")


Loaded: C:\FYP\PROJECT\product\audio_preprocessing\data\ESC-50\audio\1-137-A-32.wav
Waveform shape: (220500,)
Sample rate: 44100


In [6]:
import numpy as np
import librosa

def augment_add_noise(y, noise_factor=0.02, rng=None):
    """Add white Gaussian noise scaled relative to the signal's own level.

    The noise standard deviation is ``noise_factor * np.std(y)``, so the same
    ``noise_factor`` gives a comparable perceived noise level on quiet and
    loud clips.

    Parameters
    ----------
    y : array-like
        Waveform samples. Any shape is accepted (e.g. 1-D mono or a 2-D
        multi-channel array); the noise is drawn with the same shape.
    noise_factor : float, default 0.02
        Noise standard deviation as a fraction of ``np.std(y)``.
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, NumPy's global random state is
        used, so ``np.random.seed(...)`` controls the result.

    Returns
    -------
    numpy.ndarray
        Noisy waveform, same shape as ``y``, dtype float32.
    """
    y = np.asarray(y)
    # Draw noise with the full shape of y. Using len(y) would only match for
    # 1-D input and fails to broadcast against multi-channel arrays.
    if rng is None:
        noise = np.random.standard_normal(y.shape)
    else:
        noise = rng.standard_normal(y.shape)
    augmented = y + noise_factor * np.std(y) * noise
    return augmented.astype(np.float32)

def augment_time_stretch(y, rate=0.9):
    """Time-stretch a waveform with ``librosa.effects.time_stretch``.

    ``rate`` below 1.0 slows the clip down (output is longer); above 1.0
    speeds it up (output is shorter). The result is returned as float32.
    """
    return librosa.effects.time_stretch(y, rate=rate).astype(np.float32)

def augment_pitch_shift(y, sr, n_steps=2):
    """Shift the pitch of a waveform with ``librosa.effects.pitch_shift``.

    ``n_steps`` is expressed in semitones: ``+2`` raises the pitch by two
    semitones. ``sr`` is the sample rate of ``y``. The result is returned
    as float32.
    """
    return librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps).astype(np.float32)

def augment_gain(y, gain_db=6.0):
    """Apply a gain of ``gain_db`` decibels to a waveform.

    The decibel value is converted to a linear amplitude factor
    (``10 ** (gain_db / 20)``), the samples are scaled by it, and the result
    is limited to the range [-1.0, 1.0] before being returned as float32.
    """
    amplitude_scale = 10 ** (gain_db / 20.0)
    # Hard-limit to [-1, 1] so boosted samples cannot exceed that range.
    return np.clip(y * amplitude_scale, -1.0, 1.0).astype(np.float32)


In [7]:

# Build one augmented variant per transform, keyed by the file name it is saved under.
base_name = SAMPLE_FILE.replace(".wav", "")
variants = {
    f"{base_name}_noise.wav": augment_add_noise(y, noise_factor=0.02),
    f"{base_name}_stretch0.9.wav": augment_time_stretch(y, rate=0.9),
    f"{base_name}_pitch+2.wav": augment_pitch_shift(y, sr=sr, n_steps=+2),
    f"{base_name}_gain+6db.wav": augment_gain(y, gain_db=6.0),
}

# Write every variant at the original sample rate.
# The time-stretched clip has a different length from the source; that is expected.
for new_name, new_waveform in variants.items():
    out_path = AUG_AUDIO_OUT / new_name
    sf.write(out_path, new_waveform, sr)
    print(f"Wrote: {out_path}")


Wrote: C:\FYP\PROJECT\product\audio_preprocessing\outputs\augmented_audio\1-137-A-32_noise.wav
Wrote: C:\FYP\PROJECT\product\audio_preprocessing\outputs\augmented_audio\1-137-A-32_stretch0.9.wav
Wrote: C:\FYP\PROJECT\product\audio_preprocessing\outputs\augmented_audio\1-137-A-32_pitch+2.wav
Wrote: C:\FYP\PROJECT\product\audio_preprocessing\outputs\augmented_audio\1-137-A-32_gain+6db.wav


In [8]:
import matplotlib.pyplot as plt
import librosa.display

def audio_to_mel_db(y, sr, n_fft=2048, hop_length=512, n_mels=128):
    """Convert a waveform into a mel spectrogram expressed in decibels.

    A mel power spectrogram is computed with ``librosa.feature.melspectrogram``
    using the given ``n_fft``, ``hop_length`` and ``n_mels``, then converted
    with ``librosa.power_to_db``. ``ref=np.max`` makes the values relative to
    the spectrogram's own maximum.
    """
    mel_power = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels
    )
    return librosa.power_to_db(mel_power, ref=np.max)

# Render and save one mel-spectrogram PNG per augmented waveform.
for new_name, new_waveform in variants.items():
    S_db = audio_to_mel_db(new_waveform, sr)

    # Open a fresh figure; the pyplot calls below all act on it until plt.close().
    plt.figure(figsize=(10, 3))
    librosa.display.specshow(
        S_db, sr=sr, hop_length=512, x_axis='time', y_axis='mel', cmap='magma'
    )
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"Mel spectrogram: {new_name}")
    plt.tight_layout()

    # The image is saved under the waveform's file name with a .png extension.
    png_name = new_name.replace(".wav", ".png")
    out_path = AUG_SPEC_OUT / png_name
    plt.savefig(out_path, dpi=150, bbox_inches="tight")
    # Close the figure so repeated iterations do not accumulate open figures.
    plt.close()

    print(f"Saved augmented spectrogram → {out_path}")


Saved augmented spectrogram → C:\FYP\PROJECT\product\audio_preprocessing\outputs\augmented_spectrograms\1-137-A-32_noise.png
Saved augmented spectrogram → C:\FYP\PROJECT\product\audio_preprocessing\outputs\augmented_spectrograms\1-137-A-32_stretch0.9.png
Saved augmented spectrogram → C:\FYP\PROJECT\product\audio_preprocessing\outputs\augmented_spectrograms\1-137-A-32_pitch+2.png
Saved augmented spectrogram → C:\FYP\PROJECT\product\audio_preprocessing\outputs\augmented_spectrograms\1-137-A-32_gain+6db.png
