In [None]:
import pandas as pd

In [None]:
# Environment setup via IPython shell escapes: installs the pydub package
# (imported below as AudioSegment) and the ffmpeg system package.
# NOTE(review): ffmpeg is presumably needed as pydub's codec backend; the
# /content paths used later suggest this runs on Colab - confirm.
# NOTE(review): neither package is version-pinned, so reruns may pick up
# different releases.
!pip install pydub --quiet
!apt-get install ffmpeg -y -qq

In [None]:
import os
import random
from pydub import AudioSegment, effects
import numpy as np
from pathlib import Path

In [None]:
# Background-noise recording, decoded once and reused for every augmented file.
noise_path = "/content/hospital_noise.wav"
noise = AudioSegment.from_file(file=noise_path)

In [None]:
def add_background_noise(audio: AudioSegment, noise: AudioSegment, snr_db=5) -> AudioSegment:
    """Mix background noise into ``audio`` at a target signal-to-noise ratio.

    The noise clip is repeated and trimmed to the duration of ``audio``,
    converted to the audio's frame rate, channel count and sample width,
    scaled so that ``20 * log10(audio_rms / noise_rms) == snr_db`` and then
    overlaid on the audio.

    Args:
        audio: Clean segment to augment.
        noise: Background-noise segment; must not be empty.
        snr_db: Target SNR in decibels. Lower values mean louder noise.

    Returns:
        A segment with the same duration and format as ``audio``. When the
        audio is empty or either signal is digitally silent (RMS of 0) the
        SNR is undefined and ``audio`` is returned unchanged.

    Raises:
        ValueError: If ``noise`` has zero length.
    """
    if len(noise) == 0:
        # Previously surfaced as an opaque ZeroDivisionError further down.
        raise ValueError("noise segment is empty; cannot build a background track")

    audio_rms = audio.rms
    if len(audio) == 0 or audio_rms == 0:
        # Nothing to scale the noise against (and log10(0) would be -inf).
        return audio

    # Ceiling division: just enough repetitions to cover the whole audio.
    repeats = -(-len(audio) // len(noise))
    noise_loop = (noise * repeats)[:len(audio)]

    # Match the noise to the audio's format BEFORE measuring its level:
    #  * .rms is expressed in raw sample units, so comparing segments with
    #    different sample widths would give a gain that is off by tens of dB;
    #  * overlay() promotes both operands to the highest frame rate / channel
    #    count / sample width, which would otherwise change the output format
    #    of noisy files relative to the untouched ones.
    noise_loop = (
        noise_loop
        .set_frame_rate(audio.frame_rate)
        .set_channels(audio.channels)
        .set_sample_width(audio.sample_width)
    )

    noise_rms = noise_loop.rms
    if noise_rms == 0:
        # Silent noise cannot be scaled to any SNR; mixing it is a no-op.
        return audio

    # Amplitude ratio for the requested SNR: rms_signal / rms_noise = 10^(snr/20).
    desired_noise_rms = audio_rms / (10 ** (snr_db / 20))
    gain_db = float(20 * np.log10(desired_noise_rms / noise_rms))

    # AudioSegment + number applies a gain in dB.
    noise_adjusted = noise_loop + gain_db
    return audio.overlay(noise_adjusted)

In [None]:
# Dataset splits mapped to the Drive folder that holds each split's audio.
# Insertion order (train, dev, test) is the order the splits are processed in.
dirs = {}
dirs["train"] = "/content/drive/MyDrive/PiotrWysocki/train"
dirs["dev"] = "/content/drive/MyDrive/PiotrWysocki/dev"
dirs["test"] = "/content/drive/MyDrive/PiotrWysocki/test"


In [None]:
# Write a "<split>_noisy" sibling folder for every split. Each .wav file is
# re-exported under the same name; a random subset gets background noise mixed
# in, and a smaller subset of those gets it at a harsher SNR.
NOISE_SEED = 42            # fixed seed so reruns pick the same files
NOISY_FRACTION = 0.25      # share of files that receive background noise
HARSH_FRACTION = 0.2       # share of the noisy files mixed at HARSH_SNR_DB
HARSH_SNR_DB = 0
MILD_SNR_DB = 5

# Dedicated generator: reproducible without touching the global `random` state.
augment_rng = random.Random(NOISE_SEED)

for split, dir_path in dirs.items():
    input_dir = Path(dir_path)
    output_dir = Path(str(input_dir) + "_noisy")
    output_dir.mkdir(exist_ok=True)

    n_total = 0
    n_noisy = 0

    # sorted(): glob order is filesystem-dependent, and the seed only gives
    # reproducible choices if the files are visited in a stable order.
    for file in sorted(input_dir.glob("*.wav")):
        output_audio = AudioSegment.from_file(file)

        if augment_rng.random() < NOISY_FRACTION:
            snr_db = HARSH_SNR_DB if augment_rng.random() < HARSH_FRACTION else MILD_SNR_DB
            output_audio = add_background_noise(output_audio, noise, snr_db=snr_db)
            n_noisy += 1

        # Clean files are re-exported too, so both kinds go through the same
        # writer. export() returns the open output file handle; close it
        # explicitly rather than relying on garbage collection.
        output_audio.export(output_dir / file.name, format="wav").close()
        n_total += 1

    print(f"{split}: wrote {n_total} files ({n_noisy} with added noise) to {output_dir}")