In [11]:
import torch
import torchaudio
import numpy as np
import pandas as pd
import librosa
from IPython.display import display, Audio
import math
import random
import os
from scipy.signal import convolve

SAMPLE_RATE = 16000

In [3]:
def get_seconds(audio, sample_rate=None):
    """Split `audio` into consecutive chunks of exactly one second each.

    Args:
        audio: Sliceable sequence of samples (e.g. a 1-D numpy array).
        sample_rate: Number of samples per second. When omitted, the
            notebook-level SAMPLE_RATE is used.

    Returns:
        A list of slices of `audio`, each `sample_rate` samples long. Trailing
        samples that do not fill a whole second are dropped, so the list is
        empty when `audio` is shorter than one second.
    """
    if sample_rate is None:
        sample_rate = SAMPLE_RATE

    # Number of *complete* seconds; integer floor division avoids the
    # float round-trip of int(len / rate).
    full_seconds = len(audio) // sample_rate

    return [
        audio[start:start + sample_rate]
        for start in range(0, full_seconds * sample_rate, sample_rate)
    ]

In [5]:
def round_up_audio(audio, sample_rate=None):
    """Zero-pad `audio` so its length becomes a whole number of seconds.

    Args:
        audio: numpy array of samples; its length is read from `shape[0]`.
        sample_rate: Number of samples per second. When omitted, the
            notebook-level SAMPLE_RATE is used.

    Returns:
        A new flattened array consisting of `audio` followed by just enough
        zeros (of the same dtype) to reach the next multiple of `sample_rate`.
        Audio whose length is already a multiple of `sample_rate` gets no
        padding.
    """
    if sample_rate is None:
        sample_rate = SAMPLE_RATE

    # (-n) % rate is the distance to the next multiple and is 0 when n is
    # already aligned; `rate - n % rate` would add a whole extra second of
    # silence in that case.
    pad_len = (-audio.shape[0]) % sample_rate
    padding = np.zeros(pad_len, audio.dtype)

    return np.concatenate((audio, padding), axis=None)

In [27]:
def add_echo_from_file(filename, audio):
    """Convolve `audio` with the impulse response stored in `filename`.

    The file is loaded with librosa at SAMPLE_RATE (the `rir` naming used by
    callers suggests a room impulse response) and fully convolved with
    `audio`.

    Returns:
        Only the first SAMPLE_RATE samples of the convolution result,
        regardless of how long `audio` is.
    """
    impulse_response, _ = librosa.load(filename, sr=SAMPLE_RATE)
    reverberated = convolve(audio, impulse_response, mode='full')
    one_second = reverberated[:SAMPLE_RATE]
    return one_second

In [7]:
def get_noise_from_sound(signal,noise,SNR):
    """Rescale `noise` so that its level relative to `signal` matches `SNR`.

    Args:
        signal: numpy array holding the clean signal.
        noise: numpy array holding the noise to be rescaled.
        SNR: Desired signal-to-noise ratio; it is applied as a power ratio of
            10 ** (SNR / 10), i.e. a value in decibels.

    Returns:
        A new array: `noise` multiplied by the gain that brings its RMS to
        RMS(signal) / sqrt(10 ** (SNR / 10)). If `noise` is completely silent
        there is nothing to scale, and an all-zero array is returned.
    """
    rms_signal = math.sqrt(np.mean(signal**2))

    # current RMS of noise
    rms_noise_current = math.sqrt(np.mean(noise**2))
    if rms_noise_current == 0:
        # A silent noise clip cannot be amplified to any target level; without
        # this guard the gain computation below raises ZeroDivisionError.
        return noise * 0.0

    # required RMS of noise
    rms_noise_target = math.sqrt(rms_signal**2 / pow(10, SNR/10))

    return noise * (rms_noise_target / rms_noise_current)

In [33]:
def add_noise(audio, noise):
    """Mix `noise` into `audio` at a randomly drawn SNR.

    An integer SNR between 0 and 9 (inclusive) is picked with the `random`
    module, `noise` is rescaled to that SNR via get_noise_from_sound, and the
    rescaled noise is added to `audio`.

    Returns:
        (noisy_audio, scaled_noise): the mixture and the rescaled noise that
        was added to produce it.
    """
    snr = random.choice(range(10))
    scaled_noise = get_noise_from_sound(audio, noise, snr)
    return audio + scaled_noise, scaled_noise

In [37]:
# Scratch check: slicing with [:-1] returns every element except the last one.
mylist = [0,1,3,4]
mylist[:-1]

[0, 1, 3]

In [None]:
# Example utterance plus the directories holding background-noise clips and
# impulse-response recordings. Paths are relative to the notebook location.
audio_sample = '../../LibriVox_Kaggle/achtgesichterambiwasse/achtgesichterambiwasse_0003.wav'
bg_dir = '../../LibriVox_Kaggle/BGnoise/'
rir_dir = '../../RIR/MIT_IR_Survey/Audio/'

# os.listdir returns entries in arbitrary order, so the listings are sorted to
# make random.choice reproducible under a fixed seed. Hidden dotfiles (for
# example .DS_Store) are skipped because they are not audio clips.
bg_files = sorted(name for name in os.listdir(bg_dir) if not name.startswith('.'))
rir_files = sorted(name for name in os.listdir(rir_dir) if not name.startswith('.'))


def get_random_audio_sec(audio_filename):
    """Return one random second of `audio_filename` with echo and noise added.

    A background-noise file and an impulse-response file are drawn at random
    from the notebook-level `bg_files` / `rir_files` listings. One second of
    the (peak-normalised) audio and one second of the (peak-normalised)
    background clip are then picked at random; the audio second is passed
    through add_echo_from_file and mixed with the noise second by add_noise.

    Args:
        audio_filename: Path of the audio file to sample from.

    Returns:
        (noisy_audio, noise): the degraded one-second clip and the rescaled
        noise that was added to it, exactly as returned by add_noise.

    Raises:
        ValueError: if the audio file holds no samples, or the chosen
            background file is shorter than one second.
    """
    # Choosing a random background and echo filename
    bg_file = os.path.join(bg_dir, random.choice(bg_files))
    rir_file = os.path.join(rir_dir, random.choice(rir_files))

    # Extracting audio data
    wav, _ = librosa.load(audio_filename, sr=SAMPLE_RATE)
    bg_wav, _ = librosa.load(bg_file, sr=SAMPLE_RATE)

    if wav.shape[0] == 0:
        raise ValueError("audio file contains no samples: %s" % audio_filename)
    if bg_wav.shape[0] < SAMPLE_RATE:
        raise ValueError("background file is shorter than one second: %s" % bg_file)

    # Padding to whole seconds and peak-normalising. Completely silent input
    # is left untouched so that it is not turned into NaNs by a 0/0 division.
    wav = round_up_audio(wav)
    wav_peak = np.max(np.abs(wav))
    if wav_peak > 0:
        wav /= wav_peak
    bg_peak = np.max(np.abs(bg_wav))
    if bg_peak > 0:
        bg_wav /= bg_peak

    # Getting a random audio second. The final chunk is skipped because it may
    # contain zero padding, unless it is the only chunk available.
    wav_list = get_seconds(wav)
    candidates = wav_list[:-1] or wav_list
    rand_audio_sec = random.choice(candidates)

    # Getting a random bg second; every complete second is eligible.
    bg_duration = bg_wav.shape[0] // SAMPLE_RATE
    bg_random_sec = random.randrange(bg_duration)
    bg_random_wav = bg_wav[bg_random_sec*SAMPLE_RATE:(bg_random_sec+1)*SAMPLE_RATE]

    # Adding echo and bg noise to the *selected* seconds. Passing the full
    # recordings here would ignore the random picks above and hand add_noise
    # two arrays of different lengths.
    echo_audio = add_echo_from_file(rir_file, rand_audio_sec)
    noisy_audio, noise = add_noise(echo_audio, bg_random_wav)

    return noisy_audio, noise



In [15]:

# `bg` is not assigned in any cell visible in this notebook, so on a fresh
# kernel the load below fails with NameError. Pick a background clip
# explicitly from the configured directory instead.
bg = os.path.join(bg_dir, random.choice(bg_files))

wav, sr = librosa.load(audio_sample, sr=SAMPLE_RATE)
wav_bg, sr = librosa.load(bg, sr=SAMPLE_RATE)

# Pad to whole seconds and peak-normalise the utterance.
wav = round_up_audio(wav)
wav /= np.max(np.abs(wav), axis=0)

wav_list = get_seconds(wav)

# Creating a list of seconds in bg noise to randomly choose bg noise samples.
# Every complete second is eligible; stopping at duration_noise - 1 left the
# list empty for clips shorter than two seconds.
duration_noise = wav_bg.shape[0] // sr
bg_noise_dur = list(range(duration_noise))

for clean_sec in wav_list:

    # Apply a randomly chosen impulse response to this second.
    random_rir = os.path.join(rir_dir, random.choice(rir_files))
    echo_sec = add_echo_from_file(random_rir, clean_sec)

    # Randomly choosing a bgnoise sec to be added to the audio
    bg_noise_sec = random.choice(bg_noise_dur)
    bg_noise = wav_bg[bg_noise_sec*SAMPLE_RATE:(bg_noise_sec+1)*SAMPLE_RATE]

    noisy_audio, noise = add_noise(echo_sec, bg_noise)
    print(noisy_audio.mean(), noise.mean())


-0.00450417 -0.004469852
-7.438141e-05 -8.6161075e-05
-0.009612338 -0.009285496
-4.8047305e-06 -2.1111966e-07
9.556692e-07 -4.130846e-06
-0.00023141886 -0.0002722185
0.00012823778 4.981515e-05
-4.637119e-05 0.000121579345
-7.31562e-05 -0.00021753622
-0.0020477665 -0.0020459678


In [35]:
# Listening check. `random_rir` and `bg_noise` hold whatever the loop in the
# cell above assigned on its last iteration. add_echo_from_file keeps only the
# first SAMPLE_RATE samples of its result, so this plays one second of audio.
first_sec_echo = add_echo_from_file(random_rir, wav)
echo_wav, noise = add_noise(first_sec_echo, bg_noise)

preview = Audio(echo_wav, rate=SAMPLE_RATE)
display(preview)