In [162]:
from scipy.io.wavfile import read
import matplotlib.pyplot as plt
import numpy as np
import math
import soundfile as sf
from scipy import signal
import librosa

In [163]:
# Read all the Noise Files

import librosa
def _to_mono_16k(samples, orig_sr, target_sr=16000):
    """Return `samples` as a single-channel signal resampled to `target_sr` Hz.

    Multi-channel input (frames x channels, as returned by `sf.read`) is reduced
    to its first channel before resampling, so the time axis is always the one
    that gets resampled.
    """
    samples = np.asarray(samples)
    if samples.ndim > 1:
        samples = samples[:, 0]
    # orig_sr / target_sr are passed by keyword: newer librosa releases reject
    # them as positional arguments, while older releases accept both forms.
    return librosa.resample(samples, orig_sr=orig_sr, target_sr=target_sr)


def _split_half(signal):
    """Split `signal` at its midpoint and return (first_half, second_half)."""
    midpoint = len(signal) // 2
    return signal[:midpoint], signal[midpoint:]


noise1, sr1 = sf.read(r"Noise Data/adtBabble2.wav")
noise2, sr2 = sf.read(r"Noise Data/adtCafe.wav")
noise3, sr3 = sf.read(r"Noise Data/Live_Restaurant.wav")

adtBabble2 = _to_mono_16k(noise1, sr1)
adtCafe = _to_mono_16k(noise2, sr2)
Live_Restaurant = _to_mono_16k(noise3, sr3)

# Divide each noise file into a Training and a Testing set (50% each)
train_adtBabble2, test_adtBabble2 = _split_half(adtBabble2)
train_adtCafe, test_adtCafe = _split_half(adtCafe)
train_Live_Restaurant, test_Live_Restaurant = _split_half(Live_Restaurant)

### For each clean speech signal from the training (and development) data set, randomly select an equal length, contiguous segment from the noise training (and development) signal. For each speech and noise signal pair, generate noisy speech at signal-to-noise ratios (SNRs) of: -3, 0, and 3 dB. Meaning, for 1000 training signals (500 from male and 500 from female), 1 noise segment, 3 noise signals, and 3 SNRs per signal pair, there should be 1000 x 1 x 3 x 3 = 9000 training signals. Be sure to keep track of the corresponding clean speech signal for each noisy speech signal. You may want to rename the files and output as wav files to a different directory.

In [164]:
import os

# os.listdir returns entries in arbitrary, platform-dependent order; sorting
# makes the processing order (and therefore any seeded run) reproducible.
train_male_filenames = sorted(os.listdir(r"Speech Data/IEEE/IEEE_male/train_male"))
dev_male_filenames = sorted(os.listdir(r"Speech Data/IEEE/IEEE_male/development_male"))
test_male_filenames = sorted(os.listdir(r"Speech Data/IEEE/IEEE_male/test_male"))

train_female_filenames = sorted(os.listdir(r"Speech Data/IEEE/IEEE_female/train_female"))
dev_female_filenames = sorted(os.listdir(r"Speech Data/IEEE/IEEE_female/development_female"))
test_female_filenames = sorted(os.listdir(r"Speech Data/IEEE/IEEE_female/test_female"))

# Sanity check: report how many files each split contains.
print("Male: Train Length: {} , Dev Length {} , Test length {} ".format(len(train_male_filenames),len(dev_male_filenames),len(test_male_filenames)))
print("Female: Train Length: {} , Dev Length {} , Test length {} ".format(len(train_female_filenames),len(dev_female_filenames),len(test_female_filenames)))

Male: Train Length: 500 , Dev Length 100 , Test length 100 
Female: Train Length: 500 , Dev Length 100 , Test length 100 


In [179]:
def snr_calculate(speech_data,noise_data):
    """Return the signal-to-noise ratio, in dB, between two signals.

    The SNR is ``10 * log10(sum(speech**2) / sum(noise**2))``.

    Parameters
    ----------
    speech_data : array-like
        Samples of the speech signal (integer or floating point).
    noise_data : array-like
        Samples of the noise signal (integer or floating point).

    Returns
    -------
    float
        The SNR in decibels.

    Raises
    ------
    ValueError
        If either signal has zero energy, in which case the ratio in dB is
        undefined.
    """
    # Work in float64: it keeps fractional samples intact (an integer cast
    # would truncate values in [-1, 1] to zero) and it cannot overflow when
    # squaring integer PCM samples.
    speech_energy = float(np.sum(np.asarray(speech_data, dtype=np.float64) ** 2))
    noise_energy = float(np.sum(np.asarray(noise_data, dtype=np.float64) ** 2))
    if speech_energy == 0.0 or noise_energy == 0.0:
        raise ValueError("SNR is undefined when the speech or noise signal has zero energy")
    return 10.0 * math.log10(speech_energy / noise_energy)
# Function that creates noisy speech signal by combining noise and clean speech at desired SNR level.
def generate_signal(speech_data,noise_data,dsnr,outputfilename,samplerate=16000):
    """Mix noise into clean speech at a target SNR and write the result to disk.

    The noise is multiplied by a gain ``b`` chosen so that
    ``10 * log10(sum(speech**2) / sum((b * noise)**2)) == dsnr``, then added
    sample-by-sample to the speech.

    Parameters
    ----------
    speech_data : array-like
        Clean speech samples.
    noise_data : array-like
        Noise samples; must have the same shape as ``speech_data``.
    dsnr : float
        Desired signal-to-noise ratio in dB.
    outputfilename : str
        Path of the audio file to write.
    samplerate : int, optional
        Sample rate written to the output file (default 16000).

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the two signals differ in shape, or if the noise segment has zero
        energy (no finite gain can reach the requested SNR).
    """
    # float64 avoids integer overflow when squaring PCM samples.
    speech = np.asarray(speech_data, dtype=np.float64)
    noise = np.asarray(noise_data, dtype=np.float64)
    if speech.shape != noise.shape:
        raise ValueError(
            "speech and noise must have the same shape, got {} and {}".format(speech.shape, noise.shape)
        )

    speech_energy = np.sum(speech ** 2)
    noise_energy = np.sum(noise ** 2)
    if noise_energy == 0:
        raise ValueError("noise segment has zero energy; cannot scale it to the requested SNR")

    # Gain that brings the noise energy to speech_energy / 10**(dsnr / 10).
    b = np.sqrt((speech_energy / noise_energy) * (10 ** (-dsnr / 10)))
    updated_noisy_signal = speech + b * noise
    sf.write(file=outputfilename, data=updated_noisy_signal, samplerate=samplerate)
    return

### Male

In [188]:
# For Training
# Mix every male training utterance with a random, equal-length segment of each
# training-half noise recording at -3, 0 and +3 dB SNR.
# Output name: <utterance>__<noise index>_<snr index>.wav
train_male_out_dir = "PREPARED_DATASET/TRAIN_MALE/"
os.makedirs(train_male_out_dir, exist_ok=True)
for filename in train_male_filenames:
    # splitext drops only the extension, so dotted basenames stay intact.
    destination_file_name = train_male_out_dir + os.path.splitext(str(filename))[0]
    complete_filename = "Speech Data/IEEE/IEEE_male/train_male/" + str(filename)
    speech_signal, sr = sf.read(complete_filename)
    # Keyword arguments are required by newer librosa releases.
    speech_signal = librosa.resample(speech_signal, orig_sr=sr, target_sr=16000)
    len_speech_signal = len(speech_signal)
    for index1, each_noisy_signal in enumerate([train_adtBabble2, train_adtCafe, train_Live_Restaurant]):
        # randint's upper bound is exclusive; +1 keeps the last valid offset
        # reachable and allows a noise signal exactly as long as the speech.
        start = np.random.randint(0, len(each_noisy_signal) - len_speech_signal + 1)
        noise_segment = each_noisy_signal[start:start + len_speech_signal]
        for index2, each_snr in enumerate([-3, 0, 3]):
            generate_signal(speech_signal, noise_segment, each_snr,
                            destination_file_name + '__' + str(index1) + "_" + str(index2) + '.wav')


In [150]:
# For Development and Testing
# Testing draws only from the held-out half of each noise recording, so test
# noise never overlaps the noise used to build the training set.
# NOTE(review): development is assumed to share the training half, following
# the task text "noise training (and development) signal" -- confirm.
# Output name: <utterance>__<noise index>_<snr index>.wav
male_eval_splits = [
    ("development_male", "PREPARED_DATASET/DEV_MALE/", dev_male_filenames,
     [train_adtBabble2, train_adtCafe, train_Live_Restaurant]),
    ("test_male", "PREPARED_DATASET/TEST_MALE/", test_male_filenames,
     [test_adtBabble2, test_adtCafe, test_Live_Restaurant]),
]
for source_subdir, out_dir, split_filenames, noise_signals in male_eval_splits:
    os.makedirs(out_dir, exist_ok=True)
    for filename in split_filenames:
        # splitext drops only the extension, so dotted basenames stay intact.
        destination_file_name = out_dir + os.path.splitext(str(filename))[0]
        complete_filename = "Speech Data/IEEE/IEEE_male/" + source_subdir + "/" + str(filename)
        speech_signal, sr = sf.read(complete_filename)
        # Keyword arguments are required by newer librosa releases.
        speech_signal = librosa.resample(speech_signal, orig_sr=sr, target_sr=16000)
        len_speech_signal = len(speech_signal)
        for index1, each_noisy_signal in enumerate(noise_signals):
            # randint's upper bound is exclusive; +1 keeps the last valid
            # offset reachable.
            start = np.random.randint(0, len(each_noisy_signal) - len_speech_signal + 1)
            noise_segment = each_noisy_signal[start:start + len_speech_signal]
            for index2, each_snr in enumerate([-3, 0, 3]):
                generate_signal(speech_signal, noise_segment, each_snr,
                                destination_file_name + '__' + str(index1) + "_" + str(index2) + '.wav')



### Female

In [151]:
# For Training
# Mix every female training utterance with a random, equal-length segment of
# each training-half noise recording at -3, 0 and +3 dB SNR. Only the training
# halves are used so the held-out noise stays unseen during training.
# Output name: <utterance>__<noise index>_<snr index>.wav
train_female_out_dir = "PREPARED_DATASET/TRAIN_FEMALE/"
os.makedirs(train_female_out_dir, exist_ok=True)
for filename in train_female_filenames:
    # splitext drops only the extension, so dotted basenames stay intact.
    destination_file_name = train_female_out_dir + os.path.splitext(str(filename))[0]
    complete_filename = "Speech Data/IEEE/IEEE_female/train_female/" + str(filename)
    speech_signal, sr = sf.read(complete_filename)
    # Keyword arguments are required by newer librosa releases.
    speech_signal = librosa.resample(speech_signal, orig_sr=sr, target_sr=16000)
    len_speech_signal = len(speech_signal)
    for index1, each_noisy_signal in enumerate([train_adtBabble2, train_adtCafe, train_Live_Restaurant]):
        # randint's upper bound is exclusive; +1 keeps the last valid offset
        # reachable and allows a noise signal exactly as long as the speech.
        start = np.random.randint(0, len(each_noisy_signal) - len_speech_signal + 1)
        noise_segment = each_noisy_signal[start:start + len_speech_signal]
        for index2, each_snr in enumerate([-3, 0, 3]):
            generate_signal(speech_signal, noise_segment, each_snr,
                            destination_file_name + '__' + str(index1) + "_" + str(index2) + '.wav')




In [152]:
# For Development and Testing
# Testing draws only from the held-out half of each noise recording, so test
# noise never overlaps the noise used to build the training set.
# NOTE(review): development is assumed to share the training half, following
# the task text "noise training (and development) signal" -- confirm.
# Output name: <utterance>__<noise index>_<snr index>.wav
female_eval_splits = [
    ("development_female", "PREPARED_DATASET/DEV_FEMALE/", dev_female_filenames,
     [train_adtBabble2, train_adtCafe, train_Live_Restaurant]),
    ("test_female", "PREPARED_DATASET/TEST_FEMALE/", test_female_filenames,
     [test_adtBabble2, test_adtCafe, test_Live_Restaurant]),
]
for source_subdir, out_dir, split_filenames, noise_signals in female_eval_splits:
    os.makedirs(out_dir, exist_ok=True)
    for filename in split_filenames:
        # splitext drops only the extension, so dotted basenames stay intact.
        destination_file_name = out_dir + os.path.splitext(str(filename))[0]
        complete_filename = "Speech Data/IEEE/IEEE_female/" + source_subdir + "/" + str(filename)
        speech_signal, sr = sf.read(complete_filename)
        # Keyword arguments are required by newer librosa releases.
        speech_signal = librosa.resample(speech_signal, orig_sr=sr, target_sr=16000)
        len_speech_signal = len(speech_signal)
        for index1, each_noisy_signal in enumerate(noise_signals):
            # randint's upper bound is exclusive; +1 keeps the last valid
            # offset reachable.
            start = np.random.randint(0, len(each_noisy_signal) - len_speech_signal + 1)
            noise_segment = each_noisy_signal[start:start + len_speech_signal]
            for index2, each_snr in enumerate([-3, 0, 3]):
                generate_signal(speech_signal, noise_segment, each_snr,
                                destination_file_name + '__' + str(index1) + "_" + str(index2) + '.wav')



In [153]:
# Helper adapted from https://stackoverflow.com/questions/9727673/list-directory-tree-structure-in-python.
# It prints the directory tree together with the number of files in each directory.
def list_files(startpath):
    """Print an indented tree of the directories under `startpath`.

    One line is printed per directory, in the form ``<indent><name>/ --> <n> Files``,
    where the indent is four spaces per level below `startpath` and ``n`` counts
    the files directly inside that directory.

    Parameters
    ----------
    startpath : str
        Directory at which the walk starts.
    """
    for root, dirs, files in os.walk(startpath):
        # Sorting in place makes os.walk descend in a stable, alphabetical order.
        dirs.sort()
        # Depth is taken from the path relative to startpath; this stays correct
        # when startpath ends in a separator or its text recurs deeper in root.
        relative_root = os.path.relpath(root, startpath)
        level = 0 if relative_root == os.curdir else relative_root.count(os.sep) + 1
        indent = ' ' * 4 * level
        # normpath strips a trailing separator so basename is never empty.
        name = os.path.basename(os.path.normpath(root))
        print('{}{}/ --> {} Files'.format(indent, name, len(files)))
# Print the tree for the current working directory to check the file count of each generated split.
list_files('.')

./ --> 10 Files
    .ipynb_checkpoints/ --> 2 Files
    Noise Data/ --> 3 Files
    PREPARED_DATASET/ --> 0 Files
        .ipynb_checkpoints/ --> 0 Files
        DEV_FEMALE/ --> 900 Files
        DEV_MALE/ --> 900 Files
        TEST_FEMALE/ --> 900 Files
        TEST_MALE/ --> 900 Files
        TRAIN_FEMALE/ --> 4500 Files
        TRAIN_MALE/ --> 4500 Files
    Speech Data/ --> 0 Files
        IEEE/ --> 0 Files
            IEEE_female/ --> 20 Files
                development_female/ --> 100 Files
                test_female/ --> 100 Files
                train_female/ --> 500 Files
            IEEE_male/ --> 20 Files
                development_male/ --> 100 Files
                test_male/ --> 100 Files
                train_male/ --> 500 Files
