# Import Dependencies

In [2]:
import librosa
import librosa.display
import numpy as np
import pandas as pd
import os
import time
from pydub import AudioSegment
from pydub.playback import play

## Global Variables

In [3]:
data_set_folder = 'UrbanSound8K/audio'
temp_folder_path = 'UrbanSound8K/tmp'
output_folder_path = 'UrbanSound8K/sound_data'
black_list_folders = ['fold10'] # reserve for validation set
import_folder_paths = []
target_wav_duration = 4.00
pitch_sounds = [-1, 1]
vary_sounds = [0.93, 1.07]

## Common Functions

In [4]:
def create_directory_if_not_exist(directory):
    if not os.path.exists(directory):
        os.makedirs(directory)

## Sound Augment Functions

In [7]:
def augment_sound(file_path, folder_path):
    folder_name = folder_path.split('/')[-1]
    file_name = file_path.split('/')[-1].split('.')[0]
    out_dir = output_folder_path + '/' + folder_name
    
    create_directory_if_not_exist(out_dir)
    
    sound_file_path = file_path
    y, sr = librosa.load(sound_file_path)
    wav_duration = librosa.get_duration(y=y, sr=sr)
    
    if wav_duration < target_wav_duration:
        sound_file_path = add_slient_to_sound(sound_file_path, wav_duration)    
    y, sr = librosa.load(sound_file_path, duration=target_wav_duration)
    librosa.output.write_wav(out_dir + '/' + file_name + '.wav', y, sr)
    
    for rate in vary_sounds:
        stretched_wav, sr = stretch_wav(file_path, rate)
        stretched_file_name = file_name + '_vary_' + str(rate) + '.wav'
        librosa.output.write_wav(out_dir + '/' + stretched_file_name, stretched_wav, sr)
    
    for pitch_rate in pitch_sounds:
        pitched_wav, sr = shift_wav(file_path, pitch_rate)
        pitched_file_name = file_name + '_pitch_' + str(pitch_rate) + '.wav'
        librosa.output.write_wav(out_dir + '/' + pitched_file_name, pitched_wav, sr)
        
    if sound_file_path.find('tmp') > -1:
        os.remove(sound_file_path)
    
def add_slient_to_sound(file_path, wav_duration):
    slient_duration = ((target_wav_duration - wav_duration) * 1000) / 2
    if slient_duration < 0:
        return file_path
    left_slient_segment = AudioSegment.silent(duration=slient_duration)
    right_slient_segment = AudioSegment.silent(duration=slient_duration)
    sound = AudioSegment.from_wav(file_path)
    file_name = file_path.split('/')[-1]
    final_sound = left_slient_segment + sound + right_slient_segment
    final_sound_path = temp_folder_path + '/' + file_name
    final_sound.export(final_sound_path, format="wav")
    return final_sound_path

def stretch_wav(wav_file, rate):
    y, sr = librosa.load(wav_file)
    stretched_wav = librosa.effects.time_stretch(y, rate=rate)
    return (stretched_wav, sr)

def shift_wav(wav_file, steps):
    y, sr = librosa.load(wav_file)
    shift_wav = librosa.effects.pitch_shift(y, sr, n_steps=steps)
    return (shift_wav, sr)

## Augment Wav Files

In [8]:
global import_folder_paths
for root, dirnames, _ in os.walk(data_set_folder):
    for dir_name in dirnames:
        if dir_name in black_list_folders:
            continue
        wav_folder_path = os.path.join(root, dir_name)
        import_folder_paths.append(wav_folder_path)
print('wav folder path:{}'.format(wav_folder_path))

wav folder path:UrbanSound8K/audio/fold10


In [None]:
imported_counter = 0
for folder_path in import_folder_paths:
    for root, _, files in os.walk(folder_path):
        for file in files:
            file_name, file_extension = os.path.splitext(file)
            if file_extension != '.wav': continue
            if imported_counter % 100 == 0:
                print('Training in progress: {}'.format(imported_counter))
            file_path = os.path.join(root, file)
            augment_sound(file_path, folder_path)
            imported_counter += 1
print('Finish')

Training in progress: 0
Training in progress: 100
Training in progress: 200
Training in progress: 300
Training in progress: 400
Training in progress: 500
Training in progress: 600
Training in progress: 700
Training in progress: 800
Training in progress: 900
Training in progress: 1000
