In [11]:
import matplotlib.pyplot as plt
import librosa
import numpy as np
import scipy
import os
from scipy.io import wavfile
import sox

Create directories for the original data and the augmented data. The original data directory should have subdirectories for the different datasets. Each subdirectory contains wav files. The augmented data directory can be empty and will be populated as this notebook runs.

In [12]:
orig_dir = './original_data/'
aug_dir = './augmented_data/'

In [7]:
def make_dir(dir_path):
    if not os.path.isdir(dir_path):
        os.mkdir(dir_path)
        
def augment_and_save(feature_name, augment_function, signal, sample_rate):
    new_dir = feature_name
    make_dir(aug_dir + dataset_name + '/' + new_dir)
    new_file_path = aug_dir + dataset_name + '/' + new_dir + '/' + filename
    if not os.path.isfile(new_file_path):
        wavfile.write(new_file_path, sample_rate, augment_function(signal, sample_rate))

In [8]:
def augment_pitch(signal, sample_rate):
    PITCH_FACTOR = round(np.random.uniform(low=.9,high=1.1),2)
    print("Pitch Modulation Factor: ", PITCH_FACTOR)
    pitch_modulated_signal = librosa.effects.pitch_shift(signal, sample_rate, PITCH_FACTOR)
    return pitch_modulated_signal

def augment_noise(signal, sample_rate):
    NOISE_FACTOR = round(np.random.uniform(low=.001,high=.02),3)
    print("Noise Modulation Factor: ",NOISE_FACTOR)
    noise = np.random.randn(len(signal))
    noise_modulated_signal = signal + NOISE_FACTOR * noise
    noise_modulated_signal = noise_modulated_signal.astype(type(signal[0]))
    return noise_modulated_signal

def augment_speed(signal, sample_rate):
    SPEED_FACTOR = round(np.random.uniform(low = 0.9, high = 1.1),2)
    print("Speed Modulation Factor: ", SPEED_FACTOR)
    speed_modulated_signal = librosa.effects.time_stretch(signal,SPEED_FACTOR)
    return speed_modulated_signal

def augment_tempo_and_save(filepath):
    new_file_path = aug_dir + dataset_name + '/' + new_dir + '/' + filename
    if not os.path.isfile(new_file_path):
        TEMPO_FACTOR = round(np.random.uniform(low = 0.9, high = 1.1),2)
        print("Tempo Modulation Factor: ", TEMPO_FACTOR)
        tempoTransformer = sox.Transformer()
        tempoTransformer.tempo(TEMPO_FACTOR)
        new_dir = 'tempo'
        make_dir(aug_dir + dataset_name + '/' + new_dir)
        tempoTransformer.build(filepath, new_file_path)

In [9]:
def augment_data(dataset_name, filename):
    filepath = orig_dir + dataset_name + '/' + filename
    signal, sample_rate = librosa.load(filepath,sr=384000)
    
    # Add augmentations here
    augment_and_save('pitch', augment_pitch, signal, sample_rate)
    augment_and_save('noise', augment_noise, signal, sample_rate)
    augment_and_save('speed', augment_speed, signal, sample_rate)
    #augment_tempo_and_save(filepath)

In [10]:
for subdir in [x[0] for x in os.walk(orig_dir)][1:]:
    dataset_name = subdir.split('/')[-1]
    make_dir(aug_dir + dataset_name)
        
    for filename in os.listdir(subdir):
        if filename.endswith(".wav"):
            print(subdir + filename)
            augment_data(dataset_name, filename)
            print()

./original_data/xenocanto0-Hzui4Qpl4_40.000 - 0s_5s.wav

./original_data/xenocanto0-Hzui4Qpl4_40.000 - 5s_10s.wav

./original_data/xenocanto0-N62G9etNE_30.000 - 0s_5s.wav

./original_data/xenocanto0-N62G9etNE_30.000 - 5s_10s.wav

./original_data/xenocanto0-yskgO46Bg_30.000 - 0s_5s.wav

./original_data/xenocanto0-yskgO46Bg_30.000 - 5s_10s.wav

./original_data/xenocanto00cREXaIlnQ_30.000 - 0s_5s.wav

./original_data/xenocanto00cREXaIlnQ_30.000 - 5s_10s.wav

./original_data/xenocanto00G2vNrTnCc_10.000 - 0s_5s.wav
Pitch Modulation Factor:  1.05
Noise Modulation Factor:  0.018
Speed Modulation Factor:  1.0

./original_data/xenocanto00G2vNrTnCc_10.000 - 5s_10s.wav
Pitch Modulation Factor:  0.91
Noise Modulation Factor:  0.003
Speed Modulation Factor:  1.01

./original_data/xenocanto00KM53yZi2A_30.000 - 0s_5s.wav


KeyboardInterrupt: 