In [2]:
import os
import random
import warnings

import keras
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
warnings.filterwarnings('ignore')

Using TensorFlow backend.
Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit


In [3]:
# Fix the seed of both global random number generators (Python's `random`
# module and NumPy's legacy global RNG) so later random draws are repeatable.
SEED = 1337
random.seed(SEED)
np.random.seed(SEED)

In [4]:
# Load the ESC-50 metadata CSV into a DataFrame.
esc50_meta_csv = './ESC-50-master/meta/esc50.csv'
meta_data = pd.read_csv(esc50_meta_csv)

In [5]:
# Sanity check: print the (rows, columns) shape, then render the first five rows.
table_shape = meta_data.shape
print(table_shape)
meta_data.head(n=5)

(2000, 7)


Unnamed: 0,filename,fold,target,category,esc10,src_file,take
0,1-100032-A-0.wav,1,0,dog,True,100032,A
1,1-100038-A-14.wav,1,14,chirping_birds,False,100038,A
2,1-100210-A-36.wav,1,36,vacuum_cleaner,False,100210,A
3,1-100210-B-36.wav,1,36,vacuum_cleaner,False,100210,B
4,1-101296-A-19.wav,1,19,thunderstorm,False,101296,A


In [6]:
# Pull out the two columns used from here on: the clip file names and the
# integer class ids stored in the `target` column.
filenames, labels = meta_data['filename'], meta_data['target']

In [7]:
# Display the file-name Series for a visual check (pandas abbreviates the repr).
filenames

0        1-100032-A-0.wav
1       1-100038-A-14.wav
2       1-100210-A-36.wav
3       1-100210-B-36.wav
4       1-101296-A-19.wav
              ...        
1995     5-263831-B-6.wav
1996    5-263902-A-36.wav
1997     5-51149-A-25.wav
1998      5-61635-A-8.wav
1999       5-9032-A-0.wav
Name: filename, Length: 2000, dtype: object

In [8]:
# Display the label Series (the integer `target` ids taken from the metadata).
labels

0        0
1       14
2       36
3       36
4       19
        ..
1995     6
1996    36
1997    25
1998     8
1999     0
Name: target, Length: 2000, dtype: int64

In [9]:
# Hold out 15% of the clips as the test set. Stratifying on the labels keeps
# the class proportions the same on both sides of the split.
x_train_val, x_test, y_train_val, y_test = train_test_split(
    filenames,
    labels,
    test_size=.15,
    stratify=labels,
    random_state=1337,
)

In [10]:
# Split the remaining 85% into training and validation sets.
# 0.117647... * 0.85 ~= 0.10, so the validation set comes out at about 10% of
# the full dataset. The literal is kept as-is: the split size is derived from
# it, so even a tiny change in the value can shift the count by one sample.
x_train, x_val, y_train, y_val = train_test_split(
    x_train_val,
    y_train_val,
    test_size=0.117647058823529,
    stratify=y_train_val,
    random_state=1337,
)

In [11]:
# Convert the training split from pandas Series to plain NumPy arrays, so that
# element access downstream is purely positional.
x_train, y_train = np.array(x_train), np.array(y_train)

In [17]:
# Time-stretch augmentation: for every training clip, write one copy per entry
# in `rates` into AUGMENTED_DIR.
# Output name: <2-digit label>-speed-<rate in percent>-<original file name>
#
# NOTE(review): librosa.output.write_wav only exists in librosa < 0.8 — confirm
# the pinned librosa version before re-running this cell.
# NOTE(review): librosa.load resamples to its default rate when `sr` is not
# given, so the copies are written at that rate — confirm this is intended.
AUDIO_DIR = './ESC-50-master/audio/'
AUGMENTED_DIR = './ESC-50-master/augmented_audio/'
rates = [0.93, 1.07]  # rate < 1 slows the clip down, rate > 1 speeds it up

# Make sure the output folder exists; writing into a missing directory fails.
os.makedirs(AUGMENTED_DIR, exist_ok=True)

for file, label in zip(x_train, y_train):
    y, sr = librosa.load(AUDIO_DIR + file)
    for rate in rates:
        y_changed = librosa.effects.time_stretch(y, rate=rate)
        # round() instead of a bare int(): int() truncates, so float error
        # (e.g. 0.29 * 100 == 28.999...) would produce an off-by-one tag.
        rate_tag = str(int(round(rate * 100)))
        out_name = str(label).zfill(2) + '-speed-' + rate_tag + '-' + file
        librosa.output.write_wav(AUGMENTED_DIR + out_name, y_changed, sr)

In [18]:
# Pitch-shift augmentation: for every training clip, write one copy per entry
# in `n_steps` (shift in semitones) into AUGMENTED_DIR.
# Output name: <2-digit label>-pitch-<|n_step| * 100>-<original file name>
#
# The tag uses abs(), so the sign of the shift is NOT part of the file name:
# +k and -k would map to the same name and overwrite each other. With the
# values below the tags are 200, 100, 110 and 210, so all four stay distinct.
# NOTE(review): the 1.1 / 2.1 steps look chosen to avoid that clash — confirm
# before changing them.
# NOTE(review): librosa.output.write_wav only exists in librosa < 0.8 — confirm
# the pinned librosa version before re-running this cell.
AUDIO_DIR = './ESC-50-master/audio/'
AUGMENTED_DIR = './ESC-50-master/augmented_audio/'
n_steps = [-2, -1, 1.1, 2.1]

# Make sure the output folder exists; writing into a missing directory fails.
os.makedirs(AUGMENTED_DIR, exist_ok=True)

for file, label in zip(x_train, y_train):
    y, sr = librosa.load(AUDIO_DIR + file)
    for n_step in n_steps:
        # `sr` is passed by keyword: accepted by the old positional signature
        # and required by newer librosa releases.
        y_changed = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_step)
        # round() before int(): int() alone truncates float error in n_step * 100.
        step_tag = str(abs(int(round(n_step * 100))))
        out_name = str(label).zfill(2) + '-pitch-' + step_tag + '-' + file
        librosa.output.write_wav(AUGMENTED_DIR + out_name, y_changed, sr)

In [14]:
# Additive-noise augmentation: for every training clip, write one copy per
# entry in `noise_factors`, each with scaled standard-normal noise added,
# into AUGMENTED_DIR.
# Output name: <2-digit label>-noise-<factor * 100>-<original file name>
#
# The noise is drawn from NumPy's global RNG, so the exact samples depend on
# the RNG state when this cell runs (seed cell + anything drawn since then).
# NOTE(review): librosa.output.write_wav only exists in librosa < 0.8 — confirm
# the pinned librosa version before re-running this cell.
AUDIO_DIR = './ESC-50-master/audio/'
AUGMENTED_DIR = './ESC-50-master/augmented_audio/'
noise_factors = [0.01, 0.04]

# Make sure the output folder exists; writing into a missing directory fails.
os.makedirs(AUGMENTED_DIR, exist_ok=True)

for file, label in zip(x_train, y_train):
    y, sr = librosa.load(AUDIO_DIR + file)
    for noise_factor in noise_factors:
        noise = np.random.randn(len(y))
        # randn yields float64, so cast the sum back to the clip's own dtype.
        # y.dtype (rather than type(y[0])) also works for an empty clip.
        y_changed = (y + noise_factor * noise).astype(y.dtype)
        # round() before int(): int() alone truncates float error in factor * 100.
        noise_tag = str(abs(int(round(noise_factor * 100))))
        out_name = str(label).zfill(2) + '-noise-' + noise_tag + '-' + file
        librosa.output.write_wav(AUGMENTED_DIR + out_name, y_changed, sr)