### Preprocessing


In [None]:
import numpy as np
import librosa
import librosa.display
import os
import csv
%matplotlib inline
import matplotlib.pyplot as plt
import random



In [None]:
# Root folder holding one sub-folder of wav files per spoken word
# (download the files from kaggle first).
train_dir = './data/train/audio/'

# The ten target commands followed by 'silence' and 'unknown'; the position
# in this list is used as the numeric class label when the sets are built.
classes = 'yes no up down left right on off stop go silence unknown'.split()

Run this cell to move the background noise folder out of the audio directory. We will create silence samples from these files afterwards.

In [None]:
%%bash
# Move the noise recordings out of the per-class audio folders. The guard
# keeps the cell safe to re-run after the folder has already been moved.
if [ -d data/train/audio/_background_noise_ ]; then
    mv data/train/audio/_background_noise_ data/train
fi
ls data/train

Split all the audio files from the \_background\_noise\_ folder into 1-second chunks.

In [None]:
def split_arr(arr, chunk_len=16000):
    """
    split an array along its first axis into consecutive chunks

    Args:
        arr (np.ndarray): samples to split
        chunk_len (int): number of samples per chunk; the default 16000
            is one second at the 16 kHz rate used in this notebook
    Returns:
        list of arrays; every chunk has chunk_len samples except the last
        one, which holds the remainder and may be shorter
    Raises:
        ValueError: if chunk_len is not positive
    """
    if chunk_len <= 0:
        raise ValueError('chunk_len must be a positive integer')
    # cut before every multiple of chunk_len that lies inside the array
    cut_points = np.arange(chunk_len, len(arr), chunk_len)
    return np.split(arr, cut_points)

In [None]:
def create_silence():
    """
    build 'silence' training clips from the background noise recordings

    Reads every .wav file in data/train/_background_noise_/ at 16 kHz,
    cuts it into 16000-sample fragments with split_arr and writes each
    fragment to train_dir + 'silence/' as frag<index>_<original file name>.
    The last fragment of each file holds the remainder and may be shorter
    than one second.
    """
    # librosa.output.write_wav was removed in librosa 0.8. It was a thin
    # wrapper around scipy.io.wavfile.write, so that is called directly.
    from scipy.io import wavfile

    noise_dir = 'data/train/_background_noise_/'
    silence_dir = train_dir + 'silence/'
    os.makedirs(silence_dir, exist_ok=True)  # once, instead of once per file

    for file in os.listdir(noise_dir):
        if not file.endswith('.wav'):  # skip anything that is not a wav file
            continue
        sig, rate = librosa.load(noise_dir + file, sr = 16000)
        for ind, arr in enumerate(split_arr(sig)):
            filename = 'frag%d' %ind + '_%s' %file # example: frag0_running_tap.wav
            wavfile.write(silence_dir + filename, 16000, arr)
  

In [None]:
# writes the silence fragments to train_dir + 'silence/'
create_silence()

The silence class is underrepresented, so it is probably a good idea to make more silence samples yourself,
for example by recording while walking or driving around without speaking.

Build three lists of file names: one for the training set, one for the validation set and one with all files. Also build a dictionary with the file count per class.

In [None]:
# Keep only real directories: a stray file in train_dir would break the
# os.listdir(train_dir + folder) calls made later. Sort them, because
# os.listdir returns entries in arbitrary order and the class indices
# derived from all_classes should be the same on every machine.
folders = sorted(f for f in os.listdir(train_dir)
                 if os.path.isdir(os.path.join(train_dir, f)))

# put folders in same order as in the classes list, used when making sets:
# first the 11 named classes (everything in `classes` except 'unknown'),
# then every remaining word folder
all_classes = classes[:11] + [cl for cl in folders if cl not in classes]
print(all_classes)

In [None]:
with open('./data/train/validation_list.txt') as val_list:
    validation_list = [row[0] for row in csv.reader(val_list)]
assert len(validation_list) == 6798, 'file not loaded'

"""
#if you want to add the files in testing_list.txt to the validation list:

with open('./data/train/testing_list.txt') as test_list:
    testing_list = [row[0] for row in csv.reader(test_list)]
assert len(testing_list) == 6835, 'file not loaded'

#combine into validation set
validation_list.extend(testing_list)
"""
#add silence files to validation_list: every 10th file, taken in sorted
#order so the same files are held out on every run
for i, file in enumerate(sorted(os.listdir(train_dir + 'silence/'))):
    if i%10==0:
        validation_list.append('silence/'+file)

# set copy for constant-time membership tests in the loop below
validation_set = set(validation_list)

training_list = []
all_files_list = []
class_counts = {}

for folder in folders:
    files = os.listdir(train_dir + folder)
    # real number of files in the folder; the last loop index would be one
    # too low and would leave empty folders without an entry
    class_counts[folder] = len(files)
    for f in files:
        path = folder + '/' + f
        all_files_list.append(path)
        if path not in validation_set:
            training_list.append(path)

#remove filenames from validation_list that don't exist anymore (due to eda)
#sorted so the validation order is reproducible between runs
validation_list = sorted(validation_set.intersection(all_files_list))

In [None]:
# the two splits together must account for every file that was listed
assert len(validation_list)+len(training_list)==len(all_files_list), 'error'

In [None]:
# sanity check: show one arbitrary training file name and both split sizes
print(
    training_list[345],
    'size training set: ', len(training_list),
    'size validation set: ', len(validation_list),
)

In [None]:
# inspect the per-folder tally gathered while the file lists were built
print(class_counts)

plot a wav file

In [None]:
# Load one example recording and summarise its raw samples before plotting.
x, r = librosa.load(train_dir + 'yes/bfdb9801_nohash_0.wav', sr = 16000)
summary = []
for label, stat in (('min: ', np.min),
                    ('\nmax: ', np.max),
                    ('\nmean: ', np.mean),
                    ('\nmedian: ', np.median),
                    ('\nvariance: ', np.var),
                    ('\nlength: ', len)):
    summary += [label, stat(x)]
print(*summary)
plt.plot(x)

### turn all wav files into spectrograms

In [None]:
def make_spec(file, file_dir = train_dir, flip = False, ps = False, st = 4):
    """
    create a melspectrogram from the amplitude of the sound

    Only the first second (16000 samples at 16 kHz) of the file is used;
    shorter recordings are padded to one second.

    Args:
        file (str): filename
        file_dir (str): directory path
        flip (bool): reverse time axis
        ps (bool): pitch shift
        st (int): half-note steps for pitch shift
    Returns:
        float32 np.array with shape (122,85) (time, freq)
    """
    sig, rate = librosa.load(file_dir + file, sr = 16000)
    if len(sig) < 16000: # pad shorter than 1 sec audio with ramp to zero
        sig = np.pad(sig, (0,16000-len(sig)), 'linear_ramp')
    if ps:
        # sr and n_steps are keyword-only since librosa 0.10; passing them by
        # keyword also works on older versions
        sig = librosa.effects.pitch_shift(sig, sr = rate, n_steps = st)
    # 512-sample windows every 128 samples without centring give
    # 1 + (16000 - 512) // 128 = 122 frames for one second of audio
    stft = librosa.stft(sig[:16000], n_fft = 512,
                        hop_length = 128,
                        center = False)
    # take the magnitude explicitly: given the complex STFT, amplitude_to_db
    # warns that phase is discarded and then does exactly this
    D = librosa.amplitude_to_db(np.abs(stft), ref = np.max)
    # NOTE(review): the mel filterbank is applied to dB values, and sr is left
    # at the melspectrogram default (22050) although the audio is 16 kHz.
    # Left unchanged so the features match previously saved arrays -- confirm
    # whether this is intended.
    S = librosa.feature.melspectrogram(S=D, n_mels = 85).T
    if flip:
        S = np.flipud(S)
    return S.astype(np.float32)

In [None]:
# make_spec returns (time, freq), so time is drawn on the y axis and mel on x
librosa.display.specshow(
    make_spec('yes/bfdb9801_nohash_0.wav'),
    sr = 16000, hop_length = 128,
    x_axis='mel', y_axis='time',
    fmax=8000,
)

In [None]:
# expected: (122, 85), i.e. (time, freq) as stated in the make_spec docstring
make_spec('yes/bfdb9801_nohash_0.wav').shape

In [None]:
def create_sets(file_list = training_list, unknowns = True):
    """
    turn a list of wav files into a spectrogram array and a label array

    Args:
        file_list (list of str): file paths relative to train_dir, in the
            form '<class folder>/<file name>'
        unknowns (bool): if True, every folder that is not listed in
            `classes` gets label 11 ('unknown'); if False, each folder gets
            its own label, taken from its position in `all_classes`
    Returns:
        X_array: np.array with shape (len(file_list), 122, 85)
        Y_array: np.array with shape (len(file_list),) holding class indices
    """
    X_array = np.zeros([len(file_list),122,85])
    Y_array = np.zeros([len(file_list)])
    for ind, file in enumerate(file_list):
        if ind%2000 == 0:
            print(ind, file)
        try:
            X_array[ind] = make_spec(file)
        except ValueError as err:
            # best effort: report the actual error (not just the exception
            # class) and leave this row as zeros
            print(ind, file, err)
        label_name = file.split('/')[0]  # class folder part of the path
        if not unknowns:
            Y_array[ind] = all_classes.index(label_name)
        elif label_name in classes:
            Y_array[ind] = classes.index(label_name)
        else:
            Y_array[ind] = 11  # index of 'unknown' in classes
    return X_array, Y_array
        
        
        

In [None]:
# builds the spectrogram and label arrays for every file in training_list
X_train, Y_train = create_sets() # takes a while

In [None]:
# (number of training files, 122, 85)
X_train.shape

In [None]:
# look at one arbitrary spectrogram from the training array
librosa.display.specshow(
    X_train[6500],
    sr = 16000, hop_length = 128,
    x_axis='mel', y_axis='time',
    fmax=8000,
)

In [None]:
# label 11 is 'unknown'; with unknowns=True create_sets never assigns 12,
# so the last value printed should be False
print(Y_train.shape, 11 in Y_train, 12 in Y_train)

In [None]:
# summary statistics over every spectrogram value in the training array
summary = []
for label, stat in (('min: ', np.min),
                    ('\nmax: ', np.max),
                    ('\nmean: ', np.mean),
                    ('\nmedian: ', np.median),
                    ('\nvariance: ', np.var)):
    summary += [label, stat(X_train)]
print(*summary)

In [None]:
# ravel() gives a flat view of the contiguous array, whereas flatten() would
# copy the whole training set just to draw the histogram; the trailing ';'
# hides the long (counts, bins, patches) repr
plt.hist(X_train.ravel(), bins = 50);

Save the training sets: add a channel dimension for Keras and
shift the values by a constant offset (+1.3) to normalize them around zero.

In [None]:
# X gets a trailing channel axis and the constant offset 1.3 before saving;
# the labels are stored unchanged
np.save('data/X_train.npy', X_train[..., np.newaxis] + 1.3)
np.save('data/Y_train.npy', Y_train)

In [None]:
# same conversion for the files in validation_list
X_val, Y_val = create_sets(file_list = validation_list)

In [None]:
# ravel() gives a flat view of the contiguous array, whereas flatten() would
# copy it first; the trailing ';' hides the (counts, bins, patches) repr
plt.hist(X_val.ravel(), bins = 50);

In [None]:
# X gets a trailing channel axis and the constant offset 1.3 before saving;
# the labels are stored unchanged
np.save('data/X_val.npy', X_val[..., np.newaxis] + 1.3)
np.save('data/Y_val.npy', Y_val)