**Reproduction of:**

**Deep Convolutional Neural Networks and Data Augmentation for Environmental Sound Classification**

Justin Salamon and Juan Pablo Bello

Some code taken from https://github.com/jaron/deep-listening/blob/master/4-us8k-cnn-salamon.ipynb

# Preprocessing

In [1]:
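# Disabled one-off pass: re-saves every fold{k}_x.npy (k = 1..10) found in
# load_dir and augmented_load_dir as float32, overwriting each file in place.
#import os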

#for dir in [load_dir, augmented_load_dir]:
#    for k in range(1,10+1):
#        filename = "fold"+str(k)+"_x.npy"
#        file_path = os.path.join(dir,filename)
#        file= np.load(file_path, allow_pickle = True)
#        file = file.astype('float32') 
#        np.save(file_path, file, allow_pickle = True)

In [2]:
#!git clone https://github.com/grudloff/Salomon2017Replication

In [3]:
import numpy as np
import math
import gc
#gc.set_debug(gc.DEBUG_STATS)

from preprocessing_augmented import load_folds
from model import build_model
from evaluation import evaluate

from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import Sequence
from tensorflow.keras.backend import clear_session

from sklearn.metrics import confusion_matrix

import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

from pywt import dwt2

# Directories handed to load_folds() as its first and second arguments.
# NOTE(review): presumably the original and the augmented UrbanSound8K
# features respectively; confirm against preprocessing_augmented.load_folds.
load_dir = "CNN-Sound/data/us8k"
augmented_load_dir = "CNN-Sound/data/us8k-augmented"

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.[0m
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.[0m
  from numba.decorators import jit as optional_jit


In [4]:
# Wavelet name passed to dwt2() for every image.
wavelet = 'bior1.5'

# Height and width of each DWT sub-band; used both to allocate the generator
# output and as the input size given to build_model().
# NOTE(review): presumably 68 = floor((128 + 10 - 1) / 2), i.e. 128x128 inputs
# with this wavelet's filter length under pywt's default padding; TODO confirm.
frames = 68
bands = 68

# Number of sub-band arrays stacked along the last axis of each sample.
channels = 4

def wavedec(batch_x):
    """Standardise every image in a batch and expand it into DWT sub-bands.

    Each item is squeezed, shifted to zero mean and scaled to unit standard
    deviation, then passed through ``dwt2`` with the module-level ``wavelet``.
    The four coefficient arrays it returns are stacked along a new last axis.

    Parameters
    ----------
    batch_x : numpy.ndarray
        Batch of images, iterated along the first axis. Every item must
        squeeze to a 2-D array whose sub-bands have shape ``(frames, bands)``.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape
        ``(batch_x.shape[0], frames, bands, channels)``.
    """
    out = np.empty(shape=(batch_x.shape[0], frames, bands, channels), dtype='float32')

    for i, img in enumerate(batch_x):
        img = np.squeeze(img)

        centered = img - np.mean(img)
        spread = np.std(img)
        # A constant image (e.g. a silent clip) has zero spread; dividing by it
        # would turn the whole sample into NaNs. Leave such images as the
        # all-zero centred array instead.
        if spread > 0:
            centered = centered / spread

        # 2D Discrete Wavelet Transform: one approximation array plus three
        # detail arrays, in the order returned by dwt2.
        LL, (LH, HL, HH) = dwt2(centered, wavelet)
        out[i] = np.stack([LL, LH, HL, HH], axis=-1)  # shape: [frames, bands, 4]

    return out

class waveletGenerator(Sequence):
    """Keras ``Sequence`` that serves wavelet-transformed mini-batches.

    Samples are selected through an index array so they can be re-ordered
    without touching the underlying data; each batch of inputs is passed
    through ``wavedec`` when it is requested.

    Parameters
    ----------
    x_set : numpy.ndarray
        Input samples, indexed along the first axis.
    y_set : array-like
        Targets aligned with ``x_set``; converted with ``np.array``.
    batch_size : int
        Number of samples per batch (the last batch may be smaller).
    shuffle : bool
        When truthy, the sample order is permuted at construction and again
        after every epoch, using NumPy's global random state.
    """

    def __init__(self, x_set, y_set, batch_size, shuffle):
        # Lets the base class set up its own state where it defines any.
        super().__init__()
        self.x, self.y = x_set, np.array(y_set)
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.indexes = np.arange(self.x.shape[0])
        # on_epoch_end() only runs after an epoch has finished, so without
        # this the first epoch would see the samples in their stored order.
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        """Return the number of batches per epoch, counting a partial last one."""
        return math.ceil(self.x.shape[0] / self.batch_size)

    def __getitem__(self, idx):
        """Return the ``(inputs, targets)`` pair for batch number ``idx``."""
        indexes = self.indexes[idx * self.batch_size: (idx + 1) * self.batch_size]
        batch_x = wavedec(self.x[indexes])
        batch_y = self.y[indexes]

        return batch_x, batch_y

    def on_epoch_end(self):
        """Re-permute the sample order after each epoch when shuffling is on."""
        if self.shuffle:
            np.random.shuffle(self.indexes)

10-fold cross-validation.

In [5]:
def train_fold(f):
    """Train a fresh model for fold ``f`` and return its evaluation result.

    Loads the three data splits for the fold, wraps each one in a
    ``waveletGenerator``, builds a model, fits it with early stopping and
    finally scores it with ``evaluate``.

    NOTE(review): early stopping monitors the ``test`` split while the final
    score is computed on the ``val`` split. The names look swapped relative to
    their roles; confirm against ``load_folds`` before renaming anything.

    Parameters
    ----------
    f : int
        Fold number forwarded to ``load_folds``.

    Returns
    -------
    Whatever ``evaluate(model, val_gen, val_y)`` returns.
    """
    batch_size = 100  # shared by all three generators

    # load data
    train_x, test_x, val_x, train_y, test_y, val_y = load_folds(load_dir, augmented_load_dir, f)
    # alternative loader kept for reference:
    #train_gen, test_x, val_x, test_y, val_y = load_folds_pescador(load_dir,augmented_load_dir, f)

    # Only the training generator re-orders its samples; the other two keep
    # their order, and val_y is later passed to evaluate() next to val_gen.
    train_gen = waveletGenerator(train_x, train_y, batch_size=batch_size, shuffle=True)
    test_gen = waveletGenerator(test_x, test_y, batch_size=batch_size, shuffle=False)
    val_gen = waveletGenerator(val_x, val_y, batch_size=batch_size, shuffle=False)

    print("Building model...")
    model = build_model(f_size=3, frames=frames, bands=bands, channels=channels)

    # Fit on the training generator; stop once the monitored metric has not
    # improved for 15 epochs and roll back to the best weights seen.
    print("Training model...")
    early_stopping = EarlyStopping(restore_best_weights=True, patience=15)
    model.fit(
        train_gen,
        validation_data=test_gen,
        callbacks=[early_stopping],
        epochs=100,
        workers=0,
    )

    # Score the fitted model on the split that was not used during fitting.
    print("Evaluating model...")
    return evaluate(model, val_gen, val_y)

In [None]:
# Per-fold scores (slot f-1 belongs to fold f) and the running sum of the
# per-fold confusion matrices.
acc = np.zeros(10)
roc = np.zeros(10)
CM = 0

for f in range(1, 10 + 1):
    fold_roc, fold_acc, cm = train_fold(f)
    roc[f - 1] = fold_roc
    acc[f - 1] = fold_acc

    # Release what the finished fold left behind before starting the next one.
    clear_session()  # clear tensorflow variables
    gc.collect()     # collect garbage

    CM += cm

print('\nAverage R.O.C:', np.mean(roc))
print('Average Accuracy:', np.mean(acc))

# NOTE(review): the two figures below mention an f-score, which the code in
# this notebook does not compute; they look inherited from another experiment.
# Confirm or remove.
# using all folds: best ROC = 0.91, f-score = 0.592 (50 epochs)
# using 2 folds: average ROC = 0.792, average f-score = 0.335

# if you want to save the model, uncomment this...
# NOTE: `model` is local to train_fold(), so it would have to be returned or
# saved from inside that function for the lines below to work.
#filepath = "models/salamon-cnn-model.h5"
#model.save(filepath)


*** Train on {2, 3, 4, 5, 6, 7, 9} Validate on 1 Test on 8 ***
val shape:  (873, 128, 128, 1)
test shape:  (806, 128, 128, 1)
train shape:  (148113, 128, 128, 1)
Building model...
Training model...
  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 1482 steps, validate for 9 steps
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100

In [None]:
# Spread of the per-fold accuracies collected by the cross-validation loop.
fig, ax = plt.subplots()
ax.boxplot(acc)
ax.set(title="Accuracy across cross-validation folds", ylabel="Accuracy")
plt.show()

In [None]:
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

# Class names used to label both axes of the summed confusion matrix.
# NOTE(review): assumes the label indices behind CM follow this order, and the
# row/column orientation is whatever evaluate() produces; confirm both.
classes = [
    'air_conditioner',
    'car_horn',
    'children_playing',
    'dog_bark',
    'drilling',
    'engine_idling',
    'gun_shot',
    'jackhammer',
    'siren',
    'street_music',
]

df_cm = pd.DataFrame(CM, index=classes, columns=classes)

plt.figure(figsize=(10, 7))
# fmt='d' prints every cell as an integer.
sn.heatmap(df_cm, annot=True, fmt='d')

In [None]:
# Persist the per-fold accuracies and the summed confusion matrix.
# NOTE(review): "agumented" in the second file name is spelled exactly as it
# was before; it is kept in case other code loads that path.
np.save(file="acc_augmented_wav.npy", arr=acc)
np.save(file="cm_agumented_wav.npy", arr=CM)