# Welcome to the Urban Sound Recognizer
### In this notebook we are going to explore the urban sound dataset and create a deep learning model to recognize the different sounds

#### Let's start by importing our libraries

In [1]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers import Conv1D, MaxPooling2D
from keras.optimizers import Adam
from keras.layers import BatchNormalization
from sklearn.preprocessing import LabelEncoder
from scipy.signal import decimate
from sklearn.model_selection import train_test_split

import math
import keras
import soundfile as sf
import numpy as np
import pandas as pd

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


#### Let's read our data

In [2]:
data_folder = './input/train/Train'
df = pd.read_csv('./input/train/train.csv')
df['file'] = df['ID'].apply(lambda x: data_folder+'/'+str(x)+'.wav')

### Our target is categorical so we are going to use label encoder and turn it into numerical data

In [3]:
label_encoder = LabelEncoder()
df["Class_id"] = label_encoder.fit_transform(df['Class'])
df['Class'].describe()

count           5435
unique            10
top       jackhammer
freq             668
Name: Class, dtype: object

In [4]:
train, test = train_test_split(df, test_size=0.2)

i = 0
train, test = train.reset_index(drop="index"), test.reset_index(drop="index")
X_train, y_train = train['file'], train['Class_id']
X_test, y_test = test['file'], test['Class_id']

### Let's read the sound wave data

In [5]:
sample_channels = [sf.read(f, dtype='float32')[0].shape
                   for f in X_test]

framerates = [sf.read(f, dtype='float32')[1]
              for f in X_test]

channels = [1 if len(x)==1 else x[1] for x in sample_channels]

samples = [x[0] for x in sample_channels]
length = np.array(samples) / np.array(framerates)

pd.DataFrame({'framerate': framerates, 'channel':channels, 
             'sample': samples, 'length': length}).describe()

Unnamed: 0,framerate,channel,sample,length
count,1087.0,1087.0,1087.0,1087.0
mean,47235.073597,1.920883,170096.017479,3.621208
std,12855.979028,0.270045,63506.586591,0.958603
min,8000.0,1.0,2205.0,0.05
25%,44100.0,2.0,176400.0,4.0
50%,44100.0,2.0,176400.0,4.0
75%,48000.0,2.0,192000.0,4.0
max,96000.0,2.0,384000.0,4.007937


### Given the size of our data we are going to need generators for training and testing

In [6]:
N_CLASSES = 10
RATE = 8000
CHANNELS = 1
LENGTH = 4
SAMPLES = RATE * LENGTH

def proc_sound(data, rate):
    data = decimate(data, rate//RATE, axis=0)
    if len(data.shape) == 2:
        data = np.sum(data, axis=1)
    pad = SAMPLES - len(data)
    if pad > 0:
        data = np.pad(data, ((0, pad)), mode = 'wrap')
    else:
        data = data[:SAMPLES]
    return data.reshape((-1, 1))

def train_generator(files, labels, augments, per_batch):
    while True:
        for i in range(0, len(files), per_batch):
            signals = []
            _labels = []
            for j in range(i, min(len(files), i+per_batch)):
                file = files[j]
                label = labels[j]
                data, rate = sf.read(file, dtype='float32')
                data = proc_sound(data, rate)
                for _ in range(augments+1):
                    roll = np.roll(data, np.random.randint(0, SAMPLES))
                    signals.append(roll)
                    _labels.append(label)
            yield np.array(signals), np.array(_labels)
            
def test_generator(files, labels, per_batch):
    while True:
        signals = []
        _labels = []
        for i in range(0, per_batch):
            j = np.random.randint(0, len(files))
            file = files[j]
            label = labels[j]
            data, rate = sf.read(file, dtype='float32')
            data = proc_sound(data, rate)
            signals.append(np.roll(data, np.random.randint(0, SAMPLES)))
            _labels.append(label)
        yield np.array(signals), np.array(_labels)
        
def steps_per_epoch(total, batch):
    return int(math.ceil(total / batch))

### Let's create our model

In [7]:
model = Sequential()
model.add(Conv1D(30,
                kernel_size = 25,
                strides = 5,
                activation='relu',
                input_shape=(SAMPLES, CHANNELS,)))
model.add(BatchNormalization())
model.add(Conv1D(50,
                kernel_size = 19,
                strides = 5,
                activation='relu',
                input_shape=(SAMPLES, CHANNELS,)))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(N_CLASSES, activation='softmax'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 6396, 30)          780       
_________________________________________________________________
batch_normalization_1 (Batch (None, 6396, 30)          120       
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 1276, 50)          28550     
_________________________________________________________________
batch_normalization_2 (Batch (None, 1276, 50)          200       
_________________________________________________________________
flatten_1 (Flatten)          (None, 63800)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               8166528   
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1290      
Total para

### Now let's compile it!

In [8]:
model.compile(loss='sparse_categorical_crossentropy',
             optimizer=Adam(0.01),
             metrics=['accuracy'])

### Let's fit it using the generator we previously created

In [9]:
per_batch = 100
epochs = 10

model.fit_generator(generator=train_generator(X_train, y_train, 1, per_batch),
                   steps_per_epoch=steps_per_epoch(len(X_train), per_batch),
                   epochs=epochs,
                   verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1c1b5825f8>

### And finally let's evaluate it!

In [14]:
score = model.evaluate_generator(test_generator(X_test,
                                    y_test, 
                                    per_batch),
                    steps=steps_per_epoch(len(X_test), per_batch),
                    verbose=1)



In [15]:
print(score)

[13.758993755687367, 0.14636363834142685]
