# Urban Sound Classification using MFCC
## 2 Class Classification
Dataset: UrbanSound8K

### Create MFCC Dataset

In [1]:
# Install the `progressbar` dependency into the environment of the running kernel.
# pip's `_internal.main` is not a supported API (it is what triggers the warning
# pip prints about invoking it directly), so pip is run as a module through the
# interpreter that backs this kernel instead.
import subprocess
import sys

# check_call raises CalledProcessError if the install fails and returns 0 on success.
subprocess.check_call([sys.executable, "-m", "pip", "install", "progressbar"])

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.






0

### Import Libraries

In [2]:
import progressbar
import time
import os
import struct
import matplotlib.pyplot as plt
import IPython.display as ipd
import pandas as pd
import numpy as np
import librosa
import DataCollection as dc # a local module

### Metadata Retrieval

In [3]:
# Load the UrbanSound8K metadata table (one row per audio slice) and preview
# the first rows to confirm the columns used later (slice_file_name, fold, class).
data = pd.read_csv("UrbanSound8K/metadata/UrbanSound8K.csv")
data.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [4]:
# Preallocate one (feature, label) row per sample kept by the feature-extraction
# loop: 9 background classes x 42 clips each (378) + 374 gun_shot clips = 752 rows.
# dtype=object because column 0 holds an MFCC vector and column 1 a label string.
dataset = np.zeros(shape=(752,2), dtype=object) # 5:5 -- presumably the roughly 50/50 unknown:gun_shot balance; TODO confirm
dataset.shape

(752, 2)

### Feature Extraction

In [5]:
# Build the two-class MFCC dataset.
#
# Every metadata row is visited in order. gun_shot clips are all kept with their
# own label. Each of the nine background classes contributes at most
# MAX_PER_BACKGROUND_CLASS clips, and those clips are relabelled 'unknown'.
# For each kept clip the 40 MFCC coefficients are averaged over time and stored
# together with the label in the next free row of the preallocated `dataset`.
MAX_PER_BACKGROUND_CLASS = 42
GUN_SHOT_REPORT_FROM = 374  # print the running gun_shot count once it reaches this value
BACKGROUND_CLASSES = (
    'air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling',
    'engine_idling', 'jackhammer', 'siren', 'street_music',
)

bar = progressbar.ProgressBar(maxval=data.shape[0], widgets=[progressbar.Bar('*', '||', '||'), ' ', progressbar.Percentage()])
bar.start()

j = 0  # index of the next free row in `dataset`
background_counts = dict.fromkeys(BACKGROUND_CLASSES, 0)
cnt_gun_shot = 0

for i in range(data.shape[0]):
    fullpath, class_id = dc.path_class(data, data.slice_file_name[i])

    if class_id == 'gun_shot':
        cnt_gun_shot += 1
        if cnt_gun_shot >= GUN_SHOT_REPORT_FROM:
            print('gun_shot: ', cnt_gun_shot)
    else:
        if class_id in background_counts:
            background_counts[class_id] += 1
            seen = background_counts[class_id]
            if seen > MAX_PER_BACKGROUND_CLASS:
                # Quota for this class is already full: skip the clip, but keep
                # the progress bar in step with the rows visited.
                bar.update(i + 1)
                continue
            if seen == MAX_PER_BACKGROUND_CLASS:
                print(class_id + ': ', seen)
        # Every non-gun_shot clip that is kept collapses into a single class.
        class_id = 'unknown'

    try:
        X, sample_rate = librosa.load(fullpath, res_type='kaiser_fast')
        # Average each of the 40 coefficients over all frames -> one 40-value vector.
        mfcc = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    except Exception as err:
        # Report the path that failed. The previous handler referenced an undefined
        # name here, which raised NameError and hid the real error.
        print("Error encountered while parsing file: ", fullpath, err)
        # NOTE(review): a (None, None) row is still stored, as before; such rows
        # must be filtered out before training.
        mfcc, class_id = None, None

    dataset[j, 0], dataset[j, 1] = mfcc, class_id
    j += 1
    bar.update(i + 1)

bar.finish()

||                                                                      ||   0%

air_conditioner:  42


||*                                                                     ||   2%

engine_idling:  42


||**                                                                    ||   3%

drilling:  42
jackhammer:  42


||***                                                                   ||   4%

siren:  42


||****                                                                  ||   6%

dog_bark:  42
children_playing:  42
street_music:  42


||**************                                                        ||  20%

car_horn:  42


||*******************************************************************   ||  96%

gun_shot:  374


||**********************************************************************|| 100%


In [6]:
# Persist the (feature, label) array to dataset_mfcc_2class.npy.
# allow_pickle is required because the array has dtype=object.
np.save("dataset_mfcc_2class", dataset, allow_pickle=True)

In [7]:
# Reload the saved array to check that it round-trips from disk.
# NOTE: allow_pickle=True unpickles Python objects, which can execute arbitrary
# code; only load .npy files that you produced yourself.
l = np.load("dataset_mfcc_2class.npy", allow_pickle=True)

In [8]:
# Label (column 1) of the last row (index 751) of the reloaded array.
l[751,1]

'gun_shot'

----
### Data Preprocessing

In [9]:
# Turn the saved (feature, label) rows into train/validation arrays for Keras.
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

data = pd.DataFrame(np.load("dataset_mfcc_2class.npy",allow_pickle= True))
data.columns = ['feature', 'label']

# Stack the per-clip MFCC vectors into a 2-D feature matrix; labels stay strings for now.
X = np.array(data.feature.tolist())
y = np.array(data.label.tolist())

# train_test_split's default holds out 25% of the rows for validation.
X,val_x,y,val_y = train_test_split(X,y)

# Fit the encoder on the training labels ONLY and reuse that mapping for the
# validation labels. Re-fitting on the validation split could assign different
# integer codes (or a different number of columns) if a class were missing there.
lb = LabelEncoder()
y = np_utils.to_categorical(lb.fit_transform(y))
val_y = np_utils.to_categorical(lb.transform(val_y), num_classes=len(lb.classes_))

Using TensorFlow backend.


### Model Structure

In [10]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

# Width of the one-hot label matrix = number of output units.
num_labels = y.shape[1]

# Fully connected classifier over the 40 averaged MFCC coefficients:
# two hidden blocks (Dense -> ReLU -> 50% dropout) followed by a softmax output.
model = Sequential([
    Dense(512, input_shape=(40,)),
    Activation('relu'),
    Dropout(0.5),

    Dense(256),
    Activation('relu'),
    Dropout(0.5),

    Dense(num_labels),
    Activation('softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [11]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 512)               20992     
_________________________________________________________________
activation_1 (Activation)    (None, 512)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 256)               131328    
_________________________________________________________________
activation_2 (Activation)    (None, 256)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 2)                

### Train Model

In [12]:
# Train for 32 epochs in mini-batches of 64 samples; (val_x, val_y) is passed as
# validation data so validation metrics are reported alongside the training ones.
model.fit(X, y, batch_size=64, epochs=32, validation_data=(val_x, val_y))




Train on 564 samples, validate on 188 samples
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


<keras.callbacks.callbacks.History at 0x7f1c7522b0>

----
### Save & Load Model

In [13]:
# Write the trained model to mfcc_2class.h5 so it can be reused without retraining.
model.save("mfcc_2class.h5")

In [14]:
# Reload the model from disk; the test cell below uses this reloaded copy.
from keras.models import load_model
model = load_model("mfcc_2class.h5")

----
### Model Test

In [15]:
# Spot-check the model on the first SAMPLES_PER_CLASS rows of each class in `data`.
# Every row whose label is not 'gun_shot' counts towards the 'unknown' quota.
# The printed class indices follow LabelEncoder's sorted order:
# gun_shot -> 0, unknown -> 1.
# NOTE: `data` also contains the rows used for training, so this is a sanity
# check rather than a held-out evaluation.
SAMPLES_PER_CLASS = 10
shown = {'gun_shot': 0, 'unknown': 0}  # rows already printed per class

print('gun_shot = [0] & unknown = [1]\n')
# Iterate over however many rows were loaded instead of a hard-coded 752.
for i in range(len(data)):
    if all(count >= SAMPLES_PER_CLASS for count in shown.values()):
        break

    group = 'gun_shot' if data.label[i] == 'gun_shot' else 'unknown'
    if shown[group] >= SAMPLES_PER_CLASS:
        continue

    shown[group] += 1
    # The model expects a batch, so wrap the single feature vector in an array.
    sample = np.array([data.feature[i]])
    prediction = model.predict_classes(sample)
    print('label:', data.label[i], '| prediction:', prediction)

gun_shot = [0] & unknown = [1]

label: unknown | prediction: [1]
label: unknown | prediction: [0]
label: unknown | prediction: [0]
label: unknown | prediction: [0]
label: unknown | prediction: [0]
label: unknown | prediction: [0]
label: unknown | prediction: [0]
label: unknown | prediction: [0]
label: unknown | prediction: [0]
label: unknown | prediction: [1]
label: gun_shot | prediction: [0]
label: gun_shot | prediction: [1]
label: gun_shot | prediction: [0]
label: gun_shot | prediction: [0]
label: gun_shot | prediction: [1]
label: gun_shot | prediction: [0]
label: gun_shot | prediction: [1]
label: gun_shot | prediction: [1]
label: gun_shot | prediction: [1]
label: gun_shot | prediction: [0]


----
### Result
MFCC
- training accuracy: 0.8191
- training loss: 0.4226
- validation accuracy: 0.8351
- validation loss: 0.3745

Test with 10 samples per class
- accuracy in predicting unknown: 0.2
- accuracy in predicting gun_shot: 0.5