# UrbanSound Project

## Libraries

In [None]:
# We start by importing all libraries
import numpy as np
import pandas as pd 
import os 
import tensorflow as tf 
import  librosa as lb 


from tensorflow.keras.utils import to_categorical 
from tensorflow.keras import Sequential , layers 

from sklearn.model_selection import train_test_split 
from sklearn.metrics import confusion_matrix , classification_report 
import IPython.display as ipd

## Data Analysis

In [None]:
# Load the metadata CSV that describes each clip of the audio dataset
data = pd.read_csv('/content/drive/MyDrive/UrbanSound8K/UrbanSound8K/metadata/UrbanSound8K.csv')

In [None]:
data.head()


# data.head() displays the first 5 rows of the DataFrame

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [None]:
# Summarise the DataFrame: index range, column names, non-null counts, dtypes and memory usage
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8732 entries, 0 to 8731
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   slice_file_name  8732 non-null   object 
 1   fsID             8732 non-null   int64  
 2   start            8732 non-null   float64
 3   end              8732 non-null   float64
 4   salience         8732 non-null   int64  
 5   fold             8732 non-null   int64  
 6   classID          8732 non-null   int64  
 7   class            8732 non-null   object 
dtypes: float64(2), int64(4), object(2)
memory usage: 545.9+ KB


In [None]:
# Preprocessing: inspect the audio classes.
# Group the metadata rows by numeric classID and select the human-readable 'class' column.
classes = data.groupby('classID')['class']
# Show the distinct class name(s) attached to each classID.
classes.unique()

classID
0     [air_conditioner]
1            [car_horn]
2    [children_playing]
3            [dog_bark]
4            [drilling]
5       [engine_idling]
6            [gun_shot]
7          [jackhammer]
8               [siren]
9        [street_music]
Name: class, dtype: object

In [None]:
# Function that extracts and returns numeric features from an audio file
def feature_extract(path):
    """Load an audio file and summarise it as a fixed-length MFCC vector.

    Parameters
    ----------
    path : str
        Location of the audio file, passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        The 128 MFCC coefficients, each averaged over axis 1 of the MFCC
        matrix, so the result holds one value per coefficient.
    """
    signal, sample_rate = lb.load(path)
    # Pass the signal and rate by keyword: recent librosa releases no longer
    # accept the audio positionally, and forwarding the rate returned by
    # load() keeps the MFCC computation tied to the rate actually used.
    mfcc = lb.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=128)
    return np.mean(mfcc, axis=1)

In [None]:
import warnings

# Silence FutureWarning messages emitted while the features are extracted.
warnings.simplefilter(action='ignore', category=FutureWarning)

# Root folder that contains the fold1 ... foldN sub-directories of clips.
AUDIO_ROOT = '/content/drive/MyDrive/UrbanSound8K/UrbanSound8K/audio'

x = []
y = []
# Extract one feature vector per metadata row and collect its class ID.
# Walking the three needed columns in lockstep avoids a scalar .loc lookup
# per value on every iteration.
rows = zip(data['fold'], data['slice_file_name'], data['classID'])
for i, (fold, file_name, class_id) in enumerate(rows):
    path = f"{AUDIO_ROOT}/fold{fold}/{file_name}"
    x.append(feature_extract(path))
    y.append(class_id)
    # Progress indicator: print the row counter every 50 files.
    if i % 50 == 0:
        print(i)


# Stack the per-file feature vectors into the feature matrix.
X = np.array(x)

# Integer class labels, in the same row order as X.
y = np.array(y)

0
50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850
900
950
1000
1050
1100
1150
1200
1250
1300
1350
1400
1450
1500
1550
1600
1650
1700
1750
1800
1850
1900
1950
2000
2050
2100
2150
2200
2250
2300
2350
2400
2450
2500
2550
2600
2650
2700
2750
2800
2850
2900
2950
3000
3050
3100
3150
3200
3250
3300
3350
3400
3450
3500
3550


  n_fft, y.shape[-1]


3600
3650
3700
3750
3800
3850
3900
3950
4000
4050
4100
4150
4200
4250
4300
4350
4400
4450
4500
4550
4600
4650
4700
4750
4800
4850
4900
4950
5000
5050
5100
5150
5200
5250
5300
5350
5400
5450
5500
5550
5600
5650
5700
5750
5800
5850
5900
5950
6000
6050
6100
6150
6200
6250
6300
6350
6400
6450
6500
6550
6600
6650
6700
6750
6800
6850
6900
6950
7000
7050
7100
7150
7200
7250
7300
7350
7400
7450
7500
7550
7600
7650
7700
7750
7800
7850
7900
7950
8000
8050
8100
8150
8200
8250
8300


  n_fft, y.shape[-1]
  n_fft, y.shape[-1]


8350
8400
8450
8500
8550
8600
8650
8700


In [None]:
# One-hot encode the integer class IDs.
# NOTE: this rebinds y, so re-running this cell on its own would encode the
# already-encoded array a second time.
y = to_categorical(y)
y     # Display the encoded label array



array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.]], dtype=float32)

## Model & Prediction

In [None]:
X_train , X_test , y_train , y_test = train_test_split(X , y , test_size = 0.2 , random_state = 40)

# Split the data set: 20% of the samples are held out as the test set, and random_state fixes the shuffle so the split is reproducible.

# The model learns from the training set; the test set is kept aside to evaluate it afterwards. A validation set, when one is used, guides tuning during training, but no separate validation set is created here.


In [None]:
# Fully connected classifier: 128 input features -> five ReLU hidden layers
# of decreasing width -> 10-way softmax output.
hidden_widths = (500, 200, 100, 50)
model = Sequential(
    [layers.Dense(800, activation='relu', input_shape=(128,))]
    + [layers.Dense(width, activation='relu') for width in hidden_widths]
    + [layers.Dense(10, activation='softmax')]
)

In [None]:
# Print the layer-by-layer architecture with output shapes and parameter counts
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 800)               103200    
                                                                 
 dense_1 (Dense)             (None, 500)               400500    
                                                                 
 dense_2 (Dense)             (None, 200)               100200    
                                                                 
 dense_3 (Dense)             (None, 100)               20100     
                                                                 
 dense_4 (Dense)             (None, 50)                5050      
                                                                 
 dense_5 (Dense)             (None, 10)                510       
                                                                 
Total params: 629,560
Trainable params: 629,560
Non-trai

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss for the
# one-hot labels, and accuracy reported as the metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 25 epochs on the training split only (no validation data is passed);
# the value returned by fit() is kept in `history`.
history = model.fit(X_train , y_train , epochs = 25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [None]:
# Turn the predicted class probabilities and the one-hot test labels back
# into integer class IDs, then compare them in a confusion matrix.
probabilities = model.predict(X_test)
y_pred = probabilities.argmax(axis=1)
y_true = y_test.argmax(axis=1)

print(confusion_matrix(y_true, y_pred))

[[188   0   1   0   1   1   0   0   0   1]
 [  0  66   1   3   2   0   0   1   2   2]
 [  3   1 170   3   0   6   6   1   5   7]
 [  0   0   5 178   1   4   2   0  26   3]
 [  4   0   2   0 165   0   0  13   1   0]
 [  0   0   0   1   0 204   0   0   1   0]
 [  0   0   1   5   0   0  56   0   0   1]
 [  0   0   1   0   3   1   0 191   0   0]
 [  0   1   0   0   0   3   0   0 185   2]
 [  1   4   7   9  10   7   0   8   5 165]]


In [None]:
# Shape of the one-hot test labels: (number of test samples, number of classes)
y_test.shape

(1747, 10)

In [None]:
# Group the metadata by classID and select the 'class' column so each ID can be mapped to its name
class_names = data.groupby('classID')['class']
# Show the distinct class name(s) for each classID
class_names.unique()

classID
0     [air_conditioner]
1            [car_horn]
2    [children_playing]
3            [dog_bark]
4            [drilling]
5       [engine_idling]
6            [gun_shot]
7          [jackhammer]
8               [siren]
9        [street_music]
Name: class, dtype: object

In [None]:
# Build a lookup table indexed by classID from the per-ID unique class names.
# Each cell of the 'class' column holds an array of names (a single element
# per ID in this data set), not a bare string.
class_names = pd.DataFrame(class_names.unique() )
class_names

Unnamed: 0_level_0,class
classID,Unnamed: 1_level_1
0,[air_conditioner]
1,[car_horn]
2,[children_playing]
3,[dog_bark]
4,[drilling]
5,[engine_idling]
6,[gun_shot]
7,[jackhammer]
8,[siren]
9,[street_music]


In [None]:
# Classify one audio file and play it back.
test_path = '/content/drive/MyDrive/UrbanSound8K/UrbanSound8K/audio/fold7/104625-4-0-27.wav'
# Wrap the single feature vector in a list so the model receives a batch of one.
feature = np.array([feature_extract(test_path)])
# Index of the highest-scoring class for the only sample in the batch.
ID = np.argmax(model.predict(feature)[0])
print(ID)
# Each cell of class_names['class'] is a one-element array of names, so take
# element 0 to print the bare label instead of its list-like repr.
print(class_names.loc[ID , 'class'][0])
ipd.Audio(test_path)

4
['drilling']
