#### Download Dataset

UrbanSound8K -> [Link](https://urbansounddataset.weebly.com/download-urbansound8k.html)

In [None]:
#Download data using the link below
!wget https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz

In [None]:
#Confirm data has been downloaded: list the working directory and check that
#UrbanSound8K.tar.gz appears in the listing
!ls -l

In [None]:
#Extract the gzip-compressed tar archive (x = extract, v = print each file name, f = archive to read)
!tar -xvf UrbanSound8K.tar.gz

#### Load Dataset

In [None]:
import pandas as pd
import os
import librosa
import librosa.display
import numpy as np
from matplotlib import pyplot as plt

In [None]:
#Load the metadata table (one row per audio file)
metadata_path = 'UrbanSound8K/metadata/UrbanSound8K.csv'
df = pd.read_csv(metadata_path)
print('Number of audio files:', len(df))
#Preview five random rows
df.sample(5)

In [None]:
#Distinct class names found in the 'class' column (the names themselves, not a count)
df['class'].unique()

In [None]:
#Distinct values of the 'classID' column; this column is used later as the training label
df['classID'].unique()

### Extract Audio Features

Read an audio file

In [None]:
#Load one randomly chosen clip with librosa.
#librosa.load is called with its defaults here; per the librosa documentation those
#resample the audio to 22.05 kHz and mix it down to mono.

idx = np.random.randint(0, len(df))
fold_dir = 'UrbanSound8K/audio/fold' + str(df.loc[idx, "fold"])
file_name = fold_dir + '/' + df.loc[idx, "slice_file_name"]
print(file_name)
audio, sample_rate = librosa.load(file_name)

print('Sample rate:', sample_rate)
print('Audio array shape:', audio.shape)

In [None]:
#Display the sample values of the waveform loaded in the previous cell
audio

Visualize an audio signal

In [None]:
#Pick another random clip and plot its waveform, titled with the clip's class name
idx = np.random.randint(0, df.shape[0])
file_name = 'UrbanSound8K/audio/fold' + str(df.loc[idx, "fold"]) +'/' + df.loc[idx,"slice_file_name"]
audio, sample_rate = librosa.load(file_name)

#Newer librosa releases replace display.waveplot with display.waveshow.
#Prefer waveshow when it exists and fall back to waveplot on older installs.
plot_wave = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot
plot_wave(audio, sr=sample_rate)
plt.suptitle(df.loc[idx, 'class'])
plt.show()

Play the audio clip

In [None]:
!pip3 install pydub --quiet

In [None]:
from pydub import AudioSegment
from pydub.playback import play

#file_name is whatever the most recently executed cell assigned to it
#(the waveform-plot cell when the notebook is run top to bottom)
sound = AudioSegment.from_wav(file_name)
#NOTE(review): play() presumably uses the audio output of the machine running the kernel,
#so this may be silent on a remote/hosted kernel - confirm
play(sound)

Extract MFCC (Mel-frequency cepstral coefficient) features

In [None]:
#Compute 40 MFCCs for the clip currently held in `audio`
mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
#Transpose, then average along the first axis to collapse the clip into a single vector
mfccsscaled = mfccs.T.mean(axis=0)
print(mfccs.shape, mfccsscaled.shape)

In [None]:
#Display the averaged MFCC vector computed in the previous cell
mfccsscaled

Function to extract the MFCC features of a single audio file

In [None]:
def extract_mfcc_feature(idx):
    """Return the averaged MFCC vector for one row of the metadata table.

    Args:
        idx: Row label in the module-level ``df``. The row's ``fold`` and
            ``slice_file_name`` columns are used to build the audio file path.

    Returns:
        The result of averaging the 40 requested MFCCs over the transposed
        MFCC matrix's first axis, or ``None`` if anything fails while locating,
        loading or processing the clip (the failure is printed, not raised).
    """

    #df is only read here, so no `global` declaration is required

    try:

        #Sample audio signal
        file_name = 'UrbanSound8K/audio/fold' + str(df.loc[idx, "fold"]) + '/' + df.loc[idx, "slice_file_name"]
        audio, sample_rate = librosa.load(file_name)

        #Convert to MFCC feature
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        mfccsscaled = np.mean(mfccs.T, axis=0)

        return mfccsscaled

    except Exception as e:
        #Best effort: report which row failed and let the caller skip it
        print('Skipping row', idx, '-', e)
        return None

In [None]:
#Get features for all audio signals
features = []

#Visit each selected clip exactly once, in random order.
#Sampling WITHOUT replacement matters: drawing indices with replacement repeats some clips
#and never visits others, and a repeated clip can end up in both the training and test split.
#Lower num_samples to work on a smaller random subset.
num_samples = df.shape[0]
ids = np.random.choice(df.shape[0], size=num_samples, replace=False)

for i in ids:

    data = extract_mfcc_feature(i)

    #Clips that failed to load/process return None and are skipped
    if data is not None:

        features.append([data, df.loc[i, 'classID']])

#Create a dataframe for easier data handling
audio_df =  pd.DataFrame(features, columns=['Features', 'Label'])

In [None]:
#Review audio features: print the table's (rows, columns) shape, then show five random rows
print(audio_df.shape)
audio_df.sample(n=5)

Split the data into training and test sets

In [None]:
import tensorflow as tf

In [None]:
from sklearn.model_selection import train_test_split 

In [None]:
#Pull the feature vectors and labels out of the dataframe as plain Python lists
feature_rows = audio_df['Features'].tolist()
label_values = audio_df['Label'].tolist()

#Feature matrix: one row per clip
X = np.array(feature_rows)

#Labels: one-hot encoded over 10 classes
y = tf.keras.utils.to_categorical(np.array(label_values), num_classes=10)

In [None]:
#Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
#NOTE(review): the metadata has a 'fold' column that this random split ignores -
#confirm whether a fold-based split is intended
trainX, testX, trainY, testY = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
#Shape of the training feature matrix
trainX.shape

In [None]:
#Shape of the test feature matrix
testX.shape

### Building Model

In [None]:
#Clear Keras' global state (so re-running the model cells starts fresh), then create an empty Sequential model
tf.keras.backend.clear_session()
model = tf.keras.Sequential()

In [None]:
#Reshape each 40-value input vector to (40, 1), adding a trailing axis for the Conv1D layers added next
model.add(tf.keras.layers.Reshape((40,1,), input_shape=(40,)))

In [None]:
#Three Conv1D stages (16 -> 32 -> 64 filters, kernel size 3, ReLU), each preceded by
#batch normalization, followed by global average pooling and a 10-way softmax output
conv_stack = [
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv1D(16, kernel_size=3, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv1D(32, kernel_size=3, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Conv1D(64, kernel_size=3, activation='relu'),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(10, activation='softmax'),
]

for layer in conv_stack:
    model.add(layer)

In [None]:
#Adam optimizer, categorical cross-entropy loss (labels are one-hot encoded), accuracy as the reported metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
#Print the layer-by-layer summary of the model
model.summary()

In [None]:
#Train for 200 epochs, passing the held-out split as validation data
#NOTE(review): the test split doubles as the validation set here, so there is no untouched test set - confirm this is intended
model.fit(
    x=trainX,
    y=trainY,
    epochs=200,
    validation_data=(testX, testY),
)