In [None]:
import IPython.display as ipd
import numpy as np
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
from scipy.io import wavfile as wav
import os

from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import activations
from tensorflow.keras.utils import to_categorical

In [None]:
# Root folder of the audio folds, and the metadata table describing every clip.
data_path = '/content/UrbanSound8K/audio'
metadata_csv = "/content/UrbanSound8K/metadata/UrbanSound8K.csv"
df = pd.read_csv(metadata_csv)

# Distinct class names found in the metadata's 'class' column.
labels = df['class'].unique().tolist()

In [None]:
# Map every class name to the path of the first clip listed for it in the metadata.
# The path is built from `data_path` so the dataset root is configured in one place only.
files = {}
for label in labels:
    first_row = df.loc[df['class'] == label].iloc[0]
    files[label] = os.path.join(
        data_path,
        f"fold{first_row['fold']}",
        str(first_row['slice_file_name']),
    )

In [None]:
# `librosa.display.waveplot` was removed in librosa 0.10 in favour of `waveshow`.
# Use `waveshow` when this installation has it and fall back to `waveplot` otherwise.
plot_waveform = getattr(librosa.display, 'waveshow', None) or librosa.display.waveplot

# Two panels per row, with as many rows as the number of classes requires
# (5 rows for the 10 UrbanSound8K classes).
n_cols = 2
n_rows = (len(labels) + n_cols - 1) // n_cols

fig = plt.figure(figsize=(15,15))
fig.subplots_adjust(hspace=0.4, wspace=0.4)
for i, label in enumerate(labels):
    fn = files[label]
    # add_subplot makes the new panel current, so the pyplot/librosa calls below draw into it
    fig.add_subplot(n_rows, n_cols, i+1)
    plt.title(label)
    data, sample_rate = librosa.load(fn)
    plot_waveform(data, sr=sample_rate)
plt.savefig('class_examples.png')

In [None]:
# Read the same clip with both scipy and librosa so their sample rates can be compared.
fn = '/content/UrbanSound8K/audio/fold1/191431-9-0-66.wav'
scipy_sample_rate, scipy_audio = wav.read(fn)
librosa_audio, librosa_sample_rate = librosa.load(fn)

# The scipy reading is reported as the "original" rate.
print(f"Original sample rate: {scipy_sample_rate}")
print(f"Librosa sample rate: {librosa_sample_rate}")

In [None]:
# Smallest and largest sample value of each reading of the clip.
scipy_lo, scipy_hi = np.min(scipy_audio), np.max(scipy_audio)
librosa_lo, librosa_hi = np.min(librosa_audio), np.max(librosa_audio)

print(f"Original audio fil min~max range: {scipy_lo} to {scipy_hi}")
print(f"Librosa audio file min~max range: {librosa_lo:.2f} to {librosa_hi:.2f}")

In [None]:
# Samples of the clip as read by scipy, saved as an image.
scipy_fig, scipy_ax = plt.subplots(figsize=(12, 4))
scipy_ax.plot(scipy_audio)
scipy_fig.savefig('original_audio.png')

In [None]:
# Samples of the clip as loaded by librosa, saved as an image.
librosa_fig, librosa_ax = plt.subplots(figsize=(12, 4))
librosa_ax.plot(librosa_audio)
librosa_fig.savefig('librosa_audio.png')

In [None]:
# 40 MFCC coefficients for the example clip; the bare `.shape` displays the matrix size.
mfccs = librosa.feature.mfcc(
    y=librosa_audio,
    sr=librosa_sample_rate,
    n_mfcc=40,
)
mfccs.shape

In [None]:
# Heat-map of the example clip's MFCC matrix with a time axis, saved as an image.
mfcc_fig, mfcc_ax = plt.subplots(figsize=(8, 8))
librosa.display.specshow(mfccs, sr=librosa_sample_rate, x_axis='time', ax=mfcc_ax)
mfcc_fig.savefig('MFCCs.png')

In [None]:
def extract_features(file_name, n_mfcc=40):
    """Summarise one audio file as a single fixed-length MFCC vector.

    Parameters
    ----------
    file_name : str
        Path of the audio file; passed straight to ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. Defaults to 40, the value
        that was previously hard-coded. The classifier's ``input_shape``
        has to match whatever is chosen here.

    Returns
    -------
    numpy.ndarray
        1-D array: the MFCC matrix transposed and averaged over axis 0, so
        the clip's duration does not affect the length of the result.
    """
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    # librosa returns one row per coefficient; averaging the transpose over
    # axis 0 collapses the frame dimension and keeps one value per coefficient.
    mfccs_processed = np.mean(mfccs.T, axis=0)

    return mfccs_processed

def extract(row):
    """Compute MFCC features for one metadata row and append them to ``features``.

    Parameters
    ----------
    row : mapping / pandas.Series
        Must provide ``'fold'``, ``'slice_file_name'`` and ``'class'``.

    Side effects
    ------------
    Appends ``[feature_vector, class_label]`` to the module-level list
    ``features``; nothing is returned.
    """
    # The dataset root is the notebook's `data_path`; the name used here
    # before (`fulldatasetpath`) is never defined and raised NameError.
    file_name = os.path.join(
        os.path.abspath(data_path),
        'fold' + str(row['fold']),
        str(row['slice_file_name']),
    )
    class_label = row['class']
    data = extract_features(file_name)
    features.append([data, class_label])

In [None]:
# Start from an empty list on every run so re-executing the cell never duplicates rows.
features = []

# extract() is called only for its side effect of appending to `features`.
# A plain loop avoids DataFrame.apply building, and the cell displaying,
# a Series that holds nothing but None.
for _row_index, row in df.iterrows():
    extract(row)

In [None]:
# One row per extracted clip: its feature vector and the class name stored with it.
feature_columns = ['feature', 'class_label']
featuresdf = pd.DataFrame(data=features, columns=feature_columns)
featuresdf.head()

In [None]:
# Collect the per-clip feature vectors and the class names into arrays.
X = np.array(featuresdf['feature'].tolist())
y = np.array(featuresdf['class_label'].tolist())

# Class names -> integer ids -> one-hot rows.
le = LabelEncoder()
yy = to_categorical(le.fit_transform(y))

# Hold out 20% of the clips; the fixed random_state keeps the split repeatable.
split = train_test_split(X, yy, test_size=0.2, random_state=127)
x_train, x_test, y_train, y_test = split

In [None]:
# Width of the one-hot label matrix, i.e. how many outputs the classifier needs.
num_labels = yy.shape[-1]
# NOTE(review): not referenced anywhere in the visible cells - confirm it is still needed.
filter_size = 2
def build_model_graph(input_shape=(40,), num_classes=None, hidden_units=256, dropout_rate=0.5):
    """Build and compile the fully connected classifier.

    Architecture: two blocks of Dense -> ReLU -> Dropout, followed by a
    Dense layer of ``num_classes`` units with a softmax activation.

    Parameters
    ----------
    input_shape : tuple, optional
        Shape of one feature vector. Defaults to ``(40,)``.
    num_classes : int or None, optional
        Number of output units. ``None`` (the default) falls back to the
        module-level ``num_labels``, which is what the function always used
        before this parameter existed.
    hidden_units : int, optional
        Width of each hidden Dense layer. Defaults to 256.
    dropout_rate : float, optional
        Rate passed to each Dropout layer. Defaults to 0.5.

    Returns
    -------
    tf.keras.Sequential
        Model compiled with categorical cross-entropy loss, the Adam
        optimizer and accuracy as its metric.
    """
    if num_classes is None:
        num_classes = num_labels

    model = tf.keras.Sequential()
    # Declare the input explicitly instead of passing `input_shape=` to the first Dense layer.
    model.add(tf.keras.Input(shape=input_shape))
    model.add(layers.Dense(hidden_units))
    model.add(layers.Activation(activations.relu))
    model.add(layers.Dropout(dropout_rate))
    model.add(layers.Dense(hidden_units))
    model.add(layers.Activation(activations.relu))
    model.add(layers.Dropout(dropout_rate))
    model.add(layers.Dense(num_classes))
    model.add(layers.Activation(activations.softmax))
    # Compile
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

    return model

# Instantiate the network for 40-value feature vectors and print its layer table.
model = build_model_graph(input_shape=(40,))
model.summary()

In [None]:
# Baseline: score the still-untrained network on the held-out split.
# score[1] is the metric configured at compile time (accuracy).
score = model.evaluate(x=x_test, y=y_test, verbose=0)
accuracy = score[1] * 100
print(f"Pre-training accuracy: {accuracy:.4f}%")

In [None]:
# Import the callback from tensorflow.keras, the same Keras namespace the model is
# built with, rather than from the standalone `keras` package.
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime

num_epochs = 100
num_batch_size = 32

# NOTE(review): x_test/y_test are passed as validation data here and are also what the
# final "Testing accuracy" is computed on - consider a separate validation split.
training_started = datetime.now()
# Keep the History object so the per-epoch metrics stay available after training.
history = model.fit(
    x_train,
    y_train,
    batch_size=num_batch_size,
    epochs=num_epochs,
    validation_data=(x_test, y_test),
    verbose=1,
)
print(f"Training completed in: {datetime.now() - training_started}")

In [None]:
# Accuracy of the trained model on the data it was fitted on ...
train_score = model.evaluate(x=x_train, y=y_train, verbose=0)
print(f"Training accuracy: {train_score[1]:.2%}")

# ... and on the held-out split; `score` is left holding the test-set result.
score = model.evaluate(x=x_test, y=y_test, verbose=0)
print(f"Testing accuracy: {score[1]:.2%}")