### Imports

In [None]:
import IPython.display as ipd
import librosa.display
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
#from DataLoaderUrbanSounds import DataLoaderUrbanSounds

In [None]:
from tensorflow.python.client import device_lib

# Show the devices TensorFlow reports as locally available in this session.
local_devices = device_lib.list_local_devices()
print(local_devices)

In [None]:
import os

import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tqdm import tqdm


class DataLoaderUrbanSounds():
    """Turn the UrbanSound8K clips into fixed-length MFCC vectors and encoded labels.

    Typical use: construct the loader, call :meth:`extract_features` once, then
    fetch the feature matrix and the encoded targets with the ``get_*`` methods.
    """

    def __init__(self, input_dim):
        """Read the dataset metadata and prepare empty feature storage.

        Args:
            input_dim: Number of MFCC coefficients computed per clip; this is
                also the length of every feature vector.
        """
        # Clips are looked up as <AUDIO_DIR>/fold<N>/<slice_file_name>.
        self.AUDIO_DIR = "../input/urbansound8k"
        self.METADATA = pd.read_csv('../input/urbansound8k/UrbanSound8K.csv')
        # List of [feature_vector, class_name] pairs, filled by extract_features().
        self.EXTRACTED_FEATURES = []
        self.INPUT_DIM = input_dim
        self.labelencoder = LabelEncoder()

    def __len__(self):
        """Return the number of rows in the metadata table."""
        return len(self.METADATA)

    def extract_features(self):
        """Compute the feature vector of every clip listed in the metadata.

        The result replaces ``self.EXTRACTED_FEATURES``, so calling this method
        again (e.g. re-running a notebook cell) does not duplicate samples.
        """
        audio_root = os.path.abspath(self.AUDIO_DIR)
        extracted = []
        # iterrows() has no length, so tqdm needs an explicit total to show
        # a percentage and an ETA.
        for _, row in tqdm(self.METADATA.iterrows(), total=len(self.METADATA)):
            file_name = os.path.join(audio_root, 'fold' + str(row["fold"]),
                                     str(row["slice_file_name"]))
            data = self.get_one_file_features_extractor(file_name)
            extracted.append([data, row["class"]])
        # Assign only after the full pass so a failure midway keeps the old state.
        self.EXTRACTED_FEATURES = extracted

    def get_one_file_features_extractor(self, file_name):
        """Return one clip's MFCCs collapsed to a vector of ``INPUT_DIM`` means.

        Args:
            file_name: Path of the audio file to decode.

        Returns:
            1-D array holding the mean of each MFCC coefficient.
        """
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=self.INPUT_DIM)
        # librosa returns (n_mfcc, frames); averaging the transpose over axis 0
        # removes the frame axis and leaves one value per coefficient.
        mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
        return mfccs_scaled_features

    def _features_frame(self):
        """Return the extracted pairs as a DataFrame with 'feature' and 'class' columns."""
        return pd.DataFrame(self.EXTRACTED_FEATURES, columns=['feature', 'class'])

    def _class_names(self):
        """Return the raw class name of every extracted sample as an array."""
        return np.array(self._features_frame()['class'].tolist())

    def get_train_test_data(self):
        """Return ``(X, y)``: the stacked feature vectors and the raw class names."""
        extracted_features_df = self._features_frame()

        X = np.array(extracted_features_df['feature'].tolist())
        y = np.array(extracted_features_df['class'].tolist())

        return X, y

    def get_target_as_one_hot_encoder(self):
        """Return the targets one-hot encoded (also refits ``self.labelencoder``)."""
        return to_categorical(self.labelencoder.fit_transform(self._class_names()))

    def get_target_as_label_encoder(self):
        """Return the targets as integer class ids (also refits ``self.labelencoder``)."""
        return self.labelencoder.fit_transform(self._class_names())

    def split_to_train_test_data(self, X, y, test_size=0.2):
        """Split ``X`` and ``y`` with a fixed seed so the split is reproducible.

        Returns:
            ``X_train, X_test, y_train, y_test``.
        """
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=50)
        return X_train, X_test, y_train, y_test

    def decode_label(self, predicted_label):
        """Map integer class ids back to class names using the fitted encoder."""
        return self.labelencoder.inverse_transform(predicted_label)

# Announce the loader only when this code runs as the main program.
if __name__ == "__main__":
    print("Dataloader")

### Sample audio

In [None]:
# Decode one example clip so it can be inspected in the cells below.
librosa_audio_data, librosa_sample_rate = librosa.load(
    '../input/urbansound8k/fold4/102102-3-0-0.wav'
)

In [None]:
print(librosa_audio_data.shape)

# Waveform of the sample clip as decoded by librosa.
fig, ax = plt.subplots(figsize=(12, 4))
ax.plot(librosa_audio_data)

# Last expression of the cell: renders an audio player for the same file.
ipd.Audio('../input/urbansound8k/fold4/102102-3-0-0.wav')

### Exploring Metadata

In [None]:
# Load the dataset index and preview its first rows.
metadata_csv = '../input/urbansound8k/UrbanSound8K.csv'
metadata = pd.read_csv(metadata_csv)
metadata.head()

### Training method

In [None]:
from datetime import datetime
from keras.callbacks import ModelCheckpoint


def train(model, num_epochs, num_batch_size, X_train, y_train, X_test, y_test,
          checkpoint_path='saved_models/audio_classification.hdf5'):
    """Fit ``model`` while checkpointing the best weights, and report the wall time.

    Args:
        model: Compiled Keras model to train.
        num_epochs: Number of epochs passed to ``model.fit``.
        num_batch_size: Batch size passed to ``model.fit``.
        X_train, y_train: Training inputs and targets.
        X_test, y_test: Data used as ``validation_data`` during fitting.
        checkpoint_path: File the checkpoint callback writes to.

    Returns:
        The object returned by ``model.fit`` (the training history), so callers
        can plot or inspect the learning curves.
    """
    # Make sure the target directory exists before the callback tries to write.
    checkpoint_dir = os.path.dirname(checkpoint_path)
    if checkpoint_dir:
        os.makedirs(checkpoint_dir, exist_ok=True)

    # save_best_only keeps a single file: the best epoch according to the
    # callback's default monitored quantity.
    checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                                   verbose=1, save_best_only=True)
    start = datetime.now()

    history = model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs,
                        validation_data=(X_test, y_test),
                        callbacks=[checkpointer], verbose=1)

    duration = datetime.now() - start
    print("Training completed in time: ", duration)
    return history

**Instantiate the DataLoaderUrbanSounds class**

In [None]:
# input_dim is the number of MFCC coefficients per clip; you can choose your own dim.
dl = DataLoaderUrbanSounds(input_dim=100)

**Extract features from all audio files**


In [None]:
# Decode every clip listed in the metadata and store its MFCC vector on `dl`.
# Each row triggers one librosa.load, so this is the slow step of the notebook.
dl.extract_features()

**Get Training Data**

In [None]:
# Feature matrix: one MFCC vector per clip. The raw class names returned
# alongside it are not needed here; they are bound to a throwaway name rather
# than `_`, which IPython uses for the last cell output.
X, _raw_labels = dl.get_train_test_data()

# Targets, one-hot encoded to match the categorical cross-entropy loss used below.
# For integer class ids use dl.get_target_as_label_encoder() instead (the loss
# would then have to change accordingly).
y = dl.get_target_as_one_hot_encoder()

In [None]:
# Sanity check: X and y must have the same number of rows.
X.shape, y.shape

In [None]:
# Hold out 25% of the data as the test set, keeping class proportions.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=123, stratify=y)
# Carve the validation set out of the remaining training data. Stratify this
# split too, so the validation class balance matches the other two sets.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=123,
                                                  stratify=y_train)

**Split data into train, validation and test sets, then add a trailing axis for the LSTM**

In [None]:
# Add a trailing axis of size 1 for the LSTM input. The feature length is taken
# from the data so it follows whatever dimension the loader was built with.
# Re-running the cell is harmless.
X_train = X_train.reshape(-1, X_train.shape[1], 1)

In [None]:
# Add a trailing axis of size 1 for the LSTM input. The feature length is taken
# from the data so it follows whatever dimension the loader was built with.
# Re-running the cell is harmless.
X_val = X_val.reshape(-1, X_val.shape[1], 1)

In [None]:
# Add a trailing axis of size 1 for the LSTM input. The feature length is taken
# from the data so it follows whatever dimension the loader was built with.
# Re-running the cell is harmless.
X_test = X_test.reshape(-1, X_test.shape[1], 1)

In [None]:
# Sanity check: shapes of every split after the reshape.
X_train.shape, X_test.shape, X_val.shape, y_train.shape, y_test.shape, y_val.shape

## Build model and feed in the data

### Creating RNN

In [None]:
import tensorflow
import keras
from tensorflow.keras.layers import LSTM, Dense, Conv2D, MaxPooling2D
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.optimizers import Adam

In [None]:
# Take the network's input and output sizes from the prepared data instead of
# repeating literals: X_train is (samples, features, 1) after the reshape and
# y_train is one-hot with one column per class.
input_shape = X_train.shape[1:]
num_classes = y_train.shape[1]

model = keras.Sequential()
# Two stacked LSTMs: the first returns the full sequence so the second can
# consume it; the second returns only its final state.
model.add(LSTM(1000, input_shape=input_shape, return_sequences=True))
model.add(Dropout(0.2))
# Only the first layer of a Sequential model needs an input shape.
model.add(LSTM(512, return_sequences=False))
model.add(Dropout(0.2))
# Dense classifier head ending in a softmax over the classes.
model.add(Dense(512, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(num_classes, activation='softmax'))
model.summary()

In [None]:
# The metric is registered under the name 'acc'; the plotting cell below reads
# the history through the 'acc' / 'val_acc' keys.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [None]:
# Train on the training split and validate on the validation split; the test
# split is not passed to fit here.
history = model.fit(
    X_train,
    y_train,
    batch_size=36,
    epochs=20,
    shuffle=False,
    validation_data=(X_val, y_val),
)

In [None]:
# Adapted from Deep Learning with Python by Francois Chollet, 2018
history_dict = history.history
loss_values = history_dict['loss']
acc_values = history_dict['acc']
val_loss_values = history_dict['val_loss']
val_acc_values = history_dict['val_acc']

# Build the x-axis from the number of epochs actually recorded. A hard-coded
# range that differs from the epochs trained makes matplotlib raise on the
# x/y length mismatch.
epochs = range(1, len(loss_values) + 1)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# Left panel: loss curves.
ax1.plot(epochs, loss_values, 'co', label='Training Loss')
ax1.plot(epochs, val_loss_values, 'm', label='Validation Loss')
ax1.set_title('Training and validation loss')
ax1.set_xlabel('Epochs')
ax1.set_ylabel('Loss')
ax1.legend()

# Right panel: accuracy curves.
ax2.plot(epochs, acc_values, 'co', label='Training accuracy')
ax2.plot(epochs, val_acc_values, 'm', label='Validation accuracy')
ax2.set_title('Training and validation accuracy')
ax2.set_xlabel('Epochs')
ax2.set_ylabel('Accuracy')
ax2.legend()

plt.show()

In [None]:
# Loss and accuracy on the training and test splits.
TrainLoss, Trainacc = model.evaluate(X_train, y_train)
TestLoss, Testacc = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)

# The confusion matrix needs 1-D class ids on both sides. y_test is one-hot and
# y_pred holds per-class probabilities, so reduce both with argmax.
y_true_ids = np.argmax(y_test, axis=1)
y_pred_ids = np.argmax(y_pred, axis=1)
print('Confusion_matrix: ', tf.math.confusion_matrix(y_true_ids, y_pred_ids))