# Appling machine learning to audio analysis

### Trainning Code

In [1]:
### Dependencies were already installed previously on ubuntu
### Comet: experiment tracking and visual tools

from comet_ml import Experiment

import IPython.display as ipd
import numpy as np
import pandas as pd
import librosa # audio analysis
import librosa.display # this submodule needs to be imported explicitly
import matplotlib.pyplot as plt
from scipy.io import wavfile as wav
import os

In [2]:
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [3]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
# Keras now is fully integrated to Tensorflow, so to_categorical and Adam
# can't be imported directly from keras, therefore, the update:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

2021-12-16 17:23:51.431350: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-16 17:23:51.431409: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [4]:
# To play audio file:
import IPython.display as ipd

In [5]:
# Create a Comet Experiment, to capture any and all artifacts
# such as audio files, visualization, model, dataset, system information and training metrics
experiment = Experiment(api_key = "Xa6eGiuYFngDcAJ9PVY1WpZp0", project_name = "SetA")

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/negromontebs/seta/cf3b9522e3a547d58fdf516603132973



## MFCCs - Mel-Frequency Cepstrum Coefficients

### Function to extract the MFCCs from every file in our dataset

In [None]:
# funçao com gerador que guarda os valores da atual posição da janela e calcula os proximos
def windows(audio, window_size):
    start = 0
    while start < len(audio):
        yield start, start + window_size # a função para aqui e returna o valor para a outra função
        start += (window_size // 2) #calcula a posição inicial do proximo segmento

In [None]:
def extract_features(file_name, window_size):
    #upload do audio
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
    #list de retorno
    mfccsArr = []
    
    # A função gerador é um iteravel a partir dos valores gerados durante a keyword yield
    #Percorre um audio inteiro
    for (start,end) in windows(audio, window_size):
        #Enquanto a janela nao atingir o fim do audio
        if end <= len(audio): 
            #entao define um excerto do audio
            signal = audio[start:end]
            # e calcula os coeficientes de mel
            mfccs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40)
            mfccs_processed = np.mean(mfccs.T,axis=0)
            mfccsArr.append([mfccs_processed])
    # Retorna os coeficientes de mel em forma de lista do audio inteiro
    return mfccsArr

### Code 

In [8]:

df = pd.read_csv('../csvFiles/set_a.csv')


In [10]:
features = []# Iterate through each sound file and extract the features 

for index, row in df.iterrows():
    
    absolutePath = os.path.abspath("../")
    fname = str(row["fname"])
    file_name = os.path.join(absolutePath,fname)
    
    class_label = row["label"]
    
    if(class_label != "unlab"):
        data = extract_features(file_name)
        features.append([data, class_label])
    
# Convert into a Panda dataframe 
featuresdf = pd.DataFrame(features, columns=['feature','class_label'])

In [13]:
featuresdf.head()

Unnamed: 0,feature,class_label
0,"[-594.53345, 39.48496, 4.773615, 6.360032, 2.7...",artifact
1,"[-756.76984, 38.06422, -12.761636, 5.586702, -...",artifact
2,"[-568.246, 48.002598, -25.640783, 8.441813, -2...",artifact
3,"[-299.85703, 112.79045, -22.194496, 28.633465,...",artifact
4,"[-296.34073, 102.59627, -18.80507, 15.606756, ...",artifact


In [14]:
featuresdf.iloc[0]['feature']

array([-5.9453345e+02,  3.9484959e+01,  4.7736149e+00,  6.3600321e+00,
        2.7210441e+00,  1.9556358e+00, -1.5586532e+00, -1.5916940e+00,
       -4.1285987e+00, -9.4592869e-01, -2.9675467e+00, -2.1387017e+00,
       -3.4743207e+00, -1.7667232e+00, -2.4402936e+00, -1.8360806e+00,
       -2.1535695e+00, -1.2188101e+00, -1.6877983e+00, -1.3186961e+00,
       -1.6294661e+00, -1.5636450e+00, -1.0807190e+00, -1.5055786e+00,
       -9.3540710e-01, -9.4582045e-01, -6.7552823e-01, -4.6510810e-01,
       -5.9152031e-01, -6.3035971e-01, -3.6442605e-01, -2.2027926e-01,
       -5.5047251e-02,  1.7959123e-02, -7.3863313e-02, -2.9855010e-01,
        4.0030742e-01,  3.0644944e-01,  2.5131455e-01,  2.2699493e-01],
      dtype=float32)

## Model building and training

In [15]:
# Convert features and corresponding classification labels into numpy arrays
X = np.array(featuresdf.feature.tolist())
y = np.array(featuresdf.class_label.tolist())

# Encode the classification labels
le = LabelEncoder()
yy = to_categorical(le.fit_transform(y))

### Trainning and test sets

In [16]:
x_train, x_test, y_train, y_test = train_test_split(X, yy, test_size=0.2, random_state = 127)

### Neural Network architecture

In [19]:
num_labels = yy.shape[1]

filter_size = 2

def build_model_graph(input_shape=(40,)):
    model = Sequential()
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_labels))
    model.add(Activation('softmax'))
    # Compile the model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam') 
    
    return model

model = build_model_graph()

# Display model architecture summary 
model(x_train)
model.summary()# Calculate pre-training accuracy 
score = model.evaluate(x_test, y_test, verbose=0)
accuracy = 100*score[1]

print("Pre-training accuracy: %.4f%%" % accuracy)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (99, 256)                 10496     
                                                                 
 activation_3 (Activation)   (99, 256)                 0         
                                                                 
 dropout_2 (Dropout)         (99, 256)                 0         
                                                                 
 dense_4 (Dense)             (99, 256)                 65792     
                                                                 
 activation_4 (Activation)   (99, 256)                 0         
                                                                 
 dropout_3 (Dropout)         (99, 256)                 0         
                                                                 
 dense_5 (Dense)             (99, 4)                  

### Model Trainning

In [20]:
num_epochs = 100
num_batch_size = 32
model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), verbose=1)

# Evaluating the model on the training and testing set
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: {0:.2%}".format(score[1]))
score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: {0:.2%}".format(score[1]))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100


Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Training Accuracy: 68.69%
Testing Accuracy: 76.00%


In [21]:
experiment.end()

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.ml/negromontebs/seta/cf3b9522e3a547d58fdf516603132973
COMET INFO:   Metrics [count] (min, max):
COMET INFO:     accuracy [200]                : (0.16161616146564484, 0.6565656661987305)
COMET INFO:     batch_accuracy [200]          : (0.1875, 0.78125)
COMET INFO:     batch_loss [200]              : (0.8622515201568604, 49.34465789794922)
COMET INFO:     epoch_duration [200]          : (0.10189401900015582, 1.5324152049997792)
COMET INFO:     loss [200]                    : (1.2406256198883057, 53.74673080444336)
COMET INFO:     val_accuracy [200]            : (0.3199999928474426, 0.800000011920929)
COMET INFO:     val_loss [200]                : (0.6950652599334717, 22.237443923950195)
COMET INFO:     validate_batch_accuracy [200] : (0.31999999284744