# Applying machine learning to audio analysis

### Training Code

In [1]:
### Dependencies were already installed previously on ubuntu
### Comet: experiment tracking and visual tools

from comet_ml import Experiment

import IPython.display as ipd
import numpy as np
import pandas as pd
import librosa # audio analysis
import librosa.display # this submodule needs to be imported explicitly
import matplotlib.pyplot as plt
from scipy.io import wavfile as wav
import os

In [2]:
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [3]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
# Keras now is fully integrated to Tensorflow, so to_categorical and Adam
# can't be imported directly from keras, therefore, the update:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

2022-01-06 10:48:55.502096: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-01-06 10:48:55.502152: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [4]:
# To play audio file:
import IPython.display as ipd

In [5]:
# Create a Comet Experiment, to capture any and all artifacts
# such as audio files, visualization, model, dataset, system information and training metrics.
#
# The API key is a credential, so it must not live in the notebook source:
# export COMET_API_KEY in the environment before starting the kernel.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and rotated in the Comet account settings.
experiment = Experiment(api_key=os.environ.get("COMET_API_KEY"), project_name="SetA")

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/negromontebs/seta/a68d4892eb8d44ed93a78ed9b577c286



## MFCCs - Mel-Frequency Cepstrum Coefficients

### Function to extract the MFCCs from every file in our dataset

In [6]:
def windows(audio, window_size):
    """Yield ``(start, end)`` index pairs of half-overlapping windows over ``audio``.

    Each window spans ``window_size`` positions and consecutive windows start
    ``window_size // 2`` positions apart (at least 1, so the generator always
    advances). The last windows may extend past ``len(audio)``; callers that
    need full windows must check ``end <= len(audio)`` themselves.

    Args:
        audio: Any sized sequence (only ``len(audio)`` is used).
        window_size: Window length, a positive integer.

    Yields:
        Tuples ``(start, start + window_size)``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1 (raised on first
            iteration, since this is a generator).
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # With window_size == 1 the half-window hop would be 0 and the loop would
    # never terminate, so the hop is clamped to one position.
    hop = max(1, window_size // 2)

    start = 0
    while start < len(audio):
        # Execution pauses here and hands the current window to the consumer.
        yield start, start + window_size
        # Move to the start of the next (overlapping) segment.
        start += hop

In [7]:
def extract_features(file_name, window_size, n_mfcc=40):
    """Compute mean MFCC vectors for every full, half-overlapping window of a file.

    The audio is loaded with ``librosa.load`` and split with ``windows``.
    For each window that fits entirely inside the signal, the MFCC matrix is
    computed and averaged into a single vector with one value per coefficient.
    A trailing window that would run past the end of the audio is dropped.

    Args:
        file_name: Path of the audio file, passed to ``librosa.load``.
        window_size: Window length, in samples of the loaded signal.
        n_mfcc: Number of MFCCs requested from ``librosa.feature.mfcc``.
            Defaults to 40, the value previously hard-coded here.

    Returns:
        A list with one entry per window; each entry is a one-element list
        wrapping the averaged MFCC array for that window.
    """
    # Load the audio samples and their sampling rate.
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    # Accumulator for the per-window results.
    mfccsArr = []

    # `windows` is a generator: iterating it walks over the whole recording.
    for (start, end) in windows(audio, window_size):
        # Window ends only grow, so once one overruns the signal every later
        # one does too and the scan can stop.
        if end > len(audio):
            break
        # Slice the excerpt covered by this window...
        signal = audio[start:end]
        # ...and compute its mel-frequency cepstral coefficients.
        mfccs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
        mfccs_processed = np.mean(mfccs.T,axis=0)
        mfccsArr.append([mfccs_processed])
    # Mean MFCC vectors of all excerpts of the recording, as a list.
    return mfccsArr

### Code 

In [8]:

# Load the metadata table for set A; later cells read its "fname" and "label" columns.
df = pd.read_csv(filepath_or_buffer='../csvFiles/set_a.csv')


In [13]:
# Iterate through each sound file and extract the features:
# one [feature, class_label] record per analysis window.
features = []

# The dataset root is the same for every row, so it is resolved once.
base_dir = os.path.abspath("../")

for raw_name, class_label in zip(df["fname"], df["label"]):
    fname = str(raw_name)
    file_name = os.path.join(base_dir, fname)

    # Rows labelled "unlab" carry no class and are left out.
    if class_label == "unlab":
        continue

    data = extract_features(file_name, 10000)
    features.extend([item, class_label] for item in data)

# Convert into a pandas DataFrame
featuresdf = pd.DataFrame(features, columns=['feature','class_label'])

In [11]:
# Preview the first five extracted records.
featuresdf.head(n=5)

Unnamed: 0,feature,class_label
0,"[[[-650.79114, 63.548443, -20.470852, 28.46080...",artifact
1,"[[[-1082.1058, -29.888508, 23.279947, -20.5528...",artifact
2,"[[[-623.37646, 55.783348, -34.655636, 11.50144...",artifact
3,"[[[-243.38713, 90.86705, -25.640467, 33.680714...",artifact
4,"[[[-270.41394, 95.64355, -38.597084, 22.175013...",artifact


In [12]:
# Inspect the raw feature stored in the first record.
featuresdf['feature'].iloc[0]

[[array([-6.5079114e+02,  6.3548443e+01, -2.0470852e+01,  2.8460804e+01,
         -1.0117528e+01,  1.9794758e+01, -1.3259483e+01,  1.1926706e+01,
         -1.4932245e+01,  1.1031508e+01, -1.1843365e+01,  7.3816118e+00,
         -1.6700590e+01,  2.1917920e+00, -1.1410288e+01,  6.1895409e+00,
         -1.0775829e+01,  2.9669476e+00, -9.4110289e+00,  2.8505924e+00,
         -5.4362922e+00,  4.9595389e+00, -6.0421333e+00, -1.6245689e+00,
         -7.0252862e+00,  1.8020422e+00, -3.9785454e+00,  9.1642153e-01,
         -3.1723285e+00,  1.4830244e+00, -3.4907699e+00,  1.7940048e+00,
         -1.6697212e+00,  1.6860838e+00, -3.1101050e+00,  1.1374171e+00,
          3.6319980e-01,  2.4055085e+00, -2.6050715e+00,  1.3114436e+00],
        dtype=float32)],
 [array([-6.3357776e+02,  8.1440926e+01, -8.8880749e+00,  3.4122105e+01,
         -3.6018798e+00,  2.4282431e+01, -8.4829082e+00,  1.1672964e+01,
         -1.3049482e+01,  1.2323549e+01, -8.6594791e+00,  8.0836391e+00,
         -1.4428652e+01, 

## Model building and training

In [None]:
# Convert features and corresponding classification labels into numpy arrays
X = np.array(featuresdf.feature.tolist())
# Every stored feature wraps its MFCC vector in a one-element list, so the
# stacked array has an extra singleton axis. The dense network expects one
# flat vector per sample (and one-hot targets of matching rank), so collapse
# everything after the sample axis.
if X.ndim > 2:
    X = X.reshape(len(X), -1)
y = np.array(featuresdf.class_label.tolist())

# Encode the classification labels: strings -> integer ids -> one-hot rows
le = LabelEncoder()
yy = to_categorical(le.fit_transform(y))

### Training and test sets

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    yy,
    test_size=0.2,
    random_state=127,
)

### Neural Network architecture

In [None]:
# NOTE(review): filter_size is not referenced by any visible cell — confirm before removing.
filter_size = 2

# Width of the one-hot label matrix = number of output units of the network.
num_labels = np.shape(yy)[1]

def build_model_graph(input_shape=(40,)):
    """Build and compile the dense classifier.

    The network is two blocks of Dense(256) -> ReLU -> Dropout(0.5) followed
    by a softmax layer with ``num_labels`` units (``num_labels`` is read from
    the notebook's global scope).

    Args:
        input_shape: Shape of a single sample, without the batch axis.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer, accuracy metric).
    """
    model = Sequential()
    # Declare the input explicitly: previously `input_shape` was accepted but
    # ignored, leaving the model unbuilt until it was first called on data.
    model.add(keras.Input(shape=input_shape))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_labels))
    model.add(Activation('softmax'))
    # Compile the model
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam') 
    
    return model

# Instantiate the (still untrained) network.
model = build_model_graph()

# Display model architecture summary.
# Calling the model on the training data first makes sure its layers are
# built, which summary() requires.
model(x_train)
model.summary()

# Calculate pre-training accuracy
score = model.evaluate(x_test, y_test, verbose=0)
accuracy = score[1] * 100

print("Pre-training accuracy: %.4f%%" % accuracy)

### Model Training

In [None]:
# Training hyper-parameters
num_epochs = 100
num_batch_size = 32

# Fit the network; the held-out split is also reported after every epoch.
model.fit(
    x_train,
    y_train,
    batch_size=num_batch_size,
    epochs=num_epochs,
    validation_data=(x_test, y_test),
    verbose=1,
)

# Evaluating the model on the training and testing set
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: {0:.2%}".format(score[1]))

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: {0:.2%}".format(score[1]))

In [None]:
# Close the Comet experiment created at the top of the notebook.
experiment.end()