# Applying machine learning to audio analysis

### Libraries

In [1]:
### Dependencies were already installed previously on ubuntu
### Comet: experiment tracking and visual tools

from comet_ml import Experiment

import IPython.display as ipd
import numpy as np
import pandas as pd
import librosa # audio analysis
import librosa.display # this submodule needs to be imported explicitly
import matplotlib.pyplot as plt
from scipy.io import wavfile as wav
import os

In [2]:
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [27]:
from tensorflow import keras
from keras.models import Sequential, save_model, load_model
from keras.layers import Dense, Dropout, Activation
# Keras now is fully integrated to Tensorflow, so to_categorical and Adam
# can't be imported directly from keras, therefore, the update:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [4]:
# To play audio file:
import IPython.display as ipd

In [5]:
# Create a Comet Experiment to capture artifacts such as audio files,
# visualizations, the model, the dataset, system information and training metrics.
# The API key is a credential, so it is read from the COMET_API_KEY environment
# variable rather than being hardcoded in the notebook. When the variable is unset,
# None is passed and Comet falls back to its own configuration lookup.
# NOTE(review): the key that was previously committed here should be revoked/rotated.
experiment = Experiment(api_key=os.environ.get("COMET_API_KEY"), project_name="SetA")

COMET INFO: Experiment is live on comet.ml https://www.comet.ml/negromontebs/seta/c41343628574422f92a40d87b84d698b



## MFCCs - Mel-Frequency Cepstral Coefficients

### Functions to extract the MFCCs from every file in our dataset

In [6]:
# Generator that tracks the current window position and computes the next one.
def windows(audio, window_size):
    """Yield ``(start, end)`` index pairs of half-overlapping windows over ``audio``.

    Consecutive windows start ``window_size // 2`` positions apart (at least 1,
    so the generator always advances). The last pairs may have ``end`` greater
    than ``len(audio)``; callers that need full windows must check for that.

    Args:
        audio: Any sized sequence; only ``len(audio)`` is used.
        window_size: Window length in elements; must be at least 1.

    Yields:
        Tuples ``(start, start + window_size)`` for every ``start < len(audio)``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1 (raised on first iteration).
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # A hop of window_size // 2 would be 0 for window_size == 1 and the loop
    # below would never terminate, so the hop is clamped to at least 1.
    hop = max(1, window_size // 2)

    start = 0
    while start < len(audio):
        # Execution pauses here and hands the current window to the caller.
        yield start, start + window_size
        # Start position of the next segment.
        start += hop

In [7]:
def extract_features(file_name, window_size):
    """Compute one averaged 40-coefficient MFCC vector per full window of an audio file.

    The file is loaded with ``librosa.load`` (``res_type='kaiser_fast'``) and walked
    with the ``windows`` generator. Windows that would run past the end of the
    signal are ignored.

    Args:
        file_name: Path of the audio file to load.
        window_size: Window length, in samples, passed to ``windows``.

    Returns:
        A list with one entry per full window; each entry is a one-element list
        holding the per-coefficient mean of that window's MFCC matrix.
    """
    # Load the audio samples and their sample rate
    samples, rate = librosa.load(file_name, res_type='kaiser_fast')
    total = len(samples)

    collected = []
    # The generator yields successive (begin, stop) positions over the whole signal
    for begin, stop in windows(samples, window_size):
        # Skip windows that extend beyond the end of the audio
        if stop > total:
            continue
        # MFCCs of this excerpt, averaged over the transposed matrix's first axis
        coeffs = librosa.feature.mfcc(y=samples[begin:stop], sr=rate, n_mfcc=40)
        collected.append([coeffs.T.mean(axis=0)])

    # MFCC rows for the whole audio file
    return collected

### Code to save the MFCCs

In [8]:
# Window length, in audio samples, used when slicing each recording
windowSize = 3000

# Table of recordings; its "fname" column holds each file's relative path
df = pd.read_csv('../csvFiles/set_a.csv')

# Accumulator for the per-window MFCC rows extracted from every sound file
features = []

In [47]:
# Start from an empty list so that re-running this cell rebuilds the features
# instead of appending a second copy of every row.
features = []

# Base directory against which the relative file names in the CSV are resolved;
# computed once because it does not change between files.
absolutePath = os.path.abspath("../")

# Extract the windowed MFCC rows of every file listed in the "fname" column
for fname in df["fname"]:
    file_name = os.path.join(absolutePath, str(fname))
    features.extend(extract_features(file_name, windowSize))

# Convert into a pandas DataFrame with one MFCC vector per row
featuresdf = pd.DataFrame(features, columns=['feature'])

In [48]:
# Preview the first rows of the extracted feature table
featuresdf.head()

Unnamed: 0,feature
0,"[-656.6555, 63.691227, -9.413803, 32.934563, -..."
1,"[-676.3987, 46.328228, -19.549753, 27.875717, ..."
2,"[-670.61273, 46.94281, -25.268034, 26.992773, ..."
3,"[-661.8272, 52.96385, -25.078768, 27.360682, -..."
4,"[-639.69, 70.559296, -17.338217, 29.466614, -6..."


In [51]:
# Pickle file for the extracted MFCCs; the window size is part of the name so
# runs with different window sizes do not overwrite each other.
fileMFCCs = '../MFCCs/completeSetA_' + str(windowSize) + '.pkl'

# Make sure the target directory exists, otherwise to_pickle fails on a fresh checkout
os.makedirs(os.path.dirname(fileMFCCs), exist_ok=True)

featuresdf.to_pickle(fileMFCCs)

## Model's Predictions

### Load Model from saved files

In [53]:
# Reload the MFCC table from the pickle file named by fileMFCCs
featuresdf = pd.read_pickle(fileMFCCs)

# Restore the saved Keras model, compiling it on load
model = load_model('../model_save/cut_setA', compile=True)

# Preview the first rows of the reloaded features
featuresdf.head()

Unnamed: 0,feature
0,"[-656.6555, 63.691227, -9.413803, 32.934563, -..."
1,"[-676.3987, 46.328228, -19.549753, 27.875717, ..."
2,"[-670.61273, 46.94281, -25.268034, 26.992773, ..."
3,"[-661.8272, 52.96385, -25.078768, 27.360682, -..."
4,"[-639.69, 70.559296, -17.338217, 29.466614, -6..."


### Predictions (softmax function)

In [33]:
# Gather the MFCC vectors stored in the 'feature' column into a single array
X = np.array(featuresdf["feature"].tolist())

# Run the loaded model on every row of X
# (presumably softmax outputs, per the section heading; confirm against the model)
predictions = model.predict(X)

[[-6.5079114e+02  6.3548443e+01 -2.0470852e+01 ...  2.4055085e+00
  -2.6050715e+00  1.3114436e+00]
 [-6.3357776e+02  8.1440926e+01 -8.8880749e+00 ...  1.8060404e+00
  -1.7498825e+00 -5.2263105e-01]
 [-6.3887439e+02  8.0685036e+01 -4.4576321e+00 ...  1.3681992e+00
  -2.1534874e+00 -5.3160775e-01]
 ...
 [-4.6978354e+02  7.2954926e+01  6.5328239e+01 ...  3.1683363e-02
  -1.9917801e-01 -6.8977952e-02]
 [-4.6556894e+02  7.3527061e+01  6.4108841e+01 ...  4.4135413e-01
   2.4968827e-02  2.6144261e-02]
 [-5.6018750e+02  1.1904225e+02  7.8263901e+01 ...  1.3609710e-01
  -1.3517660e-01 -3.3912730e-01]]


In [36]:
# Finish the Comet experiment; the summary appears in the cell output
experiment.end()

COMET INFO: ---------------------------
COMET INFO: Comet.ml Experiment Summary
COMET INFO: ---------------------------
COMET INFO:   Data:
COMET INFO:     display_summary_level : 1
COMET INFO:     url                   : https://www.comet.ml/negromontebs/seta/c41343628574422f92a40d87b84d698b
COMET INFO:   Parameters:
COMET INFO:     Adam_amsgrad       : False
COMET INFO:     Adam_beta_1        : 0.8999999761581421
COMET INFO:     Adam_beta_2        : 0.9990000128746033
COMET INFO:     Adam_decay         : 0.0
COMET INFO:     Adam_epsilon       : 1e-07
COMET INFO:     Adam_learning_rate : 0.0010000000474974513
COMET INFO:     Optimizer          : Adam
COMET INFO:   Uploads:
COMET INFO:     environment details      : 1
COMET INFO:     filename                 : 1
COMET INFO:     git metadata             : 1
COMET INFO:     git-patch (uncompressed) : 1 (47.12 KB)
COMET INFO:     installed packages       : 1
COMET INFO:     notebook                 : 1
COMET INFO:     os packages         