# Predicciones con RandomForest

A continuación voy a aplicar mi modelo para predecir el tipo de sonido en base a un dataset nuevo de sonidos.

Para ellos voy a realizar dos predicciones, una con mi modelo básico, y otra con un modelo sobre el que he aplicado un PCA para disminuir el número de features a 10.

Empiezo importando las librerías que voy a necesitar.

In [34]:
import numpy as np
import pandas as pd
import pandas.io

import re

import matplotlib.pyplot as plt

import librosa.display
import librosa

from glob import glob

import ffmpeg

import os

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import linear_model
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

from pydub import AudioSegment
from pydub.utils import make_chunks

import pickle

Empiezo importando el modelo que he guardado anteriormente, para trabajar con él.

In [35]:
filename = 'finalized_model_fourier.sav'

loaded_model = pickle.load(open(filename, 'rb'))

Continúo descomponiendo los audios en segmentos de 2 segundos, y sacando todas sus features.

In [None]:
def decompose_files(data_dir, audio_files):
    for j in range(len(audio_files)):
        myaudio = AudioSegment.from_file(data_dir + '{}'.format(os.listdir(data_dir)[j])) 
        chunk_length_ms = 2000
        chunks = make_chunks(myaudio, chunk_length_ms)

        for i, chunk in enumerate(chunks):
            chunk_name = "{}{}.wav".format(os.listdir(data_dir)[j], i)
            #print("exporting", chunk_name)
            chunk.export('./audios/predicciones_fourier_32/{}'.format(chunk_name), format="mp3")
            
decompose_files('./audios/predicciones/', glob('./audios/predicciones' + '/*'))

In [None]:
def get_features(data_dir, audio_files):
    momentos = {'ducha': 'Ducha', 'cena': 'Cena', 'washing': 'Lavadora', 'vacuum': 'Aspiradora', 
                'shaver': 'Afeitadora', 'hairdryer': 'Secador_pelo', 'airconditioner': 'Aire_acondicionado', 
                'cellphone': 'Telefono', 'comp': 'Tecleo', 'silence': 'Silencio', 'dryer': 'Secadora', 
                'blender': 'Licuadora', 'doorbell': 'Timbre', 'alarm': 'Alarma', 'faucet': 'Grifo', 
                'microwave': 'Microondas'}
    features = []
    for i in range(len(audio_files)):
        y, sr = librosa.load(audio_files[i], sr=8000, mono=True)
        name = os.listdir(data_dir)[i]
        pattern = "[._][\w]+"
        name = re.sub(pattern, '', name)
        if name in momentos.keys():
            momento = momentos.get(name)
        else:
            momento = 'Otro'
        mfcc = np.ndarray.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20))
        scem = np.ndarray.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
        scom = np.ndarray.mean(librosa.feature.spectral_contrast(S=np.abs(librosa.stft(y)), sr=sr, n_bands=4))
        srom = np.ndarray.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
        sbwm = np.ndarray.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
        tempo = librosa.beat.tempo(onset_envelope=librosa.onset.onset_strength(y=y, sr=sr, hop_length=512), sr=sr, hop_length=512)[0]
        rmse = np.ndarray.mean(librosa.feature.rms(y=y))
        D = np.abs(np.fft.fft(y, n=32)[:8000 // 2])
        features.append([mfcc, scem, scom, srom, sbwm, tempo, rmse, 
                         D[0], D[1], D[2], D[3], D[4], D[5], 
                         D[6], D[7], D[8], D[9], D[10], D[11], 
                         D[12], D[13], D[14], D[15], D[16], D[17], 
                         D[18], D[19], D[20], D[21], D[22], D[23],
                         D[24], D[25], D[26], D[27], D[28], D[29],
                         D[30], D[31], momento])
    return pd.DataFrame(features, columns=['mfcc', 'scem','scom', 'srom','sbwm', 'tempo', 'rmse', 
                                           'Fourier1', 'Fourier2', 'Fourier3', 'Fourier4', 'Fourier5', 
                                           'Fourier6', 'Fourier7', 'Fourier8', 'Fourier9', 'Fourier10',
                                           'Fourier11', 'Fourier12', 'Fourier13', 'Fourier14', 'Fourier15',
                                           'Fourier16', 'Fourier17', 'Fourier18', 'Fourier19', 'Fourier20',
                                           'Fourier21', 'Fourier22', 'Fourier23', 'Fourier24', 'Fourier25',
                                           'Fourier26', 'Fourier27', 'Fourier28', 'Fourier29', 'Fourier30',
                                           'Fourier31', 'Fourier32', 'momento'])

prueba = get_features('./audios/predicciones_fourier_32/', glob('./audios/predicciones_fourier_32' + '/*.wav'))

In [36]:
prueba.to_csv('data_fourier_aux.csv', index=False)

In [36]:
prueba = pd.read_csv('data_fourier_aux.csv')
prueba = prueba.drop(['Fourier18', 'Fourier19', 'Fourier20', 'Fourier21', 'Fourier22', 'Fourier23', 
                      'Fourier24', 'Fourier25', 'Fourier26', 'Fourier27', 'Fourier28', 'Fourier29', 
                      'Fourier30', 'Fourier31', 'Fourier32'], axis=1)
prueba.head()

Unnamed: 0,mfcc,scem,scom,srom,sbwm,tempo,rmse,Fourier1,Fourier2,Fourier3,...,Fourier9,Fourier10,Fourier11,Fourier12,Fourier13,Fourier14,Fourier15,Fourier16,Fourier17,momento
0,-14.789252,1307.71233,22.329178,2490.112305,941.869915,117.1875,0.029125,0.105356,0.059353,0.475078,...,0.040705,0.091964,0.039185,0.020593,0.004712,0.016715,0.037124,0.032305,0.000508,Lavadora
1,-2.996813,1181.177661,33.464148,3242.1875,1190.281966,120.0,0.085731,0.121791,0.494262,1.742387,...,0.159545,0.066197,0.076614,0.127871,0.2264,0.566957,0.158344,0.153017,0.11958,Microondas
2,-2.807277,1409.495596,23.275813,2779.785156,1055.763228,133.928571,0.096703,0.77667,0.482565,0.618999,...,0.057,0.043657,0.237582,0.12624,0.191609,0.238118,0.230985,0.052054,6.5e-05,Aspiradora
3,-16.530776,707.460976,23.874835,747.314453,440.637551,133.928571,0.017683,0.101806,0.120892,0.529542,...,0.029982,0.02437,0.023364,0.019847,0.018202,0.015442,0.015859,0.016363,0.011732,Timbre
4,-4.882842,1925.234669,19.538132,3061.645508,968.023202,133.928571,0.038148,0.09655,0.043815,0.224315,...,0.290234,0.200003,0.171583,0.15543,0.123371,0.076988,0.130435,0.03073,0.043009,Ducha


En este paso intermedio, dado que hay una pequeña cantidad de audios cuyas features son todas cero, los elimino para no ensuciar el modelo.

In [37]:
prueba = prueba[prueba['Fourier15'] != 0].reset_index(drop=True)
prueba.head()

Unnamed: 0,mfcc,scem,scom,srom,sbwm,tempo,rmse,Fourier1,Fourier2,Fourier3,...,Fourier9,Fourier10,Fourier11,Fourier12,Fourier13,Fourier14,Fourier15,Fourier16,Fourier17,momento
0,-14.789252,1307.71233,22.329178,2490.112305,941.869915,117.1875,0.029125,0.105356,0.059353,0.475078,...,0.040705,0.091964,0.039185,0.020593,0.004712,0.016715,0.037124,0.032305,0.000508,Lavadora
1,-2.996813,1181.177661,33.464148,3242.1875,1190.281966,120.0,0.085731,0.121791,0.494262,1.742387,...,0.159545,0.066197,0.076614,0.127871,0.2264,0.566957,0.158344,0.153017,0.11958,Microondas
2,-2.807277,1409.495596,23.275813,2779.785156,1055.763228,133.928571,0.096703,0.77667,0.482565,0.618999,...,0.057,0.043657,0.237582,0.12624,0.191609,0.238118,0.230985,0.052054,6.5e-05,Aspiradora
3,-16.530776,707.460976,23.874835,747.314453,440.637551,133.928571,0.017683,0.101806,0.120892,0.529542,...,0.029982,0.02437,0.023364,0.019847,0.018202,0.015442,0.015859,0.016363,0.011732,Timbre
4,-4.882842,1925.234669,19.538132,3061.645508,968.023202,133.928571,0.038148,0.09655,0.043815,0.224315,...,0.290234,0.200003,0.171583,0.15543,0.123371,0.076988,0.130435,0.03073,0.043009,Ducha


In [38]:
classes = ['Afeitadora', 'Aire_acondicionado', 'Alarma', 'Aspiradora', 'Cena', 'Ducha', 'Grifo', 'Lavadora', 
           'Licuadora', 'Microondas', 'Secador_pelo', 'Secadora', 'Silencio', 'Tecleo', 'Telefono', 'Timbre']

X_new = prueba.loc[:, prueba.columns != 'momento']
y_new = loaded_model.predict(X_new)
y_new2 = loaded_model.predict_proba(X_new)

reconocidos = 0
aciertos = 0
for i in range(len(X_new)):
    predicted_list = y_new[i].tolist()
    predicted_class = classes[y_new2[i].tolist().index(max(y_new2[i]))]
    predicted_probability = y_new2[i].tolist()[y_new2[i].tolist().index(max(y_new2[i]))]
    if max(y_new2[i]) > 0:
        if predicted_class == prueba.momento[i]:
            #print(str(i) + '-  {}: {:.2f}% ----> {}'.format(predicted_class, predicted_probability*100, prueba.momento[i]))
            aciertos += 1
            reconocidos += 1
        else:
            #print(str(i) + '-  {}: {:.2f}% ----> {}'.format(predicted_class, predicted_probability*100, prueba.momento[i]))
            reconocidos += 1


print('Porcentaje de sonidos reconocidos: {:.2f}%.'.format(reconocidos/(len(X_new))*100))
print('Porcentaje de aciertos: {:.2f}%.'.format(aciertos/(len(X_new))*100))
print('Porcentaje de aciertos una vez reconoce el sonido: {:.2f}%.'.format(aciertos/reconocidos*100))

Porcentaje de sonidos reconocidos: 100.00%.
Porcentaje de aciertos: 53.64%.
Porcentaje de aciertos una vez reconoce el sonido: 53.64%.


# Predicciones con el modelo PCA de 10 features

Hago lo mismo que antes pero con el nuevo modelo sobre el que hemos aplicado el PCA.

In [44]:
prueba = pd.read_csv('data_fourier_aux.csv')
prueba = prueba.drop(['Fourier18', 'Fourier19', 'Fourier20', 'Fourier21', 'Fourier22', 'Fourier23', 
                      'Fourier24', 'Fourier25', 'Fourier26', 'Fourier27', 'Fourier28', 'Fourier29', 
                      'Fourier30', 'Fourier31', 'Fourier32'], axis=1)
prueba.head()

Unnamed: 0,mfcc,scem,scom,srom,sbwm,tempo,rmse,Fourier1,Fourier2,Fourier3,...,Fourier9,Fourier10,Fourier11,Fourier12,Fourier13,Fourier14,Fourier15,Fourier16,Fourier17,momento
0,-14.789252,1307.71233,22.329178,2490.112305,941.869915,117.1875,0.029125,0.105356,0.059353,0.475078,...,0.040705,0.091964,0.039185,0.020593,0.004712,0.016715,0.037124,0.032305,0.000508,Lavadora
1,-2.996813,1181.177661,33.464148,3242.1875,1190.281966,120.0,0.085731,0.121791,0.494262,1.742387,...,0.159545,0.066197,0.076614,0.127871,0.2264,0.566957,0.158344,0.153017,0.11958,Microondas
2,-2.807277,1409.495596,23.275813,2779.785156,1055.763228,133.928571,0.096703,0.77667,0.482565,0.618999,...,0.057,0.043657,0.237582,0.12624,0.191609,0.238118,0.230985,0.052054,6.5e-05,Aspiradora
3,-16.530776,707.460976,23.874835,747.314453,440.637551,133.928571,0.017683,0.101806,0.120892,0.529542,...,0.029982,0.02437,0.023364,0.019847,0.018202,0.015442,0.015859,0.016363,0.011732,Timbre
4,-4.882842,1925.234669,19.538132,3061.645508,968.023202,133.928571,0.038148,0.09655,0.043815,0.224315,...,0.290234,0.200003,0.171583,0.15543,0.123371,0.076988,0.130435,0.03073,0.043009,Ducha


In [45]:
filename = 'finalized_model_fourier_32_PCA.sav'

new_model = pickle.load(open(filename, 'rb'))

In [46]:
features = ['mfcc', 'scem','scom', 'srom','sbwm', 'tempo', 'rmse', 
            'Fourier1', 'Fourier2', 'Fourier3', 'Fourier4', 'Fourier5', 
            'Fourier6', 'Fourier7', 'Fourier8', 'Fourier9', 'Fourier10',
            'Fourier11', 'Fourier12', 'Fourier13', 'Fourier14', 'Fourier15',
            'Fourier16', 'Fourier17']

x = prueba.loc[:, features].values
y = prueba.loc[:,['momento']].values

x = StandardScaler().fit_transform(x)

In [47]:
pca = PCA(n_components=10)
principalComponents = pca.fit_transform(x)
principalDf = pd.DataFrame(data = principalComponents, 
                           columns = ['principal component 1',
                                      'principal component 2',
                                      'principal component 3',
                                      'principal component 4',
                                      'principal component 5',
                                      'principal component 6',
                                      'principal component 7',
                                      'principal component 8',
                                      'principal component 9',
                                      'principal component 10'])

principalDf.head()

Unnamed: 0,principal component 1,principal component 2,principal component 3,principal component 4,principal component 5,principal component 6,principal component 7,principal component 8,principal component 9,principal component 10
0,-1.358214,-0.760734,-0.365134,-0.705568,0.239484,-0.665049,-0.110475,0.502446,0.238647,-0.14196
1,2.815272,-1.059408,-1.197286,-0.154336,-0.055191,-1.07201,0.709188,-0.45688,-3.162241,-0.60496
2,0.512046,-0.904559,-0.820519,0.172917,0.48048,0.076216,0.182237,0.409223,-1.071791,0.071799
3,-2.251986,0.986497,0.082845,0.233693,0.466527,-0.279016,0.032414,0.082645,-0.348303,0.419084
4,0.272917,-1.66829,0.113397,0.18834,0.183597,0.260434,0.144522,-0.041764,0.404198,-0.159114


In [48]:
principalDf.reset_index(drop=True, inplace=True)

aux = prueba[['momento']]
aux.reset_index(drop=True, inplace=True)

finalDf = pd.concat([principalDf, aux], axis = 1)
finalDf.head()

Unnamed: 0,principal component 1,principal component 2,principal component 3,principal component 4,principal component 5,principal component 6,principal component 7,principal component 8,principal component 9,principal component 10,momento
0,-1.358214,-0.760734,-0.365134,-0.705568,0.239484,-0.665049,-0.110475,0.502446,0.238647,-0.14196,Lavadora
1,2.815272,-1.059408,-1.197286,-0.154336,-0.055191,-1.07201,0.709188,-0.45688,-3.162241,-0.60496,Microondas
2,0.512046,-0.904559,-0.820519,0.172917,0.48048,0.076216,0.182237,0.409223,-1.071791,0.071799,Aspiradora
3,-2.251986,0.986497,0.082845,0.233693,0.466527,-0.279016,0.032414,0.082645,-0.348303,0.419084,Timbre
4,0.272917,-1.66829,0.113397,0.18834,0.183597,0.260434,0.144522,-0.041764,0.404198,-0.159114,Ducha


In [49]:
classes = ['Afeitadora', 'Aire_acondicionado', 'Alarma', 'Aspiradora', 'Cena', 'Ducha', 'Grifo', 'Lavadora', 
           'Licuadora', 'Microondas', 'Secador_pelo', 'Secadora', 'Silencio', 'Tecleo', 'Telefono', 'Timbre']

X_new = finalDf.loc[:, finalDf.columns != 'momento']
y_new = new_model.predict(X_new)
y_new2 = new_model.predict_proba(X_new)

reconocidos = 0
aciertos = 0
for i in range(len(X_new)):
    predicted_list = y_new[i].tolist()
    predicted_class = classes[y_new2[i].tolist().index(max(y_new2[i]))]
    predicted_probability = y_new2[i].tolist()[y_new2[i].tolist().index(max(y_new2[i]))]
    if max(y_new2[i]) > 0:
        if predicted_class == finalDf.momento[i]:
            #print(str(i) + '-  {}: {:.2f}% ----> {}'.format(predicted_class, predicted_probability*100, finalDf.momento[i]))
            aciertos += 1
            reconocidos += 1
        else:
            #print(str(i) + '-  {}: {:.2f}% ----> {}'.format(predicted_class, predicted_probability*100, finalDf.momento[i]))
            reconocidos += 1


print('Porcentaje de sonidos reconocidos: {:.2f}%.'.format(reconocidos/(len(X_new))*100))
print('Porcentaje de aciertos: {:.2f}%.'.format(aciertos/(len(X_new))*100))
print('Porcentaje de aciertos una vez reconoce el sonido: {:.2f}%.'.format(aciertos/reconocidos*100))

Porcentaje de sonidos reconocidos: 100.00%.
Porcentaje de aciertos: 6.55%.
Porcentaje de aciertos una vez reconoce el sonido: 6.55%.


# Conclusiones

Los resultados me sorprenden, dado que me esperaba que el modelo con el PCA me diese mejores resultados, dado que tenía un mejor score. Creo que podría deberse a que aunque son dos datasets de audios del mismo tipo, al ser sonidos diferentes, al aplicar el PCA de forma independiente me da unas relaciones y unos coeficientes diferentes.

Aparte de eso, el modelo es capaz de predecirme un 54% de los audios. Mi dataset consta de 4 tipos diferentes de sonidos para cada clase, por lo que no es un dataset muy rico, y mi dataset a predecir consta de 3 tipo diferentes para cada clase. Por lo que en base a esta escasa variedad de variables, los resultados considero que no son nada desdeñables.

Además, muchos de los sonidos que falla lo hace por gran similitud con otros, como el secador de pelo y la secadora, el grifo y la ducha, o la licuadora y la máquina de afeitar.

Como próximo paso, convendría enriquecer mi dataset.