In [67]:
import librosa
import glob
import numpy as np

import matplotlib.pyplot as plt
from IPython.display import Audio
from librosa.display import specshow
from time import time
from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import pandas as pd
from sklearn.ensemble import RandomForestClassifier



#load utils.py

In [68]:
import os.path
from os import path

# Fetch the dataset only when the "speechcommands" directory is not present yet,
# so re-running the notebook does not download and unpack it again.
if not path.exists("speechcommands"):
    # Download the Speech Commands v0.01 archive.
    !wget http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz
    # Create the target directory and unpack the archive into it.
    !mkdir speechcommands
    !tar -xf speech_commands_v0.01.tar.gz -C speechcommands

In [69]:
# Build the list of digit recordings and split it into train / validation / test.

# Root folder of the extracted dataset.
DATASET_DIR = 'speechcommands'

# Spoken-word folder name -> numeric label.
# NOTE: cached feature CSVs written with a different mapping must be regenerated.
to_number = {
    "zero": 0,
    "one": 1,
    "two": 2,
    "three": 3,
    "four": 4,
    "five": 5,
    "six": 6,
    "seven": 7,
    "eight": 8,
    "nine": 9,
}

# Folder names that contain digit recordings; used to filter the split lists.
numbers_folders = list(to_number)

# One list of .wav paths per digit, in the same order as `to_number`.
# Sorted so the resulting file order does not depend on the filesystem.
# The loop variable is deliberately not called `path`, to avoid shadowing os.path.
numbers_filenames = []
for digit_name in numbers_folders:
    wav_pattern = DATASET_DIR + '/' + digit_name + '/*.wav'
    numbers_filenames.append(sorted(glob.glob(wav_pattern)))


def _read_digit_split(list_name):
    """Return the paths listed in `DATASET_DIR/list_name` that live in a digit folder.

    Each line of the list file is a dataset-relative path; the text before the
    first '/' is the word folder. The returned paths are prefixed with
    DATASET_DIR so they are directly comparable with the glob results.
    """
    with open(DATASET_DIR + '/' + list_name, 'r') as split_file:
        entries = split_file.read().splitlines()
    return [DATASET_DIR + '/' + entry for entry in entries
            if entry.split('/', 1)[0] in numbers_folders]


# Test and validation lists, restricted to files that are digits.
test_filenames = _read_digit_split('testing_list.txt')
valid_filenames = _read_digit_split('validation_list.txt')

# Everything that is not held out is training data.
# A set makes each membership test O(1) instead of scanning both lists.
held_out_filenames = set(test_filenames) | set(valid_filenames)
train_filenames = [wav_file
                   for digit_files in numbers_filenames
                   for wav_file in digit_files
                   if wav_file not in held_out_filenames]

print('train_count:' , len(train_filenames))
print('test_count:' , len(test_filenames))
print('valid_count:' , len(valid_filenames))

train_count: 18620
test_count: 2552
valid_count: 2494


In [70]:
def count_elapsed_time(f):
    """
    Decorator that reports how long each call to ``f`` takes.

    The wrapped function is called with the arguments it receives, the elapsed
    time is printed to the standard output, and the original return value is
    passed through unchanged. Nothing is printed if ``f`` raises.

    Parameters
    ----------
    f : callable
        Function to time.

    Returns
    -------
    callable
        Wrapper that keeps the name and docstring of ``f``.
    """
    # Local import: keeps the decorator self-contained within this cell.
    import functools

    @functools.wraps(f)  # preserve f.__name__ / f.__doc__ on the wrapper
    def wrapper(*args, **kwargs):
        # Start counting.
        start_time = time()
        # Take the original function's return value.
        ret = f(*args, **kwargs)
        # Calculate the elapsed time.
        elapsed_time = time() - start_time
        print("Elapsed time: %0.10f seconds." % elapsed_time)
        return ret

    return wrapper

In [71]:
def calculate_features(filename, n_mfcc=12, delta=True, deltadelta=True, energy=True, summary_fn = [np.mean, np.std], summary_names=['mean', 'std']):
    """Compute one summary feature vector for a single audio file.

    Parameters
    ----------
    filename : str
        Path to the audio file. Its parent folder name must be a key of the
        notebook-level ``to_number`` mapping.
    n_mfcc : int
        Number of MFCC coefficients to extract.
    delta, deltadelta : bool
        Also summarize the first / second order differences along the frame axis.
    energy : bool
        Append the frame-wise RMS energy as an extra feature row.
    summary_fn : list of callables
        Reductions applied over the frame axis (called as ``fn(x, axis=1)``).
    summary_names : list of str
        Not used here; accepted so the signature mirrors ``name_features``.

    Returns
    -------
    numpy.ndarray
        1-D array: the summaries of the features, then of the deltas, then of
        the delta-deltas, followed by the numeric label and ``filename``.
        Because a path string is appended with ``np.append``, NumPy stores all
        entries under one common (string) dtype.

    Raises
    ------
    KeyError
        If the parent folder of ``filename`` is not a key of ``to_number``.
    """
    # Local import: keeps the function independent of notebook-level names.
    import os

    # Load the file at its native sampling rate.
    signal, sr = librosa.load(filename, sr=None)

    # MFCCs; keyword arguments are accepted by both old and new librosa versions.
    features = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)

    # RMS energy: `rmse` was renamed to `rms` in librosa 0.7, support both.
    # Stored under a new name so the boolean `energy` flag is not overwritten.
    if energy:
        rms_fn = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse
        features = np.concatenate([features, rms_fn(y=signal)])

    def _summarize(frames):
        # Apply every summary function over the frame axis and stack the results.
        return np.concatenate([fn(frames, axis=1) for fn in summary_fn])

    summary_features = _summarize(features)

    # Same summaries for the first-order differences.
    if delta:
        summary_features = np.concatenate([summary_features, _summarize(np.diff(features))])

    # And for the second-order differences.
    if deltadelta:
        summary_features = np.concatenate([summary_features, _summarize(np.diff(features, n=2))])

    # The label comes from the folder that contains the file; this does not
    # depend on how many directory levels precede it.
    digit_folder = os.path.basename(os.path.dirname(filename))
    summary_features = np.append(summary_features, [to_number[digit_folder], filename])

    return summary_features

In [72]:
def name_features(filename = '', n_mfcc=12, delta=True, deltadelta=True, energy=True, summary_fn = [np.mean, np.std], summary_names=['mean', 'std']):
    """Build the column names for the feature vectors.

    Parameters
    ----------
    filename : str
        Not used; accepted so the signature mirrors ``calculate_features``.
    n_mfcc : int
        Number of MFCC coefficients (``mfcc_0`` .. ``mfcc_{n_mfcc-1}``).
    delta, deltadelta : bool
        Include the ``d``- / ``dd``-prefixed copies of the summary names.
    energy : bool
        Include the ``energy`` feature after the MFCCs.
    summary_fn : list of callables
        Not used; only ``summary_names`` determines the output.
    summary_names : list of str
        Suffix for each summary statistic, in the order they are computed.

    Returns
    -------
    list of str
        Summary names grouped by statistic, then the delta names, then the
        delta-delta names, followed by ``'digit'`` and ``'file'``.
    """
    base_names = ['mfcc_{}'.format(i) for i in range(n_mfcc)]
    if energy:
        base_names = base_names + ['energy']

    # Statistic-major order: every feature for the first statistic, then the next one.
    feat_names = ['{}_{}'.format(name_i, summ_i) for summ_i in summary_names for name_i in base_names]

    # Default to empty lists so disabling a flag no longer leaves the name unbound.
    d_names = ['d{}'.format(name) for name in feat_names] if delta else []
    dd_names = ['dd{}'.format(name) for name in feat_names] if deltadelta else []

    return feat_names + d_names + dd_names + ['digit', 'file']

In [73]:
from os import path
@count_elapsed_time
def calculate_features_if_needed(filenames, result_filename):
    """Return the feature table for `filenames`, computing and caching it when missing.

    The table is cached as ``features/<result_filename>``. If that CSV does not
    exist, the features of every file are computed and written to it. In both
    cases the table is then read back from the CSV, so a fresh computation and
    a cached load return the same columns.

    Parameters
    ----------
    filenames : list of str
        Audio files to process; only used when the cache file is missing.
    result_filename : str
        Name of the CSV file inside the ``features`` folder.

    Returns
    -------
    pandas.DataFrame
        The feature table as loaded from the CSV.
    """
    # Local import: a notebook-level name `path` can be rebound by other cells.
    import os

    result_path = "features/"+result_filename
    if not os.path.exists(result_path):
        print("Calculating features for ", len(filenames), " rows -> ", result_path)
        # The cache folder may not exist on a first run.
        os.makedirs(os.path.dirname(result_path), exist_ok=True)
        features_names = name_features()
        features_data = [calculate_features(x) for x in filenames]
        pd.DataFrame(data = features_data, columns = features_names).to_csv(result_path)
    else:
        print("Loading saved features <- ", result_path)

    # Always load from disk so both branches return the same thing.
    return pd.read_csv(result_path)

## Obtenemos las features y eliminamos las columnas innecesarias para la predicción

In [74]:
# Columns that must not reach the classifiers: the source file path and the
# unnamed index column that appears when the cached CSV is read back.
var_exclude = ['file', 'Unnamed: 0']

# Non-mutating drop; errors='ignore' tolerates a frame that lacks one of the columns.
test_features = (
    calculate_features_if_needed(test_filenames, "test_features.csv")
    .drop(columns=var_exclude, errors='ignore')
)

# Only the last expression of a cell is rendered, so show the preview explicitly.
display(test_features.head(10))

# Relative frequency of each digit in the test set.
pd.crosstab(index=test_features["digit"], columns="count", normalize=True)

Loading saved features <-  features/test_features.csv
Elapsed time: 0.0408360958 seconds.


col_0,count
digit,Unnamed: 1_level_1
eight,0.100705
five,0.106191
four,0.099138
nine,0.101489
one,0.097179
seven,0.093652
six,0.095611
three,0.104624
two,0.103448
zero,0.097962


In [75]:
# Non-mutating drop; errors='ignore' tolerates a frame that lacks one of the columns.
validation_features = (
    calculate_features_if_needed(valid_filenames, "valid_features.csv")
    .drop(columns=var_exclude, errors='ignore')
)
# Backward-compatible alias: the notebook refers to this frame under both spellings.
validaton_features = validation_features

# Only the last expression of a cell is rendered, so show the preview explicitly.
display(validation_features.head(10))

# Relative frequency of each digit in the validation set.
pd.crosstab(index=validation_features["digit"], columns="count", normalize=True)

Loading saved features <-  features/valid_features.csv
Elapsed time: 0.0354580879 seconds.


col_0,count
digit,Unnamed: 1_level_1
eight,0.097434
five,0.097033
four,0.112269
nine,0.092221
one,0.092221
seven,0.105453
six,0.105052
three,0.099439
two,0.094627
zero,0.10425


In [None]:
# Non-mutating drop; errors='ignore' tolerates a frame that lacks one of the columns.
train_features = (
    calculate_features_if_needed(train_filenames, "train_features.csv")
    .drop(columns=var_exclude, errors='ignore')
)

# Only the last expression of a cell is rendered, so show the preview explicitly.
display(train_features.head(10))

# Relative frequency of each digit in the training set.
pd.crosstab(index=train_features["digit"], columns="count", normalize=True)

Calculating features for  18620  rows ->  features/train_features.csv


# Modelos

## Naive Bayes

In [None]:
## Naive Bayes model 1 -- default hyperparameters
X_train = train_features.drop('digit', axis=1).values
y_train = train_features.digit.values
# `validaton_features` (sic) is the name the validation frame is defined under.
X_valid = validaton_features.drop('digit', axis=1).values
y_valid = validaton_features.digit.values

naive_bayes_1 = GaussianNB()
naive_bayes_1.fit(X_train, y_train)

# Predict on the validation set: these predictions are compared with the
# validation labels below, so both must describe the same samples.
predict_nb1 = naive_bayes_1.predict(X_valid)

print("Accuracy training : {:.3f}".format(naive_bayes_1.score(X_train, y_train)))
print("Accuracy Validación: {:.3f}".format(naive_bayes_1.score(X_valid, y_valid)))

print(classification_report(y_valid, predict_nb1))

In [None]:
## Confusion matrix of the Naive Bayes model on the validation set
# `validaton_features` (sic) is the name the validation frame is defined under.
y_valid_cm = validaton_features.digit.values
# Predict here so rows and columns are guaranteed to describe the same samples.
y_pred_cm = naive_bayes_1.predict(validaton_features.drop('digit', axis=1).values)
# Fix the class order once and use it for both the matrix and the tick labels.
class_labels = naive_bayes_1.classes_

sns.set_context('talk')
ax = sns.heatmap(confusion_matrix(y_valid_cm, y_pred_cm, labels=class_labels),
                 annot=True, fmt='g',
                 xticklabels=class_labels, yticklabels=class_labels)
ax.set_xlabel('Predicted')
ax.set_ylabel('True');

In [None]:
## Random Forest [hand-picked hyperparameters, no search]

In [None]:
# Random Forest with hand-picked hyperparameters.
# The label column is 'digit'; `validaton_features` (sic) is the name the
# validation frame is defined under.
X_train = train_features.drop('digit', axis=1).values
y_train = train_features.digit.values
X_valid = validaton_features.drop('digit', axis=1).values
y_valid = validaton_features.digit.values

random_forest1 = RandomForestClassifier(n_estimators=450, max_depth=10, bootstrap=False,  random_state=1)
random_forest1.fit(X_train, y_train)
predict_rf1 = random_forest1.predict(X_valid)

print("Accuracy training : {:.3f}".format(random_forest1.score(X_train, y_train)))
print("Accuracy Validación: {:.3f}".format(random_forest1.score(X_valid, y_valid)))

print(classification_report(y_valid, predict_rf1))

## Probamos Hiperparametros

In [None]:
## Stack train and validation into a single frame for the hyperparameter search.
## `test_fold` below records which rows play which role.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# `validaton_features` (sic) is the name the validation frame is defined under.
to_hiper = pd.concat([train_features, validaton_features], ignore_index=True)

# One entry per row of `to_hiper`: -1 for training rows, 0 for validation rows.
test_fold = [-1] * len(train_features) + [0] * len(validaton_features)

In [None]:
### Hyperparameter search

from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score, StratifiedShuffleSplit
from sklearn.model_selection import PredefinedSplit

# Single predefined split: rows marked -1 in `test_fold` are used for fitting,
# rows marked 0 are used for scoring each candidate.
kfoldcv = PredefinedSplit(test_fold)

parametros = {'n_estimators':range(100, 250, 15), 'max_depth':range(6, 12, 2), 'bootstrap':[True, False], 'random_state': range(0, 5, 1)}

# Passing train + validation to fit() is not cheating by itself, because the
# predefined split keeps the validation rows out of each candidate's training.
# The leak comes from the default refit=True, which retrains the best model on
# ALL rows (validation included). refit=False avoids that.
clf = RandomizedSearchCV(RandomForestClassifier(), parametros, n_jobs=10, random_state=131313,
                         scoring='accuracy', n_iter=3, cv = kfoldcv, refit=False)
clf.fit(to_hiper.drop('digit',axis=1).values, to_hiper.digit.values)

# The label column is 'digit'; `validaton_features` (sic) is the name the
# validation frame is defined under.
X_train = train_features.drop('digit', axis=1).values
y_train = train_features.digit.values
X_valid = validaton_features.drop('digit', axis=1).values
y_valid = validaton_features.digit.values

# Rebuild the winning configuration and fit it on the training rows only,
# so the validation accuracy below is measured on unseen data.
rf = RandomForestClassifier(**clf.best_params_)
rf.fit(X_train, y_train)

print(clf.best_score_, clf.best_params_)
print("Accuracy training : {:.3f}".format(rf.score(X_train, y_train)))
print("Accuracy Validación: {:.3f}".format(rf.score(X_valid, y_valid)))

In [None]:
# Random Forest with the selected hyperparameters, fitted on the training set.
# The label column is 'digit'; `validaton_features` (sic) is the name the
# validation frame is defined under.
X_train = train_features.drop('digit', axis=1).values
y_train = train_features.digit.values
X_valid = validaton_features.drop('digit', axis=1).values
y_valid = validaton_features.digit.values

rf_mejor = RandomForestClassifier(random_state=4, n_estimators=175, max_depth=10, bootstrap=False)
rf_mejor.fit(X_train, y_train)

# Score the model that was just fitted (rf_mejor), not the one from the search cell.
print("Accuracy training : {:.3f}".format(rf_mejor.score(X_train, y_train)))
print("Accuracy Validación: {:.3f}".format(rf_mejor.score(X_valid, y_valid)))