## Importación de las librerías utilizadas

In [2]:
import librosa
import glob
import numpy as np
import seaborn as sns
import pandas as pd
import os.path
from os import path

import matplotlib.pyplot as plt

from enum import Enum
from time import time

from IPython.display import Audio
from librosa.display import specshow
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier

## Funciones de utilidad para la generación de features

In [3]:
class AudioType(Enum):
    """Identifies which group of audio files a feature table is built from."""

    TRAIN = 1
    VALIDATION = 2
    TEST = 3
    # No file list is registered for CUSTOM in get_filenames().
    CUSTOM = 4

In [4]:
class Feature:
    """Describes one feature table: its CSV name, audio group and optional noise.

    Attributes:
        filename: name of the CSV file that stores the features.
        audio_type: group of audio files the features are computed from.
        noise_type: identifier of the noise to mix into the audio, or None
            when the audio is used as is.
    """

    def __init__(self, filename, audio_type, noise_type=None):
        self.filename, self.audio_type, self.noise_type = filename, audio_type, noise_type

In [5]:
class Features(Enum):
    """Catalogue of the feature tables used in this notebook.

    Every member's value is a Feature holding the CSV file name, the audio
    group it is computed from and, for the noisy test variants, the noise to
    add ("gauss" or the file name of a background-noise recording).
    """

    # Tables computed from the audio as is
    Train = Feature("train_features.csv", AudioType.TRAIN)
    Validation = Feature("valid_features.csv", AudioType.VALIDATION)
    Test = Feature("test_features.csv", AudioType.TEST)

    # Test tables computed after adding noise
    Test_Noise_Gauss = Feature("test_features_gauss.csv", AudioType.TEST, noise_type="gauss")
    Test_Noise_Dishes = Feature("test_features_dishes.csv", AudioType.TEST, noise_type="doing_the_dishes.wav")
    Test_Noise_Dude = Feature("test_features_dude.csv", AudioType.TEST, noise_type="dude_miaowing.wav")
    Test_Noise_Bike = Feature("test_features_bike.csv", AudioType.TEST, noise_type="exercise_bike.wav")
    # NOTE(review): this file is called "..._noise.csv" rather than "..._pink.csv" like its siblings.
    Test_Noise_Pink = Feature("test_features_noise.csv", AudioType.TEST, noise_type="pink_noise.wav")
    Test_Noise_Running = Feature("test_features_running.csv", AudioType.TEST, noise_type="running_tap.wav")
    Test_Noise_White = Feature("test_features_white.csv", AudioType.TEST, noise_type="white_noise.wav")

In [6]:
# Cache filled by get_filenames() on its first call
audio_filenames = None

# Spoken word -> digit; the keys are also used as the dataset folder names
to_number = {
    word: digit
    for digit, word in enumerate(
        ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]
    )
}

def get_filenames():
    
    global audio_filenames
    
    if audio_filenames!= None:
        return audio_filenames
    
    import os.path
    from os import path
    if not path.exists("speechcommands"):
        !wget http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz
        !mkdir speechcommands
        !tar -xf speech_commands_v0.01.tar.gz -C speechcommands
    
    numbers_filenames = []

    for i in to_number:
        path = 'speechcommands/' + i + '/*.wav'
        numbers_filenames.append(glob.glob(path))

    test_filenames = ['speechcommands/' + e for e in open('speechcommands/testing_list.txt','r').read().splitlines() if e[:e.find('/')] in to_number]
    valid_filenames = ['speechcommands/' + e for e in open('speechcommands/validation_list.txt','r').read().splitlines() if e[:e.find('/')] in to_number]
    train_filenames = [e for n in range(10) for e in numbers_filenames[n] if (e not in test_filenames) and (e not in valid_filenames)]
    
    print('train_count:' , len(train_filenames))
    print('test_count:' , len(test_filenames))
    print('valid_count:' , len(valid_filenames))
    
    audio_filenames = {
      AudioType.TRAIN: train_filenames,
      AudioType.TEST: test_filenames,
      AudioType.VALIDATION: valid_filenames  
    }
        
    return audio_filenames

In [7]:
# Decoded background-noise recordings, keyed by file name, so that each
# recording is read from disk once instead of once per utterance.
NOISE_CLIPS = {}

def apply_noise(audio, noise_type):
    """Return ``audio`` with noise added.

    Args:
        audio: 1-D array of audio samples.
        noise_type: ``"gauss"`` for standard-normal noise drawn from NumPy's
            global random state, otherwise the file name of a recording
            inside ``speechcommands/_background_noise_/``.

    Returns:
        Array with the same length as ``audio``.

    Raises:
        ValueError: if the noise recording is too short to supply
            ``len(audio)`` samples after the skipped lead-in.
    """
    if noise_type == "gauss":
        ruido = np.random.normal(size=audio.shape)
    else:
        if noise_type not in NOISE_CLIPS:
            clip, _ = librosa.core.load('speechcommands/_background_noise_/' + noise_type, sr=None)
            NOISE_CLIPS[noise_type] = clip

        # Skip the first 5000 samples of the recording, then take as many as the audio has.
        start = 5000
        segment = NOISE_CLIPS[noise_type][start:start + len(audio)]
        if len(segment) != len(audio):
            raise ValueError(
                "noise recording {!r} is too short: need {} samples after offset {}, got {}".format(
                    noise_type, len(audio), start, len(segment)))
        # The recording is mixed in at half amplitude.
        ruido = 0.5 * segment

    return ruido + audio

In [8]:
def calculate_features(filename, n_mfcc=12, delta=True, deltadelta=True, energy=True, summary_fn = [np.mean, np.std],
                       summary_names=['mean', 'std'], noise_type=None):
    """Compute the summary feature vector of one audio file.

    Args:
        filename: path of the audio file; its second ``/``-separated
            component must be a key of ``to_number`` (the digit label).
        n_mfcc: number of MFCC coefficients.
        delta: also summarise the first-order differences along time.
        deltadelta: also summarise the second-order differences along time.
        energy: append the RMS energy as an extra feature row.
        summary_fn: functions applied along the time axis to each block.
        summary_names: not used here; kept for signature parity with
            ``name_features``.
        noise_type: when given, passed to ``apply_noise`` before extraction.

    Returns:
        1-D array with the summaries followed by the digit label and the
        file name. NOTE(review): because a file name is appended, NumPy
        stores every element as a string.
    """
    # Load the file at its native sampling rate
    x, sr = librosa.load(filename, sr=None)

    if noise_type is not None:
        x = apply_noise(x, noise_type)

    # MFCCs; the audio is passed by keyword, which old and new librosa accept
    features = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=n_mfcc)

    if energy:
        # librosa renamed feature.rmse to feature.rms; use whichever exists
        rms_fn = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse
        features = np.concatenate([features, rms_fn(y=x)])

    def summarize(matrix):
        # Apply every summary function along the time axis and join the results
        return np.concatenate([fn(matrix, axis=1) for fn in summary_fn])

    summary_features = summarize(features)

    if delta:
        summary_features = np.concatenate([summary_features, summarize(np.diff(features))])

    if deltadelta:
        summary_features = np.concatenate([summary_features, summarize(np.diff(features, n=2))])

    summary_features = np.append(summary_features, [to_number[filename.split('/')[1]], filename])

    return summary_features

In [9]:
# Most recently generated list of column names
features_names = None

def name_features(filename = '', n_mfcc=12, delta=True, deltadelta=True, energy=True, summary_fn = [np.mean, np.std], summary_names=['mean', 'std']):
    """Build the column names matching the layout of ``calculate_features``.

    The names are recomputed on every call, so different arguments never
    return a stale list, and ``delta`` / ``deltadelta`` may be disabled
    independently.

    Args:
        filename: unused; kept for signature parity with ``calculate_features``.
        n_mfcc: number of MFCC coefficients.
        delta: include names for the first-order differences (prefix ``d``).
        deltadelta: include names for the second-order differences (prefix ``dd``).
        energy: include the ``energy`` feature.
        summary_fn: unused; kept for signature parity.
        summary_names: suffix of each summary, in the order they are applied.

    Returns:
        List of column names ending with ``'digit'`` and ``'file'``. The same
        list is also stored in the module-level ``features_names``.
    """
    global features_names

    base_names = ['mfcc_{}'.format(i) for i in range(n_mfcc)]
    if energy:
        base_names.append('energy')

    # One full pass over the base names per summary (all means, then all stds, ...)
    summarized = ['{}_{}'.format(name_i, summ_i) for summ_i in summary_names for name_i in base_names]

    names = list(summarized)
    if delta:
        names += ['d{}'.format(name) for name in summarized]
    if deltadelta:
        names += ['dd{}'.format(name) for name in summarized]
    names += ['digit', 'file']

    features_names = names
    return names

In [10]:
def calculate_features_if_needed(feature):
    """Return the feature table described by ``feature``, computing it if missing.

    When ``features/<feature.filename>`` does not exist, the features of
    every file in the matching audio group are computed (with
    ``feature.noise_type`` applied) and written to that CSV. The table is
    then always read back from the CSV.

    Args:
        feature: object exposing ``filename``, ``audio_type`` and ``noise_type``.

    Returns:
        DataFrame read from the CSV file.
    """
    result_path = "features/" + feature.filename
    if not path.exists(result_path):
        # Create the cache folder up front so a missing directory fails
        # before the slow feature extraction rather than after it.
        os.makedirs(path.dirname(result_path), exist_ok=True)

        print("Getting filenames")
        filenames = get_filenames()[feature.audio_type]

        print("Calculating features for ", len(filenames), " rows -> ", result_path)
        features_names = name_features()
        features_data = [calculate_features(filename, noise_type= feature.noise_type) for filename in filenames]
        pd.DataFrame(data = features_data, columns = features_names).to_csv(result_path)

    print("Loading saved features <- ", result_path)
    return pd.read_csv(result_path)

In [11]:
def load_features(feature):
    """Load a feature table ready for modelling.

    Obtains the table through ``calculate_features_if_needed``, removes the
    ``file`` and ``Unnamed: 0`` columns and prints the proportion of rows
    per digit.

    Returns:
        DataFrame with the feature columns plus ``digit``.
    """
    table = calculate_features_if_needed(feature)
    features = table.drop(columns=['file', 'Unnamed: 0'])
    digit_share = pd.crosstab(index=features["digit"], columns="count", normalize=True)
    print(digit_share)
    return features

##  Obtenemos features

### Descargamos los features guardados previamente

In [25]:
if not path.exists("features/test_features.csv"):
    !wget https://raw.githubusercontent.com/dhruszecki/cdatos-AA-TP2/master/features/test_features.csv -P features
    
if not path.exists("features/train_features.csv"):        
    !wget https://raw.githubusercontent.com/dhruszecki/cdatos-AA-TP2/master/features/train_features.csv -P features
        
if not path.exists("features/valid_features.csv"):        
    !wget https://raw.githubusercontent.com/dhruszecki/cdatos-AA-TP2/master/features/valid_features.csv -P features

if not path.exists("features/test_features_gauss.csv"):
    !wget https://raw.githubusercontent.com/dhruszecki/cdatos-AA-TP2/master/features/test_features_gaus.csv -P features

if not path.exists("features/test_features_dude.csv"):
    !wget https://raw.githubusercontent.com/dhruszecki/cdatos-AA-TP2/master/features/test_features_dude.csv -P features

if not path.exists("features/test_features_bike.csv"):
    !wget https://raw.githubusercontent.com/dhruszecki/cdatos-AA-TP2/master/features/test_features_bike.csv -P features

if not path.exists("features/test_features_pink.csv"):
    !wget https://raw.githubusercontent.com/dhruszecki/cdatos-AA-TP2/master/features/test_features_pink.csv -P features

if not path.exists("features/test_features_running.csv"):
    !wget https://raw.githubusercontent.com/dhruszecki/cdatos-AA-TP2/master/features/test_features_running.csv -P features

if not path.exists("features/test_features_white.csv"):
    !wget https://raw.githubusercontent.com/dhruszecki/cdatos-AA-TP2/master/features/test_features_white.csv -P features


--2020-07-09 15:43:13--  https://raw.githubusercontent.com/dhruszecki/cdatos-AA-TP2/master/features/test_features_pink.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.216.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.216.133|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2020-07-09 15:43:13 ERROR 404: Not Found.



### Features de entrenamiento

In [12]:
%%time
# Training features: read from the cached CSV (computed first if missing);
# load_features also prints the proportion of rows per digit.
train_features = load_features(Features.Train.value)

Loading saved features <-  features/train_features.csv
col_0     count
digit          
0      0.100215
1      0.101611
2      0.100591
3      0.098872
4      0.098765
5      0.099033
6      0.100054
7      0.100698
8      0.099463
9      0.100698
CPU times: user 231 ms, sys: 27.7 ms, total: 259 ms
Wall time: 257 ms


### Features de validación

In [13]:
%%time
# Validation features, loaded the same way as the training ones.
validation_features = load_features(Features.Validation.value)

Loading saved features <-  features/valid_features.csv
col_0     count
digit          
0      0.104250
1      0.092221
2      0.094627
3      0.099439
4      0.112269
5      0.097033
6      0.105052
7      0.105453
8      0.097434
9      0.092221
CPU times: user 79.6 ms, sys: 3.19 ms, total: 82.8 ms
Wall time: 81.7 ms


### Features de Desarrollo

In [14]:
# Development set = validation rows followed by training rows, re-indexed.
# DataFrame.append was removed in pandas 2.0; pd.concat builds the same frame.
develop_features = pd.concat([validation_features, train_features], ignore_index=True)

### Features de test

In [15]:
#### Test Originales

In [16]:
%%time
# Test features computed from the audio without added noise.
test_features = load_features(Features.Test.value)

Loading saved features <-  features/test_features.csv
col_0     count
digit          
0      0.097962
1      0.097179
2      0.103448
3      0.104624
4      0.099138
5      0.106191
6      0.095611
7      0.093652
8      0.100705
9      0.101489
CPU times: user 50 ms, sys: 9.9 ms, total: 59.9 ms
Wall time: 57.8 ms


#### Test con ruido gaussiano

In [17]:
%%time
# Test features with noise_type "gauss" (normal noise added to each audio).
test_features_gauss = load_features(Features.Test_Noise_Gauss.value)

Getting filenames
train_count: 18620
test_count: 2552
valid_count: 2494
Calculating features for  2552  rows ->  features/test_features_gauss.csv
Loading saved features <-  features/test_features_gauss.csv
col_0     count
digit          
0      0.097962
1      0.097179
2      0.103448
3      0.104624
4      0.099138
5      0.106191
6      0.095611
7      0.093652
8      0.100705
9      0.101489
CPU times: user 42.5 s, sys: 41.8 s, total: 1min 24s
Wall time: 2min 29s


#### Test con ruido `doing_the_dishes`

In [18]:
%%time
# Test features with the "doing_the_dishes.wav" background recording mixed in.
test_features_dishes = load_features(Features.Test_Noise_Dishes.value)

Getting filenames
Calculating features for  2552  rows ->  features/test_features_dishes.csv
Loading saved features <-  features/test_features_dishes.csv
col_0     count
digit          
0      0.097962
1      0.097179
2      0.103448
3      0.104624
4      0.099138
5      0.106191
6      0.095611
7      0.093652
8      0.100705
9      0.101489
CPU times: user 1min 55s, sys: 2min, total: 3min 55s
Wall time: 7min 43s


#### Test con ruido `dude_miaowing`

In [19]:
%%time
# Test features with the "dude_miaowing.wav" background recording mixed in.
test_features_dude = load_features(Features.Test_Noise_Dude.value)

Getting filenames
Calculating features for  2552  rows ->  features/test_features_dude.csv
Loading saved features <-  features/test_features_dude.csv
col_0     count
digit          
0      0.097962
1      0.097179
2      0.103448
3      0.104624
4      0.099138
5      0.106191
6      0.095611
7      0.093652
8      0.100705
9      0.101489
CPU times: user 1min 40s, sys: 1min 59s, total: 3min 39s
Wall time: 7min 26s


#### Test con ruido `exercise_bike`

In [20]:
%%time
# Test features with the "exercise_bike.wav" background recording mixed in.
test_features_bike = load_features(Features.Test_Noise_Bike.value)

Getting filenames
Calculating features for  2552  rows ->  features/test_features_bike.csv
Loading saved features <-  features/test_features_bike.csv
col_0     count
digit          
0      0.097962
1      0.097179
2      0.103448
3      0.104624
4      0.099138
5      0.106191
6      0.095611
7      0.093652
8      0.100705
9      0.101489
CPU times: user 1min 38s, sys: 1min 59s, total: 3min 37s
Wall time: 7min 11s


#### Test con ruido `pink_noise`

In [21]:
%%time
# Test features with the "pink_noise.wav" background recording mixed in.
# Note: this table is cached as features/test_features_noise.csv.
test_features_pink = load_features(Features.Test_Noise_Pink.value)

Getting filenames
Calculating features for  2552  rows ->  features/test_features_noise.csv
Loading saved features <-  features/test_features_noise.csv
col_0     count
digit          
0      0.097962
1      0.097179
2      0.103448
3      0.104624
4      0.099138
5      0.106191
6      0.095611
7      0.093652
8      0.100705
9      0.101489
CPU times: user 1min 38s, sys: 1min 57s, total: 3min 35s
Wall time: 7min 21s


#### Test con ruido `running_tap`

In [22]:
%%time
# Test features with the "running_tap.wav" background recording mixed in.
test_features_running = load_features(Features.Test_Noise_Running.value)

Getting filenames
Calculating features for  2552  rows ->  features/test_features_running.csv
Loading saved features <-  features/test_features_running.csv
col_0     count
digit          
0      0.097962
1      0.097179
2      0.103448
3      0.104624
4      0.099138
5      0.106191
6      0.095611
7      0.093652
8      0.100705
9      0.101489
CPU times: user 1min 34s, sys: 1min 52s, total: 3min 27s
Wall time: 6min 37s


#### Test con ruido `white_noise`

In [23]:
%%time
# Test features with the "white_noise.wav" background recording mixed in.
test_features_white = load_features(Features.Test_Noise_White.value)

Getting filenames
Calculating features for  2552  rows ->  features/test_features_white.csv
Loading saved features <-  features/test_features_white.csv
col_0     count
digit          
0      0.097962
1      0.097179
2      0.103448
3      0.104624
4      0.099138
5      0.106191
6      0.095611
7      0.093652
8      0.100705
9      0.101489
CPU times: user 1min 37s, sys: 1min 56s, total: 3min 33s
Wall time: 7min 18s


# Modelos

## Naive Bayes

In [None]:
%%time

naive_bayes_1 = GaussianNB()
naive_bayes_1.fit(train_features.drop('digit',axis=1).values, train_features.digit.values)
predict_nb1 = naive_bayes_1.predict(validation_features.drop('digit',axis=1).values)

print("Accuracy training : {:.3f}".format(naive_bayes_1.score(train_features.drop('digit',axis=1).values, 
                                                               train_features.digit.values)))
print("Accuracy Validación: {:.3f}".format(naive_bayes_1.score(validation_features.drop('digit',axis=1).values, 
                                                          validation_features.digit.values)))

print(classification_report(validation_features.digit.values, predict_nb1))

###  Matriz de Confusión 

In [None]:
%%time

sns.set_context('paper')
sns.heatmap(confusion_matrix(validation_features.digit.values, predict_nb1), annot=True, fmt='g')
plt.xlabel('Predict')
plt.ylabel('True')

## Random Forest:  búsqueda de hiperparámetros óptimos

In [None]:
%%time

parametros = {'n_estimators':range(100, 250, 15), 'max_depth':range(6, 12, 2), 'bootstrap':[True, False]}

clf = RandomizedSearchCV(RandomForestClassifier(random_state=22), parametros, n_jobs=20, random_state=131313,    
                         scoring='accuracy', n_iter=30, cv =  [(slice(None), slice(None))])

clf.fit(train_features.drop('digit',axis=1).values, train_features.digit.values)
rf = clf.best_estimator_  

print(clf.best_score_, clf.best_params_)
print("Accuracy training : {:.3f}".format(rf.score(train_features.drop('digit',axis=1).values, 
                                                   train_features.digit.values)))
print("Accuracy Validación: {:.3f}".format(rf.score(validation_features.drop('digit',axis=1).values, 
                                                          validation_features.digit.values)))

###  Matriz de Confusión 

In [None]:
# Confusion matrix of the random forest predictions on the validation split
rf_pred = rf.predict(validation_features.drop('digit', axis=1).values)
sns.set_context('paper')
rf_confusion = confusion_matrix(validation_features.digit.values, rf_pred)
ax = sns.heatmap(rf_confusion, annot=True, fmt='g')
ax.set_xlabel('Predict')
ax.set_ylabel('True')

## Gradient Boosting

In [None]:
%%time

parameters = {'n_estimators':range(50, 150, 25), 'max_depth':range(5,7), 'learning_rate':np.arange(0,1,0.1)}
clf = RandomizedSearchCV(GradientBoostingClassifier(random_state=22), parameters, n_jobs=30, scoring='accuracy',cv=[(slice(None), slice(None))], n_iter=50, random_state=8)

clf.fit(train_features.drop('digit',axis=1).values, train_features.digit.values)
gb = clf.best_estimator_

print (clf.best_score_, clf.best_params_)
print("Accuracy training : {:.3f}".format(gb.score(train_features.drop('digit',axis=1).values, 
                                                   train_features.digit.values))) 
print("Accuracy validation: {:.3f}".format(gb.score(validation_features.drop('digit',axis=1).values, 
                                                   validation_features.digit.values)))

###  Matriz de Confusión 

In [None]:
# Confusion matrix of the gradient boosting predictions on the validation split
gb_pred = gb.predict(validation_features.drop('digit', axis=1).values)
sns.set_context('paper')
gb_confusion = confusion_matrix(validation_features.digit.values, gb_pred)
ax = sns.heatmap(gb_confusion, annot=True, fmt='g')
ax.set_xlabel('Predict')
ax.set_ylabel('True')

In [None]:
%%time
# Variables a excluir de la predicción
var_exclude = ['file', 'Unnamed: 0']

test_features_g = pd.DataFrame(calculate_features_if_needed("test_features_ruidog.csv", 1))
test_features_g.drop(var_exclude, axis=1, inplace=True)
test_features_g.head(10)
pd.crosstab(index=test_features_g["digit"], columns="count", normalize=True)

In [None]:
%%time

naive_bayes_1 = GaussianNB()
naive_bayes_1.fit(develop_features.drop('digit',axis=1).values, develop_features.digit.values)

predict_nb1_test = naive_bayes_1.predict(test_features.drop('digit',axis=1).values)

#predict_nb1_g = naive_bayes_1.predict(test_features_g.drop('digit',axis=1).values)

# TODO: agregar resto de ruidosos + nuestros


print("Accuracy Develop : {:.3f}".format(naive_bayes_1.score(develop_features.drop('digit',axis=1).values, 
                                                               develop_features.digit.values)))
print("Accuracy Test: {:.3f}".format(naive_bayes_1.score(test_features.drop('digit',axis=1).values, 
                                                          test_features.digit.values)))


#print("Accuracy Test+G: {:.3f}".format(naive_bayes_1.score(test_features_g.drop('digit',axis=1).values, 
#                                                          test_features_g.digit.values)))


print(classification_report(test_features.digit.values, predict_nb1_test))

In [None]:
%%time

rf.fit(develop_features.drop('digit',axis=1).values, develop_features.digit.values)
#rf_pred_test = rf.predict(test_features.drop('digit',axis=1).values)
#rf_pred_test_g = rf.predict(test_features_g.drop('digit',axis=1).values)

print("Accuracy Develop : {:.3f}".format(rf.score(develop_features.drop('digit',axis=1).values, 
                                                   develop_features.digit.values)))
print("Accuracy Test: {:.3f}".format(rf.score(test_features.drop('digit',axis=1).values, 
                                                          test_features.digit.values)))

In [None]:
%%time

gb.fit(develop_features.drop('digit',axis=1).values, develop_features.digit.values)

gb_pred_test = gb.predict(test_features.drop('digit',axis=1).values)

print("Accuracy Develop : {:.3f}".format(gb.score(develop_features.drop('digit',axis=1).values, 
                                                   develop_features.digit.values)))
print("Accuracy Test: {:.3f}".format(gb.score(test_features.drop('digit',axis=1).values, 
                                                          test_features.digit.values)))