<a href="https://colab.research.google.com/github/francotestori/aauba_02/blob/master/Armado_de_datos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Trabajo Practico 2

## Armado del dataset

In [None]:
!wget http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz
!mkdir speechcommands
!tar -xf speech_commands_v0.01.tar.gz -C /content/speechcommands

In [20]:
# Import de Librerías
import librosa
import glob
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from IPython.display import Audio
from librosa.display import specshow

In [21]:
# Busco y guardo en un mapa todos los audio files correspondientes a los digit_keys
digit_command_keys = [
  'zero',                    
  'one',
  'two',
  'three',
  'four',
  'five',
  'six',
  'seven',
  'eight',
  'nine'
]

audio_files = {}

for key in digit_command_keys:
    directory = f'speechcommands/{key}/*.wav'
    audio_files[key] = glob.glob(directory)

In [22]:
# Utilizamos la función propuesta en el TP 
# para hacer la extracción de los datos 
# correspondientes a cada audio file

def calculate_features(
        filename,
        n_mfcc=12,
        delta=True,
        deltadelta=True,
        energy=True, 
        summary_fn = [np.mean, np.std], 
        summary_names=['mean','std']):
    #Abro el archivo:
    x, sr = librosa.core.load(filename,sr=None)

    #Calculo MFCCs
    features = librosa.feature.mfcc(x,sr=sr,n_mfcc=n_mfcc)
    feat_names = ['mfcc_{}'.format(i) for i in range(n_mfcc)]
    #Calculo energia:
    if energy:
        energy = librosa.feature.rms(x)
        features = np.concatenate([features,energy])
        feat_names = feat_names + ['energy']
    #Aplico media y desvio estandar por defecto
    summary_features = np.concatenate([fn(features,axis=1) for fn in summary_fn])
    feat_names = ['{}_{}'.format(name_i,summ_i) for summ_i in summary_names for name_i in feat_names]

    #Lo mismo con los delta
    if delta:
        deltafeatures = np.diff(features)
        summary_features = np.concatenate([summary_features,np.concatenate([fn(deltafeatures,axis=1) for fn in summary_fn])])
        d_names = ['d{}'.format(name) for name in feat_names]
    else:
        d_names = []

    #Y con los delta de segundo orden
    if deltadelta:
        deltadeltafeatures = np.diff(features,n=2)
        summary_features = np.concatenate([summary_features,np.concatenate([fn(deltadeltafeatures,axis=1) for fn in summary_fn])]) 
        dd_names = ['dd{}'.format(name) for name in feat_names]
    else:
        dd_names = []

    feat_names = feat_names + d_names + dd_names

    return summary_features, feat_names  

In [23]:
audio_df = pd.DataFrame()
k=0

for key in audio_files:
    print(key)
    j=0
    for audio in audio_files[key]:
        feat, names = calculate_features(audio)
        audio_row = dict(zip(names, feat))
        audio_row['digit'] = key
        audio_row['filename'] = audio
        audio_df = audio_df.append(audio_row, ignore_index=True)
        k+=1
        j+=1
    print(f'Procesados {j} audios en {key}.')
print(f'Procesados {k} audios.')

zero
Procesados 2376 audios en zero.
one
Procesados 2370 audios en one.
two
Procesados 2373 audios en two.
three
Procesados 2356 audios en three.
four
Procesados 2372 audios en four.
five
Procesados 2357 audios en five.
six
Procesados 2369 audios en six.
seven
Procesados 2377 audios en seven.
eight
Procesados 2352 audios en eight.
nine
Procesados 2364 audios en nine.
Procesados 23666 audios.


In [27]:
audio_df.head()

Unnamed: 0,ddenergy_mean,ddenergy_std,ddmfcc_0_mean,ddmfcc_0_std,ddmfcc_10_mean,ddmfcc_10_std,ddmfcc_11_mean,ddmfcc_11_std,ddmfcc_1_mean,ddmfcc_1_std,ddmfcc_2_mean,ddmfcc_2_std,ddmfcc_3_mean,ddmfcc_3_std,ddmfcc_4_mean,ddmfcc_4_std,ddmfcc_5_mean,ddmfcc_5_std,ddmfcc_6_mean,ddmfcc_6_std,ddmfcc_7_mean,ddmfcc_7_std,ddmfcc_8_mean,ddmfcc_8_std,ddmfcc_9_mean,ddmfcc_9_std,denergy_mean,denergy_std,digit,dmfcc_0_mean,dmfcc_0_std,dmfcc_10_mean,dmfcc_10_std,dmfcc_11_mean,dmfcc_11_std,dmfcc_1_mean,dmfcc_1_std,dmfcc_2_mean,dmfcc_2_std,dmfcc_3_mean,dmfcc_3_std,dmfcc_4_mean,dmfcc_4_std,dmfcc_5_mean,dmfcc_5_std,dmfcc_6_mean,dmfcc_6_std,dmfcc_7_mean,dmfcc_7_std,dmfcc_8_mean,dmfcc_8_std,dmfcc_9_mean,dmfcc_9_std,energy_mean,energy_std,filename,mfcc_0_mean,mfcc_0_std,mfcc_10_mean,mfcc_10_std,mfcc_11_mean,mfcc_11_std,mfcc_1_mean,mfcc_1_std,mfcc_2_mean,mfcc_2_std,mfcc_3_mean,mfcc_3_std,mfcc_4_mean,mfcc_4_std,mfcc_5_mean,mfcc_5_std,mfcc_6_mean,mfcc_6_std,mfcc_7_mean,mfcc_7_std,mfcc_8_mean,mfcc_8_std,mfcc_9_mean,mfcc_9_std
0,-4e-05,0.004047,-0.087577,14.439054,0.289443,8.461383,0.001,4.527796,-0.276688,10.856913,-0.018697,12.444738,0.172892,11.896126,0.166432,5.956282,0.098696,7.476521,0.007125,7.080504,0.158096,6.999523,0.118771,7.826736,0.112117,7.799444,5e-06,0.007348,zero,0.387241,26.726141,-0.325877,8.762013,-0.017884,4.282867,0.13981,16.173223,0.345507,16.423527,0.282263,14.687464,-0.023132,8.464055,-0.226542,6.99812,-0.206619,6.600952,-0.088103,6.912228,-0.126719,9.370098,-0.140965,7.608357,0.031405,0.030842,speechcommands/zero/8dd788d8_nohash_1.wav,-310.37378,108.335783,-12.138142,19.095641,6.300064,8.229837,108.10026,39.959982,-35.916181,37.495254,33.898475,25.41483,-30.267377,24.366416,21.804664,9.490059,-8.092194,13.301543,3.688589,18.480796,-27.701459,21.117286,14.652777,10.857483
1,-0.000224,0.008262,-0.182528,21.594038,0.008355,5.840726,0.087771,6.301972,-0.118388,10.820153,0.116756,14.593893,0.024313,10.843247,-0.117824,11.666164,-0.197453,7.340812,-0.37394,6.868907,-0.158919,6.697412,-0.172334,8.373859,-0.085276,6.904384,0.000675,0.011127,zero,0.263953,28.657321,0.018793,4.308382,0.322682,6.00049,0.02333,11.932052,-0.045622,17.403189,0.146212,11.297503,0.428317,10.951434,0.323952,7.0769,0.058051,8.351322,0.008589,7.236734,0.045069,7.127188,0.114088,5.511203,0.076727,0.04016,speechcommands/zero/6021f08b_nohash_0.wav,-268.940979,96.166463,0.485951,5.873054,6.682188,7.696213,109.094888,30.126237,18.249464,43.767123,39.821188,17.850829,27.161642,16.96354,26.77513,16.508826,-2.063644,21.946652,13.449217,16.439935,-2.512277,9.671001,17.021364,9.597063
2,0.000331,0.039083,-1.393533,33.512079,0.523177,8.641218,-0.197498,7.290841,0.644651,17.501375,0.133704,10.075938,0.790281,13.438439,-0.305298,11.651066,0.161379,10.441262,-0.072442,8.470837,-0.12464,8.645921,-0.305004,7.428344,-0.25113,9.858792,-0.005057,0.055144,zero,-3.65791,42.929844,0.547587,8.974828,0.5206,5.924756,-3.630059,20.239011,-2.661199,14.608974,-0.66419,14.109088,-0.388859,12.218891,1.116912,9.438793,1.269988,6.818314,0.650308,6.981177,0.046947,7.233902,-0.135616,8.432754,0.284765,0.198115,speechcommands/zero/5aac2efa_nohash_0.wav,-162.838334,152.889283,-5.995101,20.488937,-3.80324,10.556037,89.749041,50.146729,-11.773584,51.632548,-5.650079,30.596622,-21.146136,27.040773,-21.872953,18.821318,-24.375052,14.214723,-16.363518,8.142725,-14.993416,13.274695,-7.609461,10.12022
3,-2.2e-05,0.010173,-0.647873,21.422757,0.12079,5.620986,0.082528,4.80506,-0.45497,13.847316,0.123681,13.42833,0.245653,12.344635,0.213345,10.39704,0.293587,9.086914,0.509202,10.586381,0.272438,7.377933,0.150039,8.711009,-0.05271,4.734111,1.2e-05,0.020158,zero,0.175276,36.334235,-0.401118,5.67776,-0.505302,3.918342,1.33199,19.071729,0.421617,19.291494,-1.422872,16.455475,-0.300709,11.134505,0.026611,7.565342,-0.378856,11.856558,-0.196623,7.215757,0.04218,8.890276,0.073596,5.559824,0.072842,0.081364,speechcommands/zero/5236848b_nohash_3.wav,-311.809058,148.300601,-3.725588,10.908234,2.602168,6.403918,77.403861,55.231457,-39.393476,59.282381,7.104459,41.064272,-11.155193,25.052952,2.627961,13.858085,-22.22977,34.307361,-17.583034,23.183467,-1.14207,15.329263,-4.42715,15.736575
4,1.8e-05,0.006413,-1.692699,32.384986,0.245513,7.514093,-0.288584,8.608589,0.924984,20.501827,-1.137884,14.799777,1.058543,19.254523,-0.381943,15.992294,0.522771,17.469219,-0.039162,8.12095,-0.1956,8.868016,0.202938,8.901982,-0.242765,9.514696,-0.000504,0.009387,zero,-3.979818,45.220099,0.690888,7.081722,-0.084041,7.558391,-3.602311,24.140624,-3.164172,27.211607,-1.517718,19.71828,-0.365183,14.9884,1.105055,12.18085,1.067176,7.460431,1.067015,8.206372,1.263468,8.144511,0.193232,8.174734,0.038358,0.037518,speechcommands/zero/90b0b91a_nohash_1.wav,-441.658706,196.571589,-21.428044,15.728746,-17.133335,16.071462,82.740887,66.848065,-16.4955,87.437274,8.089898,35.118191,-25.552193,33.694765,-17.319364,23.893936,-22.009776,26.446429,-19.578614,19.518883,-17.590691,20.786852,-7.017059,11.838037


In [24]:
# Guardamos nuestro dataframe en un csv
audio_df.to_csv(r'/content/audio_digits.csv')