In [1]:
import librosa
import glob
import numpy as np

import matplotlib.pyplot as plt
from IPython.display import Audio
from librosa.display import specshow

In [2]:
import os.path
from os import path

if not path.exists("speechcommands"):
    !wget http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz
    !mkdir speechcommands
    !tar -xf speech_commands_v0.01.tar.gz -C speechcommands

In [3]:
# generamos lista con las rutas y nombres de archivo de los digitos
numbers_filenames = []
numbers_folders = ('zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine')
for i in numbers_folders:
    path = 'speechcommands/' + i + '/*.wav'
    numbers_filenames.append(glob.glob(path))

In [4]:
numbers_folders
numbers_filenames

[['speechcommands/zero/24befdb3_nohash_4.wav',
  'speechcommands/zero/3143fdff_nohash_0.wav',
  'speechcommands/zero/190821dc_nohash_4.wav',
  'speechcommands/zero/69086eb0_nohash_0.wav',
  'speechcommands/zero/65d14087_nohash_0.wav',
  'speechcommands/zero/b5d1e505_nohash_0.wav',
  'speechcommands/zero/24632875_nohash_0.wav',
  'speechcommands/zero/5af0ca83_nohash_1.wav',
  'speechcommands/zero/df280250_nohash_0.wav',
  'speechcommands/zero/3bfd30e6_nohash_3.wav',
  'speechcommands/zero/2579e514_nohash_1.wav',
  'speechcommands/zero/de6eb27c_nohash_0.wav',
  'speechcommands/zero/ad6a46f1_nohash_1.wav',
  'speechcommands/zero/5c39594f_nohash_3.wav',
  'speechcommands/zero/9aa21fa9_nohash_2.wav',
  'speechcommands/zero/1ecfb537_nohash_4.wav',
  'speechcommands/zero/da584bc0_nohash_1.wav',
  'speechcommands/zero/92a9c5e6_nohash_1.wav',
  'speechcommands/zero/7cf14c54_nohash_3.wav',
  'speechcommands/zero/b7a6f709_nohash_0.wav',
  'speechcommands/zero/d6360b32_nohash_1.wav',
  'speechcomm

In [5]:
def calculate_features(filename, n_mfcc=12, delta=True, deltadelta=True, energy=True, summary_fn = [np.mean, np.std], summary_names=['mean', 'std']):
  #Abro el archivo:
  x, sr = librosa.core.load(filename,sr=None)
  
  #Calculo MFCCs
  features = librosa.feature.mfcc(x,sr=sr,n_mfcc=n_mfcc)

  #Calculo energia:
  if energy:
    energy = librosa.feature.rmse(x)
    features = np.concatenate([features,energy])

  #Aplico media y desvio estandar por defecto
  summary_features = np.concatenate([fn(features,axis=1) for fn in summary_fn])
  
  #Lo mismo con los delta
  if delta:
    deltafeatures = np.diff(features)
    summary_features = np.concatenate([summary_features,np.concatenate([fn(deltafeatures,axis=1) for fn in summary_fn])])
  
  #Y con los delta de segundo orden
  if deltadelta:
    deltadeltafeatures = np.diff(features,n=2)
    summary_features = np.concatenate([summary_features,np.concatenate([fn(deltadeltafeatures,axis=1) for fn in summary_fn])]) 
  
  summary_features = np.append(summary_features, [filename.split('/')[1], filename])

  return summary_features

In [6]:
def name_features(filename = '', n_mfcc=12, delta=True, deltadelta=True, energy=True, summary_fn = [np.mean, np.std], summary_names=['mean', 'std']):
    feat_names = ['mfcc_{}'.format(i) for i in range(n_mfcc)]
    feat_names = ['{}_{}'.format(name_i,summ_i) for summ_i in summary_names for name_i in feat_names]
    d_names = ['d{}'.format(name) for name in feat_names]
    dd_names = ['dd{}'.format(name) for name in feat_names]

    feat_names = feat_names + d_names + dd_names + ['digit', 'file']

    return feat_names

In [None]:
# guardamos la lista de los conjuntos de test y validación
# solo para archivos que referencian a dígitos
test_idxs = ['speechcommands/' + e for e in open('speechcommands/testing_list.txt','r').read().splitlines() if e[:e.find('/')] in numbers_folders]
valid_idxs = ['speechcommands/' + e for e in open('speechcommands/validation_list.txt','r').read().splitlines() if e[:e.find('/')] in numbers_folders]

train_idxs = [e for n in range(10) for e in numbers_filenames[n] if (e not in test_idxs) and (e not in valid_idxs)]

print('train_count' , len(train_idxs))
print('test_count' , len(test_idxs))
print('valid_count' , len(valid_idxs))

In [None]:
# como siempre calculamos las mismas fatures, sus nombres los guardo una sola vez
feat_test = [calculate_features(x) for x in test_idxs]
feat_valid = [calculate_features(x) for x in valid_idxs]
feat_train = [calculate_features(x) for x in train_idxs]
feat_names = name_features()

print(feat_names)