In [2]:
import librosa
import glob
import numpy as np

import matplotlib.pyplot as plt
from IPython.display import Audio
from librosa.display import specshow
from time import time
#load utils.py

In [3]:
import os.path
from os import path

if not path.exists("speechcommands"):
    !wget http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz
    !mkdir speechcommands
    !tar -xf speech_commands_v0.01.tar.gz -C speechcommands

In [4]:
# generamos lista con las rutas y nombres de archivo de los digitos
numbers_filenames = []
numbers_folders = ('zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven', 'eight', 'nine')
for i in numbers_folders:
    path = 'speechcommands/' + i + '/*.wav'
    numbers_filenames.append(glob.glob(path))

# guardamos la lista de los conjuntos de test y validación
# solo para archivos que referencian a dígitos
test_filenames = ['speechcommands/' + e for e in open('speechcommands/testing_list.txt','r').read().splitlines() if e[:e.find('/')] in numbers_folders]
valid_filenames = ['speechcommands/' + e for e in open('speechcommands/validation_list.txt','r').read().splitlines() if e[:e.find('/')] in numbers_folders]
train_filenames = [e for n in range(10) for e in numbers_filenames[n] if (e not in test_filenames) and (e not in valid_filenames)]

print('total_count:' , len(numbers_filenames))
print('train_count:' , len(train_filenames))
print('test_count:' , len(test_filenames))
print('valid_count:' , len(valid_filenames))

total_count: 10
train_count: 18620
test_count: 2552
valid_count: 2494


In [5]:
def count_elapsed_time(f):
    """
    Decorator.
    Execute the function and calculate the elapsed time.
    Print the result to the standard output.
    """
    def wrapper(*args, **kwargs):
        # Start counting.
        start_time = time()
        # Take the original function's return value.
        ret = f(*args, **kwargs)
        # Calculate the elapsed time.
        elapsed_time = time() - start_time
        print("Elapsed time: %0.10f seconds." % elapsed_time)
        return ret

    return wrapper

In [6]:
def calculate_features(filename, n_mfcc=12, delta=True, deltadelta=True, energy=True, summary_fn = [np.mean, np.std], summary_names=['mean', 'std']):  
  #Abro el archivo:
  x, sr = librosa.core.load(filename,sr=None)
  
  #Calculo MFCCs
  features = librosa.feature.mfcc(x,sr=sr,n_mfcc=n_mfcc)

  #Calculo energia:
  if energy:
    energy = librosa.feature.rmse(x)
    features = np.concatenate([features,energy])

  #Aplico media y desvio estandar por defecto
  summary_features = np.concatenate([fn(features,axis=1) for fn in summary_fn])
    
  #Lo mismo con los delta
  if delta:
    deltafeatures = np.diff(features)
    summary_features = np.concatenate([summary_features,np.concatenate([fn(deltafeatures,axis=1) for fn in summary_fn])])

  #Y con los delta de segundo orden
  if deltadelta:
    deltadeltafeatures = np.diff(features,n=2)
    summary_features = np.concatenate([summary_features,np.concatenate([fn(deltadeltafeatures,axis=1) for fn in summary_fn])]) 
  
  summary_features = np.append(summary_features, [filename.split('/')[1], filename])

  return summary_features

In [7]:
def name_features(filename = '', n_mfcc=12, delta=True, deltadelta=True, energy=True, summary_fn = [np.mean, np.std], summary_names=['mean', 'std']):
    feat_names = ['mfcc_{}'.format(i) for i in range(n_mfcc)]
    feat_names = ['{}_{}'.format(name_i,summ_i) for summ_i in summary_names for name_i in feat_names]
    d_names = ['d{}'.format(name) for name in feat_names]
    dd_names = ['dd{}'.format(name) for name in feat_names]

    feat_names = feat_names + d_names + dd_names + ['digit', 'file']

    return feat_names

In [31]:
from os import path
@count_elapsed_time
def calculate_features_if_needed(filenames, result_filename):
    result_path = "features/"+result_filename
    result = []
    if not path.exists(result_path):
        print("Calculating features for ", len(filenames), " rows -> ", result_path)
        result = [calculate_features(x) for x in filenames]
        np.savetxt(result_path, result, delimiter=",", fmt="%s")     
    else:
        print("Loading saved features <- ", result_path)
        import csv
        with open(result_path, 'r') as file:
            result = list(csv.reader(file))
    
    return result

In [29]:
feat_test = calculate_features_if_needed(test_filenames, "test_features.csv")

Loading saved features <-  features/test_features.csv
Elapsed time: 0.0589323044 seconds.


In [32]:
feat_valid = calculate_features_if_needed(valid_filenames, "valid_features.csv")

Loading saved features <-  features/valid_features.csv
Elapsed time: 0.0617980957 seconds.


In [33]:
feat_train = calculate_features_if_needed(train_filenames, "train_features.csv")

Loading saved features <-  features/train_features.csv
Elapsed time: 0.3486802578 seconds.


In [34]:
# como siempre calculamos las mismas fatures, sus nombres los guardo una sola vez
feat_names = name_features()

In [35]:
feat_names

['mfcc_0_mean',
 'mfcc_1_mean',
 'mfcc_2_mean',
 'mfcc_3_mean',
 'mfcc_4_mean',
 'mfcc_5_mean',
 'mfcc_6_mean',
 'mfcc_7_mean',
 'mfcc_8_mean',
 'mfcc_9_mean',
 'mfcc_10_mean',
 'mfcc_11_mean',
 'mfcc_0_std',
 'mfcc_1_std',
 'mfcc_2_std',
 'mfcc_3_std',
 'mfcc_4_std',
 'mfcc_5_std',
 'mfcc_6_std',
 'mfcc_7_std',
 'mfcc_8_std',
 'mfcc_9_std',
 'mfcc_10_std',
 'mfcc_11_std',
 'dmfcc_0_mean',
 'dmfcc_1_mean',
 'dmfcc_2_mean',
 'dmfcc_3_mean',
 'dmfcc_4_mean',
 'dmfcc_5_mean',
 'dmfcc_6_mean',
 'dmfcc_7_mean',
 'dmfcc_8_mean',
 'dmfcc_9_mean',
 'dmfcc_10_mean',
 'dmfcc_11_mean',
 'dmfcc_0_std',
 'dmfcc_1_std',
 'dmfcc_2_std',
 'dmfcc_3_std',
 'dmfcc_4_std',
 'dmfcc_5_std',
 'dmfcc_6_std',
 'dmfcc_7_std',
 'dmfcc_8_std',
 'dmfcc_9_std',
 'dmfcc_10_std',
 'dmfcc_11_std',
 'ddmfcc_0_mean',
 'ddmfcc_1_mean',
 'ddmfcc_2_mean',
 'ddmfcc_3_mean',
 'ddmfcc_4_mean',
 'ddmfcc_5_mean',
 'ddmfcc_6_mean',
 'ddmfcc_7_mean',
 'ddmfcc_8_mean',
 'ddmfcc_9_mean',
 'ddmfcc_10_mean',
 'ddmfcc_11_mean',
 'ddmf