In [1]:
!wget http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz
!mkdir speechcommands
!tar -xf speech_commands_v0.01.tar.gz -C /content/speechcommands

--2020-07-05 19:44:00--  http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz
Resolving download.tensorflow.org (download.tensorflow.org)... 64.233.188.128, 2404:6800:4008:c06::80
Connecting to download.tensorflow.org (download.tensorflow.org)|64.233.188.128|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1489096277 (1.4G) [application/gzip]
Saving to: ‘speech_commands_v0.01.tar.gz’


2020-07-05 19:44:15 (99.8 MB/s) - ‘speech_commands_v0.01.tar.gz’ saved [1489096277/1489096277]



In [2]:
import os
import glob
import librosa
import numpy as np
import pandas as pd
from google.colab import drive
import matplotlib.pyplot as plt
from IPython.display import Audio
from librosa.display import specshow

In [3]:
# Folder holding the extracted dataset (one sub-folder per spoken word).
working_directory = '/content/speechcommands'

# Digit classes processed by this notebook; each one is a sub-folder name.
numbers = [
    'zero', 'one', 'two', 'three', 'four',
    'five', 'six', 'seven', 'eight', 'nine',
]

# Mount Google Drive so the generated CSV files can be written to it.
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [11]:
def calculate_features_noise(filename,noisevector, n_mfcc=12,delta=True,deltadelta=True,energy=True, summary_fn = [np.mean, np.std], summary_names=['mean','std']):
  """Summarise MFCC (and optional energy/delta) features of a clip mixed with noise.

  A random segment of ``noisevector`` with the same length as the clip is added
  to the audio before the features are computed.

  Parameters:
    filename: path of the audio file, loaded at its native sample rate.
    noisevector: 1-D array of noise samples. It is looped when shorter than the
      clip. NOTE(review): it is added sample-by-sample, so it is assumed to have
      the same sample rate as the clip - confirm at the call site.
    n_mfcc: number of MFCC coefficients.
    delta: also summarise the first-order frame differences.
    deltadelta: also summarise the second-order frame differences.
    energy: append the RMS energy as an extra feature row.
    summary_fn: functions called as ``fn(matrix, axis=1)`` to collapse the frames.
    summary_names: one label per entry of ``summary_fn``.

  Returns:
    (summary_features, feat_names): a 1-D array and a list of the same length,
    where ``feat_names[i]`` describes ``summary_features[i]``. Values are ordered
    by feature group (base, delta, delta-delta) and, inside each group, by
    summary function. The value order is unchanged from earlier versions; only
    the labels were corrected, so files labelled by the old code should be
    regenerated before being compared by column name.

  Raises:
    ValueError: if ``summary_fn`` and ``summary_names`` differ in length, or if
      ``noisevector`` is empty.
  """
  if len(summary_fn) != len(summary_names):
    raise ValueError('summary_fn and summary_names must have the same length')

  # Load the clip and add a random stretch of the noise recording.
  x, sr = librosa.load(filename,sr=None)
  noisevector = np.asarray(noisevector)
  if noisevector.size == 0:
    raise ValueError('noisevector must contain at least one sample')
  if noisevector.size < x.size:
    # Loop the noise so a full-length segment can always be cut out of it.
    noisevector = np.tile(noisevector, -(-x.size // noisevector.size))
  # randint's upper bound is exclusive: +1 keeps the last valid offset reachable
  # and makes a noise vector exactly as long as the clip a valid input.
  noise_starter = np.random.randint(0, noisevector.size - x.size + 1)
  x = x + noisevector[noise_starter:noise_starter+x.size]

  # MFCCs (keyword arguments work on both old and current librosa releases).
  features = librosa.feature.mfcc(y=x,sr=sr,n_mfcc=n_mfcc)
  feat_names = ['mfcc_{}'.format(i) for i in range(n_mfcc)]

  # Optional RMS energy row; librosa renamed `rmse` to `rms`, so accept either.
  if energy:
    rms_fn = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse
    features = np.concatenate([features, rms_fn(y=x)])
    feat_names = feat_names + ['energy']

  # Feature groups to summarise, each paired with the names of its rows.
  groups = [(features, feat_names)]
  if delta:
    groups.append((np.diff(features), ['d{}'.format(name) for name in feat_names]))
  if deltadelta:
    groups.append((np.diff(features,n=2), ['dd{}'.format(name) for name in feat_names]))

  # Build values and labels in the same loop so they cannot drift out of step.
  summary_parts = []
  summary_labels = []
  for group_values, group_names in groups:
    for fn, summ_i in zip(summary_fn, summary_names):
      summary_parts.append(fn(group_values,axis=1))
      summary_labels.extend('{}_{}'.format(name_i,summ_i) for name_i in group_names)

  summary_features = np.concatenate(summary_parts)
  return summary_features, summary_labels

In [10]:
def calculate_features_whitenoise(filename, n_mfcc=12,delta=True,deltadelta=True,energy=True, summary_fn = [np.mean, np.std], summary_names=['mean','std'], noise_std=1.0):
  """Summarise MFCC (and optional energy/delta) features of a clip plus white noise.

  Zero-mean Gaussian noise is added to every sample of the clip before the
  features are computed.

  Parameters:
    filename: path of the audio file, loaded at its native sample rate.
    n_mfcc: number of MFCC coefficients.
    delta: also summarise the first-order frame differences.
    deltadelta: also summarise the second-order frame differences.
    energy: append the RMS energy as an extra feature row.
    summary_fn: functions called as ``fn(matrix, axis=1)`` to collapse the frames.
    summary_names: one label per entry of ``summary_fn``.
    noise_std: standard deviation of the added noise. The default of 1.0
      reproduces the previous hard-coded behaviour.

  Returns:
    (summary_features, feat_names): a 1-D array and a list of the same length,
    where ``feat_names[i]`` describes ``summary_features[i]``. Values are ordered
    by feature group (base, delta, delta-delta) and, inside each group, by
    summary function. The value order is unchanged from earlier versions; only
    the labels were corrected, so files labelled by the old code should be
    regenerated before being compared by column name.

  Raises:
    ValueError: if ``summary_fn`` and ``summary_names`` differ in length.
  """
  if len(summary_fn) != len(summary_names):
    raise ValueError('summary_fn and summary_names must have the same length')

  # Load the clip and add the Gaussian noise.
  x, sr = librosa.load(filename,sr=None)
  noise = np.random.normal(scale=noise_std, size=x.size)
  x = x + noise

  # MFCCs (keyword arguments work on both old and current librosa releases).
  features = librosa.feature.mfcc(y=x,sr=sr,n_mfcc=n_mfcc)
  feat_names = ['mfcc_{}'.format(i) for i in range(n_mfcc)]

  # Optional RMS energy row; librosa renamed `rmse` to `rms`, so accept either.
  if energy:
    rms_fn = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse
    features = np.concatenate([features, rms_fn(y=x)])
    feat_names = feat_names + ['energy']

  # Feature groups to summarise, each paired with the names of its rows.
  groups = [(features, feat_names)]
  if delta:
    groups.append((np.diff(features), ['d{}'.format(name) for name in feat_names]))
  if deltadelta:
    groups.append((np.diff(features,n=2), ['dd{}'.format(name) for name in feat_names]))

  # Build values and labels in the same loop so they cannot drift out of step.
  summary_parts = []
  summary_labels = []
  for group_values, group_names in groups:
    for fn, summ_i in zip(summary_fn, summary_names):
      summary_parts.append(fn(group_values,axis=1))
      summary_labels.extend('{}_{}'.format(name_i,summ_i) for name_i in group_names)

  summary_features = np.concatenate(summary_parts)
  return summary_features, summary_labels

In [6]:
def fill_types(file_type, file, t, base_path = '/content/speechcommands'):
    """Tag every digit recording listed in `file` with the label `t`.

    Each line of `file` is stripped of surrounding whitespace. Lines that start
    with one of the module-level `numbers` followed by '/' are joined onto
    `base_path`, and the resulting path is stored in `file_type` with value `t`.
    All other lines are ignored. `file_type` is modified in place; nothing is
    returned.
    """
    digit_prefixes = tuple(digit + '/' for digit in numbers)
    for raw_entry in file:
        entry = raw_entry.strip()
        if entry.startswith(digit_prefixes):
            file_type[os.path.join(base_path, entry)] = t

In [7]:
# Map every listed digit recording to the split it belongs to
# ('testing' or 'validation'), as given by the dataset's list files.
file_type = {}

for list_path, split_name in (
    ('/content/speechcommands/testing_list.txt', 'testing'),
    ('/content/speechcommands/validation_list.txt', 'validation'),
):
    with open(list_path) as file:
        fill_types(file_type, file, split_name, base_path = working_directory)

Este bloque le suma ruido blanco al conjunto de validación y guarda el resultado en Google Drive.


In [12]:
# Build the white-noise version of the validation set and save it as a CSV on Drive.

# Fixed seed so the generated noise (and therefore the CSV) is reproducible.
np.random.seed(0)

file_type_validation = {k: v for k, v in file_type.items() if v == 'validation'}

csvname = 'Validation_gaussian.csv'
data = []
columns = []
for number in numbers:
    # sorted() pins the processing order, which the seeded noise depends on.
    for filepath in sorted(glob.glob(os.path.join(working_directory, number, '*.wav'))):
        if filepath not in file_type_validation:
            continue  # only validation recordings are processed in this cell
        feat, names = calculate_features_whitenoise(filepath)
        t = file_type_validation[filepath]
        data.append(np.append(feat, [number, filepath, t]))
        columns = names + ['target', 'filename', 'type']

dataset = pd.DataFrame(data, columns = columns)
dataset.to_csv(os.path.join('/content/drive/My Drive/',csvname))

Generación de MFCC usando ruidos ambientales para el conjunto de validación, guardando los resultados en la raíz de Google Drive.


In [None]:
# For every ambient-noise recording, build a noisy version of the validation set
# and save it as '<noise file name>.csv' in the root of Google Drive.

# Fixed seed so the random noise offsets (and therefore the CSVs) are reproducible.
np.random.seed(0)

file_type_validation = {k: v for k, v in file_type.items() if v == 'validation'}
noisepaths = sorted(glob.glob('Ruidos/*.wav'))  # Upload any noise WAV file to this folder to use it
if not noisepaths:
    print('No noise recordings found in Ruidos/ - nothing to generate.')

for noisepath in noisepaths:
    csvname = os.path.split(noisepath)[1] + '.csv'
    # NOTE(review): the noise keeps its native sample rate and is added
    # sample-by-sample to the speech clips; confirm both use the same rate.
    noise, noise_sr = librosa.core.load(noisepath,sr=None)
    data = []
    columns = []
    for number in numbers:
        # sorted() pins the processing order, which the seeded offsets depend on.
        for filepath in sorted(glob.glob(os.path.join(working_directory, number, '*.wav'))):
            if filepath not in file_type_validation:
                continue  # only validation recordings are processed in this cell
            # The noise-mixing extractor defined in this notebook is
            # calculate_features_noise.
            feat, names = calculate_features_noise(filepath, noise)
            t = file_type_validation[filepath]
            data.append(np.append(feat, [number, filepath, t]))
            columns = names + ['target', 'filename', 'type']

    dataset = pd.DataFrame(data, columns = columns)
    dataset.to_csv(os.path.join('/content/drive/My Drive/',csvname))

