<a href="https://colab.research.google.com/github/francotestori/aauba_02/blob/master/Armado_de_datos.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Trabajo Práctico 2

## Armado del dataset

In [None]:
!wget http://download.tensorflow.org/data/speech_commands_v0.01.tar.gz
!mkdir speechcommands
!tar -xf speech_commands_v0.01.tar.gz -C /content/speechcommands

In [1]:
# Import de Librerías
import glob
import os

import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import Audio
from librosa.display import specshow

In [2]:
# Spoken-digit command names, ordered from zero to nine.
# Each name is used both as a sub-directory name when globbing for .wav files
# and as the value stored in the 'digit' column of the feature table.
digit_command_keys = 'zero one two three four five six seven eight nine'.split()



In [3]:
# Feature-extraction function proposed in the assignment (TP); it turns one
# audio file into a fixed-length vector of summary statistics.

def calculate_features(
        filename,
        n_mfcc=12,
        delta=True,
        deltadelta=True,
        energy=True, 
        summary_fn = [np.mean, np.std], 
        summary_names=['mean','std']):
    """Summarise an audio file as a flat vector of MFCC/energy statistics.

    The frame-level feature matrix (MFCCs, optionally followed by an RMS
    energy row) is collapsed along the time axis with every function in
    ``summary_fn``. The same summaries are optionally computed on the first
    and second order frame-to-frame differences.

    Parameters
    ----------
    filename : path of the audio file to load.
    n_mfcc : number of MFCC coefficients to compute.
    delta : if True, append summaries of the first-order differences.
    deltadelta : if True, append summaries of the second-order differences.
    energy : if True, append the RMS energy as an extra feature row.
    summary_fn : callables accepting ``(matrix, axis=1)`` that return one
        value per feature row.
    summary_names : one name suffix per entry of ``summary_fn``.

    Returns
    -------
    (summary_features, feat_names) : a 1-D array of values and the list of
        matching column names, in the same order.

    Raises
    ------
    ValueError
        If ``summary_fn`` and ``summary_names`` differ in length, which would
        silently misalign values and names.
    """
    if len(summary_fn) != len(summary_names):
        raise ValueError('summary_fn and summary_names must have the same length')

    # sr=None asks librosa to keep the file's native sampling rate.
    signal, sr = librosa.load(filename, sr=None)

    # The signal is passed as y=...: librosa >= 0.10 rejects it positionally.
    features = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)
    base_names = ['mfcc_{}'.format(i) for i in range(n_mfcc)]

    if energy:
        # Separate local name so the boolean `energy` flag is not overwritten.
        rms = librosa.feature.rms(y=signal)
        features = np.concatenate([features, rms])
        base_names = base_names + ['energy']

    def _summarise(matrix):
        # One block of per-row statistics per summary function, concatenated.
        return np.concatenate([fn(matrix, axis=1) for fn in summary_fn])

    summary_features = _summarise(features)
    # Summary-major ordering, matching the layout produced by _summarise.
    feat_names = ['{}_{}'.format(name_i, summ_i)
                  for summ_i in summary_names
                  for name_i in base_names]

    d_names = []
    if delta:
        # np.diff works along the last axis, i.e. between consecutive frames.
        summary_features = np.concatenate(
            [summary_features, _summarise(np.diff(features))])
        d_names = ['d{}'.format(name) for name in feat_names]

    dd_names = []
    if deltadelta:
        summary_features = np.concatenate(
            [summary_features, _summarise(np.diff(features, n=2))])
        dd_names = ['dd{}'.format(name) for name in feat_names]

    feat_names = feat_names + d_names + dd_names

    return summary_features, feat_names  

In [7]:
# Noise sources: names of the dataset folders to process.
# Each folder is expected to hold one sub-directory per digit command, and
# each one yields a CSV file named after the folder.
folders = [
    f'sc_{noise_source}_lvl_10'
    for noise_source in (
        'STREET-CITY-1',
        'CAFE-CAFE-1',
        'CAFE-FOODCOURTB-1',
        'HOME-KITCHEN-1',
    )
]


In [9]:
# Build one feature table per noise folder and save it as content/<folder>.csv.

# DataFrame.to_csv does not create missing directories, so make sure the
# output directory exists before the (long) extraction starts.
os.makedirs('content', exist_ok=True)

for folder in folders:
    # Collect the .wav paths of every digit; sorted() makes the row order
    # reproducible because glob returns files in arbitrary order.
    audio_files = {
        key: sorted(glob.glob(f'../{folder}/{key}/*.wav'))
        for key in digit_command_keys
    }
    print(folder)

    # Accumulate plain dicts and build the DataFrame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    records = []
    for key, wav_paths in audio_files.items():
        print(key)
        for audio in wav_paths:
            feat, names = calculate_features(audio)
            audio_row = dict(zip(names, feat))
            audio_row['digit'] = key
            audio_row['filename'] = audio
            records.append(audio_row)
        print(f'Procesados {len(wav_paths)} audios en {folder}/{key}.')
    print(f'Procesados {len(records)} audios en {folder}.')

    audio_df = pd.DataFrame(records)
    audio_df.to_csv(f'content/{folder}.csv')

sc_STREET-CITY-1_lvl_10
zero
Procesados 2376 audios en sc_STREET-CITY-1_lvl_10/zero.
one
Procesados 2370 audios en sc_STREET-CITY-1_lvl_10/one.
two
Procesados 2373 audios en sc_STREET-CITY-1_lvl_10/two.
three
Procesados 2356 audios en sc_STREET-CITY-1_lvl_10/three.
four
Procesados 2372 audios en sc_STREET-CITY-1_lvl_10/four.
five
Procesados 2357 audios en sc_STREET-CITY-1_lvl_10/five.
six
Procesados 2369 audios en sc_STREET-CITY-1_lvl_10/six.
seven
Procesados 2377 audios en sc_STREET-CITY-1_lvl_10/seven.
eight
Procesados 2352 audios en sc_STREET-CITY-1_lvl_10/eight.
nine
Procesados 2364 audios en sc_STREET-CITY-1_lvl_10/nine.
Procesados 23666 audios en sc_STREET-CITY-1_lvl_10.
sc_CAFE-CAFE-1_lvl_10
zero
Procesados 2376 audios en sc_CAFE-CAFE-1_lvl_10/zero.
one
Procesados 2370 audios en sc_CAFE-CAFE-1_lvl_10/one.
two
Procesados 2373 audios en sc_CAFE-CAFE-1_lvl_10/two.
three
Procesados 2356 audios en sc_CAFE-CAFE-1_lvl_10/three.
four
Procesados 2372 audios en sc_CAFE-CAFE-1_lvl_10/four.