### Script para extraer las caracteristicas de cada audio

Import the libraries

In [37]:
import os
import gzip
import math
import pickle
import warnings
import numpy as np
import scipy.io.wavfile as wav
from python_speech_features import mfcc
from python_speech_features import delta
from python_speech_features import fbank
from python_speech_features import logfbank

# # install packages:
# sudo apt install sox
# pip install python_speech_features

Functions

In [38]:
# function for extract mfcc features 
def extractFeatures(rate, signal):
    bank,energy = fbank(signal,rate,winlen=0.025,winstep=0.01,
                        nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97)
    features_mfcc = mfcc(signal,rate,winlen=0.0116,winstep=0.039,numcep=16,
                         nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,ceplifter=22,appendEnergy=True)
        
    return features_mfcc

In [39]:
# function for downsampling wav recordings from 44kHz to 16kHz
def downsampling(wav_file, new_rate):
    state = True
    
    try:
        rate,signal = wav.read(wav_file)        
        factor = math.floor(rate/new_rate)        
        new_rate = rate/factor        
        indices = range(0, signal.size, factor)        
        signal = signal[indices]
        
    except Exception as e:
        state = False
    
    return (state, new_rate, signal)

In [40]:
def splitAudio(rate, signal, window, min_step):

    step = 0    
    result = list()    
    duration =  len(signal)/rate                
    max_duration = len(signal)
    
    while( ( (window+step) * rate) <= max_duration ):        
        from_ = int(step * rate)
        to_ = int( (window+step) * rate )
        step += min_step
        segment = signal[from_:to_]
        result.append(segment)
        
    return result  

In [41]:
# function for process audio file
def process_audio(audio_path):
    
    result = True
    
    clip_features = list()
    mean_features = list()
    
    if os.path.isfile(audio_path):        
        (state, rate, signal) = downsampling(new_audio_path, 16000)
        
    if state is True:
        
        window = 5
        min_step = 1
        
        # split the audio on 5 seconds segments
        audio_segments = splitAudio(rate, signal, window, min_step)
        
        if audio_segments:           
            # for each segment of audio
            for audio_segment in audio_segments:
            # extract mfcc features
                features = np.array(extractFeatures(rate, audio_segment))
                features = np.asarray(features).reshape(-1)
                clip_features.append(features)
        else: 
            result = False
            
    else:        
        print( 'Error when processing the file:', new_audio_path)
        result = False
    
    clip_features = np.array(clip_features)
    
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        mean_features = np.mean(clip_features, axis=0)
        
    return result, clip_features, mean_features

In [42]:
# Main function 
def main():
    
    # read features file
    data = pickle.load(gzip.open('data.pickle.gz', 'rb'), encoding='latin1')

    item = 1
    species = {}
    data_features = {}

    for key in data:
        
        #try:
        
        audio_path = data[key]['silDir']
        class_id = data[key]['ClassId']

        if class_id in species:
            specie_id = species[class_id]
        else:
            species[class_id] = item
            specie_id = item
            item+=1

        result, clip_features, mean_features = process_audio(audio_path)

        if result is True:
            data_tmp = {}
            data_tmp['clip_features'] = clip_features
            data_tmp['mean_features'] = mean_features
            data_tmp['label'] = specie_id
            data_features[key] = data_tmp
            
        #except:
        #    print( 'Error when processing the file:', key)

    with gzip.open('features.pickle.gz', 'wb') as f:
        pickle.dump((data_features), f, protocol = 2)
    print("Done.") 

In [43]:
main()

NameError: name 'new_audio_path' is not defined

In [None]:
# read features file
data = pickle.load(gzip.open('data.pickle.gz', 'rb'), encoding='latin1')

data_features = {}

for key in dataset:
    audio_path = dataset[key]['wavDir']
    id_specie = dataset[key]['MediaId']
    result, clip_features, mean_features = process_audio(dir_audio)

    if result is True:
        data = {}
        data['clip_features'] = clip_features
        data['mean_features'] = mean_features
        data['label'] = id_specie
        data_features[key] = data

with gzip.open('features_colombia.pickle.gz', 'wb') as f:
    pickle.dump((data_features), f, protocol = 2)
print("Done.") 