In [85]:
import warnings
warnings.filterwarnings('ignore')
import librosa
import pandas as pd
import numpy as np
import os
from scipy.fftpack import fft

In [2]:
def getFiles(directory,typ='.mp3'):
    """Yield the paths of the entries in ``directory`` whose name ends with ``typ``.

    Only the direct entries of ``directory`` are inspected (no recursion).

    Parameters
    ----------
    directory : str
        Directory to scan.
    typ : str, optional
        Filename suffix to keep. Defaults to ``'.mp3'``.

    Yields
    ------
    str
        ``'<directory>/<name>'`` for each matching entry, in sorted name order.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``directory`` cannot be listed.
        Because this is a generator, the error surfaces on first iteration.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the yielded
    # order (and the row order of anything built from it) reproducible.
    for name in sorted(os.listdir(directory)):
        if name.endswith(typ):
            yield '{}/{}'.format(directory, name)

In [54]:
def getAudio(file):
    """Decode one audio file with librosa.

    ``file`` is handed to ``librosa.load`` together with
    ``res_type='kaiser_fast'``; every other loading option is left at
    librosa's default.

    Returns
    -------
    tuple
        ``(samples, rate)`` as produced by ``librosa.load``.
    """
    samples, rate = librosa.load(file, res_type='kaiser_fast')
    return (samples, rate)

In [77]:
# Collect one record per audio file and build the DataFrame in a single call.
# Growing a frame with DataFrame.append inside a loop copies the whole frame on
# every iteration, and DataFrame.append no longer exists in pandas >= 2.0.
samples_dir = '../data/samples'
records = []
for lang in os.listdir(samples_dir):
    lang_dir = '{}/{}'.format(samples_dir, lang)
    # Each sub-directory name is used as the language label. Skip stray
    # non-directory entries, which would make os.listdir fail inside getFiles.
    if not os.path.isdir(lang_dir):
        continue
    for file in getFiles(lang_dir):
        data, sample_rate = getAudio(file)
        records.append({'lang':lang,'data':data,'sample_rate':sample_rate})
# Passing `columns` fixes the column order and still yields an empty,
# correctly labelled frame when no audio files are found.
df = pd.DataFrame(records, columns=['lang','data','sample_rate'])

In [78]:
# Preview the first rows of df (lang label, raw samples, sample rate).
df.head()

Unnamed: 0,lang,data,sample_rate
0,es_es,"[-0.007209771, -0.015356427, -0.017537486, -0....",22050
1,es_es,"[0.012138898, 0.01150092, 0.0029381728, 0.0010...",22050
2,es_es,"[-0.00045351504, 0.0035464, 0.008936057, 0.009...",22050
3,es_es,"[0.013188994, 0.044150613, 0.06276652, 0.06152...",22050
4,es_es,"[-0.022270145, -0.03373612, -0.03162274, -0.03...",22050


In [80]:
def getMFCC(row, n_mfcc=40):
    """Summarise one recording as its averaged MFCC vector.

    Parameters
    ----------
    row : mapping
        Must provide ``row['data']`` (the audio samples) and
        ``row['sample_rate']``. In this notebook it is a DataFrame row
        supplied by ``DataFrame.apply(..., axis=1)``.
    n_mfcc : int, optional
        Number of MFCC coefficients requested from ``librosa.feature.mfcc``.
        Defaults to 40, the value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        Mean over axis 0 of the transposed MFCC matrix. librosa documents the
        matrix as ``(n_mfcc, frames)``, so this is one time-averaged value per
        coefficient.
    """
    coefficients = librosa.feature.mfcc(y=row['data'], sr=row['sample_rate'], n_mfcc=n_mfcc)
    # Transpose so that averaging over axis 0 collapses the frame dimension.
    return np.mean(coefficients.T, axis=0)

In [84]:
# Row-wise apply: getMFCC receives each row's 'data' and 'sample_rate', and its
# result is stored per row in the new 'mfcc' column.
df['mfcc'] = df[['data','sample_rate']].apply(getMFCC,axis=1)

In [91]:
def getFFT(data,n=500):
    """Return the ``n``-point discrete Fourier transform of ``data``.

    Thin wrapper around ``scipy.fftpack.fft``. ``n`` (default 500) is passed
    straight through as the transform length, so SciPy truncates or zero-pads
    the input to ``n`` samples. The returned array is complex-valued.
    """
    return fft(data, n)

In [92]:
# Element-wise FFT of every recording's samples, using getFFT's default length.
df['fft'] = df['data'].apply(getFFT)

In [111]:
def getFeatureVector(row):
    """Join a row's MFCC and FFT features into one flat array.

    ``row['mfcc']`` comes first, followed by ``row['fft']``. NumPy promotes
    both parts to a common dtype, so a complex ``fft`` part makes the whole
    vector complex.
    """
    parts = (row['mfcc'], row['fft'])
    return np.concatenate(parts)

In [114]:
# Row-wise apply: combine each row's 'mfcc' and 'fft' arrays into a single
# 'feature_vector' entry via getFeatureVector.
df['feature_vector'] = df[['mfcc','fft']].apply(getFeatureVector,axis=1)

In [115]:
# Preview the frame again, now including the mfcc, fft and feature_vector columns.
df.head()

Unnamed: 0,lang,data,sample_rate,mfcc,fft,feature_vector
0,es_es,"[-0.007209771, -0.015356427, -0.017537486, -0....",22050,"[-285.23434, 175.89645, -36.257664, 7.9942393,...","[(-0.07016978+0j), (-0.1587303+0.07867816j), (...","[(-285.23434+0j), (175.89645+0j), (-36.257664+..."
1,es_es,"[0.012138898, 0.01150092, 0.0029381728, 0.0010...",22050,"[-263.13785, 115.42601, -22.563814, 15.244263,...","[(-2.407087+0j), (-2.1254783-0.3098814j), (-2....","[(-263.13785+0j), (115.42601+0j), (-22.563814+..."
2,es_es,"[-0.00045351504, 0.0035464, 0.008936057, 0.009...",22050,"[-241.6804, 142.95016, -13.561164, 32.493755, ...","[(-2.515686+0j), (-4.0788665-1.4216757j), (-10...","[(-241.6804+0j), (142.95016+0j), (-13.561164+0..."
3,es_es,"[0.013188994, 0.044150613, 0.06276652, 0.06152...",22050,"[-194.5256, 92.64716, -25.009928, 41.915436, -...","[(-1.4095415+0j), (-1.0262144+0.0716005j), (-2...","[(-194.5256+0j), (92.64716+0j), (-25.009928+0j..."
4,es_es,"[-0.022270145, -0.03373612, -0.03162274, -0.03...",22050,"[-305.53488, 134.97104, -9.31721, 14.267323, -...","[(-0.10506569+0j), (-0.5140722+9.7117154e-05j)...","[(-305.53488+0j), (134.97104+0j), (-9.31721+0j..."


In [116]:
# Persist the whole frame (raw samples plus derived feature columns) as a pickle
# at the relative path 'sound_db.pkl'.
# NOTE(review): pickle files should only ever be loaded from trusted sources.
df.to_pickle('sound_db.pkl')