In [None]:
from __future__ import print_function
from __future__ import division
import os, sys
import numpy as np
import pandas as pd
from builtins import range
from sklearn.metrics import roc_auc_score
import librosa, librosa.display
import matplotlib.pyplot as plt
% matplotlib inline

import tensorflow as tf
import keras
from keras.models import Sequential, Model
from keras.layers import Activation, Dense, Flatten, Input, Reshape, Dropout, Permute
from keras.layers.convolutional import Conv2D
from keras.layers.normalization import BatchNormalization
from keras.layers.recurrent import GRU
from keras.layers.pooling import MaxPooling2D, GlobalAveragePooling2D
from keras.layers.merge import Concatenate
from keras import backend as K
from keras.backend.tensorflow_backend import set_session

# Expose only CUDA device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"]="0"
config = tf.ConfigProto()
# Limit this process to a 0.7 fraction (i.e. 70%) of the GPU memory.
config.gpu_options.per_process_gpu_memory_fraction = 0.7
# Register a session built from this config as the Keras backend session.
set_session(tf.Session(config=config))

from kapre.time_frequency import Melspectrogram
from global_config import *

from multiprocessing import Pool
N_JOBS = 9

In [None]:
def gen_filepaths(df, dataroot=None):
    """Lazily yield full paths for the entries of ``df['filepath']``.

    Each entry is joined onto ``dataroot``; when ``dataroot`` is None the
    module-level ``PATH_DATASETS`` is used as the root instead.
    """
    root = PATH_DATASETS if dataroot is None else dataroot
    for relative_path in df['filepath']:
        yield os.path.join(root, relative_path)
        
def get_mfcc(filename, dataroot=None):
    """Compute and save MFCC statistics for every file listed in a task CSV.

    Reads ``<FOLDER_CSV>/<filename>.csv``, resolves each entry of its
    'filepath' column against ``dataroot``, runs ``_path_to_mfccs`` on every
    path in a pool of ``N_JOBS`` worker processes, and saves the stacked
    result to ``<FOLDER_FEATS>/<filename>_mfcc.npy``.

    Args:
        filename: Base name (without extension) of the CSV; also used as the
            prefix of the output ``.npy`` file.
        dataroot: Directory the 'filepath' entries are relative to. Falls
            back to ``PATH_DATASETS`` when None, matching ``gen_filepaths``.

    Returns:
        None. The features are written to disk; progress is printed.
    """
    # Local import: `time` is not imported explicitly among this file's imports.
    import time

    start = time.time()
    if dataroot is None:
        # Resolve the default here too, otherwise the existence check below
        # would call os.path.join(None, ...).
        dataroot = PATH_DATASETS

    csv_filename = '{}.csv'.format(filename)
    npy_filename = '{}_mfcc.npy'.format(filename)
    # read_csv(index_col=0, parse_dates=True) is the documented replacement
    # for the deprecated/removed DataFrame.from_csv.
    df = pd.read_csv(os.path.join(FOLDER_CSV, csv_filename),
                     index_col=0, parse_dates=True)
    # .format must apply to the string, not to print()'s None return value.
    print('{}: Dataframe with size:{}'.format(filename, len(df)))
    if len(df) > 0:
        # Sanity check that the first listed file exists; positional access
        # because the index comes from the CSV's first column.
        print(os.path.exists(os.path.join(dataroot, df['filepath'].iloc[0])))
    print(df.columns)
    paths = list(gen_filepaths(df, dataroot=dataroot))

    pool = Pool(N_JOBS)
    try:
        feats = pool.map(_path_to_mfccs, paths)
    finally:
        # Always release the worker processes, even if a worker raised.
        pool.close()
        pool.join()

    feats = np.array(feats)
    np.save(os.path.join(FOLDER_FEATS, npy_filename), feats)
    print('MFCC is done! in {:6.4f} sec'.format(time.time() - start))
    
def _path_to_mfccs(path):
    """Load one audio file and return summary statistics of its MFCCs.

    Loads at most 29 seconds of audio at sample rate ``SR``, computes 20
    MFCCs, their first and second frame-to-frame differences, and returns the
    per-coefficient mean and standard deviation over time of each.

    Args:
        path: Path of the audio file to load.

    Returns:
        1-D array of length 120, ordered as: mean(mfcc), std(mfcc),
        mean(delta), std(delta), mean(delta-delta), std(delta-delta),
        with 20 values each.
    """
    min_samples = 1024  # min length to have 3-frame mfcc's
    src, _ = librosa.load(path, sr=SR, duration=29.)  # max len: 29s, can be shorter.
    if len(src) < min_samples:
        # Zero-pad very short clips so both difference orders are non-empty.
        padded = np.zeros(min_samples)
        padded[:len(src)] = src
        src = padded

    # Keyword arguments: newer librosa releases reject positional y/sr here.
    mfcc = librosa.feature.mfcc(y=src, sr=SR, n_mfcc=20)
    # Differences between consecutive frames (time runs along axis 1).
    dmfcc = np.diff(mfcc, axis=1)
    ddmfcc = np.diff(dmfcc, axis=1)
    return np.concatenate((np.mean(mfcc, axis=1), np.std(mfcc, axis=1),
                           np.mean(dmfcc, axis=1), np.std(dmfcc, axis=1),
                           np.mean(ddmfcc, axis=1), np.std(ddmfcc, axis=1)), axis=0)


# Run MFCC extraction once per task, pairing each task name with its dataset
# root; zip() stops at the shorter of the two sequences.
# NOTE(review): `tasks` and `dataroots` are not defined in this section —
# presumably they come from `from global_config import *`; confirm.
# `idx` is not used inside the loop body.
for idx, (filename, dr) in enumerate(zip(tasks, dataroots)):
    get_mfcc(filename, dataroot=dr)