In [2]:
import glob
import os
import librosa
import numpy as np
import pandas as pd
import speechpy
from matplotlib import pyplot as plt
from keras import layers, models
from keras.callbacks import TensorBoard
from keras.models import Sequential, model_from_json
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# Root folder of the audio dataset. The historical default is an absolute path
# on the original author's machine; set the AUDANA_DATA_DIR environment
# variable to point the notebook at a different location without editing it.
DATA_DIR = os.environ.get(
    "AUDANA_DATA_DIR",
    "/Users/mbare/Documents/audana-dataset/dataset/Audio/",
)
# Sub-folders of DATA_DIR that are scanned for "*.wav" files. parse_audio_files
# labels clips from the "Deceptive" folder 1 and clips from any other folder 0.
SUB_DIRS = ["Truthful", "Deceptive"]


def find_max_mfccs_len(parent_dir,sub_dirs):
    """Return the largest MFCC frame count among the ``*.wav`` files found.

    Every file matching ``<parent_dir>/<sub_dir>/*.wav`` is loaded with
    ``librosa.load`` and its MFCC matrix is computed; the function reports the
    longest one so callers can pad shorter sequences to that length.

    Args:
        parent_dir: Directory that contains the sub-directories to scan.
        sub_dirs: Iterable of sub-directory names inside ``parent_dir``.

    Returns:
        The maximum frame count as an ``int``, or ``0`` when no ``.wav`` file
        matched (previously this case raised ``IndexError``).
    """
    max_len = 0
    for sub_dir in sub_dirs:
        pattern = os.path.join(parent_dir, sub_dir, "*.wav")
        for file_name in glob.glob(pattern):
            X, sample_rate = librosa.load(file_name)
            mfccs = librosa.feature.mfcc(y=X, sr=sample_rate)
            # The last axis of the MFCC matrix is the one the original code
            # measured (first axis of the transposed matrix).
            max_len = max(max_len, mfccs.shape[-1])
    print("Max MFCCS: " + str(max_len) + ". Will pad based on this")
    return max_len


def fex(file_name):
    """Extract four averaged feature vectors from one audio file.

    The file is decoded with ``librosa.load``. MFCCs are computed from the
    waveform and passed through ``speechpy.processing.cmvnw`` (window size 301,
    variance normalization enabled). Chroma and spectral contrast are computed
    from the magnitude STFT, and tonnetz from the harmonic component of the
    waveform. Each feature matrix is transposed and averaged over axis 0.

    Args:
        file_name: Path of the audio file to load.

    Returns:
        Tuple ``(mfccs, chroma, contrast, tonnetz)`` of averaged vectors.
    """
    def _average(feature_matrix):
        # Same reduction for every feature: transpose, then mean over axis 0.
        return np.mean(feature_matrix.T, axis=0)

    signal, rate = librosa.load(file_name)
    magnitude = np.abs(librosa.stft(signal))

    # NOTE(review): the MFCC matrix is handed to cmvnw exactly as librosa
    # returns it (untransposed). speechpy appears to expect one row per
    # observation -- confirm this orientation is the intended one.
    coefficients = librosa.feature.mfcc(y=signal, sr=rate)
    coefficients = speechpy.processing.cmvnw(
        coefficients, win_size=301, variance_normalization=True
    )
    mfccs = _average(coefficients)

    chroma = _average(librosa.feature.chroma_stft(S=magnitude, sr=rate))
    contrast = _average(librosa.feature.spectral_contrast(S=magnitude, sr=rate))
    tonnetz = _average(
        librosa.feature.tonnetz(y=librosa.effects.harmonic(signal), sr=rate)
    )
    return mfccs, chroma, contrast, tonnetz


def parse_audio_files(parent_dir,sub_dirs):
    """Build feature matrices and labels for every ``*.wav`` file found.

    Each file matching ``<parent_dir>/<sub_dir>/*.wav`` is passed to ``fex``.
    Files for which ``fex`` raises are reported and skipped, so one unreadable
    clip does not abort the whole run.

    Args:
        parent_dir: Directory that contains the sub-directories to scan.
        sub_dirs: Iterable of sub-directory names. Clips from a directory named
            ``"Deceptive"`` get label 1; clips from any other directory get 0.

    Returns:
        Tuple ``(mfccs_features, features, labels)``:
        ``mfccs_features`` has one row per parsed file and 20 columns,
        ``features`` has one row per parsed file and 45 columns (MFCC, chroma,
        contrast and tonnetz vectors concatenated), and ``labels`` is a 1-D
        integer array aligned with the rows. With no parsed files the arrays
        are empty but keep their column counts.
    """
    # Rows are collected in lists and stacked once at the end; stacking inside
    # the loop re-copies the whole matrix for every file.
    mfcc_rows, feature_rows, labels = [], [], []
    for sub_dir in sub_dirs:
        label = 1 if sub_dir == "Deceptive" else 0
        pattern = os.path.join(parent_dir, sub_dir, "*.wav")
        # Sorted so the row order is reproducible; glob order is arbitrary.
        for fn in sorted(glob.glob(pattern)):
            try:
                print("fex : {0}".format(os.path.basename(fn)))
                mfccs, chroma, contrast, tonnetz = fex(fn)
            except Exception as e:
                # Best-effort: report why the file failed and move on.
                print("Error encountered while parsing file: ", fn, "-", repr(e))
                continue
            mfcc_rows.append(np.hstack([mfccs]))
            feature_rows.append(np.hstack([mfccs, chroma, contrast, tonnetz]))
            labels.append(label)
    # The empty seed arrays pin the expected widths (20 and 45): an empty run
    # still yields 2-D results and a row of the wrong width raises in vstack.
    mfccs_features = np.vstack([np.empty((0, 20))] + mfcc_rows)
    features = np.vstack([np.empty((0, 45))] + feature_rows)
    # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
    return mfccs_features, features, np.array(labels, dtype=int)


In [None]:
# Run feature extraction over every "*.wav" file under DATA_DIR/<SUB_DIRS[i]>.
# Yields the MFCC-only matrix, the combined feature matrix and the label vector.
# NOTE(review): every file is decoded and analysed here, so this cell is
# presumably the slow step of the notebook -- consider caching its result.
mfccs_features, features, labels = parse_audio_files(DATA_DIR, SUB_DIRS)

In [4]:
# Persist both feature sets as CSV, each with the label vector appended as a
# trailing "labels" column and without the DataFrame index.
mfccs_df = pd.DataFrame(mfccs_features).assign(labels=labels)
mfccs_df.to_csv("./mfccs_features.csv", index=False)

features_df = pd.DataFrame(features).assign(labels=labels)
features_df.to_csv("./all_features.csv", index=False)