In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import joblib
import librosa as lb
import scipy as sp

In [2]:

# --- Locations -------------------------------------------------------------
# Everything is resolved relative to the directory the notebook is run from.
BASE_DIR = os.getcwd()
WINDOWS_DIR = '../Windows_AF/'    # input: windowed clips, joined onto BASE_DIR
FEATURES_DIR = '../Features/'     # output: extracted feature tables

# --- Dataset layout --------------------------------------------------------
# One sub-folder per emotion, each holding one sub-folder per fold.
# The position of a name in these lists is used as its numeric label / fold id.
EMOTIONS = ['Approval', 'Disapproval', 'Neutral']
# NOTE(review): an earlier variant used FOLDS = [''] -- presumably for data
# that is not split into fold sub-folders; confirm before re-enabling.
FOLDS = ['Fold ' + str(k) for k in range(1, 6)]


In [3]:
def correlation(x):
    """Zero-lag autocorrelation of each row of ``x``.

    ``np.correlate`` is called with its default mode on two identical,
    equal-length vectors, so it yields a single value per row: the dot
    product of the row with itself.

    Parameters
    ----------
    x : np.ndarray
        Array indexed as ``x[row, sample]``.

    Returns
    -------
    np.ndarray
        One value per row of ``x``.
    """
    return np.array([np.correlate(row, row)[0] for row in x])


def mean_crossing_rate(x):
    """Mean-crossing rate of each row of ``x``.

    Every row is centred on its own mean and handed to librosa's
    ``zero_crossing_rate``; element ``[0, 0]`` of the returned array is
    kept as the feature for that row.

    NOTE(review): librosa frames the signal, so ``[0, 0]`` looks like it
    keeps only the first analysis frame rather than the whole row --
    confirm this is intended.

    Parameters
    ----------
    x : np.ndarray
        Array indexed as ``x[row, sample]``.

    Returns
    -------
    np.ndarray
        One rate per row of ``x``.
    """
    rates = []
    for idx in range(x.shape[0]):
        row = x[idx, :]
        centred = row - np.mean(row)
        zcr = lb.feature.zero_crossing_rate(centred)
        rates.append(zcr[0, 0])
    return np.array(rates)


def get_entropy(x, axis = 1):
    """Shannon entropy (natural log) of ``x`` along ``axis``.

    ``x`` is first normalised so that it sums to 1 along ``axis``; the
    element-wise ``-p * log(p)`` terms (``scipy.special.entr``) are then
    summed along the same axis.

    Parameters
    ----------
    x : np.ndarray
        Non-normalised weights, e.g. a power spectrum.
    axis : int
        Axis along which to normalise and reduce.

    Returns
    -------
    np.ndarray
        Entropy values with ``axis`` removed.
    """
    total = np.sum(x, axis=axis, keepdims=True)
    prob = x / total
    return np.sum(sp.special.entr(prob), axis=axis)


def number_of_peaks(x):
    """Count the prominent local maxima in each row of ``x``.

    A local maximum is counted when its height is at least one third of
    the row's maximum value (passed to ``scipy.signal.find_peaks`` as the
    minimum ``height``).

    Parameters
    ----------
    x : np.ndarray
        Array indexed as ``x[row, sample]``.

    Returns
    -------
    np.ndarray
        Float array with the peak count of every row.
    """
    n_rows = x.shape[0]
    counts = np.empty(n_rows, dtype=float)
    for idx in range(n_rows):
        row = x[idx, :]
        min_height = np.max(row) / 3
        found, _ = sp.signal.find_peaks(row, height=min_height)
        counts[idx] = len(found)
    return counts


def get_stat_features(x, axis=1, prefix=''):
    """Time-domain statistical features, one row of features per signal.

    Columns (each named ``<prefix>_<name>``):

    * ``min``, ``max``, ``std``, ``avg``, ``var``, ``ptp`` -- basic statistics.
    * ``mrc``, ``arc``, ``src`` -- max / mean / std of the first difference
      (rate of change).
    * ``mad`` -- median absolute deviation.
    * ``iqr`` -- inter-quartile range.
    * ``cor`` -- zero-lag autocorrelation (sum of squares), see ``correlation``.
    * ``mcr`` -- mean-crossing rate, see ``mean_crossing_rate``.
    * ``rms`` -- root-mean-square value.

    Parameters
    ----------
    x : np.ndarray
        2-D array of signals. ``correlation`` and ``mean_crossing_rate``
        always iterate over the first axis, so only ``axis=1`` gives a
        consistent result.
    axis : int
        Axis holding the samples of one signal.
    prefix : str
        Prepended (with an underscore) to every column name.

    Returns
    -------
    pd.DataFrame
        One row per signal, 14 feature columns.
    """
    # First difference is shared by the three rate-of-change features.
    delta = np.diff(x, axis=axis)

    # Insertion order of this dict defines the column order.
    features = {
        'min': np.min(x, axis=axis),
        'max': np.max(x, axis=axis),
        'std': np.std(x, axis=axis),
        'avg': np.mean(x, axis=axis),
        'var': np.var(x, axis=axis),
        'ptp': np.ptp(x, axis=axis),
        'mrc': np.max(delta, axis=axis),
        'arc': np.mean(delta, axis=axis),
        'src': np.std(delta, axis=axis),
        'mad': sp.stats.median_abs_deviation(x, axis=axis),
        'iqr': sp.stats.iqr(x, axis=axis),
        'cor': correlation(x),
        'mcr': mean_crossing_rate(x),
        # Previously sum(x**2), which is not an RMS and merely duplicated
        # the 'cor' column; now an actual root-mean-square.
        'rms': np.sqrt(np.mean(np.square(x), axis=axis)),
    }

    column_names = [prefix + '_' + name for name in features]
    return pd.DataFrame(np.stack(list(features.values()), axis=1),
                        columns=column_names)
 

def get_freq_features(x, axis=1, fs=44100, nperseg=8000, prefix='psd'):
    """Spectral features derived from a Welch power-spectral-density estimate.

    Columns (each named ``<prefix>_<name>``):

    * ``mpw`` -- maximum PSD value.
    * ``ent`` -- entropy of the normalised PSD (``get_entropy``).
    * ``ctf`` -- spectral centroid, ``sum(freq * psd) / sum(psd)``.
    * ``mxf`` -- index of the strongest PSD bin (a bin index, not a
      frequency in Hz).
    * ``enr`` -- ``sum(psd ** 2) / nperseg``.
    * ``skw``, ``kut`` -- skewness and kurtosis of the input signal ``x``
      itself, not of the PSD.
    * ``npk`` -- number of PSD peaks (``number_of_peaks``).

    Parameters
    ----------
    x : np.ndarray
        2-D array of signals. ``freq * psd`` broadcasts over the last axis
        and ``number_of_peaks`` iterates over the first, so only ``axis=1``
        gives a consistent result.
    axis : int
        Axis holding the samples of one signal.
    fs : int
        Sampling frequency passed to ``scipy.signal.welch``.
    nperseg : int
        Welch segment length; also the divisor of ``enr``.
    prefix : str
        Prepended (with an underscore) to every column name.

    Returns
    -------
    pd.DataFrame
        One row per signal, 8 feature columns.
    """
    freq, psd = sp.signal.welch(x, fs, nperseg=nperseg, axis=axis)

    # Insertion order of this dict defines the column order.
    features = {
        'mpw': np.max(psd, axis=axis),
        'ent': get_entropy(psd, axis=axis),
        'ctf': np.divide(np.sum((freq * psd), axis=axis),
                         np.sum(psd, axis=axis)),
        'mxf': np.argmax(psd, axis=axis),
        'enr': np.sum(np.square(psd), axis=axis) / nperseg,
        'skw': sp.stats.skew(x, axis=axis),
        'kut': sp.stats.kurtosis(x, axis=axis),
        'npk': number_of_peaks(psd),
    }

    column_names = [prefix + '_' + name for name in features]
    return pd.DataFrame(np.stack(list(features.values()), axis=1),
                        columns=column_names)


def get_mutual_features(x, y, z, axis=1, nperseg=150, prefix=''):
    """Cross-channel features for three aligned signal arrays.

    Columns (each named ``<prefix>_<name>``):

    * ``cxy``, ``cxz``, ``cyz`` -- Pearson correlation of each channel pair.
    * ``vxy``, ``vxz``, ``vyz`` -- covariance of each channel pair.
    * ``sma`` -- sum of the trapezoidal integrals of ``x``, ``y`` and ``z``
      along ``axis``, divided by ``nperseg``. The integrals are signed (no
      absolute value is taken), as in the original implementation.

    Parameters
    ----------
    x, y, z : np.ndarray
        Arrays of identical shape indexed as ``a[row, sample]``; row ``n``
        of each array is treated as one channel of the same window.
    axis : int
        Axis along which the signals are integrated for ``sma``.
    nperseg : int
        Normalisation divisor for ``sma``.
    prefix : str
        Prepended (with an underscore) to every column name.

    Returns
    -------
    pd.DataFrame
        One row per window, 7 feature columns.
    """
    # np.trapz was renamed to np.trapezoid in NumPy 2.0; accept either.
    trapezoid = getattr(np, 'trapezoid', None)
    if trapezoid is None:
        trapezoid = np.trapz

    channel_pairs = (('xy', x, y), ('xz', x, z), ('yz', y, z))
    corr = {tag: [] for tag, _, _ in channel_pairs}
    cov = {tag: [] for tag, _, _ in channel_pairs}

    for n in range(x.shape[0]):
        for tag, a, b in channel_pairs:
            u = a[n, :].ravel()
            v = b[n, :].ravel()
            # Off-diagonal entry of the 2x2 matrix is the cross term.
            corr[tag].append(np.corrcoef(u, v)[0, 1])
            cov[tag].append(np.cov(u, v)[0, 1])

    # Fix: the original integrated ``x`` three times and ignored y and z.
    sma = (trapezoid(x, axis=axis)
           + trapezoid(y, axis=axis)
           + trapezoid(z, axis=axis)) / nperseg

    # Insertion order of this dict defines the column order.
    features = {
        'cxy': np.array(corr['xy']),
        'cxz': np.array(corr['xz']),
        'cyz': np.array(corr['yz']),
        'vxy': np.array(cov['xy']),
        'vxz': np.array(cov['xz']),
        'vyz': np.array(cov['yz']),
        'sma': sma,
    }

    column_names = [prefix + '_' + name for name in features]
    return pd.DataFrame(np.stack(list(features.values()), axis=1),
                        columns=column_names)

In [4]:
# Walk <WINDOWS_DIR>/<emotion>/<fold>/ and extract spectral features from
# every stored clip. Each output row is tagged with the numeric emotion label
# and fold id (positions in EMOTIONS / FOLDS, stored as floats).
path = os.path.join(BASE_DIR, WINDOWS_DIR)

# Per-clip frames are collected and concatenated once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all rows every iteration.
clip_frames = []

for e_idx, emotion in enumerate(EMOTIONS):
    print('Processing data for ' + emotion, end=' ... ')

    for f_idx, fold in enumerate(FOLDS):
        fold_path = os.path.join(path, emotion, fold)

        # os.listdir returns entries in arbitrary order; sort so the row
        # order of X is reproducible across machines and runs.
        for filename in sorted(os.listdir(fold_path)):
            file_path = os.path.join(fold_path, filename)
            # NOTE(review): joblib.load unpickles the file and can execute
            # arbitrary code -- only load window files from a trusted source.
            data = joblib.load(file_path)

            clip_features = get_freq_features(data).assign(
                label=float(e_idx),
                fold=float(f_idx),
            )
            clip_frames.append(clip_features)

    print('√')

# pd.concat raises on an empty list, so fall back to an empty frame.
X = pd.concat(clip_frames, ignore_index=True) if clip_frames else pd.DataFrame()

Processing data for Approval ... √
Processing data for Disapproval ... √
Processing data for Neutral ... √


In [5]:
# Persist the assembled feature table for the downstream notebooks.
features_path = os.path.join(BASE_DIR, FEATURES_DIR, 'Features_VAL.joblib')
# joblib.dump does not create missing parent folders, so make sure the
# output directory exists before writing (no-op when it already does).
os.makedirs(os.path.dirname(features_path), exist_ok=True)
joblib.dump(X, features_path)

['/home/andromeda/Temp/Crowd-Emotion/src_ml/../Features/Features_VAL.joblib']

In [6]:
# Show the assembled feature table as the cell output: the feature columns
# followed by the numeric `label` and `fold` columns.
X

Unnamed: 0,psd_mpw,psd_ent,psd_ctf,psd_mxf,psd_enr,psd_skw,psd_kut,psd_npk,label,fold
0,0.000012,6.295446,2159.540355,225.0,8.641112e-13,0.045638,4.817352,66.0,0.0,0.0
1,0.000018,6.310801,2167.418951,255.0,1.635127e-12,0.022894,3.316666,45.0,0.0,0.0
2,0.000020,6.328863,2169.414417,255.0,1.875289e-12,-0.003592,3.063332,44.0,0.0,0.0
3,0.000016,6.325468,2166.209834,255.0,1.707556e-12,-0.013705,3.607823,57.0,0.0,0.0
4,0.000017,6.310121,2158.655002,214.0,1.979116e-12,-0.018822,3.788671,54.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
9483,0.000046,4.952432,565.208400,63.0,2.459693e-12,0.070171,-0.017122,9.0,2.0,0.0
9484,0.000055,5.088809,641.432703,58.0,2.105968e-12,0.090785,-0.084976,7.0,2.0,0.0
9485,0.000062,5.052711,695.548300,58.0,2.834573e-12,0.075639,-0.043194,7.0,2.0,0.0
9486,0.000073,5.003821,683.520569,58.0,3.797105e-12,0.133953,0.110834,7.0,2.0,0.0
