In [73]:
import numpy as np
import pandas as pd
import os
import re
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import librosa.display
from sklearn.preprocessing import minmax_scale
import IPython.display as ipd

plt.rcParams['figure.figsize'] = (20,8)
plt.rcParams['font.size'] = 16
sns.set_style('darkgrid')
warnings.filterwarnings("ignore")

In [74]:
records = []
for dirname, _, filenames in os.walk('/home/agarwal.aditi/mental_health/RAVDESS-nosil'):
    for filename in filenames:
        records.append([filename, os.path.join(dirname,filename)])

data = pd.DataFrame(records, columns=['filename','path'])
data['actor'] = data['path'].apply(lambda x: re.findall("\w+_\d+",x)[0])
data = data[data['actor']!="audio_speech_actors_01"]
data.reset_index(inplace=True,drop=True)
data['type'] = data['filename'].apply(lambda x: re.split("-\d+\.wav",x)[0])
data

Unnamed: 0,filename,path,actor,type
0,03-01-02-02-02-01-19.wav,/home/agarwal.aditi/mental_health/RAVDESS-nosi...,Actor_19,03-01-02-02-02-01
1,03-01-05-02-01-01-19.wav,/home/agarwal.aditi/mental_health/RAVDESS-nosi...,Actor_19,03-01-05-02-01-01
2,03-01-08-01-01-02-19.wav,/home/agarwal.aditi/mental_health/RAVDESS-nosi...,Actor_19,03-01-08-01-01-02
3,03-01-06-02-01-01-19.wav,/home/agarwal.aditi/mental_health/RAVDESS-nosi...,Actor_19,03-01-06-02-01-01
4,03-01-04-01-01-01-19.wav,/home/agarwal.aditi/mental_health/RAVDESS-nosi...,Actor_19,03-01-04-01-01-01
...,...,...,...,...
1435,03-01-05-02-02-01-10.wav,/home/agarwal.aditi/mental_health/RAVDESS-nosi...,Actor_10,03-01-05-02-02-01
1436,03-01-06-02-01-02-10.wav,/home/agarwal.aditi/mental_health/RAVDESS-nosi...,Actor_10,03-01-06-02-01-02
1437,03-01-06-01-01-02-10.wav,/home/agarwal.aditi/mental_health/RAVDESS-nosi...,Actor_10,03-01-06-01-01-02
1438,03-01-08-02-01-01-10.wav,/home/agarwal.aditi/mental_health/RAVDESS-nosi...,Actor_10,03-01-08-02-01-01


In [75]:
data['actor'].nunique()

24

In [76]:
data['actor'].value_counts()

actor
Actor_19    60
Actor_01    60
Actor_02    60
Actor_11    60
Actor_17    60
Actor_06    60
Actor_07    60
Actor_12    60
Actor_15    60
Actor_05    60
Actor_22    60
Actor_23    60
Actor_24    60
Actor_09    60
Actor_03    60
Actor_08    60
Actor_04    60
Actor_13    60
Actor_18    60
Actor_20    60
Actor_21    60
Actor_14    60
Actor_16    60
Actor_10    60
Name: count, dtype: int64

In [77]:
data['class'] = data['type'].apply(lambda x: x.split('-')[2])

In [78]:
data['class'] = data['class'].astype(int)

In [79]:
data['class'] = data['class'].replace({1:'neutral', 2:'neutral', 3:'happy', 4:'sad', 5:'angry', 6:'fear', 7:'disgust', 8:'surprise'})

In [80]:
data['class'].value_counts()

class
neutral     288
angry       192
surprise    192
fear        192
sad         192
disgust     192
happy       192
Name: count, dtype: int64

In [82]:
data['class'].value_counts()

class
neutral     288
angry       192
surprise    192
fear        192
sad         192
disgust     192
happy       192
Name: count, dtype: int64

In [35]:
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix

In [36]:
def feature_extraction(df, mfcc=True):
    features = []
    for i,record in tqdm(df.iterrows(),total=df.shape[0]):
        x , sr = librosa.load(record['path'])
        mean_mfcc = np.mean(librosa.feature.mfcc(y=x, sr=sr, n_mfcc=128),axis=1)
        mean_ms = np.mean(librosa.feature.melspectrogram(y=x, sr=sr, n_mels=128),axis=1)
        features.append(mean_mfcc if mfcc else mean_ms)
        
    dataf = pd.DataFrame(features)
    dataf['class'] = df['class']
    return dataf

In [87]:
import pandas as pd
from tqdm import tqdm

def egge(df, smile):
    features = []
    for i, record in tqdm(df.iterrows(), total=df.shape[0]):
        # Process file with OpenSMILE and get features
        y = smile.process_file(record['path'])
        
        # Convert features to DataFrame
        features_df = pd.DataFrame(y)
        
        # Add path column to features_df
        features_df['path'] = record['path']
        
        # Append features to the list
        features.append(features_df)
    
    # Concatenate all features DataFrames
    features_df = pd.concat(features, ignore_index=True)
    
    # Merge features_df with the original DataFrame on path
    merged_df = pd.merge(df, features_df, on='path', how='inner')
    
    return merged_df


In [37]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()

## MFCC Features

In [38]:
dataf = feature_extraction(data)
dataf

100%|██████████| 1440/1440 [00:22<00:00, 63.22it/s]


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,119,120,121,122,123,124,125,126,127,class
0,-700.392883,61.621437,16.980686,23.538042,13.520005,12.606575,-0.519369,-4.675446,-2.516029,6.177147,...,-0.273638,-0.187049,-0.126556,0.236960,-0.063746,0.026041,-0.001292,-0.166774,-0.100531,neutral
1,-405.778809,30.750469,-20.098661,7.260509,-7.363235,-10.164589,-6.574836,-2.539282,-11.707138,-4.125695,...,-0.072577,0.102271,0.307730,0.175445,0.151858,0.078863,0.224321,-0.083782,-0.140972,angry
2,-647.876343,78.786400,2.392293,8.779393,-0.526153,3.914570,-3.818168,-10.349932,-5.101916,-6.318324,...,-0.080108,0.151396,0.308493,0.228258,0.083027,0.396278,0.133575,-0.441955,-0.137462,surprise
3,-488.198517,42.980167,-32.867901,4.590630,-4.983958,-3.605664,-19.530750,-14.279503,-13.213704,-8.078819,...,-0.297318,-0.435113,0.169336,0.180520,-0.012502,0.267775,0.236403,0.068696,-0.128023,fear
4,-723.713135,67.224693,17.922281,18.497843,9.793799,13.589759,7.002605,-1.639939,-0.716900,1.205396,...,-0.049022,-0.041680,0.134135,-0.048266,-0.246478,-0.309844,-0.205153,-0.165778,-0.437962,sad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,-393.291962,37.427639,-14.803538,-2.003925,-10.825532,-11.668422,-8.092076,-12.388589,-13.540404,-5.169627,...,0.192264,-0.111088,-0.571826,-0.259035,0.025157,0.427605,0.176654,-0.018887,0.042530,angry
1436,-469.871094,38.700233,-22.003654,2.981045,-1.439886,-14.886630,-9.309187,-17.981142,-9.321032,1.324526,...,-0.094284,-0.640148,-0.465010,-0.279884,-0.174371,0.037860,0.037590,-0.137029,0.002831,fear
1437,-608.372742,55.736412,3.106510,8.106892,3.487876,2.032688,-5.425860,-12.726157,1.086332,4.814475,...,-0.449617,-0.306768,-0.276278,-0.413240,-0.356383,0.003710,0.262689,0.320938,-0.397841,fear
1438,-475.995331,45.401077,-10.204731,1.458183,-1.101155,-7.637703,-6.307356,-12.698923,-10.170322,-1.432007,...,0.279609,-0.182069,-0.524051,-0.449768,0.085295,0.120856,-0.031877,-0.045308,0.123060,surprise


In [39]:
X1 = dataf.iloc[:,:-1].values
y1 = dataf.iloc[:,-1].values 
y1 = encoder.fit_transform(y1)

## Mel Spectrogram Features

In [40]:
dataf = feature_extraction(data,mfcc=False)
dataf

100%|██████████| 1440/1440 [00:20<00:00, 69.62it/s]


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,119,120,121,122,123,124,125,126,127,class
0,0.000102,0.000132,0.010014,0.019777,0.008215,0.005997,0.014001,0.022000,0.018773,0.014248,...,0.000034,0.000032,0.000032,0.000030,0.000035,0.000030,0.000022,0.000011,8.128426e-07,neutral
1,0.204939,0.007837,0.001730,0.000998,0.003056,0.025254,0.064966,0.194058,0.554539,0.996602,...,0.001925,0.001383,0.001222,0.001259,0.001649,0.002469,0.002813,0.002047,2.850048e-04,angry
2,0.000096,0.000046,0.000688,0.003677,0.012726,0.015543,0.007202,0.007033,0.033846,0.030709,...,0.000008,0.000007,0.000004,0.000005,0.000006,0.000006,0.000005,0.000003,2.346686e-07,surprise
3,0.002258,0.000123,0.000048,0.000020,0.000044,0.000130,0.000140,0.000247,0.001117,0.038726,...,0.000063,0.000084,0.000102,0.000085,0.000066,0.000097,0.000158,0.000103,1.295821e-05,fear
4,0.000153,0.000069,0.003380,0.009465,0.013482,0.011710,0.003917,0.008898,0.018658,0.013175,...,0.000003,0.000003,0.000004,0.000004,0.000004,0.000005,0.000005,0.000003,2.630177e-07,sad
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1435,0.000284,0.001507,0.002652,0.002376,0.005163,0.023600,0.112236,0.783303,1.664240,0.903265,...,0.004059,0.003991,0.003797,0.003935,0.005029,0.009105,0.012693,0.007898,6.005071e-04,angry
1436,0.000053,0.000397,0.000624,0.000348,0.000176,0.000259,0.000467,0.019009,0.237898,0.470555,...,0.000711,0.000994,0.000961,0.001491,0.001013,0.000877,0.000794,0.000639,4.776130e-05,fear
1437,0.000038,0.000219,0.000402,0.000300,0.000201,0.002830,0.074776,0.474845,0.427268,0.150646,...,0.000267,0.000266,0.000126,0.000150,0.000108,0.000116,0.000103,0.000102,6.429320e-06,fear
1438,0.000191,0.000699,0.000997,0.000534,0.000603,0.003897,0.119252,1.357782,1.612602,2.967693,...,0.001158,0.001201,0.000961,0.000971,0.001176,0.001224,0.001078,0.000753,9.477753e-05,surprise


In [41]:
X2 = dataf.iloc[:,:-1].values
y2 = dataf.iloc[:,-1].values
y2 = encoder.fit_transform(y2)

eGEEMaps features

In [88]:
from sklearn.preprocessing import StandardScaler
import opensmile

smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.eGeMAPSv02,
    feature_level=opensmile.FeatureLevel.Functionals,
)
dataf = egge(data,smile)

dataf = dataf.fillna(0)
X3 = dataf[dataf.columns[5:-1]]
y3 = dataf['class']
y3 = encoder.fit_transform(y3)
X3 = StandardScaler().fit_transform(X3)


  0%|          | 0/1440 [00:00<?, ?it/s]

100%|██████████| 1440/1440 [03:18<00:00,  7.27it/s]


In [89]:
from sklearn.preprocessing import StandardScaler
import opensmile

smile = opensmile.Smile(
    feature_set=opensmile.FeatureSet.GeMAPSv01b,
    feature_level=opensmile.FeatureLevel.LowLevelDescriptors,
)
dataf = egge(data,smile)

dataf = dataf.fillna(0)
X4 = dataf[dataf.columns[5:-1]]
y4 = dataf['class']
y4 = encoder.fit_transform(y4)
X4 = StandardScaler().fit_transform(X4)


  0%|          | 1/1440 [00:00<04:00,  5.97it/s]

100%|██████████| 1440/1440 [03:11<00:00,  7.52it/s]


In [90]:
def LogisticRegressionPipeline(X,y):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y)
    pipeline = Pipeline([('scaler',StandardScaler()),('LogisticRegression',LogisticRegression())])
    pipeline.fit(X_train,y_train)
    y_train_pred = pipeline.predict(X_train)
    y_pred = pipeline.predict(X_test)
    
    cmatrix = confusion_matrix(y_test,y_pred)
    
    print("Training Performance")
    print(classification_report(y_train,y_train_pred))
    print("-----------------------------------------")
    print("Test Performance")
    print(classification_report(y_test,y_pred))
    print("-----------------------------------------")
    
    cv_score = cross_val_score(pipeline,X,y,cv=5)
    return cv_score, cmatrix

Logistic Regression: MFCCs

In [91]:
scores, cmatrix = LogisticRegressionPipeline(X1,y1)

Training Performance
              precision    recall  f1-score   support

           0       0.86      0.86      0.86       154
           1       0.76      0.73      0.74       154
           2       0.80      0.76      0.78       153
           3       0.78      0.78      0.78       154
           4       0.83      0.92      0.87       230
           5       0.73      0.67      0.70       154
           6       0.74      0.75      0.74       153

    accuracy                           0.79      1152
   macro avg       0.79      0.78      0.78      1152
weighted avg       0.79      0.79      0.79      1152

-----------------------------------------
Test Performance
              precision    recall  f1-score   support

           0       0.70      0.74      0.72        38
           1       0.50      0.47      0.49        38
           2       0.33      0.33      0.33        39
           3       0.45      0.39      0.42        38
           4       0.63      0.59      0.61        5

Logistic Regression: Mel Spectrogram

In [92]:
scores, cmatrix = LogisticRegressionPipeline(X2,y2)

Training Performance
              precision    recall  f1-score   support

           0       0.92      0.65      0.76       154
           1       0.54      0.41      0.47       153
           2       0.65      0.47      0.54       154
           3       0.67      0.39      0.49       153
           4       0.44      0.93      0.60       230
           5       0.31      0.22      0.26       154
           6       0.47      0.39      0.43       154

    accuracy                           0.52      1152
   macro avg       0.57      0.49      0.51      1152
weighted avg       0.56      0.52      0.51      1152

-----------------------------------------
Test Performance
              precision    recall  f1-score   support

           0       0.67      0.42      0.52        38
           1       0.33      0.21      0.25        39
           2       0.48      0.39      0.43        38
           3       0.31      0.26      0.28        39
           4       0.43      0.93      0.59        5

In [93]:
scores, cmatrix = LogisticRegressionPipeline(X3,y3)

Training Performance
              precision    recall  f1-score   support

           0       0.88      0.80      0.84       153
           1       0.77      0.79      0.78       154
           2       0.79      0.78      0.78       153
           3       0.71      0.70      0.70       154
           4       0.81      0.92      0.86       230
           5       0.66      0.58      0.62       154
           6       0.80      0.81      0.80       154

    accuracy                           0.78      1152
   macro avg       0.77      0.77      0.77      1152
weighted avg       0.78      0.78      0.77      1152

-----------------------------------------
Test Performance
              precision    recall  f1-score   support

           0       0.74      0.72      0.73        39
           1       0.60      0.66      0.62        38
           2       0.64      0.72      0.67        39
           3       0.59      0.45      0.51        38
           4       0.72      0.74      0.73        5

In [96]:
scores, cmatrix = LogisticRegressionPipeline(X4,y4)

Training Performance
              precision    recall  f1-score   support

           0       0.36      0.25      0.29     58793
           1       0.22      0.10      0.14     59869
           2       0.27      0.12      0.16     54225
           3       0.22      0.08      0.11     55207
           4       0.26      0.88      0.40     84188
           5       0.17      0.03      0.05     56075
           6       0.25      0.06      0.09     52880

    accuracy                           0.26    421237
   macro avg       0.25      0.22      0.18    421237
weighted avg       0.25      0.26      0.20    421237

-----------------------------------------
Test Performance
              precision    recall  f1-score   support

           0       0.35      0.24      0.29     14698
           1       0.23      0.11      0.15     14967
           2       0.27      0.12      0.17     13556
           3       0.22      0.08      0.11     13802
           4       0.26      0.88      0.40     2104

In [99]:
colnames = ['feature type', ' train accuracy', 'test accuracy', 'class 0 recall']

In [100]:
metrics = pd.DataFrame(columns=colnames)

In [102]:
metrics['feature type'] = ['mfcc','melspectrogram','eGeMAPS','GeMAPS']

In [106]:
metrics['class 0 recall'] = [74,42,72,24]

In [107]:
metrics

Unnamed: 0,feature type,train accuracy,test accuracy,class 0 recall
0,mfcc,79,49,74
1,melspectrogram,52,42,42
2,eGeMAPS,78,63,72
3,GeMAPS,26,26,24
