In [1]:
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_absolute_error, r2_score, f1_score
from sklearn.linear_model import LinearRegression, SGDClassifier
from sklearn.svm import SVC,SVR
from sklearn import preprocessing

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pickle

# Loading audio data

In [2]:
# Read the three audio-feature tables (train / test / validation splits).
aud_train = pd.read_csv('../audio/pickle_files/training_df_all_dur_15.csv')
aud_test = pd.read_csv('../audio/pickle_files/test_df_all_dur_15.csv')
aud_val = pd.read_csv('../audio/pickle_files/validation_df_all_dur_15.csv')

# Columns used as regression targets; every column other than these and
# 'video_id' is used as an input feature.
traits = ['interview_score', 'openness', 'conscientiousness', 'extraversion', 'agreeableness', 'neuroticism']
attributes = aud_train.columns


def _split_frame(frame):
    """Split one table into (video ids, feature matrix, trait targets) arrays."""
    non_feature_cols = traits + ['video_id']
    ids = frame['video_id'].values
    features = frame.drop(columns=non_feature_cols).values
    targets = frame[traits].values
    return ids, features, targets


id_aud_train, X_aud_train, Y_aud_train = _split_frame(aud_train)
id_aud_test, X_aud_test, Y_aud_test = _split_frame(aud_test)
id_aud_val, X_aud_val, Y_aud_val = _split_frame(aud_val)

# No hyperparameters are tuned in this notebook, so the validation rows are
# appended to the training rows instead of being held out.
Y_train = np.concatenate((Y_aud_train, Y_aud_val))
Y_test = Y_aud_test

# Feature scaling: the scaler is fitted on the training rows only and the
# same fitted transform is then applied to the test rows.
scaler = preprocessing.StandardScaler()
X_train = scaler.fit_transform(np.concatenate((X_aud_train, X_aud_val)))
X_test = scaler.transform(X_aud_test)

## Linear Regression Model

In [3]:
# Fit one multi-output ordinary least squares model that predicts every trait
# column at once, then score it on the held-out test split.
linear_regressor = LinearRegression(n_jobs= -1)
linear_regressor.fit(X_train, Y_train)
y_test_pred = linear_regressor.predict(X_test)

# multioutput='raw_values' returns one R2 value per target column, in the
# same order as `traits`.
print("\nR2 score\n")
score = r2_score(Y_test, y_test_pred, multioutput = 'raw_values')
# Iterate over `traits` itself rather than a hard-coded count so the report
# always covers exactly the columns that were predicted.
for trait, trait_r2 in zip(traits, score):
    print(trait,": {:.4f}".format(trait_r2))

# "Accuracy" here is defined as 1 - mean absolute error for each trait.
# NOTE(review): this reads as an accuracy only if the targets lie in [0, 1] -
# confirm against the dataset.
print("\nAccuracies\n")
for col, trait in enumerate(traits):
    trait_mae = mean_absolute_error(Y_test[:, col], y_test_pred[:, col])
    print(trait,": {:.4f}".format(1 - trait_mae))


R2 score

interview_score : 0.2070
openness : 0.2199
conscientiousness : 0.1853
extraversion : 0.2168
agreeableness : 0.1198
neuroticism : 0.2389

Accuracies

interview_score : 0.8959
openness : 0.8982
conscientiousness : 0.8896
extraversion : 0.8934
agreeableness : 0.9009
neuroticism : 0.8939


## Random Forest Regressor Model

In [4]:
# Fit one multi-output random forest that predicts every trait column at
# once, then score it on the held-out test split.
# random_state pins the bootstrap sampling and split selection so that
# Restart & Run All reproduces the same scores; without it every run gives
# slightly different numbers (metrics recorded from an earlier unseeded run
# will therefore not match exactly).
rand_forest_regr = RandomForestRegressor(n_jobs=-1, random_state=42)
rand_forest_regr.fit(X_train, Y_train)
y_test_pred = rand_forest_regr.predict(X_test)

# multioutput='raw_values' returns one R2 value per target column, in the
# same order as `traits`.
print("\nR2 score\n")
score = r2_score(Y_test, y_test_pred, multioutput = 'raw_values')
# Iterate over `traits` itself rather than a hard-coded count so the report
# always covers exactly the columns that were predicted.
for trait, trait_r2 in zip(traits, score):
    print(trait,": {:.4f}".format(trait_r2))

# "Accuracy" here is defined as 1 - mean absolute error for each trait.
# NOTE(review): this reads as an accuracy only if the targets lie in [0, 1] -
# confirm against the dataset.
print("\nAccuracies\n")
for col, trait in enumerate(traits):
    trait_mae = mean_absolute_error(Y_test[:, col], y_test_pred[:, col])
    print(trait,": {:.4f}".format(1 - trait_mae))


R2 score

interview_score : 0.3324
openness : 0.3205
conscientiousness : 0.3202
extraversion : 0.3208
agreeableness : 0.2189
neuroticism : 0.3415

Accuracies

interview_score : 0.9044
openness : 0.9039
conscientiousness : 0.8986
extraversion : 0.9000
agreeableness : 0.9055
neuroticism : 0.9001
