Imports and constants

In [2]:
import pandas as pd
import numpy as np
from sklearn import feature_selection as fs
import matplotlib.pyplot as plt

# Relative path to the CSV file that holds the signal features loaded below.
csv_features_path = 'csvs/pesa_signal_features.csv'

# Use matplotlib's Qt backend, so figures open in separate windows instead of inline.
%matplotlib qt

LOADING THE DATA

In [14]:
# Statistics computed for the whole signal and for every frequency band.
stat_names = ['AVG', 'PEAK', 'BOTTOM', 'PEAKF', 'STD']
# Band prefixes; the spelling 'tehta' is kept because these strings are used
# as column names when reading the CSV.
band_names = ['delta', 'tehta', 'alpha', 'beta', 'gamma']

# Whole-signal statistics first, then the same statistics for each band.
feature_names = stat_names + [
    f'{band}_{stat}' for band in band_names for stat in stat_names
]
id_columns = ['ID', 'VALENCE', 'AROUSAL']
useful_columns = id_columns + feature_names

data = pd.read_csv(csv_features_path, usecols=useful_columns)

# read_csv ignores the order of `usecols` and keeps the file's own column
# order. Later cells label the columns of `features` positionally with
# `feature_names`, so select the columns explicitly to guarantee that the
# array columns line up with that list.
features = data[feature_names].to_numpy()

# Target labels for the two classification tasks.
classes1 = data['VALENCE']
classes2 = data['AROUSAL']

SELECTING BEST FEATURES

Some of the features (for example, most of the band peak frequencies) vary too little to be useful because of their small value range, so we drop every feature whose variance is not above 0.5:

In [20]:
# Keep only the feature columns whose variance is above 0.5.
# NOTE(review): the name `filter` shadows Python's builtin filter() from this
# cell onwards; it is kept because other cells may refer to it.
filter = fs.VarianceThreshold(threshold=0.5).fit(features)
features = filter.transform(features)

# Carry the names of the surviving columns along with the reduced array.
feature_names = filter.get_feature_names_out(feature_names)

print(feature_names)

['PEAK' 'PEAKF' 'STD' 'delta_AVG' 'delta_PEAK' 'delta_BOTTOM' 'delta_STD'
 'tehta_PEAKF' 'alpha_AVG' 'alpha_PEAK' 'alpha_STD']


Out of those features that are left, we choose the six best for each target (valence and arousal):

In [22]:
# Number of top-scoring features to keep for each target.
N_BEST_FEATURES = 6
# mutual_info_classif adds small random noise to continuous features, so the
# scores (and therefore the selected features) can change between runs unless
# the random state is pinned.
MI_RANDOM_STATE = 0


def seeded_mutual_info(X, y):
    """Return mutual-information scores for X against y with a fixed random state."""
    return fs.mutual_info_classif(X, y, random_state=MI_RANDOM_STATE)


selector = fs.SelectKBest(score_func=seeded_mutual_info, k=N_BEST_FEATURES)

# Rank the features against the valence labels.
selector.fit(features, classes1)
selected_features_valence = selector.get_feature_names_out(feature_names)

# Refit the same selector against the arousal labels.
selector.fit(features, classes2)
selected_features_arousal = selector.get_feature_names_out(feature_names)

print(selected_features_valence)
print(selected_features_arousal)

['PEAKF' 'STD' 'tehta_PEAKF' 'alpha_AVG' 'alpha_PEAK' 'alpha_STD']
['PEAKF' 'delta_STD' 'tehta_PEAKF' 'alpha_AVG' 'alpha_PEAK' 'alpha_STD']


EXAMINING BEST FEATURES
----

FEATURES FOR VALENCE DETECTION

In [23]:
# Identifier columns followed by the features selected for valence.
valence_data = [*id_columns, *selected_features_valence.tolist()]

# Restrict the table to those columns; the arousal label is not needed here.
valence_f = data[valence_data].drop(columns=['AROUSAL'])

# Split the rows by valence label.
# NOTE(review): label 0 is treated as "positive" and label 1 as "negative"
# here -- confirm this matches the label encoding of the dataset.
valence_labels = valence_f['VALENCE']
positives = valence_f.loc[valence_labels == 0]
negatives = valence_f.loc[valence_labels == 1]

Plotting

In [31]:
# Compare the distribution of the 'STD' feature between the two valence groups.
# NOTE(review): 'STD' is hard-coded; this raises KeyError if 'STD' is not
# among the features selected for valence.
posdata = positives['STD'].to_numpy()
negdata = negatives['STD'].to_numpy()
for_plot = [negdata, posdata]

# Draw both groups on one set of axes so they share a y-scale and can be
# compared directly; the box order follows `for_plot`.
fig, ax = plt.subplots()
ax.boxplot(for_plot)
ax.set_xticklabels(['negatives', 'positives'])
ax.set_title('STD by valence group')
ax.set_ylabel('STD')
plt.show()