In [37]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import mutual_info_classif

In [38]:
def Feature_Selection(data, threshold):
    y = data.target
    X = data.drop('target' , axis = 1)
    
    importance = mutual_info_classif(X,y)
    feat_importance = pd.Series(importance , data.columns[0:len( data.columns)-1])

    # sort the features 
    feat_importance = feat_importance.sort_values(ascending=False)

    selected_features = []

    # loop on sorted features
    for i in feat_importance.index:
        if feat_importance[i] > threshold:
            selected_features.append(i)

    # create a data frame of the selected features
    df_selected_features = data[selected_features]
    return df_selected_features

# Reading EEG data

In [39]:
EEG_data = pd.read_csv('ml_data.csv')

In [40]:
EEG_data

Unnamed: 0.1,Unnamed: 0,psdtheta_1,psdalpha_1,psdbeta_1,psdtheta_2,psdalpha_2,psdbeta_2,psdtheta_3,psdalpha_3,psdbeta_3,...,age,gender,participant,video,video_name,target_emotion,valence,arousal,dominance,stress_bin
0,0,-0.150843,-0.150624,-0.166261,-0.145117,-0.143640,-0.119170,-0.475254,-0.468150,-0.330365,...,22,male,1,1,Searching for Bobby Fischer,calmness,4,3,2,0.0
1,1,-0.128521,-0.129717,-0.155580,-0.105668,-0.108871,-0.168438,0.302383,0.258150,-0.121029,...,22,male,1,2,D.O.A.,surprise,3,3,1,0.0
2,2,-0.103209,-0.103515,-0.122637,-0.109388,-0.111235,-0.140875,0.063157,0.057745,0.006707,...,22,male,1,3,The Hangover,amusement,5,4,4,0.0
3,3,-0.181723,-0.184244,-0.230825,-0.113030,-0.114522,-0.139100,-0.067104,-0.068218,-0.071644,...,22,male,1,4,The Ring,fear,4,3,2,1.0
4,4,-0.043894,-0.042331,-0.032673,-0.148676,-0.152181,-0.201394,0.314590,0.301284,0.191730,...,22,male,1,5,300,excitement,4,4,4,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,409,-0.259796,-0.264659,-0.351959,-0.215325,-0.223008,-0.347720,-0.282586,-0.281040,-0.263415,...,25,male,23,14,Gentlemans Agreement,anger,2,2,2,1.0
410,410,-0.220586,-0.227271,-0.333407,-0.083146,-0.084745,-0.115249,-0.452527,-0.461702,-0.518178,...,25,male,23,15,Psycho,fear,2,2,2,1.0
411,411,-0.054579,-0.056091,-0.082380,0.014625,0.014887,0.025082,0.225323,0.234515,0.292967,...,25,male,23,16,The Bourne Identitiy,excitement,3,3,2,0.0
412,412,-0.242060,-0.244057,-0.288320,-0.134908,-0.139699,-0.208673,-0.238502,-0.241715,-0.257637,...,25,male,23,17,The Shawshank Redemption,sadness,2,2,4,


### Mapping values to be numeric

In [41]:
EEG_data['target'] = EEG_data['target_emotion'].map(
    {'surprise': 0, 'excitement': 1, 'amusement': 2, 'happiness': 3,'anger': 4, 'fear': 5, 'disgust': 6, 'calmness': 7, 'sadness': 8}
)
EEG_data['gender'] = EEG_data['gender'].map(
    {'male': 0, 'female': 1}
)

In [42]:
# Create a label encoder object
le = LabelEncoder()

# Fit the encoder to the categorical variable
le.fit(EEG_data['video_name'])

# Transform the categorical variable into numerical values
numeric_videos = le.transform(EEG_data['video_name'])

EEG_data['video_name'] = numeric_videos

### Drop unnecessary columns

In [43]:
EEG_data.drop(['target_emotion','valence','arousal','dominance','Unnamed: 0','stress_bin'], axis=1, inplace=True)

In [44]:
EEG_data

Unnamed: 0,psdtheta_1,psdalpha_1,psdbeta_1,psdtheta_2,psdalpha_2,psdbeta_2,psdtheta_3,psdalpha_3,psdbeta_3,psdtheta_4,...,psdbeta_13,psdtheta_14,psdalpha_14,psdbeta_14,age,gender,participant,video,video_name,target
0,-0.150843,-0.150624,-0.166261,-0.145117,-0.143640,-0.119170,-0.475254,-0.468150,-0.330365,-0.427103,...,-0.419732,-0.195563,-0.195812,-0.204449,22,0,1,1,10,7
1,-0.128521,-0.129717,-0.155580,-0.105668,-0.108871,-0.168438,0.302383,0.258150,-0.121029,-0.052994,...,-0.175393,-0.153317,-0.153977,-0.166154,22,0,1,2,2,0
2,-0.103209,-0.103515,-0.122637,-0.109388,-0.111235,-0.140875,0.063157,0.057745,0.006707,-0.081610,...,-0.117177,-0.046608,-0.046963,-0.051510,22,0,1,3,14,2
3,-0.181723,-0.184244,-0.230825,-0.113030,-0.114522,-0.139100,-0.067104,-0.068218,-0.071644,-0.130311,...,-0.311654,-0.150296,-0.150274,-0.153138,22,0,1,4,15,5
4,-0.043894,-0.042331,-0.032673,-0.148676,-0.152181,-0.201394,0.314590,0.301284,0.191730,-0.023894,...,0.125982,0.075506,0.078709,0.108396,22,0,1,5,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,-0.259796,-0.264659,-0.351959,-0.215325,-0.223008,-0.347720,-0.282586,-0.281040,-0.263415,-0.350000,...,0.063415,-0.052881,-0.052248,-0.049186,25,0,23,14,3,4
410,-0.220586,-0.227271,-0.333407,-0.083146,-0.084745,-0.115249,-0.452527,-0.461702,-0.518178,-0.551788,...,-0.490378,0.071075,0.065080,0.012865,25,0,23,15,8,5
411,-0.054579,-0.056091,-0.082380,0.014625,0.014887,0.025082,0.225323,0.234515,0.292967,0.233246,...,-0.042684,-0.025692,-0.028928,-0.061149,25,0,23,16,11,1
412,-0.242060,-0.244057,-0.288320,-0.134908,-0.139699,-0.208673,-0.238502,-0.241715,-0.257637,-0.356707,...,-0.480054,-0.119080,-0.118926,-0.121816,25,0,23,17,16,8


## Feature Selectoin

In [45]:
y = EEG_data.target

In [46]:
EEG_selected_features = Feature_Selection(EEG_data, 0.002)

In [47]:
EEG_selected_features['target'] = y

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  EEG_selected_features['target'] = y


### Saving data after preprocessing

In [48]:
EEG_selected_features.to_csv('preprocessed_EEG.csv')

# Reading ECG data

In [49]:
ECG_data = pd.read_csv('ECG.csv')

## Feature Selection

In [50]:
ECG_data.drop(['Unnamed: 0'], axis=1, inplace=True)
y = EEG_data.target

In [51]:
ECG_selected_features = Feature_Selection(ECG_data , 0.04)

In [52]:
ECG_selected_features['target'] = y

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ECG_selected_features['target'] = y


In [53]:
ECG_selected_features

Unnamed: 0,video_name,video,t_maximum,r_maximum,p_maximum,p_wave_range,r_wave_range,t_wave_range,p_std,t_std,...,HRV_HF,HRV_Ca,HRV_MadNN,HRV_LFHF,HRV_SD1a,HRV_S,HRV_SD2a,HRV_Prc20NN,HRV_MedianNN,target
0,10,1,0.304967,0.303868,0.303361,0.300593,0.300583,0.301179,0.302031,0.301568,...,3.627621,1.049158,1.000000,0.410145,1.077186,1.123119,1.086576,1.022222,1.018617,7
1,2,2,0.463792,0.463157,0.462588,0.458902,0.458872,0.457957,0.450559,0.451041,...,0.450490,0.992888,0.843750,0.572625,0.643784,0.536757,0.835036,0.899086,0.896907,0
2,14,3,0.173535,0.173054,0.172699,0.171435,0.171425,0.171590,0.175866,0.175870,...,4.489243,0.753055,0.923684,0.717908,1.020287,0.888804,0.762874,1.036170,1.021278,2
3,15,4,0.364580,0.367103,0.366418,0.362371,0.362476,0.359214,0.361792,0.358729,...,0.717087,0.785177,0.812500,1.222864,0.716231,0.652118,0.726417,0.950862,0.946860,5
4,0,5,0.448406,0.447230,0.446740,0.443627,0.443526,0.443794,0.455058,0.455849,...,1.381814,0.980383,1.238095,0.086450,1.428653,1.643596,1.264587,1.051366,1.075601,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,3,14,0.359561,0.356780,0.356227,0.356253,0.356180,0.358061,0.361053,0.362877,...,1.542216,1.377285,1.225000,0.475373,1.126089,1.069732,1.399569,0.901887,0.904388,4
410,8,15,0.196205,0.197722,0.197340,0.196114,0.196112,0.193974,0.201432,0.199192,...,45.577436,1.359336,0.805556,0.589387,1.702592,2.730477,1.437567,1.098454,1.052036,5
411,11,16,0.309164,0.312539,0.312048,0.309153,0.309166,0.304928,0.314175,0.310041,...,1.232263,1.006177,1.500000,1.095624,1.101661,1.555066,1.344214,0.955363,0.971491,1
412,16,17,0.234146,0.236728,0.236258,0.233132,0.235904,0.232618,0.238167,0.238349,...,2.565754,0.875462,1.181818,0.771653,1.052785,1.565247,1.322039,0.958621,0.977642,8


### Saving data after preprocessing

In [54]:
ECG_selected_features.to_csv('preprocessed_ECG.csv')