In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

### Reading the data

In [2]:
# Load the input table from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='ml_data.csv')

In [3]:
# Show the loaded frame as the cell output (pandas elides the middle rows and
# columns of a large frame in the rendering).
data

Unnamed: 0.1,Unnamed: 0,psdtheta_1,psdalpha_1,psdbeta_1,psdtheta_2,psdalpha_2,psdbeta_2,psdtheta_3,psdalpha_3,psdbeta_3,...,age,gender,participant,video,video_name,target_emotion,valence,arousal,dominance,stress_bin
0,0,-0.150843,-0.150624,-0.166261,-0.145117,-0.143640,-0.119170,-0.475254,-0.468150,-0.330365,...,22,male,1,1,Searching for Bobby Fischer,calmness,4,3,2,0.0
1,1,-0.128521,-0.129717,-0.155580,-0.105668,-0.108871,-0.168438,0.302383,0.258150,-0.121029,...,22,male,1,2,D.O.A.,surprise,3,3,1,0.0
2,2,-0.103209,-0.103515,-0.122637,-0.109388,-0.111235,-0.140875,0.063157,0.057745,0.006707,...,22,male,1,3,The Hangover,amusement,5,4,4,0.0
3,3,-0.181723,-0.184244,-0.230825,-0.113030,-0.114522,-0.139100,-0.067104,-0.068218,-0.071644,...,22,male,1,4,The Ring,fear,4,3,2,1.0
4,4,-0.043894,-0.042331,-0.032673,-0.148676,-0.152181,-0.201394,0.314590,0.301284,0.191730,...,22,male,1,5,300,excitement,4,4,4,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,409,-0.259796,-0.264659,-0.351959,-0.215325,-0.223008,-0.347720,-0.282586,-0.281040,-0.263415,...,25,male,23,14,Gentlemans Agreement,anger,2,2,2,1.0
410,410,-0.220586,-0.227271,-0.333407,-0.083146,-0.084745,-0.115249,-0.452527,-0.461702,-0.518178,...,25,male,23,15,Psycho,fear,2,2,2,1.0
411,411,-0.054579,-0.056091,-0.082380,0.014625,0.014887,0.025082,0.225323,0.234515,0.292967,...,25,male,23,16,The Bourne Identitiy,excitement,3,3,2,0.0
412,412,-0.242060,-0.244057,-0.288320,-0.134908,-0.139699,-0.208673,-0.238502,-0.241715,-0.257637,...,25,male,23,17,The Shawshank Redemption,sadness,2,2,4,


### Mapping values to be numeric

In [4]:
# Integer code assigned to every emotion label and every gender label.
emotion_codes = {
    'surprise': 0, 'excitement': 1, 'amusement': 2, 'happiness': 3, 'anger': 4,
    'fear': 5, 'disgust': 6, 'calmness': 7, 'sadness': 8,
}
gender_codes = {'male': 0, 'female': 1}

# Series.map() turns every value that is missing from the dict into NaN, so
# look for unexpected labels first and fail loudly instead of silently
# writing NaN targets.
unknown_emotions = set(data['target_emotion'].dropna()) - set(emotion_codes)
if unknown_emotions:
    raise ValueError(
        f"Unmapped target_emotion values: {sorted(unknown_emotions, key=repr)}"
    )
data['target'] = data['target_emotion'].map(emotion_codes)

# `gender` is overwritten in place, so a second run of this cell would map the
# already encoded 0/1 values to NaN. Only encode while the column still holds
# something other than the codes themselves.
gender_values = set(data['gender'].dropna())
if not gender_values <= set(gender_codes.values()):
    unknown_genders = gender_values - set(gender_codes)
    if unknown_genders:
        raise ValueError(
            f"Unmapped gender values: {sorted(unknown_genders, key=repr)}"
        )
    data['gender'] = data['gender'].map(gender_codes)

In [5]:
# Replace every video title with an integer code.
# `le` keeps the fitted encoder and `numeric_videos` keeps the encoded array,
# exactly as the separate fit/transform steps did.
le = LabelEncoder()
numeric_videos = le.fit_transform(data['video_name'])
data['video_name'] = numeric_videos

### Dropping unnecessary columns

In [6]:
# Remove, in place, the columns that are not kept in the saved table.
# `target_emotion` has already been encoded into `target` by an earlier cell.
# NOTE(review): 'Unnamed: 0' looks like a leftover saved index; the frame shown
# after this cell still lists an index-like first column — confirm whether that
# one should be dropped as well.
data.drop(
    columns=[
        'target_emotion',
        'valence',
        'arousal',
        'dominance',
        'Unnamed: 0',
        'stress_bin',
    ],
    inplace=True,
)

In [7]:
# Show the frame again to check the result of the encoding and drop steps.
data

Unnamed: 0,psdtheta_1,psdalpha_1,psdbeta_1,psdtheta_2,psdalpha_2,psdbeta_2,psdtheta_3,psdalpha_3,psdbeta_3,psdtheta_4,...,psdbeta_13,psdtheta_14,psdalpha_14,psdbeta_14,age,gender,participant,video,video_name,target
0,-0.150843,-0.150624,-0.166261,-0.145117,-0.143640,-0.119170,-0.475254,-0.468150,-0.330365,-0.427103,...,-0.419732,-0.195563,-0.195812,-0.204449,22,0,1,1,10,7
1,-0.128521,-0.129717,-0.155580,-0.105668,-0.108871,-0.168438,0.302383,0.258150,-0.121029,-0.052994,...,-0.175393,-0.153317,-0.153977,-0.166154,22,0,1,2,2,0
2,-0.103209,-0.103515,-0.122637,-0.109388,-0.111235,-0.140875,0.063157,0.057745,0.006707,-0.081610,...,-0.117177,-0.046608,-0.046963,-0.051510,22,0,1,3,14,2
3,-0.181723,-0.184244,-0.230825,-0.113030,-0.114522,-0.139100,-0.067104,-0.068218,-0.071644,-0.130311,...,-0.311654,-0.150296,-0.150274,-0.153138,22,0,1,4,15,5
4,-0.043894,-0.042331,-0.032673,-0.148676,-0.152181,-0.201394,0.314590,0.301284,0.191730,-0.023894,...,0.125982,0.075506,0.078709,0.108396,22,0,1,5,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409,-0.259796,-0.264659,-0.351959,-0.215325,-0.223008,-0.347720,-0.282586,-0.281040,-0.263415,-0.350000,...,0.063415,-0.052881,-0.052248,-0.049186,25,0,23,14,3,4
410,-0.220586,-0.227271,-0.333407,-0.083146,-0.084745,-0.115249,-0.452527,-0.461702,-0.518178,-0.551788,...,-0.490378,0.071075,0.065080,0.012865,25,0,23,15,8,5
411,-0.054579,-0.056091,-0.082380,0.014625,0.014887,0.025082,0.225323,0.234515,0.292967,0.233246,...,-0.042684,-0.025692,-0.028928,-0.061149,25,0,23,16,11,1
412,-0.242060,-0.244057,-0.288320,-0.134908,-0.139699,-0.208673,-0.238502,-0.241715,-0.257637,-0.356707,...,-0.480054,-0.119080,-0.118926,-0.121816,25,0,23,17,16,8


### Saving data after preprocessing

In [8]:
# Write the preprocessed frame to disk.
# index=True is the pandas default, spelled out here: the row index is written
# as an extra first column with an empty header.
# NOTE(review): the 'Unnamed: 0' / 'Unnamed: 0.1' columns in the loaded input
# look like indexes saved this way — confirm what downstream readers expect
# before switching to index=False.
data.to_csv('preprocessed_data.csv', index=True)