In [1]:
import numpy as np
import pandas as pd
import scipy.io as sio
from sklearn import preprocessing as pre
import neurokit2 as nk
import scipy.io as sio
from scipy import signal
import math

In [2]:
path='DREAMER.mat'
raw=sio.loadmat(path)

In [3]:
def preprocessing_and_feature_extraction_ECG(file_name_csv,raw):
    data_ECG={}
    for participant in range(0,23):
            for video in range(0,18):
                # load raw baseline and stimuli data for left and right
                basl_l=raw['DREAMER'][0,0]['Data'][0,participant]['ECG'][0,0]['baseline'][0,0][video,0][:,0]
                stim_l=raw['DREAMER'][0,0]['Data'][0,participant]['ECG'][0,0]['stimuli'][0,0][video,0][:,0]
                basl_r=raw['DREAMER'][0,0]['Data'][0,participant]['ECG'][0,0]['baseline'][0,0][video,0][:,1]
                stim_r=raw['DREAMER'][0,0]['Data'][0,participant]['ECG'][0,0]['stimuli'][0,0][video,0][:,1]
                # process with neurokit
                ecg_signals_b_l,info_b_l=nk.ecg_process(basl_l,sampling_rate=256)
                ecg_signals_s_l,info_s_l=nk.ecg_process(stim_l,sampling_rate=256)
                ecg_signals_b_r,info_b_r=nk.ecg_process(basl_r,sampling_rate=256)
                ecg_signals_s_r,info_s_r=nk.ecg_process(stim_r,sampling_rate=256)
                # divide stimuli features by baseline features
                # would be interesting to compare classification accuracy when we don't do this
                features_ecg_l=nk.ecg_intervalrelated(ecg_signals_s_l)/nk.ecg_intervalrelated(ecg_signals_b_l)
                features_ecg_r=nk.ecg_intervalrelated(ecg_signals_s_r)/nk.ecg_intervalrelated(ecg_signals_b_r)
                # average left and right features
                # would be interesting to compare classification accuracy when we rather include both left and right features
                features_ecg=(features_ecg_l+features_ecg_r)/2
                if not len(data_ECG):
                    data_ECG=features_ecg
                else:
                    data_ECG=pd.concat([data_ECG,features_ecg],ignore_index=True)

In [None]:
df_ECG = preprocessing_and_feature_extraction_ECG('DREAMER_Extracted_EEG.csv', raw)
df_ECG.head()

In [4]:
def preprocessing(raw,feature):
    overall=signal.firwin(9,[0.0625,0.46875],window='hamming')
    theta=signal.firwin(9,[0.0625,0.125],window='hamming')
    alpha=signal.firwin(9,[0.125,0.203125],window='hamming')
    beta=signal.firwin(9,[0.203125,0.46875],window='hamming')
    filtedData=signal.filtfilt(overall,1,raw)
    filtedtheta=signal.filtfilt(theta,1,filtedData)
    filtedalpha=signal.filtfilt(alpha,1,filtedData)
    filtedbeta=signal.filtfilt(beta,1,filtedData)
    ftheta,psdtheta=signal.welch(filtedtheta,nperseg=256)
    falpha,psdalpha=signal.welch(filtedalpha,nperseg=256)
    fbeta,psdbeta=signal.welch(filtedbeta,nperseg=256)
    feature.append(max(psdtheta))
    feature.append(max(psdalpha))
    feature.append(max(psdbeta))
    return feature

In [8]:
def feature_extraction_EEG(file_name_csv,raw):
    EEG_tmp=np.zeros((23,18,42))
    for participant in range(0,23):
        for video in range(0,18):
            for i in range(0,14):
                B,S=[],[]
                basl=raw['DREAMER'][0,0]['Data'][0,participant]['EEG'][0,0]['baseline'][0,0][video,0][:,i]
                stim=raw['DREAMER'][0,0]['Data'][0,participant]['EEG'][0,0]['stimuli'][0,0][video,0][:,i]
                B=preprocessing(basl,B)
                S=preprocessing(stim,S)
                Extrod=np.divide(S,B)
                EEG_tmp[participant,video,3*i]=Extrod[0]
                EEG_tmp[participant,video,3*i+1]=Extrod[1]
                EEG_tmp[participant,video,3*i+2]=Extrod[2]
    col=[]
    for i in range(0,14):
        col.append('psdtheta_'+str(i + 1)+'_un')
        col.append('psdalpha_'+str(i + 1)+'_un')
        col.append('psdbeta_'+str(i + 1)+'_un')
    EEG=pd.DataFrame(EEG_tmp.reshape((23 * 18,EEG_tmp.shape[2])),columns=col)
    scaler=pre.StandardScaler()
    for i in range(len(col)):
        EEG[col[i][:-3]]=scaler.fit_transform(EEG[[col[i]]])
    EEG.drop(col,axis=1,inplace=True)
    EEG.to_csv(file_name_csv)
    return EEG

In [18]:
df_EEG = feature_extraction_EEG('DREAMER_Extracted_EEG.csv', raw)
df_EEG.head()

Unnamed: 0,psdtheta_1,psdalpha_1,psdbeta_1,psdtheta_2,psdalpha_2,psdbeta_2,psdtheta_3,psdalpha_3,psdbeta_3,psdtheta_4,...,psdbeta_11,psdtheta_12,psdalpha_12,psdbeta_12,psdtheta_13,psdalpha_13,psdbeta_13,psdtheta_14,psdalpha_14,psdbeta_14
0,-0.137607,-0.137238,-0.116997,-0.080612,-0.081118,-0.108773,-0.146683,-0.182929,-0.229771,-0.273779,...,-0.174814,-0.272796,-0.279034,-0.275621,-0.243177,-0.242829,-0.232958,-0.219589,-0.219693,-0.222478
1,-0.139174,-0.138685,-0.117599,-0.096207,-0.096338,-0.110293,-0.119051,-0.10417,-0.091178,-0.132652,...,-0.220249,-0.190519,-0.178008,-0.167682,-0.19781,-0.203598,-0.209676,-0.200044,-0.200146,-0.206911
2,-0.12769,-0.127539,-0.11042,-0.101203,-0.101259,-0.098381,-0.105765,-0.105699,0.042209,-0.162592,...,-0.087127,0.023607,0.023691,0.018827,-0.210256,-0.209778,-0.165222,-0.150181,-0.150377,-0.071193
3,-0.158972,-0.158123,-0.131111,-0.100674,-0.100739,-0.097964,-0.069629,-0.030775,-0.003793,-0.164088,...,-0.12427,-0.078613,-0.078306,-0.07881,-0.231014,-0.230617,-0.195014,-0.19355,-0.193687,-0.158883
4,-0.11048,-0.110713,-0.099031,-0.111506,-0.111337,-0.089051,0.049651,0.050012,0.054975,-0.120093,...,-0.096444,-0.02871,-0.028455,-0.030801,-0.145732,-0.145007,-0.18647,-0.016868,-0.017255,-0.116913


In [12]:
def feature_extraction_EEG_end_baseline(file_name_csv,raw,secs):
    # 128 Hz is the sampling rate for the EEG data
    fs_EEG = 128 
    N_EEG = math.ceil(fs_EEG*secs) 
    EEG_tmp=np.zeros((23,18,42))
    for participant in range(0,23):
        for video in range(0,18):
            for i in range(0,14):
                B,S=[],[]
                basl=raw['DREAMER'][0,0]['Data'][0,participant]['EEG'][0,0]['baseline'][0,0][video,0][-1-N_EEG:-1,i]
                Extrod=preprocessing(basl,B)
                EEG_tmp[participant,video,3*i]=Extrod[0]
                EEG_tmp[participant,video,3*i+1]=Extrod[1]
                EEG_tmp[participant,video,3*i+2]=Extrod[2]
    col=[]
    for i in range(0,14):
        col.append('psdtheta_'+str(i + 1)+'_un')
        col.append('psdalpha_'+str(i + 1)+'_un')
        col.append('psdbeta_'+str(i + 1)+'_un')
    EEG=pd.DataFrame(EEG_tmp.reshape((23 * 18,EEG_tmp.shape[2])),columns=col)
    scaler=pre.StandardScaler()
    for i in range(len(col)):
        EEG[col[i][:-3]]=scaler.fit_transform(EEG[[col[i]]])
    EEG.drop(col,axis=1,inplace=True)
    EEG.to_csv(file_name_csv)
    return EEG

In [19]:
last_four_secs_EEG = feature_extraction_EEG_end_baseline('Extracted_EEG_last4s.csv', raw, 4)
last_four_secs_EEG.head()

Unnamed: 0,psdtheta_1,psdalpha_1,psdbeta_1,psdtheta_2,psdalpha_2,psdbeta_2,psdtheta_3,psdalpha_3,psdbeta_3,psdtheta_4,...,psdbeta_11,psdtheta_12,psdalpha_12,psdbeta_12,psdtheta_13,psdalpha_13,psdbeta_13,psdtheta_14,psdalpha_14,psdbeta_14
0,-0.181097,-0.183079,-0.162763,-0.164206,-0.166005,-0.165722,-0.151888,-0.15131,-0.101514,-0.107896,...,-0.166487,-0.181325,-0.183209,-0.147505,-0.146856,-0.150206,-0.144895,-0.091804,-0.0967,-0.109874
1,-0.180781,-0.182764,-0.162617,-0.163658,-0.16546,-0.165768,-0.155448,-0.155951,-0.125252,-0.143867,...,-0.16131,-0.18255,-0.184799,-0.153178,-0.216407,-0.219037,-0.1617,-0.190384,-0.19334,-0.138654
2,-0.181023,-0.183005,-0.162728,-0.163551,-0.165352,-0.165732,-0.156683,-0.157927,-0.143867,-0.143194,...,-0.176795,-0.18367,-0.186054,-0.157299,-0.21528,-0.217918,-0.175408,-0.192917,-0.195818,-0.150653
3,-0.180947,-0.18293,-0.162694,-0.163492,-0.165294,-0.165701,-0.155842,-0.156396,-0.127366,-0.139328,...,-0.15852,-0.18226,-0.184475,-0.152115,-0.180175,-0.18319,-0.159505,-0.146952,-0.150778,-0.137
4,-0.181134,-0.183115,-0.162783,-0.163567,-0.165369,-0.165722,-0.154435,-0.154809,-0.119832,-0.14451,...,-0.14695,-0.181521,-0.183647,-0.149395,-0.17917,-0.182199,-0.149346,-0.129895,-0.134071,-0.127716


In [16]:
# load features extracted from preprocessed EEG and ECG data
path_EEG='DREAMER_Extracted_EEG.csv'
path_ECG='DREAMER_Extracted_ECG.csv'
data_EEG=pd.read_csv(path_EEG).drop(['Unnamed: 0'],axis=1)
data_ECG=pd.read_csv(path_ECG).drop(['Unnamed: 0'],axis=1)
# load mat file containing raw biosignal, emotion, participant, and video data
raw=sio.loadmat('DREAMER.mat')

# create new dataframe with emotion, participant, and video data
a=np.zeros((23,18,9),dtype=object)
for participant in range(0,23):
    for video in range(0,18):
        a[participant,video,0]=raw['DREAMER'][0,0]['Data'][0,participant]['Age'][0][0][0]
        a[participant,video,1]=raw['DREAMER'][0,0]['Data'][0,participant]['Gender'][0][0][0]
        a[participant,video,2]=participant+1
        a[participant,video,3]=video+1
        a[participant,video,4]=['Searching for Bobby Fischer','D.O.A.', 'The Hangover', 'The Ring', '300',
                  'National Lampoon\'s VanWilder', 'Wall-E', 'Crash', 'My Girl', 'The Fly',
                  'Pride and Prejudice', 'Modern Times', 'Remember the Titans', 'Gentlemans Agreement',
                  'Psycho', 'The Bourne Identitiy', 'The Shawshank Redemption', 'The Departed'][video]
        a[participant,video,5]=['calmness', 'surprise', 'amusement', 'fear', 'excitement', 'disgust',
                  'happiness', 'anger', 'sadness', 'disgust', 'calmness', 'amusement',
                  'happiness', 'anger', 'fear', 'excitement', 'sadness', 'surprise'][video]
        a[participant,video,6]=raw['DREAMER'][0,0]['Data'][0,participant]['ScoreValence'][0,0][video,0]
        a[participant,video,7]=raw['DREAMER'][0,0]['Data'][0,participant]['ScoreArousal'][0,0][video,0]
        a[participant,video,8]=raw['DREAMER'][0,0]['Data'][0,participant]['ScoreDominance'][0,0][video,0]
b=pd.DataFrame(a.reshape((23*18,a.shape[2])),columns=['Age','Gender','Participant','Video','Video_Name','Target_Emotion','Valence','Arousal','Dominance'])
# combine feature extraction dataframes with the new dataframe
all_data=pd.concat([data_EEG,data_ECG,b],axis=1)
print(all_data.head())
all_data.to_csv('DREAMER_Preprocessed_NotTransformed_NotThresholded.csv')

   psdtheta_1  psdalpha_1  psdbeta_1  psdtheta_2  psdalpha_2  psdbeta_2  \
0   -0.137607   -0.137238  -0.116997   -0.080612   -0.081118  -0.108773   
1   -0.139174   -0.138685  -0.117599   -0.096207   -0.096338  -0.110293   
2   -0.127690   -0.127539  -0.110420   -0.101203   -0.101259  -0.098381   
3   -0.158972   -0.158123  -0.131111   -0.100674   -0.100739  -0.097964   
4   -0.110480   -0.110713  -0.099031   -0.111506   -0.111337  -0.089051   

   psdtheta_3  psdalpha_3  psdbeta_3  psdtheta_4    ...        SampEn  Age  \
0   -0.146683   -0.182929  -0.229771   -0.273779    ...     -0.591364   22   
1   -0.119051   -0.104170  -0.091178   -0.132652    ...      0.497739   22   
2   -0.105765   -0.105699   0.042209   -0.162592    ...     -0.598682   22   
3   -0.069629   -0.030775  -0.003793   -0.164088    ...     -0.753730   22   
4    0.049651    0.050012   0.054975   -0.120093    ...      0.520783   22   

   Gender  Participant  Video                   Video_Name  Target_Emotion  \
0 

In [17]:
# minmax
All_Features = pd.read_csv("DREAMER_Preprocessed_NotTransformed_NotThresholded.csv")
del All_Features['Unnamed: 0']
Last4s_EEG_Features = pd.read_csv('Extracted_EEG_last4s.csv')
del Last4s_EEG_Features['Unnamed: 0']
for column in All_Features.columns:
    if not(All_Features[column].dtype == np.object):
        All_Features[column]=(All_Features[column]-np.min(All_Features[column]))/(np.max(All_Features[column])-np.min(All_Features[column]))
for column in Last4s_EEG_Features.columns:
    if not(Last4s_EEG_Features[column].dtype == np.object):
        Last4s_EEG_Features[column]=(Last4s_EEG_Features[column]-np.min(Last4s_EEG_Features[column]))/(np.max(Last4s_EEG_Features[column])-np.min(Last4s_EEG_Features[column]))

FileNotFoundError: File b'Extracted_EEG_last4s.csv' does not exist