In [7]:
#!/usr/bin/env python
# coding: utf-8

# ## loading SEED IV EOG data

# In[4]:


import scipy.io as sio

import numpy as np
from datetime import datetime, date, time
import pandas as pd
import os

#from lib.get_data import cutting_window_data


import pickle
import json
from random import shuffle


# ### get labels corresponding to eog data

# In[7]:


#directory where all data is stored
dir_ = 'data/'
#dir where eog feature smooth data is stored 
eog_dir_ = dir_ + 'eye_feature_smooth/'

#extracted labels from README file
session1_label = [1,2,3,0,2,0,0,1,0,1,2,1,1,1,2,3,2,2,3,3,0,3,0,3]
session2_label =  [2,1,3,0,0,2,0,2,3,3,2,3,2,0,1,1,2,1,0,3,0,1,3,1]
session3_label = [1,2,2,1,3,3,3,1,1,2,1,0,2,3,3,0,2,3,0,0,2,0,1,0]

label_dict = {
    '1': session1_label,
    '2': session2_label,
    '3': session3_label
}

nb_of_trials = 24

channel_order = pd.read_excel(dir_ + 'Channel Order.xlsx', sheet_name=0, header = None)
channel_names = channel_order[0].values
channel_names


# ### get eog data from different sessions

# In[37]:


#get all necessary metainformation
filenamelist = list()
participantlist = list()


#store all information in this dict
metadata_ = dict() 

for sess in os.listdir(eog_dir_):
    sess_label = label_dict[sess]
    for file in os.listdir(eog_dir_ + '/' + sess):
        #get absolute path
        file_abs_path = eog_dir_ + '/' + sess + '/' + file
        #append absolute path due to nested session folders
        filenamelist.append(file_abs_path)
        
        #get participant id from filename (first number) -> in total we have 15 participants
        participant = int( file.split('_')[0] )
        participantlist.append(participant)
        metadata_[file] = dict()
        metadata_[file]['participant'] = participant
        metadata_[file]['labels'] = sess_label
        metadata_[file]['abspath'] = file_abs_path
        metadata_[file]['sess'] = sess
        
        print('file: {0}, participant:{1}'.format(file, participant))


# In[68]:


#save all data in this dict
all_data = dict()

index = 0
#open each file
for file in metadata_.keys():
    print(metadata_[file])
    participant = metadata_[file]['participant']
    labels = metadata_[file]['labels']
    sess = metadata_[file]['sess']
    #open file
    mat = sio.loadmat(metadata_[file]['abspath'])
    
    #each mat file consists of 24 trials
    session_keys = [x for x in mat.keys() if 'eye' in x]
    for session_key_ in session_keys:
        session_key_index = int( session_key_.split('_')[1] ) #get number (x) after 'eye_{x}'
        
        data_session = mat[session_key_]
        
        #get label of specific session
        label = labels[session_key_index - 1]
        
        
        all_data[index] = dict()
        all_data[index]['data'] = data_session
        all_data[index]['participant'] = participant
        all_data[index]['label'] = label
        all_data[index]['sess'] = sess
        index +=1
    





file: 4_20151111.mat, participant:4
file: 9_20151028.mat, participant:9
file: 8_20151103.mat, participant:8
file: 10_20151014.mat, participant:10
file: 2_20150915.mat, participant:2
file: 11_20150916.mat, participant:11
file: 7_20150715.mat, participant:7
file: 6_20150507.mat, participant:6
file: 15_20150508.mat, participant:15
file: 12_20150725.mat, participant:12
file: 14_20151205.mat, participant:14
file: 3_20150919.mat, participant:3
file: 5_20160406.mat, participant:5
file: 13_20151115.mat, participant:13
file: 1_20160518.mat, participant:1
file: 3_20151101.mat, participant:3
file: 11_20151011.mat, participant:11
file: 2_20151012.mat, participant:2
file: 1_20161126.mat, participant:1
file: 8_20151117.mat, participant:8
file: 5_20160420.mat, participant:5
file: 13_20161130.mat, participant:13
file: 6_20150512.mat, participant:6
file: 10_20151023.mat, participant:10
file: 14_20151215.mat, participant:14
file: 15_20150527.mat, participant:15
file: 7_20150721.mat, participant:7
file: 

In [33]:

# In[76]:


shape_ = list()
for i in all_data.keys():
    print( all_data[i]['data'].shape ) #uneven number of columns
    shape_.append(all_data[i]['data'].shape[1] ) 
    
max_length = max(shape_)
#function for padding 0 on top of indicvidual eye gaze features
def pad(A, max_length):
    mat = np.zeros(shape = (31, max_length) ) #we have 31 features in A
    mat[:, :A.shape[1]] = A
    return mat

for i in all_data.keys():
    #padding on 
    all_data[i]['data'] = pad(all_data[i]['data'], max_length)
    


(31, 42)
(31, 23)
(31, 49)
(31, 32)
(31, 22)
(31, 40)
(31, 38)
(31, 52)
(31, 36)
(31, 42)
(31, 12)
(31, 27)
(31, 54)
(31, 42)
(31, 64)
(31, 35)
(31, 17)
(31, 44)
(31, 35)
(31, 12)
(31, 28)
(31, 28)
(31, 43)
(31, 34)
(31, 42)
(31, 23)
(31, 49)
(31, 32)
(31, 22)
(31, 40)
(31, 38)
(31, 52)
(31, 36)
(31, 42)
(31, 12)
(31, 27)
(31, 54)
(31, 42)
(31, 64)
(31, 35)
(31, 17)
(31, 44)
(31, 35)
(31, 12)
(31, 28)
(31, 28)
(31, 43)
(31, 34)
(31, 42)
(31, 23)
(31, 49)
(31, 32)
(31, 22)
(31, 40)
(31, 38)
(31, 52)
(31, 36)
(31, 42)
(31, 12)
(31, 27)
(31, 54)
(31, 42)
(31, 64)
(31, 35)
(31, 17)
(31, 44)
(31, 35)
(31, 12)
(31, 28)
(31, 28)
(31, 43)
(31, 34)
(31, 42)
(31, 23)
(31, 49)
(31, 32)
(31, 22)
(31, 40)
(31, 38)
(31, 52)
(31, 36)
(31, 42)
(31, 12)
(31, 27)
(31, 54)
(31, 42)
(31, 64)
(31, 35)
(31, 17)
(31, 44)
(31, 35)
(31, 12)
(31, 28)
(31, 28)
(31, 43)
(31, 34)
(31, 42)
(31, 23)
(31, 49)
(31, 32)
(31, 22)
(31, 40)
(31, 38)
(31, 52)
(31, 36)
(31, 42)
(31, 12)
(31, 27)
(31, 54)
(31, 42)
(31, 64)
(

In [10]:
col_names = ['pupil_diam_x_mean', 'pupil_diam_y_mean', 'pupil_diam_x_sd', 'pupil_diam_y_sd', 'diam_x_psd0.2Hz',
               'diam_y_psd0.2Hz', 'diam_x_psd0.4Hz', 'diam_y_psd0.4Hz', 'diam_x_psd0.6Hz', 
               'diam_y_psd0.6Hz', 'diam_x_psd1Hz', 'diam_y_psd1Hz',
				'disp_mean', 'disp_y_mean', 'disp_x_sd', 'disp_y_sd',
				'fix_mean', 'fix_sd',
				'sacc_dur_mean', 'sacc_dur_sd', 'sacc_ampl_mean', 'sacc_ampl_sd',
				'Blink frequency', 'fixation frequency', 'fixation duration maximum', 
               'fixation dispersion total', 'fixation dispersion maximum', 
               'saccade frequency', 'saccade duration average', 
               'saccade amplitude average', 'saccade latency average']
len(col_names)

31

In [34]:


# In[97]:


#minimum number of obs
min_index = min(shape_)

max_index = max(shape_)

X = [] #eye input data
Y = [] #label data
P = [] #participant data
S = [] #session data

# In[98]:


for index_obs in all_data.keys():
    #print(index_obs)
    X.append( all_data[index_obs]['data'][:, :] )
    Y.append( all_data[index_obs]['label'] )
    P.append( all_data[index_obs]['participant'] )
    S.append(all_data[index_obs]['sess'])
    
X = np.asarray(X)
Y = np.asarray(Y)
P = np.asarray(P)
S = np.asarray(S)
names = np.asarray(col_names)


np.save(eog_dir_+ 'data_X', X)
np.save(eog_dir_+ 'data_Y', Y)
np.save(eog_dir_+ 'data_P', P)
np.save(eog_dir_ + 'data_colnames', col_names)
