In [82]:
import pandas as pd
import numpy as np
import h5py
import os
import scipy.io

In [83]:
# Path to the dataset output folder (relative path).
dataset_folder = os.path.join('MODA_GC-3.0', 'output')

# Scorer groups that were used to form the GC: expert, non-expert and researcher.
user_groups = ['exp', 'ne', 're']

# Sampling rate in Hz.
freq_sampling_rate = 100

In [84]:
# Phase 1: 405 blocks of 115 sec sampled at 100 Hz with a NaN between each block
# (4657905 samples). An unseen block is marked NaN.

# Load the scores averaged across scorers.

# This dictionary will contain the average score vector of each group - exp, ne and re.
average_score_vect = {}
for group in user_groups:
    file_name = 'scoreAvg_{}_p1.mat'.format(group)
    file_path = os.path.join(dataset_folder, group, file_name)

    # Read the file of each group; the context manager closes it after reading.
    with h5py.File(file_path, 'r') as file:
        # `Dataset.value` is deprecated and was removed in h5py 3.0; indexing
        # with an empty tuple reads the whole dataset into memory instead.
        # Indexing the file (rather than calling `.get`) raises a KeyError
        # naming the dataset if it is missing, instead of an AttributeError
        # on None.
        data = file['scoreVectorAvg'][()]
        # Store the loaded array under its group key.
        average_score_vect[group] = data

  data = file.get('scoreVectorAvg').value


In [85]:
# Load the 'GCVect' variable of each group's MATLAB file as a numpy array.
gc_vects = {}
for group in user_groups:
    file_name = 'GCVect_{}_p1.mat'.format(group)
    file_path = os.path.join(dataset_folder, group, file_name)

    # loadmat returns a dict of the variables stored in the .mat file;
    # only the 'GCVect' entry is kept.
    mat_contents = scipy.io.loadmat(file_path)
    gc_vects[group] = np.array(mat_contents['GCVect'])

In [86]:
# Load the spindle event list of each group into a DataFrame.
gc_events = {}
for group in user_groups:
    file_name = 'GC_spindlesLst_4EEGVect_{}_p1.txt'.format(group)
    file_path = os.path.join(dataset_folder, group, file_name)

    # Tab-separated file: first row is the header, first column is the index.
    events_table = pd.read_csv(file_path, sep='\t', header=0, index_col=0)
    # Drop the last column (presumably an empty column caused by a trailing
    # tab - TODO confirm against the raw files).
    gc_events[group] = events_table.iloc[:, :-1]

In [87]:
# Nested dictionary of pandas data frames: group -> annotation file name -> frame.
annotation_data = {}

for group in user_groups:
    folder_path = os.path.join(dataset_folder, group, 'annotFiles')

    annotation_data[group] = {}
    # os.listdir returns entries in arbitrary order; sorting makes the
    # insertion order of the dictionary reproducible between runs.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)

        # Skip anything that is not a regular file (e.g. a
        # '.ipynb_checkpoints' directory), which read_csv cannot open.
        if not os.path.isfile(file_path):
            continue

        # Read the tab-separated text file and drop the last column.
        annotation_data[group][file_name] = pd.read_csv(file_path, sep='\t', header=0).iloc[:, :-1]

In [88]:
def using_clump(a):
    """Split `a` into its contiguous runs of valid values.

    Invalid entries (NaN or inf, as masked by `np.ma.masked_invalid`) act as
    separators and are not part of the result.

    Parameters
    ----------
    a : array-like
        Input data; `np.ma.clump_unmasked` is documented for one-dimensional
        input.

    Returns
    -------
    numpy.ndarray
        One entry per contiguous run of valid values, in order of appearance.
        The result is a regular 2-D array only when all runs have the same
        length.
    """
    # Slices delimiting each contiguous unmasked (valid) region.
    valid_runs = np.ma.clump_unmasked(np.ma.masked_invalid(a))

    segments = []
    for run in valid_runs:
        # Slice the original input, not the masked copy.
        segments.append(a[run])
    return np.array(segments)

# Per-group features: each group's GC vector split with using_clump.
# todo 414 instead of 415
features = dict()
for group in gc_vects:
    vect = gc_vects[group]
    features[group] = using_clump(vect)

In [89]:
# Inspect one annotation file of the expert group ...
exp_annotation_sample = annotation_data['exp']['01-01-0001_MODA_GS.txt']
print(exp_annotation_sample)

# ... and the expert group's event table.
exp_events = gc_events['exp']
print(exp_events)

       startSec  durationSec      eventName
0     8218.9336       115.00  segmentViewed
1     8488.9336       115.00  segmentViewed
2    14548.9336       115.00  segmentViewed
3    18328.9336       115.00  segmentViewed
4    19398.9336       115.00  segmentViewed
..          ...          ...            ...
128  25604.3336         0.87        spindle
129  25608.3736         1.04        spindle
130  25612.8936         0.51        spindle
131  25616.7136         0.40        spindle
132  25630.1036         0.84        spindle

[133 rows x 3 columns]
          startSamples  durationSamples  startSec  durationSec
eventNum                                                      
1                 1441               57     14.41         0.57
2                 2147               69     21.47         0.69
3                 3878              106     38.78         1.06
4                 5001               55     50.01         0.55
5                 5808               79     58.08         0.79
...    