In [46]:
import pandas as pd
import numpy as np
import h5py
import os
import scipy.io

In [47]:
dataset_folder = os.path.join('MODA_GC-master', 'output') # path to dataset
user_groups = ['exp', 'ne', 're'] # expert, nonexpert and researcher groups which were used to form the GC
freq_sampling_rate = 100 # Hz

In [48]:
# phase#1 : 405 blocks of 115 sec sampled at 100 Hz with a NaN between each block (4657905 samples)
#		an unseen block is marked NaN.

# load scores averaged across scorers

# this dictionary will contain avg score for each group - exp, ne and re
average_score_vect = {}
for group in user_groups:
    file_name = 'scoreAvg_{}_p1.mat'.format(group)
    file_path = os.path.join(dataset_folder, group, file_name)

    # read file of each group and save data to python dictionary
    with h5py.File(file_path, 'r') as file:
        data = file.get('scoreVectorAvg').value
        # save each dictionary to average_score_vect dictionary
        average_score_vect[group] = data

  data = file.get('scoreVectorAvg').value


In [49]:
gc_vects = {}
for group in user_groups:
    file_name = 'GCVect_{}_p1.mat'.format(group)
    file_path = os.path.join(dataset_folder, group, file_name)
    gc_vects[group] = scipy.io.loadmat(file_path) # load matlab matrix

In [50]:
# load events for each spindle
gc_events = {}
for group in user_groups:
    file_name = 'GC_spindlesLst_4EEGVect_{}_p1.txt'.format(group)
    file_path = os.path.join(dataset_folder, group, file_name)

    with open(file_path, 'r') as file:
        line = file.readline() # read first line
        cols = line.split() # columns are first line in the file
        rows = []


        line = file.readline()
        while line:
            line = line.split() # remove whitespaces
            rows.append(line) # append to rows
            line = file.readline() # read single line

        gc_events[group] = pd.DataFrame(rows, columns=cols)

In [51]:
# finally load annotation data
annot_data = {}
for group in user_groups:
    folder_path = os.path.join(dataset_folder, group, 'annotFiles')

    annot_data[group] = {}
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)

        with open(file_path, 'r') as file:
            line = file.readline() # read columns
            cols = line.split() # strip whitespaces
            rows = [] # rows

            line = file.readline()
            while line:
                line = line.split()
                rows.append(line)
                file = file.readline()

            annot_data[group][file_name] = pd.DataFrame(rows, columns=cols)