In [1]:
import pandas as pd
import os
import json
import numpy as np

In [2]:
def get_bckg_seiz_list(label_lists):
    """Extract seizure onset/offset times from parsed annotation rows.

    Parameters
    ----------
    label_lists : list of list of str
        One entry per annotation row, already split on commas. Column 1 is
        read as the start time, column 2 as the stop time and column 3 as
        the label.

    Returns
    -------
    tuple of (list, list)
        ``(onsets, offsets)`` as floats, one pair per row whose label is not
        ``'bckg'``. When there is no such row (background-only recording, or
        no usable rows at all) the placeholder ``([0], [0])`` is returned.
    """
    onsets = []
    offsets = []
    for label_list in label_lists:
        # Skip blank/truncated rows (a trailing empty line splits to ['']).
        if len(label_list) < 4:
            continue
        # Background rows carry no seizure interval, wherever they appear.
        if label_list[3] == 'bckg':
            continue
        onsets.append(float(label_list[1]))
        offsets.append(float(label_list[2]))
    if not onsets:
        return [0], [0]
    return onsets, offsets

In [3]:
# Root of the label tree to scan; adjust to the local layout.
root_dir = './input/TUSZv2_labels'  # replace this with your directory

# Collect one dict per annotation file and build the frame once at the end:
# concatenating inside the loop is quadratic and leaves every row with index 0.
label_records = []
for dir_name, subdir_list, file_list in os.walk(root_dir):
    for fname in file_list:
        # Only '.csv_bi' annotation files are of interest here.
        if not fname.endswith('.csv_bi'):
            continue
        file_path = os.path.join(dir_name, fname)

        # Read everything first so the file handle is released before parsing.
        with open(file_path, 'r') as file:
            labels_lines = file.readlines()

        # The third line is expected to look like '... = <duration> secs'.
        duration = labels_lines[2].split('secs')[0].split('= ')[1]
        # Annotation rows start at the seventh line; the lines above are header.
        label_lists = [line.strip().split(',') for line in labels_lines[6:]]
        onset, offset = get_bckg_seiz_list(label_lists)
        label_records.append({
            "filename": fname.split('.csv_bi')[0],
            # Drop the first three '/'-separated components (those of root_dir).
            "filepath": "/".join(dir_name.split("/")[3:]),
            "length": float(duration),
            "onsets": onset,
            "offsets": offset,
        })

df = pd.DataFrame(label_records,
                  columns=['filename', 'filepath', 'onsets', 'offsets', 'length'])

df.sample(n=5)

Unnamed: 0,filename,filepath,onsets,offsets,length
0,aaaaanrp_s007_t004,TUSZv2/edf/train/aaaaanrp/s007_2013_08_08/01_t...,[0],[0],300.0
0,aaaaaool_s007_t002,TUSZv2/edf/dev/aaaaaool/s007_2013_07_22/01_tcp_ar,[0],[0],687.0
0,aaaaanme_s006_t008,TUSZv2/edf/train/aaaaanme/s006_2014_09_23/01_t...,[0],[0],831.0
0,aaaaasvq_s004_t002,TUSZv2/edf/eval/aaaaasvq/s004_2015_04_02/01_tc...,[0],[0],601.0
0,aaaaaoek_s006_t004,TUSZv2/edf/dev/aaaaaoek/s006_2013_03_02/01_tcp_ar,[0],[0],300.0


In [4]:
# Root of the label tree to scan; the per-recording header files live here too.
root_dir = './input/TUSZv2_labels'

# Collect one dict per header file and build the frame once at the end:
# concatenating inside the loop is quadratic and leaves every row with index 0.
header_records = []
for dir_name, subdir_list, file_list in os.walk(root_dir):
    for fname in file_list:
        # Only '.json' header files are of interest here.
        if not fname.endswith('.json'):
            continue
        file_path = os.path.join(dir_name, fname)

        with open(file_path, 'r') as file:
            headers = json.load(file)

        # Each header entry describes one channel: its label and sample rate.
        header_records.append({
            "filename": fname.split('_header.json')[0],
            "fs": [ch_header['sample_rate'] for ch_header in headers],
            "channels": [ch_header['label'] for ch_header in headers],
        })

fs_df = pd.DataFrame(header_records, columns=['filename', 'fs', 'channels'])

fs_df.sample(n=5)

Unnamed: 0,filename,fs,channels
0,aaaaaoya_s008_t002,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-..."
0,aaaaaovk_s001_t001,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-..."
0,aaaaaplb_s002_t011,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-..."
0,aaaaaqkh_s001_t004,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-..."
0,aaaaaoek_s031_t013,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-..."


In [5]:
# There are duplicated files in eval/aaaaaqvx/s003_2015_08_24 and eval/aaaaaqvx/s010_2015_08_27
# Keep only the first row seen for each filename, in both frames, so the later
# merge on 'filename' matches at most one row per side.
fs_df = fs_df.drop_duplicates(subset='filename', keep='first')
df = df.drop_duplicates(subset='filename', keep='first')

In [6]:
# Show the distinct channel-label lists found across recordings.
fs_df['channels'].drop_duplicates()

0    [EEG FP1-LE, EEG FP2-LE, EEG F3-LE, EEG F4-LE,...
0    [EEG FP1-LE, EEG FP2-LE, EEG F3-LE, EEG F4-LE,...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-LE, EEG FP2-LE, EEG F3-LE, EEG F4-LE,...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...
0    [EEG 

In [7]:
# Explode the lists in the 'channels' column: one row per (recording, channel label)
df_exploded = fs_df.explode('channels')

# Count how often each channel label occurs across all recordings
# (value_counts returns occurrence counts, most frequent first)
unique_channels = df_exploded['channels'].value_counts()
unique_channels

EEG FP2-REF    6814
EEG FP1-REF    6814
EEG F4-REF     6814
EEG C3-REF     6814
EEG C4-REF     6814
               ... 
ECG EKG-REF      15
EEG OZ-REF       15
PULSE RATE       15
EEG 23-LE        14
EEG 24-LE        14
Name: channels, Length: 202, dtype: int64

In [8]:
# Electrode pairs that make up the bipolar montage. Each name is later resolved
# to a channel index by find_index, i.e. matched against 'EEG <name>...' labels.
bipolar_montage = [('FP1', 'F7'), ('F7', 'T3'), ('T3', 'T5'), ('T5', 'O1'),
                   ('FP1', 'F3'), ('F3', 'C3'), ('C3', 'P3'), ('P3', 'O1'),
                   ('FP2', 'F8'), ('F8', 'T4'), ('T4', 'T6'), ('T6', 'O2'),
                   ('FP2', 'F4'), ('F4', 'C4'), ('C4', 'P4'), ('P4', 'O2'),
                   ('FZ', 'CZ'), ('PZ', 'CZ'), ('C3', 'CZ'), ('C4', 'CZ'),]

def find_index(channels, word):
    """Return the position of the EEG channel recorded at electrode ``word``.

    A label matches when it is exactly ``'EEG <word>'`` or starts with
    ``'EEG <word>-'`` (electrode name followed by a reference suffix, e.g.
    ``'EEG C3-REF'`` or ``'EEG C3-LE'``). Requiring the separator keeps
    ``'C3'`` from matching a longer electrode name such as ``'EEG C3P-REF'``.

    Parameters
    ----------
    channels : sequence of str
        Channel labels of one recording.
    word : str
        Electrode name without the ``'EEG '`` prefix.

    Returns
    -------
    int
        Index of the first matching label, or -1 when none matches.
    """
    target = 'EEG {}'.format(word)
    for i, channel in enumerate(channels):
        if channel == target or channel.startswith(target + '-'):
            return i
    return -1

def get_bipolar_montage_index(channels):
    """Translate every electrode pair of ``bipolar_montage`` into channel indices.

    Returns a list of ``(first_index, second_index)`` tuples in the order of
    ``bipolar_montage``; an index is -1 when ``find_index`` finds no channel
    for that electrode in ``channels``.
    """
    return [
        (find_index(channels, first), find_index(channels, second))
        for first, second in bipolar_montage
    ]

In [9]:
# Resolve the montage electrode pairs to channel indices for every recording.
fs_df['bipolar_montage'] = fs_df['channels'].apply(get_bipolar_montage_index)

In [10]:
# Spot-check a few random rows, now including the 'bipolar_montage' column.
fs_df.sample(3)

Unnamed: 0,filename,fs,channels,bipolar_montage
0,aaaaandx_s005_t005,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),..."
0,aaaaarcs_s003_t004,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),..."
0,aaaaamof_s003_t000,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),..."


In [11]:
def check_fs_values(row):
    """Tell whether all montage channels of a recording share one sampling rate.

    ``row`` must provide ``'bipolar_montage'`` (a list of index pairs, with -1
    marking a channel that was not found) and ``'fs'`` (per-channel rates).
    Returns True only when exactly one distinct rate is seen across the
    referenced channels.
    """
    montage_pairs = row['bipolar_montage']
    rates = row['fs']
    # Not-found markers (-1) are skipped so they never index into `rates`.
    distinct_rates = {
        rates[position]
        for pair in montage_pairs
        for position in pair
        if position != -1
    }
    return len(distinct_rates) == 1

In [12]:
# Flag, per recording, whether its montage channels all share one sampling rate.
fs_df['same_elements'] = fs_df.apply(check_fs_values, axis=1)
# Display the recordings that fail the check.
fs_df.loc[fs_df['same_elements'].ne(True)]

Unnamed: 0,filename,fs,channels,bipolar_montage,same_elements


In [13]:
# Use the first channel's rate as the recording-level sampling frequency.
# NOTE(review): the consistency check only covers the montage channels, so this
# assumes channel 0 is one of them — confirm.
fs_df['sampling_frequency'] = fs_df['fs'].apply(lambda x: x[0])

In [14]:
# Drop the helper flag and rename 'fs' to 'fs_list' in one chained step.
fs_df = fs_df.drop(columns='same_elements').rename(columns={'fs': 'fs_list'})
fs_df.sample(n=3)

Unnamed: 0,filename,fs_list,channels,bipolar_montage,sampling_frequency
0,aaaaatvr_s002_t011,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256
0,aaaaaiat_s009_t002,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256
0,aaaaaiea_s004_t004,"[400, 400, 400, 400, 400, 400, 400, 400, 400, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",400


## Merge the two dataframes

In [15]:
# Number of labelled recordings before the merge.
rows_before_merge = len(df)

# Attach header information to every labelled recording. `validate` makes
# pandas raise a MergeError if 'filename' is duplicated on either side, so the
# merge can never add rows and the row-count comparison below cannot be masked
# by duplicated rows compensating for dropped ones.
merged_df = pd.merge(df, fs_df, on='filename', how='inner', validate='one_to_one')

# Number of rows that survived the inner join.
rows_after_merge = len(merged_df)

# True when at least one labelled recording has no matching header row.
data_missed = rows_before_merge != rows_after_merge
merged_df.sample(n=5)

Unnamed: 0,filename,filepath,onsets,offsets,length,fs_list,channels,bipolar_montage,sampling_frequency
7064,aaaaatba_s004_t000,TUSZv2/edf/eval/aaaaatba/s004_2015_03_11/01_tc...,[0],[0],300.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256
1550,aaaaamhb_s006_t004,TUSZv2/edf/train/aaaaamhb/s006_2012_05_09/01_t...,[0],[0],960.0,"[512, 512, 512, 512, 512, 512, 512, 512, 512, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",512
4925,aaaaamqq_s010_t000,TUSZv2/edf/dev/aaaaamqq/s010_2012_01_07/01_tcp_ar,[0],[0],300.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256
4070,aaaaalxt_s002_t008,TUSZv2/edf/train/aaaaalxt/s002_2011_03_25/03_t...,[0],[0],601.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256
2344,aaaaabnm_s001_t000,TUSZv2/edf/train/aaaaabnm/s001_2003_05_05/02_t...,[0],[0],1217.0,"[250, 250, 250, 250, 250, 250, 250, 250, 250, ...","[EEG FP1-LE, EEG FP2-LE, EEG F3-LE, EEG F4-LE,...","[(0, 12), (12, 14), (14, 16), (16, 10), (0, 2)...",250


In [16]:
# Report whether the row count changed across the merge.
print("Any data missed during merging:", data_missed)

Any data missed during merging: False


In [17]:
# Split each path once; component 2 is the data split (train/dev/eval) and
# component 3 the patient id.
path_parts = merged_df['filepath'].map(lambda path: path.split('/'))
merged_df['mode'] = path_parts.map(lambda parts: parts[2])
merged_df['patient'] = path_parts.map(lambda parts: parts[3])
merged_df.sample(3)

Unnamed: 0,filename,filepath,onsets,offsets,length,fs_list,channels,bipolar_montage,sampling_frequency,mode,patient
7262,aaaaaghb_s010_t001,TUSZv2/edf/eval/aaaaaghb/s010_2014_07_02/01_tc...,"[10.9989, 84.4329, 204.5445]","[18.5129, 106.0303, 220.697]",311.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,eval,aaaaaghb
6417,aaaaaqrs_s007_t005,TUSZv2/edf/dev/aaaaaqrs/s007_2014_02_06/01_tcp_ar,[0],[0],601.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,dev,aaaaaqrs
5113,aaaaaoek_s031_t009,TUSZv2/edf/dev/aaaaaoek/s031_2013_08_24/01_tcp_ar,[0],[0],300.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,dev,aaaaaoek


## Labeling

In [18]:
def get_labels(x, window_sec=12):
    """Build the per-window seizure label vector for one recording.

    Parameters
    ----------
    x : mapping
        Row providing ``'onsets'``, ``'offsets'`` and ``'length'``, all
        expressed in the same time unit as ``window_sec``.
    window_sec : float, optional
        Window length. Defaults to 12, the value that used to be hard-coded.

    Returns
    -------
    numpy.ndarray
        Integer array with one entry per complete window
        (``length // window_sec``); 1 marks a seizure window, 0 background.
    """
    onsets = x['onsets']
    offsets = x['offsets']
    length = x['length']
    labels = np.zeros(int(length // window_sec), dtype=int)
    # A single offset equal to 0 is the placeholder for "no seizure annotated".
    if len(offsets) == 1 and offsets[0] == 0:
        return labels

    for on, off in zip(onsets, offsets):
        # Snap both event edges to the nearest window boundary.
        on_index = int(np.round(on / window_sec))
        off_index = int(np.round(off / window_sec))
        # Events whose edges snap to the same boundary mark no window.
        if on_index != off_index:
            labels[on_index:off_index] = 1
    return labels

# Compute the label vector row by row (axis=1 hands each row to get_labels).
merged_df['labels'] = merged_df.apply(get_labels, axis=1)

In [19]:
# Spot-check a few random rows, now including the 'labels' column.
merged_df.sample(n=5)

Unnamed: 0,filename,filepath,onsets,offsets,length,fs_list,channels,bipolar_montage,sampling_frequency,mode,patient,labels
4903,aaaaaoxa_s002_t000,TUSZv2/edf/dev/aaaaaoxa/s002_2013_03_11/01_tcp_ar,[0],[0],743.0,"[250, 250, 250, 250, 250, 250, 250, 250, 250, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",250,dev,aaaaaoxa,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2535,aaaaaltg_s008_t001,TUSZv2/edf/train/aaaaaltg/s008_2014_08_21/01_t...,[0],[0],300.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,train,aaaaaltg,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4747,aaaaagus_s001_t000,TUSZv2/edf/dev/aaaaagus/s001_2008_03_31/02_tcp_le,[0],[0],488.0,"[250, 250, 250, 250, 250, 250, 250, 250, 250, ...","[EEG FP1-LE, EEG FP2-LE, EEG F3-LE, EEG F4-LE,...","[(0, 12), (12, 14), (14, 16), (16, 10), (0, 2)...",250,dev,aaaaagus,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
484,aaaaacyf_s007_t001,TUSZv2/edf/train/aaaaacyf/s007_2015_03_30/01_t...,[0],[0],51.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,train,aaaaacyf,"[0, 0, 0, 0]"
3888,aaaaaqvr_s004_t002,TUSZv2/edf/train/aaaaaqvr/s004_2014_01_23/01_t...,[0],[0],601.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,train,aaaaaqvr,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [21]:
# Index the table by 'filename' (rebinding the name instead of mutating in place).
merged_df = merged_df.set_index('filename')

In [23]:
# Persist the merged metadata table as JSON in the current working directory.
merged_df.to_json('TUSZv2_info.json')

## Statistics

In [26]:
# Number of recordings per sampling frequency within each split.
merged_df.groupby('mode')['sampling_frequency'].value_counts()

mode   sampling_frequency
dev    256                   1526
       250                    163
       400                     75
       512                     36
       1000                    32
eval   256                    831
       1000                    18
       250                     16
train  256                   3013
       250                    930
       400                    580
       512                     94
       1000                    47
Name: sampling_frequency, dtype: int64

In [27]:
def get_seizure_length(x):
    """Sum the durations of all annotated events of one recording.

    ``x`` must provide ``'onsets'`` and ``'offsets'``; the result is the sum of
    ``offset - onset`` over the paired entries, which is 0 for the
    ``[0]``/``[0]`` placeholder used for recordings without seizures.
    """
    return sum(end - start for start, end in zip(x['onsets'], x['offsets']))

# Total annotated seizure duration per recording (axis=1 hands each row to the helper).
merged_df['seizure_length'] = merged_df.apply(get_seizure_length, axis=1)
merged_df.sample(n=5)

Unnamed: 0_level_0,filepath,onsets,offsets,length,fs_list,channels,bipolar_montage,sampling_frequency,mode,patient,labels,seizure_length
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
aaaaaprj_s004_t000,TUSZv2/edf/train/aaaaaprj/s004_2013_08_10/01_t...,[0],[0],300.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,train,aaaaaprj,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
aaaaaoxa_s006_t001,TUSZv2/edf/dev/aaaaaoxa/s006_2013_03_27/01_tcp_ar,[0],[0],1069.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,dev,aaaaaoxa,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
aaaaatds_s004_t008,TUSZv2/edf/train/aaaaatds/s004_2015_04_28/01_t...,[0],[0],300.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,train,aaaaatds,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
aaaaaraf_s003_t003,TUSZv2/edf/eval/aaaaaraf/s003_2014_09_23/01_tc...,[0],[0],327.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,eval,aaaaaraf,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0
aaaaarsm_s002_t007,TUSZv2/edf/train/aaaaarsm/s002_2014_09_16/01_t...,[91.2652],[180.0936],276.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,train,aaaaarsm,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, ...",88.8284


In [34]:
# Total annotated seizure time per split. Dividing by 3600 converts to hours,
# assuming annotation times are in seconds (the duration header is parsed up to 'secs').
merged_df.groupby('mode')['seizure_length'].sum()/3600

mode
dev      18.586235
eval      7.568529
train    47.943315
Name: seizure_length, dtype: float64

In [30]:
# Number of windows labelled 1 (seizure) in each recording's label vector.
merged_df['seiz_window_num'] = merged_df['labels'].apply(lambda x: np.sum(x))
merged_df.sample(n=5)

Unnamed: 0_level_0,filepath,onsets,offsets,length,fs_list,channels,bipolar_montage,sampling_frequency,mode,patient,labels,seizure_length,seiz_window_num
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
aaaaaroo_s004_t004,TUSZv2/edf/train/aaaaaroo/s004_2014_08_13/01_t...,"[129.0345, 254.0817]","[171.1462, 405.9846]",601.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,train,aaaaaroo,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, ...",194.0146,16
aaaaahxq_s004_t001,TUSZv2/edf/train/aaaaahxq/s004_2009_03_15/02_t...,[0],[0],315.0,"[250, 250, 250, 250, 250, 250, 250, 250, 250, ...","[EEG FP1-LE, EEG FP2-LE, EEG F3-LE, EEG F4-LE,...","[(0, 12), (12, 14), (14, 16), (16, 10), (0, 2)...",250,train,aaaaahxq,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0,0
aaaaaqvx_s007_t001,TUSZv2/edf/eval/aaaaaqvx/s007_2015_08_27/01_tc...,[0],[0],24.0,"[250, 250, 250, 250, 250, 250, 250, 250, 250, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",250,eval,aaaaaqvx,"[0, 0]",0.0,0
aaaaambs_s009_t004,TUSZv2/edf/train/aaaaambs/s009_2015_08_26/01_t...,[0],[0],601.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,train,aaaaambs,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0,0
aaaaaool_s008_t008,TUSZv2/edf/dev/aaaaaool/s008_2013_07_23/01_tcp_ar,[0],[0],601.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,dev,aaaaaool,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0,0


In [32]:
# Seizure time per split as derived from the labels: number of seizure windows
# times 12, the window length used in get_labels.
merged_df.groupby('mode')['seiz_window_num'].sum() * 12

mode
dev       70980
eval      27360
train    174504
Name: seiz_window_num, dtype: int64

In [33]:
# Number of distinct patients per split.
merged_df.groupby('mode')['patient'].nunique()

mode
dev       53
eval      43
train    579
Name: patient, dtype: int64

## Scoring

In [41]:
import epilepsy_performance_metrics.src.timescoring.annotations as annotation

Annotation(events=[], mask=array([False]), fs=0.08333333333333333)

In [50]:
# Turn each label vector into a list of events via Annotation. The second
# argument is 1/12, i.e. one label per 12 time units, matching the window
# length used in get_labels (presumably Annotation's `fs` — TODO confirm).
# Label vectors with at most one entry get an empty event list without
# calling Annotation at all.
merged_df['events'] = merged_df['labels'].apply(lambda x: annotation.Annotation(x, 1/12).events if len(x)> 1 else [])

## Number of Events

In [62]:
# Count events per recording first, then add the counts up per split. This
# gives the same totals without concatenating every event list of a split
# (summing an object column of lists) just to take its length.
merged_df['events'].apply(len).groupby(merged_df['mode']).sum()

mode
dev       944
eval      410
train    2127
Name: events, dtype: int64

In [63]:
# Spot-check a few random rows, now including the 'events' column.
merged_df.sample(n=5)

Unnamed: 0_level_0,filepath,onsets,offsets,length,fs_list,channels,bipolar_montage,sampling_frequency,mode,patient,labels,seizure_length,seiz_window_num,events
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
aaaaaict_s003_t000,TUSZv2/edf/dev/aaaaaict/s003_2011_01_05/01_tcp_ar,"[301.8333, 1158.7778]","[323.8333, 1170.3889]",1228.0,"[250, 250, 250, 250, 250, 250, 250, 250, 250, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",250,dev,aaaaaict,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",33.6111,3,"[(300.0, 324.0), (1164.0, 1176.0)]"
aaaaasfw_s003_t009,TUSZv2/edf/eval/aaaaasfw/s003_2015_04_06/01_tc...,[0],[0],601.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,eval,aaaaasfw,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0,0,[]
aaaaakmw_s003_t011,TUSZv2/edf/train/aaaaakmw/s003_2010_11_02/01_t...,[0],[0],60.0,"[250, 250, 250, 250, 250, 250, 250, 250, 250, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",250,train,aaaaakmw,"[0, 0, 0, 0, 0]",0.0,0,[]
aaaaaiwu_s002_t002,TUSZv2/edf/train/aaaaaiwu/s002_2009_08_07/03_t...,[0],[0],55.0,"[400, 400, 400, 400, 400, 400, 400, 400, 400, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",400,train,aaaaaiwu,"[0, 0, 0, 0]",0.0,0,[]
aaaaakoe_s002_t002,TUSZv2/edf/train/aaaaakoe/s002_2010_10_12/03_t...,"[1.0, 110.2422, 235.834, 406.9023, 884.1563]","[80.1758, 207.8164, 329.8242, 476.1484, 917.0]",918.0,"[256, 256, 256, 256, 256, 256, 256, 256, 256, ...","[EEG FP1-REF, EEG FP2-REF, EEG F3-REF, EEG F4-...","[(0, 10), (10, 12), (12, 14), (14, 8), (0, 2),...",256,train,aaaaakoe,"[1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, ...",372.83,30,"[(0, 84.0), (108.0, 204.0), (240.0, 324.0), (4..."
