In [54]:
import mne

import pandas as pd
import numpy as np

from utils_eeg.data_loader import DataLoader as EEGDataLoader
from utils_eeg.tuh_eeg_utils import tuh_eeg_apply_TUH_bipolar_montage, tuh_eeg_load_data_with_annotations, MONTAGE_ALL
print(MONTAGE_ALL)
PATH_TRAINING = "/home/Bachelor-Thesis-JLB/TUH_EEG_SEIZ/edf/train"
PATH_TEST = "/home/Bachelor-Thesis-JLB/TUH_EEG_SEIZ/edf/dev"
config_dataset = {
    "extensions": [".edf", ".csv_bi"],
    "l_freq": 0.5,
    "h_freq": 50,
    "resample_freq": 50,
}

['FP1-F7', 'F7-T3', 'T3-T5', 'T5-O1', 'FP2-F8', 'F8-T4', 'T4-T6', 'T6-O2', 'T3-C3', 'C3-CZ', 'CZ-C4', 'C4-T4', 'FP1-F3', 'F3-C3', 'C3-P3', 'P3-O1', 'FP2-F4', 'F4-C4', 'C4-P4', 'P4-O2']


In [55]:
eeg_data_loader = EEGDataLoader(PATH_TRAINING, extensions=[".edf",".csv_bi"]).file_tuples

In [66]:


def preprocess_eeg_data(file_path):
    raw = tuh_eeg_load_data_with_annotations(edf_file=file_path[0], annotations_csv_file=file_path[1])

    # apply bipolar montage
    raw = tuh_eeg_apply_TUH_bipolar_montage(raw, file_path[0], only_return_bipolar=True)

    # only select channels that are included in all recordings
    raw = raw.pick(MONTAGE_ALL)

    # filter data
    raw = raw.filter(l_freq=config_dataset["l_freq"], h_freq=config_dataset["h_freq"], fir_design='firwin', method='fir', verbose=False)

    # resample data
    raw = raw.resample(config_dataset["resample_freq"], npad='auto', verbose=False)

    #  further processing

    return raw


def add_annotations(df, annotation):
    df["OT"] = 0
    for row in annotation.iterrows():
        # print(row[1])
        if row[1]["label"] == "seiz":
            df.loc[(df["time"] >= row[1]["start_time"]) & (df["time"] <= row[1]["stop_time"]), "OT"] = 1
    return df
    

In [74]:
df = None
count = 0
eeg_data_loader = EEGDataLoader(PATH_TRAINING, extensions=[".edf",".csv_bi"]).file_tuples
# interate through eeg_data_loader in random order
np.random.seed(42)
eeg_data_loader = np.array(eeg_data_loader)
np.random.shuffle(eeg_data_loader)


for idx, (eeg, annotation) in enumerate(eeg_data_loader):
    # print(eeg, annotation)
    df_annotation = pd.read_csv(annotation, skiprows=5)
    # display(df_annotation.head())
    # display(df_annotation["label"].values)
    if "seiz" not in df_annotation["label"].values:
        continue
    print(f"Seizure on {idx}")
    count += 1
    # display(df_annotation.head())
    raw = preprocess_eeg_data((eeg, annotation))

    if df is None:
        df = raw.to_data_frame()
        df = add_annotations(df, df_annotation)
    else:
        df = pd.concat([df, add_annotations(raw.to_data_frame(), df_annotation)])
    
    if count > 10:
        break



df

Seizure on 1
Seizure on 3
Seizure on 15
Seizure on 17
Seizure on 18
Seizure on 26
Seizure on 33
Seizure on 36
Seizure on 45
Seizure on 49
Seizure on 59


Unnamed: 0,time,FP1-F7,F7-T3,T3-T5,T5-O1,FP2-F8,F8-T4,T4-T6,T6-O2,T3-C3,...,C4-T4,FP1-F3,F3-C3,C3-P3,P3-O1,FP2-F4,F4-C4,C4-P4,P4-O2,OT
0,0.00,-0.024861,-0.009711,0.029561,0.031523,0.003674,-0.034006,0.025238,0.014558,0.026063,...,-0.020818,-0.017191,0.008681,0.010458,0.024562,-0.023345,0.013831,0.015163,0.003815,0
1,0.02,0.024829,0.009692,-0.029625,-0.031479,-0.003708,0.033883,-0.025075,-0.014598,-0.025988,...,0.020624,0.017185,-0.008652,-0.010551,-0.024565,0.023311,-0.013761,-0.015188,-0.003861,0
2,0.04,-0.024798,-0.009673,0.029690,0.031436,0.003743,-0.033761,0.024913,0.014637,0.025914,...,-0.020431,-0.017179,0.008622,0.010643,0.024568,-0.023278,0.013690,0.015212,0.003907,0
3,0.06,0.024766,0.009654,-0.029754,-0.031392,-0.003778,0.033639,-0.024751,-0.014676,-0.025839,...,0.020237,0.017173,-0.008593,-0.010736,-0.024571,0.023244,-0.013620,-0.015236,-0.003954,0
4,0.08,-0.024735,-0.009635,0.029818,0.031349,0.003812,-0.033517,0.024588,0.014715,0.025765,...,-0.020043,-0.017168,0.008563,0.010828,0.024574,-0.023211,0.013549,0.015261,0.004000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46795,935.90,5275.967885,-1914.593555,603.489320,4471.383912,-1251.651994,-7315.392441,10408.300147,-11943.962785,2086.596315,...,-1846.170395,1686.190829,3761.779815,-985.570691,3973.847608,-3069.027760,-3651.846280,5628.512595,-9010.345628,0
46796,935.92,10295.183686,-5639.677835,169.134000,5974.810500,-712.389547,-10611.057557,10211.813363,-7318.442938,1755.497965,...,-3015.940529,1244.787396,5166.216419,-8128.961113,12517.407649,-3772.145188,-4535.361388,7321.180973,-7443.751077,0
46797,935.94,10805.213405,-4542.159354,1391.468250,380.231866,-2405.476275,-11253.182331,11452.444051,-5871.400918,489.142358,...,-3213.067047,2484.962919,4267.233490,-3324.138890,4606.696647,-5006.757219,-5438.834339,10657.030372,-8289.054286,0
46798,935.96,7391.323468,-2275.075052,-620.357151,2520.253507,2053.970071,-10877.710997,12538.306920,-12005.272638,-497.090960,...,-11.376034,1327.411539,3291.745917,-4531.241714,6928.229030,-3583.659019,-5228.705874,10812.298739,-10290.640491,0


In [75]:
# # change first column to datetime format YYYY/MM/DD 
# df["Unnamed: 0"] = pd.to_datetime(df["Unnamed: 0"], format="%Y-%m-%d")
# df.head()

# # df.to_csv("dataset/seizure/seizure.csv")

# df = pd.read_csv("dataset/seizure/seizure.csv")
# df.head()

In [76]:
df

Unnamed: 0,time,FP1-F7,F7-T3,T3-T5,T5-O1,FP2-F8,F8-T4,T4-T6,T6-O2,T3-C3,...,C4-T4,FP1-F3,F3-C3,C3-P3,P3-O1,FP2-F4,F4-C4,C4-P4,P4-O2,OT
0,0.00,-0.024861,-0.009711,0.029561,0.031523,0.003674,-0.034006,0.025238,0.014558,0.026063,...,-0.020818,-0.017191,0.008681,0.010458,0.024562,-0.023345,0.013831,0.015163,0.003815,0
1,0.02,0.024829,0.009692,-0.029625,-0.031479,-0.003708,0.033883,-0.025075,-0.014598,-0.025988,...,0.020624,0.017185,-0.008652,-0.010551,-0.024565,0.023311,-0.013761,-0.015188,-0.003861,0
2,0.04,-0.024798,-0.009673,0.029690,0.031436,0.003743,-0.033761,0.024913,0.014637,0.025914,...,-0.020431,-0.017179,0.008622,0.010643,0.024568,-0.023278,0.013690,0.015212,0.003907,0
3,0.06,0.024766,0.009654,-0.029754,-0.031392,-0.003778,0.033639,-0.024751,-0.014676,-0.025839,...,0.020237,0.017173,-0.008593,-0.010736,-0.024571,0.023244,-0.013620,-0.015236,-0.003954,0
4,0.08,-0.024735,-0.009635,0.029818,0.031349,0.003812,-0.033517,0.024588,0.014715,0.025765,...,-0.020043,-0.017168,0.008563,0.010828,0.024574,-0.023211,0.013549,0.015261,0.004000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46795,935.90,5275.967885,-1914.593555,603.489320,4471.383912,-1251.651994,-7315.392441,10408.300147,-11943.962785,2086.596315,...,-1846.170395,1686.190829,3761.779815,-985.570691,3973.847608,-3069.027760,-3651.846280,5628.512595,-9010.345628,0
46796,935.92,10295.183686,-5639.677835,169.134000,5974.810500,-712.389547,-10611.057557,10211.813363,-7318.442938,1755.497965,...,-3015.940529,1244.787396,5166.216419,-8128.961113,12517.407649,-3772.145188,-4535.361388,7321.180973,-7443.751077,0
46797,935.94,10805.213405,-4542.159354,1391.468250,380.231866,-2405.476275,-11253.182331,11452.444051,-5871.400918,489.142358,...,-3213.067047,2484.962919,4267.233490,-3324.138890,4606.696647,-5006.757219,-5438.834339,10657.030372,-8289.054286,0
46798,935.96,7391.323468,-2275.075052,-620.357151,2520.253507,2053.970071,-10877.710997,12538.306920,-12005.272638,-497.090960,...,-11.376034,1327.411539,3291.745917,-4531.241714,6928.229030,-3583.659019,-5228.705874,10812.298739,-10290.640491,0


In [77]:
# drop time column
df = df.drop(columns=["time"])
df.head()


Unnamed: 0,FP1-F7,F7-T3,T3-T5,T5-O1,FP2-F8,F8-T4,T4-T6,T6-O2,T3-C3,C3-CZ,...,C4-T4,FP1-F3,F3-C3,C3-P3,P3-O1,FP2-F4,F4-C4,C4-P4,P4-O2,OT
0,-0.024861,-0.009711,0.029561,0.031523,0.003674,-0.034006,0.025238,0.014558,0.026063,-0.016778,...,-0.020818,-0.017191,0.008681,0.010458,0.024562,-0.023345,0.013831,0.015163,0.003815,0
1,0.024829,0.009692,-0.029625,-0.031479,-0.003708,0.033883,-0.025075,-0.014598,-0.025988,0.016751,...,0.020624,0.017185,-0.008652,-0.010551,-0.024565,0.023311,-0.013761,-0.015188,-0.003861,0
2,-0.024798,-0.009673,0.02969,0.031436,0.003743,-0.033761,0.024913,0.014637,0.025914,-0.016724,...,-0.020431,-0.017179,0.008622,0.010643,0.024568,-0.023278,0.01369,0.015212,0.003907,0
3,0.024766,0.009654,-0.029754,-0.031392,-0.003778,0.033639,-0.024751,-0.014676,-0.025839,0.016697,...,0.020237,0.017173,-0.008593,-0.010736,-0.024571,0.023244,-0.01362,-0.015236,-0.003954,0
4,-0.024735,-0.009635,0.029818,0.031349,0.003812,-0.033517,0.024588,0.014715,0.025765,-0.016671,...,-0.020043,-0.017168,0.008563,0.010828,0.024574,-0.023211,0.013549,0.015261,0.004,0


In [83]:
# df.to_csv("dataset/seizure/seizure.csv")
df = pd.read_csv("dataset/seizure/seizure.csv")

In [84]:
df

Unnamed: 0.1,Unnamed: 0,FP1-F7,F7-T3,T3-T5,T5-O1,FP2-F8,F8-T4,T4-T6,T6-O2,T3-C3,...,C4-T4,FP1-F3,F3-C3,C3-P3,P3-O1,FP2-F4,F4-C4,C4-P4,P4-O2,OT
0,0,-0.024861,-0.009711,0.029561,0.031523,0.003674,-0.034006,0.025238,0.014558,0.026063,...,-0.020818,-0.017191,0.008681,0.010458,0.024562,-0.023345,0.013831,0.015163,0.003815,0
1,1,0.024829,0.009692,-0.029625,-0.031479,-0.003708,0.033883,-0.025075,-0.014598,-0.025988,...,0.020624,0.017185,-0.008652,-0.010551,-0.024565,0.023311,-0.013761,-0.015188,-0.003861,0
2,2,-0.024798,-0.009673,0.029690,0.031436,0.003743,-0.033761,0.024913,0.014637,0.025914,...,-0.020431,-0.017179,0.008622,0.010643,0.024568,-0.023278,0.013690,0.015212,0.003907,0
3,3,0.024766,0.009654,-0.029754,-0.031392,-0.003778,0.033639,-0.024751,-0.014676,-0.025839,...,0.020237,0.017173,-0.008593,-0.010736,-0.024571,0.023244,-0.013620,-0.015236,-0.003954,0
4,4,-0.024735,-0.009635,0.029818,0.031349,0.003812,-0.033517,0.024588,0.014715,0.025765,...,-0.020043,-0.017168,0.008563,0.010828,0.024574,-0.023211,0.013549,0.015261,0.004000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389045,46795,5275.967885,-1914.593555,603.489320,4471.383912,-1251.651994,-7315.392441,10408.300147,-11943.962785,2086.596315,...,-1846.170395,1686.190829,3761.779815,-985.570691,3973.847608,-3069.027760,-3651.846280,5628.512595,-9010.345628,0
389046,46796,10295.183686,-5639.677835,169.134000,5974.810500,-712.389547,-10611.057557,10211.813363,-7318.442938,1755.497965,...,-3015.940529,1244.787396,5166.216419,-8128.961113,12517.407649,-3772.145188,-4535.361388,7321.180973,-7443.751077,0
389047,46797,10805.213405,-4542.159354,1391.468250,380.231866,-2405.476275,-11253.182331,11452.444051,-5871.400918,489.142358,...,-3213.067047,2484.962919,4267.233490,-3324.138890,4606.696647,-5006.757219,-5438.834339,10657.030372,-8289.054286,0
389048,46798,7391.323468,-2275.075052,-620.357151,2520.253507,2053.970071,-10877.710997,12538.306920,-12005.272638,-497.090960,...,-11.376034,1327.411539,3291.745917,-4531.241714,6928.229030,-3583.659019,-5228.705874,10812.298739,-10290.640491,0


In [85]:
# rename first column to date
df = df.rename(columns={"Unnamed: 0": "date"})
df.head()

Unnamed: 0,date,FP1-F7,F7-T3,T3-T5,T5-O1,FP2-F8,F8-T4,T4-T6,T6-O2,T3-C3,...,C4-T4,FP1-F3,F3-C3,C3-P3,P3-O1,FP2-F4,F4-C4,C4-P4,P4-O2,OT
0,0,-0.024861,-0.009711,0.029561,0.031523,0.003674,-0.034006,0.025238,0.014558,0.026063,...,-0.020818,-0.017191,0.008681,0.010458,0.024562,-0.023345,0.013831,0.015163,0.003815,0
1,1,0.024829,0.009692,-0.029625,-0.031479,-0.003708,0.033883,-0.025075,-0.014598,-0.025988,...,0.020624,0.017185,-0.008652,-0.010551,-0.024565,0.023311,-0.013761,-0.015188,-0.003861,0
2,2,-0.024798,-0.009673,0.02969,0.031436,0.003743,-0.033761,0.024913,0.014637,0.025914,...,-0.020431,-0.017179,0.008622,0.010643,0.024568,-0.023278,0.01369,0.015212,0.003907,0
3,3,0.024766,0.009654,-0.029754,-0.031392,-0.003778,0.033639,-0.024751,-0.014676,-0.025839,...,0.020237,0.017173,-0.008593,-0.010736,-0.024571,0.023244,-0.01362,-0.015236,-0.003954,0
4,4,-0.024735,-0.009635,0.029818,0.031349,0.003812,-0.033517,0.024588,0.014715,0.025765,...,-0.020043,-0.017168,0.008563,0.010828,0.024574,-0.023211,0.013549,0.015261,0.004,0


In [86]:
df["date"] = pd.to_datetime("2000-01-01") + pd.to_timedelta(df["date"], unit="m")
display(df)

Unnamed: 0,date,FP1-F7,F7-T3,T3-T5,T5-O1,FP2-F8,F8-T4,T4-T6,T6-O2,T3-C3,...,C4-T4,FP1-F3,F3-C3,C3-P3,P3-O1,FP2-F4,F4-C4,C4-P4,P4-O2,OT
0,2000-01-01 00:00:00,-0.024861,-0.009711,0.029561,0.031523,0.003674,-0.034006,0.025238,0.014558,0.026063,...,-0.020818,-0.017191,0.008681,0.010458,0.024562,-0.023345,0.013831,0.015163,0.003815,0
1,2000-01-01 00:01:00,0.024829,0.009692,-0.029625,-0.031479,-0.003708,0.033883,-0.025075,-0.014598,-0.025988,...,0.020624,0.017185,-0.008652,-0.010551,-0.024565,0.023311,-0.013761,-0.015188,-0.003861,0
2,2000-01-01 00:02:00,-0.024798,-0.009673,0.029690,0.031436,0.003743,-0.033761,0.024913,0.014637,0.025914,...,-0.020431,-0.017179,0.008622,0.010643,0.024568,-0.023278,0.013690,0.015212,0.003907,0
3,2000-01-01 00:03:00,0.024766,0.009654,-0.029754,-0.031392,-0.003778,0.033639,-0.024751,-0.014676,-0.025839,...,0.020237,0.017173,-0.008593,-0.010736,-0.024571,0.023244,-0.013620,-0.015236,-0.003954,0
4,2000-01-01 00:04:00,-0.024735,-0.009635,0.029818,0.031349,0.003812,-0.033517,0.024588,0.014715,0.025765,...,-0.020043,-0.017168,0.008563,0.010828,0.024574,-0.023211,0.013549,0.015261,0.004000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389045,2000-02-02 11:55:00,5275.967885,-1914.593555,603.489320,4471.383912,-1251.651994,-7315.392441,10408.300147,-11943.962785,2086.596315,...,-1846.170395,1686.190829,3761.779815,-985.570691,3973.847608,-3069.027760,-3651.846280,5628.512595,-9010.345628,0
389046,2000-02-02 11:56:00,10295.183686,-5639.677835,169.134000,5974.810500,-712.389547,-10611.057557,10211.813363,-7318.442938,1755.497965,...,-3015.940529,1244.787396,5166.216419,-8128.961113,12517.407649,-3772.145188,-4535.361388,7321.180973,-7443.751077,0
389047,2000-02-02 11:57:00,10805.213405,-4542.159354,1391.468250,380.231866,-2405.476275,-11253.182331,11452.444051,-5871.400918,489.142358,...,-3213.067047,2484.962919,4267.233490,-3324.138890,4606.696647,-5006.757219,-5438.834339,10657.030372,-8289.054286,0
389048,2000-02-02 11:58:00,7391.323468,-2275.075052,-620.357151,2520.253507,2053.970071,-10877.710997,12538.306920,-12005.272638,-497.090960,...,-11.376034,1327.411539,3291.745917,-4531.241714,6928.229030,-3583.659019,-5228.705874,10812.298739,-10290.640491,0


In [51]:
# replace all "-" in the date column "/"
df["date"] = df["date"].astype(str).str.replace("-", "/")

In [87]:
df

Unnamed: 0,date,FP1-F7,F7-T3,T3-T5,T5-O1,FP2-F8,F8-T4,T4-T6,T6-O2,T3-C3,...,C4-T4,FP1-F3,F3-C3,C3-P3,P3-O1,FP2-F4,F4-C4,C4-P4,P4-O2,OT
0,2000-01-01 00:00:00,-0.024861,-0.009711,0.029561,0.031523,0.003674,-0.034006,0.025238,0.014558,0.026063,...,-0.020818,-0.017191,0.008681,0.010458,0.024562,-0.023345,0.013831,0.015163,0.003815,0
1,2000-01-01 00:01:00,0.024829,0.009692,-0.029625,-0.031479,-0.003708,0.033883,-0.025075,-0.014598,-0.025988,...,0.020624,0.017185,-0.008652,-0.010551,-0.024565,0.023311,-0.013761,-0.015188,-0.003861,0
2,2000-01-01 00:02:00,-0.024798,-0.009673,0.029690,0.031436,0.003743,-0.033761,0.024913,0.014637,0.025914,...,-0.020431,-0.017179,0.008622,0.010643,0.024568,-0.023278,0.013690,0.015212,0.003907,0
3,2000-01-01 00:03:00,0.024766,0.009654,-0.029754,-0.031392,-0.003778,0.033639,-0.024751,-0.014676,-0.025839,...,0.020237,0.017173,-0.008593,-0.010736,-0.024571,0.023244,-0.013620,-0.015236,-0.003954,0
4,2000-01-01 00:04:00,-0.024735,-0.009635,0.029818,0.031349,0.003812,-0.033517,0.024588,0.014715,0.025765,...,-0.020043,-0.017168,0.008563,0.010828,0.024574,-0.023211,0.013549,0.015261,0.004000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
389045,2000-02-02 11:55:00,5275.967885,-1914.593555,603.489320,4471.383912,-1251.651994,-7315.392441,10408.300147,-11943.962785,2086.596315,...,-1846.170395,1686.190829,3761.779815,-985.570691,3973.847608,-3069.027760,-3651.846280,5628.512595,-9010.345628,0
389046,2000-02-02 11:56:00,10295.183686,-5639.677835,169.134000,5974.810500,-712.389547,-10611.057557,10211.813363,-7318.442938,1755.497965,...,-3015.940529,1244.787396,5166.216419,-8128.961113,12517.407649,-3772.145188,-4535.361388,7321.180973,-7443.751077,0
389047,2000-02-02 11:57:00,10805.213405,-4542.159354,1391.468250,380.231866,-2405.476275,-11253.182331,11452.444051,-5871.400918,489.142358,...,-3213.067047,2484.962919,4267.233490,-3324.138890,4606.696647,-5006.757219,-5438.834339,10657.030372,-8289.054286,0
389048,2000-02-02 11:58:00,7391.323468,-2275.075052,-620.357151,2520.253507,2053.970071,-10877.710997,12538.306920,-12005.272638,-497.090960,...,-11.376034,1327.411539,3291.745917,-4531.241714,6928.229030,-3583.659019,-5228.705874,10812.298739,-10290.640491,0


In [53]:
df.to_csv("dataset/seizure/seizure.csv", index=False)