### Imports

In [28]:
from mne.io import read_raw_edf
import numpy as np
import os
import pandas as pd

### CSV Creation

In [29]:
main_path = '/Users/bryanmcelvy/Documents/physionet.org/files/chbmit/1.0.0'

patient_data = pd.DataFrame(columns="start,end".split(','))
patient_data.rename_axis("filename", axis="index")
patient_data.head()

Unnamed: 0,start,end


In [30]:
''' Patient 01 '''

patient = 'chb01'

# Create lists of seizure start/end times paired with filenames
filenames = []
starts = list(-1 * np.ones(shape=44, dtype=int))
ends = list(-1 * np.ones(shape=44, dtype=int))

for n in list(np.linspace(start=1, stop=44, num=44, dtype=int)):
    if n == 3: starts[n-1], ends[n-1] = 2996, 3036
    elif n == 4: starts[n-1], ends[n-1] = 1467, 1494
    elif n == 15: starts[n-1], ends[n-1] = 1732, 1772
    elif n == 16: starts[n-1], ends[n-1] = 1015, 1066
    elif n == 18: starts[n-1], ends[n-1] = 1720, 1810
    elif n == 21: starts[n-1], ends[n-1] = 327, 420
    elif n == 26: starts[n-1], ends[n-1] = 1862, 1963
    
    if n <= 9: file_num = "0" + str(n)
    elif n == 44: file_num = str(46) # No file 44 or file 45
    else: file_num = str(n)
    filenames.append(patient + f"_{file_num}.edf")

# Save to DataFrame
temp_df = pd.DataFrame(columns = patient_data.keys(), index=filenames)
temp_df["start"], temp_df["end"] = starts, ends
patient_data = pd.concat([patient_data, temp_df])

temp_df.head()

Unnamed: 0,start,end
chb01_01.edf,-1,-1
chb01_02.edf,-1,-1
chb01_03.edf,2996,3036
chb01_04.edf,1467,1494
chb01_05.edf,-1,-1


In [31]:
''' Patient 02 '''

patient = 'chb02'

# Create lists of seizure start/end times paired with filenames
filenames = []
starts = list(-1 * np.ones(shape=35, dtype=int))
ends = list(-1 * np.ones(shape=35, dtype=int))

for n in list(np.linspace(start=1, stop=35, num=35, dtype=int)):
    if n == 16: starts[n-1], ends[n-1] = 130, 212
    elif n == 19: starts[n-1], ends[n-1] = 3369, 3378
    
    if n <= 9: file_num = "0" + str(n)
    else: file_num = str(n)
    filenames.append(patient + f"_{file_num}.edf")

# Add file 16+
filenames.append(patient + f"_16+.edf")
starts.append(2972)
ends.append(3053)

# Save to DataFrame
temp_df = pd.DataFrame(columns = patient_data.keys(), index=filenames)
temp_df["start"], temp_df["end"] = starts, ends
temp_df.sort_index(axis='index', inplace=True)

patient_data = pd.concat([patient_data, temp_df])

temp_df.head()

Unnamed: 0,start,end
chb02_01.edf,-1,-1
chb02_02.edf,-1,-1
chb02_03.edf,-1,-1
chb02_04.edf,-1,-1
chb02_05.edf,-1,-1


In [32]:
''' Patient 03 '''

patient = 'chb03'

# Create lists of seizure start/end times paired with filenames
filenames = []
starts = list(-1 * np.ones(shape=38, dtype=int))
ends = list(-1 * np.ones(shape=38, dtype=int))

for n in list(np.linspace(start=1, stop=38, num=38, dtype=int)):
    if n == 1: starts[n-1], ends[n-1] = 362, 414
    elif n == 2: starts[n-1], ends[n-1] = 731, 796
    elif n == 3: starts[n-1], ends[n-1] = 432, 501
    elif n == 4: starts[n-1], ends[n-1] = 2162, 2214
    elif n == 34: starts[n-1], ends[n-1] = 1982, 2029
    elif n == 35: starts[n-1], ends[n-1] = 2592, 2656
    elif n == 36: starts[n-1], ends[n-1] = 1725, 1778
    
    if n <= 9: file_num = "0" + str(n)
    else: file_num = str(n)
    filenames.append(patient + f"_{file_num}.edf")
    
# Save to DataFrame
temp_df = pd.DataFrame(columns = patient_data.keys(), index=filenames)
temp_df["start"], temp_df["end"] = starts, ends
temp_df.sort_index(axis='index', inplace=True)

patient_data = pd.concat([patient_data, temp_df])

temp_df.head()

Unnamed: 0,start,end
chb03_01.edf,362,414
chb03_02.edf,731,796
chb03_03.edf,432,501
chb03_04.edf,2162,2214
chb03_05.edf,-1,-1


In [33]:
''' Patient 04 '''

patient = 'chb04'

# Create lists of seizure start/end times paired with filenames
filenames = []
starts = list(-1 * np.ones(shape=43, dtype=int))
ends = list(-1 * np.ones(shape=43, dtype=int))

for n in list(np.linspace(start=1, stop=43, num=43, dtype=int)):
    if n == 5: starts[n-1], ends[n-1] = 7804, 7853
    elif n == 8: starts[n-1], ends[n-1] = 6646, 6657
    elif n == 28: starts[n-1], ends[n-1] = 1679, 1781
    
    if n <= 9: file_num = "0" + str(n)
    else: file_num = str(n)
    filenames.append(patient + f"_{file_num}.edf")

# Add second seizure from file 28
filenames.append(patient + "_28.edf")
starts.append(3782); ends.append(3898)

# Save to DataFrame
temp_df = pd.DataFrame(columns = patient_data.keys(), index=filenames)
temp_df["start"], temp_df["end"] = starts, ends
temp_df.sort_index(axis='index', inplace=True)

patient_data = pd.concat([patient_data, temp_df])

temp_df.head()

Unnamed: 0,start,end
chb04_01.edf,-1,-1
chb04_02.edf,-1,-1
chb04_03.edf,-1,-1
chb04_04.edf,-1,-1
chb04_05.edf,7804,7853


In [34]:
''' Patient 05 '''

patient = 'chb05'

# Create lists of seizure start/end times paired with filenames
filenames = []
starts = list(-1 * np.ones(shape=39, dtype=int))
ends = list(-1 * np.ones(shape=39, dtype=int))

for n in list(np.linspace(start=1, stop=39, num=39, dtype=int)):
    if n == 6: starts[n-1], ends[n-1] = 417, 532
    elif n == 13: starts[n-1], ends[n-1] = 1086, 1196
    elif n == 16: starts[n-1], ends[n-1] = 2317, 2413
    elif n == 17: starts[n-1], ends[n-1] = 2451, 2571
    elif n == 22: starts[n-1], ends[n-1] = 2348, 2465
    
    if n <= 9: file_num = "0" + str(n)
    else: file_num = str(n)
    filenames.append(patient + f"_{file_num}.edf")

# Save to DataFrame
temp_df = pd.DataFrame(columns = patient_data.keys(), index=filenames)
temp_df["start"], temp_df["end"] = starts, ends
temp_df.sort_index(axis='index', inplace=True)

patient_data = pd.concat([patient_data, temp_df])

temp_df.head()

Unnamed: 0,start,end
chb05_01.edf,-1,-1
chb05_02.edf,-1,-1
chb05_03.edf,-1,-1
chb05_04.edf,-1,-1
chb05_05.edf,-1,-1


In [35]:
''' Patient 06 '''

patient = 'chb06'

# Create lists of seizure start/end times paired with filenames
filenames = []
starts = list(-1 * np.ones(shape=24, dtype=int))
ends = list(-1 * np.ones(shape=24, dtype=int))

for n in list(np.linspace(start=1, stop=24, num=24, dtype=int)):
    if n == 1: starts[n-1], ends[n-1] = 1724, 1738
    elif n == 4: starts[n-1], ends[n-1] = 327, 347
    elif n == 9: starts[n-1], ends[n-1] = 12500, 12516
    elif n == 10: starts[n-1], ends[n-1] = 10833, 10845
    elif n == 13: starts[n-1], ends[n-1] = 506, 519
    elif n == 18: starts[n-1], ends[n-1] = 7799, 7811
    elif n == 24: starts[n-1], ends[n-1] = 9387, 9403
    
    if n <= 9: file_num = "0" + str(n)
    else: file_num = str(n)
    filenames.append(patient + f"_{file_num}.edf")

# Add additional seizures from file 01
filenames.append(patient + "_01.edf")
starts.append(7461); ends.append(7476)

filenames.append(patient + "_01.edf")
starts.append(13525); ends.append(13540)

# Add additional seizure from file 04
filenames.append(patient + "_04.edf")
starts.append(6211); ends.append(6231)

# Save to DataFrame
temp_df = pd.DataFrame(columns = patient_data.keys(), index=filenames)
temp_df["start"], temp_df["end"] = starts, ends
temp_df.sort_index(axis='index', inplace=True)

# Delete empty rows
temp_df.drop(patient+"_11.edf", inplace=True)
for i in range(19,24): temp_df.drop(f"{patient}_{i}.edf", inplace=True)
temp_df.head()

patient_data = pd.concat([patient_data, temp_df])

In [36]:
''' Patient 07 '''

patient = 'chb07'

# Create lists of seizure start/end times paired with filenames
filenames = []
starts = list(-1 * np.ones(shape=19, dtype=int))
ends = list(-1 * np.ones(shape=19, dtype=int))

for n in list(np.linspace(start=1, stop=19, num=19, dtype=int)):
    if n == 12: starts[n-1], ends[n-1] = 4920, 5006
    elif n == 13: starts[n-1], ends[n-1] = 3285, 3381
    elif n == 19: starts[n-1], ends[n-1] = 13688, 13831
    
    if n <= 9: file_num = "0" + str(n)
    else: file_num = str(n)
    filenames.append(patient + f"_{file_num}.edf")

# Save to DataFrame
temp_df = pd.DataFrame(columns = patient_data.keys(), index=filenames)
temp_df["start"], temp_df["end"] = starts, ends
temp_df.sort_index(axis='index', inplace=True)
temp_df.head()

patient_data = pd.concat([patient_data, temp_df])

In [None]:
''' Patient 08 '''

patient = 'chb08'

# Create lists of seizure start/end times paired with filenames
filenames = []
starts = list(-1 * np.ones(shape=29, dtype=int))
ends = list(-1 * np.ones(shape=29, dtype=int))

for n in list(np.linspace(start=1, stop=19, num=19, dtype=int)):
    if n == 2: starts[n-1], ends[n-1] = 2670, 2841
    elif n == 5: starts[n-1], ends[n-1] = 2856, 3046
    elif n == 11: starts[n-1], ends[n-1] = 2988, 3122
    elif n == 13: starts[n-1], ends[n-1] = 2417, 2577
    elif n == 21: starts[n-1], ends[n-1] = 2083, 2347
    
    if n <= 9: file_num = "0" + str(n)
    else: file_num = str(n)
    filenames.append(patient + f"_{file_num}.edf")

# Save to DataFrame
temp_df = pd.DataFrame(columns = patient_data.keys(), index=filenames)
temp_df["start"], temp_df["end"] = starts, ends
temp_df.sort_index(axis='index', inplace=True)

# Delete empty rows
temp_df.drop(patient+"_1.edf", inplace=True)
for i in range(len(start)): temp_df.drop(f"{patient}_{i}.edf", inplace=True)
temp_df.head()

patient_data = pd.concat([patient_data, temp_df])

In [37]:
patient_data.describe()

Unnamed: 0,start,end
count,243,243
unique,36,37
top,-1,-1
freq,197,197


In [38]:
patient_data.to_csv("patient_data.csv")

### Data CSV Generation

In [None]:
file_path = '/Users/bryanmcelvy/Documents/physionet.org/files/chbmit/1.0.0/chb01'
file = read_raw_edf(input_fname = file_path + '/chb01_03' + '.edf', preload=False, verbose='ERROR')
ch_names = file.ch_names
fs = int(file.info['sfreq'])

In [None]:
hour3 = (np.arange(2996, 3037, step=1/fs) * int(fs)).astype(int)
hour4 = (np.arange(1467, 1495, step=1/fs) * int(fs)).astype(int)
hour15 = (np.arange(1732, 1773, step=1/fs) * int(fs)).astype(int)
hour16 = (np.arange(1015, 1067, step=1/fs) * int(fs)).astype(int)
hour18 = (np.arange(1720, 1811, step=1/fs) * int(fs)).astype(int)
hour21 = (np.arange(327, 421, step=1/fs) * int(fs)).astype(int)
hour26 = (np.arange(1862, 1964, step=1/fs) * int(fs)).astype(int)

In [None]:
num_true = len(hour3) + len(hour4) + len(hour15) + len(hour16) + len(hour18) + len(hour21) + len(hour26)
num_true

In [None]:
# Import data
file_path = '/Users/bryanmcelvy/Documents/physionet.org/files/chbmit/1.0.0/chb01'
file = read_raw_edf(input_fname = file_path + '/chb01_03' + '.edf', preload=False, verbose='ERROR')
x_temp1 = pd.DataFrame(np.transpose(file.get_data()), columns = file.ch_names)

# Annotate rows (0 – Normal, 1 – Seizure)
x_temp1.insert(loc=0, column='State', value = [1 if 2996 <= (i / fs) < 3037 else 0 for i in range(len(x_temp1))])

# Separate Class 0 and Class 1 samples
x_temp2 = pd.DataFrame(x_temp1.loc[x_temp1['State']==1, :])
x_temp1 = x_temp1.drop(index=x_temp1.loc[x_temp1['State']==1, :].index)

# Undersample Class 0 samples via random selection to create balanced class ratio
time = pd.Series([i for i in range(3600) if i < 2996 or i >= 3037])
time = time.sample(n=int(len(x_temp2)/fs), replace=False).sort_values().multiply(fs)
for t in time:
    x_temp2 = pd.concat([x_temp2, x_temp1.iloc[t:t+256, :]])

x_data = x_temp2.copy().reset_index(drop=True)
print(x_data.shape)

In [None]:
# Import data
file_path = '/Users/bryanmcelvy/Documents/physionet.org/files/chbmit/1.0.0/chb01'
file = read_raw_edf(input_fname = file_path + '/chb01_04' + '.edf', preload=False, verbose='ERROR')
x_temp1 = pd.DataFrame(np.transpose(file.get_data()), columns = file.ch_names)

# Annotate rows (Class 0 – Normal, Class 1 – Seizure)
x_temp1.insert(loc=0, column='State', value = [1 if 1467 <= (i / fs) < 1495 else 0 for i in range(len(x_temp1))])

# Separate Class 0 and Class 1 samples
x_temp2 = pd.DataFrame(x_temp1.loc[x_temp1['State']==1, :]) # Isolate Class 1 samples
x_temp1 = x_temp1.drop(index=x_temp1.loc[x_temp1['State']==1, :].index) # Remove Class 1 samples

# Undersample Class 0 samples via random selection to create balanced class ratio
time = pd.Series([i for i in range(3600) if i < 1467 or i >= 1495])
time = time.sample(n=int(len(x_temp2)/fs), replace=False).sort_values().multiply(fs)
for t in time:
    x_temp2 = pd.concat([x_temp2, x_temp1.iloc[t:t+256, :]])

x_data = pd.concat([x_data, x_temp2]).reset_index(drop=True)
print(x_data.shape)

In [None]:
# Import data
file_path = '/Users/bryanmcelvy/Documents/physionet.org/files/chbmit/1.0.0/chb01'
file = read_raw_edf(input_fname = file_path + '/chb01_15' + '.edf', preload=False, verbose='ERROR')
x_temp1 = pd.DataFrame(np.transpose(file.get_data()), columns = file.ch_names)

# Annotate rows (Class 0 – Normal, Class 1 – Seizure)
x_temp1.insert(loc=0, column='State', value = [1 if 1732 <= (i / fs) < 1773 else 0 for i in range(len(x_temp1))])

# Separate Class 0 and Class 1 samples
x_temp2 = pd.DataFrame(x_temp1.loc[x_temp1['State']==1, :]) # Isolate Class 1 samples
x_temp1 = x_temp1.drop(index=x_temp1.loc[x_temp1['State']==1, :].index) # Remove Class 1 samples

# Undersample Class 0 samples via random selection to create balanced class ratio
time = pd.Series([i for i in range(3600) if i < 1732 or i >= 1773])
time = time.sample(n=int(len(x_temp2)/fs), replace=False).sort_values().multiply(fs)
for t in time:
    x_temp2 = pd.concat([x_temp2, x_temp1.iloc[t:t+256, :]])

x_data = pd.concat([x_data, x_temp2]).reset_index(drop=True)
print(x_data.shape)

In [None]:
# Import data
file_path = '/Users/bryanmcelvy/Documents/physionet.org/files/chbmit/1.0.0/chb01'
file = read_raw_edf(input_fname = file_path + '/chb01_16' + '.edf', preload=False, verbose='ERROR')
x_temp1 = pd.DataFrame(np.transpose(file.get_data()), columns = file.ch_names)

# Annotate rows (Class 0 – Normal, Class 1 – Seizure)
x_temp1.insert(loc=0, column='State', value = [1 if 1015 <= (i / fs) < 1067 else 0 for i in range(len(x_temp1))])

# Separate Class 0 and Class 1 samples
x_temp2 = pd.DataFrame(x_temp1.loc[x_temp1['State']==1, :]) # Isolate Class 1 samples
x_temp1 = x_temp1.drop(index=x_temp1.loc[x_temp1['State']==1, :].index) # Remove Class 1 samples

# Undersample Class 0 samples via random selection to create balanced class ratio
time = pd.Series([i for i in range(3600) if i < 1015 or i >= 1067])
time = time.sample(n=int(len(x_temp2)/fs), replace=False).sort_values().multiply(fs)
for t in time:
    x_temp2 = pd.concat([x_temp2, x_temp1.iloc[t:t+256, :]])

x_data = pd.concat([x_data, x_temp2]).reset_index(drop=True)
print(x_data.shape)

In [None]:
# Import data
file_path = '/Users/bryanmcelvy/Documents/physionet.org/files/chbmit/1.0.0/chb01'
file = read_raw_edf(input_fname = file_path + '/chb01_18' + '.edf', preload=False, verbose='ERROR')
x_temp1 = pd.DataFrame(np.transpose(file.get_data()), columns = file.ch_names)

# Annotate rows (Class 0 – Normal, Class 1 – Seizure)
x_temp1.insert(loc=0, column='State', value = [1 if 1720 <= (i / fs) < 1811 else 0 for i in range(len(x_temp1))])

# Separate Class 0 and Class 1 samples
x_temp2 = pd.DataFrame(x_temp1.loc[x_temp1['State']==1, :]) # Isolate Class 1 samples
x_temp1 = x_temp1.drop(index=x_temp1.loc[x_temp1['State']==1, :].index) # Remove Class 1 samples

# Undersample Class 0 samples via random selection to create balanced class ratio
time = pd.Series([i for i in range(3600) if i < 1720 or i >= 1811])
time = time.sample(n=int(len(x_temp2)/fs), replace=False).sort_values().multiply(fs)
for t in time:
    x_temp2 = pd.concat([x_temp2, x_temp1.iloc[t:t+256, :]])

x_data = pd.concat([x_data, x_temp2]).reset_index(drop=True)
print(x_data.shape)

In [None]:
# Import data
file_path = '/Users/bryanmcelvy/Documents/physionet.org/files/chbmit/1.0.0/chb01'
file = read_raw_edf(input_fname = file_path + '/chb01_21' + '.edf', preload=False, verbose='ERROR')
x_temp1 = pd.DataFrame(np.transpose(file.get_data()), columns = file.ch_names)

# Annotate rows (Class 0 – Normal, Class 1 – Seizure)
x_temp1.insert(loc=0, column='State', value = [1 if 327 <= (i / fs) < 421 else 0 for i in range(len(x_temp1))])

# Separate Class 0 and Class 1 samples
x_temp2 = pd.DataFrame(x_temp1.loc[x_temp1['State']==1, :]) # Isolate Class 1 samples
x_temp1 = x_temp1.drop(index=x_temp1.loc[x_temp1['State']==1, :].index) # Remove Class 1 samples

# Undersample Class 0 samples via random selection to create balanced class ratio
time = pd.Series([i for i in range(3600) if i < 327 or i >= 421])
time = time.sample(n=int(len(x_temp2)/fs), replace=False).sort_values().multiply(fs)
for t in time:
    x_temp2 = pd.concat([x_temp2, x_temp1.iloc[t:t+256, :]])

x_data = pd.concat([x_data, x_temp2]).reset_index(drop=True)
print(x_data.shape)

In [None]:
# Import data
file_path = '/Users/bryanmcelvy/Documents/physionet.org/files/chbmit/1.0.0/chb01'
file = read_raw_edf(input_fname = file_path + '/chb01_26' + '.edf', preload=False, verbose='ERROR')
x_temp1 = pd.DataFrame(np.transpose(file.get_data()), columns = file.ch_names)

# Annotate rows (Class 0 – Normal, Class 1 – Seizure)
x_temp1.insert(loc=0, column='State', value = [1 if 1862 <= (i / fs) < 1964 else 0 for i in range(len(x_temp1))])

# Separate Class 0 and Class 1 samples
x_temp2 = pd.DataFrame(x_temp1.loc[x_temp1['State']==1, :]) # Isolate Class 1 samples
x_temp1 = x_temp1.drop(index=x_temp1.loc[x_temp1['State']==1, :].index) # Remove Class 1 samples

# Undersample Class 0 samples via random selection to create balanced class ratio
time = pd.Series([i for i in range(3600) if i < 1862 or i >= 1964])
time = time.sample(n=int(len(x_temp2)/fs), replace=False).sort_values().multiply(fs)
for t in time:
    x_temp2 = pd.concat([x_temp2, x_temp1.iloc[t:t+256, :]])

x_data = pd.concat([x_data, x_temp2]).reset_index(drop=True)
print(x_data.shape)

In [None]:
x_data.iloc[:, 1:] = x_data.iloc[:, 1:].multiply(1E6)
x_data

In [None]:
x_data.to_csv(path_or_buf='data.csv', index=False)