In [0]:
!pip install matplotlib pandas numpy scipy seaborn mne
!pip install beautifulsoup4 requests wget
!pip install h5py tables kaggle
!pip install wfdb pyEDFlib

Collecting mne
[?25l  Downloading https://files.pythonhosted.org/packages/01/af/9c64ac8f75b1c932ca5fb16bc27740cd9b9817f9173a6608ae999e82bb6a/mne-0.20.0-py3-none-any.whl (6.5MB)
[K     |████████████████████████████████| 6.6MB 2.8MB/s 
Installing collected packages: mne
Successfully installed mne-0.20.0
Collecting wget
  Downloading https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/wget-3.2.zip
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-cp36-none-any.whl size=9682 sha256=8ecba1ebf47026c862a7fa16cdc6bfa7116120b7c611bbc5f8b64da576dd65e9
  Stored in directory: /root/.cache/pip/wheels/40/15/30/7d8f7cea2902b4db79e3fea550d7d7b85ecb27ef992b618f3f
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2
Collecting wfdb
[?25l  Downloading https://files.pythonhosted.org/packages/b2/96/c2200539fdf4f087e

**The Epileptologie Database** 


In [0]:
# ANSI escape sequences used to style printed output
class color:
    """Namespace of ANSI terminal escape codes for coloured / styled text.

    Concatenate one or more codes in front of a string and finish with
    ``color.END`` to reset the styling again.
    """
    # foreground colours
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    # text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    # resets every style set above
    END = '\033[0m'


# quick demonstration: a bold, underlined heading followed by plain text
print(f"{color.BOLD}{color.UNDERLINE}Title{color.END}")
print('Hello World')

[1m[4mTitle[0m
Hello World


In [0]:
import glob            # for file locations
import pprint          # for pretty printing
import re

pp = pprint.PrettyPrinter()

def file_list(folder_path, output=False):
    """Return the alphabetically sorted paths that match a glob pattern.

    Parameters
    ----------
    folder_path : str
        Glob pattern handed to ``glob.glob`` (e.g. ``"some/dir/*"``).
    output : bool, optional
        When True, print how many paths were found and pretty-print them.

    Returns
    -------
    list of str
        The matching paths in sorted order (empty when nothing matches).
    """
    # glob.glob already yields a list; sorted() returns an ordered copy
    matches = sorted(glob.glob(folder_path))

    # optional report of what was found
    if output:
        print(str(len(matches)) + " files found")
        pp.pprint(matches)

    return matches

In [0]:
import sys
import os
from bs4 import BeautifulSoup
import requests
import re
import wget
import zipfile


def find_files(url):
    """Fetch a web page and return the ``href`` of every ``<a>`` tag on it.

    Parameters
    ----------
    url : str
        Address of the page to scan.

    Returns
    -------
    list
        One entry per anchor, in document order; an anchor without an
        ``href`` attribute contributes ``None``.
    """
    # download the page and parse it
    page_html = requests.get(url).text
    soup = BeautifulSoup(page_html, features="html.parser")

    # collect the target of every link on the page
    return [anchor.get('href') for anchor in soup.find_all('a')]
    
    
def download_file(download_file_url, file_dir, output=False):
    """Download one file with ``wget``.

    Parameters
    ----------
    download_file_url : str
        Address of the file to fetch.
    file_dir : str
        Destination passed straight to ``wget.download``.
    output : bool, optional
        When True, print the URL before the download starts.
    """
    if output:
        # announce which file is about to be fetched
        message = 'Downloading: ' + download_file_url
        print(message)

    # fetch the file into the destination
    wget.download(download_file_url, file_dir)
    
    
# needs a directory to download it to
def download_epileptologie(DIR, output=False):
    """Download and unpack every zip archive linked from the Bonn EEG page.

    Each archive is downloaded into ``DIR``, extracted into a sub-folder
    named after the archive (``Z.zip`` -> ``DIR/Z``) and then deleted.
    Archives whose sub-folder already exists are skipped, so the function can
    be re-run without fetching everything again.

    Parameters
    ----------
    DIR : str
        Existing directory that receives the extracted folders.
    output : bool, optional
        Forwarded to ``download_file``; when True each URL is printed.
    """
    # page listing the archives, and the base the relative links hang off
    front_url = 'http://epileptologie-bonn.de/cms/front_content.php?idcat=193&lang=3&changelang=3'
    dir_url = 'http://epileptologie-bonn.de/cms'

    for link in find_files(front_url):
        # anchors without an href come back as None, hence the str()
        link = str(link)
        if 'zip' not in link:
            continue

        zip_file_name = link.split('/')[-1]
        zip_path = os.path.join(DIR, zip_file_name)
        # For the single-letter archives (Z.zip, O.zip, ...) the stem is the
        # same folder name the first character of the file name used to give.
        extract_dir = os.path.join(DIR, os.path.splitext(zip_file_name)[0])

        # The zip is removed after extraction and was never stored under the
        # href's relative path, so the extracted folder is the only reliable
        # sign that this archive has already been handled.
        if os.path.exists(extract_dir):
            continue

        download_file(dir_url+'/'+link, DIR, output)

        # the context manager closes the archive even if extraction fails
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
        os.remove(zip_path)

In [0]:
DOWNLOAD_DIR = "Epileptologie Database"

# an existing folder is taken to mean the data was fetched on an earlier run
if os.path.exists(DOWNLOAD_DIR):
    print("Already Downloaded")
else:
    os.makedirs(DOWNLOAD_DIR)
    download_epileptologie(DOWNLOAD_DIR, output=True)

Downloading: http://epileptologie-bonn.de/cms/upload/workgroup/lehnertz/Z.zip
Downloading: http://epileptologie-bonn.de/cms/upload/workgroup/lehnertz/O.zip
Downloading: http://epileptologie-bonn.de/cms/upload/workgroup/lehnertz/N.zip
Downloading: http://epileptologie-bonn.de/cms/upload/workgroup/lehnertz/F.zip
Downloading: http://epileptologie-bonn.de/cms/upload/workgroup/lehnertz/S.zip


In [0]:

# list everything directly inside the download directory (one folder per set)
epil_folder_pattern = os.path.join(DOWNLOAD_DIR, '*')
EPIL_dir_file_list = file_list(epil_folder_pattern, output=True)

5 files found
['Epileptologie Database/F',
 'Epileptologie Database/N',
 'Epileptologie Database/O',
 'Epileptologie Database/S',
 'Epileptologie Database/Z']


In [0]:
# Pick the set folders by name rather than by position in the sorted listing,
# so an extra or missing folder cannot silently shift which set is used.
epil_dirs = {os.path.basename(folder): folder for folder in EPIL_dir_file_list}

# one example recording from set F and one from set S
epil_baseline_file = os.path.join(epil_dirs['F'], 'F060.txt')
epil_seizure_file = os.path.join(epil_dirs['S'], 'S033.txt')

In [0]:

import pandas as pd     # dataframes
import re

def data_load(file_path, output=False):
    """Read a headerless text recording into a float DataFrame.

    Parameters
    ----------
    file_path : str
        Path of the text file; it is read with ``pd.read_csv`` without a
        header row and every value is parsed as float.
    output : bool, optional
        When True, print a heading with the recording ID, the
        ``DataFrame.info()`` summary and the first rows of the data.

    Returns
    -------
    pandas.DataFrame
        The file contents.
    """
    # read in the datafile: no column names at the top, values as floats
    data = pd.read_csv(file_path, header=None, dtype=float)

    if output:
        # The recording ID (a word character followed by digits, e.g. "F060")
        # is looked up in the file name only, so digits in a folder name
        # cannot be picked up by mistake; fall back to the whole file name
        # when there is no such ID.
        file_name = os.path.basename(file_path)
        id_match = re.search(r'\w\d+', file_name)
        title = id_match.group(0) if id_match else file_name
        print(color.BOLD+color.UNDERLINE+'\n'+title+color.END)

        # detailed information on the data
        print(color.BOLD+'\nData Information'+color.END)
        data.info()

        # first rows of the data
        print(color.BOLD+'\nDataframe Head'+color.END)
        display(data.head())

    return data
        

# load both example recordings (baseline first, then seizure) and print a
# summary of each
epil_baseline_df, epil_seizure_df = (
    data_load(example_path, output=True)
    for example_path in (epil_baseline_file, epil_seizure_file)
)

[1m[4m
F060[0m
[1m
Data Information[0m
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4097 entries, 0 to 4096
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       4097 non-null   float64
dtypes: float64(1)
memory usage: 32.1 KB
[1m
Dataframe Head[0m


Unnamed: 0,0
0,162.0
1,164.0
2,164.0
3,154.0
4,139.0


[1m[4m
S033[0m
[1m
Data Information[0m
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4097 entries, 0 to 4096
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       4097 non-null   float64
dtypes: float64(1)
memory usage: 32.1 KB
[1m
Dataframe Head[0m


Unnamed: 0,0
0,216.0
1,182.0
2,146.0
3,128.0
4,116.0


In [0]:
import mne 

# recording metadata: one EEG channel and its sampling rate
sample_rate = 173.61 # in hz
channel_name = ['CZ']
channel_type = ['eeg']

# bundle the metadata into an MNE Info object
info = mne.create_info(
    ch_names=channel_name,
    ch_types=channel_type,
    sfreq=sample_rate,
)

# show the info object
display(info)

<Info | 7 non-empty values
 bads: []
 ch_names: CZ
 chs: 1 EEG
 custom_ref_applied: False
 highpass: 0.0 Hz
 lowpass: 86.8 Hz
 meas_date: unspecified
 nchan: 1
 projs: []
 sfreq: 173.6 Hz
>

In [0]:
def mne_object(data, info, output=False):
    """Wrap a DataFrame of samples in an ``mne.io.RawArray``.

    The values are multiplied by 1e-6 (presumably microvolts -> volts; TODO
    confirm the unit of the source files) and the frame is transposed before
    it is handed to MNE.

    Parameters
    ----------
    data : pandas.DataFrame
        Samples to wrap.
    info : mne.Info
        Metadata describing the channels.
    output : bool, optional
        Accepted but not used by this implementation.

    Returns
    -------
    mne.io.RawArray
    """
    # rescale, then flip rows and columns for MNE
    scaled = data * 1e-6
    return mne.io.RawArray(scaled.transpose(), info)

# wrap both example recordings in MNE raw objects
epil_baseline_mne = mne_object(data=epil_baseline_df, info=info, output=True)
epil_seizure_mne = mne_object(data=epil_seizure_df, info=info)

Creating RawArray with float64 data, n_channels=1, n_times=4097
    Range : 0 ... 4096 =      0.000 ...    23.593 secs
Ready.
Creating RawArray with float64 data, n_channels=1, n_times=4097
    Range : 0 ... 4096 =      0.000 ...    23.593 secs
Ready.


In [0]:
%matplotlib notebook
# keyword arguments for raw.plot(); kept in one dict so that other plotting
# cells can reuse them
plot_kwargs = dict(
    scalings=dict(eeg=20e-4),   # zooms the plot out
    highpass=0.53,              # filters out low frequencies
    lowpass=40.,                # filters out high frequencies
    n_channels=1,               # just plot the one channel
    duration=24,                # number of seconds to plot
)

print(f"{color.BOLD}{color.UNDERLINE}Inter-Ictal{color.END}")
epil_baseline_mne.plot(**plot_kwargs)
print(f"{color.BOLD}{color.UNDERLINE}Ictal{color.END}")
epil_seizure_mne.plot(**plot_kwargs)

[1m[4mInter-Ictal[0m
Setting up band-pass filter from 0.53 - 40 Hz

IIR filter parameters
---------------------
Butterworth bandpass zero-phase (two-pass forward and reverse) non-causal filter:
- Filter order 16 (effective, after forward-backward)
- Cutoffs at 0.53, 40.00 Hz: -6.02, -6.02 dB



<IPython.core.display.Javascript object>

[1m[4mIctal[0m
Setting up band-pass filter from 0.53 - 40 Hz

IIR filter parameters
---------------------
Butterworth bandpass zero-phase (two-pass forward and reverse) non-causal filter:
- Filter order 16 (effective, after forward-backward)
- Cutoffs at 0.53, 40.00 Hz: -6.02, -6.02 dB



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [0]:
import random
import matplotlib.pyplot as plt
from scipy import signal

# 4th-order Butterworth band-pass between 1 and 30 Hz. The coefficients are
# the same for every file, so the filter is designed once up front.
nyquist = sample_rate / 2
b, a = signal.butter(4, [1/nyquist, 30/nyquist], 'bandpass', analog=False)

# figure title for each set, keyed by the last character of its folder name
set_titles = {
    'N': 'Inter-ictal: Opposite Hippocampus',
    'F': 'Inter-ictal: Epileptogenic Zone',
    'S': 'Ictal: Epileptogenic Zone',
    'Z': 'Surface EEG: Eyes Open',
    'O': 'Surface EEG: Eyes Closed',
}

for directory in EPIL_dir_file_list:
    # make a list of all the files in the directory
    files = file_list(os.path.join(directory, '*'))
    # randomly select up to 9 files (fewer only if the folder holds fewer)
    sampled_files = random.sample(files, min(9, len(files)))

    fig, axs = plt.subplots(3, 3, sharex=True, sharey=True)
    for position, file in enumerate(sampled_files):
        # fill the 3x3 grid column by column
        x, y = position % 3, position // 3

        # read in the datafile: no header row, values as floats
        data = pd.read_csv(file, header=None, dtype=float)

        # zero-phase filtering along the time axis
        filt_data = signal.filtfilt(b, a, data.T).T

        axs[x, y].plot(filt_data)
        # the recording ID (e.g. "F060") becomes the panel title
        axs[x, y].set_title(re.findall(r'\w\d+', file)[0], pad=-15)
        # plot all of them on the same scale
        axs[x, y].set_ylim([-2100, 2100])

    # add a big frameless axes that only carries the shared labels and title
    fig.add_subplot(111, frameon=False)
    # hide tick and tick label of the big axes
    plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
    plt.grid(False)
    plt.xlabel("Datapoints", labelpad=0.5)
    plt.ylabel("Microvolts (uV)", labelpad=20)
    plt.subplots_adjust(wspace=0.1, hspace=0.1)

    # folders that are not one of the known sets get no title
    set_title = set_titles.get(directory[-1])
    if set_title is not None:
        plt.title(set_title)

    plt.show()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

2) **NEDC TUH EEG Seizure corpus**
The full TUH EEG corpus is the world’s largest publicly available corpus of clinical EEG data. The corpus contains 15,757 hours (56,726,510 secs) of EEG recordings from 13,539 patients.

In [0]:
def find_files(url, headers):
    """Return the ``href`` of every ``<a>`` tag on a password-protected page.

    Note: this replaces the one-argument ``find_files`` defined earlier in
    the notebook.

    Parameters
    ----------
    url : str
        Address of the page (directory listing) to scan.
    headers : dict
        Credentials, read from the keys ``'user'`` and ``'passwd'`` and sent
        as HTTP basic auth.

    Returns
    -------
    list
        One entry per anchor; an anchor without ``href`` contributes ``None``.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (e.g. wrong credentials),
        instead of silently returning the links of the error page.
    """
    response = requests.get(url, auth=(headers['user'], headers['passwd']))
    # fail loudly on 401/404 rather than parsing the error page
    response.raise_for_status()

    # get a soup of the directory url
    soup = BeautifulSoup(response.text, features="html.parser")

    # make a list of all the links in the url
    return [link.get('href') for link in soup.find_all('a')]
    
def download_file(download_file_url, file_path, headers, output=False):
    """Download one password-protected file and write it to ``file_path``.

    Note: this replaces the ``wget``-based ``download_file`` defined earlier
    in the notebook.

    Parameters
    ----------
    download_file_url : str
        Address of the file to fetch.
    file_path : str
        Local path the content is written to (opened in binary mode).
    headers : dict
        Credentials, read from the keys ``'user'`` and ``'passwd'`` and sent
        as HTTP basic auth.
    output : bool, optional
        When True, print the URL before the download starts.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status. Nothing is written in
        that case, so an error page never ends up on disk posing as data.
    """
    if output:
        # print it is downloading
        print('Downloading: '+ download_file_url)

    r = requests.get(download_file_url, auth=(headers['user'], headers['passwd']))
    # stop before touching the disk if the request failed
    r.raise_for_status()

    # save the content to the requested path
    with open(file_path, 'wb') as f:
        f.write(r.content)

# needs a directory to download it to
def download_TUH(DIR, headers, sub_dir, output=False):
    """Download the spreadsheet, EDF and TSE files listed in one TUH folder.

    Parameters
    ----------
    DIR : str
        Existing local directory the files are saved into.
    headers : dict
        Credentials forwarded to ``find_files`` / ``download_file``.
    sub_dir : str
        Folder below the v1.5.0 seizure-corpus download root to scan.
    output : bool, optional
        Forwarded to ``download_file``; when True each URL is printed.
    """
    # directory url
    dir_url = 'https://www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg_seizure/v1.5.0/'+sub_dir

    # Links of interest contain ".xlsx", ".edf" or ".tse"; the look-ahead
    # rejects ".tse_..." names. A raw string keeps the "\." escapes valid and
    # the dot before "xlsx" is now matched literally.
    wanted = re.compile(r'\.xlsx|\.edf|\.tse(?!_)')

    # for each link in the directory
    for link in find_files(dir_url, headers):
        # anchors without an href come back as None, hence the str()
        link = str(link)
        if not wanted.search(link):
            continue
        # skip files that were already downloaded
        if os.path.exists(os.path.join(DIR, link)):
            continue
        download_file(dir_url+'/'+link, DIR+'/'+link, headers, output)

Let's first download the information file. You will need to fill out a form located on this page to have a username and password sent to your email.

Once you have your username and password, just type them in below.

In [0]:
from getpass import getpass
import os
import sys
import os
from bs4 import BeautifulSoup
import requests
import re
import wget
import zipfile

DOWNLOAD_DIR = "TUH Database"

# create the download folder on first use
if not os.path.exists(DOWNLOAD_DIR):
    os.makedirs(DOWNLOAD_DIR)

# ask for the credentials interactively so they are never stored in the notebook
user = getpass('TUH Username: ')
key = getpass('TUH Password: ')
auth_dict = dict(user=user, passwd=key)

# fetch the files found in the corpus' _DOCS folder
download_TUH(DOWNLOAD_DIR, auth_dict, '_DOCS', output=True)

TUH Username: ··········
TUH Password: ··········
Downloading: https://www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg_seizure/v1.5.0/_DOCS/seizures_types_v01.xlsx
Downloading: https://www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg_seizure/v1.5.0/_DOCS/seizures_v32r.xlsx


In [0]:
import pandas as pd
# download_file() saved the spreadsheet inside DOWNLOAD_DIR, so build the path
# from that instead of hard-coding the absolute "/content" working directory
seiz_types_path = os.path.join(DOWNLOAD_DIR, 'seizures_types_v01.xlsx')

# one row per event class, indexed by its class code
seiz_types = pd.read_excel(seiz_types_path).set_index('Class Code')
display(seiz_types)

Unnamed: 0_level_0,Class No.,Event Name,Signs,Locality,Description
Class Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
,0,No Event,,,An unclassified event
SPSW,1,Spike/Sharp and Wave,Electrographic,All,"Spike and wave/complexes , sharp and wave/comp..."
GPED,2,Generalized Periodic Epileptiform Discharges,Electrographic,Generalized,Diffused periodic discharges
PLED,3,Periodic Lateralized Epileptiform Discharges,Electrographic,Hemispheric/Focal,Focal periodic discharges
EYBL,4,Eye blink,Clinical & Electrographic,Focal,"A specific type of sharp, high amplitude eye m..."
ARTF,5,Artifacts (All),Clinical & Electrographic,All,"Any non-brain activity electrical signal, such..."
BCKG,6,Background,Electrographic,,Baseline/non-interesting events
SEIZ,7,Seizure,Clinical &| Electrographic,All,Common seizure class which can include all typ...
FNSZ,8,Focal Non-Specific Seizure,Electrographic,Hemispheric/Focal,Focal seizures which cannot be specified with ...
GNSZ,9,Generalized Non-Specific Seizure,Electrographic,Generalized,Generalized seizures which cannot be further c...


In [0]:
# download_file() saved the spreadsheet inside DOWNLOAD_DIR, so build the path
# from that instead of hard-coding the absolute "/content" working directory
seiz_info_path = os.path.join(DOWNLOAD_DIR, 'seizures_v32r.xlsx')
train_info = pd.read_excel(seiz_info_path, sheet_name='train')

# cut the per-class event counts out of the sheet by position and label them
train_seiz_type = train_info.iloc[1:12,26:30]
train_seiz_type.columns = ['Class Code', 'Events', 'Freq.', 'Cum.']
train_seiz_type = train_seiz_type.set_index('Class Code')

# add the class descriptions loaded from the seizure-types spreadsheet
train_seiz_type.join(seiz_types)

Unnamed: 0_level_0,Events,Freq.,Cum.,Class No.,Event Name,Signs,Locality,Description
Class Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
FNSZ,1536,0.648101,0.648101,8.0,Focal Non-Specific Seizure,Electrographic,Hemispheric/Focal,Focal seizures which cannot be specified with ...
GNSZ,408,0.172152,0.820253,9.0,Generalized Non-Specific Seizure,Electrographic,Generalized,Generalized seizures which cannot be further c...
SPSZ,49,0.0206751,0.840928,10.0,Simple Partial Seizure,Clinical & Electrographic,All,Partial seizures during consciousness; Type sp...
CPSZ,277,0.116878,0.957806,11.0,Complex Partial Seizure,Clinical & Electrographic,All,Partial Seizures during unconsciousness; Type ...
ABSZ,50,0.021097,0.978903,12.0,Absence Seizure,Clinical & Electrographic,Generalized,Absence Discharges observed on EEG; patient lo...
TNSZ,18,0.00759494,0.986498,13.0,Tonic Seizure,Clinical & Electrographic,All,Stiffening of body during seizure (EEG effects...
CNSZ,0,0.0,0.986498,14.0,Clonic Seizure,Clinical & Electrographic,All,Jerking/shivering of body during seizure
TCSZ,30,0.0126582,0.999156,15.0,Tonic Clonic Seizure,Clinical & Electrographic,All,At first stiffening and then jerking of body (...
ATSZ,0,0.0,0.999156,16.0,Atonic Seizure,Clinical & Electrographic,,Sudden loss of muscle tone
MYSZ,2,0.000843882,1.0,17.0,Myoclonic Seizure,Clinical & Electrographic,,Myoclonous jerks of limbs


In [0]:
# ----------------
# Descriptive Keys
# ----------------
# cut the key table out of the sheet by position and label its columns
type_key_cols = ['EEG Type', 'EEG SubType', 'Rooms', 'REMOVE', 'Description']
train_type_key = train_info.iloc[24:43,16:21]
train_type_key.columns = type_key_cols
train_type_key = train_type_key.drop(columns=['Rooms', 'REMOVE'])
# blank 'EEG Type' cells take the value of the row above them
train_type_key = train_type_key.assign(**{'EEG Type': train_type_key['EEG Type'].ffill()})
train_type_key = train_type_key.set_index('EEG Type')

# ------------
# Type Summary
# ------------
train_type_summary = train_info.iloc[1:7,16:20]
train_type_summary.columns = ['EEG Type', 'Sessions', 'Freq.', 'Cum.']

# key rows with at least one missing value, leaving out the last of them
desc = train_type_key[train_type_key.isnull().any(axis=1)].iloc[:-1]

# attach the descriptions by EEG type and drop the sub-type column again
train_type_summary = (
    train_type_summary
    .set_index('EEG Type')
    .join(desc)
    .drop(columns='EEG SubType')
)

train_type_summary[['Description','Sessions', 'Freq.', 'Cum.']]

Unnamed: 0_level_0,Description,Sessions,Freq.,Cum.
EEG Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
EMU,Epilepsy Monitoring Unit,162,0.136709,0.136709
ICU,Intensive Care Unit,438,0.36962,0.506329
Inpatient,Inpatient But Not ICU,350,0.295359,0.801688
Outpatient,Routine EEGs,193,0.162869,0.964557
Unknown,,42,0.035443,1.0
Total:,,1185,1.0,


In [0]:
# ---------------
# SubType Summary
# ---------------

# cut the per-subtype session counts out of the sheet by position
train_loc_summary = train_info.iloc[1:16,21:25]
train_loc_summary.columns = ['EEG SubType', 'Sessions', 'Freq.', 'Cum.']

# complete key rows only, re-indexed by sub-type so they can be joined on it
desc = (
    train_type_key
    .dropna()
    .reset_index(drop=True)
    .set_index('EEG SubType')
)

train_loc_summary = train_loc_summary.set_index('EEG SubType').join(desc)
train_loc_summary[['Description','Sessions', 'Freq.', 'Cum.']]

Unnamed: 0_level_0,Description,Sessions,Freq.,Cum.
EEG SubType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
BURN,Burn Unit,16,0.0135021,0.0135021
CICU,Cardiac Intensive Care,28,0.0236287,0.0371308
EMU,Epilepsy Monitoring Unit,163,0.137553,0.174684
ER,Emergency Room,8,0.00675105,0.181435
General,Inpatient But Not ICU or Outpatient,342,0.288608,0.470042
ICU,Intensive Care Unit,13,0.0109705,0.481013
NICU,Neuro-ICU Facility (about 5 to 6 rooms),142,0.119831,0.600844
NSICU,Neural Surgical ICU (about 10 rooms),81,0.0683544,0.669198
OR,Operating Room,1,0.000843882,0.670042
Outpatient,5th Floor Neurology Department,192,0.162025,0.832068


In [0]:
# cut the normal/abnormal session counts out of the sheet by position
class_summary_cols = ['Normal Classification', 'Sessions', 'Freq.', 'Cum.']
train_class_summary = train_info.iloc[9:12,16:20]
train_class_summary.columns = class_summary_cols

# index the table by the classification label and show it
train_class_summary = train_class_summary.set_index(class_summary_cols[0])
train_class_summary

Unnamed: 0_level_0,Sessions,Freq.,Cum.
Normal Classification,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Normal,156,0.131646,0.131646
Abnormal,1029,0.868354,1.0
Total:,1185,1.0,


In [0]:
# just want the info per file here; .copy() makes file_info an independent
# frame, so the column edits below neither touch train_info nor trigger
# pandas' SettingWithCopyWarning
file_info = train_info.iloc[1:6101,1:15].copy()
# cleans some of the names
file_info_cols = ['File No.', 'Patient', 'Session', 'File',
                  'EEG Type', 'EEG SubType', 'LTM or Routine',
                  'Normal/Abnormal', 'No. Seizures File',
                  'No. Seizures/Session', 'Filename', 'Seizure Start',
                  'Seizure Stop', 'Seizure Type']
file_info.columns = file_info_cols

# we forward fill as there are gaps in the excel file to represent the info
# is the same as above (apart from in the filename, seizure start, seizure stop
# and seizure type columns, i.e. the last four)
for col_name in file_info.columns[:-4]:
    file_info[col_name] = file_info[col_name].ffill()

# patient ID is an integer rather than float
file_info['Patient'] = file_info['Patient'].astype(int)

file_info.head()

Unnamed: 0,File No.,Patient,Session,File,EEG Type,EEG SubType,LTM or Routine,Normal/Abnormal,No. Seizures File,No. Seizures/Session,Filename,Seizure Start,Seizure Stop,Seizure Type
1,1,77,s003,t000,Outpatient,Outpatient,Routine,Abnormal,0.0,0.0,./edf/train/01_tcp_ar/000/00000077/s003_2010_0...,,,
2,2,254,s005,t000,Outpatient,Outpatient,Routine,Abnormal,0.0,0.0,./edf/train/01_tcp_ar/002/00000254/s005_2010_1...,,,
3,3,254,s006,t001,Outpatient,Outpatient,Routine,Abnormal,0.0,0.0,./edf/train/01_tcp_ar/002/00000254/s006_2011_0...,,,
4,4,254,s007,t000,Inpatient,General,Routine,Abnormal,0.0,0.0,./edf/train/01_tcp_ar/002/00000254/s007_2013_0...,,,
5,5,272,s007,t000,ICU,BURN,LTM,Abnormal,0.0,0.0,./edf/train/01_tcp_ar/002/00000272/s007_2003_0...,,,


In [0]:
#LOAD DATA

# our example events file: entry 20 among the files with at least one seizure
has_seizure = file_info['No. Seizures File'] > 0
SEIZURE_EVENTS_FILE = file_info.loc[has_seizure, 'Filename'].iloc[20]

# the folder of that file: its path without the first component and the file name
path_parts = SEIZURE_EVENTS_FILE.split('/')
example_file_dir = '/'.join(path_parts[1:-1])

# this downloads the edf and event files found in that folder
download_TUH(DOWNLOAD_DIR, auth_dict, example_file_dir, output=True)

Downloading: https://www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg_seizure/v1.5.0/edf/train/01_tcp_ar/008/00000883/s002_2010_09_01/00000883_s002_t000.edf
Downloading: https://www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg_seizure/v1.5.0/edf/train/01_tcp_ar/008/00000883/s002_2010_09_01/00000883_s002_t000.tse


In [0]:
import pandas as pd
import numpy as np
import pyedflib

def data_load(data_file, selected_channels=[]):
    """Load an EDF recording into a time-indexed DataFrame.

    Note: this replaces the text-file ``data_load`` defined earlier in the
    notebook.

    Parameters
    ----------
    data_file : str
        Path of the EDF file.
    selected_channels : list of str, optional
        Labels of the channels to load; when empty, every channel in the
        file is loaded. The argument itself is never modified.

    Returns
    -------
    (pandas.DataFrame, frequency)
        One column per selected channel (columns axis named ``'Channel'``),
        indexed by ``'Time'`` = sample position divided by the first
        channel's sampling frequency, plus that frequency. A channel that is
        missing from the file, or whose data does not fit the buffer sized
        from the first channel, is filled with NaN.
    (empty pandas.DataFrame, None)
        If the file could not be opened or read (``OSError``).
    """
    try:
        # The context manager closes the file again even when reading fails;
        # a handle left open can make a second load of the same file fail.
        with pyedflib.EdfReader(data_file) as f:
            # get the names of the signals
            channel_names = f.getSignalLabels()
            # get the sampling frequencies of each signal
            channel_freq = f.getSampleFrequencies()

            # default to every channel in the file
            if len(selected_channels) == 0:
                selected_channels = channel_names

            # buffer of zeros, sized from the first channel's sample count
            sigbufs = np.zeros((f.getNSamples()[0], len(selected_channels)))
            for i, channel in enumerate(selected_channels):
                try:
                    # add the channel data into the array
                    sigbufs[:, i] = f.readSignal(channel_names.index(channel))
                except ValueError:
                    # Raised when the channel length differs from the buffer
                    # (e.g. a different sampling rate) or the label is not in
                    # the file. For simplicity the column is just set to NaN.
                    sigbufs[:, i] = np.nan
    except OSError:
        # unreadable or missing file: signal failure with an empty result
        return pd.DataFrame(), None

    df = pd.DataFrame(sigbufs, columns=selected_channels)

    # evenly spaced time stamps: sample position / sampling frequency
    seconds = np.linspace(0,
                          len(df)/channel_freq[0],
                          len(df), endpoint=False)

    # make the time stamp the index
    df['Time'] = seconds
    df = df.set_index('Time')

    # name the columns as channel
    df.columns.name = 'Channel'

    return df, channel_freq[0]

# the recording has the same name as the events file, with an edf extension
events_file_name = SEIZURE_EVENTS_FILE.rsplit('/', 1)[-1]
seiz_edf_name = events_file_name[:-3]+'edf'

# load it from the download folder and show its shape
seiz_data, seiz_freq = data_load(f"{DOWNLOAD_DIR}/{seiz_edf_name}")
display(seiz_data.shape)

(400250, 36)

In [0]:
#PLOT DATA
import mne
def mne_object(data, freq, events_tse = pd.DataFrame()):
    """Build an ``mne.io.RawArray`` from a DataFrame, optionally annotated.

    Note: this replaces the ``mne_object(data, info, output)`` defined
    earlier in the notebook.

    Parameters
    ----------
    data : pandas.DataFrame
        One column per channel; every channel is declared as type 'eeg'.
        Values are expected in microvolts and are converted to volts.
    freq : float
        Sampling frequency passed to ``mne.create_info``.
    events_tse : pandas.DataFrame, optional
        Events with the columns 'Start', 'End' and 'Code'. When not empty,
        each row becomes an annotation running from 'Start' for
        'End' - 'Start', described by 'Code'.

    Returns
    -------
    mne.io.RawArray
    """
    # meta data about the EEG: channel names, sampling rate and channel types
    channel_labels = list(data.columns)
    info = mne.create_info(ch_names=channel_labels,
                           sfreq=freq,
                           ch_types=['eeg']*data.shape[-1])

    # microvolts -> volts, then transpose for MNE
    volts = data * 1e-6
    raw = mne.io.RawArray(volts.transpose(), info)

    # nothing to annotate
    if events_tse.empty:
        return raw

    durations = events_tse['End'] - events_tse['Start']
    raw.set_annotations(mne.Annotations(events_tse['Start'],
                                        durations,
                                        events_tse['Code']))
    return raw

# the events file sits in the download folder under its bare file name
seiz_events_name = SEIZURE_EVENTS_FILE.rsplit('/', 1)[-1]

# space-separated rows without a header; the first line of the file is skipped
tse_columns = ['Start', 'End', 'Code', 'Certainty']
events_tse = pd.read_csv(f"{DOWNLOAD_DIR}/{seiz_events_name}",
                         sep=' ',
                         skiprows=1,
                         header=None,
                         names=tse_columns)

# raw object carrying the events as annotations
tuh_mne = mne_object(seiz_data, seiz_freq, events_tse)

Creating RawArray with float64 data, n_channels=36, n_times=400250
    Range : 0 ... 400249 =      0.000 ...  1600.996 secs
Ready.


In [0]:
import math

# this just downloads the file we want
def download_TUH_file(DIR, headers, file, output=False):
    """Download a single file of the TUH seizure corpus into ``DIR``.

    Parameters
    ----------
    DIR : str
        Local directory; the file is stored there under its bare file name.
    headers : dict
        Credentials forwarded to ``download_file``.
    file : str
        Path of the file relative to the v1.5.0 download root.
    output : bool, optional
        Forwarded to ``download_file``; when True the URL is printed.
    """
    base_url = 'https://www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg_seizure/v1.5.0/'
    # keep only the last path component for the local copy
    local_name = file.split('/')[-1]
    download_file(base_url+file, DIR+'/'+local_name, headers, output)

# plot_kwargs already carries a 'duration' entry, so passing duration=30
# alongside **plot_kwargs would raise "got multiple values for keyword
# argument". Build the 30-second variant once instead.
window_kwargs = dict(plot_kwargs, duration=30)

# dropna() skips the rows without a seizure type (replaces the former
# try/except around math.isnan)
for seiz_type in file_info['Seizure Type'].dropna().unique():
    # event files of this seizure type; rows without a file name are ignored
    is_type = file_info['Seizure Type'] == seiz_type
    seiz_type_files = file_info.loc[is_type, 'Filename'].dropna()
    if seiz_type_files.empty:
        continue

    # pick one of them at random
    seiz_type_file = seiz_type_files.sample().values[0]
    tse_file_name = seiz_type_file.split('/')[-1]

    # heading: the file name without its ".tse" extension
    print(color.BOLD+color.UNDERLINE+tse_file_name[:-4]+color.END)

    # this will download the event file and the associated edf
    download_TUH_file(DOWNLOAD_DIR, auth_dict, seiz_type_file, output=True)
    download_TUH_file(DOWNLOAD_DIR, auth_dict, seiz_type_file[:-3]+'edf', output=True)

    # get file paths
    seiz_edf_name = tse_file_name[:-3]+'edf'
    seiz_tse_name = DOWNLOAD_DIR+'/'+tse_file_name

    seiz_data, seiz_freq = data_load(DOWNLOAD_DIR+'/'+seiz_edf_name)
    events_tse = pd.read_csv(seiz_tse_name, skiprows=1, sep=' ', header=None,
                             names=['Start', 'End', 'Code', 'Certainty'])

    tuh_mne = mne_object(seiz_data, seiz_freq, events_tse)

    # one 30-second window per event class, starting at one of its events
    for class_code in events_tse['Code'].unique():
        code_events = events_tse.where(events_tse['Code'] == class_code).dropna()
        window_start = code_events.sample(random_state=0)['Start'].values[0]
        tuh_mne.plot(start=window_start, **window_kwargs)


[1m[4m00006563_s001_t000.[0m
Downloading: https://www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg_seizure/v1.5.0/./edf/train/01_tcp_ar/065/00006563/s001_2010_02_08/00006563_s001_t000.tse
Downloading: https://www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg_seizure/v1.5.0/./edf/train/01_tcp_ar/065/00006563/s001_2010_02_08/00006563_s001_t000.edf
Creating RawArray with float64 data, n_channels=36, n_times=224000
    Range : 0 ... 223999 =      0.000 ...   895.996 secs
Ready.


KeyError: ignored