TUAR data loading, referencing, mapping and annotating

In [56]:
import mne
import sys
import os
import importlib
import pandas as pd
import numpy as np

from braindecode.preprocessing.preprocess import preprocess, Preprocessor, zscore
from braindecode.datasets import (create_from_mne_raw, create_from_mne_epochs)
from braindecode.preprocessing.windowers import create_windows_from_events
from braindecode.datasets.sleep_physionet import SleepPhysionet
from braindecode.datasets import BaseDataset, BaseConcatDataset, WindowsDataset
from mne_extras import write_edf

from plot import Plot
from segment import Segmenter
from helper_funcs import HelperFuncs as hf

import matplotlib.pyplot as plt

from mat73 import loadmat
from tqdm import tqdm

import pprint

# Shared pretty-printer (2-space indent) used below to print nested lists/dicts readably.
pp = pprint.PrettyPrinter(indent=2)


In [234]:

# Annotation map: integer label code found in a .rec file -> short label name.
REC_MAP = {
    # Single-label codes
    0: 'null',
    1: 'spsw',
    2: 'gped',
    3: 'pled',
    4: 'eyeb',
    5: 'artf',
    6: 'bckg',
    7: 'seiz',
    8: 'fnsz',
    9: 'gnsz',
    10: 'spsz',
    11: 'cpsz',
    12: 'absz',
    13: 'tnsz',
    14: 'cnsz',
    15: 'tcsz',
    16: 'atsz',
    17: 'mysz',
    18: 'nesz',
    19: 'intr',
    20: 'slow',
    21: 'eyem',
    22: 'chew',
    23: 'shiv',
    24: 'musc',
    25: 'elpp',
    26: 'elst',
    27: 'calb',
    28: 'hphs',
    29: 'trip',
    30: 'elec',
    # Composite codes: two of eyem/chew/shiv/musc/elec joined by an underscore
    100: 'eyem_chew',
    101: 'eyem_shiv',
    102: 'eyem_musc',
    103: 'eyem_elec',
    104: 'chew_shiv',
    105: 'chew_musc',
    106: 'chew_elec',
    107: 'shiv_musc',
    108: 'shiv_elec',
    109: 'musc_elec',
}


# EEG channel montage map for TUAR: integer channel index (first field of a
# .rec annotation line) -> bipolar pair name 'A-B' in standard 10-20 names.
# The trailing comment on each entry names the two TUAR reference channels the
# pair is built from; the re-referencing code computes channel A minus channel B.
# Only indices 0 (Fp1-F7) and 21 (P4-O2) are active. The other entries are kept
# commented out for reference; annotations for those indices are dropped when
# .rec files are parsed, and no signal is built for them.
# NOTE(review): entries 8 and 13 (the A1/A2 pairs) are doubly commented out, and
# A1/A2 are not in the channel lists below — confirm before enabling them.
MONTAGE_MAP = {
    0: 'Fp1-F7', # EEG FP1-REF -- EEG F7-REF
    # 1: 'F7-T3', # EEG F7-REF -- EEG T3-REF
    # 2: 'T3-T5', # EEG T3-REF -- EEG T5-REF
    # 3: 'T5-O1', # EEG T5-REF -- EEG O1-REF
    # 4: 'Fp2-F8', # EEG FP2-REF -- EEG F8-REF
    # 5: 'F8-T4', # EEG F8-REF -- EEG T4-REF
    # 6: 'T4-T6', # EEG T4-REF -- EEG T6-REF
    # 7: 'T6-O2', # EEG T6-REF -- EEG O2-REF
    # # 8: 'A1-T3', # EEG A1-REF -- EEG T3-REF
    # 9: 'T3-C3', # EEG T3-REF -- EEG C3-REF
    # 10: 'C3-Cz', # EEG C3-REF -- EEG CZ-REF
    # 11: 'Cz-C4', # EEG CZ-REF -- EEG C4-REF
    # 12: 'C4-T4', # EEG C4-REF -- EEG T4-REF
    # # 13: 'T4-A2', # EEG T4-REF -- EEG A2-REF
    # 14: 'Fp1-F3', # EEG FP1-REF -- EEG F3-REF
    # 15: 'F3-C3', # EEG F3-REF -- EEG C3-REF
    # 16: 'C3-P3', # EEG C3-REF -- EEG P3-REF
    # 17: 'P3-O1', # EEG P3-REF -- EEG O1-REF
    # 18: 'Fp2-F4', # EEG FP2-REF -- EEG F4-REF
    # 19: 'F4-C4', # EEG F4-REF -- EEG C4-REF
    # 20: 'C4-P4', # EEG C4-REF -- EEG P4-REF
    21: 'P4-O2', # EEG P4-REF -- EEG O2-REF
}

# Every TUAR 10-20 reference channel except FP1, F7, P4 and O2 (the four
# channels behind the active MONTAGE_MAP pairs), followed by the auxiliary /
# non-EEG channels.
# NOTE(review): nothing in the visible part of this notebook reads this list.
TUAR_EXCLUDE_LIST = [
    # 10-20 reference channels
    'EEG T3-REF', 'EEG T5-REF', 'EEG F3-REF',
    'EEG C3-REF', 'EEG P3-REF', 'EEG O1-REF',
    'EEG FP2-REF', 'EEG F8-REF', 'EEG T4-REF',
    'EEG T6-REF', 'EEG F4-REF', 'EEG C4-REF',
    'EEG FZ-REF', 'EEG CZ-REF', 'EEG PZ-REF',
    # auxiliary / non-EEG channels
    'EEG ROC-REF', 'EEG LOC-REF', 'EEG EKG1-REF',
    'EEG T1-REF', 'EEG T2-REF', 'PHOTIC-REF',
    'IBI', 'BURSTS', 'SUPPR',
]

# Mapping of TUAR channels to standard 10-20 channels.
# Written out pair by pair so each TUAR name sits next to its 10-20 name; the
# two channel lists below are derived from it and keep this insertion order.
MAPPING_TUAR_STANDARD_10_20 = {
    'EEG FP1-REF': 'Fp1',
    'EEG F7-REF': 'F7',
    'EEG T3-REF': 'T3',
    'EEG T5-REF': 'T5',
    'EEG F3-REF': 'F3',
    'EEG C3-REF': 'C3',
    'EEG P3-REF': 'P3',
    'EEG O1-REF': 'O1',
    'EEG FP2-REF': 'Fp2',
    'EEG F8-REF': 'F8',
    'EEG T4-REF': 'T4',
    'EEG T6-REF': 'T6',
    'EEG F4-REF': 'F4',
    'EEG C4-REF': 'C4',
    'EEG P4-REF': 'P4',
    'EEG O2-REF': 'O2',
    'EEG FZ-REF': 'Fz',
    'EEG CZ-REF': 'Cz',
    'EEG PZ-REF': 'Pz',
}
# TUAR 10-20 channel names
TUAR_CHANNELS = list(MAPPING_TUAR_STANDARD_10_20.keys())
# Standard 10-20 alphabetic channel names
STANDARD_10_20 = list(MAPPING_TUAR_STANDARD_10_20.values())
# Non-EEG channels to exclude
exclude_channels = [
    'EEG ROC-REF', 'EEG LOC-REF', 'EEG EKG1-REF',
    'EEG T1-REF', 'EEG T2-REF', 'PHOTIC-REF',
    'IBI', 'BURSTS', 'SUPPR',
]

In [235]:
def get_file_list(x):
    """Return the paths of all entries directly inside directory ``x``.

    Each entry name is joined onto ``x``. Entries are returned sorted by name,
    because ``os.listdir`` makes no ordering guarantee and a stable order keeps
    repeated runs of the notebook reproducible.

    Raises whatever ``os.listdir`` raises (e.g. ``FileNotFoundError`` when
    ``x`` does not exist).
    """
    return [os.path.join(x, fname) for fname in sorted(os.listdir(x))]

def get_id(x):
    """Return the last component of the '/'-separated path ``x``.

    Trailing slashes are ignored, so a directory path such as ``'a/b/'`` yields
    ``'b'`` rather than an empty string.
    """
    return os.path.basename(x.rstrip('/'))

# get TUAR annotations from txt (.rec) file
def get_tuar_annotations(txt):
    """Parse a comma-separated .rec annotation file, keeping mapped channels only.

    Each non-blank line is split on commas. The first field is read as an
    integer channel index and the last field as an integer label code. Lines
    whose channel index is not a key of MONTAGE_MAP are dropped (with the
    current map: everything except Fp1-F7 and P4-O2).

    Parameters
    ----------
    txt : str
        Path of the annotation file.

    Returns
    -------
    list of list of str
        One entry per kept line: the channel name from MONTAGE_MAP, the middle
        fields unchanged (callers in this notebook read fields 1 and 2 as start
        and stop times), and the label name from REC_MAP.

    Raises
    ------
    ValueError
        If the first or last field of a non-blank line is not an integer.
    KeyError
        If a kept line carries a label code that is not in REC_MAP.
    """
    annotations = []
    with open(txt, "r") as f:
        for line in f:
            # Blank lines (e.g. at the end of the file) would otherwise make
            # int('') raise ValueError.
            if not line.strip():
                continue
            annot = line.rstrip().split(',')
            channel = int(annot[0])
            # select only the channels enabled in MONTAGE_MAP (Fp1-F7, P4-O2)
            if channel in MONTAGE_MAP:
                annot[0] = MONTAGE_MAP[channel]
                annot[-1] = REC_MAP[int(annot[-1])]
                annotations.append(annot)
    return annotations

# Rename channels according to standard montage and map
def create_eeg_montage(raw):
    """Reduce ``raw`` to the TUAR 10-20 channels, rename them and set a montage.

    The steps are applied to ``raw`` itself, which is also returned:
    1. drop the channels listed in ``exclude_channels``;
    2. keep only ``TUAR_CHANNELS``, requesting that order (``ordered=True``);
    3. rename via ``MAPPING_TUAR_STANDARD_10_20``;
    4. attach MNE's 'standard_alphabetic' montage.
    """
    # Step 1: everything that is not an excluded non-EEG channel
    kept_names = list(filter(lambda name: name not in exclude_channels, raw.ch_names))
    raw.pick_channels(kept_names)

    # Step 2: restrict to the TUAR 10-20 set
    raw.pick_channels(TUAR_CHANNELS, ordered=True)

    # Step 3: TUAR reference names -> standard 10-20 alphabetic names
    print('Renaming to standard 10-20 alphabetic channels ...')
    mne.rename_channels(raw.info, MAPPING_TUAR_STANDARD_10_20)

    # Step 4: electrode positions from the standard montage
    raw.set_montage(mne.channels.make_standard_montage('standard_alphabetic'))

    return raw

# rereference channels according to TUAR montage
def rereference_channels(raw):
    """Compute the bipolar signals listed in MONTAGE_MAP from ``raw``.

    Every map value 'A-B' produces one row holding channel A minus channel B.
    Rows follow the iteration order of MONTAGE_MAP.

    Returns
    -------
    np.ndarray
        The stacked difference signals, one row per MONTAGE_MAP entry.
    """
    rows = []
    for pair_name in MONTAGE_MAP.values():
        first, second = pair_name.split('-')
        # Indexing raw by channel name gives a tuple whose element 0 is the data
        difference = raw[first][0] - raw[second][0]
        # Element 0 of the difference is the single channel's sample row
        rows.append(difference[0])
    return np.array(rows)

In [None]:

# Convert every TUAR .edf recording under data_dir into a bipolar FIF file
# (one output file per .edf), attaching the per-channel labels from the .rec
# file with the same stem when one exists.
data_dir = '/media/maligan/My Passport/msc_thesis/data/tuar/v2_1_0/edf/01_tcp_ar/'
processed_dir = '/media/maligan/My Passport/msc_thesis/data/tuar/v2_1_0/processed/'
# Fail early (and loudly) if the output location cannot be created.
os.makedirs(processed_dir, exist_ok=True)

files = []
descriptions = []  # one dict per converted recording
error = []         # stems of recordings whose export failed

# Output channel names come from MONTAGE_MAP so they always match the rows
# returned by rereference_channels().
bipolar_names = list(MONTAGE_MAP.values())

# Directory levels walked: data_dir/<subject>/<recording>/<date>/<file>.
# NOTE(review): judging by the logged path (.../107/00010748/s001_2013_09_19/...)
# these levels look like group folder / patient id / session — confirm naming.
subjects = {}
for subject in hf.get_file_list(data_dir):
    recordings = {}
    for recording in hf.get_file_list(subject):
        dates = {}
        for date in hf.get_file_list(recording):
            session_files = sorted(hf.get_file_list(date))

            # Index the annotation files by stem so each .edf is paired with
            # its own .rec regardless of directory listing order.
            # '.rec_orig' files have a different extension and are ignored.
            rec_by_stem = {}
            for path in session_files:
                stem, ext = os.path.splitext(os.path.basename(path))
                if ext == '.rec':
                    rec_by_stem[stem] = path

            session_raw = None
            for raw_path in session_files:
                # Stem is taken from the file name only, so dots in directory
                # names (e.g. 'v2.1.0') cannot corrupt the export name.
                to_export, ext = os.path.splitext(os.path.basename(raw_path))
                if ext != '.edf':
                    continue

                # Load, map to standard 10-20 names, build the bipolar signals
                raw = mne.io.read_raw_edf(raw_path)
                raw = create_eeg_montage(raw)
                data = rereference_channels(raw)

                info = mne.create_info(bipolar_names, ch_types=['eeg'] * len(bipolar_names), sfreq=raw.info['sfreq'])
                raw = mne.io.RawArray(data, info)

                annotations = []
                if to_export in rec_by_stem:
                    annotations = get_tuar_annotations(rec_by_stem[to_export])

                if annotations:
                    # transpose annotation list: (channels, starts, stops, labels)
                    annots = list(zip(*annotations))
                    onsets = np.array(annots[1]).astype(float)
                    # duration = stop - start
                    durations = np.array(annots[2]).astype(float) - onsets
                    # set annotations per channel
                    raw = raw.set_annotations(mne.Annotations(onset=onsets, duration=durations, description=annots[3], ch_names=[[x] for x in annots[0]]))

                out_path = os.path.join(processed_dir, f"{to_export}_2_channels.fif")
                try:
                    # overwrite=True keeps the cell re-runnable; without it every
                    # second run fails on the already existing files.
                    raw.save(out_path, picks=bipolar_names, overwrite=True)
                except Exception as exc:
                    # Best effort: note the failure and carry on with the next file
                    print(f'Could not save {to_export}: {exc!r}')
                    error.append(to_export)

                descriptions.append({'subject': subject, 'recording': recording, 'date': date})
                session_raw = raw

            # As before, one Raw is kept per session directory (the last
            # recording converted); sessions without any .edf are skipped.
            if session_raw is not None:
                dates[hf.get_id(date)] = session_raw
        recordings[hf.get_id(recording)] = dates
    subjects[hf.get_id(subject)] = recordings

# Report failed exports once, after the whole tree has been processed
pp.pprint(error)


In [232]:
# # TUAR sample
# raw_folder = './data/sample_TUAR/s005_2010_11_15/'
# edf_f = '00000254_s005_t000.edf'
# annot_txt = '00000254_s005_t000.rec'

# raw_path = raw_folder + edf_f

# files = []

# for raw_path in hf.get_file_list(date):
#     if '.rec' in hf.get_id(raw_path) and not '.rec_orig' in hf.get_id(raw_path):
#         # annotation file
#         annotations = get_tuar_annotations(raw_path)
#     if '.edf' in hf.get_id(raw_path):
#         # raw file
#         raw = mne.io.read_raw_edf(raw_path)
#         raw = create_eeg_montage(raw)
#         data = rereference_channels(raw)
        
#         info = mne.create_info([MONTAGE_MAP[0], MONTAGE_MAP[21]], ch_types=['eeg']*2, sfreq=raw.info['sfreq'])
#         raw = mne.io.RawArray(data, info)

#         # transpose annotation list
#         annots = list(zip(*annotations))
#         # calculate durations
#         durations = np.array(annots[2]).astype(float) - np.array(annots[1]).astype(float)
#         # set annotations per channel
#         raw = raw.set_annotations(mne.Annotations(onset=annots[1], duration=durations, description=annots[3], ch_names=[[x] for x in annots[0]]))


# files += [raw, annotations]

Extracting EDF parameters from /media/maligan/My Passport/msc_thesis/data/tuar/v2.1.0/edf/01_tcp_ar/107/00010748/s001_2013_09_19/00010748_s001_t000.edf...
EDF file detected
Setting channel info structure...
Creating raw.info structure...
Renaming to standard 10-20 alphabetic channels ...
Creating RawArray with float64 data, n_channels=2, n_times=370500
    Range : 0 ... 370499 =      0.000 ...  1481.996 secs
Ready.


In [279]:
# Debug cell: inspects `date` and `raw_path` as left over from the last
# iteration of the conversion loop, so it only works after that cell has run.
# Only the final expression (`raw_path`) is displayed; the list returned by
# hf.get_file_list(date) is discarded.
# hf.get_file_list(data_dir)
# hf.get_file_list(subject)
# hf.get_file_list(recording)
hf.get_file_list(date)
raw_path

'/media/maligan/My Passport/msc_thesis/data/tuar/v2.1.0/edf/01_tcp_ar/107/00010748/s001_2013_09_19/00010748_s001_t000.rec_orig'

In [314]:

# Load every processed FIF file back in as an MNE Raw object.
# Raises FileNotFoundError if dir_path does not exist (e.g. drive not mounted).
dir_path = '/media/maligan/My Passport/msc_thesis/data/tuar/v2_1_0/processed/'

# Sort so the file order does not depend on the helper's listing order, and
# keep only .fif files so a stray file in the folder cannot break read_raw_fif.
files = sorted(path for path in hf.get_file_list(dir_path) if path.endswith('.fif'))
dataset = [mne.io.read_raw_fif(path) for path in files]
# NOTE: 'subject' is the file's position in `files`, not a patient identifier.
descriptions = [{'subject': i} for i in range(len(files))]


FileNotFoundError: [Errno 2] No such file or directory: '/media/maligan/My Passport/msc_thesis/data/tuar/v2_1_0/processed/'

In [None]:
# Gather the description of every annotation stored in the processed files
# (one list entry per annotation, in file order).
annotations = []

for f in files:
    stored = mne.io.read_raw_fif(f).annotations
    annotations.extend(annot['description'] for annot in stored)

# mne.io.read_raw_fif(files[0])._annotations[0]['description']

In [329]:
# Distinct annotation labels present across the processed files (np.unique returns them sorted).
np.unique(annotations)

array(['bckg', 'chew', 'chew_elec', 'chew_musc', 'elec', 'eyem',
       'eyem_chew', 'eyem_elec', 'eyem_musc', 'eyem_shiv', 'musc',
       'musc_elec', 'shiv'], dtype='<U9')

### TUAR segmenter