In [1]:
%reload_ext autoreload
%autoreload 3

import mne
import pandas as pd
import numpy as np
from src.preprocessing import utils
from pathlib import Path
import re
import matplotlib.pyplot as plt

# Root folders of the raw recordings and of the MPI-preprocessed recordings.
# Both keep their trailing slash because later cells build file paths by
# string concatenation.
data_path = '/Volumes/Extreme SSD/PhD/MPI-LEMON/EEG_Raw_BIDS_ID/'
mpi_path = '/Volumes/EEG_MPILMBB_LEMON/EEG_Preprocessed_BIDS_ID/EEG_Preprocessed/'

### Which subjects were excluded from the MPI preprocessed dataset?

In [69]:
# Subject ids found in the preprocessed folder. The greedy regex captures the
# text between 'sub-' and the last underscore of each file stem; building a
# set drops duplicate ids.
sub_ids_pro = {
    re.search('sub-(.*)_(.*)', subject_path.stem).group(1)
    for subject_path in Path(mpi_path).glob('sub-*')
}

# Subject ids found in the raw folder (everything after 'sub-' in the stem).
sub_ids_raw = {
    re.search('sub-(.*)', subject_path.stem).group(1)
    for subject_path in Path(data_path).glob('sub-*')
}

# Ids that exist in the raw folder but are missing from the preprocessed one,
# i.e. raw - pro. (The difference used to be taken the other way round, which
# contradicted this description.)
sub_ids_excluded = sorted(sub_ids_raw - sub_ids_pro)

## TODO: delete the entire cell: Annotations & Data Segmentation
### Data with different annotation patterns

In [124]:
# Walk over every raw recording, run the annotation checker on its annotation
# descriptions and remember the subjects whose check values do not sum to 2.
fnames_annot_misbehave = {}
sub_ids_raw = []
for subject_path in sorted(Path(data_path).glob('sub-*')):
    sub_id = subject_path.stem
    sub_ids_raw.append(sub_id)

    vhdr_file = data_path+f'/{sub_id}/RSEEG/{sub_id}.vhdr'
    raw = mne.io.read_raw_brainvision(vhdr_file, verbose=False)

    # check annot names (on a copy, so the annotations of `raw` stay untouched)
    points = utils.annotations_checker(raw.annotations.description.copy())

    # recordings whose check sums to 2 follow the common pattern: nothing to store
    if sum(points.values()) == 2:
        continue
    fnames_annot_misbehave[sub_id] = points

### 1. Segment and preprocess data with common annotation patterns

In [136]:
# Folder that receives the segmented recordings and the resampling frequency
# passed to `resample` when the segments are exported.
output_path = '/Volumes/Extreme SSD/PhD/MPI-LEMON/EEG_Raw_segmented/'
rsfreq = 250

# Subjects that were not flagged for a deviating annotation pattern, sorted.
sub_ids_normal = sorted(set(sub_ids_raw).difference(fnames_annot_misbehave))

In [138]:
# Segment the first five regular recordings, then downsample and export every
# resulting segment.
for sub_id in sub_ids_normal[:5]:
    vhdr_file = data_path+f'/{sub_id}/RSEEG/{sub_id}.vhdr'
    raw = mne.io.read_raw_brainvision(vhdr_file, verbose=False)
    onsets, pattern = utils.find_pattern(raw)
    raws = utils.segment_raw(raw, onsets, pattern, duration=60)

    # one output folder per subject (created on demand)
    subject_path = output_path + sub_id
    Path(subject_path).mkdir(parents=True, exist_ok=True)

    # downsample each EC/EO segment to `rsfreq` and save it in brainvision format
    for condition in raws.keys():
        segment = raws[condition]
        segment.resample(rsfreq)
        segment.export(subject_path + f'/{sub_id}_{condition}.vhdr')

### 2. Segment raw data with uncommon annotation patterns

In [None]:
# Subjects grouped by the way their annotations deviate from the common pattern.

# extra 'New Segment/' and 'Comment/no USB Connection to actiCAP' markers at
# the beginning
typ1 = ['sub-010026']

# extra 'New Segment/' and 'Comment/no USB Connection to actiCAP' markers at
# the beginning, without extra switch markers
typ2 = ['sub-010030']

# only one switch marker at the beginning
typ3 = ['sub-010062', 'sub-010064', 'sub-010134']

# extra 'New Segment/', 'Comment/no USB Connection to actiCAP' and
# 'Stimulus/S  1' markers at the beginning
typ4 = ['sub-010191']

# no switch markers at the beginning
typ5 = ['sub-010264']

# ----------------------------------------------
# no switch markers at all; the eyes-closed annotation is 'Stimulus/S208'
typ6 = ['sub-010126']

# no switch markers in the annotations; only two useless markers at the
# beginning
typ7 = [
    'sub-010138', 'sub-010155', 'sub-010157', 'sub-010162', 'sub-010163',
    'sub-010164', 'sub-010165', 'sub-010166', 'sub-010168', 'sub-010228',
    'sub-010233', 'sub-010239', 'sub-010255', 'sub-010257', 'sub-010258',
    'sub-010260', 'sub-010261', 'sub-010262', 'sub-010263', 'sub-010267',
    'sub-010268', 'sub-010269', 'sub-010270', 'sub-010271', 'sub-010272',
    'sub-010273', 'sub-010274', 'sub-010275', 'sub-010284', 'sub-010311',
    'sub-010315', 'sub-010318',
]

## Divide data into eyes-closed and eyes-open segments

In [133]:
# Sanity check: do the onsets of the switch markers coincide with the start of
# the EC/EO segments? One entry per subject and condition is stored.
sanity_check = {}
for subject_path in sorted(Path(data_path).glob('sub-*')):
    sub_id = subject_path.stem
    vhdr_file = data_path+f'/{sub_id}/RSEEG/{sub_id}.vhdr'
    raw = mne.io.read_raw_brainvision(vhdr_file, verbose=False)

    # map every (renamed, enumerated) annotation description to its onset
    desc = utils.change_annot_names(raw.annotations.description.copy(), enumerated=True)
    onsets = raw.annotations.onset.copy()
    annot_dict = dict(zip(desc, onsets))

    # locate the EC/EO switch points and cut the recording there
    switch_onsets, switch_pattern = utils.find_switch_onset_pattern(annot_dict)
    raws = utils.segment_raw(raw, switch_onsets, switch_pattern, duration=60)

    # each segment should contain only EC or only EO
    for condition in ('EC', 'EO'):
        sanity_check[f'{sub_id}_{condition}'] = utils.check_segments(raws[condition])

In [None]:
# NOTE: this cell works on whatever `raw` an earlier cell left behind and
# modifies it in place (resample, pick).

### ------------- resampling & save
raw.resample(128)

### ------------- pick eeg
raw.pick(picks='eeg')

### ------------- train-test-val split

### ------------- segmentation

### ------------- baseline correction (subtract from each input channel its average over the first 0.5 s)
data = raw.get_data()
n_baseline = int(raw.info['sfreq'] * 0.5)  # number of samples in the first 0.5 s
baseline = data[:, :n_baseline].mean(axis=1, keepdims=True)
data = data - baseline

# wrap the corrected array in a new raw object that reuses the measurement info
raw_EC = mne.io.RawArray(data, raw.info)

### ------------- scaling

### ------------- clamping

### ------------- rereferencing??

### ------------- functional connectivity

## Processing

In [2]:
bad_chs = {}

### ------------- load raw data (VEOG is declared as an EOG channel)
sub_id = 'sub-010026'
vhdr_file = data_path+f'/{sub_id}/RSEEG/{sub_id}.vhdr'
raw = mne.io.read_raw_brainvision(vhdr_file, verbose=False, eog=['VEOG'])

### ------------- divide into EC/EO segments
# map every (renamed, enumerated) annotation description to its onset
desc = utils.change_annot_names(raw.annotations.description.copy(), enumerated=True)
onsets = raw.annotations.onset.copy()
annot_dict = dict(zip(desc, onsets))

# locate the EC/EO switch points and cut the recording there
switch_onsets, switch_pattern = utils.find_switch_onset_pattern(annot_dict)
raws = utils.segment_raw(raw, switch_onsets, switch_pattern, duration=60)

# keep one object per condition and drop the container
raw_EO, raw_EC = raws['EO'], raws['EC']
del raws

In [44]:
from sklearn.preprocessing import robust_scale, scale

def plot_raw(data, psd=False, info=None):
    """Plot an array as an MNE raw object, either as traces or as a PSD.

    Parameters
    ----------
    data : array
        Array whose first axis is kept; all remaining axes are flattened into
        one before plotting.
    psd : bool
        If True, plot the power spectral density (averaged, with spatial
        colors); otherwise open the raw browser.
    info : mne.Info | None
        Measurement info used to build the ``RawArray``. When omitted, the
        info of the notebook-level ``raw`` is used, which is the previous
        behaviour. Pass it explicitly when ``data`` does not come from the
        current ``raw``.
    """
    if info is None:
        # fall back to the notebook-global recording
        info = raw.info

    flat = data.reshape(data.shape[0], -1)
    raw_array = mne.io.RawArray(flat, info)
    if psd:
        raw_array.compute_psd().plot(average=True, spatial_colors=True)
    else:
        raw_array.plot()

In [22]:
# # use a sample of the data
# raw = raw_EO.copy().crop(0, 60)
# ### ------------- resampling & save
# raw.resample(128)

# ### ------------- pick eeg
# raw.pick(picks='eeg')

### ------------- segment data into 3s epochs
# The epoch length is derived from the sampling rate instead of being fixed at
# 384 samples: 384 is only 3 s at 128 Hz, and the resampling step above is
# commented out.
epoch_duration = 3  # seconds
sfreq = raw.info['sfreq']
n_epoch_samples = int(sfreq * epoch_duration)

data = raw.get_data()
# drop a trailing partial epoch so the reshape cannot fail on lengths that are
# not a multiple of the epoch length
n_epochs = data.shape[1] // n_epoch_samples
data = data[:, :n_epochs * n_epoch_samples]
data = data.reshape(data.shape[0], n_epochs, n_epoch_samples)

# ### ------------- baseline correction (subtract from each input channel and each segment its average over the first 0.5 s)
n_baseline = int(sfreq * 0.5)
data = data - data[:, :, :n_baseline].mean(axis=2, keepdims=True)
# plot_raw(data)

### ------------- robust scaling the data (subtracting the median and dividing by the interquartile range)

### ------------- clamping

### ------------- rereferencing??

### ------------- functional connectivity

In [62]:
# Robust-scale every row of the flattened array (rows follow the first axis of
# `data`). The row count is taken from `data` rather than hard-coded to 61.
data_ = robust_scale(data.reshape(data.shape[0], -1), axis=1)

# Clamp outliers symmetrically to [-n, n] in robust-scaled units; previously
# only values above +n were clamped, leaving negative outliers unbounded.
# NOTE(review): a previous version of this comment mentioned a threshold of 20
# while the code uses 2 - confirm the intended value.
n = 2
data_ = np.clip(data_, -n, n)

plot_raw(data_)
# plt.plot(data_[4, :])
# plt.show()


Creating RawArray with float64 data, n_channels=61, n_times=7680
    Range : 0 ... 7679 =      0.000 ...    59.992 secs
Ready.
Using pyopengl with version 3.1.6


Channels marked as bad:
none


In [6]:
# Robust-scale the data one epoch (second axis of `data`) at a time.
# NOTE(review): axis=0 computes the statistics along the first axis of each
# epoch slice, whereas the flattened variant in this notebook uses axis=1 -
# confirm which one is intended.
n_channels, n_epochs, n_times = data.shape[0], data.shape[1], data.shape[2]
data_ = np.zeros((n_channels, n_epochs, n_times))
for epoch_idx in range(n_epochs):
    epoch = data[:, epoch_idx, :].reshape(n_channels, -1)
    data_[:, epoch_idx, :] = robust_scale(epoch, axis=0)

Creating RawArray with float64 data, n_channels=61, n_times=7680
    Range : 0 ... 7679 =      0.000 ...    59.992 secs
Ready.
Using pyopengl with version 3.1.6


Channels marked as bad:
none
