# Preprocessing: BJH016


This notebook loads the clean data, applies a bipolar rereference, and epochs the data.


In [2]:
import matplotlib
# matplotlib.use("Qt5Agg")
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import signal, stats
import mat73
import re
from neurodsp.timefrequency import compute_wavelet_transform
from BCI2kReader import BCI2kReader as b2k
import os
import mne
from tabulate import tabulate
import IPython
import seaborn as sns

In [None]:
# Select matplotlib's interactive Qt5 backend for this kernel.
%matplotlib qt5

In [3]:
## Prep paths ##

# Subject ID; interpolated into the directory and file names built below.
subject = 'BJH016'
# Presumably the original recording on the lab server; not read by any cell visible here.
orig_data_fi = '/home/brooke/knight_server/remote/WashU/data/PacmanTask/BJH016/PacmanTask/ECOGS001R01.dat'
# Per-subject input directory: the clean .fif recordings and behavioral CSVs are read from here.
raw_data_dir = f"/home/brooke/pacman/raw_data/{subject}"
# Per-subject output directory: the re-referenced and epoched .fif files are written here.
preproc_data_dir = f"/home/brooke/pacman/preprocessing/{subject}/ieeg"

In [3]:
## Load Data ##

# Build both input paths up front, then open the recordings.
filtered_fif_path = f"{raw_data_dir}/ieeg/{subject}_notched_filtered_clean_ieeg.fif"
raw_fif_path = f"{raw_data_dir}/ieeg/{subject}_raw_clean_ieeg.fif"

# notch-filtered clean recording
filtered_clean_fif = mne.io.Raw(filtered_fif_path)

# unfiltered clean recording
raw_clean_fif = mne.io.Raw(raw_fif_path)



Opening raw data file /home/brooke/pacman/raw_data/BJH016/ieeg/BJH016_notched_filtered_clean_ieeg.fif...
    Range : 0 ... 1963999 =      0.000 ...   982.000 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH016/ieeg/BJH016_notched_filtered_clean_ieeg-1.fif...
    Range : 1964000 ... 3927999 =    982.000 ...  1963.999 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH016/ieeg/BJH016_notched_filtered_clean_ieeg-2.fif...
    Range : 3928000 ... 5891999 =   1964.000 ...  2945.999 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH016/ieeg/BJH016_notched_filtered_clean_ieeg-3.fif...
    Range : 5892000 ... 5901899 =   2946.000 ...  2950.950 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH016/ieeg/BJH016_raw_clean_ieeg.fif...
    Range : 0 ... 1963999 =      0.000 ...   982.000 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH016/ieeg/BJH016_raw_clean_ieeg-1.fif...
    Range : 1964000 ... 3927999 =    982.000 

In [4]:
# Display the recording's metadata (channel counts, bad channels, sampling rate, filter band).
raw_clean_fif.info

0,1
Measurement date,Unknown
Experimenter,Unknown
Digitized points,0 points
Good channels,"272 sEEG, 1 Stimulus"
Bad channels,"EMPTY, EMPTY_2, EMPTY_3, EMPTY_4, REF1, REF2, EMPTY_227, EMPTY_228, Ekg1, ekg2, EMPTY_254, EMPTY_255, EMPTY_256, GL1, GL2, GL3, HL1, HL2, HL3, HL4, IL1, IL2, IL3, BL1, F8, EL1, EL2, HL14, HL15, HL16, BL16, DL14, EL12, FL12, GL16, GL15, HL16, IL16, JL14, KL16, NR12, OR16, FP1, F3, C3, P3, O1, FP2, F4, C4, P4, O2, F7, T7, P7, F8, T8, P8, F9, F10, FPZ, FZ, CZ, PZ, OZ, DC01, DC02, DC03, DC04, DC05, DC06, DC07, DC08, DC09, DC10, DC11, DC12, DC13, DC14, DC15, DC16"
EOG channels,Not available
ECG channels,Not available
Sampling frequency,2000.00 Hz
Highpass,0.00 Hz
Lowpass,1000.00 Hz


## Bipolar Rereferencing

In [5]:
# helper functions

def probe_and_num(elec_str):
    ''' Split an electrode label like LHH1 into its probe name and contact number.

        The first run of non-digit characters that is followed by digits is taken
        as the probe, and those digits as the contact number:
        'LHH1' -> ('LHH', 1), 'EMPTY_227' -> ('EMPTY_', 227).

        Returns (None, None) when the label has no such letters-then-digits part
        (e.g. 'STI').
    '''
    # Raw string on purpose: in a normal string literal '\D' and '\d' are invalid
    # escape sequences, which newer Python versions warn about.
    matcher = re.search(r'(\D+)(\d+)', elec_str)
    if matcher is None:
        return None, None
    probe, num = matcher.groups()
    return probe, int(num)
    
def find_bipolar_pair(ch, labels, remove):
    ''' Find the pair of a given electrode for bipolar referencing.

        Given a single *ch* and a list of *labels*, some of which you want to *remove*,
        this finds the next channel on that probe that isn't meant to be removed (WM is ok).

        Returns the label of that channel, or None when *ch* cannot be split into
        probe + number (e.g. 'STI', 'EMPTY') or when no later, non-removed contact
        exists on its probe.
    '''
    probe, num = probe_and_num(ch)
    if probe is None:
        # No probe/number structure, so there is no "next contact" to look for.
        return None

    # Map contact number -> actual label for every contact on this probe. Picking
    # the partner from labels that really exist (instead of building the name as
    # probe + str(number)) keeps the result valid for zero-padded names such as
    # 'DC01' and for probes whose numbering has gaps.
    contacts = {}
    for other_ch in labels:
        other_probe, other_num = probe_and_num(other_ch)
        if other_probe == probe:
            contacts.setdefault(other_num, other_ch)

    # Walk the later contacts in ascending order and take the first usable one.
    for other_num in sorted(contacts):
        if other_num <= num:
            continue
        next_ch = contacts[other_num]
        if next_ch in remove or next_ch == 'STI':
            continue
        return next_ch
    return None

In [6]:
# Walk every channel label and match each usable contact with its bipolar pair
pairs, anode, cathode, pairs_name = [], [], [], []
pairs_map = {}
remove = filtered_clean_fif.info['bads']
labels = filtered_clean_fif.info['ch_names']
for ch in labels:
    # bad channels and the stimulus channel never take part in a bipolar pair
    if ch in remove or ch == 'STI':
        print(f"{ch} noref")
        continue

    pair = find_bipolar_pair(ch, labels, remove)
    if not pair:
        # no later usable contact on this probe, so nothing to reference against
        continue

    pair_label = f"{ch}-{pair}"
    anode.append(ch)
    cathode.append(pair)
    pairs.append((ch, pair))
    pairs_map[ch] = pair_label
    pairs_name.append(pair_label)

EMPTY noref
EMPTY_2 noref
EMPTY_3 noref
EMPTY_4 noref
REF1 noref
REF2 noref
BL1 noref
BL16 noref
DL14 noref
EL1 noref
EL2 noref
EL12 noref
FL12 noref
GL1 noref
GL2 noref
GL3 noref
GL15 noref
GL16 noref
HL1 noref
HL2 noref
HL3 noref
HL4 noref
HL14 noref
HL15 noref
HL16 noref
IL1 noref
IL2 noref
IL3 noref
IL16 noref
JL14 noref
KL16 noref
NR12 noref
OR16 noref
EMPTY_227 noref
EMPTY_228 noref
FP1 noref
F3 noref
C3 noref
P3 noref
O1 noref
FP2 noref
F4 noref
C4 noref
P4 noref
O2 noref
F7 noref
T7 noref
P7 noref
F8 noref
T8 noref
P8 noref
F9 noref
F10 noref
FPZ noref
FZ noref
CZ noref
PZ noref
OZ noref
Ekg1 noref
ekg2 noref
EMPTY_254 noref
EMPTY_255 noref
EMPTY_256 noref
DC01 noref
DC02 noref
DC03 noref
DC04 noref
DC05 noref
DC06 noref
DC07 noref
DC08 noref
DC09 noref
DC10 noref
DC11 noref
DC12 noref
DC13 noref
DC14 noref
DC15 noref
DC16 noref
STI noref


In [7]:
## Apply Rereference #

# The anode/cathode lists were built from the filtered recording, so they can only
# be applied to the raw recording as well if both share the same channel names and
# the same bad-channel list. Fail loudly on a mismatch: silently skipping the step
# would leave later cells reading stale (or missing) re-referenced files.
if (filtered_clean_fif.info['ch_names'] != raw_clean_fif.info['ch_names']
        or filtered_clean_fif.info['bads'] != raw_clean_fif.info['bads']):
    raise ValueError(
        "Filtered and raw recordings differ in channel names or bad channels; "
        "the bipolar pairs cannot be applied to both, so nothing was re-referenced or saved."
    )

# --- filtered recording ---
# load filtered data
filtered_clean_fif.load_data()

# set filtered reference
bp_filt_fif = mne.set_bipolar_reference(filtered_clean_fif, anode = anode, cathode = cathode)

# save
bp_filt_fif.save(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif", overwrite = True)

# clear var: rebind the names so the loaded filtered data is no longer referenced
# before the raw recording is loaded
bp_filt_fif = []
filtered_clean_fif = []

# --- raw recording ---
# load raw data
raw_clean_fif.load_data()

# set raw reference
bp_raw_fif = mne.set_bipolar_reference(raw_clean_fif, anode = anode, cathode = cathode)

# save
bp_raw_fif.save(f"{preproc_data_dir}/{subject}_bp_raw_clean_data.fif", overwrite = True)

# clear var
bp_raw_fif = []
raw_clean_fif = []
    
    

Reading 0 ... 5901899  =      0.000 ...  2950.950 secs...
sEEG channel type selected for re-referencing
Creating RawArray with float64 data, n_channels=178, n_times=5901900
    Range : 0 ... 5901899 =      0.000 ...  2950.950 secs
Ready.
Added the following bipolar channels:
AL1-AL2, AL2-AL3, AL3-AL4, AL4-AL5, AL5-AL6, AL6-AL7, AL7-AL8, AL8-AL9, AL9-AL10, AL10-AL11, AL11-AL12, AL12-AL13, AL13-AL14, AL14-AL15, AL15-AL16, BL2-BL3, BL3-BL4, BL4-BL5, BL5-BL6, BL6-BL7, BL7-BL8, BL8-BL9, BL9-BL10, BL10-BL11, BL11-BL12, BL12-BL13, BL13-BL14, BL14-BL15, CL1-CL2, CL2-CL3, CL3-CL4, CL4-CL5, CL5-CL6, CL6-CL7, CL7-CL8, CL8-CL9, CL9-CL10, CL10-CL11, CL11-CL12, CL12-CL13, CL13-CL14, DL1-DL2, DL2-DL3, DL3-DL4, DL4-DL5, DL5-DL6, DL6-DL7, DL7-DL8, DL8-DL9, DL9-DL10, DL10-DL11, DL11-DL12, DL12-DL13, EL3-EL4, EL4-EL5, EL5-EL6, EL6-EL7, EL7-EL8, EL8-EL9, EL9-EL10, EL10-EL11, EL11-EL13, EL13-EL14, FL1-FL2, FL2-FL3, FL3-FL4, FL4-FL5, FL5-FL6, FL6-FL7, FL7-FL8, FL8-FL9, FL9-FL10, FL10-FL11, GL4-GL5, GL5-GL6,

  bp_filt_fif.save(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif", overwrite = True)


Overwriting existing file.
Writing /home/brooke/pacman/preprocessing/BJH016/ieeg/BJH016_bp_filtered_clean_data-1.fif
Overwriting existing file.
Writing /home/brooke/pacman/preprocessing/BJH016/ieeg/BJH016_bp_filtered_clean_data-2.fif
Closing /home/brooke/pacman/preprocessing/BJH016/ieeg/BJH016_bp_filtered_clean_data-2.fif
Closing /home/brooke/pacman/preprocessing/BJH016/ieeg/BJH016_bp_filtered_clean_data-1.fif
Closing /home/brooke/pacman/preprocessing/BJH016/ieeg/BJH016_bp_filtered_clean_data.fif
[done]
Reading 0 ... 5901899  =      0.000 ...  2950.950 secs...
sEEG channel type selected for re-referencing
Creating RawArray with float64 data, n_channels=178, n_times=5901900
    Range : 0 ... 5901899 =      0.000 ...  2950.950 secs
Ready.
Added the following bipolar channels:
AL1-AL2, AL2-AL3, AL3-AL4, AL4-AL5, AL5-AL6, AL6-AL7, AL7-AL8, AL8-AL9, AL9-AL10, AL10-AL11, AL11-AL12, AL12-AL13, AL13-AL14, AL14-AL15, AL15-AL16, BL2-BL3, BL3-BL4, BL4-BL5, BL5-BL6, BL6-BL7, BL7-BL8, BL8-BL9, BL9-

  bp_raw_fif.save(f"{preproc_data_dir}/{subject}_bp_raw_clean_data.fif", overwrite = True)


Overwriting existing file.
Writing /home/brooke/pacman/preprocessing/BJH016/ieeg/BJH016_bp_raw_clean_data-1.fif
Overwriting existing file.
Writing /home/brooke/pacman/preprocessing/BJH016/ieeg/BJH016_bp_raw_clean_data-2.fif
Closing /home/brooke/pacman/preprocessing/BJH016/ieeg/BJH016_bp_raw_clean_data-2.fif
Closing /home/brooke/pacman/preprocessing/BJH016/ieeg/BJH016_bp_raw_clean_data-1.fif
Closing /home/brooke/pacman/preprocessing/BJH016/ieeg/BJH016_bp_raw_clean_data.fif
[done]


In [None]:
# Visualize it #

# Neither `filtered_notch_bp_data` nor `events` is defined by any earlier cell, so
# this cell used to fail with a NameError on a fresh kernel. Reload the bipolar
# filtered recording written by the re-referencing step (its in-memory copy was
# cleared there) and read the events from its stimulus channel.
# NOTE(review): assumes the saved bipolar filtered file is what the original
# variable name referred to -- confirm.
filtered_notch_bp_data = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif")
events = mne.find_events(filtered_notch_bp_data, output='step', consecutive = False, stim_channel='STI')

filtered_notch_bp_data.plot(events=events, color='b', bad_color = 'cyan', n_channels = 1, clipping = None, event_color = 'r')

## Epoching the data 

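Alright, the longest remaining trial is 14 seconds, so the window of interest runs from 1 second before to 14 seconds after each event. Because we need padding for filtering the signal, we add extra time on either side. Note that the code below uses `tmin = -4` and `tmax = 17`, i.e. 3 s (6000 samples at 2000 Hz) of padding on each side, not .75 s (1500 samples).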

In [4]:
## Epoching the raw data ##

# open the bipolar re-referenced raw recording and read the events off the stim channel
bp_raw_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_raw_clean_data.fif")
events = mne.find_events(bp_raw_fif, stim_channel='STI', output='step', consecutive = False)

# keep every channel that is not marked bad
channels_of_interest = [
    ch_name
    for ch_name in bp_raw_fif.info['ch_names']
    if ch_name not in bp_raw_fif.info['bads']
]

# epoch around events with id 1, from 4 s before to 17 s after each event,
# with no baseline correction and without dropping annotated segments
epoched_data = mne.Epochs(
    bp_raw_fif, events,
    event_id = 1,
    tmin = -4, tmax = 17,
    baseline = None,
    picks = channels_of_interest,
    reject_by_annotation = False,
)

# write the epochs to disk, split into parts of at most 1.9GB
epoched_data.save(f"{preproc_data_dir}/{subject}_bp_clean_pres-locked_ieeg.fif", overwrite = True, split_size = '1.9GB')


Opening raw data file /home/brooke/pacman/preprocessing/BJH016/ieeg/BJH016_bp_raw_clean_data.fif...
    Range : 0 ... 2077999 =      0.000 ...  1038.999 secs
Ready.
Opening raw data file /home/brooke/pacman/preprocessing/BJH016/ieeg/BJH016_bp_raw_clean_data-1.fif...
    Range : 2078000 ... 4155999 =   1039.000 ...  2077.999 secs
Ready.
Opening raw data file /home/brooke/pacman/preprocessing/BJH016/ieeg/BJH016_bp_raw_clean_data-2.fif...
    Range : 4156000 ... 5901899 =   2078.000 ...  2950.950 secs
Ready.


  bp_raw_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_raw_clean_data.fif")


904 events found
Event IDs: [0 1]
Not setting metadata
452 matching events found
No baseline correction applied
0 projection items activated
Loading data for 452 events and 42001 original time points ...


  epoched_data.save(f"{preproc_data_dir}/{subject}_bp_clean_pres-locked_ieeg.fif", overwrite = True, split_size = '1.9GB')


0 bad epochs dropped
Loading data for 1 events and 42001 original time points ...
Splitting into 7 parts
Loading data for 65 events and 42001 original time points ...
Loading data for 65 events and 42001 original time points ...
Loading data for 65 events and 42001 original time points ...
Loading data for 65 events and 42001 original time points ...
Loading data for 64 events and 42001 original time points ...
Loading data for 64 events and 42001 original time points ...
Loading data for 64 events and 42001 original time points ...


In [9]:
# Display the metadata of the re-referenced recording loaded for epoching.
bp_raw_fif.info

0,1
Measurement date,Unknown
Experimenter,Unknown
Digitized points,0 points
Good channels,"257 sEEG, 1 Stimulus"
Bad channels,"BL1, BL16, C3, C4, CZ, DC01, DC02, DC03, DC04, DC05, DC06, DC07, DC08, DC09, DC10, DC11, DC12, DC13, DC14, DC15, DC16, DL14, EL1, EL12, EL2, EMPTY, EMPTY_2, EMPTY_227, EMPTY_228, EMPTY_254, EMPTY_255, EMPTY_256, EMPTY_3, EMPTY_4, Ekg1, F10, F3, F4, F7, F8, F9, FL12, FP1, FP2, FPZ, FZ, GL1, GL15, GL16, GL2, GL3, HL1, HL14, HL15, HL16, HL2, HL3, HL4, IL1, IL16, IL2, IL3, JL14, KL16, NR12, O1, O2, OR16, OZ, P3, P4, P7, P8, PZ, REF1, REF2, T7, T8, ekg2"
EOG channels,Not available
ECG channels,Not available
Sampling frequency,2000.00 Hz
Highpass,0.00 Hz
Lowpass,1000.00 Hz


## Create Other Events

In [10]:
## Load Behavioral Data ##

# read this subject's "last dot" event table and display it
last_dot_csv = f"{raw_data_dir}/behave/{subject}_last_dot_events.csv"
last_dot_data = pd.read_csv(last_dot_csv)
last_dot_data

Unnamed: 0,trial_numeric,TrialType,sample,sample_before,event
0,2,8,96500,0,1
1,3,11,119100,0,1
2,5,10,137800,0,1
3,6,15,157400,0,1
4,8,20,194700,0,1
...,...,...,...,...,...
397,446,17,5770200,0,1
398,447,15,5782200,0,1
399,448,18,5793400,0,1
400,449,19,5811600,0,1


In [11]:
# Fix trial indexing: shift trial_numeric down by one. No rows are filtered here.
# NOTE: this overwrites the column in place, so re-running the cell without
# reloading the CSV shifts the index again.
last_dot_data['trial_numeric'] = last_dot_data['trial_numeric'].sub(1)

# create events: the three columns passed to mne.Epochs as its events array
event_columns = ['sample', 'sample_before', 'event']
last_dot_events = last_dot_data[event_columns].copy().to_numpy()


In [12]:
# epoch the raw bipolar recording around the last-dot events:
# window from -.75 * 4 = -3 s to .75 * 3 = 2.25 s, no baseline correction
last_dot_epochs = mne.Epochs(
    bp_raw_fif, last_dot_events,
    event_id = 1,
    tmin = -.75 * 4, tmax = .75 * 3,
    baseline = None,
    picks = channels_of_interest,
    reject_by_annotation = False,
)

# write the epochs to disk, split into parts of at most 1.9GB
last_dot_epochs.save(f"{preproc_data_dir}/{subject}_bp_raw_clean_last_dot_events.fif", overwrite = True, split_size = '1.9GB')


Not setting metadata
402 matching events found
No baseline correction applied
0 projection items activated
Overwriting existing file.
Loading data for 402 events and 10501 original time points ...


  last_dot_epochs.save(f"{preproc_data_dir}/{subject}_bp_raw_clean_last_dot_events.fif", overwrite = True, split_size = '1.9GB')


0 bad epochs dropped
Loading data for 1 events and 10501 original time points ...
Splitting into 2 parts
Loading data for 201 events and 10501 original time points ...
Loading data for 201 events and 10501 original time points ...


Okay, what we need to do is pull the sample of the last dot eaten on each trial

and then also the last turn around

looking for a comparison between these moments on ghost and no ghost trials

we can either get the average and then compare peaks via FOOOF groups in HC, OFC, and anterior cingulate

## Last Away

In [5]:
## Load Behavioral Data ##

# read this subject's "last away" event table and display it
last_away_csv = f"{raw_data_dir}/behave/{subject}_last_away_events.csv"
last_away_data = pd.read_csv(last_away_csv)
last_away_data

Unnamed: 0,trial_numeric,TrialType,sample,sample_before,event
0,2,8,96600,0,1
1,3,11,119300,0,1
2,6,15,157900,0,1
3,8,20,216300,0,1
4,9,5,269900,0,1
...,...,...,...,...,...
351,445,7,5759500,0,1
352,446,17,5770500,0,1
353,447,15,5782600,0,1
354,448,18,5797900,0,1


In [6]:
# Fix trial indexing: shift trial_numeric down by one. No rows are filtered here.
# NOTE: this overwrites the column in place, so re-running the cell without
# reloading the CSV shifts the index again.
last_away_data['trial_numeric'] = last_away_data['trial_numeric'].sub(1)

# create events: the three columns passed to mne.Epochs as its events array
event_columns = ['sample', 'sample_before', 'event']
last_away_events = last_away_data[event_columns].copy().to_numpy()


In [9]:
# epoch the raw bipolar recording around the last-away events:
# 5 s before to 5 s after each event, no baseline correction
last_away_epochs = mne.Epochs(
    bp_raw_fif, last_away_events,
    event_id = 1,
    tmin = -5, tmax = 5,
    baseline = None,
    picks = channels_of_interest,
    reject_by_annotation = False,
)

# write the epochs to disk, split into parts of at most 1.9GB
last_away_epochs.save(f"{preproc_data_dir}/{subject}_bp_raw_clean_last_away_events.fif", overwrite = True, split_size = '1.9GB')


Not setting metadata
356 matching events found
No baseline correction applied
0 projection items activated
Overwriting existing file.
Loading data for 356 events and 20001 original time points ...


  last_away_epochs.save(f"{preproc_data_dir}/{subject}_bp_raw_clean_last_away_events.fif", overwrite = True, split_size = '1.9GB')


0 bad epochs dropped
Loading data for 1 events and 20001 original time points ...
Splitting into 3 parts
Loading data for 119 events and 20001 original time points ...
Loading data for 119 events and 20001 original time points ...
Loading data for 118 events and 20001 original time points ...


In [16]:
# Display the metadata of the last-away epochs (channels kept after picking, sampling rate).
last_away_epochs.info

0,1
Measurement date,Unknown
Experimenter,Unknown
Digitized points,0 points
Good channels,"1 Stimulus, 178 sEEG"
Bad channels,
EOG channels,Not available
ECG channels,Not available
Sampling frequency,2000.00 Hz
Highpass,0.00 Hz
Lowpass,1000.00 Hz
