# Preprocessing: BJH017


This notebook loads the clean data, applies a bipolar rereference, and then epochs the data.


In [1]:
import matplotlib
matplotlib.use("Qt5Agg")
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import signal, stats
import re
import os
import mne
import IPython
import seaborn as sns

In [2]:
## Prep paths ##

subject = 'BJH017'

# Root of the project data tree. Defaults to the original location, but can be
# redirected with the PACMAN_ROOT environment variable so the notebook can run
# on another machine without editing this cell.
pacman_root = os.environ.get("PACMAN_ROOT", "/home/brooke/pacman")

# input directory (cleaned recordings under ieeg/, behavioral CSVs under behave/)
raw_data_dir = f"{pacman_root}/raw_data/{subject}"
# output directory for the rereferenced and epoched files
preproc_data_dir = f"{pacman_root}/preprocessing/{subject}/ieeg"

In [3]:
## Load Data ##

# notch-filtered, cleaned recording (samples are read later via load_data())
filtered_path = f"{raw_data_dir}/ieeg/{subject}_notched_filtered_clean_ieeg.fif"
filtered_clean_fif = mne.io.Raw(filtered_path)

# unfiltered ("raw") cleaned recording
raw_path = f"{raw_data_dir}/ieeg/{subject}_raw_clean_ieeg.fif"
raw_clean_fif = mne.io.Raw(raw_path)



Opening raw data file /home/brooke/pacman/raw_data/BJH017/ieeg/BJH017_notched_filtered_clean_ieeg.fif...
    Range : 0 ... 1963999 =      0.000 ...   982.000 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH017/ieeg/BJH017_notched_filtered_clean_ieeg-1.fif...
    Range : 1964000 ... 3739199 =    982.000 ...  1869.600 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH017/ieeg/BJH017_raw_clean_ieeg.fif...
    Range : 0 ... 1963999 =      0.000 ...   982.000 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH017/ieeg/BJH017_raw_clean_ieeg-1.fif...
    Range : 1964000 ... 3739199 =    982.000 ...  1869.600 secs
Ready.


## Bipolar Rereferencing

In [4]:
# helper functions

def probe_and_num(elec_str):
    ''' Split an electrode label like 'LHH1' into its probe name and contact number.

        Returns a (probe, number) tuple such as ('LHH', 1). Labels that contain an
        underscore (e.g. 'EMPTY_2'), and labels that have no run of non-digits
        followed by digits (e.g. 'STI'), return (None, None).
    '''
    if '_' in elec_str:
        return None, None

    # Raw string literal: backslash-D and backslash-d are not valid escapes in a
    # plain string and trigger an invalid-escape warning on recent Python versions.
    matcher = re.search(r'(\D+)(\d+)', elec_str)
    if matcher is None:
        return None, None

    probe, num = matcher.groups()
    return probe, int(num)
    
def find_bipolar_pair(ch, labels, remove):
    ''' Find the pair of a given electrode for bipolar referencing.

        Given a single *ch* and a list of *labels*, some of which you want to *remove*,
        this returns the label of the next-higher-numbered contact on the same probe
        that is present in *labels* and not listed in *remove* (WM is ok).

        Returns None when *ch* cannot be split into probe + number by
        probe_and_num, or when no such contact exists.
    '''
    probe, num = probe_and_num(ch)
    if probe is None:
        # e.g. 'STI' or 'EMPTY_2': there is no probe/number to step along
        return None

    # Map contact number -> actual label for every channel on this probe, so the
    # returned pair is always a label that really exists (also when the probe's
    # numbering has gaps or does not start at 1).
    contacts_on_probe = {}
    for other_ch in labels:
        other_probe, other_num = probe_and_num(other_ch)
        if other_probe == probe:
            contacts_on_probe.setdefault(other_num, other_ch)

    # Walk outward from *ch* and take the first contact that is not excluded.
    for other_num in sorted(contacts_on_probe):
        candidate = contacts_on_probe[other_num]
        if other_num > num and candidate not in remove:
            return candidate
    return None

In [5]:
# Walk every channel of the filtered recording and pair it with its bipolar partner.
# anode/cathode are parallel lists; pairs, pairs_name and pairs_map hold the
# same pairing as tuples, "anode-cathode" names, and an anode -> name lookup.
pairs, anode, cathode, pairs_name = [], [], [], []
pairs_map = {}
remove = filtered_clean_fif.info['bads']
labels = filtered_clean_fif.info['ch_names']

for ch in labels:
    # bad channels and the stim channel never take part in a bipolar pair
    if ch == 'STI' or ch in remove:
        print(f"{ch} noref")
        continue

    pair = find_bipolar_pair(ch, labels, remove)
    if not pair:
        # no usable partner on this probe: skipped without printing anything
        continue

    bipolar_name = f"{ch}-{pair}"
    anode.append(ch)
    cathode.append(pair)
    pairs.append((ch, pair))
    pairs_map[ch] = bipolar_name
    pairs_name.append(bipolar_name)
    print(ch, pair)

EMPTY noref
EMPTY_2 noref
EMPTY_3 noref
EMPTY_4 noref
REF1 noref
REF2 noref
AR1 AR2
AR2 AR3
AR3 AR4
AR4 AR5
AR5 AR6
AR6 AR7
AR7 AR8
AR8 AR9
AR9 AR10
AR10 AR11
AR11 AR12
AR12 AR13
AR14 noref
BR1 noref
BR2 noref
BR3 BR4
BR4 BR5
BR5 BR6
BR6 BR7
BR7 BR8
BR8 BR9
BR9 BR10
BR10 BR11
BR11 BR12
BR12 BR13
BR13 BR14
BR14 BR15
BR15 BR16
CR1 CR2
CR2 CR3
CR3 CR6
CR4 noref
CR5 noref
CR6 CR7
CR7 CR8
CR8 CR9
CR9 CR10
CR10 CR11
CR11 CR12
CR12 CR13
CR13 CR14
CR14 CR15
CR15 CR16
DR1 DR2
DR2 DR3
DR3 DR4
DR4 DR5
DR5 DR6
DR6 DR7
DR7 DR8
DR8 DR9
DR9 DR10
DR10 DR11
DR11 DR12
DR12 DR13
DR13 DR14
ER1 ER2
ER2 ER3
ER3 ER4
ER4 ER5
ER5 ER6
ER6 ER7
ER7 ER8
ER8 ER9
ER9 ER10
ER10 ER11
ER11 ER12
FR1 FR2
FR2 FR3
FR3 FR4
FR4 FR5
FR5 FR6
FR6 FR7
FR7 FR8
FR8 FR9
FR9 FR10
FR10 FR11
FR11 FR12
GR1 noref
GR2 noref
GR3 GR4
GR4 GR5
GR5 GR6
GR6 GR7
GR7 GR8
GR8 GR9
GR9 GR10
GR10 GR11
GR11 GR12
GR12 GR13
GR13 GR14
HR1 HR2
HR2 HR3
HR3 HR4
HR4 HR5
HR5 HR6
HR6 HR7
HR7 HR8
HR8 HR9
HR9 HR10
HR10 HR11
HR11 HR12
HR12 HR13
HR13 HR14
IR1 IR2

In [6]:
## Apply Rereference #

# The anode/cathode lists were built from the filtered recording, so they can only
# be applied to the raw recording if both files agree on channel names and bads.
same_channels = filtered_clean_fif.info['ch_names'] == raw_clean_fif.info['ch_names']
same_bads = filtered_clean_fif.info['bads'] == raw_clean_fif.info['bads']
if not (same_channels and same_bads):
    # Fail loudly: silently skipping here would let the epoching cells below run
    # on missing or stale bipolar files from an earlier run.
    raise ValueError(
        "filtered and raw recordings disagree on channel names or bad channels; "
        "bipolar rereference was not applied"
    )

# load filtered data
filtered_clean_fif.load_data()

# set filtered reference
bp_filt_fif = mne.set_bipolar_reference(filtered_clean_fif, anode = anode, cathode = cathode)

# save
bp_filt_fif.save(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif", overwrite = True)

# clear var (drop the references before the raw recording is loaded)
bp_filt_fif = []
filtered_clean_fif = []

# load raw data
raw_clean_fif.load_data()

# set raw reference
bp_raw_fif = mne.set_bipolar_reference(raw_clean_fif, anode = anode, cathode = cathode)

# save
bp_raw_fif.save(f"{preproc_data_dir}/{subject}_bp_raw_clean_data.fif", overwrite = True)

# clear var
bp_raw_fif = []
raw_clean_fif = []
    
    

Reading 0 ... 3739199  =      0.000 ...  1869.600 secs...
sEEG channel type selected for re-referencing
Creating RawArray with float64 data, n_channels=186, n_times=3739200
    Range : 0 ... 3739199 =      0.000 ...  1869.600 secs
Ready.
Added the following bipolar channels:
AR1-AR2, AR2-AR3, AR3-AR4, AR4-AR5, AR5-AR6, AR6-AR7, AR7-AR8, AR8-AR9, AR9-AR10, AR10-AR11, AR11-AR12, AR12-AR13, BR3-BR4, BR4-BR5, BR5-BR6, BR6-BR7, BR7-BR8, BR8-BR9, BR9-BR10, BR10-BR11, BR11-BR12, BR12-BR13, BR13-BR14, BR14-BR15, BR15-BR16, CR1-CR2, CR2-CR3, CR3-CR6, CR6-CR7, CR7-CR8, CR8-CR9, CR9-CR10, CR10-CR11, CR11-CR12, CR12-CR13, CR13-CR14, CR14-CR15, CR15-CR16, DR1-DR2, DR2-DR3, DR3-DR4, DR4-DR5, DR5-DR6, DR6-DR7, DR7-DR8, DR8-DR9, DR9-DR10, DR10-DR11, DR11-DR12, DR12-DR13, DR13-DR14, ER1-ER2, ER2-ER3, ER3-ER4, ER4-ER5, ER5-ER6, ER6-ER7, ER7-ER8, ER8-ER9, ER9-ER10, ER10-ER11, ER11-ER12, FR1-FR2, FR2-FR3, FR3-FR4, FR4-FR5, FR5-FR6, FR6-FR7, FR7-FR8, FR8-FR9, FR9-FR10, FR10-FR11, FR11-FR12, GR3-GR4, GR4-GR

  bp_filt_fif.save(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif", overwrite = True)


Closing /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data.fif
Writing /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data-1.fif
Closing /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data-1.fif
[done]
Reading 0 ... 3739199  =      0.000 ...  1869.600 secs...
sEEG channel type selected for re-referencing
Creating RawArray with float64 data, n_channels=186, n_times=3739200
    Range : 0 ... 3739199 =      0.000 ...  1869.600 secs
Ready.
Added the following bipolar channels:
AR1-AR2, AR2-AR3, AR3-AR4, AR4-AR5, AR5-AR6, AR6-AR7, AR7-AR8, AR8-AR9, AR9-AR10, AR10-AR11, AR11-AR12, AR12-AR13, BR3-BR4, BR4-BR5, BR5-BR6, BR6-BR7, BR7-BR8, BR8-BR9, BR9-BR10, BR10-BR11, BR11-BR12, BR12-BR13, BR13-BR14, BR14-BR15, BR15-BR16, CR1-CR2, CR2-CR3, CR3-CR6, CR6-CR7, CR7-CR8, CR8-CR9, CR9-CR10, CR10-CR11, CR11-CR12, CR12-CR13, CR13-CR14, CR14-CR15, CR15-CR16, DR1-DR2, DR2-DR3, DR3-DR4, DR4-DR5, DR5-DR6, DR6-DR7, DR7-DR8, DR8-DR9, DR

  bp_raw_fif.save(f"{preproc_data_dir}/{subject}_bp_raw_clean_data.fif", overwrite = True)


Closing /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_raw_clean_data.fif
Writing /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_raw_clean_data-1.fif
Closing /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_raw_clean_data-1.fif
[done]


In [None]:
# Visualize it #

# bp_raw_fif.plot(events=events, color='b', bad_color = 'cyan', n_channels = 1, clipping = None, event_color = 'r')

## Epoching the data 

### Onset

In [7]:
## Epoch the bipolar filtered data around onset events (STI event id 1) ##

# reopen the saved bipolar-rereferenced, filtered recording
bp_filtered_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif")

# read step events from the STI channel
events = mne.find_events(bp_filtered_fif, stim_channel='STI', output='step', consecutive=False)

# only epoch good channels
bad_channels = bp_filtered_fif.info['bads']
channels_of_interest = [name for name in bp_filtered_fif.info['ch_names'] if name not in bad_channels]

# window from tmin=-4 to tmax=12 around each event with id 1, no baseline correction
epoched_data = mne.Epochs(
    bp_filtered_fif,
    events,
    event_id=1,
    tmin=-4,
    tmax=12,
    baseline=None,
    picks=channels_of_interest,
    reject_by_annotation=False,
)

# save the epochs, split into parts of at most 1.9GB
epoched_data.save(
    f"{preproc_data_dir}/{subject}_bp_clean_pres-locked_ieeg.fif",
    overwrite=True,
    split_size='1.9GB',
)

Opening raw data file /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data.fif...
    Range : 0 ... 2077999 =      0.000 ...  1038.999 secs
Ready.
Opening raw data file /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data-1.fif...
    Range : 2078000 ... 3739199 =   1039.000 ...  1869.600 secs
Ready.


  bp_filtered_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif")


482 events found on stim channel STI
Event IDs: [0 1]
Not setting metadata
241 matching events found
No baseline correction applied
0 projection items activated
Loading data for 1 events and 32001 original time points ...
Splitting into 3 parts
Loading data for 81 events and 32001 original time points ...


  epoched_data.save(f"{preproc_data_dir}/{subject}_bp_clean_pres-locked_ieeg.fif", overwrite = True, split_size = '1.9GB')


Loading data for 80 events and 32001 original time points ...
Loading data for 80 events and 32001 original time points ...


### Trial End

In [8]:
## Epoch the bipolar filtered data around trial-end events (STI event id 0) ##

# reopen the saved bipolar-rereferenced, filtered recording
bp_filtered_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif")

# read step events from the STI channel
events = mne.find_events(bp_filtered_fif, stim_channel='STI', output='step', consecutive=False)

# only epoch good channels
bad_channels = bp_filtered_fif.info['bads']
channels_of_interest = [name for name in bp_filtered_fif.info['ch_names'] if name not in bad_channels]

# window from tmin=-5 to tmax=5 around each event with id 0, no baseline correction
epoched_data = mne.Epochs(
    bp_filtered_fif,
    events,
    event_id=0,
    tmin=-5,
    tmax=5,
    baseline=None,
    picks=channels_of_interest,
    reject_by_annotation=False,
)

# save the epochs, split into parts of at most 1.9GB
epoched_data.save(
    f"{preproc_data_dir}/{subject}_bp_clean_end-locked_ieeg.fif",
    overwrite=True,
    split_size='1.9GB',
)

Opening raw data file /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data.fif...
    Range : 0 ... 2077999 =      0.000 ...  1038.999 secs
Ready.
Opening raw data file /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data-1.fif...
    Range : 2078000 ... 3739199 =   1039.000 ...  1869.600 secs
Ready.


  bp_filtered_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif")


482 events found on stim channel STI
Event IDs: [0 1]
Not setting metadata
241 matching events found
No baseline correction applied
0 projection items activated
Loading data for 1 events and 20001 original time points ...
Splitting into 2 parts
Loading data for 121 events and 20001 original time points ...


  epoched_data.save(f"{preproc_data_dir}/{subject}_bp_clean_end-locked_ieeg.fif", overwrite = True, split_size = '1.9GB')


Loading data for 120 events and 20001 original time points ...


### Last Away

In [9]:
## Load Behavioral Data ##
# last-away event table for this subject; the bare name on the last line displays it
last_away_csv = f"{raw_data_dir}/behave/{subject}_last_away_events.csv"
last_away_data = pd.read_csv(last_away_csv)
last_away_data

Unnamed: 0,neural_trial_numeric,TrialType,sample,sample_before,event
0,1,5,110800,0,1
1,4,18,180600,0,1
2,5,19,298900,0,1
3,6,15,317000,0,1
4,7,9,326200,0,1
...,...,...,...,...,...
200,235,14,3521200,0,1
201,236,2,3532600,0,1
202,237,12,3543100,0,1
203,238,8,3554900,0,1


In [10]:
# create events: keep the three event columns as an (n_events, 3) array
event_columns = ['sample', 'sample_before', 'event']
last_away_events = last_away_data[event_columns].copy().to_numpy()


In [11]:
# reopen the saved bipolar-rereferenced, filtered recording
bp_filtered_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif")

# only epoch good channels
bad_channels = bp_filtered_fif.info['bads']
channels_of_interest = [name for name in bp_filtered_fif.info['ch_names'] if name not in bad_channels]

# window from tmin=-5 to tmax=5 around each last-away event (id 1), no baseline correction
last_away_epochs = mne.Epochs(
    bp_filtered_fif,
    last_away_events,
    event_id=1,
    tmin=-5,
    tmax=5,
    baseline=None,
    picks=channels_of_interest,
    reject_by_annotation=False,
)

# save the epochs, split into parts of at most 1.9GB
last_away_epochs.save(
    f"{preproc_data_dir}/{subject}_bp_filtered_clean_last_away_events.fif",
    overwrite=True,
    split_size='1.9GB',
)


Opening raw data file /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data.fif...
    Range : 0 ... 2077999 =      0.000 ...  1038.999 secs
Ready.
Opening raw data file /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data-1.fif...
    Range : 2078000 ... 3739199 =   1039.000 ...  1869.600 secs
Ready.
Not setting metadata
205 matching events found
No baseline correction applied
0 projection items activated
Loading data for 1 events and 20001 original time points ...
Splitting into 2 parts
Loading data for 103 events and 20001 original time points ...


  bp_filtered_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif")
  last_away_epochs.save(f"{preproc_data_dir}/{subject}_bp_filtered_clean_last_away_events.fif", overwrite = True, split_size = '1.9GB')


Loading data for 102 events and 20001 original time points ...


### First Dot

In [12]:
## Load Behavioral Data ##
# first-dot event table for this subject; the bare name on the last line displays it
first_dot_csv = f"{raw_data_dir}/behave/{subject}_first_dot_events.csv"
first_dot_data = pd.read_csv(first_dot_csv)
first_dot_data

Unnamed: 0,neural_trial_numeric,TrialType,sample,sample_before,event
0,1,5,106000,0,1
1,2,16,123200,0,1
2,3,7,147000,0,1
3,4,18,164200,0,1
4,5,19,291400,0,1
...,...,...,...,...,...
220,236,2,3531000,0,1
221,237,12,3541600,0,1
222,238,8,3553000,0,1
223,239,20,3562400,0,1


In [13]:
# create events: keep the three event columns as an (n_events, 3) array.
# The name is rebound from the DataFrame to the array, so the guard makes this
# cell safe to re-run (column indexing on the array would otherwise fail).
if isinstance(first_dot_data, pd.DataFrame):
    first_dot_data = first_dot_data[['sample', 'sample_before', 'event']].copy().to_numpy()


In [14]:
# reopen the saved bipolar-rereferenced, filtered recording
bp_filtered_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif")

# only epoch good channels
bad_channels = bp_filtered_fif.info['bads']
channels_of_interest = [name for name in bp_filtered_fif.info['ch_names'] if name not in bad_channels]

# window from tmin=-3 to tmax=6 around each first-dot event (id 1), no baseline correction;
# first_dot_data holds the events array at this point
first_dot_epochs = mne.Epochs(
    bp_filtered_fif,
    first_dot_data,
    event_id=1,
    tmin=-3,
    tmax=6,
    baseline=None,
    picks=channels_of_interest,
    reject_by_annotation=False,
)

# save the epochs, split into parts of at most 1.9GB
first_dot_epochs.save(
    f"{preproc_data_dir}/{subject}_bp_filtered_clean_first_dot_events.fif",
    overwrite=True,
    split_size='1.9GB',
)


Opening raw data file /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data.fif...
    Range : 0 ... 2077999 =      0.000 ...  1038.999 secs
Ready.
Opening raw data file /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data-1.fif...
    Range : 2078000 ... 3739199 =   1039.000 ...  1869.600 secs
Ready.
Not setting metadata
225 matching events found
No baseline correction applied
0 projection items activated
Loading data for 1 events and 18001 original time points ...
Splitting into 2 parts
Loading data for 113 events and 18001 original time points ...


  bp_filtered_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif")
  first_dot_epochs.save(f"{preproc_data_dir}/{subject}_bp_filtered_clean_first_dot_events.fif", overwrite = True, split_size = '1.9GB')


Loading data for 112 events and 18001 original time points ...


### First Move

In [15]:
## Load Behavioral Data ##
# first-move event table for this subject; the bare name on the last line displays it
first_move_csv = f"{raw_data_dir}/behave/{subject}_first_move_events.csv"
first_move_data = pd.read_csv(first_move_csv)
first_move_data

Unnamed: 0,neural_trial_numeric,TrialType,sample,sample_before,event
0,1,5,105800,0,1
1,2,16,122400,0,1
2,3,7,146000,0,1
3,4,18,164200,0,1
4,5,19,290400,0,1
...,...,...,...,...,...
235,236,2,3530000,0,1
236,237,12,3540600,0,1
237,238,8,3552200,0,1
238,239,20,3562000,0,1


In [16]:
# create events: keep the three event columns as an (n_events, 3) array.
# The name is rebound from the DataFrame to the array, so the guard makes this
# cell safe to re-run (column indexing on the array would otherwise fail).
if isinstance(first_move_data, pd.DataFrame):
    first_move_data = first_move_data[['sample', 'sample_before', 'event']].copy().to_numpy()


In [17]:
# reopen the saved bipolar-rereferenced, filtered recording
bp_filtered_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif")

# only epoch good channels
bad_channels = bp_filtered_fif.info['bads']
channels_of_interest = [name for name in bp_filtered_fif.info['ch_names'] if name not in bad_channels]

# window from tmin=-5 to tmax=5 around each first-move event (id 1), no baseline correction;
# first_move_data holds the events array at this point
first_move_epochs = mne.Epochs(
    bp_filtered_fif,
    first_move_data,
    event_id=1,
    tmin=-5,
    tmax=5,
    baseline=None,
    picks=channels_of_interest,
    reject_by_annotation=False,
)

# save the epochs, split into parts of at most 1.9GB
first_move_epochs.save(
    f"{preproc_data_dir}/{subject}_bp_filtered_clean_first_move_events.fif",
    overwrite=True,
    split_size='1.9GB',
)


Opening raw data file /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data.fif...
    Range : 0 ... 2077999 =      0.000 ...  1038.999 secs
Ready.
Opening raw data file /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data-1.fif...
    Range : 2078000 ... 3739199 =   1039.000 ...  1869.600 secs
Ready.
Not setting metadata
240 matching events found
No baseline correction applied
0 projection items activated
Loading data for 1 events and 20001 original time points ...
Splitting into 2 parts
Loading data for 120 events and 20001 original time points ...


  bp_filtered_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif")
  first_move_epochs.save(f"{preproc_data_dir}/{subject}_bp_filtered_clean_first_move_events.fif", overwrite = True, split_size = '1.9GB')


Loading data for 120 events and 20001 original time points ...


### Ghost Attack

In [18]:
## Load Behavioral Data ##
# attack event table for this subject; the bare name on the last line displays it
ghost_attack_csv = f"{raw_data_dir}/behave/{subject}_attack_events.csv"
ghost_attack_data = pd.read_csv(ghost_attack_csv)
ghost_attack_data

Unnamed: 0,neural_trial_numeric,TrialType,sample,sample_before,event
0,1,5,110600,0,1
1,2,16,130400,0,1
2,3,7,149600,0,1
3,6,15,316800,0,1
4,8,4,341200,0,1
...,...,...,...,...,...
67,222,7,3375600,0,1
68,225,15,3414000,0,1
69,228,3,3446000,0,1
70,234,13,3509200,0,1


In [19]:
# create events: keep the three event columns as an (n_events, 3) array.
# The name is rebound from the DataFrame to the array, so the guard makes this
# cell safe to re-run (column indexing on the array would otherwise fail).
if isinstance(ghost_attack_data, pd.DataFrame):
    ghost_attack_data = ghost_attack_data[['sample', 'sample_before', 'event']].copy().to_numpy()


In [20]:
# reopen the saved bipolar-rereferenced, filtered recording
bp_filtered_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif")

# only epoch good channels
bad_channels = bp_filtered_fif.info['bads']
channels_of_interest = [name for name in bp_filtered_fif.info['ch_names'] if name not in bad_channels]

# window from tmin=-4 to tmax=6 around each ghost-attack event (id 1), no baseline correction;
# ghost_attack_data holds the events array at this point
ghost_attack_epochs = mne.Epochs(
    bp_filtered_fif,
    ghost_attack_data,
    event_id=1,
    tmin=-4,
    tmax=6,
    baseline=None,
    picks=channels_of_interest,
    reject_by_annotation=False,
)

# save the epochs, split into parts of at most 1.9GB
ghost_attack_epochs.save(
    f"{preproc_data_dir}/{subject}_bp_filtered_clean_ghost_attack_events.fif",
    overwrite=True,
    split_size='1.9GB',
)


Opening raw data file /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data.fif...
    Range : 0 ... 2077999 =      0.000 ...  1038.999 secs
Ready.
Opening raw data file /home/brooke/pacman/preprocessing/BJH017/ieeg/BJH017_bp_filtered_clean_data-1.fif...
    Range : 2078000 ... 3739199 =   1039.000 ...  1869.600 secs
Ready.
Not setting metadata
72 matching events found
No baseline correction applied
0 projection items activated
Loading data for 1 events and 20001 original time points ...
Loading data for 72 events and 20001 original time points ...


  bp_filtered_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif")
  ghost_attack_epochs.save(f"{preproc_data_dir}/{subject}_bp_filtered_clean_ghost_attack_events.fif", overwrite = True, split_size = '1.9GB')
