# Preprocessing: BJH024


This markdown files loads the clean data and does a bipolar rereference as well as epochs the data. Skipped channel name in regions we aren't interested in. I think its fine.


In [1]:
import matplotlib
matplotlib.use("Qt5Agg")
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import signal, stats
import mat73
import re
from neurodsp.timefrequency import compute_wavelet_transform
import os
import mne
import IPython
import seaborn as sns

In [2]:
## Prep paths ##

subject = 'BJH024'
raw_data_dir = f"/home/brooke/pacman/raw_data/{subject}"
preproc_data_dir = f"/home/brooke/pacman/preprocessing/{subject}/ieeg"

In [3]:
## Load Data ##

# load filtered data #
filtered_clean_fif = mne.io.Raw(f"{raw_data_dir}/ieeg/{subject}_notched_filtered_clean_ieeg.fif")

# load raw data #
raw_clean_fif = mne.io.Raw(f"{raw_data_dir}/ieeg/{subject}_raw_clean_ieeg.fif")



Opening raw data file /home/brooke/pacman/raw_data/BJH024/ieeg/BJH024_notched_filtered_clean_ieeg.fif...
    Range : 0 ... 2085999 =      0.000 ...  1042.999 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH024/ieeg/BJH024_notched_filtered_clean_ieeg-1.fif...
    Range : 2086000 ... 4171999 =   1043.000 ...  2085.999 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH024/ieeg/BJH024_notched_filtered_clean_ieeg-2.fif...
    Range : 4172000 ... 4365199 =   2086.000 ...  2182.599 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH024/ieeg/BJH024_raw_clean_ieeg.fif...
    Range : 0 ... 2085999 =      0.000 ...  1042.999 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH024/ieeg/BJH024_raw_clean_ieeg-1.fif...
    Range : 2086000 ... 4171999 =   1043.000 ...  2085.999 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH024/ieeg/BJH024_raw_clean_ieeg-2.fif...
    Range : 4172000 ... 4365199 =   2086.000 ...  2182.599

In [4]:
raw_clean_fif.info['ch_names']

['AL1',
 'AL2',
 'AL3',
 'AL5',
 'REF1',
 'REF2',
 'AL6',
 'AL7',
 'AL8',
 'AL9',
 'AL11',
 'AL12',
 'AL13',
 'AL14',
 'BL1',
 'BL2',
 'BL3',
 'BL4',
 'BL5',
 'BL6',
 'BL7',
 'BL9',
 'BL10',
 'BL11',
 'BL12',
 'BL14',
 'BL15',
 'BL16',
 'CL1',
 'CL2',
 'CL7',
 'CL8',
 'CL9',
 'CL10',
 'CL12',
 'CL13',
 'CL14',
 'CL15',
 'CL16',
 'DL1',
 'DL2',
 'DL3',
 'DL11',
 'DL12',
 'DL13',
 'DL14',
 'DL15',
 'DL16',
 'EL1',
 'EL2',
 'EL3',
 'EL4',
 'EL5',
 'EL6',
 'EL7',
 'EL8',
 'EL9',
 'EL10',
 'EL11',
 'EL12',
 'FL1',
 'FL2',
 'FL3',
 'FL4',
 'FL5',
 'FL6',
 'FL7',
 'FL8',
 'FL9',
 'FL10',
 'GL1',
 'GL2',
 'GL3',
 'GL4',
 'GL5',
 'GL6',
 'GL7',
 'GL8',
 'GL9',
 'GL10',
 'GL11',
 'GL12',
 'GL13',
 'GL14',
 'HL1',
 'HL2',
 'HL3',
 'HL4',
 'HL5',
 'HL6',
 'HL7',
 'HL8',
 'HL9',
 'HL10',
 'HL11',
 'HL12',
 'HL13',
 'HL14',
 'IL1',
 'IL2',
 'IL3',
 'IL4',
 'IL5',
 'IL6',
 'IL11',
 'IL12',
 'IL13',
 'IL14',
 'JL1',
 'JL2',
 'JL3',
 'JL4',
 'JL5',
 'JL6',
 'JL7',
 'JL8',
 'JL9',
 'JL10',
 'JL11',
 'JL

## Bipolar Rereferencing

In [5]:
# helper functions

def probe_and_num(elec_str): 
    ''' This convenience function takes an electrode string like LHH1 and outputs ('LHH', 1)
    '''
    if '_' in elec_str:
        return None, None
    else:   
        regex_str = '(\D+)(\d+)' # group of letters followed by group of digits
        matcher = re.compile(regex_str,re.IGNORECASE|re.DOTALL).search(elec_str)
        if matcher:
            probe, num = matcher.groups()
            return probe, int(num)
        else:
            return None, None
    
def find_bipolar_pair(ch, labels, remove):
    ''' Find the pair of a given electrode for bipolar referencing.
        Given a single *ch* and a list of *labels*, some of which you want to *remove*,
        this finds the next channel on that probe that isn't meant to be removed (WM is ok).
    '''
    bipolar_pair = None
    probe, num = probe_and_num(ch)
    other_in_probe = [other_ch for other_ch in labels if probe_and_num(other_ch)[0]==probe]
    for i in range(len(other_in_probe)-num):
        next_ch = probe + str(num+i+1)
        if next_ch in remove or next_ch == 'STI' or next_ch not in labels:
            continue
        else:
            bipolar_pair = next_ch
            break
    return bipolar_pair

In [6]:
# ch = 'AL13'

bipolar_pair = None
probe, num = probe_and_num(ch)
other_in_probe = [other_ch for other_ch in labels if probe_and_num(other_ch)[0]==probe]
for i in range(len(other_in_probe)-num):
    next_ch = probe + str(num+i+1)
    if next_ch in remove or next_ch == 'STI' or next_ch not in labels:
        continue
    else:
        bipolar_pair = next_ch
        break

NameError: name 'ch' is not defined

In [31]:
filtered_clean_fif.info['ch_names']

['AL1',
 'AL2',
 'AL3',
 'AL5',
 'REF1',
 'REF2',
 'AL6',
 'AL7',
 'AL8',
 'AL9',
 'AL11',
 'AL12',
 'AL13',
 'AL14',
 'BL1',
 'BL2',
 'BL3',
 'BL4',
 'BL5',
 'BL6',
 'BL7',
 'BL9',
 'BL10',
 'BL11',
 'BL12',
 'BL14',
 'BL15',
 'BL16',
 'CL1',
 'CL2',
 'CL7',
 'CL8',
 'CL9',
 'CL10',
 'CL12',
 'CL13',
 'CL14',
 'CL15',
 'CL16',
 'DL1',
 'DL2',
 'DL3',
 'DL11',
 'DL12',
 'DL13',
 'DL14',
 'DL15',
 'DL16',
 'EL1',
 'EL2',
 'EL3',
 'EL4',
 'EL5',
 'EL6',
 'EL7',
 'EL8',
 'EL9',
 'EL10',
 'EL11',
 'EL12',
 'FL1',
 'FL2',
 'FL3',
 'FL4',
 'FL5',
 'FL6',
 'FL7',
 'FL8',
 'FL9',
 'FL10',
 'GL1',
 'GL2',
 'GL3',
 'GL4',
 'GL5',
 'GL6',
 'GL7',
 'GL8',
 'GL9',
 'GL10',
 'GL11',
 'GL12',
 'GL13',
 'GL14',
 'HL1',
 'HL2',
 'HL3',
 'HL4',
 'HL5',
 'HL6',
 'HL7',
 'HL8',
 'HL9',
 'HL10',
 'HL11',
 'HL12',
 'HL13',
 'HL14',
 'IL1',
 'IL2',
 'IL3',
 'IL4',
 'IL5',
 'IL6',
 'IL11',
 'IL12',
 'IL13',
 'IL14',
 'JL1',
 'JL2',
 'JL3',
 'JL4',
 'JL5',
 'JL6',
 'JL7',
 'JL8',
 'JL9',
 'JL10',
 'JL11',
 'JL

In [7]:
# Here I iterate through the electrodes in my ROIs and match them up with their bipolar pair
pairs = []
anode = []
cathode = []
pairs_name = []
pairs_map = {}
remove = filtered_clean_fif.info['bads']
labels = filtered_clean_fif.info['ch_names']
for ch in labels:
    # if a channel is meant to be removed, it doesn't get to be in a bipolar pair
    if ch in remove or ch == 'STI':
        print(f"{ch} noref")
    else:
        pair = find_bipolar_pair(ch, labels, remove)
        if pair:
            anode.append(ch)
            cathode.append(pair)
            pairs.append((ch, pair))
            pairs_map[ch] = f"{ch}-{pair}"
            pairs_name.append(f"{ch}-{pair}")
            print(ch, pair)

AL1 AL2
AL2 AL3
AL3 AL5
AL5 AL6
REF1 noref
REF2 noref
AL6 AL7
AL7 AL8
AL8 AL9
AL9 AL11
AL11 AL12
BL1 BL2
BL2 BL3
BL3 BL4
BL4 BL5
BL5 BL6
BL6 BL7
BL7 BL9
BL9 BL10
BL10 BL11
BL11 BL12
BL12 BL14
CL1 CL2
CL2 CL7
CL7 CL8
CL8 CL9
CL9 CL10
DL1 DL2
DL2 DL3
EL1 EL4
EL2 noref
EL3 noref
EL4 EL5
EL5 EL6
EL6 EL7
EL7 EL8
EL8 EL9
EL9 EL10
EL10 EL11
EL11 EL12
FL1 FL2
FL2 FL3
FL3 FL4
FL4 FL5
FL5 FL6
FL6 FL7
FL7 FL9
FL8 noref
FL9 FL10
GL1 noref
GL2 noref
GL3 GL4
GL4 GL5
GL5 GL6
GL6 GL7
GL7 GL8
GL8 GL9
GL9 GL10
GL10 GL11
GL11 GL12
GL12 GL13
GL13 GL14
HL1 noref
HL2 noref
HL3 noref
HL4 noref
HL5 HL6
HL6 HL7
HL7 HL8
HL8 HL9
HL9 HL10
HL10 HL11
HL11 HL12
HL12 HL13
HL13 HL14
IL1 IL2
IL2 IL3
IL3 IL4
IL4 IL5
IL5 IL6
JL1 JL2
JL2 JL3
JL3 JL4
JL4 JL5
JL5 JL6
JL6 JL7
JL7 JL8
JL8 JL9
JL9 JL10
JL10 JL11
JL11 JL12
AR1 AR2
AR2 AR3
AR3 AR4
AR4 AR5
AR5 AR6
AR6 AR7
AR7 AR8
AR8 AR9
AR9 AR10
AR10 AR11
AR11 AR13
BR1 BR2
BR2 BR3
BR3 BR4
BR4 BR5
BR5 BR6
BR6 BR7
BR7 BR8
BR8 BR9
BR9 BR10
BR10 BR11
BR11 BR12
BR12 BR13
BR13 BR14
BR

In [8]:
## Apply Rereference #

if filtered_clean_fif.info['ch_names'] == raw_clean_fif.info['ch_names'] and filtered_clean_fif.info['bads'] == raw_clean_fif.info['bads']:
    
    # load filtered data
    filtered_clean_fif.load_data()
    
    # set filtered reference 
    bp_filt_fif = mne.set_bipolar_reference(filtered_clean_fif, anode = anode, cathode = cathode)
    
    # save
    bp_filt_fif.save(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif", overwrite = True)
    
    # clear var
    bp_filt_fif = []
    filtered_clean_fif = []
    
    # load raw data
    raw_clean_fif.load_data()
    
    # set raw reference 
    bp_raw_fif = mne.set_bipolar_reference(raw_clean_fif, anode = anode, cathode = cathode)
    
    # save
    bp_raw_fif.save(f"{preproc_data_dir}/{subject}_bp_raw_clean_data.fif", overwrite = True)

    
    # clear var
    # bp_raw_fif = []
    # raw_clean_fif = []    
    
    

Reading 0 ... 4365199  =      0.000 ...  2182.599 secs...
sEEG channel type selected for re-referencing
Creating RawArray with float64 data, n_channels=163, n_times=4365200
    Range : 0 ... 4365199 =      0.000 ...  2182.599 secs
Ready.
Added the following bipolar channels:
AL1-AL2, AL2-AL3, AL3-AL5, AL5-AL6, AL6-AL7, AL7-AL8, AL8-AL9, AL9-AL11, AL11-AL12, BL1-BL2, BL2-BL3, BL3-BL4, BL4-BL5, BL5-BL6, BL6-BL7, BL7-BL9, BL9-BL10, BL10-BL11, BL11-BL12, BL12-BL14, CL1-CL2, CL2-CL7, CL7-CL8, CL8-CL9, CL9-CL10, DL1-DL2, DL2-DL3, EL1-EL4, EL4-EL5, EL5-EL6, EL6-EL7, EL7-EL8, EL8-EL9, EL9-EL10, EL10-EL11, EL11-EL12, FL1-FL2, FL2-FL3, FL3-FL4, FL4-FL5, FL5-FL6, FL6-FL7, FL7-FL9, FL9-FL10, GL3-GL4, GL4-GL5, GL5-GL6, GL6-GL7, GL7-GL8, GL8-GL9, GL9-GL10, GL10-GL11, GL11-GL12, GL12-GL13, GL13-GL14, HL5-HL6, HL6-HL7, HL7-HL8, HL8-HL9, HL9-HL10, HL10-HL11, HL11-HL12, HL12-HL13, HL13-HL14, IL1-IL2, IL2-IL3, IL3-IL4, IL4-IL5, IL5-IL6, JL1-JL2, JL2-JL3, JL3-JL4, JL4-JL5, JL5-JL6, JL6-JL7, JL7-JL8, JL8-J

  bp_filt_fif.save(f"{preproc_data_dir}/{subject}_bp_filtered_clean_data.fif", overwrite = True)


Overwriting existing file.
Writing /home/brooke/pacman/preprocessing/BJH024/ieeg/BJH024_bp_filtered_clean_data-1.fif
Closing /home/brooke/pacman/preprocessing/BJH024/ieeg/BJH024_bp_filtered_clean_data-1.fif
Closing /home/brooke/pacman/preprocessing/BJH024/ieeg/BJH024_bp_filtered_clean_data.fif
[done]
Reading 0 ... 4365199  =      0.000 ...  2182.599 secs...
sEEG channel type selected for re-referencing
Creating RawArray with float64 data, n_channels=163, n_times=4365200
    Range : 0 ... 4365199 =      0.000 ...  2182.599 secs
Ready.
Added the following bipolar channels:
AL1-AL2, AL2-AL3, AL3-AL5, AL5-AL6, AL6-AL7, AL7-AL8, AL8-AL9, AL9-AL11, AL11-AL12, BL1-BL2, BL2-BL3, BL3-BL4, BL4-BL5, BL5-BL6, BL6-BL7, BL7-BL9, BL9-BL10, BL10-BL11, BL11-BL12, BL12-BL14, CL1-CL2, CL2-CL7, CL7-CL8, CL8-CL9, CL9-CL10, DL1-DL2, DL2-DL3, EL1-EL4, EL4-EL5, EL5-EL6, EL6-EL7, EL7-EL8, EL8-EL9, EL9-EL10, EL10-EL11, EL11-EL12, FL1-FL2, FL2-FL3, FL3-FL4, FL4-FL5, FL5-FL6, FL6-FL7, FL7-FL9, FL9-FL10, GL3-GL4, 

  bp_raw_fif.save(f"{preproc_data_dir}/{subject}_bp_raw_clean_data.fif", overwrite = True)


Overwriting existing file.
Writing /home/brooke/pacman/preprocessing/BJH024/ieeg/BJH024_bp_raw_clean_data-1.fif
Closing /home/brooke/pacman/preprocessing/BJH024/ieeg/BJH024_bp_raw_clean_data-1.fif
Closing /home/brooke/pacman/preprocessing/BJH024/ieeg/BJH024_bp_raw_clean_data.fif
[done]


In [8]:
# Visualize it #

# bp_raw_fif.plot(events=events, color='b', bad_color = 'cyan', n_channels = 1, clipping = None, event_color = 'r')

## Epoching the data 

In [6]:
bp_raw_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_raw_clean_data.fif")
events = mne.find_events(bp_raw_fif, output='step', consecutive = False, stim_channel='STI')

Opening raw data file /home/brooke/pacman/preprocessing/BJH024/ieeg/BJH024_bp_raw_clean_data.fif...
    Range : 0 ... 2263999 =      0.000 ...  1131.999 secs
Ready.
Opening raw data file /home/brooke/pacman/preprocessing/BJH024/ieeg/BJH024_bp_raw_clean_data-1.fif...
    Range : 2264000 ... 4365199 =   1132.000 ...  2182.599 secs
Ready.


  bp_raw_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_raw_clean_data.fif")


484 events found
Event IDs: [0 1 2]


In [7]:
channels_of_interest =  [x for x in bp_raw_fif.info['ch_names'] if x not in bp_raw_fif.info['bads']]
channels_of_interest

['AL13',
 'AL14',
 'BL15',
 'BL16',
 'CL12',
 'CL13',
 'CL14',
 'CL15',
 'CL16',
 'DL11',
 'DL12',
 'DL13',
 'DL14',
 'DL15',
 'DL16',
 'IL11',
 'IL12',
 'IL13',
 'IL14',
 'AR14',
 'CR16',
 'DR11',
 'DR12',
 'DR13',
 'DR15',
 'DR16',
 'IR12',
 'IR13',
 'IR14',
 'JR13',
 'JR14',
 'STI',
 'AL1-AL2',
 'AL2-AL3',
 'AL3-AL5',
 'AL5-AL6',
 'AL6-AL7',
 'AL7-AL8',
 'AL8-AL9',
 'AL9-AL11',
 'AL11-AL12',
 'BL1-BL2',
 'BL2-BL3',
 'BL3-BL4',
 'BL4-BL5',
 'BL5-BL6',
 'BL6-BL7',
 'BL7-BL9',
 'BL9-BL10',
 'BL10-BL11',
 'BL11-BL12',
 'BL12-BL14',
 'CL1-CL2',
 'CL2-CL7',
 'CL7-CL8',
 'CL8-CL9',
 'CL9-CL10',
 'DL1-DL2',
 'DL2-DL3',
 'EL1-EL4',
 'EL4-EL5',
 'EL5-EL6',
 'EL6-EL7',
 'EL7-EL8',
 'EL8-EL9',
 'EL9-EL10',
 'EL10-EL11',
 'EL11-EL12',
 'FL1-FL2',
 'FL2-FL3',
 'FL3-FL4',
 'FL4-FL5',
 'FL5-FL6',
 'FL6-FL7',
 'FL7-FL9',
 'FL9-FL10',
 'GL3-GL4',
 'GL4-GL5',
 'GL5-GL6',
 'GL6-GL7',
 'GL7-GL8',
 'GL8-GL9',
 'GL9-GL10',
 'GL10-GL11',
 'GL11-GL12',
 'GL12-GL13',
 'GL13-GL14',
 'HL5-HL6',
 'HL6-HL7',
 'H

In [67]:
## Epoching the raw data ##

# load data 
bp_raw_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_raw_clean_data.fif")
events = mne.find_events(bp_raw_fif, output='step', consecutive = False, stim_channel='STI')

# create events
    
# only epoch good channels
channels_of_interest =  [x for x in bp_raw_fif.info['ch_names'] if x not in bp_raw_fif.info['bads']]

# epoch the data
epoched_data = mne.Epochs(bp_raw_fif, events, 
                          event_id = 1, tmin = -4, tmax = 12, 
                          baseline = None, picks = channels_of_interest,
                             reject_by_annotation = False)
# save the data
epoched_data.save(f"{preproc_data_dir}/{subject}_bp_clean_pres-locked_ieeg.fif", overwrite = True, split_size = '1.9GB')

Opening raw data file /home/brooke/pacman/preprocessing/BJH024/ieeg/BJH024_bp_raw_clean_data.fif...
    Range : 0 ... 2263999 =      0.000 ...  1131.999 secs
Ready.
Opening raw data file /home/brooke/pacman/preprocessing/BJH024/ieeg/BJH024_bp_raw_clean_data-1.fif...
    Range : 2264000 ... 4365199 =   1132.000 ...  2182.599 secs
Ready.


  bp_raw_fif = mne.io.Raw(f"{preproc_data_dir}/{subject}_bp_raw_clean_data.fif")


484 events found
Event IDs: [0 1 2]
Not setting metadata
240 matching events found
No baseline correction applied
0 projection items activated
Loading data for 240 events and 32001 original time points ...


  epoched_data.save(f"{preproc_data_dir}/{subject}_bp_clean_pres-locked_ieeg.fif", overwrite = True, split_size = '1.9GB')


0 bad epochs dropped
Loading data for 1 events and 32001 original time points ...
Splitting into 3 parts
Loading data for 80 events and 32001 original time points ...
Loading data for 80 events and 32001 original time points ...
Loading data for 80 events and 32001 original time points ...


## Last Away

In [68]:
## Load Behavioral Data ##
last_away_data = pd.read_csv(f"{raw_data_dir}/behave/{subject}_last_away_events.csv")
last_away_data

Unnamed: 0,trial_numeric,TrialType,sample,sample_before,event
0,3,4,65600,0,1
1,4,15,85500,0,1
2,5,11,99600,0,1
3,6,6,118600,0,1
4,10,12,220400,0,1
...,...,...,...,...,...
215,236,9,4201100,0,1
216,237,1,4213600,0,1
217,238,16,4236000,0,1
218,239,8,4245900,0,1


In [69]:
# filter to good epochs and fix trial indexing
last_away_data['trial_numeric'] = last_away_data['trial_numeric'] - 1

# create events
last_away_events = last_away_data[['sample', 'sample_before', 'event']].copy().to_numpy()


In [70]:
# epoch the data
last_away_epochs = mne.Epochs(bp_raw_fif, last_away_events, 
                          event_id = 1, tmin = -5, tmax =5, 
                          baseline = None, picks = channels_of_interest,
                             reject_by_annotation = False)
# # save the data
last_away_epochs.save(f"{preproc_data_dir}/{subject}_bp_raw_clean_last_away_events.fif", overwrite = True, split_size = '1.9GB')


Not setting metadata
220 matching events found
No baseline correction applied
0 projection items activated
Loading data for 220 events and 20001 original time points ...


  last_away_epochs.save(f"{preproc_data_dir}/{subject}_bp_raw_clean_last_away_events.fif", overwrite = True, split_size = '1.9GB')


0 bad epochs dropped
Loading data for 1 events and 20001 original time points ...
Splitting into 2 parts
Loading data for 110 events and 20001 original time points ...
Loading data for 110 events and 20001 original time points ...


## Last Dot

In [None]:
## Load Behavioral Data ##
last_dot_data = pd.read_csv(f"{raw_data_dir}/behave/{subject}_last_dot_events.csv")
last_dot_data

In [None]:
# filter to good epochs and fix trial indexing
last_dot_data['trial_numeric'] = last_dot_data['trial_numeric'] - 1

# create events
last_dot_events = last_dot_data[['sample', 'sample_before', 'event']].copy().to_numpy()


In [None]:
# epoch the data
last_dot_epochs = mne.Epochs(bp_raw_fif, last_dot_events, 
                          event_id = 1, tmin = -.75 * 4, tmax =.75 * 3, 
                          baseline = None, picks = channels_of_interest,
                             reject_by_annotation = False)
# # save the data
last_dot_epochs.save(f"{preproc_data_dir}/{subject}_bp_raw_clean_last_dot_events.fif", overwrite = True, split_size = '1.9GB')
