# Cleaning: BJH030

Nothing useable, so much spiking activity

### Prep

In [1]:
import matplotlib
matplotlib.use("Qt5Agg")
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import signal, stats
import mat73
import re
from neurodsp.timefrequency import compute_wavelet_transform
import os
import mne
import IPython
import seaborn as sns

In [2]:
%matplotlib qt5

In [2]:
# helper functions#

def rle(inarray):
        """ run length encoding. Partial credit to R rle function. 
            Multi datatype arrays catered for including non Numpy
            returns: tuple (runlengths, startpositions, values) """
        ia = np.asarray(inarray)                # force numpy
        n = len(ia)
        if n == 0: 
            return (None, None, None)
        else:
            y = ia[1:] != ia[:-1]               # pairwise unequal (string safe)
            i = np.append(np.where(y), n - 1)   # must include last element posi
            z = np.diff(np.append(-1, i))       # run lengths
            p = np.cumsum(np.append(0, z))[:-1] # positions
            return(z, p, ia[i])
        
def find_bad_trial_times(idx, srate, trial_begins, trial_lengths):
    ''' takes and index of trial lengths, beginnings and the sampling rate and returns the the start time and and
    duration of the times to annotae as BAD '''
    start_time = trial_begins[0][idx]/srate
    duration = trial_lengths[0][idx]/srate
    
    return start_time, duration        

In [3]:
## Prep paths ##

subject = 'BJH030'
raw_data_dir = f"/home/brooke/pacman/raw_data/{subject}"
preproc_data_dir = f"/home/brooke/pacman/preprocessing/{subject}/ieeg"

In [4]:
## Load Data ##

raw_fif = mne.io.Raw(f"{raw_data_dir}/ieeg/{subject}_raw_ieeg.fif")

Opening raw data file /home/brooke/pacman/raw_data/BJH030/ieeg/BJH030_raw_ieeg.fif...
    Range : 0 ... 2085999 =      0.000 ...  1042.999 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH030/ieeg/BJH030_raw_ieeg-1.fif...
    Range : 2086000 ... 3081799 =   1043.000 ...  1540.899 secs
Ready.


In [20]:
## Filtering ##

# load data #
raw_fif.load_data()

# highpass filter @ 1 and lowpass @ 200 #
filtered_data = raw_fif.filter(l_freq=1, h_freq=150) # skip this step for TF analysis, always do this on the least processed data, or do .1 to 250

# remove 60hz line noise #
freqs = (60, 120, 180, 240)
filtered_notch_data = filtered_data.notch_filter(freqs=freqs) 


Reading 0 ... 3081799  =      0.000 ...  1540.899 secs...
Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 1.5e+02 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 150.00 Hz
- Upper transition bandwidth: 37.50 Hz (-6 dB cutoff frequency: 168.75 Hz)
- Filter length: 6601 samples (3.300 sec)



[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    1.0s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:    4.0s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:    9.0s


Setting up band-stop filter

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower transition bandwidth: 0.50 Hz
- Upper transition bandwidth: 0.50 Hz
- Filter length: 13201 samples (6.601 sec)



[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:    4.8s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:   10.9s


In [5]:
## Load/Save Filtered Data ##

# save #
# filtered_notch_data.save(f"{raw_data_dir}/ieeg/{subject}_notched_filtered_ieeg.fif", overwrite = True)

# load #
filtered_notch_data = mne.io.Raw(f"{raw_data_dir}/ieeg/{subject}_notched_filtered_ieeg.fif")
events = mne.find_events(filtered_notch_data, output='step', consecutive = False, stim_channel='STI')

Opening raw data file /home/brooke/pacman/raw_data/BJH030/ieeg/BJH030_notched_filtered_ieeg.fif...
    Range : 0 ... 2085999 =      0.000 ...  1042.999 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/BJH030/ieeg/BJH030_notched_filtered_ieeg-1.fif...
    Range : 2086000 ... 3081799 =   1043.000 ...  1540.899 secs
Ready.
482 events found
Event IDs: [0 1 2]


In [6]:
filtered_notch_data.info['ch_names']

['AR1',
 'AR2',
 'AR3',
 'AR4',
 'REF1',
 'REF2',
 'AR5',
 'AR6',
 'BR1',
 'BR2',
 'BR3',
 'BR4',
 'BR5',
 'BR6',
 'BR7',
 'BR8',
 'BR9',
 'BR10',
 'BR11',
 'BR12',
 'BR13',
 'BR14',
 'CR1',
 'CR2',
 'CR3',
 'CR4',
 'CR5',
 'CR6',
 'CR7',
 'CR8',
 'CR9',
 'CR10',
 'CR11',
 'CR12',
 'CR13',
 'CR14',
 'DR1',
 'DR2',
 'DR3',
 'DR4',
 'DR5',
 'DR6',
 'DR7',
 'DR8',
 'DR9',
 'DR10',
 'DR11',
 'DR12',
 'DR13',
 'DR14',
 'ER1',
 'ER2',
 'ER3',
 'ER4',
 'ER5',
 'ER6',
 'ER7',
 'ER8',
 'ER9',
 'ER10',
 'ER11',
 'ER12',
 'FR1',
 'FR2',
 'FR3',
 'FR4',
 'FR5',
 'FR6',
 'FR7',
 'FR8',
 'FR9',
 'FR10',
 'GR1',
 'GR2',
 'GR3',
 'GR4',
 'GR5',
 'GR6',
 'GR7',
 'GR8',
 'GR9',
 'GR10',
 'HR1',
 'HR2',
 'HR3',
 'HR4',
 'HR5',
 'HR6',
 'HR7',
 'HR8',
 'HR9',
 'HR10',
 'HR11',
 'HR12',
 'HR13',
 'HR14',
 'IR1',
 'IR2',
 'IR3',
 'IR4',
 'IR5',
 'IR6',
 'IR7',
 'IR8',
 'IR9',
 'IR10',
 'IR11',
 'IR12',
 'IR13',
 'IR14',
 'IR15',
 'IR16',
 'JAL1',
 'JAL2',
 'JAL3',
 'JAL4',
 'JAL5',
 'JAL6',
 'JAL7',
 'JAL8'

## Cleaning Neural Data

#### Bad Channels

Epileptic channels from my cleaning: 'OFL11', 'OFL12', 'FR2', 'FR3', 'FR1', 'FR2', 'FR3', 'FR4', 'DR2', 'DR3', 'DR3', 'DR4', 'DR1', 'DR2', 'OFL2', 'OFL3', 'OFL1', 'OFL2', 'OFL3', 'OFL4', 'OFL4', 'OFL5', 'LCL2', 'LCL4', 'LCL5','LCL6', 'LCL3', 'CR4', 'CR2', 'CR3', 'CR1', 'BR4', 'BR3', 'BR2', 'BR1', 'KBL1', 'KBL2', 'KBL3', 'KBL4', 'KBL5', 'MDL2', 'MDL3', 'MDL4', 'NEL2', 'NEL1', 'NEL4', 'NEL3', 'NEL6', 'NEL5', 'NEL8', 'NEL7', 'MDL6', 'MDL5', 'MDL9', 'MDL8', 'MDL7', 'PGL1', 'PGL2', 'PGL3', 'PGL5', 'PGL4', 'LCL7', 'LCL1', 'LCL9', 'LCL8', 'RIL2', 'RIL1', 'RIL4', 'RIL3'


Noisy channels: 

In [7]:
# Remove Channels from filtered data

# remove non seeg electrodes electrodes #
string_lst = ['EMPTY', 'REF', 'ekg']

bad_channels_reg = [re.findall(r"(?=("+'|'.join(string_lst)+r"))", name, re.IGNORECASE) for name in filtered_notch_data.info['ch_names']]
bad_channels_index = [i for i,x in enumerate(bad_channels_reg) if  x]
bad_channels = [filtered_notch_data.info['ch_names'][i] for i in bad_channels_index]

filtered_notch_data.info['bads'].extend(bad_channels)  

# # remove epileptic electrodes
filtered_notch_data.info['bads'].extend(['OFL11', 'OFL12', 'FR2', 'FR3', 'FR1', 'FR2', 'FR3', 'FR4', 'DR2', 'DR3', 'DR3', 'DR4', 'DR1', 'DR2', 'OFL2', 'OFL3', 'OFL1', 'OFL2', 'OFL3', 'OFL4', 'OFL4', 'OFL5', 'LCL2', 'LCL4', 'LCL5','LCL6', 'LCL3', 'CR4', 'CR2', 'CR3', 'CR1', 'BR4', 'BR3', 'BR2', 'BR1', 'KBL1', 'KBL2', 'KBL3', 'KBL4', 'KBL5', 'MDL2', 'MDL3', 'MDL4', 'NEL2', 'NEL1', 'NEL4', 'NEL3', 'NEL6', 'NEL5', 'NEL8', 'NEL7', 'MDL6', 'MDL5', 'MDL9', 'MDL8', 'MDL7', 'PGL1', 'PGL2', 'PGL3', 'PGL5', 'PGL4', 'LCL7', 'LCL1', 'LCL9', 'LCL8', 'RIL2', 'RIL1', 'RIL4', 'RIL3'])

# remove out of brain electrodes
## TODO hasn't been localized

# remove scalp and dc channels
scalp_dc_channels = [ 'EMPTY',
 'EMPTY_226',
 'EMPTY_227',
 'EMPTY_228',
 'EMPTY_229',
 'EMPTY_230',
 'EMPTY_231',
 'FP1',
 'F3',
 'C3',
 'P3',
 'O1',
 'FP2',
 'F4',
 'C4',
 'P4',
 'O2',
 'F7',
 'T7',
 'P7',
 'F8',
 'T8',
 'P8',
 'F9',
 'F10',
 'FPZ',
 'FZ',
 'CZ',
 'PZ',
 'OZ',
 'EKG1',
 'EKG2']

filtered_notch_data.info['bads'].extend(scalp_dc_channels)
filtered_notch_data

0,1
Measurement date,Unknown
Experimenter,Unknown
Digitized points,0 points
Good channels,"256 sEEG, 1 Stimulus"
Bad channels,"REF1, REF2, EMPTY, EMPTY_226, EMPTY_227, EMPTY_228, EMPTY_229, EMPTY_230, EMPTY_231, EKG1, EKG2, OFL11, OFL12, FR2, FR3, FR1, FR2, FR3, FR4, DR2, DR3, DR3, DR4, DR1, DR2, OFL2, OFL3, OFL1, OFL2, OFL3, OFL4, OFL4, OFL5, LCL2, LCL4, LCL5, LCL6, LCL3, CR4, CR2, CR3, CR1, BR4, BR3, BR2, BR1, KBL1, KBL2, KBL3, KBL4, KBL5, MDL2, MDL3, MDL4, NEL2, NEL1, NEL4, NEL3, NEL6, NEL5, NEL8, NEL7, MDL6, MDL5, MDL9, MDL8, MDL7, PGL1, PGL2, PGL3, PGL5, PGL4, LCL7, LCL1, LCL9, LCL8, RIL2, RIL1, RIL4, RIL3, EMPTY, EMPTY_226, EMPTY_227, EMPTY_228, EMPTY_229, EMPTY_230, EMPTY_231, FP1, F3, C3, P3, O1, FP2, F4, C4, P4, O2, F7, T7, P7, F8, T8, P8, F9, F10, FPZ, FZ, CZ, PZ, OZ, EKG1, EKG2"
EOG channels,Not available
ECG channels,Not available
Sampling frequency,2000.00 Hz
Highpass,1.00 Hz
Lowpass,150.00 Hz


In [8]:
# Remove Channels from filtered data

# remove non seeg electrodes electrodes #
string_lst = ['EMPTY', 'REF', 'ekg']

bad_channels_reg = [re.findall(r"(?=("+'|'.join(string_lst)+r"))", name, re.IGNORECASE) for name in raw_fif.info['ch_names']]
bad_channels_index = [i for i,x in enumerate(bad_channels_reg) if  x]
bad_channels = [raw_fif.info['ch_names'][i] for i in bad_channels_index]

raw_fif.info['bads'].extend(bad_channels)  

# # remove epileptic electrodes
raw_fif.info['bads'].extend(['OFL11', 'OFL12', 'FR2', 'FR3', 'FR1', 'FR2', 'FR3', 'FR4', 'DR2', 'DR3', 'DR3', 'DR4', 'DR1', 'DR2', 'OFL2', 'OFL3', 'OFL1', 'OFL2', 'OFL3', 'OFL4', 'OFL4', 'OFL5', 'LCL2', 'LCL4', 'LCL5','LCL6', 'LCL3', 'CR4', 'CR2', 'CR3', 'CR1', 'BR4', 'BR3', 'BR2', 'BR1', 'KBL1', 'KBL2', 'KBL3', 'KBL4', 'KBL5', 'MDL2', 'MDL3', 'MDL4', 'NEL2', 'NEL1', 'NEL4', 'NEL3', 'NEL6', 'NEL5', 'NEL8', 'NEL7', 'MDL6', 'MDL5', 'MDL9', 'MDL8', 'MDL7', 'PGL1', 'PGL2', 'PGL3', 'PGL5', 'PGL4', 'LCL7', 'LCL1', 'LCL9', 'LCL8', 'RIL2', 'RIL1', 'RIL4', 'RIL3'])

# remove out of brain electrodes
## TODO hasn't been localized

# remove scalp and dc channels
scalp_dc_channels = [ 'EMPTY',
 'EMPTY_226',
 'EMPTY_227',
 'EMPTY_228',
 'EMPTY_229',
 'EMPTY_230',
 'EMPTY_231',
 'FP1',
 'F3',
 'C3',
 'P3',
 'O1',
 'FP2',
 'F4',
 'C4',
 'P4',
 'O2',
 'F7',
 'T7',
 'P7',
 'F8',
 'T8',
 'P8',
 'F9',
 'F10',
 'FPZ',
 'FZ',
 'CZ',
 'PZ',
 'OZ',
 'EKG1',
 'EKG2']

raw_fif.info['bads'].extend(scalp_dc_channels)
raw_fif

0,1
Measurement date,Unknown
Experimenter,Unknown
Digitized points,0 points
Good channels,"256 sEEG, 1 Stimulus"
Bad channels,"REF1, REF2, EMPTY, EMPTY_226, EMPTY_227, EMPTY_228, EMPTY_229, EMPTY_230, EMPTY_231, EKG1, EKG2, OFL11, OFL12, FR2, FR3, FR1, FR2, FR3, FR4, DR2, DR3, DR3, DR4, DR1, DR2, OFL2, OFL3, OFL1, OFL2, OFL3, OFL4, OFL4, OFL5, LCL2, LCL4, LCL5, LCL6, LCL3, CR4, CR2, CR3, CR1, BR4, BR3, BR2, BR1, KBL1, KBL2, KBL3, KBL4, KBL5, MDL2, MDL3, MDL4, NEL2, NEL1, NEL4, NEL3, NEL6, NEL5, NEL8, NEL7, MDL6, MDL5, MDL9, MDL8, MDL7, PGL1, PGL2, PGL3, PGL5, PGL4, LCL7, LCL1, LCL9, LCL8, RIL2, RIL1, RIL4, RIL3, EMPTY, EMPTY_226, EMPTY_227, EMPTY_228, EMPTY_229, EMPTY_230, EMPTY_231, FP1, F3, C3, P3, O1, FP2, F4, C4, P4, O2, F7, T7, P7, F8, T8, P8, F9, F10, FPZ, FZ, CZ, PZ, OZ, EKG1, EKG2"
EOG channels,Not available
ECG channels,Not available
Sampling frequency,2000.00 Hz
Highpass,0.00 Hz
Lowpass,1000.00 Hz


#### Bad Epochs from noisy data



In [9]:
# # Annotate Noisy Epochs from Bob's cleaning

# # read from csv from interactive session
bad_annots = mne.read_annotations('../ieeg/saved_annotations.fif')   


#### Bad epochs from bad trial data

In [10]:
## quality check trials ##

# calculate trial onsets and offsets
sti_raw = filtered_notch_data.get_data(picks = ['STI'])
lengths, positionsm, val  = rle(sti_raw[0])

# lengths, beginnings, endings, for trials
trial_lengths = [lengths[x] for x in np.where(val == 1)]
trial_begins = [positionsm[x] for x in np.where(val == 1)]
trial_ends = trial_lengths[0] + trial_begins[0]
trial_baseline = trial_begins[0] - 1000

# lengths, beginnings, endings, for itis
iti_lengths = [lengths[x] for x in np.where(val == 0)]
iti_begins = [positionsm[x] for x in np.where(val == 0)]
iti_ends = iti_begins[0] + iti_lengths[0]

# save trial lengths for later
np.save(f"{preproc_data_dir}/{subject}_trial_lengths.npy", trial_lengths)

In [11]:
# exclude bad trials/apochs from r behavioral analysis (paused trials and no biscuits)

# load bad trial data
bad_trials = np.genfromtxt(f"{raw_data_dir}/behave/{subject}_bad_trials.csv", delimiter = ',', skip_header = 1)

onsets = []
durations = []
for bad in bad_trials:
    start, dur = find_bad_trial_times(int(bad), raw_fif.info['sfreq'], trial_begins, trial_lengths)
    onsets.append(start)
    durations.append(dur)

# update descriptions
descriptions = ['bad'] * len(durations)


In [12]:
## exclude all bad epochs from both bad trials and investigating neural data ##

# combine with bad trial data #
bad_annots.append(onsets, durations, descriptions)

# # # apply to filtered data #
filtered_notch_data.set_annotations(bad_annots)

# # # apply to not filtered data #
raw_fif.set_annotations(bad_annots)

0,1
Measurement date,Unknown
Experimenter,Unknown
Digitized points,0 points
Good channels,"256 sEEG, 1 Stimulus"
Bad channels,"REF1, REF2, EMPTY, EMPTY_226, EMPTY_227, EMPTY_228, EMPTY_229, EMPTY_230, EMPTY_231, EKG1, EKG2, OFL11, OFL12, FR2, FR3, FR1, FR2, FR3, FR4, DR2, DR3, DR3, DR4, DR1, DR2, OFL2, OFL3, OFL1, OFL2, OFL3, OFL4, OFL4, OFL5, LCL2, LCL4, LCL5, LCL6, LCL3, CR4, CR2, CR3, CR1, BR4, BR3, BR2, BR1, KBL1, KBL2, KBL3, KBL4, KBL5, MDL2, MDL3, MDL4, NEL2, NEL1, NEL4, NEL3, NEL6, NEL5, NEL8, NEL7, MDL6, MDL5, MDL9, MDL8, MDL7, PGL1, PGL2, PGL3, PGL5, PGL4, LCL7, LCL1, LCL9, LCL8, RIL2, RIL1, RIL4, RIL3, EMPTY, EMPTY_226, EMPTY_227, EMPTY_228, EMPTY_229, EMPTY_230, EMPTY_231, FP1, F3, C3, P3, O1, FP2, F4, C4, P4, O2, F7, T7, P7, F8, T8, P8, F9, F10, FPZ, FZ, CZ, PZ, OZ, EKG1, EKG2"
EOG channels,Not available
ECG channels,Not available
Sampling frequency,2000.00 Hz
Highpass,0.00 Hz
Lowpass,1000.00 Hz


## Visualizing Neural Data

next step, working on filtering out the high frequency noise

then look at Marks script to come up with some plot of every trial, sort longest to shortest, with theta power as the color tiome as x, trial on y

In [14]:
# plot for cleaning #
filtered_notch_data.drop_channels(filtered_notch_data.info['bads'])
filtered_notch_data.plot(events=events, color='b', bad_color = 'red', n_channels = 230, clipping = None, event_color = 'r')

Using qt as 2D backend.


<mne_qt_browser._pg_figure.MNEQtBrowser at 0x7f6ca4059040>

In [None]:
# plot psd of all channels #

filtered_notch_data.plot_psd()

In [None]:
# plot psd of bad channels #

filtered_notch_data.plot_psd(picks = filtered_notch_data.info['bads'])

In [17]:
# # From interactive sessions
# interactive_annot = filtered_notch_data.annotations
# for x in range(0, len(filtered_notch_data.annotations)):
#     print(interactive_annot[x])
    
    
# filtered_notch_data.annotations.save('../ieeg/saved_annotations.fif', overwrite = True)    

OrderedDict([('onset', 6.220399854039831), ('duration', 1.1461714263765392), ('description', 'BAD'), ('orig_time', None)])
OrderedDict([('onset', 11.681806833240508), ('duration', 1.0109662179031673), ('description', 'BAD'), ('orig_time', None)])
OrderedDict([('onset', 80.51719833298122), ('duration', 2.7501968541742627), ('description', 'BAD'), ('orig_time', None)])
OrderedDict([('onset', 127.16861280224319), ('duration', 0.7897213313103748), ('description', 'BAD'), ('orig_time', None)])
OrderedDict([('onset', 177.00575198294567), ('duration', 0.958727841902089), ('description', 'BAD'), ('orig_time', None)])
OrderedDict([('onset', 203.7516084426434), ('duration', 1.1246615068467065), ('description', 'BAD'), ('orig_time', None)])
OrderedDict([('onset', 207.44209606483705), ('duration', 0.6299333576600361), ('description', 'BAD'), ('orig_time', None)])
OrderedDict([('onset', 285.56151452879834), ('duration', 1.2537210240258219), ('description', 'BAD'), ('orig_time', None)])
OrderedDict(

  filtered_notch_data.annotations.save('../ieeg/saved_annotations.fif', overwrite = True)


## Save Files

In [13]:
# save filtered data #
filtered_notch_data.save(f"{raw_data_dir}/ieeg/{subject}_notched_filtered_clean_ieeg.fif", overwrite = True)


# save raw clean data #
raw_fif.save(f"{raw_data_dir}/ieeg/{subject}_raw_clean_ieeg.fif", overwrite = True)


Overwriting existing file.
Writing /home/brooke/pacman/raw_data/BJH030/ieeg/BJH030_notched_filtered_clean_ieeg.fif
Overwriting existing file.
Writing /home/brooke/pacman/raw_data/BJH030/ieeg/BJH030_notched_filtered_clean_ieeg-1.fif
Closing /home/brooke/pacman/raw_data/BJH030/ieeg/BJH030_notched_filtered_clean_ieeg-1.fif
Closing /home/brooke/pacman/raw_data/BJH030/ieeg/BJH030_notched_filtered_clean_ieeg.fif
[done]
Overwriting existing file.
Writing /home/brooke/pacman/raw_data/BJH030/ieeg/BJH030_raw_clean_ieeg.fif
Overwriting existing file.
Writing /home/brooke/pacman/raw_data/BJH030/ieeg/BJH030_raw_clean_ieeg-1.fif
Closing /home/brooke/pacman/raw_data/BJH030/ieeg/BJH030_raw_clean_ieeg-1.fif
Closing /home/brooke/pacman/raw_data/BJH030/ieeg/BJH030_raw_clean_ieeg.fif
[done]
