# Cleaning: SLCH018


This script cleans the data both by abnormal trials and by noisy channels and epileptic activity.


### Prep

In [1]:
import matplotlib
# matplotlib.use("Qt5Agg")
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import signal, stats
#import mat73
import re
#from neurodsp.timefrequency import compute_wavelet_transform
from BCI2kReader import BCI2kReader as b2k
import os
import mne
#from tabulate import tabulate
import IPython
import seaborn as sns

In [5]:
%matplotlib qt5

qt.qpa.xcb: X server does not support XInput 2
Qt: Session management error: Authentication Rejected, reason : None of the authentication protocols specified are supported and host-based authentication failed


qt.qpa.xcb: QXcbConnection: XCB error: 1 (BadRequest), sequence: 169, resource id: 132, major code: 130 (Unknown), minor code: 47


In [2]:
# helper functions#

def rle(inarray):
        """ run length encoding. Partial credit to R rle function. 
            Multi datatype arrays catered for including non Numpy
            returns: tuple (runlengths, startpositions, values) """
        ia = np.asarray(inarray)                # force numpy
        n = len(ia)
        if n == 0: 
            return (None, None, None)
        else:
            y = ia[1:] != ia[:-1]               # pairwise unequal (string safe)
            i = np.append(np.where(y), n - 1)   # must include last element posi
            z = np.diff(np.append(-1, i))       # run lengths
            p = np.cumsum(np.append(0, z))[:-1] # positions
            return(z, p, ia[i])
        
def find_bad_trial_times(idx, srate, trial_begins, trial_lengths):
    ''' takes and index of trial lengths, beginnings and the sampling rate and returns the the start time and and
    duration of the times to annotae as BAD '''
    start_time = trial_begins[0][idx]/srate
    duration = trial_lengths[0][idx]/srate
    
    return start_time, duration        

In [3]:
## Prep paths ##

subject = 'SLCH018'
raw_data_dir = f"/home/brooke/pacman/raw_data/{subject}"
preproc_data_dir = f"/home/brooke/pacman/preprocessing/{subject}/ieeg"

In [4]:
## Load Data ##


raw_fif = mne.io.Raw(f"{raw_data_dir}/ieeg/{subject}_raw_ieeg.fif")

Opening raw data file /home/brooke/pacman/raw_data/SLCH018/ieeg/SLCH018_raw_ieeg.fif...


    Range : 0 ... 2779999 =      0.000 ...  1389.999 secs
Ready.
Opening raw data file /home/brooke/pacman/raw_data/SLCH018/ieeg/SLCH018_raw_ieeg-1.fif...
    Range : 2780000 ... 2958799 =   1390.000 ...  1479.399 secs
Ready.


In [5]:
## Filtering ##

# load data #
raw_fif.load_data()

# highpass filter @ 1 and lowpass @ 200 #
filtered_data = raw_fif.filter(l_freq=1, h_freq=150) # skip this step for TF analysis, always do this on the least processed data, or do .1 to 250

# remove 60hz line noise #
freqs = (60, 120, 180, 240)
filtered_notch_data = filtered_data.notch_filter(freqs=freqs) 


Reading 0 ... 2958799  =      0.000 ...  1479.399 secs...


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 1 - 1.5e+02 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 1.00
- Lower transition bandwidth: 1.00 Hz (-6 dB cutoff frequency: 0.50 Hz)
- Upper passband edge: 150.00 Hz
- Upper transition bandwidth: 37.50 Hz (-6 dB cutoff frequency: 168.75 Hz)
- Filter length: 6601 samples (3.300 s)



[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:    5.1s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:   11.2s


Filtering raw data in 1 contiguous segment
Setting up band-stop filter

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandstop filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower transition bandwidth: 0.50 Hz
- Upper transition bandwidth: 0.50 Hz
- Filter length: 13201 samples (6.601 s)



[Parallel(n_jobs=1)]: Done  17 tasks      | elapsed:    1.4s
[Parallel(n_jobs=1)]: Done  71 tasks      | elapsed:    5.1s
[Parallel(n_jobs=1)]: Done 161 tasks      | elapsed:   11.2s


In [6]:
## Load/Save Filtered Data ##

# save #
# filtered_notch_data.save(f"{raw_data_dir}/{subject}_notched_filtered_ieeg.fif", overwrite = True)

# load #
# filtered_notch_data = mne.io.Raw(f"{raw_data_dir}/{subject}_notched_filtered_ieeg.fif")
events = mne.find_events(filtered_notch_data, output='step', consecutive = False, stim_channel='STI')

480 events found on stim channel STI
Event IDs: [0 1]


In [7]:
filtered_notch_data.info['ch_names']


['NA',
 'NA_2',
 'NA_3',
 'NA_4',
 'REF1',
 'FEF2',
 '`A1',
 '`A2',
 '`A3',
 '`A4',
 '`A5',
 '`A6',
 '`A7',
 '`A8',
 '`A9',
 '`A10',
 '`A11',
 '`A12',
 '`B1',
 '`B2',
 '`B3',
 '`B4',
 '`B5',
 '`B6',
 '`B7',
 '`B8',
 '`B9',
 '`B10',
 '`B11',
 '`B12',
 '`C1',
 '`C2',
 '`C3',
 '`C4',
 '`C5',
 '`C6',
 '`C7',
 '`C8',
 '`C9',
 '`C10',
 '`C11',
 '`C12',
 '`C13',
 '`C14',
 '`D1',
 '`D2',
 '`D3',
 '`D4',
 '`D5',
 '`D6',
 '`E1',
 '`E2',
 '`E3',
 '`E4',
 '`E5',
 '`E6',
 '`E7',
 '`E8',
 '`E9',
 '`E10',
 'A61',
 'A62',
 'A63',
 'A64',
 '`F1',
 '`F2',
 '`F3',
 '`F4',
 '`F5',
 '`F6',
 '`F7',
 '`F8',
 '`F9',
 '`F10',
 '`G1',
 '`G2',
 '`G3',
 '`G4',
 '`G5',
 '`G6',
 '`G7',
 '`G8',
 '`G9',
 '`G10',
 '`G11',
 '`G12',
 '`G13',
 '`G14',
 '`G15',
 '`G16',
 '`H1',
 '`H2',
 '`H3',
 '`H4',
 '`H5',
 '`H6',
 '`H7',
 '`H8',
 '`i1',
 '`i2',
 '`i3',
 '`i4',
 '`i5',
 '`i6',
 '`i7',
 '`i8',
 '`i9',
 '`i10',
 '`i11',
 '`i12',
 '`i13',
 '`i14',
 'J`1',
 'J`2',
 'J`3',
 'J`4',
 'J`5',
 'J`6',
 'J`7',
 'J`8',
 'J`9',
 'J

## Cleaning Neural Data

#### Bad Channels

Epileptic channels from my cleaning: no spikes but slowing in 'C1, 'C2, 'C3

Noisy channels: 

In [10]:
# Remove Channels from filtered data

# remove non seeg electrodes electrodes #
string_lst = ['EMPTY', 'REF', 'ekg']

bad_channels_reg = [re.findall(r"(?=("+'|'.join(string_lst)+r"))", name, re.IGNORECASE) for name in filtered_notch_data.info['ch_names']]
bad_channels_index = [i for i,x in enumerate(bad_channels_reg) if  x]
bad_channels = [filtered_notch_data.info['ch_names'][i] for i in bad_channels_index]

filtered_notch_data.info['bads'].extend(bad_channels)  

# # remove epileptic electrodes --- uncomment these 3 lines after inspection with BOB
bob_bad_channels =['NA', 'NA_2', 'NA_3', 'NA_4','`F4', '`F5','`i1', '`i2', '`C14','`E10','`C1', '`C2', '`C3']
filtered_notch_data.info['bads'].extend(bob_bad_channels)  


# remove out of brain electrodes
## TODO hasn't been localized

# remove scalp and dc channels
scalp_dc_channels = ['Fp1', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'T7', 'C3', 'Cz', 'C4', 'T8', 'P7', 'P3', 'Pz', 'P4', 'P8', 'O1', 'O2']


filtered_notch_data.info['bads'].extend(scalp_dc_channels)
filtered_notch_data

0,1
Measurement date,Unknown
Experimenter,Unknown
Participant,Unknown

0,1
Digitized points,0 points
Good channels,"159 sEEG, 1 Stimulus"
Bad channels,"REF1, NA, NA_2, NA_3, NA_4, `F4, `F5, `i1, `i2, `C14, `E10, `C1, `C2, `C3, Fp1, Fp2, F7, F3, Fz, F4, F8, T7, C3, Cz, C4, T8, P7, P3, Pz, P4, P8, O1, O2"
EOG channels,Not available
ECG channels,Not available

0,1
Sampling frequency,2000.00 Hz
Highpass,1.00 Hz
Lowpass,150.00 Hz
Filenames,SLCH018_raw_ieeg.fif<br>SLCH018_raw_ieeg-1.fif
Duration,00:24:40 (HH:MM:SS)


#### Bad Epochs from noisy data



In [13]:
# Annotate Noisy Epochs from Bob's cleaning

# read from csv from interactive session
bad_annots = mne.read_annotations('../ieeg/saved_annotations.fif')   


#### Bad epochs from bad trial data

In [15]:
## quality check trials ##

# calculate trial onsets and offsets
sti_raw = filtered_notch_data.get_data(picks = ['STI'])
lengths, positionsm, val  = rle(sti_raw[0])

# lengths, beginnings, endings, for trials
trial_lengths = [lengths[x] for x in np.where(val == 1)]
trial_begins = [positionsm[x] for x in np.where(val == 1)]
trial_ends = trial_lengths[0] + trial_begins[0]
trial_baseline = trial_begins[0] - 1000

# lengths, beginnings, endings, for itis
iti_lengths = [lengths[x] for x in np.where(val == 0)]
iti_begins = [positionsm[x] for x in np.where(val == 0)]
iti_ends = iti_begins[0] + iti_lengths[0]

# save trial lengths for later
np.save(f"{preproc_data_dir}/{subject}_trial_lengths.npy", trial_lengths)

In [18]:
# exclude bad trials/apochs from r behavioral analysis (paused trials and no biscuits)

# load bad trial data
bad_trials = np.genfromtxt(f"{raw_data_dir}/behave/{subject}_bad_trials.csv", delimiter = ',', skip_header = 1)

onsets = []
durations = []
for bad in bad_trials:
    start, dur = find_bad_trial_times(int(bad), raw_fif.info['sfreq'], trial_begins, trial_lengths)
    onsets.append(start)
    durations.append(dur)

# update descriptions
descriptions = ['bad'] * len(durations)


In [19]:
# exclude all bad epochs from both bad trials and investigating neural data ##

# combine with bad trial data #
bad_annots.append(onsets, durations, descriptions)

# # apply to filtered data #
filtered_notch_data.set_annotations(bad_annots)

# # apply to not filtered data #
raw_fif.set_annotations(bad_annots)

0,1
Measurement date,Unknown
Experimenter,Unknown
Participant,Unknown

0,1
Digitized points,0 points
Good channels,"159 sEEG, 1 Stimulus"
Bad channels,"REF1, NA, NA_2, NA_3, NA_4, `F4, `F5, `i1, `i2, `C14, `E10, `C1, `C2, `C3, Fp1, Fp2, F7, F3, Fz, F4, F8, T7, C3, Cz, C4, T8, P7, P3, Pz, P4, P8, O1, O2"
EOG channels,Not available
ECG channels,Not available

0,1
Sampling frequency,2000.00 Hz
Highpass,1.00 Hz
Lowpass,150.00 Hz
Filenames,SLCH018_raw_ieeg.fif<br>SLCH018_raw_ieeg-1.fif
Duration,00:24:40 (HH:MM:SS)


## Visualizing Neural Data

In [10]:
# plot for cleaning #

filtered_notch_data.plot(events=events, color='b', bad_color = 'red', n_channels = 230, clipping = None, event_color = 'r')

Using qt as 2D backend.


qt.qpa.xcb: X server does not support XInput 2
Qt: Session management error: Authentication Rejected, reason : None of the authentication protocols specified are supported and host-based authentication failed
qt.qpa.xcb: QXcbConnection: XCB error: 1 (BadRequest), sequence: 169, resource id: 132, major code: 130 (Unknown), minor code: 47


<mne_qt_browser._pg_figure.MNEQtBrowser at 0x7fe233a3bac0>

qt.qpa.xcb: QXcbConnection: XCB error: 3 (BadWindow), sequence: 16688, resource id: 6408313, major code: 40 (TranslateCoords), minor code: 0


In [14]:
# ## From interactive sessions --- saving annotations from BOB
# interactive_annot = filtered_notch_data.annotations
# for x in range(0, len(filtered_notch_data.annotations)):
#     print(interactive_annot[x])
    
    
# filtered_notch_data.annotations.save('../ieeg/saved_annotations.fif', overwrite = True) 

OrderedDict([('onset', 267.47691623508354), ('duration', 0.31999253336249467), ('description', 'bad'), ('orig_time', None)])
OrderedDict([('onset', 708.5281063221374), ('duration', 7.081316247557993), ('description', 'bad'), ('orig_time', None)])
OrderedDict([('onset', 765.8638055001952), ('duration', 0.7703523951317948), ('description', 'bad'), ('orig_time', None)])
OrderedDict([('onset', 991.8237181849581), ('duration', 0.9599776000874272), ('description', 'bad'), ('orig_time', None)])
OrderedDict([('onset', 1030.2368959341334), ('duration', 1.7125526322545284), ('description', 'bad'), ('orig_time', None)])
OrderedDict([('onset', 1146.941580179323), ('duration', 2.9510422521204873), ('description', 'bad'), ('orig_time', None)])
OrderedDict([('onset', 1269.868341462116), ('duration', 0.7822039704415147), ('description', 'bad'), ('orig_time', None)])
OrderedDict([('onset', 1321.2486240470391), ('duration', 8.977568297113294), ('description', 'bad'), ('orig_time', None)])
OrderedDict([(

  filtered_notch_data.annotations.save('../ieeg/saved_annotations.fif', overwrite = True)


## Save Files

In [20]:
# save filtered data #
filtered_notch_data.save(f"{raw_data_dir}/ieeg/{subject}_notched_filtered_clean_ieeg.fif", overwrite = True)


# save raw clean data #
raw_fif.save(f"{raw_data_dir}/ieeg/{subject}_raw_clean_ieeg.fif", overwrite = True)


Writing /home/brooke/pacman/raw_data/SLCH018/ieeg/SLCH018_notched_filtered_clean_ieeg.fif
Closing /home/brooke/pacman/raw_data/SLCH018/ieeg/SLCH018_notched_filtered_clean_ieeg.fif
Writing /home/brooke/pacman/raw_data/SLCH018/ieeg/SLCH018_notched_filtered_clean_ieeg-1.fif
Closing /home/brooke/pacman/raw_data/SLCH018/ieeg/SLCH018_notched_filtered_clean_ieeg-1.fif
[done]
Writing /home/brooke/pacman/raw_data/SLCH018/SLCH018_raw_clean_ieeg.fif
Closing /home/brooke/pacman/raw_data/SLCH018/SLCH018_raw_clean_ieeg.fif
Writing /home/brooke/pacman/raw_data/SLCH018/SLCH018_raw_clean_ieeg-1.fif
Closing /home/brooke/pacman/raw_data/SLCH018/SLCH018_raw_clean_ieeg-1.fif
[done]


In [21]:
raw_fif

0,1
Measurement date,Unknown
Experimenter,Unknown
Participant,Unknown

0,1
Digitized points,0 points
Good channels,"159 sEEG, 1 Stimulus"
Bad channels,"REF1, NA, NA_2, NA_3, NA_4, `F4, `F5, `i1, `i2, `C14, `E10, `C1, `C2, `C3, Fp1, Fp2, F7, F3, Fz, F4, F8, T7, C3, Cz, C4, T8, P7, P3, Pz, P4, P8, O1, O2"
EOG channels,Not available
ECG channels,Not available

0,1
Sampling frequency,2000.00 Hz
Highpass,1.00 Hz
Lowpass,150.00 Hz
Filenames,SLCH018_raw_ieeg.fif<br>SLCH018_raw_ieeg-1.fif
Duration,00:24:40 (HH:MM:SS)


In [22]:
filtered_notch_data

0,1
Measurement date,Unknown
Experimenter,Unknown
Participant,Unknown

0,1
Digitized points,0 points
Good channels,"159 sEEG, 1 Stimulus"
Bad channels,"REF1, NA, NA_2, NA_3, NA_4, `F4, `F5, `i1, `i2, `C14, `E10, `C1, `C2, `C3, Fp1, Fp2, F7, F3, Fz, F4, F8, T7, C3, Cz, C4, T8, P7, P3, Pz, P4, P8, O1, O2"
EOG channels,Not available
ECG channels,Not available

0,1
Sampling frequency,2000.00 Hz
Highpass,1.00 Hz
Lowpass,150.00 Hz
Filenames,SLCH018_raw_ieeg.fif<br>SLCH018_raw_ieeg-1.fif
Duration,00:24:40 (HH:MM:SS)
