# Cleaning: ?


This notebook cleans the iEEG data by marking abnormal trials, noisy channels, and channels with epileptic activity as bad.


### Prep

In [None]:
import matplotlib
matplotlib.use("Qt5Agg")
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import signal, stats
import mat73
import re
from neurodsp.timefrequency import compute_wavelet_transform
from BCI2kReader import BCI2kReader as b2k
import os
import mne
from tabulate import tabulate
import IPython
import seaborn as sns

In [None]:
%matplotlib qt5

In [None]:
# helper functions#

def rle(inarray):
    """Run-length encode a sequence (modelled on R's ``rle``).

    Works on any array-like input, including non-numeric values such as
    strings, because runs are detected with an element-wise ``!=`` test.

    Returns a tuple ``(run_lengths, start_positions, values)``; all three
    entries are ``None`` when the input is empty.
    """
    values = np.asarray(inarray)  # accept lists/tuples as well as arrays
    total = len(values)
    if total == 0:
        return (None, None, None)

    # True wherever an element differs from its successor, i.e. a run ends there
    changes = values[1:] != values[:-1]
    # index of the last element of every run; the final element always closes a run
    run_ends = np.append(np.where(changes), total - 1)
    # distance between consecutive run ends is the run length
    run_lengths = np.diff(np.append(-1, run_ends))
    # cumulative lengths, shifted by one run, give where each run begins
    run_starts = np.cumsum(np.append(0, run_lengths))[:-1]
    return (run_lengths, run_starts, values[run_ends])
        
def find_bad_trial_times(idx, srate, trial_begins, trial_lengths):
    ''' Convert one trial's start sample and length into seconds.

    idx           -- index of the trial to look up
    srate         -- sampling rate used to convert samples to seconds
    trial_begins  -- sequence whose first element holds the start sample of every trial
    trial_lengths -- sequence whose first element holds the length (in samples) of every trial

    Returns (start_time, duration), both divided by srate, for the span to
    annotate as BAD. '''
    first_sample = trial_begins[0][idx]
    n_samples = trial_lengths[0][idx]
    return first_sample / srate, n_samples / srate

In [None]:
## Prep paths ##

# subject identifier; it is interpolated into every directory and file name below
subject = 'X'
# NOTE(review): both directories are absolute paths on one machine — edit them when running elsewhere
# directory holding the subject's raw recordings
raw_data_dir = f"/home/brooke/pacman/raw_data/{subject}"
# directory for the subject's preprocessed iEEG outputs
preproc_data_dir = f"/home/brooke/pacman/preprocessing/{subject}/ieeg"

In [None]:
## Load Data ##

# open the subject's raw iEEG recording from its .fif file
raw_fif = mne.io.Raw(f"{raw_data_dir}/ieeg/{subject}_raw_ieeg.fif")

In [None]:
## Filtering ##

# load data into memory before filtering #
raw_fif.load_data()

# highpass filter @ 1 Hz and lowpass @ 150 Hz #
# Raw.filter modifies the object it is called on and returns that same object,
# so filter a copy: raw_fif must stay unfiltered because it is saved later as the raw clean file
filtered_data = raw_fif.copy().filter(l_freq=1, h_freq=150) # skip this step for TF analysis, always do this on the least processed data, or do .1 to 250

# remove 60hz line noise and its harmonics #
freqs = (60, 120, 180, 240)
# notch_filter also works in place, so filtered_notch_data and filtered_data are the same (copied) object
filtered_notch_data = filtered_data.notch_filter(freqs=freqs)


In [None]:
## Load/Save Filtered Data ##

# save (disabled; uncomment to write the filtered recording to disk) #
# filtered_notch_data.save(f"{raw_data_dir}/ieeg/{subject}_notched_filtered_ieeg.fif", overwrite = True)

# load the previously saved filtered recording, then read events from the 'STI' channel #
filtered_notch_data = mne.io.Raw(
    f"{raw_data_dir}/ieeg/{subject}_notched_filtered_ieeg.fif"
)
events = mne.find_events(
    filtered_notch_data,
    stim_channel='STI',
    output='step',
    consecutive=False,
)

In [None]:
# display the channel names stored in the filtered recording's info
filtered_notch_data.info['ch_names']

## Cleaning Neural Data

#### Bad Channels

Epileptic channels from my cleaning: 

Noisy channels: 

In [None]:
# Mark bad channels on the filtered data
# (this cell only adds channel names to info['bads'])

# flag non seeg electrodes by name #
string_lst = ['EMPTY', 'REF', 'ekg']

# case-insensitive search of every channel name for any of the substrings above
bad_channels_reg = [re.findall(r"(?=("+'|'.join(string_lst)+r"))", name, re.IGNORECASE) for name in filtered_notch_data.info['ch_names']]
bad_channels_index = [i for i, x in enumerate(bad_channels_reg) if x]
bad_channels = [filtered_notch_data.info['ch_names'][i] for i in bad_channels_index]

# flag epileptic electrodes: list their exact channel names here #
# Left empty until they are identified. Blank '' placeholders must not be added,
# because every entry of info['bads'] has to be an existing channel name.
epileptic_channels = []

# flag out of brain electrodes
## TODO hasn't been localized

# flag scalp and dc channels
scalp_dc_channels = []

# add each name only once so re-running this cell does not pile up duplicates #
already_bad = set(filtered_notch_data.info['bads'])
for ch_name in bad_channels + epileptic_channels + scalp_dc_channels:
    if ch_name and ch_name not in already_bad:
        filtered_notch_data.info['bads'].append(ch_name)
        already_bad.add(ch_name)

filtered_notch_data

In [None]:
# Mark bad channels on the raw (unfiltered) data
# (this cell only adds channel names to info['bads'])

# flag non seeg electrodes by name #
string_lst = ['EMPTY', 'REF', 'ekg']

# case-insensitive search of every channel name for any of the substrings above
bad_channels_reg = [re.findall(r"(?=("+'|'.join(string_lst)+r"))", name, re.IGNORECASE) for name in raw_fif.info['ch_names']]
bad_channels_index = [i for i, x in enumerate(bad_channels_reg) if x]
bad_channels = [raw_fif.info['ch_names'][i] for i in bad_channels_index]

# flag epileptic electrodes: list their exact channel names here #
# NOTE: these must go on raw_fif (not filtered_notch_data) and should match the
# epileptic channels marked on the filtered recording. Blank '' placeholders must
# not be added, because every entry of info['bads'] has to be an existing channel name.
epileptic_channels = []

# flag out of brain electrodes
## TODO hasn't been localized

# flag scalp and dc channels
scalp_dc_channels = []

# add each name only once so re-running this cell does not pile up duplicates #
already_bad = set(raw_fif.info['bads'])
for ch_name in bad_channels + epileptic_channels + scalp_dc_channels:
    if ch_name and ch_name not in already_bad:
        raw_fif.info['bads'].append(ch_name)
        already_bad.add(ch_name)

raw_fif

#### Bad Epochs from noisy data



In [11]:
# # Annotate Noisy Epochs from Bob's cleaning

# # read from csv from interactive session
# bad_annots = mne.read_annotations('../ieeg/saved_annotations.fif')   


#### Bad epochs from bad trial data

In [12]:
# ## quality check trials ##

# # calculate trial onsets and offsets
# sti_raw = filtered_notch_data.get_data(picks = ['STI'])
# lengths, positionsm, val  = rle(sti_raw[0])

# # lengths, beginnings, endings, for trials
# trial_lengths = [lengths[x] for x in np.where(val == 1)]
# trial_begins = [positionsm[x] for x in np.where(val == 1)]
# trial_ends = trial_lengths[0] + trial_begins[0]
# trial_baseline = trial_begins[0] - 1000

# # lengths, beginnings, endings, for itis
# iti_lengths = [lengths[x] for x in np.where(val == 0)]
# iti_begins = [positionsm[x] for x in np.where(val == 0)]
# iti_ends = iti_begins[0] + iti_lengths[0]

# # save trial lengths for later
# np.save(f"{preproc_data_dir}/{subject}_trial_lengths.npy", trial_lengths)

In [13]:
# # exclude bad trials/epochs from R behavioral analysis (paused trials and no biscuits)

# # load bad trial data
# bad_trials = np.genfromtxt(f"{raw_data_dir}/behave/{subject}_bad_trials.csv", delimiter = ',', skip_header = 1)

# onsets = []
# durations = []
# for bad in bad_trials:
#     start, dur = find_bad_trial_times(int(bad), raw_fif.info['sfreq'], trial_begins, trial_lengths)
#     onsets.append(start)
#     durations.append(dur)

# # update descriptions
# descriptions = ['bad'] * len(durations)


In [14]:
## exclude all bad epochs from both bad trials and investigating neural data ##

# # combine with bad trial data #
# bad_annots.append(onsets, durations, descriptions)

# # # apply to filtered data #
# filtered_notch_data.set_annotations(bad_annots)

# # # apply to not filtered data #
# raw_fif.set_annotations(bad_annots)

0,1
Measurement date,Unknown
Experimenter,Unknown
Digitized points,0 points
Good channels,"148 sEEG, 1 Stimulus"
Bad channels,"CH48, CH47, CH54, CH53, CH13, CH6, CH7, CH2, CH3, CH4, CH5, CH65, CH89, CH0, CH1, CH90, CH91, CH92, CH93, CH94, CH95, CH96, CH97, CH98, CH99, CH100, CH101, CH102, CH103, CH104, CH105, CH106, CH107, CH108, CH109, CH110, CH111, CH112, CH113, CH114, CH115, CH116, CH117, CH118, CH119, CH120, CH121, CH122, CH123, CH124, CH125, CH126, CH127, CH128, CH129, CH130, CH131, CH132, CH133, CH134, CH135, CH136, CH137, CH138, CH139, CH140, CH141, CH142, CH143, CH144, CH145, CH146, CH147"
EOG channels,Not available
ECG channels,Not available
Sampling frequency,512.00 Hz
Highpass,0.00 Hz
Lowpass,256.00 Hz


## Visualizing Neural Data

Next step: work on filtering out the high-frequency noise.

Then look at Mark's script to come up with a plot of every trial, sorted longest to shortest, with theta power as the color, time on the x-axis, and trial on the y-axis.

In [None]:
# browse the filtered recording to spot noisy channels and epochs #

filtered_notch_data.plot(
    events=events,
    event_color='r',
    color='b',
    bad_color='red',
    n_channels=230,
    clipping=None,
)

In [None]:
# plot psd of all channels #
# called with default arguments on the filtered recording

filtered_notch_data.plot_psd()

In [None]:
# plot psd of bad channels #
# picks is the current list of names in info['bads'], so this reflects whatever has been marked bad so far

filtered_notch_data.plot_psd(picks = filtered_notch_data.info['bads'])

## Save Files

In [None]:
# write the filtered recording (with its bad-channel marks) to disk, replacing any existing file #
filtered_notch_data.save(
    f"{raw_data_dir}/ieeg/{subject}_notched_filtered_clean_ieeg.fif",
    overwrite=True,
)


# write the raw recording (with its bad-channel marks) to disk, replacing any existing file #
raw_fif.save(
    f"{raw_data_dir}/ieeg/{subject}_raw_clean_ieeg.fif",
    overwrite=True,
)
