# Preprocess Continuous Data - Resting State
10/01/2023

In [1]:
import numpy as np
import mne
from glob import glob
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns
from scipy.stats import zscore, linregress, ttest_ind, ttest_rel, ttest_1samp
import pandas as pd
from mne.preprocessing.bads import _find_outliers
import os 
import joblib
import emd
import re

import warnings
warnings.filterwarnings('ignore')

In [2]:
import sys
sys.path.append('/Users/christinamaher/Documents/GitHub/LFPAnalysis')

In [3]:
from LFPAnalysis import lfp_preprocess_utils, sync_utils, analysis_utils, nlx_utils

In [4]:
# Root directory for un-archived data and results
base_dir = '/Users/christinamaher/Documents/Github/resting_state_ieeg'

subject_id = 'MS018'

# Per-subject input (raw recordings), shared anatomy folder, and per-subject output folder.
neural_dir = f'{base_dir}/preprocess/raw_data/{subject_id}'
anat_dir = f'{base_dir}/anat'
# The path separator after base_dir is required; without it the output lands in a
# sibling folder named '<base_dir>preprocess/...' instead of inside base_dir.
save_dir = f'{base_dir}/preprocess/clean_data/{subject_id}'
# Create the subject's output folder if needed so it does not have to be made by hand.
os.makedirs(save_dir, exist_ok=True)



#### Import Data

In [5]:
# Collect the subject's EDF recordings; sorting makes the choice of "first file"
# reproducible, since glob returns matches in arbitrary order.
edf_files = sorted(glob(f'{neural_dir}/*.edf'))
if not edf_files:
    # Fail with the searched location instead of a bare IndexError on edf_files[0].
    raise FileNotFoundError(f'No .edf files found in {neural_dir}')

# Read the first recording with its data loaded into memory (preload=True).
mne_data = mne.io.read_raw_edf(edf_files[0], preload=True)
mne_data

0,1
Measurement date,"January 01, 2001 19:00:21 GMT"
Experimenter,Unknown
Digitized points,Not available
Good channels,276 EEG
Bad channels,
EOG channels,Not available
ECG channels,Not available
Sampling frequency,1024.00 Hz
Highpass,0.00 Hz
Lowpass,512.00 Hz


In [None]:
mne_data.ch_names

In [None]:
# Sanity check one channel's data.
# The number of samples is named once so the plotted range and the title cannot
# drift apart (a slice of `:4999` only covers samples 0-4998).
n_preview = 5000
preview = mne_data.get_data(picks=[0], start=0, stop=n_preview)[0]
plt.plot(preview)
plt.title(f"Raw iEEG, electrode 0, samples 0-{n_preview - 1}")
plt.show()

# Import Anat Recon Info - check all elecs are present in data + recon sheet


In [None]:
# Read this subject's electrode localization sheet (first match of <subject>_labels.csv)
anat_file = glob(f'{anat_dir}/{subject_id}_labels.csv')[0]
elec_locs = pd.read_csv(anat_file)

# Some sheets carry extra columns with no entries; their names contain 'Unnamed'
unnamed_cols = list(elec_locs.filter(regex='Unnamed'))
elec_locs = elec_locs.drop(columns=unnamed_cols)

# Some recons have a run of completely empty rows at the end; drop rows with no values at all
elec_locs = elec_locs.dropna(how='all', axis=0)
elec_locs

In [None]:
elec_locs.label # pulls electrode names

### Fix edf channel names

In [None]:
new_mne_names, unmatched_names, unmatched_seeg = lfp_preprocess_utils.match_elec_names(mne_data.ch_names, elec_locs.label)

In [None]:
# Map each current channel name to its matched name, pairing the two lists by position
new_name_dict = dict(zip(mne_data.ch_names, new_mne_names))
new_name_dict

In [None]:
# Rename the mne data according to the localization data
mne_data.rename_channels(new_name_dict)

In [None]:
unmatched_seeg #make sure there are no unmatched names

In [None]:
anat_names = list(elec_locs.label.str.lower())
data_name_set = set(mne_data.ch_names)
anat_name_set = set(anat_names)

# Recon electrodes that are absent from the recording. This is printed explicitly:
# a bare expression that is not the last line of a cell is never displayed.
# An empty list (count 0) means nothing is missing.
missing_from_data = [ch for ch in anat_names if ch not in data_name_set]
print(f'{len(missing_from_data)} recon electrodes missing from the recording: {missing_from_data}')

# Extra channels present in the recording but not in the recon sheet; make sure none
# of them are neural channels (expected to be EEG etc.).
print([ch for ch in mne_data.ch_names if ch not in anat_name_set])

In [None]:
# Note, there is surface EEG data that we should separately indicate from the sEEG.
# Channels are split by the first letter of their name: 'l' -> left, 'r' -> right.
# NOTE(review): any non-sEEG channel whose name begins with 'l' or 'r' would also be
# picked up here - confirm against the printed list of extra channels.
left_seeg_names = []
right_seeg_names = []
for ch_name in mne_data.ch_names:
    if ch_name.startswith('l'):
        left_seeg_names.append(ch_name)
    elif ch_name.startswith('r'):
        right_seeg_names.append(ch_name)

n_left = len(left_seeg_names)
n_right = len(right_seeg_names)
print(f'We have a total of {n_left} left & {n_right} right sEEG electrodes')
print(f'We have a total of {n_left + n_right} sEEG electrodes')


In [None]:
# Channels that are in exactly one of {all recorded channels, sEEG channels}.
# Because the sEEG names were selected from mne_data.ch_names, this is the set of
# non-sEEG channels (it is either called DC1 or research).
seeg_name_set = set(left_seeg_names + right_seeg_names)
drop_chans = list(set(mne_data.ch_names).symmetric_difference(seeg_name_set))
# After this, the number of channels should equal the number of sEEG electrodes
mne_data.drop_channels(drop_chans)

In [None]:
# Set channel types: every left/right sEEG channel name maps to the type 'seeg'
sEEG_mapping_dict = dict.fromkeys(map(str, left_seeg_names + right_seeg_names), 'seeg')
mne_data.set_channel_types(sEEG_mapping_dict)

In [None]:
# make montage (convert mm to m)
mni_coords = elec_locs[['mni_x', 'mni_y', 'mni_z']].to_numpy(dtype=float) / 1000
# One (x, y, z) row per electrode, keyed by the electrode label from the recon sheet
ch_positions = dict(zip(elec_locs.label, mni_coords))
montage = mne.channels.make_dig_montage(ch_pos=ch_positions, coord_frame='mni_tal')

# Names are matched case-insensitively; channels without a position only warn
mne_data.set_montage(montage, match_case=False, on_missing='warn')

### Notch filter line noise

In [None]:
# Record the line-noise frequency on the recording's info
line_freq = 60
mne_data.info['line_freq'] = line_freq

# Notch out the line frequency and its next three multiples: (60, 120, 180, 240) Hz
notch_freqs = tuple(line_freq * k for k in range(1, 5))
mne_data.notch_filter(freqs=notch_freqs)

### Resampling data 

In [None]:
# Target sampling rate: all patients should be resampled to 500 Hz
resample_sr = 500
# Resample the recording, using all available cores (n_jobs=-1)
mne_data.resample(resample_sr, n_jobs=-1, npad='auto')

### Bad Channel Removal (manual)

Let's pick out any bad channels missed by automatic screening (using visual inspection as a reference), or restore channels that were erroneously deemed bad. When you're done, you have to press the "power" button twice (once for the plot and once for the panel beneath it) so that your manual changes are saved.

In [None]:
# NOTE(review): the backend magic is issued twice - presumably a workaround so the
# interactive backend actually takes effect; confirm before removing the duplicate.
%matplotlib notebook
%matplotlib notebook
# Open the interactive browser: 120 s window starting at 0 s, 50 channels per page,
# with the display scaling set to 1/20 of the largest sample value in the data.
fig = mne_data.plot(start=0, duration=120, n_channels=50, scalings=mne_data._data.max()/20)
# Send an 'a' keypress to the figure.
# NOTE(review): presumably this opens the annotation panel - confirm.
fig.fake_keypress('a')

In [None]:
# Sanity check that the manually marked bads were saved. This is printed explicitly:
# only the last expression of a cell is displayed, so a bare first line shows nothing.
print(mne_data.info['bads'])
len(mne_data.info['bads'])  # number of bad electrodes

## Save raw LFP data
Notch filtered and resampled with bad elecs indicated

In [None]:
#### important - check anat file to see if manual examination has a space in column name!

# Define out-of-brain ('oob') electrodes as bad before saving out the lfp file.
# Rows are selected with a boolean mask rather than by feeding index labels to
# positional .iloc: once empty rows have been dropped from elec_locs, labels and
# positions no longer line up, which would pick the wrong electrode or raise IndexError.
manual_exam = elec_locs['Manual Examination'].str.lower()
oob_elec = elec_locs.loc[manual_exam == 'oob', 'label'].str.lower().tolist()
oob_elec

In [None]:
# Combine the manually marked bad channels with the out-of-brain electrodes
# (list concatenation builds a new list, so info['bads'] is not modified here)
bad_ch = mne_data.info['bads'] + oob_elec
# Store the sorted, de-duplicated names back on the recording
unique_bads = np.unique(bad_ch)
mne_data.info['bads'] = list(unique_bads)
mne_data.info['bads']  # make sure any WM-referenced pair that includes these channels is excluded

In [None]:
mne_data.save(f'{save_dir}/{subject_id}_raw_ieeg.fif',overwrite=True) #updated to add subject name to file & save to clean_data 

# Rereference data 

### WM REF

In [None]:
# Ask the project helper for the white-matter referencing scheme.
# It returns four values, unpacked below in order: anode names, cathode names,
# channels to drop after re-referencing, and out-of-brain channels.
# NOTE(review): exact semantics of each output live in lfp_preprocess_utils.wm_ref - confirm there.
wm_ref_outputs = lfp_preprocess_utils.wm_ref(
    mne_data=mne_data,
    elec_path=anat_file,
    unmatched_seeg=unmatched_seeg,
    bad_channels=mne_data.info['bads'],
    site='MSSM',
)
anode_list, cathode_list, drop_wm_channels, oob_channels = wm_ref_outputs

In [10]:
def create_clean_anode_cathode_lists(al, cl, bc):
    """
    Remove bad electrodes from the anode and cathode lists returned by wm_ref().

    Anodes found in 'bc' are removed, and the cathode at the same position as each
    removed anode is removed with it. The remaining cathodes are then filtered
    against 'bc' as well. Because that second filter is applied to the cathodes
    only, the two cleaned lists can end up with different lengths.

    Args:
        al (list of strings): anode list output from wm_ref().
        cl (list of strings): cathode list output from wm_ref(), paired with 'al' by position.
        bc (list of strings): names of all electrodes that were identified as OOB or noisy.

    Returns:
        anode_list_clean (list of strings): anodes that are not in 'bc'.
        cathode_list_clean (list of strings): cathodes that are not in 'bc' and whose
            paired anode was not removed.
        removed_cathode_list (list of strings): cathodes removed because the anode at
            the same position is in 'bc'.
    """
    bad = set(bc)  # set membership keeps each lookup constant-time

    # Positions of the anodes that are being thrown away
    removed_anode_index = {i for i, ch in enumerate(al) if ch in bad}

    anode_list_clean = [ch for ch in al if ch not in bad]

    # Use the 'cl' argument here, not a notebook-level variable, so the function
    # works on whatever lists it is given.
    removed_cathode_list = [ch for i, ch in enumerate(cl) if i in removed_anode_index]

    cathode_list_clean = [
        ch for i, ch in enumerate(cl)
        if i not in removed_anode_index and ch not in bad
    ]

    return anode_list_clean, cathode_list_clean, removed_cathode_list

In [None]:
anode_list_clean, cathode_list_clean, removed_cathode_list = create_clean_anode_cathode_lists(al=anode_list, cl=cathode_list, bc=bad_ch)

In [None]:
# Build the bipolar re-referenced recording on a copy (copy=True), so mne_data
# itself keeps its original channels.
# NOTE(review): this uses anode_list / cathode_list as returned by wm_ref, not the
# *_clean lists computed earlier - confirm that is intended.
mne_data_wm_reref = mne.set_bipolar_reference(
    mne_data,
    anode=anode_list,
    cathode=cathode_list,
    copy=True,
)

mne_data_wm_reref  # none of the bad channels should be rereferenced (see above) - should we drop these before saving?

In [None]:
# Remove every channel currently marked bad (OOB and noisy) from the original recording.
mne_data.drop_channels(mne_data.info['bads']) # now make sure the bad channels (OOB and noisy) are dropped
# Remove the channels wm_ref() flagged for dropping from the re-referenced recording.
mne_data_wm_reref.drop_channels(drop_wm_channels)
# Remove cathodes whose paired anode was bad; otherwise they would stay in the
# recording as single, non-rereferenced electrodes.
mne_data_wm_reref.drop_channels(removed_cathode_list) # you need to drop any that still remain at this point (otherwise they will remain in the dataframe as a single, non-rereferenced elec)

To start annotating, press 'Add new label' in the bottom panel. Then left click and drag around window of interest. 

In [None]:
# NOTE(review): the backend magic is issued twice - presumably a workaround so the
# interactive backend actually takes effect; confirm before removing the duplicate.
%matplotlib notebook
%matplotlib notebook
# use the epoch code to select only the WM referenced pairs
# Browser over the re-referenced data: 50 s window starting at 2 s, 20 channels per page.
# NOTE(review): the scaling is computed from mne_data, not from mne_data_wm_reref -
# confirm this is the intended amplitude reference for the re-referenced traces.
fig = mne_data_wm_reref.plot(start=2, duration=50, n_channels=20,scalings=mne_data._data.max()/20 ) # plot all channels at once
# Send an 'a' keypress to the figure.
# NOTE(review): presumably this opens the annotation panel - confirm.
fig.fake_keypress("a")

In [None]:
### function to eliminate need to specifically define good epochs! 

def join_good_segs(mne_data):
    """
    Cut every annotated span out of a recording and join the remaining data.

    Every annotation on 'mne_data' is treated as a bad span, whatever its
    description. The good data before, between and after the bad spans is cropped
    from copies of 'mne_data' and concatenated in chronological order.

    Args:
        mne_data: annotated MNE Raw recording. It is not modified; each good
            segment is cropped from a copy.

    Returns:
        The concatenation of all good segments, as returned by mne.concatenate_raws().

    Raises:
        ValueError: if the annotations cover the whole recording, so no good data is left.
    """
    # Crop times are taken from this object's own time axis, so the function
    # depends only on its argument and not on any notebook-level variable.
    # NOTE(review): assumes annotation onsets are on the same time axis as
    # mne_data.times (the case when the recording starts at sample 0) - confirm
    # before using this on a recording that was cropped beforehand.
    times = mne_data.times
    last_idx = len(times) - 1

    # Convert each annotation to (first bad sample, last bad sample). Indices are
    # needed because the +-1 sample steps below cannot be done on times directly.
    bad_spans = sorted(
        (
            int(mne_data.time_as_index(annot['onset'])[0]),
            int(mne_data.time_as_index(annot['onset'] + annot['duration'])[0]),
        )
        for annot in mne_data.annotations
    )

    good_segs = []
    seg_start = 0  # first sample of the good segment currently being built
    for bad_start, bad_end in bad_spans:
        # A good segment ends one sample before the bad span starts.
        seg_end = min(bad_start - 1, last_idx)
        # Skip empty segments: a bad span at the very start of the recording, or
        # one that overlaps / directly follows the previous bad span.
        if seg_end >= seg_start:
            good_segs.append(
                mne_data.copy().crop(tmin=float(times[seg_start]),
                                     tmax=float(times[seg_end]),
                                     include_tmax=True)
            )
        # The next good segment starts one sample after the bad span ends.
        seg_start = max(seg_start, bad_end + 1)

    # Trailing good data after the last bad span (or the whole recording when there
    # are no annotations). Skipped if a bad span runs to the end of the recording.
    if seg_start <= last_idx:
        good_segs.append(
            mne_data.copy().crop(tmin=float(times[seg_start]),
                                 tmax=float(times[last_idx]),
                                 include_tmax=True)
        )

    if not good_segs:
        raise ValueError('Annotations cover the entire recording; no good data left to join.')

    return mne.concatenate_raws(good_segs)
    
#derived from: 
    # source: https://mne.discourse.group/t/removing-time-segments-from-raw-object-without-epoching/4169/2
    # source: https://github.com/mne-tools/mne-python/blob/maint/1.5/mne/io/base.py#L681-L742
    

In [None]:
mne_data_wm_reref_clean = join_good_segs(mne_data_wm_reref)
mne_data_wm_reref_clean

## Save reref data

In [None]:
mne_data_wm_reref_clean.save(f'{save_dir}/{subject_id}_wm_ref_ieeg.fif',overwrite=True)