# Preprocess Pancreatitis EEG data

In [14]:
import sys
sys.path.append('/home/wanglab/Documents/George Kenefati/Code/eeg_toolkit/')
import os
import mne
import glob
import eeg_toolkit
from eeg_toolkit.preprocess import _preprocess
from eeg_toolkit.source_localization import source_localization

In [15]:
# Settings: time-window values and the matching save sub-folder for a
# window argument of 5
times_tup, time_win_path = _preprocess.get_time_window(5)

# Input (raw recordings) and output (processed data) locations
data_path = '../Data/Raw/'
save_path = '../Data/Processed Data/'
epo_path = f'{save_path}{time_win_path}'

# Create the window-specific output folder the first time it is needed
if not os.path.exists(epo_path):
    os.makedirs(epo_path)

# Noise cov: crop raw during eyes-open resting condition
noise_cov_win = (5.5, 7.5)  # rest_min, rest_max

[-2.5,0.0,2.5]


In [16]:
from mne.preprocessing import ICA
from pyprep.find_noisy_channels import NoisyChannels

from IPython import display
# Notebook-wide constants
RANDOM_STATE = 42  # seed handed to NoisyChannels and ICA further down
RESAMPLE_FREQ = 400  # target rate (Hz) used when resampling the raw data


def clear_display():
    """Clear the current cell output (called with ``wait=True``)."""
    display.clear_output(wait=True)

def load_raw_data(eeg_data_raw_file, eog):
    """
    Read an EDF recording with one channel flagged as EOG.

    eeg_data_raw_file: path of the EDF file to read.
    eog: name of the single channel passed to the reader as the EOG channel.

    Returns the object produced by ``mne.io.read_raw_edf`` with
    ``preload=True``.
    """
    eog_channels = [eog]
    raw_edf = mne.io.read_raw_edf(
        eeg_data_raw_file,
        eog=eog_channels,
        preload=True,
    )
    return raw_edf

def set_montage(mne_obj, montage):
    """
    Set custom montage for Raw or Epochs object (in place).

    mne_obj: object exposing ``set_montage(montage, on_missing=...)``.
    montage: either a path string to a custom montage file, which is read
        with ``mne.channels.read_custom_montage``, or an already-built
        montage object that is passed through unchanged.

    A path string is resolved against the directory of this file when
    ``__file__`` is defined; in an interactive session, where it is not,
    the path is used as given (i.e. relative to the working directory).
    The montage is always applied with ``on_missing='ignore'``.
    """
    print("setting custom montage...")
    print(montage)
    if isinstance(montage, str):
        # __file__ does not exist inside a notebook kernel, so fall back to
        # an empty base directory rather than failing with NameError.
        try:
            base_dir = os.path.dirname(__file__)
        except NameError:
            base_dir = ''
        montage = mne.channels.read_custom_montage(
            os.path.join(base_dir, montage))
    mne_obj.set_montage(montage, on_missing='ignore')

def get_time_window(peri_stim_time_win=None):
    """
    Get the tmin,tmax,bmax for any custom time window.
    Also get the custom save path.

    peri_stim_time_win: total window length; anything ``float()`` accepts.
        When None, the value is requested interactively via ``input``.

    Returns ``((tmin, bmax, tmax), time_win_path)``:
      * a window of 0 selects the default window [-0.2, 0.8] and an empty
        save sub-path;
      * any other value gives a window symmetric about 0 and the sub-path
        ``'<int(window)>_sec_time_window/'``.
    ``bmax`` is always 0.0.
    """
    bmax = 0.
    if peri_stim_time_win is None:
        t_win = float(input("Please enter the peri-stimulus time window."+
        "\nEx: '0 (default)' = [-0.2,0.8], '2' = [-1.0,1.0], etc...\n\n>> "))
    else:
        t_win = float(peri_stim_time_win)

    if t_win == 0.:
        tmin, tmax = -0.2, 0.8
        # Default window is saved directly under the base save path
        time_win_path = ''
    else:
        tmin, tmax = -t_win/2, t_win/2
        # Only custom windows get their own sub-folder; previously this
        # assignment ran unconditionally and overwrote the '' set above.
        time_win_path = f'{int(t_win)}_sec_time_window/'
    print(f"[{tmin},{bmax},{tmax}]")
    return (tmin, bmax, tmax), time_win_path

def make_sub_time_win_path(sub_id,save_path_cont,save_path_zepo,
                          include_zepochs=True):
    """
    Make a subject's time window data path

    sub_id: subject identifier, used as the folder name.
    save_path_cont: parent folder for the subject's continuous data.
    save_path_zepo: parent folder for the subject's zepochs data.
    include_zepochs: when False, no zepochs folder is created.

    Returns ``(subpath_cont, subpath_zepo)``; ``subpath_zepo`` is None when
    ``include_zepochs`` is False.
    """
    subpath_cont = os.path.join(save_path_cont, sub_id)
    # exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs(subpath_cont, exist_ok=True)  # continuous

    # Bound up front so the return below works when zepochs are skipped
    subpath_zepo = None
    if include_zepochs:
        subpath_zepo = os.path.join(save_path_zepo, sub_id)
        os.makedirs(subpath_zepo, exist_ok=True)  # zepochs
    return subpath_cont, subpath_zepo

In [17]:
# Get stc only from selected labels
# Label stems, listed in the order they appear within each hemisphere
_roi_regions = (
    'rostralanteriorcingulate',  # Rostral ACC
    'caudalanteriorcingulate',   # Caudal ACC
    'postcentral',               # S1
    'insula',                    # Insula
    'superiorfrontal',           # DL-PFC
    'medialorbitofrontal',       # Medial-OFC
)
# All left-hemisphere ('-lh') labels first, then the right-hemisphere ('-rh')
roi_names = [f'{region}-{hemi}'
             for hemi in ('lh', 'rh')
             for region in _roi_regions]

In [20]:
# Subjects to process in this run
sub_ids = ['001']
# Currently excluded: '002', 'T01', 'T02', 'T03'

In [24]:
# Work on the first listed subject
sub_id=sub_ids[0]

In [34]:
# Show where the notebook is running from
path = os.getcwd()
print("Current Directory", path)

# prints parent directory
# (computed here explicitly; the name previously printed was never defined)
parent_dir = os.path.abspath(os.path.join(path, os.pardir))
print(parent_dir)


Current Directory /home/wanglab/Documents/George Kenefati/Pancreatitis Pain Study/Code
/home/wanglab/Documents/George Kenefati/Pancreatitis Pain Study


In [37]:
# Absolute path of the working directory's parent (echoed as the cell result)
os.path.abspath(os.pardir)

'/home/wanglab/Documents/George Kenefati/Pancreatitis Pain Study'

In [25]:
"""  
Preprocess raw EDF data to filtered FIF format.  
"""  
# Find the first folder under data_path whose name starts with the subject ID
for sub_folder in os.listdir(data_path):
    if sub_folder.startswith(sub_id):
        save_fname_fif = sub_id + '_preprocessed-raw.fif'
        print(sub_id, save_fname_fif)
        break
else:
    # Without this guard the loop variable would keep the last listed folder
    # (a different subject) and save_fname_fif would stay undefined.
    raise FileNotFoundError(
        f"No folder starting with '{sub_id}' found in '{data_path}'")

# Pick the first EDF file (either extension case) inside the subject folder
sub_dir = os.path.join(data_path, sub_folder)
edf_files = [subfile for subfile in os.listdir(sub_dir)
             if subfile.endswith(('.edf', '.EDF'))]
if not edf_files:
    raise FileNotFoundError(f"No .edf/.EDF file found in '{sub_dir}'")
eeg_data_raw_file = os.path.join(sub_dir, edf_files[0])

# read data, set EOG channel, and drop unused channels
print(f"{sub_id}\nreading raw file...")
# First read passes 'eog' as the EOG channel name; the branches below that
# pick a specific EOG channel ('Fp1', 'VEO' or 'VEOG') read the file again.
raw = load_raw_data(eeg_data_raw_file, 'eog')

# Default montage file; the branches below may replace it
montage_fname = '../montages/Hydro_Neo_Net_64_xyz_cms_No_FID.sfp'
# Set to 1 when Fp1 is used as the EOG channel (checked again before saving)
Fp1_eog_flag=0
# 32 channel case
# (identified by an 'X' channel together with fewer than 64 channels)
if 'X' in raw.ch_names and len(raw.ch_names)<64:
    raw = load_raw_data(eeg_data_raw_file, 'Fp1')
    Fp1_eog_flag=1

    # Collect the non-EEG channels that are actually present
    non_eeg_chs = ['X', 'Y', 'Z'] if 'X' in raw.ch_names else []
    non_eeg_chs += ['Oth4'] if 'Oth4' in raw.ch_names else []

    raw.drop_channels(non_eeg_chs)
    montage_fname = '../montages/Hydro_Neo_Net_32_xyz_cms_No_Fp1.sfp'
    set_montage(raw, montage_fname)

# 64 channel case
else:
    wrong_64_mtg_flag=0
    # NOTE(review): this tests for 'P05' (digit zero) while the drop further
    # down uses 'PO5' (letter O) -- confirm which spelling the files contain.
    if {'FT7', 'P05'}.issubset(set(raw.ch_names)):
        wrong_64_mtg_flag=1
        # eog_adj is assigned here and below but not read anywhere in the
        # visible part of this notebook
        eog_adj = 4
    elif 'VEO' in raw.ch_names or 'VEOG' in raw.ch_names:
        eog_adj = 5
        # Re-read with whichever of 'VEO' / 'VEOG' exists as the EOG channel
        raw = load_raw_data(eeg_data_raw_file, 'VEO' if 'VEO' in raw.ch_names else 'VEOG')
        # When 'HEOG' is absent the list with 'HEO' is used instead
        # (assumes that channel is then present -- TODO confirm)
        non_eeg_chs = ['HEOG', 'EKG', 'EMG', 'Trigger'] if 'HEOG' in raw.ch_names else ['HEO', 'EKG', 'EMG', 'Trigger']
        raw.drop_channels(non_eeg_chs)
        montage_fname = '../montages/Hydro_Neo_Net_64_xyz_cms_No_FID.sfp'
        set_montage(raw, montage_fname)

    # Drop the extra channels EEG66-EEG69 when the recording contains them
    if "EEG66" in raw.ch_names:
        non_eeg_chs = ['EEG66','EEG67','EEG68','EEG69']
        raw.drop_channels(non_eeg_chs)

    # For 64 channel gTec cap
    # (detected here by the presence of an 'AF8' channel)
    if 'AF8' in raw.ch_names:
        # Form the 10-20 montage
        mont1020 = mne.channels.make_standard_montage('standard_1020')
        # Choose what channels you want to keep
        # Make sure that these channels exist e.g. T1 does not exist in the standard 10-20 EEG system!
        kept_channels = raw.info['ch_names'][:64]
        # Indices of standard montage channels whose names match a kept
        # channel, compared case-insensitively
        ind = [i for (i, channel) in enumerate(mont1020.ch_names) if channel.lower() in map(str.lower, kept_channels)]
        mont1020_new = mont1020.copy()
        # Keep only the desired channels
        mont1020_new.ch_names = [mont1020.ch_names[x] for x in ind]
        # The +3 offset skips the three leading fiducial entries; assumes the
        # remaining dig entries follow ch_names order -- TODO confirm
        kept_channel_info = [mont1020.dig[x+3] for x in ind]
        # Keep the first three rows as they are the fiducial points information
        mont1020_new.dig = mont1020.dig[0:3]+kept_channel_info
        set_montage(raw, mont1020_new)
        # Keeps the channels at indices 0..61
        raw.pick([*range(62)]) # drop reference channels

    # make adjustment for wrong montage subjects
    if wrong_64_mtg_flag:
        raw.drop_channels(['FT7','FT8','PO5','PO6']) # for subjects C24, 055, 056, 047
        montage_fname = '../montages/Hydro_Neo_Net_64_xyz_cms_No_FID_Caps.sfp'
        set_montage(raw, montage_fname)

# CLBP 007 and 010 had extremely noisy data near the ends of their recordings.
# Crop it out.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
in_clbp_study = 'Chronic Low Back Pain' in parent_dir
if in_clbp_study and sub_id == '007':
    raw.crop(tmax=1483)
if in_clbp_study and sub_id == '010':
    raw.crop(tmax=1997.8)

# high level inspection: channel names, then the channel count
print(raw.ch_names, len(raw.ch_names), sep='\n')

# apply notch filter (60 Hz, notch width 3)
print(f"{sub_id}\napplying notch filter...")
raw = raw.notch_filter(60., notch_widths=3)
clear_display()

# apply bandpass filter (1-100 Hz)
print(f"{sub_id}\napplying bandpass filter...")
raw = raw.filter(l_freq=1., h_freq=100.)
clear_display()

# resample data to decrease file size
orig_sfreq = raw.info['sfreq']
print(f"{sub_id}\nresampling data from {orig_sfreq} Hz to {RESAMPLE_FREQ} Hz...")
raw.resample(RESAMPLE_FREQ, npad='auto')
clear_display()

# find bad channels automatically, mark them, then interpolate them
# NOTE(review): the recorded run of this cell stopped during this step with
# "ValueError: array must not contain infs or NaNs", while the later cells
# found no NaN/inf in raw.get_data(). The cause may therefore lie outside the
# samples (e.g. channel positions used for interpolation) -- TODO confirm.
print(f"{sub_id}\nremoving bad channels...")
raw_pyprep = NoisyChannels(raw, random_state=RANDOM_STATE)
raw_pyprep.find_all_bads(
    ransac=False,
    channel_wise=False,
    max_chunk_size=None,
)
raw.info['bads'] = raw_pyprep.get_bads()
raw.interpolate_bads(reset_bads=True)
clear_display()

# re-reference channels (average reference, applied to a copy)
print(f"{sub_id}\nre-referencing channels to average...")
raw, _ = mne.set_eeg_reference(raw, ref_channels='average', copy=True)
clear_display()

# fit ICA
print(f"{sub_id}\nfitting ICA...")
num_goods = len(raw.ch_names) - len(raw.info['bads']) - 1 # adjust for EOG
# Integer floor division gives the same value as floor(num_goods / 2) and
# avoids numpy, which this notebook only imports in a later cell.
ica = ICA(n_components=num_goods // 2, random_state=RANDOM_STATE, max_iter='auto')
ica.fit(raw)
clear_display()

# find EOG artifacts
print(f"{sub_id}\nfinding EOG artifacts...")

try:
    eog_indices, eog_scores = ica.find_bads_eog(raw,
                                                threshold='auto')
except Exception as err:
    # Best-effort fallback kept from the original: exclude the first two
    # components. Catching Exception (not a bare except) lets
    # KeyboardInterrupt/SystemExit through, and the reason is reported.
    print(f"EOG detection failed ({err!r}); excluding components [0, 1] instead")
    ica.exclude = [0, 1]
else:
    ica.exclude = eog_indices
clear_display()

# apply ICA
print(f"{sub_id}\napplying ICA...")
ica.apply(raw)
clear_display()

# save copy of data
print(f"Saving processed data as '{save_fname_fif}'...")

# Drop the 'VEO' or 'VEOG' channel if one is present; only when neither is
# there and Fp1 was used as EOG is the 32-channel montage applied again.
for veog_name in ('VEO', 'VEOG'):
    if veog_name in raw.ch_names:
        raw.drop_channels(veog_name)
        break
else:
    if Fp1_eog_flag:
        montage_fname = '../montages/Hydro_Neo_Net_32_xyz_cms_No_Fp1.sfp'
        set_montage(raw, montage_fname)

# Write the FIF file, replacing any existing one
raw.save(f'{save_path}{save_fname_fif}', verbose=True, overwrite=True)
clear_display()

# high level inspection
remaining_bads = raw.info['bads']
print(raw.ch_names)
print('\nNumber of remaining channels: ', len(raw.ch_names) - len(remaining_bads))
print('\nDropped channels: ', remaining_bads)

print("Raw data preprocessing complete.")

clear_display()

001
removing bad channels...


ValueError: array must not contain infs or NaNs

In [26]:
raw

0,1
Measurement date,"January 01, 2000 00:00:00 GMT"
Experimenter,Unknown
Participant,XX

0,1
Digitized points,64 points
Good channels,50 EEG
Bad channels,"F1, P4, P7, Oz, P8, FC2, PO3, P3, P6, O2, F9, F2"
EOG channels,Not available
ECG channels,Not available

0,1
Sampling frequency,400.00 Hz
Highpass,1.00 Hz
Lowpass,100.00 Hz
Filenames,001 test pinprick2023.12.04_09.58.12.EDF
Duration,00:25:26 (HH:MM:SS)


In [27]:
# Pull the samples out of `raw` so they can be checked for NaN/inf values
data = raw.get_data()

In [28]:
import numpy as np

# Index arrays of every entry that is NaN or +/-inf (i.e. not finite)
inds = np.where(~np.isfinite(data))

In [29]:
inds

(array([], dtype=int64), array([], dtype=int64))

In [30]:
import pandas as pd

# Wrap the extracted array in a DataFrame for tabular inspection
df = pd.DataFrame(data=data)