In [1]:
import sys
import mne
import pandas as pd
import numpy as np
import os.path
from os import path
import pyprep
from mne import Epochs, pick_types, find_events, pick_types, set_eeg_reference, channels, preprocessing
from mne.io import concatenate_raws, read_raw_bdf, read_raw_eeglab, eeglab
from mne.preprocessing import ICA, create_eog_epochs, create_ecg_epochs, corrmap,  annotate_muscle_zscore
import matplotlib.pyplot as plt
from scipy.integrate import simps
from mne.viz import plot_topomap
from matplotlib import cm, colors, colorbar
from fooof.bands import Bands
import math
    
%matplotlib qt 


**Input**: The input to this script will be the matlab version of the raw data (.set files).

**Output**: This will result in epoched preprocessed eeg data, saved as an .fif file.

**Observations**: Keep a record of the quality of the data, such as bridged electrodes, flat lined electrodes, number of epochs dropped, types of artifacts in the data, which ICA components removed - these should be inputted to https://docs.google.com/spreadsheets/d/1lvMoz_4yqfM0uZXONilXo1MfdgEKzUtDy3Ju8tA1PHA/edit?usp=sharing

### Order of pre-processing

1. Load in data, improve the electrode names, drop RESP and ECG as well as M1, M2, set the montage
2. Visualise the data as a psd to get names of any flatlined channels. (record them in the google spreadsheet)
3. Store the flat lined channels as 'bads'
4. Inspect raw data - look at levels of environmental noise to get initial idea of quality
5. Filter: apply high pass, decide whether to apply low pass at this point, plus notch filtering for line noise
6. Check for bridged electrodes, save a list of them to be later interpolated (also record them in the google spreadsheet)
7. Annotate bad electrodes on the interactive plot, by clicking on the labels at the side
8. Annotate bad segments of data - open up interactive plot, drag across bad segments
9. Make a copy of the raw data, call it 'data'
10. Run ICA on 'data' and plot components -  Reject eye blink, heart rate and broadband noise related components - check with Holly if any uncertainty!
11. Run the automatic component labelling
12. Apply ICA to the data and do some more plots to check whether eye movement etc has successfully been removed.
13. Check how many epochs have been dropped
14. Segment data into 3 second epochs
15. Interpolate bridged and bad electrodes
16. Add 'CPz' back in and re-reference to the common average
17. Final ICA check - top 5 components, should all be brain! Take a picture of these to save as a record. If not all brain, the data needs further cleaning.
18. Save data


Done!



### (1) Load up the data
Load up one subject, from one condition at a time to process.


Set the parameters at the beginning - **month, pre or post, closed or open, ppt num**

In [None]:
# Recording selection: edit these four values before each run.
month = 'september'      # other sessions used so far: 'august', 'march'
subjects = ['02']        # participant number(s), as zero-padded strings
conditions = ['pre']     # select either 'pre' or 'post'
tasks = ['closed']       # select either 'open' or 'closed'

In [None]:
segmentlength = 3  # epoch length in seconds; used when segmenting in step (14)
E_error = True     # True when the channel labels carry a spurious leading 'E'
# Written as a loop, but only one subject/condition/task is selected at a time
# (no batch processing), so after this cell `raw` holds that single recording.
for sub in subjects:
    for condition in conditions:
        for task in tasks:
            # File names use 'eo' / 'ec' for eyes open / eyes closed.
            tasklabel = 'eo' if task == 'open' else 'ec'

            # Alternative data roots (swap in as needed):
            # root = 'C:\\Users\\dedbl\\Documents\\onaya-eeg\\setfiles\\sub-01\\pre-retreat-rsEEG\\eyes_closed\\'
            # root = 'X:\\CompSci\\ResearchProjects\\EJONeill\\Neuroimaging\\onaya\\sub-'+sub+'\\'+condition+'-retreat-rsEEG\\eyes_'+task+'\\'
            root = 'D:\\onaya-eeg\\data-'+month+'\\sub-'+sub+'\\'+condition+'-retreat-rsEEG\\eyes_'+task+'\\'
            fname = root + 'sub-'+sub+'-'+tasklabel+'-'+condition+'.set'

            # Folder for the preprocessing plots; exist_ok makes re-running the cell safe.
            plotpath = root+'plots_preprocessing'
            os.makedirs(plotpath, exist_ok=True)

            raw = mne.io.read_raw_eeglab(input_fname=fname, preload=True)
            endPoint = (raw.n_times - 1) / raw.info['sfreq']  # time of the last sample, in seconds
            print(raw.info['ch_names'])

            if E_error:
                # Strip only a *leading* 'E'. The previous str.replace('E', '', 1)
                # removed the first 'E' anywhere in the label, so a label without
                # the prefix (e.g. 'RESP') would have been mangled.
                raw.rename_channels({
                    name: name[1:]
                    for name in raw.ch_names
                    if name.startswith('E') and len(name) > 1
                })

            # Trim 5 s from the start and from the end of the recording.
            raw = raw.crop(tmin=5, tmax=endPoint-5)
            raw.set_channel_types({'EOG': 'eog'})
            # Older recordings used different auxiliary labels:
            # raw = raw.drop_channels(ch_names = ['RESP3','BIP1', 'M1', 'M2'])
            raw = raw.drop_channels(ch_names=['RESP', 'ECG', 'M1', 'M2'])

            # set the montage
            easycap_montage = mne.channels.make_standard_montage('standard_1005')
            raw.set_montage(easycap_montage)

### (2) Visualise the montage and sanity check PSD
Check for power line noise. If these plots fail to run, it likely means some electrodes have inf values. Check the description that is output, and get the names of flat-lined electrodes to input to the next step. Also check whether there is an alpha peak.

In [None]:
# Sanity-check plots on the freshly loaded recording (each opens its own window).
#easycap_montage.plot()
# Normalised PSD topomaps for the EEG channels.
raw.plot_psd_topomap(ch_type='eeg', normalize=True)
# Sensor layout with channel names, to confirm the montage was applied.
raw.plot_sensors(show_names=True,kind='topomap')
# PSD with fmax=100: look for power line noise and an alpha peak.
raw.plot_psd(fmax=100)
# Interactive browser of the raw traces.
raw.plot()

### (3) Label any flatlined channels as 'bad' here
Otherwise the PSD plots are tricky to read. Flat-lined channels can be identified from the step above: a warning message will say "Zero value in spectrum" for a list of channels, or that these channels might be dead. Also, in the PSD this produces, note any channels that have an especially low or high value; these should be marked as bad in the code below too.

In [None]:
def flat_lined_chans(data, chans):
    """Mark flat-lined channels as bad on ``data`` and plot its PSD before and after.

    Parameters
    ----------
    data : object
        Recording exposing ``info["bads"]`` and ``plot_psd`` (here: the loaded raw data).
        It is modified in place.
    chans : list of str
        Names of the flat-lined channels. Blank entries (``''``) are ignored, so the
        unfilled placeholder ``['']`` is treated the same as ``[]``.

    Returns
    -------
    The same ``data`` object that was passed in.
    """
    # Discard blank placeholders and duplicates while keeping the given order.
    requested = list(dict.fromkeys(
        name for name in (chans or []) if name and name.strip()
    ))
    if not requested:
        print("Either there are no flatlined channels, or you have forgotten to enter them")
        return data

    print('Flatlined channels to be dropped are ', requested)
    data.plot_psd(fmax=100)  # PSD before marking the channels
    # Mark the channels on the object that was passed in (the previous version
    # wrote to the global `raw`), skipping names already listed so that
    # re-running the cell does not append duplicates.
    new_bads = [name for name in requested if name not in data.info["bads"]]
    data.info["bads"].extend(new_bads)
    data.plot_psd(fmax=100)  # PSD after marking the channels
    plt.show()
    return data
    

In [None]:
# Names of flat-lined channels go here, e.g. ['Fp1', 'Oz'].
# Leave the list empty when there are none: the old [''] placeholder was
# treated as a real (empty) channel name and added to raw.info['bads'].
flat_channels = []
raw = flat_lined_chans(raw, chans=flat_channels)
raw.plot_psd(fmax=100)
raw.plot()

### (4) Inspect raw data quality - Plot the data
Likely this will be highly noisy at this point due to broadband noise

In [None]:
# Open the interactive browser to judge overall noise levels before filtering.
raw.plot() 

### (5) 'Filter'
High pass filter the data at 0.53 hz and low-pass 45 hz, apply notch filter too for 60hz and its harmonics

In [None]:
# Band-pass 0.53-45 Hz, then notch-filter at 60 Hz and 120 Hz.
raw = raw.filter(l_freq=0.53, h_freq=45)
raw = raw.notch_filter([60, 120])
raw.plot_psd(fmax=100)

raw.plot()

# Write the current annotations to disk and load them back onto `raw`.
# This previously referenced `temp_version`, which is never defined in the
# notebook and raised a NameError on a fresh kernel.
raw.annotations.save('annotations.txt', overwrite=True)
annot = mne.read_annotations('annotations.txt')
raw.set_annotations(annot)

### (6) Identify bridged electrodes
Save the indexes, so they can be interpolated after artifact removal

In [None]:


# Warn when more than this many bridged pairs are found. The value 3 is the
# threshold the original condition used; the old message said "two".
# NOTE(review): confirm which limit is intended.
MAX_BRIDGED_PAIRS = 3

ed_data = mne.preprocessing.compute_bridged_electrodes(raw)
# First element of the result: the bridged pairs, reused for interpolation in step (15).
bridged_idx = ed_data[0]

n_bridged = len(bridged_idx)
if n_bridged == 0:
    print("No bridged electrodes, congratulations!")
elif n_bridged > MAX_BRIDGED_PAIRS:
    print(f"Warning: more than {MAX_BRIDGED_PAIRS} pairs of electrodes show bridging: {n_bridged}")
else:
    print("Number of bridged pairs are: ", n_bridged)



### (7) Annotate obviously noisy electrodes - though unlikely to need this step

In [None]:
# Interactive browser: click a channel label at the side to mark that electrode as bad.
raw.plot()

### (8) Start cutting out noisy segments i.e gross artifacts
Highlight these segments in the interactive plot


Look out for:
1. Squid jumps (electrical)
2. Gross muscle artefact
3. sweat

raw.plot()
raw.plot_psd(fmax=45)
        

### (9) make copy of the data

In [None]:
# Working copy of the filtered, annotated recording; the ICA in step (10) is fitted on it.
data = raw.copy() # reconst_raw

### (10)  Compute ICA and plot components
Look out for heart rate, eye blinks and eye movements  - also for electrodes with high impedance, and for broadband noise, 'white noise'. Mark these on the plots.

In [None]:
print("Computing ICA, identifying eog and muscle activity, excluding components that capture these")
# 50 components: kept below the number of channels (there are 64) so the
# decomposition has lower dimensionality than the data.
ica = mne.preprocessing.ICA(
    n_components=50,
    method='fastica',
    max_iter='auto',
    random_state=97,
)
# Fit on the EEG channels only, leaving out segments annotated as bad.
ica.fit(data, picks='eeg', reject_by_annotation=True)
ica

In [None]:
# Component topographies; `data` is passed as `inst` alongside the fitted ICA.
ica.plot_components(inst=data)
# Component time courses, computed from `data` (the copy of the filtered recording made in step 9).
ica.plot_sources(data)

### (11) As a support tool use the ICA automatic identifier to look for eye and heart artifacts

In [None]:
# Automatic component labelling, used as a support tool for the manual selection above.
# The return value is shown as the cell output.
from mne_icalabel import label_components
label_components(data, ica, method='iclabel')

Check that you have correctly selected the ICA components to be removed

In [None]:
# Indices of the components currently marked for removal.
print(ica.exclude)

### (12) Apply the ICA to the raw data, now saved as reconst_raw
After this plot the new output to check it has successfully removed eye movements etc

In [None]:
# Last look at the components that are about to be removed.
print(ica.exclude)
# Apply the ICA to a fresh copy so `raw` itself stays untouched for later comparison.
reconst_raw = raw.copy()
#spare_unfiltered=unfiltered.copy()
ica.apply(reconst_raw)

In [None]:

# NOTE(review): `raw` was already filtered with l_freq=0.53, h_freq=45 in step (5),
# so this second pass with l_freq=0.1 looks redundant. Confirm it is intended.
reconst_raw = reconst_raw.filter(l_freq = 0.1, h_freq = 45)
# Inspect the cleaned recording: traces first, then the PSD.
reconst_raw.plot()
reconst_raw.plot_psd(fmax=100)

### (13) Visualise how much data has been dropped

In [None]:
# One-second epochs, built only to see how much data the bad-segment annotations
# remove; `epochs` is rebuilt with the real epoch length in step (14).
epochs = mne.make_fixed_length_epochs(raw, duration=1, preload=True, reject_by_annotation=True)
droplog_fig = epochs.plot_drop_log()
# Save the figure returned by plot_drop_log rather than pyplot's "current"
# figure, which may be another of the open plot windows.
droplog_fig.savefig(plotpath+'\\epoch_droplog', dpi=100, format='pdf')

### (14) Segment the data into 3 second epochs

In [None]:
# Cut the ICA-cleaned recording into epochs of `segmentlength` seconds,
# with reject_by_annotation enabled for the segments annotated as bad.
epochs = mne.make_fixed_length_epochs(reconst_raw, duration=segmentlength, preload=True, reject_by_annotation = True)


### Save a .txt file with annotation information

In [None]:
# Prefix the file with the configured `month`. It was hard-coded to 'august',
# which mislabels recordings from any other session.
reconst_raw.annotations.save(month+'_sub-'+sub+'-'+condition+'-'+tasklabel+'-saved-annotations.txt', overwrite=True)

In [None]:
# Browse the segmented epochs before interpolation.
epochs.plot()


### (15)  Interpolate 'bads' and bridged electrodes

In [None]:
# Interpolate the bridged pairs found in step (6), then the channels marked as bad.
epochs = mne.preprocessing.interpolate_bridged_electrodes(epochs, bridged_idx, bad_limit=4)
# reset_bads=False is passed here. NOTE(review): presumably this keeps the
# interpolated channels listed in info['bads'] - confirm that is wanted.
epochs.interpolate_bads(reset_bads=False, verbose = True)

In [None]:
# PSD of the interpolated epochs, limited to fmax=45.
epochs.plot_psd(fmax=45)
#epochs.info['ch_names']

### (16)  Add in CPz and re-reference
Blog post on why the original reference needs to be added back in to avoid rank-deficient data:
https://sccn.ucsd.edu/wiki/Makoto%27s_preprocessing_pipeline

In [None]:

# Restore the original reference electrode as a channel, then compare the PSD
# before and after switching to the average reference.
epochs = mne.add_reference_channels(epochs, ref_channels='CPz',copy=True) # add reference channel back in
epochs.plot_psd(fmax = 45)
# set the references to average
epochs = epochs.set_eeg_reference(ref_channels='average', ch_type = 'eeg')
epochs.plot_psd(fmax = 45)

### (17) Final ICA check

In [None]:
#ica_components = 63 # there are 64 channels - should be lower dimensionality than num of channels
ica = mne.preprocessing.ICA(n_components=5, random_state=97, max_iter='auto',method='fastica')#, fit_params=dict(extended=True)) # method='fastica',)#, method='picard')
ica.fit(epochs, reject_by_annotation=True)#, picks = 'eeg')
ica
ica.plot_components(inst=epochs)
ica.plot_sources(epochs) # note applied to original raw

In [None]:
# Browse the final, re-referenced epochs.
epochs.plot()

In [None]:
# Before/after comparison: PSD of the final epochs, a 20 s window of `raw`
# starting at 5 s, and the cleaned epochs.
epochs.plot_psd(fmax = 45)
raw.plot(start=5, duration=20, title= 'Original data')
epochs.plot(title = 'ICA applied to reconstruction')

### (18) Saving the data

In [None]:
root = 'X:\\CompSci\\ResearchProjects\\EJONeill\\Neuroimaging\\onaya\\'
output_folder_root = root + 'preprocessed\\'
filename = 'sub-'+sub+'-'+tasklabel+'-'+condition

# Create the per-month output folder first, so the save at the end of a long
# manual session does not fail just because the folder is missing.
epochs_dir = output_folder_root+'epochs\\'+month+'_2023\\'
os.makedirs(epochs_dir, exist_ok=True)

epochs.save(fname=epochs_dir+filename+'-epo.fif', overwrite=True)