# In [None]:
# Preprocessing

# Import necessary libraries
import mne
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import zscore
from sklearn.decomposition import PCA
from mne_icalabel import label_components
from mne.preprocessing import ICA
import copy

# Set up matplotlib and MNE configurations
# `import matplotlib.pyplot as plt` binds only the name `plt`; the top-level
# package has to be imported explicitly before `matplotlib.use` can be called
# (otherwise the next statement raises NameError).
import matplotlib

# Non-interactive backend: every figure in this script is written to disk
# with savefig and never shown.
matplotlib.use('Agg')
# Use MNE's matplotlib-based browser for the current session ...
mne.viz.set_browser_backend('matplotlib', verbose=None)
# ... and store the same preference in the MNE configuration.
mne.set_config('MNE_BROWSER_BACKEND', 'matplotlib')
# NOTE(review): init_cuda presumably only enables CUDA when the MNE_USE_CUDA
# config value is set - confirm on the target machine.
mne.cuda.init_cuda(verbose=True)

# Define input and output directories (relative to the working directory)
files_in = '../data/in/subjects/'
files_out = '../data/out/subjects/'

# Load subject names, one per line.
# splitlines() copes with both LF and CRLF files, and blank lines (including
# the one produced by a trailing newline) are dropped so that they neither
# become an empty subject name nor shift the positional alignment with
# bad_list, which is indexed by the subject's position in this list.
with open("./names.txt", "r") as names:
    subject_list = [line.strip() for line in names.read().splitlines() if line.strip()]

# Recording conditions; the position in this list is the second index into
# bad_list (0 -> 'EC', 1 -> 'EO').
modes = ['EC', 'EO']

# Load custom montage
montage_path = r"../data/in/MFPRL_UPDATED_V2.sfp"
montage = mne.channels.read_custom_montage(montage_path)

# Define bad channels list
# Positional table: bad_list[i_s][j_m] holds the channel names to mark as bad
# for the subject at index i_s of subject_list and the mode at index j_m of
# modes (0 -> 'EC', 1 -> 'EO'). The rows must therefore stay in the same order
# as names.txt.
# NOTE(review): the subject IDs in the comments are informational only; the
# 11th row is labelled 116 by sequence - confirm against names.txt.
bad_list = [
    [[],  # 101 EC
     []],  # 101 EO
    [[],  # 102 EC
     []],  # 102 EO
    [[],  # 103 EC
     []],  # 103 EO
    [[],  # 104 EC
     []],  # 104 EO
    [[],  # 105 EC
     []],  # 105 EO
    [[],  # 111 EC
     []],  # 111 EO
    [[],  # 112 EC
     []],  # 112 EO
    [[],  # 113 EC
     []],  # 113 EO
    [[],  # 114 EC
     []],  # 114 EO
    [[],  # 115 EC
     []],  # 115 EO
    [[],  # 116 EC
     []],  # 116 EO
    [[],  # 117 EC
     []],  # 117 EO
    [[],  # 118 EC
     []],  # 118 EO
    [[],  # 119 EC
     []],  # 119 EO
    [[],  # 120 EC
     []],  # 120 EO
    [[],  # 401 EC
     []],  # 401 EO
    [[],  # 402 EC
     []],  # 402 EO
    [[],  # 403 EC
     []],  # 403 EO
    [[],  # 404 EC
     []],  # 404 EO
    [[],  # 406 EC
     []],  # 406 EO
    [[],  # 407 EC
     []],  # 407 EO
    [[],  # 408 EC
     []],  # 408 EO
    [[],  # 409 EC
     []],  # 409 EO
    [[],  # 410 EC
     []],  # 410 EO
    [[],  # 411 EC
     []],  # 411 EO
    [[],  # 412 EC
     []],  # 412 EO
    [[],  # 413 EC
     []],  # 413 EO
    [[],  # 414 EC
     []],  # 414 EO
    [[],  # 415 EC
     []],  # 415 EO
    [[],  # 416 EC
     []]  # 416 EO
]

# Map the amplifier channel names ('Ch1' ... 'Ch64') to electrode labels.
# The labels are listed in amplifier order, eight per row, so the label at
# position k (1-based) belongs to 'Ch<k>'.
ch_map = {
    f"Ch{position}": electrode
    for position, electrode in enumerate(
        (
            'Fp1', 'Fz', 'F3', 'F7', 'LHEye', 'FC5', 'FC1', 'C3',           # Ch1-Ch8
            # Ch10: setting FPz as GND so it matches montage
            'T7', 'GND', 'CP5', 'CP1', 'Pz', 'P3', 'P7', 'O1',              # Ch9-Ch16
            'Oz', 'O2', 'P4', 'P8', 'Rmastoid', 'CP6', 'CP2', 'Cz',         # Ch17-Ch24
            'C4', 'T8', 'RHEye', 'FC6', 'FC2', 'F4', 'F8', 'Fp2',           # Ch25-Ch32
            'AF7', 'AF3', 'AFz', 'F1', 'F5', 'FT7', 'FC3', 'FCz',           # Ch33-Ch40
            'C1', 'C5', 'TP7', 'CP3', 'P1', 'P5', 'Lneck', 'PO3',           # Ch41-Ch48
            'POz', 'PO4', 'Rneck', 'P6', 'P2', 'CPz', 'CP4', 'TP8',         # Ch49-Ch56
            'C6', 'C2', 'FC4', 'FT8', 'F6', 'F2', 'AF4', 'RVEye',           # Ch57-Ch64
        ),
        start=1,
    )
}

def _artifact_component_indices(labels, probabilities,
                                keep_labels=("brain", "other"),
                                min_probability=0.70):
    """Return the indices of the ICA components to remove.

    A component is kept only when its label is in ``keep_labels`` AND its
    probability is at least ``min_probability``; every other component index
    is returned.
    """
    return [idx for idx, (label, prob) in enumerate(zip(labels, probabilities))
            if label not in keep_labels or prob < min_probability]


def _outlier_epoch_indices(epoch_data, threshold):
    """Return the indices of epochs holding any sample with |z| > ``threshold``.

    ``epoch_data`` is iterated along its first axis; each item is z-scored
    along axis 1, i.e. every row of an epoch is normalised against its own
    mean and standard deviation within that epoch.
    """
    return [idx for idx, epoch in enumerate(epoch_data)
            if np.any(np.abs(zscore(epoch, axis=1)) > threshold)]


def process_subject(subject, mode, i_s, j_m):
    """Run bad-channel marking, ICA cleaning and epoching for one recording.

    Reads ``<files_out><subject>/<mode>/<subject>_maprenamed&nfiltered.fif``
    and writes, into the same directory: ``log.txt``,
    ``<subject>_badchannels.fif``, ``<subject>_PCA_variance.png``,
    ``<subject>_ica_timeseries.png``, ``labels.txt``, ``probs.txt``,
    ``<subject>_ICA.fif``, ``<subject>_interpolated.fif`` and
    ``<subject>_epoched.fif``.

    Parameters
    ----------
    subject : str
        Subject directory / file-name prefix.
    mode : str
        Recording condition sub-directory (an entry of ``modes``).
    i_s : int
        Index of the subject, used as first index into ``bad_list``.
    j_m : int
        Index of the mode, used as second index into ``bad_list``.

    Notes
    -----
    Relies on the module-level names ``files_out`` and ``bad_list``.
    """
    print(f"Processing {subject} - {mode}")

    output_path = f"{files_out}{subject}/{mode}/"

    # The log is created (truncated) before the recording is loaded and is
    # only needed while the bad channels are recorded, so it is closed right
    # after instead of staying open for the whole pipeline.
    with open(f"{output_path}log.txt", "w") as log:
        # Load data
        EEG = mne.io.read_raw_fif(f"{output_path}{subject}_maprenamed&nfiltered.fif", preload=True)

        # Mark bad channels
        bad_txt = bad_list[i_s][j_m]
        if bad_txt:
            EEG.info['bads'].extend(bad_txt)
            log.write(f"Bad channels: {EEG.info['bads']}\n")

    # Save data with bad channels marked
    EEG.save(f"{output_path}{subject}_badchannels.fif", overwrite=True)

    # Work on an average-referenced copy without the mastoid channel
    original_EEG = EEG.copy()
    original_EEG.set_eeg_reference('average')
    original_EEG.drop_channels(['Rmastoid'])

    # Diagnostic PCA plot (samples as rows, channels as columns). The ICA
    # below uses a fixed explained-variance target, so this plot is only for
    # visual inspection.
    pca = PCA()
    pca.fit(original_EEG.get_data().T)

    pca_fig, pca_ax = plt.subplots(figsize=(10, 5))
    pca_ax.plot(np.cumsum(pca.explained_variance_ratio_))
    pca_ax.set_xlabel('Number of Components')
    pca_ax.set_ylabel('Cumulative Explained Variance')
    pca_ax.set_title('Explained Variance by PCA Components')
    pca_ax.grid(True)
    pca_fig.savefig(f"{output_path}{subject}_PCA_variance.png")
    plt.close(pca_fig)

    # Perform ICA (extended infomax) on the good EEG channels; a float
    # n_components asks for as many components as needed to explain that
    # fraction of the variance.
    n_components = 0.99
    ica = ICA(n_components=n_components, max_iter="auto", method="infomax",
              random_state=97, fit_params=dict(extended=True))
    picks_eeg = mne.pick_types(original_EEG.info, meg=False, eeg=True, eog=False,
                               stim=False, emg=False, exclude='bads')
    ica.fit(original_EEG, picks=picks_eeg, decim=3)

    # Plot ICA components. Save the figure returned by plot_sources rather
    # than whatever pyplot currently considers the "current" figure.
    sources_fig = ica.plot_sources(original_EEG, show_scrollbars=False, show=False)
    sources_fig.savefig(f"{output_path}{subject}_ica_timeseries.png")
    plt.close(sources_fig)

    # Label ICA components
    ic_labels = label_components(original_EEG, ica, method='iclabel')
    component_labels = ic_labels["labels"]
    component_probabilities = ic_labels["y_pred_proba"]

    # Save labels and probabilities (one component per line in each file)
    with open(f"{output_path}labels.txt", "w") as labelf, open(f"{output_path}probs.txt", "w") as probf:
        for label, prob in zip(component_labels, component_probabilities):
            labelf.write(f"{label}\n")
            probf.write(f"{prob}\n")

    # Exclude components based on label and probability
    exclude_idx = _artifact_component_indices(component_labels, component_probabilities)

    # Apply ICA on a copy so original_EEG stays untouched
    reconst_EEG = original_EEG.copy()
    ica.apply(reconst_EEG, exclude=exclude_idx)

    # Save ICA-processed data
    reconst_EEG.save(f"{output_path}{subject}_ICA.fif", overwrite=True)

    # Interpolate bad channels
    # NOTE(review): the interpolation is applied to the pre-ICA data and only
    # saved to disk; the epochs below are built from reconst_EEG, whose bad
    # channels are not interpolated - confirm this is intended.
    original_EEG_data = original_EEG.copy().pick_types(meg=False, eeg=True, exclude=[])
    original_EEG_data_interp = original_EEG_data.copy().interpolate_bads(reset_bads=False)
    original_EEG_data_interp.save(f"{output_path}{subject}_interpolated.fif", overwrite=True)

    # Epoching: place one zero-length 'Event' annotation at every whole second
    # of the recording. n_times gives the sample count without copying the
    # data array.
    recording_seconds = reconst_EEG.n_times / reconst_EEG.info['sfreq']
    onsets = np.arange(0, recording_seconds, 1)
    durations = np.zeros_like(onsets)
    descriptions = ['Event'] * len(onsets)
    annotations = mne.Annotations(onsets, durations, descriptions)
    reconst_EEG.set_annotations(annotations)
    events, event_id = mne.events_from_annotations(reconst_EEG)

    # One-second epochs centred on each event
    tmin, tmax = -0.5, 0.5
    epochs_all = mne.Epochs(reconst_EEG, events, event_id=event_id,
                            tmin=tmin, tmax=tmax, proj=True, baseline=None, preload=True)

    # Z-score based epoch rejection; only the indices are used, the epoch
    # data itself is left un-normalised.
    zscore_threshold = 6
    to_drop = _outlier_epoch_indices(epochs_all.get_data(), zscore_threshold)
    epochs_all.drop(to_drop)

    # Resample epochs
    desired_sfreq = 512
    epochs_all.resample(desired_sfreq, npad='auto')

    # Save epoched data
    epochs_all.save(f"{output_path}{subject}_epoched.fif", overwrite=True)

# Main processing loop
# Only subjects whose name contains '116' are processed. The filter does not
# depend on the mode, so it is checked once per subject (each skipped subject
# is reported once instead of once per mode).
for i_s, subject in enumerate(subject_list):
    # i_s must remain the position in subject_list: process_subject uses it
    # to look up the subject's row in bad_list.
    if '116' not in subject:
        print(f"Skipping {subject}")
        continue
    for j_m, mode in enumerate(modes):
        process_subject(subject, mode, i_s, j_m)

print("Processing complete.")