In [1]:
import mne
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import json
from glob import glob
import os

# Data Reading

In [2]:
# Relative glob pattern matching every .edf recording inside each subject
# folder (Data/Subj<N>/), two directory levels above the working directory.
path_pattern = os.path.join('..', '..', 'Data', 'Subj*', '*.edf')

# glob() makes no promise about ordering, yet later cells line recordings up
# by list position, so sort to get the same order on every run and platform.
# recursive=True is dropped: it only has an effect on patterns containing '**'.
edf_files = sorted(glob(path_pattern))

# Skip any path that contains '.md' anywhere in it
edf_files = [f for f in edf_files if '.md' not in f]

In [3]:
# Show the discovered recording paths as the cell output
edf_files

['..\\..\\Data\\Subj1\\Alexis_Fast_EPOCX_218750_2024.05.20T15.14.17+07.00.edf',
 '..\\..\\Data\\Subj1\\Alexis_Slow_EPOCX_218750_2024.05.13T13.51.48+07.00.edf',
 '..\\..\\Data\\Subj10\\Livy_Fast_EPOCX_218750_2024.05.22T15.14.00+07.00.edf',
 '..\\..\\Data\\Subj10\\Livy_Slow_EPOCX_218750_2024.05.16T12.01.27+07.00.edf',
 '..\\..\\Data\\Subj11\\Nicole_Fast_EPOCX_218750_2024.05.15T12.15.16+07.00.edf',
 '..\\..\\Data\\Subj11\\Nicole_Slow_EPOCX_218750_2024.05.20T11.20.47+07.00.edf',
 '..\\..\\Data\\Subj12\\Ryan_Fast_EPOCX_218750_2024.05.20T16.03.17+07.00.edf',
 '..\\..\\Data\\Subj12\\Ryan_Slow_EPOCX_218750_2024.05.13T17.27.48+07.00.edf',
 '..\\..\\Data\\Subj13\\Sanni_Fast_EPOCX_218750_2024.05.22T13.12.31+07.00.edf',
 '..\\..\\Data\\Subj13\\Sanni_Slow_EPOCX_218750_2024.05.15T14.29.45+07.00.edf',
 '..\\..\\Data\\Subj14\\Sherryn_Fast_EPOCX_218750_2024.05.20T12.36.50+07.00.edf',
 '..\\..\\Data\\Subj14\\Sherryn_Slow_EPOCX_218750_2024.05.13T15.14.14+07.00.edf',
 '..\\..\\Data\\Subj15\\Vincent_Fast_E

In [4]:
# Build one metadata row per recording: [subject_id, pace, file_path]
baseline_metadata = []
for edf_path in edf_files:
    path_parts = edf_path.split(os.sep)

    # The parent folder is named 'Subj<N>'; whatever follows 'Subj' is the id
    subject_folder = path_parts[-2]
    subject_id = int(subject_folder.split('Subj')[-1])

    # The pace is the second '_'-separated field of the file name
    recording_name = path_parts[-1]
    pace = recording_name.split('_')[1]

    baseline_metadata.append([subject_id, pace, edf_path])

In [55]:
# Tabulate the per-recording metadata and keep a CSV copy next to the notebook
metadata_columns = ['subject_id', 'pace', 'file_path']
md = pd.DataFrame(data=baseline_metadata, columns=metadata_columns)
md.to_csv('baseline_metadata.csv', index=False)

In [6]:
def get_stimulus_recording(path, end_marker_index=2, max_duration=60):
    """Load an EDF recording and crop it to the stimulus window.

    The window comes from the marker JSON stored next to the EDF file (same
    name, ``.json`` extension). It starts at the ``endDatetime`` of marker 1
    and ends at the ``startDatetime`` of marker ``end_marker_index``; both are
    converted to seconds relative to the ``startDatetime`` of marker 0, which
    is treated as the start of the recording.

    Parameters
    ----------
    path : str
        Path to the ``.edf`` file.
    end_marker_index : int, default 2
        Index into ``Markers`` of the marker whose start closes the window.
    max_duration : float or None, default 60
        Upper bound in seconds on the returned segment. ``None`` disables
        the cap.

    Returns
    -------
    The cropped copy of the object returned by ``mne.io.read_raw_edf``.
    """
    raw_edf = mne.io.read_raw_edf(path, preload=True, verbose=False)

    # Swap only the file extension: str.replace('edf', 'json') would also
    # rewrite any 'edf' that happens to appear in a folder or file name.
    json_path = os.path.splitext(path)[0] + '.json'
    with open(json_path) as f:
        markers = json.load(f)['Markers']

    # Reference time that all offsets are measured from
    recording_start_time = pd.to_datetime(markers[0]['startDatetime'])

    # Window boundaries as absolute timestamps
    starttime_data = pd.to_datetime(markers[1]['endDatetime'])
    endtime_data = pd.to_datetime(markers[end_marker_index]['startDatetime'])

    # Convert to seconds from the reference time, which is what crop() expects
    relative_start_time = (starttime_data - recording_start_time).total_seconds()
    relative_end_time = (endtime_data - recording_start_time).total_seconds()

    # Crop a copy so the freshly loaded recording itself is left untouched
    r = raw_edf.copy().crop(tmin=relative_start_time, tmax=relative_end_time)

    # Cap the segment length so every recording contributes at most
    # max_duration seconds
    if max_duration is not None and r.times[-1] > max_duration:
        r.crop(tmin=0, tmax=max_duration)

    return r

In [7]:
def read_and_get_data(path):
    """Return the preprocessed stimulus segment of the recording at ``path``.

    Steps, in order: crop to the stimulus window via
    ``get_stimulus_recording``, drop channels that are not part of the
    'standard_1020' montage, attach that montage, re-reference to the channel
    average, and band-pass filter between 1 and 50 Hz.
    """
    eeg = get_stimulus_recording(path)

    # Keep only channels whose names exist in the standard 10-20 montage.
    # (Original note lists the EPOC X channels as AF3, AF4, F3, F4, FC5, FC6,
    # T7, T8, P7, P8, O1, O2 - TODO confirm against the device's channel list.)
    standard_1020 = mne.channels.make_standard_montage('standard_1020')
    unknown_channels = []
    for channel in eeg.ch_names:
        if channel not in standard_1020.ch_names:
            unknown_channels.append(channel)
    eeg.drop_channels(unknown_channels)

    # Electrode positions
    eeg.set_montage('standard_1020')

    # Average reference
    eeg.set_eeg_reference('average', verbose=False)

    # 1 - 50 Hz band-pass
    eeg.filter(l_freq=1, h_freq=50, verbose=False)

    return eeg

In [8]:
# Two recordings need special handling: their stimulus window ends at the
# start of marker 5 rather than marker 2, so they are cropped and preprocessed
# here with that different end marker.
# Paths are built with os.path.join so they use the platform's separator, in
# the same way as the glob pattern that produces edf_files.
defect_datas = [
    os.path.join('..', '..', 'Data', 'Subj1', 'Alexis_Slow_EPOCX_218750_2024.05.13T13.51.48+07.00.edf'),
    os.path.join('..', '..', 'Data', 'Subj3', 'Bennett_Fast_EPOCX_218750_2024.05.13T14.24.51+07.00.edf'),
]

# The montage is the same for every file, so build it once outside the loop
montage = mne.channels.make_standard_montage('standard_1020')

defect_record_fixed = []
for path in defect_datas:
    raw_edf = mne.io.read_raw_edf(path, preload=True, verbose=False)

    # Swap only the file extension: str.replace('edf', 'json') would also
    # rewrite any 'edf' that happens to appear in a folder or file name.
    json_path = os.path.splitext(path)[0] + '.json'
    with open(json_path) as f:
        data_json = json.load(f)

    # Reference time that all offsets are measured from
    recording_start_time = pd.to_datetime(data_json['Markers'][0]['startDatetime'])

    # Window runs from the end of marker 1 to the start of marker 5
    starttime_data = pd.to_datetime(data_json['Markers'][1]['endDatetime'])
    endtime_data = pd.to_datetime(data_json['Markers'][5]['startDatetime'])

    # Convert to seconds from the reference time, which is what crop() expects
    relative_start_time = (starttime_data - recording_start_time).total_seconds()
    relative_end_time = (endtime_data - recording_start_time).total_seconds()

    recording = raw_edf.copy().crop(tmin=relative_start_time, tmax=relative_end_time)

    # Cap the segment at 60 seconds
    r_duration = recording.times[-1]
    if r_duration > 60:
        recording.crop(tmin=0, tmax=60)

    # Drop channels that are not part of the standard 10-20 montage
    recording.drop_channels([ch for ch in recording.ch_names if ch not in montage.ch_names])

    # Setting montage (position of electrodes)
    recording.set_montage('standard_1020')

    # Setting EEG reference
    recording.set_eeg_reference('average', verbose=False)

    # Bandpass Filter at 1 - 50 Hz
    recording.filter(l_freq=1, h_freq=50, verbose=False)

    defect_record_fixed.append(recording)

In [9]:
# Preprocess every recording, using the separately repaired version for the
# defect files. Files are matched by (normalised) path instead of hard-coded
# list positions, so the result does not depend on how edf_files is ordered,
# and the defect files are not preprocessed a second time only to be replaced.
fixed_by_path = {
    os.path.normpath(defect_path): fixed
    for defect_path, fixed in zip(defect_datas, defect_record_fixed)
}

# Fail loudly if a defect file is not among the discovered recordings;
# otherwise its repaired version would silently be left out.
unmatched = set(fixed_by_path) - {os.path.normpath(edf) for edf in edf_files}
if unmatched:
    raise ValueError(f"Defect recordings not found in edf_files: {sorted(unmatched)}")

raws = []
for edf in edf_files:
    key = os.path.normpath(edf)
    raws.append(fixed_by_path[key] if key in fixed_by_path else read_and_get_data(edf))

# Creating Epochs

In [11]:
# Cut every recording into fixed 2-second windows, each overlapping the
# previous one by half its length
duration = 2
overlap = duration / 2
epoching_options = dict(duration=duration, overlap=overlap, preload=True, verbose=False)
epochs = [
    mne.make_fixed_length_epochs(recording, **epoching_options)
    for recording in raws
]

# Data Cleaning

In [12]:
# Autoreject
from autoreject import AutoReject

# Candidate values handed to AutoReject as n_interpolate
n_interpolate = [2, 3, 4]
ar = AutoReject(n_interpolate=n_interpolate, verbose=False, random_state=42)

# Refit the same AutoReject object on each recording's epochs, then keep both
# the transformed epochs and the accompanying reject log
epochs_ar, reject_log = [], []
for recording_epochs in epochs:
    ar.fit(recording_epochs)
    cleaned_epochs, log = ar.transform(recording_epochs, return_log=True)
    epochs_ar.append(cleaned_epochs)
    reject_log.append(log)

Dropped 8 epochs: 2, 3, 10, 18, 19, 53, 54, 55
Dropped 2 epochs: 54, 55
Dropped 15 epochs: 5, 6, 7, 8, 16, 17, 18, 34, 35, 44, 45, 51, 52, 53, 57
Dropped 34 epochs: 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, 20, 21, 22, 23, 25, 26, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52
Dropped 19 epochs: 0, 1, 2, 9, 10, 11, 12, 13, 14, 15, 24, 32, 33, 34, 35, 38, 39, 51, 52
Dropped 13 epochs: 1, 7, 8, 9, 12, 20, 21, 25, 31, 32, 38, 54, 55
Dropped 15 epochs: 9, 10, 11, 16, 17, 21, 26, 27, 28, 30, 31, 32, 47, 48, 49
Dropped 1 epoch: 7
Dropped 2 epochs: 26, 27
Dropped 8 epochs: 6, 20, 21, 45, 46, 47, 53, 54
Dropped 30 epochs: 1, 2, 3, 5, 6, 7, 12, 17, 18, 19, 20, 22, 23, 24, 25, 34, 35, 36, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58
Dropped 11 epochs: 15, 16, 17, 18, 20, 34, 35, 36, 37, 38, 58
Dropped 5 epochs: 13, 14, 15, 16, 17
Dropped 13 epochs: 5, 6, 7, 9, 10, 11, 12, 16, 17, 30, 31, 32, 33
Dropped 4 epochs: 19, 30, 31, 54
Dropped 3 epochs: 43, 44, 45
Dropped 3 epochs: 5, 13

In [56]:
# Export the cleaned epochs to ../../CleanedEpochs/Baseline/Subj<id>/<pace>-epo.fif
# Rows of `md` and entries of `epochs_ar` both follow the order of edf_files,
# so they are paired positionally; refuse to export if the lengths disagree.
if len(md) != len(epochs_ar):
    raise ValueError(
        f"Metadata has {len(md)} rows but there are {len(epochs_ar)} cleaned epoch sets"
    )

for subject_id, pace, e in zip(md['subject_id'], md['pace'], epochs_ar):
    directory = os.path.join('..', '..', 'CleanedEpochs', 'Baseline', f"Subj{subject_id}")
    filename = os.path.join(directory, f"{pace}-epo.fif")

    # Create the directory if it doesn't exist
    os.makedirs(directory, exist_ok=True)

    # overwrite=True lets the notebook be re-run; without it, save() raises
    # as soon as a file from an earlier run already exists.
    e.save(filename, overwrite=True)