In [None]:
# Mount Google Drive at /content/drive (Colab-only helper).
from google.colab import drive
drive.mount('/content/drive')

# Install the third-party packages through a notebook shell escape.
# NOTE(review): versions are unpinned; the captured output of this cell shows
# mne 1.11.0 and pyEDFlib 0.1.42 being installed.
!pip install mne pyEDFlib pandas numpy scipy


Mounted at /content/drive
Collecting mne
  Downloading mne-1.11.0-py3-none-any.whl.metadata (15 kB)
Collecting pyEDFlib
  Downloading pyedflib-0.1.42-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.2 kB)
Downloading mne-1.11.0-py3-none-any.whl (7.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m40.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyedflib-0.1.42-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m30.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyEDFlib, mne
Successfully installed mne-1.11.0 pyEDFlib-0.1.42


In [None]:
import os
import pandas as pd
import numpy as np
import mne
from scipy.signal import find_peaks


In [None]:
# Locations on the mounted Google Drive.
# events_root: one folder per subject; the driver loop looks in <subject>/ses-01/eeg for .tsv files.
events_root = "/content/drive/MyDrive/SeizeIT2/seizure_events"
# ecg_root: one folder per subject holding the matching *_ecg.edf recordings.
ecg_root    = "/content/drive/MyDrive/SeizeIT2/ECG"
# output_csv: destination of the per-seizure results table.
output_csv  = "/content/drive/MyDrive/SeizeIT2/preictal_rmssd_results.csv"


In [None]:
#RMSSD: root of the mean squared difference between consecutive RR intervals
def compute_rmssd(rr_intervals_ms):
    """Return the RMSSD of a sequence of RR intervals.

    Parameters
    ----------
    rr_intervals_ms : sequence of numbers
        Consecutive RR intervals (milliseconds, per the parameter name).

    Returns
    -------
    float
        Square root of the mean of the squared successive differences, or
        ``np.nan`` when fewer than two intervals are supplied.
    """
    n_intervals = len(rr_intervals_ms)
    if n_intervals >= 2:
        successive = np.diff(rr_intervals_ms)
        return np.sqrt(np.mean(np.square(successive)))
    # a single interval (or none) has no successive difference
    return np.nan


In [None]:
def extract_rr_intervals(ecg_data, sfreq, min_rr_sec=0.3, prominence=None):
    """Detect R-peaks in an ECG segment and return the RR intervals in ms.

    A distance constraint alone accepts *every* local maximum that is far
    enough from its neighbours, so noise ripple and T waves get counted as
    heartbeats and inflate the RR variability. Candidates are therefore also
    filtered by peak prominence.

    Parameters
    ----------
    ecg_data : array-like
        One-dimensional ECG samples.
    sfreq : float
        Sampling frequency in Hz.
    min_rr_sec : float, optional
        Minimum spacing between peaks in seconds (0.3 s caps the detectable
        rate at 200 bpm).
    prominence : float or None, optional
        Minimum peak prominence in the units of ``ecg_data``. ``None`` selects
        an adaptive threshold: half of the 90th percentile of the candidate
        prominences (a heuristic; tune if needed). Pass ``0`` to keep every
        distance-separated local maximum.

    Returns
    -------
    numpy.ndarray or None
        RR intervals in milliseconds, or ``None`` when fewer than two peaks
        survive the filtering.
    """
    signal = np.asarray(ecg_data, dtype=float)

    # find_peaks rejects distance < 1, which would happen for very low sfreq
    min_distance = max(1.0, min_rr_sec * sfreq)

    # prominence=0 keeps all candidates but makes scipy report their prominences
    candidates, props = find_peaks(signal, distance=min_distance, prominence=0)
    prominences = props["prominences"]

    if prominence is None:
        # The most prominent candidates are taken to be R-peaks; anything below
        # half of that level is treated as ripple / T wave.
        threshold = 0.5 * np.percentile(prominences, 90) if prominences.size else 0.0
    else:
        threshold = prominence

    peaks = candidates[prominences >= threshold]
    print(f"Peaks found in segment: {len(peaks)}")
    if len(peaks) < 2:
        return None

    peak_times = peaks / sfreq
    rr_intervals = np.diff(peak_times) * 1000  #convert to ms
    return rr_intervals


In [None]:
#main function
def get_preictal_rmssd(edf_path, seizure_onset_sec, window_sec=60, ecg_channel='ECG SD'):
    """Compute RMSSD over the ECG window immediately preceding a seizure.

    Only the requested window of the ECG channel is read from disk instead of
    preloading the whole recording.

    Parameters
    ----------
    edf_path : str
        Path to the EDF recording.
    seizure_onset_sec : float
        Seizure onset in seconds from the start of the recording.
    window_sec : float, optional
        Length of the pre-ictal window in seconds (default 60).
    ecg_channel : str, optional
        Name of the channel to mark as ECG (default 'ECG SD').

    Returns
    -------
    tuple
        ``(rmssd, window_start_sec, window_end_sec)``. ``rmssd`` is ``np.nan``
        when it cannot be computed; all three values are ``np.nan`` when the
        onset is missing or no ECG channel is available.
    """
    # A missing onset cannot be turned into a sample index
    if pd.isna(seizure_onset_sec):
        return np.nan, np.nan, np.nan

    # preload=False: samples are read lazily, only for the window requested below
    raw = mne.io.read_raw_edf(edf_path, preload=False, verbose=False)

    # set_channel_types raises for unknown names; guard so the "no ECG" branch is reachable
    if ecg_channel in raw.ch_names:
        raw.set_channel_types({ecg_channel: 'ecg'})

    #identify ECG channel
    ecg_chs = mne.pick_types(raw.info, ecg=True)
    if len(ecg_chs) == 0:
        return np.nan, np.nan, np.nan  #return 3 values

    sfreq = raw.info['sfreq']

    #pre-ictal window, truncated at the start of the recording
    start_time = max(seizure_onset_sec - window_sec, 0)
    end_time = seizure_onset_sec

    #convert time to samples; clamp the end to the recording length
    start_sample = int(start_time * sfreq)
    end_sample = min(int(end_time * sfreq), raw.n_times)

    # Covers onsets beyond the recording and negative onsets (a negative stop
    # index would otherwise silently slice from the end of the signal).
    if end_sample <= start_sample:
        return np.nan, start_time, end_time  #return 3 values

    segment = raw.get_data(picks=[ecg_chs[0]], start=start_sample, stop=end_sample)[0]

    rr = extract_rr_intervals(segment, sfreq)
    if rr is None:
        return np.nan, start_time, end_time  #return 3 values

    #compute RMSSD on the entire window and return
    return compute_rmssd(rr), start_time, end_time


In [None]:
#locate the ECG recording that corresponds to an events file
def match_edf_from_tsv_path(tsv_path):
    """Return the path of the ``*_ecg.edf`` file matching an events TSV.

    The EDF name is the TSV file name with ``_events.tsv`` removed and
    ``_ecg.edf`` appended; it is looked up in ``ecg_root/<subject>``, where
    ``<subject>`` is the part of the file name before the first underscore.

    Returns the EDF path if it exists on disk, otherwise ``None``.
    """
    tsv_name = os.path.basename(tsv_path)
    subject_id, _sep, _rest = tsv_name.partition("_")  # e.g. sub-001
    edf_name = tsv_name.replace("_events.tsv", "") + "_ecg.edf"

    candidate = os.path.join(ecg_root, subject_id, edf_name)
    if not os.path.exists(candidate):
        return None
    return candidate


In [None]:
#main code: walk every subject's events files and collect one row per seizure

results = []

for subject_folder in sorted(os.listdir(events_root)): #each patient
    ses_path = os.path.join(events_root, subject_folder, "ses-01", "eeg")
    if not os.path.exists(ses_path):
        continue

    tsv_names = [name for name in sorted(os.listdir(ses_path)) if name.endswith(".tsv")]
    for tsv_file in tsv_names:
        tsv_path = os.path.join(ses_path, tsv_file)
        events = pd.read_csv(tsv_path, sep="\t")

        #case-insensitive match so differently capitalised labels are still caught
        event_labels = events["eventType"].str.strip()
        is_seizure = event_labels.str.contains("sz", case=False, na=False)
        seizure_rows = events[is_seizure]
        n_seizures = len(seizure_rows)

        print(f"{tsv_file}: Found {n_seizures} seizures")  #debug
        if n_seizures == 0:
            continue

        edf_path = match_edf_from_tsv_path(tsv_path)
        if edf_path is None:
            print("Missing EDF for:", tsv_path) #debug
            continue
        print("Found EDF:", edf_path)  #debug

        edf_name = os.path.basename(edf_path)

        #compute RMSSD per seizure; each seizure becomes one result row
        for _row_label, seizure in seizure_rows.iterrows():
            onset = seizure["onset"]
            rmssd, w_start, w_end = get_preictal_rmssd(edf_path, onset)
            record = {
                "subject": subject_folder,
                "events_file": tsv_file,
                "edf_file": edf_name,
                "seizure_onset_sec": onset,
                "preictal_start_sec": w_start,
                "preictal_end_sec": w_end,
                "rmssd": rmssd
            }
            results.append(record)

        print(f"Processed: {tsv_path}")

#write all collected rows to the CSV in one go
df_results = pd.DataFrame(results)
df_results.to_csv(output_csv, index=False)
print(f"CSV saved to {output_csv}, total seizures processed: {len(results)}")


sub-001_ses-01_task-szMonitoring_run-03_events.tsv: Found 1 seizures
Found EDF: /content/drive/MyDrive/SeizeIT2/ECG/sub-001/sub-001_ses-01_task-szMonitoring_run-03_ecg.edf
Peaks found in segment: 152
Processed: /content/drive/MyDrive/SeizeIT2/seizure_events/sub-001/ses-01/eeg/sub-001_ses-01_task-szMonitoring_run-03_events.tsv
sub-001_ses-01_task-szMonitoring_run-05_events.tsv: Found 1 seizures
Found EDF: /content/drive/MyDrive/SeizeIT2/ECG/sub-001/sub-001_ses-01_task-szMonitoring_run-05_ecg.edf
Peaks found in segment: 142
Processed: /content/drive/MyDrive/SeizeIT2/seizure_events/sub-001/ses-01/eeg/sub-001_ses-01_task-szMonitoring_run-05_events.tsv
sub-001_ses-01_task-szMonitoring_run-07_events.tsv: Found 1 seizures
Found EDF: /content/drive/MyDrive/SeizeIT2/ECG/sub-001/sub-001_ses-01_task-szMonitoring_run-07_ecg.edf
Peaks found in segment: 155
Processed: /content/drive/MyDrive/SeizeIT2/seizure_events/sub-001/ses-01/eeg/sub-001_ses-01_task-szMonitoring_run-07_events.tsv
sub-001_ses-01_t