# Download iEEG Around Seizure Times

In [None]:
import getpass
import os
import pickle

import numpy as np
import pandas as pd
import scipy.io
from ieeg.auth import Session

from ieeg_utils import *

# All paths are relative to the working directory the notebook is run from.
_SEIZURE_DATA_ROOT = "../../../Data/seizures/"
_SEIZURE_XLSX_ROOT = _SEIZURE_DATA_ROOT + "source_xlsx/"

# Per-patient HUP_<id>.npy arrays holding seizure start/end times.
SEIZURE_TIMES_DIRECTORY = _SEIZURE_XLSX_ROOT + "times/"
# Per-patient HUP_<id>.npy arrays holding the dataset label of each seizure.
SEIZURE_DATASETS_DIRECTORY = _SEIZURE_XLSX_ROOT + "metadata/"
# Per-patient HUP_<id>.csv seizure tables read by the download step.
SEIZURE_TABLES_DIRECTORY = _SEIZURE_DATA_ROOT + "tables/"
# Destination for the pickled iEEG clips.
SEIZURE_CLIPS_DIRECTORY = _SEIZURE_DATA_ROOT + "ieeg_clips/"

## Build seizure time tables and save to csv

In [None]:
# Collect the HUP id of every patient that has a seizure-times file in
# SEIZURE_TIMES_DIRECTORY. Hidden files (leading ".") and anything that is not
# a .npy file are ignored. The id is the text between the first underscore and
# the following dot of the file name.
xlsx_patient_hup_ids = sorted(
    entry.split("_")[1].split(".")[0]
    for entry in os.listdir(SEIZURE_TIMES_DIRECTORY)
    if entry.endswith(".npy") and not entry.startswith(".")
)
# Notebook display: number of patients found
len(xlsx_patient_hup_ids)

In [None]:
def create_pwd_file(username, password, fname=None):
    """Write *password* to a binary login file and print a confirmation.

    Args:
        username: Account name; its first three characters prefix the default
            file name.
        password: Password text, written encoded with ``str.encode()``.
        fname: Destination path. When ``None`` the file is named
            ``<first three characters of username>_ieeglogin.bin`` and is
            created in the current working directory.
    """
    target = f"{username[:3]}_ieeglogin.bin" if fname is None else fname
    with open(target, "wb") as handle:
        handle.write(password.encode())
    print("-- -- IEEG password file saved -- --")


# Credentials must not live in source control. The password is taken from the
# IEEG_PASSWORD environment variable when set; otherwise an existing login
# file is reused, and only if neither is available is the user prompted.
# NOTE(review): a password was previously embedded on this line and remains in
# version-control history -- it should be rotated.
ieeg_password = os.environ.get("IEEG_PASSWORD")
if ieeg_password is None and not os.path.exists("dma_ieeglogin.bin"):
    ieeg_password = getpass.getpass("iEEG password for dma: ")
if ieeg_password is not None:
    create_pwd_file("dma", ieeg_password)
# Do not keep the plaintext password around in the notebook namespace.
del ieeg_password

# Open the iEEG session with the password stored in the login file.
with open("dma_ieeglogin.bin", "r") as f:
    session = Session("dma", f.read())

In [None]:
# Build one seizure table per patient from the .npy exports.
#
# Each table has the columns "start_sec", "end_sec" and "dataset_name", with
# the short dataset labels expanded to full dataset names, and is sorted by
# dataset and start time.

num_seizures = []  # number of seizures per patient, in patient order
num_seizures_in_consideration = 0

# Opt-in side effects, both disabled by default.
# SAVE_SEIZURE_TABLES writes each table to SEIZURE_TABLES_DIRECTORY; the
# download step reads those CSVs, so enable it when they need regenerating.
SAVE_SEIZURE_TABLES = False
# CHECK_DATASETS_OPEN tries to open every referenced dataset through `session`
# and prints any error it gets.
CHECK_DATASETS_OPEN = False

for patient_hup_id in xlsx_patient_hup_ids:
    seizure_times = np.load(
        os.path.join(SEIZURE_TIMES_DIRECTORY, f"HUP_{patient_hup_id}.npy")
    )
    seizure_datasets = np.load(
        os.path.join(SEIZURE_DATASETS_DIRECTORY, f"HUP_{patient_hup_id}.npy")
    )
    if len(seizure_times) != len(seizure_datasets):
        raise ValueError(
            f"HUP_{patient_hup_id}: {len(seizure_times)} seizure times but "
            f"{len(seizure_datasets)} dataset labels"
        )

    # Build the table in one step; growing a frame with pd.concat inside a
    # loop copies every existing row on each iteration.
    seizures_df = pd.DataFrame(
        {
            "start_sec": [times[0] for times in seizure_times],
            "end_sec": [times[1] for times in seizure_times],
            "dataset_name": list(seizure_datasets),
        },
        columns=["start_sec", "end_sec", "dataset_name"],
    )

    # Expand the short labels into full dataset names: "one file" becomes
    # HUP{id}_phaseII and "D01".."D04" become HUP{id}_phaseII_D0x.
    dataset_aliases = {"one file": f"HUP{patient_hup_id}_phaseII"}
    for suffix in ("D01", "D02", "D03", "D04"):
        dataset_aliases[suffix] = f"HUP{patient_hup_id}_phaseII_{suffix}"
    for short_name, full_name in dataset_aliases.items():
        seizures_df.loc[
            seizures_df["dataset_name"] == short_name, "dataset_name"
        ] = full_name

    # Sort by dataset and start time so that seizures are in chronological
    # order within each dataset, then renumber the rows.
    seizures_df = seizures_df.sort_values(
        ["dataset_name", "start_sec"]
    ).reset_index(drop=True)

    num_seizures.append(len(seizures_df))

    if SAVE_SEIZURE_TABLES:
        seizures_df.to_csv(
            os.path.join(SEIZURE_TABLES_DIRECTORY, f"HUP_{patient_hup_id}.csv"),
            index=False,
        )

    if CHECK_DATASETS_OPEN:
        for dataset_name in seizures_df["dataset_name"].unique():
            print(f"Opening dataset {dataset_name}...")
            try:
                session.open_dataset(dataset_name)
            except Exception as e:
                # Best-effort check: report the failure and keep going.
                print(e)

In [None]:
# Distinct values found in num_seizures (the list initialised by the
# table-building cell above), returned sorted by np.unique.
distinct_seizure_counts = np.unique(num_seizures)
distinct_seizure_counts

## Download iEEG surrounding seizures

In [68]:
# Durations in microseconds (floats) ...
ONE_MINUTE_USEC = 60 * 1e6
SEVEN_HOURS_USEC = 420 * ONE_MINUTE_USEC  # 7 h = 420 min
# ... and the same seven hours in whole seconds (int), for comparing the
# second-based columns of the seizure tables.
SEVEN_HOURS_IN_SECONDS = 7 * 3600

In [None]:
# Download iEEG clips around every sufficiently isolated seizure.
#
# For each seizure a window of 7 h + 10 min + 7 h is centred on the seizure
# midpoint and tiled with 85 consecutive 10-minute windows (42 before, the one
# containing the seizure, 42 after). The central 2 minutes of each 10-minute
# window are downloaded and pickled into SEIZURE_CLIPS_DIRECTORY.
#
# A seizure is skipped when a neighbouring seizure in the same dataset is less
# than 7 hours away, when its dataset cannot be opened, or when the full
# window does not fit inside the recording.

num_seizures_in_consideration = 0
total_num_seizures = 0

# 7 hours = 42 consecutive 10-minute windows
num_10_min_windows_in_7_hours = 42

for patient_hup_id in xlsx_patient_hup_ids:
    seizures_df = pd.read_csv(
        os.path.join(SEIZURE_TABLES_DIRECTORY, f"HUP_{patient_hup_id}.csv")
    )
    # Drop any rows that contain NaN values. The surviving rows keep their
    # original index labels, so labels and positions can differ from here on.
    seizures_df = seizures_df.dropna()
    last_position = len(seizures_df) - 1

    # seizure_position is the row's position in the filtered table and is what
    # .iloc needs for the neighbour lookups; seizure_index is the row label
    # and is only used in messages and file names.
    for seizure_position, (seizure_index, seizure_row) in enumerate(
        seizures_df.iterrows()
    ):
        total_num_seizures += 1
        start_sec, end_sec, dataset_name = (
            seizure_row["start_sec"],
            seizure_row["end_sec"],
            seizure_row["dataset_name"],
        )

        # Skip if the previous seizure in the same dataset ended less than
        # 7 hours before this one started
        if seizure_position != 0:
            previous_row = seizures_df.iloc[seizure_position - 1]
            if (
                dataset_name == previous_row["dataset_name"]
                and start_sec - previous_row["end_sec"] < SEVEN_HOURS_IN_SECONDS
            ):
                print(
                    f"Skipping seizure {seizure_index} for {dataset_name} because it's too close to the previous seizure"
                )
                continue

        # Skip if the next seizure in the same dataset starts less than
        # 7 hours after this one ended
        if seizure_position != last_position:
            next_row = seizures_df.iloc[seizure_position + 1]
            if (
                dataset_name == next_row["dataset_name"]
                and next_row["start_sec"] - end_sec < SEVEN_HOURS_IN_SECONDS
            ):
                print(
                    f"Skipping seizure {seizure_index} for {dataset_name} because it's too close to the next seizure"
                )
                continue

        # Window layout, all in microseconds:
        #   entire window  = 7 h | 10-minute seizure window | 7 h
        #   seizure window = seizure midpoint +/- 5 minutes
        start_usec, end_usec = int(start_sec * 1e6), int(end_sec * 1e6)
        midpoint_usec = start_usec + int((end_usec - start_usec) / 2)
        seizure_window_start_usec = midpoint_usec - ONE_MINUTE_USEC * 5
        seizure_window_end_usec = midpoint_usec + ONE_MINUTE_USEC * 5
        entire_window_start_usec = seizure_window_start_usec - SEVEN_HOURS_USEC
        entire_window_end_usec = seizure_window_end_usec + SEVEN_HOURS_USEC

        try:
            dataset = session.open_dataset(dataset_name)
            channel_labels = dataset.get_channel_labels()
            # The recording duration is read from the channel at index 5.
            duration_usec = dataset.get_time_series_details(channel_labels[5]).duration
        except Exception as error:
            print(f"Error while opening dataset {dataset_name}: {error}")
            continue

        # Check if the entire window is within the dataset
        if entire_window_start_usec < 0 or entire_window_end_usec > duration_usec:
            print(
                f"Skipping seizure {seizure_index} for {dataset_name} because it's out of dataset bounds"
            )
            continue

        num_seizures_in_consideration += 1
        print(f"Downloading {dataset_name} for seizure {seizure_index}...")

        assert (
            entire_window_end_usec - entire_window_start_usec
            == 2 * SEVEN_HOURS_USEC + 10 * ONE_MINUTE_USEC
        )

        for ten_minute_window_idx in range(num_10_min_windows_in_7_hours * 2 + 1):
            print(f"Downloading 2 minute window #{ten_minute_window_idx}")
            ten_minute_window_start_usec = (
                entire_window_start_usec + ten_minute_window_idx * 10 * ONE_MINUTE_USEC
            )
            ten_minute_window_end_usec = (
                ten_minute_window_start_usec + 10 * ONE_MINUTE_USEC
            )

            # find the midpoint of the ten minute window
            ten_minute_window_midpoint_usec = ten_minute_window_start_usec + int(
                (ten_minute_window_end_usec - ten_minute_window_start_usec) / 2
            )

            # Sanity checks on the tiling: the middle window is centred on the
            # seizure and the last window ends where the entire window ends.
            if ten_minute_window_idx == num_10_min_windows_in_7_hours:
                assert ten_minute_window_midpoint_usec == midpoint_usec

            if ten_minute_window_idx == num_10_min_windows_in_7_hours * 2:
                assert ten_minute_window_end_usec == entire_window_end_usec

            # Only the central 2 minutes of each 10-minute window are fetched
            two_minute_downsampled_window_start_usec = (
                ten_minute_window_midpoint_usec - ONE_MINUTE_USEC
            )
            two_minute_downsampled_window_end_usec = (
                ten_minute_window_midpoint_usec + ONE_MINUTE_USEC
            )

            assert (
                two_minute_downsampled_window_end_usec
                - two_minute_downsampled_window_start_usec
                == 2 * ONE_MINUTE_USEC
            )

            ##############################
            # Download the 2 minute window
            ##############################

            try:
                local_res, sample_rate = get_iEEG_data(
                    "dma",
                    "dma_ieeglogin.bin",
                    dataset_name,
                    two_minute_downsampled_window_start_usec,
                    two_minute_downsampled_window_end_usec,
                )
            except Exception as error:
                print(f"Error while downloading from iEEG: {error}")
                continue

            # Nothing to save for an empty result. This guard has to run
            # before the sample-count check, which an empty frame always fails.
            if local_res.empty:
                print(
                    f"Skipping 2 minute window #{ten_minute_window_idx} because no data was returned"
                )
                continue

            # The clip must hold exactly sample_rate * 120 s rows
            assert (
                len(local_res)
                == sample_rate
                * (
                    two_minute_downsampled_window_end_usec
                    - two_minute_downsampled_window_start_usec
                )
                / 1e6
            )

            # Save local_res to a pickle file named after patient, seizure,
            # window number and sampling rate
            with open(
                os.path.join(
                    SEIZURE_CLIPS_DIRECTORY,
                    f"HUP_{patient_hup_id}_seizure_{seizure_index}_wd_{ten_minute_window_idx}_fs_{int(sample_rate)}.pkl",
                ),
                "wb",
            ) as f:
                pickle.dump(local_res, f)

        assert ten_minute_window_end_usec == entire_window_end_usec