In [1]:
import os
import numpy as np
import pandas as pd
from ieeg.auth import Session

from get_iEEG_data import *
from spike_detector import *
from spike_morphology import *
from iEEG_helper_functions import *

In [2]:
# Directory that receives the per-interval spike arrays; the processing loop
# joins it with "<dataset_name>_<interval>.npy" when calling np.save.
SPIKES_OUTPUT_DIR = "../../Data/spikes/devin_spikes/"

In [3]:
# Read the saved array of HUP patient numbers (presumably the ones vetted for
# the spike detector — the file name suggests so) and display it.
_good_ids_file = "good_hup_ids_for_spike_detector.npy"
good_hup_ids_for_spike_detector = np.load(_good_ids_file)
good_hup_ids_for_spike_detector

array([137, 138, 139, 140, 141, 142, 143, 145, 146, 148, 150, 151, 152,
       153, 154, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166,
       167, 168, 169, 170, 171, 172, 173, 174, 175, 177, 178, 179, 180,
       181, 182, 184, 185, 186, 187, 188, 189, 190, 191, 192, 196, 197,
       199, 201, 202, 204, 205, 206, 207, 209, 210, 211, 213, 214, 215,
       219, 221, 223, 224, 225])

In [4]:
# How many patient IDs were loaded (shape of the 1-D id array).
np.shape(good_hup_ids_for_spike_detector)

(70,)

In [5]:
# Read the implant-date spreadsheet and coerce hup_id to integers in a single
# chained expression (assign overwrites the existing column in place).
nina_patients_df = pd.read_excel("../../Data/HUP_implant_dates.xlsx").assign(
    hup_id=lambda frame: frame["hup_id"].astype(int)
)
nina_patients_df

Unnamed: 0,hup_id,IEEG_Portal_Number,Implant_Date,implant_time,Explant_Date,weight_kg
0,225,HUP225_phaseII,2021-10-18,07:15:00,2021-10-26 17:30:00,58.5
1,224,HUP224_phaseII,2021-10-13,07:15:00,2021-10-20 00:00:00,85.5
2,223,HUP223_phaseII,2021-09-29,07:15:00,2021-10-08 08:21:00,101.4
3,221,HUP221_phaseII,2021-08-16,07:15:00,2021-08-23 00:00:00,124.3
4,219,HUP219_phaseII,2021-07-12,07:15:00,2021-07-16 08:18:00,101.6
...,...,...,...,...,...,...
75,141,HUP141_phaseII,2017-05-24,07:15:00,2017-06-01 00:00:00,85.7
76,140,HUP140_phaseII_D01-D02,2017-05-10,07:15:00,2017-05-19 00:00:00,56.7
77,139,HUP139_phaseII,2017-04-26,07:15:00,2017-05-09 00:00:00,69.8
78,138,HUP138_phaseII,2017-04-12,07:15:00,2017-04-20 00:00:00,84.4


In [6]:
# Append two boolean flags:
#   is_single_dataset          - IEEG_Portal_Number ends with "phaseII"
#   is_good_for_spike_detector - hup_id appears in good_hup_ids_for_spike_detector
nina_patients_df = nina_patients_df.assign(
    is_single_dataset=lambda frame: frame["IEEG_Portal_Number"].str.endswith(
        "phaseII"
    ),
    is_good_for_spike_detector=lambda frame: frame["hup_id"].isin(
        good_hup_ids_for_spike_detector
    ),
)
nina_patients_df

Unnamed: 0,hup_id,IEEG_Portal_Number,Implant_Date,implant_time,Explant_Date,weight_kg,is_single_dataset,is_good_for_spike_detector
0,225,HUP225_phaseII,2021-10-18,07:15:00,2021-10-26 17:30:00,58.5,True,True
1,224,HUP224_phaseII,2021-10-13,07:15:00,2021-10-20 00:00:00,85.5,True,True
2,223,HUP223_phaseII,2021-09-29,07:15:00,2021-10-08 08:21:00,101.4,True,True
3,221,HUP221_phaseII,2021-08-16,07:15:00,2021-08-23 00:00:00,124.3,True,True
4,219,HUP219_phaseII,2021-07-12,07:15:00,2021-07-16 08:18:00,101.6,True,True
...,...,...,...,...,...,...,...,...
75,141,HUP141_phaseII,2017-05-24,07:15:00,2017-06-01 00:00:00,85.7,True,True
76,140,HUP140_phaseII_D01-D02,2017-05-10,07:15:00,2017-05-19 00:00:00,56.7,False,True
77,139,HUP139_phaseII,2017-04-26,07:15:00,2017-05-09 00:00:00,69.8,True,True
78,138,HUP138_phaseII,2017-04-12,07:15:00,2017-04-20 00:00:00,84.4,True,True


In [7]:
# Keep only patients that are flagged True for both is_single_dataset and
# is_good_for_spike_detector, then order by hup_id, discard the clinical
# columns that are no longer needed, and renumber the rows from 0.
_keep_mask = nina_patients_df["is_single_dataset"].eq(True) & nina_patients_df[
    "is_good_for_spike_detector"
].eq(True)
nina_patients_df = (
    nina_patients_df.loc[_keep_mask]
    .sort_values(by=["hup_id"], ascending=True)
    .drop(columns=["Implant_Date", "implant_time", "Explant_Date", "weight_kg"])
    .reset_index(drop=True)
)
nina_patients_df

Unnamed: 0,hup_id,IEEG_Portal_Number,is_single_dataset,is_good_for_spike_detector
0,138,HUP138_phaseII,True,True
1,139,HUP139_phaseII,True,True
2,141,HUP141_phaseII,True,True
3,142,HUP142_phaseII,True,True
4,143,HUP143_phaseII,True,True
5,145,HUP145_phaseII,True,True
6,146,HUP146_phaseII,True,True
7,150,HUP150_phaseII,True,True
8,151,HUP151_phaseII,True,True
9,154,HUP154_phaseII,True,True


In [8]:
# Exclude HUP 143 and HUP 210 from the cohort (the reason is not recorded in
# this notebook), then renumber the rows.
_excluded_hup_ids = [143, 210]
nina_patients_df = nina_patients_df.loc[
    ~nina_patients_df["hup_id"].isin(_excluded_hup_ids)
].reset_index(drop=True)
nina_patients_df

Unnamed: 0,hup_id,IEEG_Portal_Number,is_single_dataset,is_good_for_spike_detector
0,138,HUP138_phaseII,True,True
1,139,HUP139_phaseII,True,True
2,141,HUP141_phaseII,True,True
3,142,HUP142_phaseII,True,True
4,145,HUP145_phaseII,True,True
5,146,HUP146_phaseII,True,True
6,150,HUP150_phaseII,True,True
7,151,HUP151_phaseII,True,True
8,154,HUP154_phaseII,True,True
9,157,HUP157_phaseII,True,True


In [9]:
# Build the record_id <-> hup_id lookup from ../../Data/rid_hup_table.csv:
# drop the unused id columns, rename hupsubjno to hup_id, sort by hup_id,
# keep only patients still present in nina_patients_df, and renumber the rows.
rid_hup_table_df = (
    pd.read_csv("../../Data/rid_hup_table.csv")
    .drop(columns=["t3_subject_id", "ieegportalsubjno"])
    .rename(columns={"hupsubjno": "hup_id"})
    .sort_values(by=["hup_id"])
    .loc[lambda table: table["hup_id"].isin(nina_patients_df["hup_id"])]
    .reset_index(drop=True)
)
rid_hup_table_df.head()

Unnamed: 0,record_id,hup_id
0,278,138
1,294,141
2,295,142
3,167,145
4,301,146


In [10]:
# Load the electrode table from ../../Data/master_elecs.csv and attach hup_id.
#
# Steps, in order:
#   1. keep only the digits of the `rid` column and cast them to int
#      (the pattern is a raw string so that "\d" is not treated as an invalid
#      string escape, which newer Python versions warn about);
#   2. drop the coordinate columns that are not used below;
#   3. rename rid -> record_id so it can be joined with rid_hup_table_df;
#   4. left-merge the hup_id lookup and discard electrodes with no hup_id;
#   5. cast hup_id back to int (the left merge introduces NaN, hence floats);
#   6. sort by hup_id and renumber the rows.
#
# Everything is done in one chain with `assign`, so no column is ever set on
# the intermediate result of dropna (which can raise SettingWithCopyWarning).
master_elecs_df = (
    pd.read_csv("../../Data/master_elecs.csv")
    .assign(
        rid=lambda elecs: elecs["rid"]
        .str.extract(r"(\d+)", expand=False)
        .astype(int)
    )
    .drop(columns=["mni_x", "mni_y", "mni_z", "mm_x", "mm_y", "mm_z"])
    .rename(columns={"rid": "record_id"})
    .merge(rid_hup_table_df, on="record_id", how="left")
    .dropna(subset=["hup_id"])
    .assign(hup_id=lambda elecs: elecs["hup_id"].astype(int))
    .sort_values(by=["hup_id"])
    .reset_index(drop=True)
)
master_elecs_df.head()

Unnamed: 0,record_id,name,vox_x,vox_y,vox_z,label,soz,resected,spike_rate,engel,hup_id
0,278,RA06,162.025,142.32,129.025,EmptyLabel,False,False,2.028986,2.0,138
1,278,RE01,147.501,167.997,157.781,right insula,False,False,3.594203,2.0,138
2,278,RC08,154.563,59.4944,151.858,right lateral occipital,False,False,0.057971,2.0,138
3,278,RC07,150.259,62.5073,148.975,right lateral occipital,False,False,,2.0,138
4,278,RC06,145.976,65.6267,144.613,EmptyLabel,False,False,,2.0,138


In [11]:
# Retain only the patients that have at least one row in master_elecs_df,
# then renumber the rows.
_has_electrodes = nina_patients_df["hup_id"].isin(master_elecs_df["hup_id"])
nina_patients_df = nina_patients_df.loc[_has_electrodes].reset_index(drop=True)
nina_patients_df

Unnamed: 0,hup_id,IEEG_Portal_Number,is_single_dataset,is_good_for_spike_detector
0,138,HUP138_phaseII,True,True
1,141,HUP141_phaseII,True,True
2,142,HUP142_phaseII,True,True
3,145,HUP145_phaseII,True,True
4,146,HUP146_phaseII,True,True
5,150,HUP150_phaseII,True,True
6,151,HUP151_phaseII,True,True
7,154,HUP154_phaseII,True,True
8,157,HUP157_phaseII,True,True
9,158,HUP158_phaseII,True,True


In [12]:
# Preview of the patients whose hup_id leaves remainder 0 when divided by 4.
nina_patients_df.loc[nina_patients_df["hup_id"].mod(4).eq(0)].reset_index(drop=True)

Unnamed: 0,hup_id,IEEG_Portal_Number,is_single_dataset,is_good_for_spike_detector
0,160,HUP160_phaseII,True,True
1,172,HUP172_phaseII,True,True
2,180,HUP180_phaseII,True,True
3,184,HUP184_phaseII,True,True
4,188,HUP188_phaseII,True,True
5,192,HUP192_phaseII,True,True
6,196,HUP196_phaseII,True,True
7,204,HUP204_phaseII,True,True
8,224,HUP224_phaseII,True,True


In [13]:
# Preview of the patients whose hup_id leaves remainder 1 when divided by 4.
nina_patients_df.loc[nina_patients_df["hup_id"].mod(4).eq(1)].reset_index(drop=True)

Unnamed: 0,hup_id,IEEG_Portal_Number,is_single_dataset,is_good_for_spike_detector
0,141,HUP141_phaseII,True,True
1,145,HUP145_phaseII,True,True
2,157,HUP157_phaseII,True,True
3,161,HUP161_phaseII,True,True
4,165,HUP165_phaseII,True,True
5,169,HUP169_phaseII,True,True
6,173,HUP173_phaseII,True,True
7,177,HUP177_phaseII,True,True
8,185,HUP185_phaseII,True,True
9,189,HUP189_phaseII,True,True


In [14]:
# Preview of the patients whose hup_id leaves remainder 2 when divided by 4.
nina_patients_df.loc[nina_patients_df["hup_id"].mod(4).eq(2)].reset_index(drop=True)

Unnamed: 0,hup_id,IEEG_Portal_Number,is_single_dataset,is_good_for_spike_detector
0,138,HUP138_phaseII,True,True
1,142,HUP142_phaseII,True,True
2,146,HUP146_phaseII,True,True
3,150,HUP150_phaseII,True,True
4,154,HUP154_phaseII,True,True
5,158,HUP158_phaseII,True,True
6,166,HUP166_phaseII,True,True
7,170,HUP170_phaseII,True,True
8,174,HUP174_phaseII,True,True
9,178,HUP178_phaseII,True,True


In [15]:
# Preview of the patients whose hup_id leaves remainder 3 when divided by 4.
nina_patients_df.loc[nina_patients_df["hup_id"].mod(4).eq(3)].reset_index(drop=True)

Unnamed: 0,hup_id,IEEG_Portal_Number,is_single_dataset,is_good_for_spike_detector
0,151,HUP151_phaseII,True,True
1,171,HUP171_phaseII,True,True
2,175,HUP175_phaseII,True,True
3,187,HUP187_phaseII,True,True
4,199,HUP199_phaseII,True,True
5,207,HUP207_phaseII,True,True
6,223,HUP223_phaseII,True,True


## Select a batch

In [None]:
# The cohort is split into four groups by hup_id modulo 4; this run processes
# the group with the remainder chosen below.
_batch_remainder = 3
batch = nina_patients_df.loc[
    nina_patients_df["hup_id"].mod(4).eq(_batch_remainder)
].reset_index(drop=True)
batch

In [None]:
# Open the IEEG session used to inspect datasets (channel labels, durations).
#
# SECURITY: an earlier revision of this cell kept a plaintext account password
# inside commented-out code. It has been removed; that password should be
# treated as compromised and rotated, and credentials must never be written
# into the notebook.
#
# The password file is read in full and passed as the password, so it must
# contain nothing but the password. One way to create it without exposing the
# password in the notebook or its outputs:
#     from getpass import getpass
#     with open("agu_ieeglogin.bin", "wb") as f:
#         f.write(getpass("IEEG password: ").encode())
IEEG_SESSION_USERNAME = "aguilac"
IEEG_SESSION_PASSWORD_FILE = "agu_ieeglogin.bin"

print("Using Carlos session")
with open(IEEG_SESSION_PASSWORD_FILE, "r") as f:
    session = Session(IEEG_SESSION_USERNAME, f.read())

In [None]:
# Process every patient in `batch`: walk the recording in 2-minute intervals,
# compute synchrony in three bands, detect spikes, extract their morphology
# and save one .npy file per interval that contains spikes.

# Credentials handed to get_iEEG_data() for every download.
# NOTE(review): this is a different account ("dma") from the one used to open
# `session` ("aguilac") — confirm that the split is intentional.
IEEG_DOWNLOAD_USERNAME = "dma"
IEEG_DOWNLOAD_PASSWORD_FILE = "dma_ieeglogin.bin"

INTERVAL_USEC = 2 * 60 * 1e6  # one analysis interval: 2 minutes in microseconds
INTERVALS_PER_HOUR = 30  # 60 min/hour / 2 min
EXTRA_HOURS = 24  # hours scanned beyond the reported duration (reason not recorded — TODO confirm)
SPIKE_HALF_WINDOW = 1000  # samples taken on each side of a spike peak


def _build_spike_feature_matrix(spike_output, signal, half_window=SPIKE_HALF_WINDOW):
    """Return one row of morphology features per detected spike.

    Parameters
    ----------
    spike_output : array-like of (peak_index, channel_index) pairs, as returned
        by spike_detector; must expose ``.shape[0]``.
    signal : 2-D array indexed as ``signal[sample, channel]``; the same array
        that was given to the spike detector.
    half_window : number of samples taken before and after the peak.

    Returns
    -------
    np.ndarray of shape (n_spikes, 15), float64. Column 0 is the peak index,
    column 1 the channel index, columns 2-6 the basic features and columns
    7-14 the advanced features. Feature columns stay NaN when the spike is
    reported as invalid, when extraction raises, or when the spike is too
    close to the start of the interval for the window to begin at a
    non-negative sample.
    """
    spike_features = np.full((spike_output.shape[0], 15), np.nan, dtype=np.float64)

    for i, spike in enumerate(spike_output):
        peak_index, channel_index = spike
        spike_features[i, 0] = peak_index
        spike_features[i, 1] = channel_index

        window_start = peak_index - half_window
        if window_start < 0:
            # A negative start would be interpreted as "count from the end" and
            # silently select the wrong samples (usually none), so skip the
            # feature extraction explicitly and leave the row as NaN.
            print(
                f"Spike at sample {peak_index} is too close to the interval start, "
                "leaving its features as NaN"
            )
            continue

        # NOTE(review): a spike near the END of the interval still yields a
        # shorter window here, exactly as before — confirm that
        # extract_spike_morphology copes with that.
        spike_signal = signal[window_start : peak_index + half_window, channel_index]

        try:
            (
                basic_features,
                advanced_features,
                is_valid,
                bad_reason,
            ) = extract_spike_morphology(spike_signal)

            if is_valid:
                spike_features[i, 2:7] = basic_features
                spike_features[i, 7:15] = advanced_features
        except Exception as e:
            # Best effort: one bad spike must not abort the whole interval.
            print(f"Error extracting spike features: {e}")
            continue

    return spike_features


# np.save does not create missing directories, so make sure the target exists.
os.makedirs(SPIKES_OUTPUT_DIR, exist_ok=True)

for _, row in batch.iterrows():
    hup_id = row["hup_id"]
    dataset_name = row["IEEG_Portal_Number"]

    dataset = session.open_dataset(dataset_name)

    all_channel_labels = np.array(dataset.get_channel_labels())
    channel_labels_to_download = all_channel_labels[
        electrode_selection(all_channel_labels)
    ]
    if channel_labels_to_download.size == 0:
        # Without at least one channel the duration lookup below would fail.
        print(f"No channels selected for {dataset_name}, skip...")
        continue

    # Recording length is taken from the first selected channel.
    dataset_duration_usec = dataset.get_time_series_details(
        channel_labels_to_download[0]
    ).duration
    duration_hours = int(dataset_duration_usec / 1000000 / 60 / 60)
    enlarged_duration_hours = duration_hours + EXTRA_HOURS

    print(f"Opening {dataset_name} with duration {duration_hours} hours")

    # Calculate the total number of 2-minute intervals in the enlarged duration
    total_intervals = enlarged_duration_hours * INTERVALS_PER_HOUR

    # One synchrony value per interval; intervals that are skipped stay NaN.
    # NOTE(review): these vectors are filled below but are not written to disk
    # anywhere in the visible part of this cell — confirm they are saved later.
    synchrony_broadband_vector_to_save = np.full(total_intervals, np.nan)
    synchrony_60_100_vector_to_save = np.full(total_intervals, np.nan)
    synchrony_100_125_vector_to_save = np.full(total_intervals, np.nan)

    # Loop through each 2-minute interval
    for interval in range(total_intervals):
        print(f"Getting iEEG data for interval {interval} out of {total_intervals}")
        start_time_usec = interval * INTERVAL_USEC
        stop_time_usec = start_time_usec + INTERVAL_USEC

        try:
            ieeg_data, fs = get_iEEG_data(
                IEEG_DOWNLOAD_USERNAME,
                IEEG_DOWNLOAD_PASSWORD_FILE,
                dataset_name,
                start_time_usec,
                stop_time_usec,
                channel_labels_to_download,
            )
            fs = int(fs)
        except Exception as e:
            # Any download failure ends this patient. The loop over-requests by
            # EXTRA_HOURS, so presumably this is also how the end of the
            # recording is reached; note that a transient error stops it too.
            print(f"Error: {e}")
            break

        # Drop rows that has any nan
        ieeg_data = ieeg_data.dropna(axis=0, how="any")
        if ieeg_data.empty:
            print("Empty dataframe after dropping nan, skip...")
            continue

        good_channels_res = detect_bad_channels_optimized(ieeg_data.to_numpy(), fs)
        good_channel_indices = good_channels_res[0]
        good_channel_labels = channel_labels_to_download[good_channel_indices]
        # From here on ieeg_data is a NumPy array restricted to the good channels.
        ieeg_data = ieeg_data[good_channel_labels].to_numpy()

        # Check if ieeg_data is empty after dropping bad channels
        if ieeg_data.size == 0:
            print("Empty dataframe after dropping bad channels, skip...")
            continue

        ieeg_data = common_average_montage(ieeg_data)

        # Remove line noise (59-61 Hz) from the re-referenced array
        ieeg_data = notch_filter(ieeg_data, 59, 61, fs)

        ##############################
        # Calculate synchrony (broadband)
        ##############################
        _, R = calculate_synchrony(ieeg_data.T)
        synchrony_broadband_vector_to_save[interval] = R

        ##############################
        # Calculate synchrony (60-100Hz)
        ##############################
        ieeg_data_60_100 = bandpass_filter(ieeg_data, 60, 100, fs)
        _, R = calculate_synchrony(ieeg_data_60_100.T)
        synchrony_60_100_vector_to_save[interval] = R

        ##############################
        # Calculate synchrony (100-125Hz)
        ##############################
        # Guarded: a failure in this band leaves the value as NaN and the
        # interval continues (presumably the band can be invalid for low
        # sampling rates — TODO confirm).
        try:
            ieeg_data_100_125 = bandpass_filter(ieeg_data, 100, 125, fs)
            _, R = calculate_synchrony(ieeg_data_100_125.T)
            synchrony_100_125_vector_to_save[interval] = R
        except Exception as e:
            print(f"Error: {e}")

        print(f"Finished calculating synchrony for interval {interval}")

        ##############################
        # Detect spikes
        ##############################
        ieeg_data_for_spikes = bandpass_filter(ieeg_data, 1, 70, fs)

        spike_output = spike_detector(
            data=ieeg_data_for_spikes,
            fs=fs,
            labels=good_channel_labels,
        )
        if len(spike_output) == 0:
            print("No spikes detected, skip saving...")
            continue
        else:
            print(f"Detected {len(spike_output)} spikes")

        ##############################
        # Extract spike morphologies
        ##############################
        spike_output_to_save = _build_spike_feature_matrix(
            spike_output, ieeg_data_for_spikes
        )

        ##############################
        # Save the spike output
        ##############################
        np.save(
            os.path.join(SPIKES_OUTPUT_DIR, f"{dataset_name}_{interval}.npy"),
            spike_output_to_save,
        )
        print(f"Saved spike output for interval {interval} for HUP {hup_id}")