In [1]:
import getpass
import os

import numpy as np
import pandas as pd
from ieeg.auth import Session

from get_iEEG_data import *
from iEEG_helper_functions import *

In [2]:
# Relative path to a spikes output folder. It is not referenced by any other
# visible cell — presumably used by a sibling notebook; TODO confirm.
SPIKES_OUTPUT_DIR = "../../Data/spikes/devin_spikes/"
# Folder holding the per-patient broadband synchrony arrays. The (currently
# commented-out) save step of the download loop writes HUP_<hup_id>.npy here.
SYNCHRONY_broadband_DIRECTORY = "../../Data/synchrony/all/broadband"

In [3]:
# Read the implant-date spreadsheet and cast hup_id to plain integers in a
# single chained expression so the column can be compared against int ids.
nina_patients_df = pd.read_excel("../../Data/HUP_implant_dates.xlsx").assign(
    hup_id=lambda implants: implants["hup_id"].astype(int)
)
nina_patients_df

Unnamed: 0,hup_id,IEEG_Portal_Number,Implant_Date,implant_time,Explant_Date,weight_kg
0,225,HUP225_phaseII,2021-10-18,07:15:00,2021-10-26 17:30:00,58.5
1,224,HUP224_phaseII,2021-10-13,07:15:00,2021-10-20 00:00:00,85.5
2,223,HUP223_phaseII,2021-09-29,07:15:00,2021-10-08 08:21:00,101.4
3,221,HUP221_phaseII,2021-08-16,07:15:00,2021-08-23 00:00:00,124.3
4,219,HUP219_phaseII,2021-07-12,07:15:00,2021-07-16 08:18:00,101.6
...,...,...,...,...,...,...
75,141,HUP141_phaseII,2017-05-24,07:15:00,2017-06-01 00:00:00,85.7
76,140,HUP140_phaseII_D01-D02,2017-05-10,07:15:00,2017-05-19 00:00:00,56.7
77,139,HUP139_phaseII,2017-04-26,07:15:00,2017-05-09 00:00:00,69.8
78,138,HUP138_phaseII,2017-04-12,07:15:00,2017-04-20 00:00:00,84.4


## Already completed

In [4]:
def get_patient_hup_ids(directory):
    """Return the integer patient ids encoded in ``HUP_<id>.npy`` file names.

    Only entries of ``directory`` whose name starts with ``HUP_`` and ends
    with ``.npy`` are considered. The id is the text between the first and
    second underscore, truncated at its first dot.

    Files that match the pattern but whose id part is not an integer (for
    example ``HUP_notes.npy``) are skipped with a printed notice instead of
    aborting the whole scan with a ``ValueError``.

    Args:
        directory: Path of the folder to scan.

    Returns:
        List of ids as ``int``, in the order ``os.listdir`` yields the files.

    Raises:
        OSError: If ``directory`` cannot be listed.
    """
    patient_hup_ids = []
    for f in os.listdir(directory):
        if not (f.startswith("HUP_") and f.endswith(".npy")):
            continue

        id_text = f.split("_")[1].split(".")[0]
        try:
            patient_hup_ids.append(int(id_text))
        except ValueError:
            # One stray file must not prevent the remaining ids from being read.
            print(f"Skipping {f}: {id_text!r} is not an integer patient id")

    return patient_hup_ids


# Reuse the configured synchrony folder instead of repeating the path literal,
# so the "already completed" scan always looks where results are saved.
directory = SYNCHRONY_broadband_DIRECTORY
# Sorted list of patients that already have a saved synchrony array.
completed_hup_ids = sorted(get_patient_hup_ids(directory))
print(completed_hup_ids)

[138, 139, 141, 142, 143, 144, 145, 146, 150, 151, 154, 155, 157, 158, 160, 161, 163, 164, 165, 166, 169, 170, 171, 172, 173, 174, 175, 177, 178, 180, 182, 184, 185, 186, 187, 188, 189, 190, 191, 192, 196, 199, 202, 204, 205, 206, 207, 210, 217, 219, 221, 223, 225]


In [5]:
# Remove every patient whose hup_id is already listed in completed_hup_ids,
# then renumber the surviving rows from zero.
nina_patients_df = nina_patients_df.loc[
    ~nina_patients_df["hup_id"].isin(completed_hup_ids)
].reset_index(drop=True)
nina_patients_df

Unnamed: 0,hup_id,IEEG_Portal_Number,Implant_Date,implant_time,Explant_Date,weight_kg
0,224,HUP224_phaseII,2021-10-13,07:15:00,2021-10-20 00:00:00,85.5
1,216,HUP216_phaseII_D01-D02,2021-04-05,07:15:00,2021-04-20 00:00:00,77.1
2,215,HUP215_phaseII_D01-04,2021-01-25,07:15:00,2021-02-05 00:00:00,90.3
3,214,HUP214_phaseII_D01-D02,2021-01-13,07:15:00,2021-01-27 00:00:00,59.2
4,213,HUP213_phaseII_D01-02,2021-01-04,07:15:00,2021-01-29 00:00:00,87.5
5,211,HUP211_phaseII,2020-12-07,07:15:00,2020-12-16 00:00:00,69.9
6,209,HUP209_phaseII_D01-D02,2020-11-09,07:15:00,2020-11-25 17:44:00,70.3
7,208,HUP208_phaseII_D01-D02,2020-10-26,07:15:00,2020-11-07 00:00:00,85.4
8,201,HUP201_phaseII_D01-D02,2020-02-26,07:15:00,2020-03-11 00:00:00,97.1
9,197,HUP197_phaseII_D01-02,2019-10-28,07:15:00,2019-11-15 00:00:00,88.8


In [6]:
# Flag patients whose portal name ends with "phaseII" (no trailing suffix
# after it) in a new boolean column, is_single_dataset.
nina_patients_df = nina_patients_df.assign(
    is_single_dataset=nina_patients_df["IEEG_Portal_Number"].str.endswith("phaseII")
)
nina_patients_df

Unnamed: 0,hup_id,IEEG_Portal_Number,Implant_Date,implant_time,Explant_Date,weight_kg,is_single_dataset
0,224,HUP224_phaseII,2021-10-13,07:15:00,2021-10-20 00:00:00,85.5,True
1,216,HUP216_phaseII_D01-D02,2021-04-05,07:15:00,2021-04-20 00:00:00,77.1,False
2,215,HUP215_phaseII_D01-04,2021-01-25,07:15:00,2021-02-05 00:00:00,90.3,False
3,214,HUP214_phaseII_D01-D02,2021-01-13,07:15:00,2021-01-27 00:00:00,59.2,False
4,213,HUP213_phaseII_D01-02,2021-01-04,07:15:00,2021-01-29 00:00:00,87.5,False
5,211,HUP211_phaseII,2020-12-07,07:15:00,2020-12-16 00:00:00,69.9,True
6,209,HUP209_phaseII_D01-D02,2020-11-09,07:15:00,2020-11-25 17:44:00,70.3,False
7,208,HUP208_phaseII_D01-D02,2020-10-26,07:15:00,2020-11-07 00:00:00,85.4,False
8,201,HUP201_phaseII_D01-D02,2020-02-26,07:15:00,2020-03-11 00:00:00,97.1,False
9,197,HUP197_phaseII_D01-02,2019-10-28,07:15:00,2019-11-15 00:00:00,88.8,False


In [7]:
# Keep only single-dataset patients, order them by ascending hup_id, discard
# the date/weight columns that are not needed downstream, and renumber rows.
nina_patients_df = (
    nina_patients_df.loc[nina_patients_df["is_single_dataset"].eq(True)]
    .sort_values(by="hup_id")
    .drop(columns=["Implant_Date", "implant_time", "Explant_Date", "weight_kg"])
    .reset_index(drop=True)
)
nina_patients_df

Unnamed: 0,hup_id,IEEG_Portal_Number,is_single_dataset
0,162,HUP162_phaseII,True
1,211,HUP211_phaseII,True
2,224,HUP224_phaseII,True


In [8]:
# Preview batch 0 of 6: patients whose hup_id leaves remainder 0 modulo 6.
nina_patients_df.loc[nina_patients_df["hup_id"].mod(6).eq(0)].reset_index(drop=True)

Unnamed: 0,hup_id,IEEG_Portal_Number,is_single_dataset
0,162,HUP162_phaseII,True


In [9]:
# Preview batch 1 of 6: patients whose hup_id leaves remainder 1 modulo 6.
nina_patients_df.loc[nina_patients_df["hup_id"].mod(6).eq(1)].reset_index(drop=True)

Unnamed: 0,hup_id,IEEG_Portal_Number,is_single_dataset
0,211,HUP211_phaseII,True


In [10]:
# Preview batch 2 of 6: patients whose hup_id leaves remainder 2 modulo 6.
nina_patients_df.loc[nina_patients_df["hup_id"].mod(6).eq(2)].reset_index(drop=True)

Unnamed: 0,hup_id,IEEG_Portal_Number,is_single_dataset
0,224,HUP224_phaseII,True


In [None]:
# Preview batch 3 of 6: patients whose hup_id leaves remainder 3 modulo 6.
nina_patients_df.loc[nina_patients_df["hup_id"].mod(6).eq(3)].reset_index(drop=True)

In [None]:
# Preview batch 4 of 6: patients whose hup_id leaves remainder 4 modulo 6.
nina_patients_df.loc[nina_patients_df["hup_id"].mod(6).eq(4)].reset_index(drop=True)

In [None]:
# Preview batch 5 of 6: patients whose hup_id leaves remainder 5 modulo 6.
nina_patients_df.loc[nina_patients_df["hup_id"].mod(6).eq(5)].reset_index(drop=True)

## Select a batch

In [None]:
# The batch processed by this run: patients whose hup_id leaves remainder 2
# when divided by 6, re-indexed from zero.
batch = nina_patients_df.loc[nina_patients_df["hup_id"].mod(6).eq(2)].reset_index(
    drop=True
)
batch

In [None]:
def create_pwd_file(username, password, fname=None):
    """Write ``password`` to a binary login file readable only by its owner.

    The file holds the password in plain text, so it is created with mode
    ``0o600`` (and forced back to that mode if it already existed) instead of
    inheriting the process default, which normally lets other users read it.

    Args:
        username: Account name; its first three characters form the default
            file name ``"<abc>_ieeglogin.bin"``.
        password: Password text; it is written encoded with ``str.encode()``.
        fname: Optional explicit path of the file to write. When ``None`` the
            default name is used, relative to the current working directory.

    Raises:
        OSError: If the file cannot be created or written.
    """
    if fname is None:
        fname = "{}_ieeglogin.bin".format(username[:3])

    # O_BINARY only exists on Windows; it keeps the raw bytes untranslated.
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
    fd = os.open(fname, flags, 0o600)
    with os.fdopen(fd, "wb") as f:
        f.write(password.encode())
    # The mode passed to os.open only applies to newly created files, so
    # tighten the permissions of a pre-existing file explicitly.
    os.chmod(fname, 0o600)
    print("-- -- IEEG password file saved -- --")


# SECURITY: credentials must never be stored in the notebook. The password is
# taken from the IEEG_PASSWORD environment variable, or prompted for
# interactively when that variable is unset or empty.
# NOTE(review): a real password was previously committed on this line and
# remains in version-control history — it should be rotated.
ieeg_password = os.environ.get("IEEG_PASSWORD") or getpass.getpass(
    "IEEG password for dma: "
)
# The download loop reads the password back from dma_ieeglogin.bin, so the
# login file still has to be written.
create_pwd_file("dma", ieeg_password)
del ieeg_password  # do not keep the secret in the kernel namespace
print("Using Devin session")
with open("dma_ieeglogin.bin", "r") as f:
    session = Session("dma", f.read())
# print("Using Carlos session")
# with open("agu_ieeglogin.bin", "r") as f:
#     session = Session("aguilac", f.read())

In [None]:
# Iterate through every row in batch
for index, row in batch.iterrows():
    hup_id = row["hup_id"]
    dataset_name = row["IEEG_Portal_Number"]
    print(dataset_name)

    dataset = session.open_dataset(dataset_name)

    all_channel_labels = np.array(dataset.get_channel_labels())
    channel_labels_to_download = all_channel_labels[
        electrode_selection(all_channel_labels)
    ]

    duration_usec = dataset.get_time_series_details(
        channel_labels_to_download[0]
    ).duration
    duration_hours = int(duration_usec / 1000000 / 60 / 60)
    enlarged_duration_hours = duration_hours + 24

    print(f"Opening {dataset_name} with duration {duration_hours} hours")

    # Calculate the total number of 2-minute intervals in the enlarged duration
    total_intervals = enlarged_duration_hours * 30  # 60min/hour / 2min = 30

    synchrony_broadband_vector_to_save = np.full(total_intervals, np.nan)

    # Loop through each 2-minute interval
    for interval in range(total_intervals):
        print(f"Getting iEEG data for interval {interval} out of {total_intervals}")
        duration_usec = 1.2e8  # 2 minutes
        start_time_usec = interval * 2 * 60 * 1e6  # 2 minutes in microseconds
        stop_time_usec = start_time_usec + duration_usec

        print(start_time_usec, stop_time_usec)
        print(channel_labels_to_download)

        try:
            ieeg_data, fs = get_iEEG_data(
                "dma",
                "dma_ieeglogin.bin",
                dataset_name,
                start_time_usec,
                stop_time_usec,
                all_channel_labels,
            )
            fs = int(fs)
        except Exception as e:
            # handle the exception
            print(f"Error: {e}")
            break

    #     # Drop rows that has any nan
    #     ieeg_data = ieeg_data.dropna(axis=0, how="any")
    #     if ieeg_data.empty:
    #         print("Empty dataframe after dropping nan, skip...")
    #         continue

    #     good_channels_res = detect_bad_channels_optimized(ieeg_data.to_numpy(), fs)
    #     good_channel_indicies = good_channels_res[0]
    #     good_channel_labels = channel_labels_to_download[good_channel_indicies]
    #     ieeg_data = ieeg_data[good_channel_labels].to_numpy()

    #     # Check if ieeg_data is empty after dropping bad channels
    #     if ieeg_data.size == 0:
    #         print("Empty dataframe after dropping bad channels, skip...")
    #         continue

    #     ieeg_data = common_average_montage(ieeg_data)

    #     # Apply the filters directly on the DataFrame
    #     ieeg_data = notch_filter(ieeg_data, 59, 61, fs)

    #     ##############################
    #     # Calculate synchrony (broadband)
    #     ##############################
    #     _, R = calculate_synchrony(ieeg_data.T)
    #     synchrony_broadband_vector_to_save[interval] = R

    #     print(f"Finished calculating synchrony for interval {interval}")

    # ##############################
    # # Save the synchrony output
    # ##############################
    # # np.save(
    # #     os.path.join(SYNCHRONY_broadband_DIRECTORY, f"HUP_{hup_id}.npy"),
    # #     synchrony_broadband_vector_to_save,
    # # )
    # # print(f"Saved synchrony output for HUP {hup_id}")

In [None]:
# Debug view: channel labels of the dataset most recently opened by the
# download loop (the variable is reassigned for every patient in the batch).
all_channel_labels

In [None]:
# !jupyter nbconvert --to python download_calculate_all_iter_2.ipynb