# Download iEEG

In [2]:
from ieeg.auth import Session
from ieeg_utils import *
import pandas as pd
import numpy as np

## Load data

In [3]:
# Selected-patient cohort table; later cells read its hup_id and weight_kg columns.
# NOTE(review): this path climbs three directories ("../../../Data") while most
# other loads in this notebook use "../../Data" — confirm which root is correct.
patients_df = pd.read_excel("../../../Data/selected_patients_all.xlsx")
patients_df

Unnamed: 0,hup_id,region,lateralization,weight_kg,record_id,r_id
0,137,multifocal,bilateral,141.3,280,280
1,138,mesial temporal,left,84.4,278,278
2,140,mesial temporal,left,56.7,320,320
3,141,mesial temporal,right,85.7,294,294
4,142,mesial temporal,left,65.3,295,295
...,...,...,...,...,...,...
73,219,mesial temporal,right,101.6,695,695
74,221,mesial temporal,right,124.3,621,621
75,223,mesial temporal,left,101.4,785,785
76,224,mesial temporal,bilateral,85.5,617,617


In [5]:
# Load the master electrode table (one row per electrode contact) from
# ../../../Data/master_elecs.csv.
# NOTE(review): this path climbs three directories while most other loads in
# this notebook use "../../Data" — confirm which root is correct.
master_elecs_df = pd.read_csv("../../../Data/master_elecs.csv")

# Keep only the digits of the rid column and store them as integers.
# The pattern is a raw string: "\d" inside a normal string literal is an
# invalid escape sequence (DeprecationWarning, SyntaxWarning on Python 3.12+).
# astype(int) raises if any rid contains no digits (extract yields NaN there).
master_elecs_df["rid"] = (
    master_elecs_df["rid"].str.extract(r"(\d+)", expand=False).astype(int)
)

# Drop the mni_* and mm_* coordinate columns; the vox_* columns are kept.
master_elecs_df = master_elecs_df.drop(
    columns=["mni_x", "mni_y", "mni_z", "mm_x", "mm_y", "mm_z"]
)

master_elecs_df

Unnamed: 0,rid,name,vox_x,vox_y,vox_z,label,soz,resected,spike_rate,engel
0,13,LST01,80.6116,106.5480,64.5941,left inferior temporal,False,False,1.091902,1.0
1,13,LST02,72.0779,109.4150,63.1223,left inferior temporal,False,False,1.091902,1.0
2,13,LST03,64.9060,112.3760,68.7455,EmptyLabel,False,False,1.419472,1.0
3,13,LST04,65.0210,114.6600,78.2339,left middle temporal,False,False,0.655141,1.0
4,13,MST01,131.7410,64.3756,70.4205,right lingual,True,False,3.439490,1.0
...,...,...,...,...,...,...,...,...,...,...
14212,785,RB08,154.2550,114.2730,136.7560,EmptyLabel,False,,0.369914,1.0
14213,785,RB09,159.1350,111.9920,136.6960,EmptyLabel,False,,0.665845,1.0
14214,785,RB10,164.7520,109.9030,137.7640,right middle temporal,False,,4.586930,1.0
14215,785,RB11,169.6320,107.6220,137.7040,right middle temporal,False,,2.071517,1.0


In [4]:
# record_id <-> hupsubjno lookup table, read from ../../Data/rid_hup_table.csv.
# The t3_subject_id and ieegportalsubjno columns are discarded right away.
rid_hup_table_df = pd.read_csv("../../Data/rid_hup_table.csv").drop(
    columns=["t3_subject_id", "ieegportalsubjno"]
)
rid_hup_table_df

Unnamed: 0,record_id,hupsubjno
0,623,35
1,624,36
2,625,37
3,626,38
4,627,39
...,...,...
212,534,250
213,923,251
214,918,252
215,864,253


In [5]:
# Pull the cohort's HUP ids and body weights out as parallel NumPy arrays.
patient_hup_ids = patients_df["hup_id"].to_numpy()
patient_weights = patients_df["weight_kg"].to_numpy()
# Both arrays come from the same frame, so their lengths must agree.
assert len(patient_weights) == len(patient_hup_ids)
len(patient_hup_ids)

8

In [6]:
# HUP ids of every patient listed in the implant-dates sheet (its "ptID" column),
# in sheet order.
all_patient_hup_ids = pd.read_excel("../../Data/HUP_implant_dates.xlsx")[
    "ptID"
].to_numpy()
all_patient_hup_ids

array([225, 224, 223, 221, 219, 217, 216, 215, 214, 213, 211, 210, 209,
       208, 207, 206, 205, 204, 202, 201, 199, 197, 196, 195, 194, 193,
       192, 191, 190, 189, 188, 187, 186, 185, 184, 182, 181, 180, 179,
       178, 177, 175, 174, 173, 172, 171, 170, 169, 168, 167, 166, 165,
       164, 163, 162, 161, 160, 159, 158, 157, 156, 155, 154, 153, 152,
       151, 150, 149, 148, 147, 146, 145, 144, 143, 142, 141, 140, 139,
       138, 137])

In [7]:
# Map each HUP id to its position in all_patient_hup_ids (the order of the
# implant-dates sheet). Later cells use this position as a column index into
# the ieeg_offset sheets.
patient_hup_id_to_index = {
    hup_id: position for position, hup_id in enumerate(all_patient_hup_ids)
}
patient_hup_id_to_index

{225: 0,
 224: 1,
 223: 2,
 221: 3,
 219: 4,
 217: 5,
 216: 6,
 215: 7,
 214: 8,
 213: 9,
 211: 10,
 210: 11,
 209: 12,
 208: 13,
 207: 14,
 206: 15,
 205: 16,
 204: 17,
 202: 18,
 201: 19,
 199: 20,
 197: 21,
 196: 22,
 195: 23,
 194: 24,
 193: 25,
 192: 26,
 191: 27,
 190: 28,
 189: 29,
 188: 30,
 187: 31,
 186: 32,
 185: 33,
 184: 34,
 182: 35,
 181: 36,
 180: 37,
 179: 38,
 178: 39,
 177: 40,
 175: 41,
 174: 42,
 173: 43,
 172: 44,
 171: 45,
 170: 46,
 169: 47,
 168: 48,
 167: 49,
 166: 50,
 165: 51,
 164: 52,
 163: 53,
 162: 54,
 161: 55,
 160: 56,
 159: 57,
 158: 58,
 157: 59,
 156: 60,
 155: 61,
 154: 62,
 153: 63,
 152: 64,
 151: 65,
 150: 66,
 149: 67,
 148: 68,
 147: 69,
 146: 70,
 145: 71,
 144: 72,
 143: 73,
 142: 74,
 141: 75,
 140: 76,
 139: 77,
 138: 78,
 137: 79}

In [8]:
# The three ieeg_offset sheets carry no header row, so columns are addressed by
# integer position (header=None).
ieeg_offset_row1_df, ieeg_offset_row2_df, ieeg_offset_row3_df = (
    pd.read_excel(offset_sheet_path, header=None)
    for offset_sheet_path in (
        "../../Data/ieeg_offset/row_1.xlsx",
        "../../Data/ieeg_offset/row_2.xlsx",
        "../../Data/ieeg_offset/row_3.xlsx",
    )
)

In [9]:
# Load ../../Data/ieeg_starts.xlsx into a dataframe (the displayed output shows
# hup_id and ieeg_start columns).
# NOTE(review): ieeg_starts_df is not referenced again in the visible part of
# this notebook — confirm it is still needed.
ieeg_starts_df = pd.read_excel("../../Data/ieeg_starts.xlsx")
ieeg_starts_df

Unnamed: 0,hup_id,ieeg_start
0,225,38.024167
1,224,37.582778
2,223,32.985556
3,221,37.690000
4,219,33.396667
...,...,...
75,141,37.249444
76,140,35.197500
77,139,37.151667
78,138,37.529444


## Download

In [10]:
def create_pwd_file(username, password, fname=None):
    """Store ``password`` in a binary IEEG login file.

    Args:
        username: Account name; only its first three characters are used, and
            only to build the default file name.
        password: Text to store. It is encoded with ``str.encode()`` defaults
            and written as raw bytes (no hashing or encryption).
        fname: Destination path. When ``None`` the file is written to
            ``"<first 3 chars of username>_ieeglogin.bin"`` relative to the
            current working directory.

    Returns:
        None. A confirmation line is printed after the file is written.
    """
    target_path = (
        "{}_ieeglogin.bin".format(username[:3]) if fname is None else fname
    )
    # "wb" truncates any existing file at target_path.
    with open(target_path, "wb") as pwd_file:
        pwd_file.write(password.encode())
    print("-- -- IEEG password file saved -- --")


# SECURITY: the account password must never be written into the notebook.
# The value that used to be hardcoded here has been exposed to everyone who
# received this file and should be rotated.
# The password is now read from the IEEG_PASSWORD environment variable, with an
# interactive (non-echoing) prompt as fallback.
# Imported in this cell, after `from ieeg_utils import *`, so the star import
# cannot shadow these names.
import getpass
import os

ieeg_password = os.environ.get("IEEG_PASSWORD") or getpass.getpass(
    "iEEG.org password for user 'dma': "
)

# The login file is still written because the download cell passes its path
# ("dma_ieeglogin.bin") to get_iEEG_data. It holds the password in plain text:
# keep it out of version control.
create_pwd_file("dma", ieeg_password)
del ieeg_password  # do not keep the secret in the kernel namespace

with open("dma_ieeglogin.bin", "r") as f:
    session = Session("dma", f.read())

-- -- IEEG password file saved -- --


In [12]:
# Download a 2-minute iEEG clip from the start of every hour of each selected
# patient's dataset(s), restricted to SEEG channels whose electrode label is
# not "EmptyLabel", and pickle every non-empty clip under
# ../../Data/ieeg/temporal_neocortical/2_min/.
#
# Relies on names from earlier cells: patients_df, patient_hup_id_to_index,
# ieeg_offset_row1_df, rid_hup_table_df, master_elecs_df and session, plus
# check_channel_types / get_iEEG_data (presumably provided by
# `from ieeg_utils import *` — confirm).
import pickle  # explicit: previously only in scope via the star import

CLIP_DURATION_USEC = 1.2e8  # 2 minutes, in microseconds
USEC_PER_HOUR = 3600 * 1e6

for _, row in patients_df.iterrows():
    patient_hup_id = row.hup_id
    patient_idx = patient_hup_id_to_index[patient_hup_id]

    #########################################
    # Get ieeg dataset name
    #########################################
    # Column `patient_idx` of the row_1 offset sheet holds this patient's
    # dataset names; when it is entirely NaN, fall back to the default name.
    dataset_name_column = ieeg_offset_row1_df[patient_idx]
    if dataset_name_column.isnull().values.all():
        ieeg_dataset_names = [f"HUP{patient_hup_id}_phaseII"]
    else:
        ieeg_dataset_names = dataset_name_column.dropna().to_list()

    print(f"HUP{patient_hup_id} has {ieeg_dataset_names} datasets")

    #########################################
    # Check tables
    #########################################
    matching_record_ids = rid_hup_table_df.loc[
        rid_hup_table_df["hupsubjno"] == patient_hup_id, "record_id"
    ]
    if matching_record_ids.empty:
        print(f"HUP{patient_hup_id} has no info on rid_hup_table sheet")
        continue

    # Extract the scalar explicitly: int(<Series>) is deprecated in pandas 2.1+
    # and raises when more than one row matches. The first match is used.
    patient_rid = int(matching_record_ids.iloc[0])

    print(f"HUP{patient_hup_id} has rid {patient_rid}")

    patient_electrodes_df = master_elecs_df.loc[master_elecs_df["rid"] == patient_rid]

    if patient_electrodes_df.empty:
        print(f"HUP{patient_hup_id} has no info on master electrode sheet")
        continue

    #########################################
    # Get iEEG data
    #########################################
    for dataset_name in ieeg_dataset_names:
        #########################################
        # Get the useful channels
        #########################################
        dataset = session.open_dataset(dataset_name)
        channel_labels = dataset.get_channel_labels()

        # The recording length is read from a single channel. Index 5 is kept
        # from the original code but clamped so datasets with fewer than six
        # channels do not raise IndexError.
        reference_label = channel_labels[min(5, len(channel_labels) - 1)]
        recording_duration_usec = dataset.get_time_series_details(
            reference_label
        ).duration
        duration_hours = int(recording_duration_usec / 1000000 / 60 / 60)
        # Scan 24 h beyond the reported duration; the hour loop below stops at
        # the first failed request. NOTE(review): presumably a safety margin
        # for under-reported durations — confirm.
        enlarged_duration_hours = duration_hours + 24
        channel_types_df = check_channel_types(channel_labels)

        print(f"Opening {dataset_name} with duration {duration_hours} hours")

        seeg_channel_names = channel_types_df.loc[
            channel_types_df["type"] == "seeg", "name"
        ].to_numpy(dtype=str)

        # Rows of the patient's electrode table that are SEEG channels in this dataset
        seeg_electrodes_df = patient_electrodes_df.loc[
            patient_electrodes_df["name"].isin(seeg_channel_names)
        ]
        # Keep only the ones whose label is not 'EmptyLabel'
        grey_matter_seeg_electrodes_df = seeg_electrodes_df.loc[
            seeg_electrodes_df["label"] != "EmptyLabel"
        ]

        # Channel names to request, in the dataset's own channel order
        all_indices = channel_types_df.loc[
            channel_types_df["name"].isin(grey_matter_seeg_electrodes_df["name"]),
            "name",
        ].to_numpy(dtype=str)

        if len(all_indices) == 0:
            print(f"{dataset_name} has no grey matter SEEG channels, skipping")
            continue

        #########################################
        # Get iEEG data
        #########################################
        for hour in range(enlarged_duration_hours):
            print(f"Getting iEEG data for hour {hour} out of {enlarged_duration_hours}")
            start_time_usec = hour * USEC_PER_HOUR
            stop_time_usec = start_time_usec + CLIP_DURATION_USEC

            try:
                local_res, sample_rate = get_iEEG_data(
                    "dma",
                    "dma_ieeglogin.bin",
                    dataset_name,
                    start_time_usec,
                    stop_time_usec,
                    select_electrodes=all_indices,
                )
            except Exception as e:
                # A failed request ends this dataset (best-effort, as before),
                # but the reason is now reported instead of being swallowed.
                print(
                    f"Stopping {dataset_name} at hour {hour}: "
                    f"{type(e).__name__}: {e}"
                )
                break

            assert local_res.shape[1] == len(all_indices), (
                f"{dataset_name}: expected {len(all_indices)} channels, "
                f"got {local_res.shape[1]}"
            )

            # Drop every sample (row) that has a NaN in any channel
            local_res = local_res.dropna(axis=0, how="any")

            # Only clips that still contain samples are written to disk
            if not local_res.empty:
                with open(
                    f"../../Data/ieeg/temporal_neocortical/2_min/{dataset_name}_hr_{hour}_fs_{int(sample_rate)}.pkl",
                    "wb",
                ) as f:
                    pickle.dump(local_res, f)

Getting iEEG data for hour 69 out of 179
Getting iEEG data for hour 70 out of 179
Getting iEEG data for hour 71 out of 179
Getting iEEG data for hour 72 out of 179
Getting iEEG data for hour 73 out of 179
Getting iEEG data for hour 74 out of 179
Getting iEEG data for hour 75 out of 179
Getting iEEG data for hour 76 out of 179
Getting iEEG data for hour 77 out of 179
Getting iEEG data for hour 78 out of 179
Getting iEEG data for hour 79 out of 179
Getting iEEG data for hour 80 out of 179
Getting iEEG data for hour 81 out of 179
Getting iEEG data for hour 82 out of 179
Getting iEEG data for hour 83 out of 179
Getting iEEG data for hour 84 out of 179
Getting iEEG data for hour 85 out of 179
Getting iEEG data for hour 86 out of 179
Getting iEEG data for hour 87 out of 179
Getting iEEG data for hour 88 out of 179
Getting iEEG data for hour 89 out of 179
Getting iEEG data for hour 90 out of 179
Getting iEEG data for hour 91 out of 179
Getting iEEG data for hour 92 out of 179
Getting iEEG dat

In [19]:
# Exploration: open one dataset and list every attribute name that does not
# start with a double underscore (methods and plain attributes alike).
dataset = session.open_dataset("HUP141_phaseII")
methods = list(filter(lambda attr_name: not attr_name.startswith("__"), dir(dataset)))
channel_labels = dataset.get_channel_labels()
print(methods)

['_SERVER_GAP_VALUE', '_get_unmontaged_data', 'add_annotations', 'ch_labels', 'current_montage', 'delete_annotation_layer', 'derive_dataset', 'end_time', 'getChannelLabels', 'getData', 'get_annotation_layers', 'get_annotations', 'get_channel_indices', 'get_channel_labels', 'get_current_montage', 'get_data', 'get_dataframe', 'get_time_series_details', 'montages', 'move_annotation_layer', 'name', 'session', 'set_current_montage', 'snap_id', 'start_time', 'ts_array', 'ts_details', 'ts_details_by_id']


In [20]:
# Exploration leftover: shows the bound method object itself; nothing is called.
dataset.get_dataframe

<bound method Dataset.get_dataframe of Dataset with: 117 channels.>

In [21]:
# Collect start time, end time and duration of every channel of `dataset`
# (opened in an earlier cell) and print each channel's duration. The download
# cell treats this duration as microseconds.
start_times = []
end_times = []
durations = []

for channel_label in channel_labels:
    channel = dataset.get_time_series_details(channel_label)
    start_times.append(channel.start_time)
    end_times.append(channel.end_time)
    durations.append(channel.duration)
    # Reuse the details fetched above instead of looking them up a second time.
    print(channel.duration)

# print(np.unique(start_times))
# print(np.unique(end_times))
# print(np.unique(durations))

527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
527012890625.0
5270128906