# Build giant tables

In [1]:
import numpy as np
import pandas as pd
import os
from scipy import interpolate
import matplotlib.pyplot as plt

In [2]:
# Cut-off applied to the hourly alpha-delta ratio further down in this notebook:
# hours whose `ad_ratio` is strictly greater than this value are flagged `awake = 1`.
# NOTE(review): the provenance of this specific value is not shown here — confirm.
AD_RATIO_THRESHOLD: float = -0.4054

In [3]:
# Read the per-patient implant/explant spreadsheet (one row per HUP patient;
# the loops below use its `hup_id` and `weight_kg` columns).
patients_df = pd.read_excel(io="../../Data/HUP_implant_dates.xlsx")
patients_df

Unnamed: 0,hup_id,IEEG_Portal_Number,Implant_Date,implant_time,Explant_Date,weight_kg
0,225,HUP225_phaseII,2021-10-18,07:15:00,2021-10-26 17:30:00,58.5
1,224,HUP224_phaseII,2021-10-13,07:15:00,2021-10-20 00:00:00,85.5
2,223,HUP223_phaseII,2021-09-29,07:15:00,2021-10-08 08:21:00,101.4
3,221,HUP221_phaseII,2021-08-16,07:15:00,2021-08-23 00:00:00,124.3
4,219,HUP219_phaseII,2021-07-12,07:15:00,2021-07-16 08:18:00,101.6
...,...,...,...,...,...,...
75,141,HUP141_phaseII,2017-05-24,07:15:00,2017-06-01 00:00:00,85.7
76,140,HUP140_phaseII_D01-D02,2017-05-10,07:15:00,2017-05-19 00:00:00,56.7
77,139,HUP139_phaseII,2017-04-26,07:15:00,2017-05-09 00:00:00,69.8
78,138,HUP138_phaseII,2017-04-12,07:15:00,2017-04-20 00:00:00,84.4


In [4]:
# Lookup table: HUP patient id -> positional row index of that patient in patients_df.
# If an id were repeated, the last occurrence wins.
patient_hup_id_to_index = {
    hup_id: row_position for row_position, hup_id in enumerate(patients_df["hup_id"])
}
patient_hup_id_to_index

{225: 0,
 224: 1,
 223: 2,
 221: 3,
 219: 4,
 217: 5,
 216: 6,
 215: 7,
 214: 8,
 213: 9,
 211: 10,
 210: 11,
 209: 12,
 208: 13,
 207: 14,
 206: 15,
 205: 16,
 204: 17,
 202: 18,
 201: 19,
 199: 20,
 197: 21,
 196: 22,
 195: 23,
 194: 24,
 193: 25,
 192: 26,
 191: 27,
 190: 28,
 189: 29,
 188: 30,
 187: 31,
 186: 32,
 185: 33,
 184: 34,
 182: 35,
 181: 36,
 180: 37,
 179: 38,
 178: 39,
 177: 40,
 175: 41,
 174: 42,
 173: 43,
 172: 44,
 171: 45,
 170: 46,
 169: 47,
 168: 48,
 167: 49,
 166: 50,
 165: 51,
 164: 52,
 163: 53,
 162: 54,
 161: 55,
 160: 56,
 159: 57,
 158: 58,
 157: 59,
 156: 60,
 155: 61,
 154: 62,
 153: 63,
 152: 64,
 151: 65,
 150: 66,
 149: 67,
 148: 68,
 147: 69,
 146: 70,
 145: 71,
 144: 72,
 143: 73,
 142: 74,
 141: 75,
 140: 76,
 139: 77,
 138: 78,
 137: 79}

In [5]:
# Read ../../Data/ieeg_starts.xlsx: one `ieeg_start` value per `hup_id`.
# The per-patient loop below treats `ieeg_start` as hours on the EMU time axis.
ieeg_starts_df = pd.read_excel(io="../../Data/ieeg_starts.xlsx")
ieeg_starts_df

Unnamed: 0,hup_id,ieeg_start
0,225,38.024167
1,224,37.582778
2,223,32.985556
3,221,37.690000
4,219,33.396667
...,...,...
75,141,37.249444
76,140,35.197500
77,139,37.151667
78,138,37.529444


In [6]:
# Collect the name of every medication that appears in any patient's medication
# file, so that every per-patient table can be given the same set of med columns.
all_med_names = []

for patient_hup_id in patients_df["hup_id"]:
    # NOTE: allow_pickle=True unpickles objects on load, which can execute
    # arbitrary code — only load medication files from a trusted source.
    aed_np_file = np.load(
        f"../../Data/medications/HUP_{patient_hup_id}.npy", allow_pickle=True
    )

    # Entry 2 holds this patient's medication names (entries 0 and 1 are the
    # dose curves and their time axes, which are not needed here).
    all_med_names.extend(aed_np_file[2])

# Sorted, de-duplicated array of medication names across all patients.
all_med_names = np.unique(np.array(all_med_names, dtype=str))
all_med_names

array(['brivaracetam', 'carbamazepine', 'clobazam', 'clonazepam',
       'clorazepate', 'eslicarbazepine', 'felbamate', 'gabapentin',
       'lacosamide', 'lamotrigine', 'levetiracetam', 'lorazepam',
       'oxcarbazepine', 'phenytoin', 'pregabalin', 'rufinamide',
       'topiramate', 'valproic acid', 'zonisamide'], dtype='<U15')

In [7]:
# Frequency bands used to locate the per-band feature files (Teager energy and
# Kuramoto order parameter): band name -> (lower edge, upper edge).
# Edges are presumably in Hz — TODO confirm.
frequency_bands = dict(
    delta=(0.5, 4),
    theta=(4, 8),
    alpha=(8, 12),
    beta=(12, 30),
    gamma=(30, 100),
)

In [13]:
def _hourly_lookup(start_hour, hourly_values):
    """Key consecutive integer EMU hours, starting at ``start_hour``, to ``hourly_values``.

    Entry ``k`` of ``hourly_values`` is stored under ``start_hour + k``. The
    result is intended for ``Series.map`` on the ``emu_hour`` column, so hours
    that have no entry end up as NaN in the mapped column.
    """
    return dict(zip(range(start_hour, start_hour + len(hourly_values)), hourly_values))


# Make sure the output directory exists before the first CSV is written.
os.makedirs("../../Data/giant_tables", exist_ok=True)

# Build one hourly feature table per patient and save it as a CSV.
for patient_hup_id in patients_df["hup_id"]:
    # Start of the iEEG recording for this patient, in hours on the EMU time axis.
    ieeg_start_hrs = float(
        ieeg_starts_df.loc[
            ieeg_starts_df["hup_id"] == patient_hup_id, "ieeg_start"
        ].values[0]
    )
    # First integer hour that the hourly feature arrays are keyed to.
    # NOTE(review): this rounds to the nearest hour, whereas `emu_hour` and the
    # seizure hours below are truncated — confirm this matches how the hourly
    # feature files were produced.
    aligned_emu_start_time_hrs = round(ieeg_start_hrs)

    ##############################################
    # MEDICATIONS
    ##############################################
    # NOTE: allow_pickle=True unpickles objects on load, which can execute
    # arbitrary code — only load medication files from a trusted source.
    aed_np_file = np.load(
        f"../../Data/medications/HUP_{patient_hup_id}.npy", allow_pickle=True
    )

    all_dose_curves_plot = aed_np_file[0]
    all_tHr_plot = aed_np_file[1]
    all_med_names_plot = aed_np_file[2]

    # Common time axis: from the earliest start over all medications to the end
    # of the first medication's axis, sampled with as many points as the
    # longest per-medication axis.
    # NOTE(review): the end time is taken from the first medication only;
    # this is only right if every medication's time axis ends at the same time.
    emu_start_time_hrs = min(med_times[0] for med_times in all_tHr_plot)
    emu_end_time_hrs = all_tHr_plot[0][-1]
    max_length = max(len(med_times) for med_times in all_tHr_plot)
    time_axis = np.linspace(emu_start_time_hrs, emu_end_time_hrs, max_length)

    # One all-zero column per medication seen in any patient, so every
    # patient's table has the same medication columns.
    hourly_patient_features_df = pd.DataFrame({"emu_time": time_axis})
    for potential_med_name in all_med_names:
        hourly_patient_features_df[f"med_{potential_med_name}_raw"] = np.zeros(
            len(time_axis)
        )

    # Dose curves (resampled onto time_axis) of everything except lorazepam.
    non_lorazepam_curves = []

    for med_idx, med_name in enumerate(all_med_names_plot):
        dose_times = all_tHr_plot[med_idx].flatten()
        dose = all_dose_curves_plot[med_idx].flatten()

        # Resample onto the common axis; outside the medication's own time
        # range the dose is taken to be 0.
        interp_func = interpolate.interp1d(
            dose_times, dose, bounds_error=False, fill_value=0
        )
        dose_interp = interp_func(time_axis)

        if med_name != "lorazepam":
            non_lorazepam_curves.append(dose_interp)

        hourly_patient_features_df[f"med_{med_name}_raw"] = dose_interp

    # np.sum over an empty list returns a scalar, which would break the length
    # check below for a patient whose only medication is lorazepam.
    if non_lorazepam_curves:
        cumulative_dose_curve = np.sum(non_lorazepam_curves, axis=0)
    else:
        cumulative_dose_curve = np.zeros(len(time_axis))

    assert len(cumulative_dose_curve) == len(
        time_axis
    ), "cumulative_dose_curve and time_axis should have the same length"

    hourly_patient_features_df["med_sum_no_lorazepam_raw"] = cumulative_dose_curve

    ##############################################
    # Group by hour and compute mean
    ##############################################
    # Truncate the time axis to integer hours, average every column within each
    # hour, and drop the now-redundant fractional time column.
    hourly_patient_features_df["emu_hour"] = hourly_patient_features_df[
        "emu_time"
    ].astype(int)
    hourly_patient_features_df = (
        hourly_patient_features_df.groupby("emu_hour")
        .mean()
        .reset_index()
        .drop(columns=["emu_time"])
    )

    ##############################################
    # SEIZURE COUNT
    ##############################################
    seizure_times_sec = np.load(
        f"../../Data/seizures/source_mat/HUP_{patient_hup_id}.npy"
    )
    # Shift from seconds since iEEG start to hours on the EMU time axis.
    seizure_times_sec = seizure_times_sec + (ieeg_start_hrs * 3600)
    seizure_times_hr = seizure_times_sec / 3600

    hourly_patient_features_df["num_seizures"] = np.zeros(
        len(hourly_patient_features_df), dtype=int
    )

    # Column 0 is used as the seizure time (presumably the onset — TODO confirm).
    # An empty seizure file contributes no counts instead of failing on indexing.
    seizure_onsets_hr = seizure_times_hr[:, 0] if seizure_times_hr.size else []
    for sz_hr in seizure_onsets_hr:
        # Count the seizure in the hour it falls into; seizures outside the
        # table's hour range match no row and are not counted.
        hourly_patient_features_df.loc[
            hourly_patient_features_df["emu_hour"] == int(sz_hr), "num_seizures"
        ] += 1

    ##############################################
    # ALPHA-DELTA RATIO
    ##############################################
    ad_ratios_np = np.load(f"../../Data/ad_ratios/hourly/HUP_{patient_hup_id}.npy")
    hourly_patient_features_df["ad_ratio"] = hourly_patient_features_df["emu_hour"].map(
        _hourly_lookup(aligned_emu_start_time_hrs, ad_ratios_np)
    )

    ##############################################
    # AWAKE
    ##############################################
    # 1 if ad_ratio > AD_RATIO_THRESHOLD, 0 otherwise.
    # NOTE(review): hours with a missing (NaN) ad_ratio also end up as 0 here,
    # i.e. they are indistinguishable from hours classified as not awake.
    hourly_patient_features_df["awake"] = (
        hourly_patient_features_df["ad_ratio"] > AD_RATIO_THRESHOLD
    ).astype(int)

    ##############################################
    # SPIKES
    ##############################################
    # Output column -> sub-directory of ../../Data/spikes holding its hourly array.
    spike_sources = {
        "spikes_avg_all": "hourly_avg/all",
        "spikes_avg_soz": "hourly_avg/soz",
        "spikes_sum_all": "hourly_sum/all",
        "spikes_sum_soz": "hourly_sum/soz",
    }
    for spike_column, spike_subdir in spike_sources.items():
        spikes_np = np.load(
            f"../../Data/spikes/{spike_subdir}/HUP_{patient_hup_id}.npy"
        )
        hourly_patient_features_df[spike_column] = hourly_patient_features_df[
            "emu_hour"
        ].map(_hourly_lookup(aligned_emu_start_time_hrs, spikes_np))

    ##############################################
    # TEAGER ENERGY
    ##############################################
    # The patient is skipped only if *no* band file was found; a band whose
    # file is missing simply gets no column.
    should_skip_patient_for_teager_energy = True
    for frequency_band in frequency_bands:
        try:
            teager_energy_np = np.load(
                f"../../Data/teager_energy/{frequency_band}/HUP_{patient_hup_id}.npy"
            )
        except FileNotFoundError:
            print(f"File not found for HUP {patient_hup_id} {frequency_band}")
            continue
        should_skip_patient_for_teager_energy = False

        hourly_patient_features_df[
            f"teager_energy_{frequency_band}"
        ] = hourly_patient_features_df["emu_hour"].map(
            _hourly_lookup(aligned_emu_start_time_hrs, teager_energy_np)
        )

    ##############################################
    # KURAMOTO ORDER PARAMETER
    ##############################################
    # Same skip rule as for the Teager energy above.
    should_skip_patient_for_plv = True
    for frequency_band in frequency_bands:
        kuramoto_path = (
            f"../../Data/plv/kuramoto/{frequency_band}/HUP_{patient_hup_id}.npy"
        )
        try:
            kuramoto_np = np.load(kuramoto_path)
        except FileNotFoundError:
            print(f"File not found for {kuramoto_path}")
            continue
        should_skip_patient_for_plv = False

        hourly_patient_features_df[
            f"kuramoto_{frequency_band}"
        ] = hourly_patient_features_df["emu_hour"].map(
            _hourly_lookup(aligned_emu_start_time_hrs, kuramoto_np)
        )

    ##############################################
    # TAPER
    ##############################################
    # TODO: taper-onset detection is currently disabled. The prototype that
    # lived here looked for the first hour (from emu_hour 24 on) at which a
    # non-lorazepam medication drops below a fraction of its peak over the
    # first hours (emu_hour <= 23) of the stay.

    ##############################################
    # PLOT STUFF
    ##############################################
    # Progress message; the actual plotting call (plot_stuff) is disabled.
    print(
        f"Plotting for HUP {patient_hup_id}, {patient_hup_id_to_index[patient_hup_id]}"
    )

    ##############################################
    # SAVE TO CSV
    ##############################################
    if should_skip_patient_for_teager_energy or should_skip_patient_for_plv:
        print(f"Skipping HUP {patient_hup_id} due to missing files")
        continue

    hourly_patient_features_df.to_csv(
        f"../../Data/giant_tables/HUP_{patient_hup_id}.csv", index=False
    )

File not found for HUP 225 delta
File not found for HUP 225 theta
File not found for HUP 225 alpha
File not found for HUP 225 beta
File not found for HUP 225 gamma
File not found for ../../Data/plv/kuramoto/delta/HUP_225.npy
File not found for ../../Data/plv/kuramoto/theta/HUP_225.npy
File not found for ../../Data/plv/kuramoto/alpha/HUP_225.npy
File not found for ../../Data/plv/kuramoto/beta/HUP_225.npy
File not found for ../../Data/plv/kuramoto/gamma/HUP_225.npy
Plotting for HUP 225, 0
Skipping HUP 225 due to missing files
Plotting for HUP 224, 1
Plotting for HUP 223, 2
File not found for HUP 221 delta
File not found for HUP 221 theta
File not found for HUP 221 alpha
File not found for HUP 221 beta
File not found for HUP 221 gamma
File not found for ../../Data/plv/kuramoto/delta/HUP_221.npy
File not found for ../../Data/plv/kuramoto/theta/HUP_221.npy
File not found for ../../Data/plv/kuramoto/alpha/HUP_221.npy
File not found for ../../Data/plv/kuramoto/beta/HUP_221.npy
File not found 