# Taper Before/After Analysis

Next up: Taper Transition Analysis

In [None]:
import os, math
import numpy as np
import pandas as pd
from scipy import stats
from scipy.stats import sem, t
import matplotlib.pyplot as plt

In [None]:
# Directory that is scanned for the per-patient "giant table" CSV files
# (read later as HUP_<id>.csv).
TABLES_DIRECTORY = "../../Data/giant_tables"
# NOTE(review): not referenced by any code visible in this section -- confirm
# whether it is still needed.
TAPER_THRESHOLD = 0.5

In [None]:
# Candidate table files: CSVs only, ignoring hidden files
table_filenames = (
    entry
    for entry in os.listdir(TABLES_DIRECTORY)
    if entry.endswith(".csv") and not entry.startswith(".")
)

# The numeric id sits between the first "_" and the following "." of the filename
patient_hup_ids = sorted(
    int(entry.split("_")[1].split(".")[0]) for entry in table_filenames
)
len(patient_hup_ids)

## Plotting function

In [None]:
def plot_stuff(hourly_patient_features_df, before_taper_period, after_taper_period):
    """Show a three-panel overview of one patient's hourly features.

    Panels (top to bottom, sharing the ``emu_hour`` x-axis):

    1. Every ``med_*`` column that is not identically zero (the summed
       ``med_sum_no_lorazepam_raw`` column is skipped), each divided by its own
       maximum. The before/after taper windows are drawn as horizontal bars at
       y=1 with a text caption, and every hour with ``num_seizures >= 1`` is
       marked with a dotted vertical line.
    2. ``teager_energy_delta``.
    3. ``kuramoto_delta``.

    Args:
        hourly_patient_features_df: DataFrame with the columns ``emu_hour``,
            ``num_seizures``, ``teager_energy_delta``, ``kuramoto_delta`` and
            the ``med_*`` columns to plot.
        before_taper_period: ``(start, end)`` pair in ``emu_hour`` units.
        after_taper_period: ``(start, end)`` pair in ``emu_hour`` units.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    features = hourly_patient_features_df
    hours = features["emu_hour"]

    # Individual medication columns worth drawing: non-zero somewhere, and not
    # the summed column.
    med_cols = [
        col
        for col in features.columns
        if col.startswith("med_")
        and col != "med_sum_no_lorazepam_raw"
        and not features[col].eq(0).all()
    ]

    fig, ax = plt.subplots(3, 1, figsize=(5, 5), sharex=True)
    load_ax, energy_ax, synchrony_ax = ax

    # Plotting Individual ASMs, each scaled by its own maximum
    for col in med_cols:
        load = features[col]
        load_ax.plot(hours, load / load.max())

    # Horizontal bar plus caption for each analysis window
    for period, color, caption in (
        (before_taper_period, "green", "before taper"),
        (after_taper_period, "red", "after taper"),
    ):
        load_ax.hlines(1, period[0], period[1], color=color, linewidth=2)
        load_ax.text(
            (period[0] + period[1]) / 2,
            1,
            caption,
            ha="center",
            va="bottom",
        )

    # Dotted vertical line at every hour containing at least one seizure
    seizure_hours = features.loc[features["num_seizures"] >= 1, "emu_hour"].values
    for seizure_hour in seizure_hours:
        load_ax.axvline(x=seizure_hour, color="red", linestyle="dotted")

    load_ax.set_ylabel("Load")
    # Headroom above 1 so the window captions fit inside the axes
    load_ax.set_ylim([0, 1.3])
    load_ax.set_title("Individual ASMs")

    # Plotting Teager Energy (Delta Band)
    energy_ax.plot(hours, features["teager_energy_delta"])
    energy_ax.set_ylabel("Teager Energy")
    energy_ax.set_title("Teager Energy (Delta Band)")

    # Plotting Kuramoto (Delta Band)
    synchrony_ax.plot(hours, features["kuramoto_delta"])
    synchrony_ax.set_ylabel("R")
    synchrony_ax.set_title("Synchrony (Delta Band)")
    synchrony_ax.set_xlabel("Time (hours)")

    plt.tight_layout()
    plt.show()

## Find taper period function

In [None]:
def _has_seizure_between(hourly_patient_features_df, first_hour, last_hour):
    """Return True if any row with num_seizures > 0 has emu_hour in [first_hour, last_hour]."""
    emu_hour = hourly_patient_features_df["emu_hour"]
    in_window = (emu_hour >= first_hour) & (emu_hour <= last_hour)
    has_seizure = hourly_patient_features_df["num_seizures"] > 0
    return bool((has_seizure & in_window).any())


def find_taper_periods(hourly_patient_features_df, patient_hup_id):
    """Choose seizure-free "before taper" and "after taper" windows for a patient.

    The before window starts at the first ``emu_hour`` where both
    ``teager_energy_delta`` and ``kuramoto_delta`` are present. Window layouts
    are tried in this order (period length / gap length, in days):
    2/3, 2/2, 2/1, 1/2, 1/1. A layout is rejected when its after window
    contains a seizure; the last layout is the fallback. Finally, the whole span
    from the start of the before window to the end of the after window
    (gap included) must be seizure-free, otherwise the patient is discarded.

    Args:
        hourly_patient_features_df: DataFrame with ``emu_hour``,
            ``num_seizures``, ``teager_energy_delta`` and ``kuramoto_delta``
            columns.
        patient_hup_id: Patient identifier. Unused; kept for interface
            compatibility.

    Returns:
        ``(before_taper_period, after_taper_period, period_length_days,
        gap_length_days)`` where each period is a ``(start, end)`` pair in
        ``emu_hour`` units. When the patient is discarded the result is
        ``((nan, nan), (nan, nan), None, None)``. When no hour has both
        features, the start is NaN, so all period bounds are NaN while the day
        counts stay at the first layout (2, 3); callers detect this with
        ``np.isnan``.
    """
    features = hourly_patient_features_df

    # Find the first 'emu_hour' where both delta-band features are not nan
    both_present = (
        features["teager_energy_delta"].notna() & features["kuramoto_delta"].notna()
    )
    start_point = features.loc[both_present, "emu_hour"].min()

    # (period_length_days, gap_length_days), from most to least preferred
    candidate_layouts = ((2, 3), (2, 2), (2, 1), (1, 2), (1, 1))
    last_position = len(candidate_layouts) - 1

    for position, (period_length_days, gap_length_days) in enumerate(
        candidate_layouts
    ):
        period_hours = 24 * period_length_days
        gap_hours = 24 * gap_length_days
        before_taper_period = (start_point, start_point + period_hours)
        after_start = start_point + period_hours + gap_hours
        after_taper_period = (after_start, after_start + period_hours)

        # The last layout is the fallback; it is only judged by the span check below
        if position == last_position:
            break
        if not _has_seizure_between(
            features, after_taper_period[0], after_taper_period[1]
        ):
            break
        print(
            f"{period_length_days} day periods with {gap_length_days} day gap contains seizures"
        )

    # check if the entire span (before window, gap and after window) contains seizures
    if _has_seizure_between(features, before_taper_period[0], after_taper_period[1]):
        print(
            f"{period_length_days} day periods with {gap_length_days} day gap contains seizures"
        )
        print("Last straw, both periods contain seizures, discard!")
        return (np.nan, np.nan), (np.nan, np.nan), None, None

    return before_taper_period, after_taper_period, period_length_days, gap_length_days

## Plot all time series

In [None]:
# Patients for which a seizure-free before/after window pair was found
good_hup_ids = []

for patient_hup_id in patient_hup_ids:
    # Load this patient's giant table
    table_path = os.path.join(TABLES_DIRECTORY, f"HUP_{patient_hup_id}.csv")
    hourly_patient_features_df = pd.read_csv(table_path)

    taper_result = find_taper_periods(hourly_patient_features_df, patient_hup_id)
    (
        before_taper_period,
        after_taper_period,
        period_length_days,
        gap_length_days,
    ) = taper_result

    # A NaN bound means no usable window exists, so the patient is skipped
    window_bounds = (*before_taper_period, *after_taper_period)
    if any(np.isnan(bound) for bound in window_bounds):
        print(
            f"Patient {patient_hup_id} has nan values in before_taper_period or after_taper_period"
        )
        continue

    print(
        f"Patient {patient_hup_id} before taper period: {before_taper_period} after taper period: {after_taper_period}"
    )
    plot_stuff(hourly_patient_features_df, before_taper_period, after_taper_period)
    good_hup_ids.append(patient_hup_id)

In [None]:
# Number of patients that were not skipped by the NaN-window check above
len(good_hup_ids)

## Synchrony before/after taper

In [None]:
# Patients whose delta-band synchrony did NOT rise significantly after taper.
# NOTE: the variable name is historical; in this cell it tracks synchrony.
teager_delta_bad = []
# Mean percent change in synchrony and its standard error, one entry per frequency band
percent_changes = []
standard_errors = []

# The frequency bands we're considering
freq_bands = ["delta", "theta", "alpha", "beta", "gamma"]

# The table and the taper windows do not depend on the band, so load them once
# per patient instead of once per (band, patient) pair.
patient_windows = []
for patient_hup_id in good_hup_ids:
    # Read in the giant table for this patient
    hourly_patient_features_df = pd.read_csv(
        os.path.join(TABLES_DIRECTORY, f"HUP_{str(patient_hup_id)}.csv")
    )
    (
        before_taper_period,
        after_taper_period,
        period_length_days,
        gap_length_days,
    ) = find_taper_periods(hourly_patient_features_df, patient_hup_id)

    # If any value in before_taper_period or after_taper_period is nan, skip this patient
    if (
        np.isnan(before_taper_period[0])
        or np.isnan(before_taper_period[1])
        or np.isnan(after_taper_period[0])
        or np.isnan(after_taper_period[1])
    ):
        print(
            f"Patient {patient_hup_id} has nan values in before_taper_period or after_taper_period"
        )
        continue

    patient_windows.append(
        (
            patient_hup_id,
            hourly_patient_features_df,
            before_taper_period,
            after_taper_period,
        )
    )

# Three box plots per row; keep at least one row so plt.subplots gets a valid grid
num_rows = max(1, math.ceil(len(patient_windows) / 3))

for band in freq_bands:
    # squeeze=False keeps axs two-dimensional even when there is a single row
    fig, axs = plt.subplots(num_rows, 3, figsize=(12, num_rows * 5), squeeze=False)

    # Initialize counters
    count = 0
    match_expectation_counter = 0
    changes_in_band = []

    for (
        patient_hup_id,
        hourly_patient_features_df,
        before_taper_period,
        after_taper_period,
    ) in patient_windows:
        emu_hour = hourly_patient_features_df["emu_hour"]
        band_synchrony = hourly_patient_features_df[f"kuramoto_{band}"]

        # Synchrony values inside the half-open windows [start, end)
        before_taper = band_synchrony[
            (emu_hour >= before_taper_period[0]) & (emu_hour < before_taper_period[1])
        ]
        after_taper = band_synchrony[
            (emu_hour >= after_taper_period[0]) & (emu_hour < after_taper_period[1])
        ]

        # Sanity check: each window must hold a whole number of 24-row days
        assert len(before_taper) % 24 == 0
        assert len(after_taper) % 24 == 0

        # Remove nan values
        before_taper = before_taper.dropna()
        after_taper = after_taper.dropna()

        # Keep only the upper half of the values in each window
        before_taper = before_taper.sort_values().iloc[len(before_taper) // 2 :]
        after_taper = after_taper.sort_values().iloc[len(after_taper) // 2 :]

        if len(before_taper) == 0 or len(after_taper) == 0:
            print(
                f"Patient {patient_hup_id} has no values in before_taper or after_taper"
            )
            continue

        # Mean synchrony before and after the taper
        mean_before_taper = before_taper.mean()
        mean_after_taper = after_taper.mean()

        # Percent change relative to the before-taper mean, recorded once per
        # patient so the standard error uses the true sample size
        percent_change = (
            (mean_after_taper - mean_before_taper) / mean_before_taper * 100
        )
        changes_in_band.append(percent_change)

        # Create box plots
        row = count // 3
        col = count % 3
        axs[row, col].boxplot(
            [before_taper, after_taper], labels=["Before Taper", "After Taper"]
        )

        axs[row, col].set_title(f"HUP {patient_hup_id}")
        # Set y-axis label
        axs[row, col].set_ylabel(f"Synchrony ({band.capitalize()} Band)")

        # Wilcoxon rank-sum test; the p-value is rounded for display only
        w_stat, w_p = stats.ranksums(before_taper, after_taper)
        w_p_display = round(w_p, 3)

        # Expectation: synchrony is significantly higher after taper
        if mean_after_taper > mean_before_taper and w_p < 0.05:
            match_expectation_counter += 1
        else:
            if band == "delta":
                teager_delta_bad.append(patient_hup_id)

        # Add test statistics to subplot
        axs[row, col].text(
            0.1,
            0.9,
            f"p={w_p_display}",
            transform=axs[row, col].transAxes,
            bbox=dict(facecolor="red", alpha=0.5),
        )

        count += 1

    # Calculate the average percent change and standard error for the band across all patients
    avg_percent_change = np.mean(changes_in_band)
    se = sem(changes_in_band)
    percent_changes.append(avg_percent_change)
    standard_errors.append(se)

    # Remove unused subplots
    for unused_ax in axs.flatten()[count:]:
        fig.delaxes(unused_ax)

    # Adjust spacing
    plt.tight_layout()

    # Show the plot
    plt.show()

    # Report how many patients showed a significant increase after taper
    print(
        f"The mean of after_taper is bigger than the mean of before_taper for {match_expectation_counter} out of {count} patients in {band.capitalize()} band."
    )

In [None]:
# The frequency bands we're considering, with capitalized names
freq_bands = ["Delta", "Theta", "Alpha", "Beta", "Gamma"]

# Create the figure to show percent change across different frequency bands with confidence intervals
plt.figure(figsize=(10, 5))
bars = plt.bar(freq_bands, percent_changes, yerr=standard_errors, capsize=5)
plt.xlabel("Frequency Band")
plt.ylabel("Percent Change")
plt.title("Percent Change in Synchrony")
plt.show()

## Teager energy before/after taper

In [None]:
# Patients whose delta-band Teager energy did NOT drop significantly after taper
teager_delta_bad = []
# Mean percent change in Teager energy and its standard error, one entry per frequency band
percent_changes = []
standard_errors = []
all_changes = []  # Per-patient percent changes, one list per frequency band

# The frequency bands we're considering
freq_bands = ["delta", "theta", "alpha", "beta", "gamma"]

# The table and the taper windows do not depend on the band, so load them once
# per patient instead of once per (band, patient) pair.
patient_windows = []
for patient_hup_id in good_hup_ids:
    # Read in the giant table for this patient
    hourly_patient_features_df = pd.read_csv(
        os.path.join(TABLES_DIRECTORY, f"HUP_{str(patient_hup_id)}.csv")
    )
    (
        before_taper_period,
        after_taper_period,
        period_length_days,
        gap_length_days,
    ) = find_taper_periods(hourly_patient_features_df, patient_hup_id)

    # If any value in before_taper_period or after_taper_period is nan, skip this patient
    if (
        np.isnan(before_taper_period[0])
        or np.isnan(before_taper_period[1])
        or np.isnan(after_taper_period[0])
        or np.isnan(after_taper_period[1])
    ):
        print(
            f"Patient {patient_hup_id} has nan values in before_taper_period or after_taper_period"
        )
        continue

    patient_windows.append(
        (
            patient_hup_id,
            hourly_patient_features_df,
            before_taper_period,
            after_taper_period,
        )
    )

# Three box plots per row; keep at least one row so plt.subplots gets a valid grid
num_rows = max(1, math.ceil(len(patient_windows) / 3))

for band in freq_bands:
    # squeeze=False keeps axs two-dimensional even when there is a single row
    fig, axs = plt.subplots(num_rows, 3, figsize=(12, num_rows * 5), squeeze=False)

    # Initialize counters
    count = 0
    match_expectation_counter = 0
    changes_in_band = []

    for (
        patient_hup_id,
        hourly_patient_features_df,
        before_taper_period,
        after_taper_period,
    ) in patient_windows:
        emu_hour = hourly_patient_features_df["emu_hour"]
        band_energy = hourly_patient_features_df[f"teager_energy_{band}"]

        # Teager energy values inside the half-open windows [start, end)
        before_taper = band_energy[
            (emu_hour >= before_taper_period[0]) & (emu_hour < before_taper_period[1])
        ]
        after_taper = band_energy[
            (emu_hour >= after_taper_period[0]) & (emu_hour < after_taper_period[1])
        ]

        # Sanity check: each window must hold a whole number of 24-row days
        assert len(before_taper) % 24 == 0
        assert len(after_taper) % 24 == 0

        # Remove nan values; every remaining hour is used (no trimming)
        before_taper = before_taper.dropna()
        after_taper = after_taper.dropna()

        if len(before_taper) == 0 or len(after_taper) == 0:
            print(
                f"Patient {patient_hup_id} has no values in before_taper or after_taper"
            )
            continue

        # Calculate the mean teager energy before and after the taper period
        mean_before_taper = before_taper.mean()
        mean_after_taper = after_taper.mean()

        # Percent change relative to the before-taper mean, recorded once per
        # patient so the standard error uses the true sample size
        percent_change = (
            (mean_after_taper - mean_before_taper) / mean_before_taper * 100
        )
        changes_in_band.append(percent_change)

        # Create box plots
        row = count // 3
        col = count % 3
        axs[row, col].boxplot(
            [before_taper, after_taper], labels=["Before Taper", "After Taper"]
        )

        axs[row, col].set_title(f"HUP {patient_hup_id}")
        # Set y-axis label
        axs[row, col].set_ylabel(f"Teager Energy ({band.capitalize()} Band)")

        # Wilcoxon rank-sum test; the p-value is rounded for display only
        w_stat, w_p = stats.ranksums(before_taper, after_taper)
        w_p_display = round(w_p, 3)

        # Expectation: Teager energy is significantly lower after taper
        if mean_after_taper < mean_before_taper and w_p < 0.05:
            match_expectation_counter += 1
        else:
            if band == "delta":
                teager_delta_bad.append(patient_hup_id)

        # Add test statistics to subplot
        axs[row, col].text(
            0.1,
            0.9,
            f"p={w_p_display}",
            transform=axs[row, col].transAxes,
            bbox=dict(facecolor="red", alpha=0.5),
        )

        count += 1

    # Calculate the average percent change and standard error for the band across all patients
    avg_percent_change = np.mean(changes_in_band)
    se = sem(changes_in_band)
    percent_changes.append(avg_percent_change)
    standard_errors.append(se)
    all_changes.append(changes_in_band)

    # Remove unused subplots
    for unused_ax in axs.flatten()[count:]:
        fig.delaxes(unused_ax)

    # Adjust spacing
    plt.tight_layout()

    # Show the plot
    plt.show()

    # Report how many patients showed a significant decrease after taper
    print(
        f"The mean of after_taper is smaller than the mean of before_taper for {match_expectation_counter} out of {count} patients in {band.capitalize()} band."
    )

# Display names for the x-axis, in the order the per-band statistics were appended
freq_bands = ["Delta", "Theta", "Alpha", "Beta", "Gamma"]

# Bar chart of the mean percent change per band; error bars are the standard errors
fig, ax = plt.subplots(figsize=(10, 5))
bars = ax.bar(freq_bands, percent_changes, yerr=standard_errors, capsize=5)
ax.set_xlabel("Frequency Band")
ax.set_ylabel("Percent Change")
ax.set_title("Percent Change in Teager Energy")
plt.show()