# Mixed Effects Models

In [1]:
import os
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Directory holding the per-patient "giant table" CSV files; the analysis below
# reads them as HUP_<patient_hup_id>.csv. The path is relative, so it is
# resolved against the kernel's current working directory.
TABLES_DIRECTORY = "../../Data/giant_tables"

In [2]:
# Patient IDs to analyse. Currently hard-coded to a single patient (210); the
# per-patient loop further down reads HUP_<id>.csv for every ID in this list.
patient_hup_ids = [210]
# The commented-out block below is the disabled "discover all patients" path:
# it scans TABLES_DIRECTORY and parses the ID out of names like HUP_210.csv.
# NOTE(review): if it is re-enabled, initialise patient_hup_ids to [] first;
# as written, 210 stays in the list and is appended a second time when
# HUP_210.csv is found.
# # Iterate through all files in TABLES_DIRECTORY
# for filename in os.listdir(TABLES_DIRECTORY):
#     # Only look at filename that are .csv files and does not begin with .
#     if filename.endswith(".csv") and not filename.startswith("."):
#         # Get the patient_hup_id from the filename which is after _ and before .
#         patient_hup_id = int(filename.split("_")[1].split(".")[0])
#         patient_hup_ids.append(patient_hup_id)

# patient_hup_ids = sorted(patient_hup_ids)
# len(patient_hup_ids)

In [4]:
# Columns that take part in the regressions. Every model below uses one of
# them as the response and the other three as predictors. Keeping the names in
# a single list means the column selection and the dropna subset cannot drift
# apart.
MODEL_COLUMNS = [
    "teager_energy_delta",
    "kuramoto_delta",
    "spikes_avg_all",
    "med_sum_no_lorazepam_raw",
]

# Model formulae ("response ~ predictor + predictor + ..."). They do not depend
# on the patient, so they are defined once here rather than being rebuilt on
# every iteration of the patient loop.
predict_teager_energy_delta_formula = "teager_energy_delta ~ kuramoto_delta + spikes_avg_all + med_sum_no_lorazepam_raw"
predict_aed_sum_formula = "med_sum_no_lorazepam_raw ~ kuramoto_delta + spikes_avg_all + teager_energy_delta"
predict_kuramoto_delta_formula = "kuramoto_delta ~ med_sum_no_lorazepam_raw + spikes_avg_all + teager_energy_delta"
predict_spikes_formula = "spikes_avg_all ~ med_sum_no_lorazepam_raw + kuramoto_delta + teager_energy_delta"

all_formulae = [
    predict_teager_energy_delta_formula,
    predict_aed_sum_formula,
    predict_kuramoto_delta_formula,
    predict_spikes_formula,
]

for patient_hup_id in patient_hup_ids:
    # Read in the giant table for this patient
    hourly_patient_features_df = pd.read_csv(
        os.path.join(TABLES_DIRECTORY, f"HUP_{patient_hup_id}.csv")
    )

    # Keep only emu_hour plus the model columns
    data = hourly_patient_features_df[["emu_hour", *MODEL_COLUMNS]]

    # Fill missing values using linear interpolation.
    # NOTE(review): method="linear" ignores the index and treats rows as
    # equally spaced in file order; this presumably relies on the table being
    # sorted by emu_hour with one row per hour -- confirm.
    data_interpolated = data.interpolate(method="linear")

    # Drop rows that still have a missing value in any model column (with the
    # default forward direction, NaNs before the first valid value stay unfilled)
    data_clean = data_interpolated.dropna(subset=MODEL_COLUMNS)

    # A patient with no usable rows would make the fit fail with an opaque
    # error and abort the remaining patients; skip it explicitly instead.
    if data_clean.empty:
        print(
            f"Skipping HUP {patient_hup_id}: no complete rows left after "
            "interpolation and dropna"
        )
        continue

    for formula in all_formulae:
        # Fit the model
        model = smf.ols(formula, data=data_clean)
        result = model.fit()

        # Print the summary statistics of the fitted model
        print(result.summary())

                             OLS Regression Results                            
Dep. Variable:     teager_energy_delta   R-squared:                       0.501
Model:                             OLS   Adj. R-squared:                  0.495
Method:                  Least Squares   F-statistic:                     75.43
Date:                 Fri, 28 Jul 2023   Prob (F-statistic):           8.44e-34
Time:                         07:48:12   Log-Likelihood:                 45.833
No. Observations:                  229   AIC:                            -83.67
Df Residuals:                      225   BIC:                            -69.93
Df Model:                            3                                         
Covariance Type:             nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
Intercept     