## Notebook for loading the FEAT model from the original paper and getting new predictions

In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import average_precision_score

import numpy as np
import pandas as pd

from _load_llm_results import *
import numpy as np
import warnings
import os
import importlib
import sys; sys.path.append('../')
from evaluate_model import read_data
from sklearn.metrics import average_precision_score, roc_auc_score

warnings.filterwarnings("ignore")

%matplotlib inline

from tqdm import tqdm
from glob import glob

# Root folder of the datasets; passed to read_data when evaluating folds.
data_dir = '../data'
# Output folder for the generated LaTeX table (tab_feat.tex is written here).
paper_dir = '../paper/floats/'

"""Model features
(reversed)
[(sum_enc_during_htn_meds_3>=1.500000)]
[(median_enc_during_htn_meds_4_plus>=1.250000)]
[sd_enc_during_htn_meds_2]
[(mean_systolic>=128.641357)]
[(max.CALCIUM>=10.150000)]
[(re_htn_spec_sum>=40.500000)]
"""

# Hardcoding the best FEAT model from original paper 
class FeatTransformer(BaseEstimator, TransformerMixin):
    """Hard-coded feature map of the FEAT model, followed by standardisation.

    The transformer turns a raw feature matrix into six derived columns
    (five threshold indicators and one raw column) and standardises them
    with a ``StandardScaler`` fitted on the training data.

    Attributes
    ----------
    ss : StandardScaler
        Scaler applied to the six derived features; fitted in ``fit``.
    feature_names : list of str
        Human-readable labels, in the same order as the columns returned by
        ``feat_model``.
    """

    def __init__(self):
        self.ss = StandardScaler()
        # NOTE(review): these labels abbreviate the thresholds actually
        # applied in feat_model (e.g. '>1' here vs '>= 1.5' there).
        self.feature_names = [
            'sum_enc_during_htn_meds_3>1',
            'median_enc_during_htn_meds_4_plus>1.25',
            'sd_enc_during_htn_meds_2',
            'mean_systolic>128.6',
            'max.CALCIUM>10.1',
            're_htn_spec_sum>40']

    def fit(self, X, y=None):
        """Fit the internal scaler on the derived features of ``X``.

        ``y`` is accepted for scikit-learn API compatibility and is ignored.
        Returns ``self``.
        """
        self.ss.fit(self.feat_model(X))
        return self

    def feat_model(self, X):
        """Compute the six unscaled FEAT features.

        Parameters
        ----------
        X : pandas.DataFrame or 2-D array
            Raw feature matrix. Columns are addressed by position, so the
            column order must match the data the model was derived from.

        Returns
        -------
        numpy.ndarray
            Array with one row per sample and six columns.
        """
        # isinstance (rather than comparing the type name) also covers
        # DataFrame subclasses.
        if isinstance(X, pd.DataFrame):
            X = X.values

        # Positional indices; presumably they point at the columns named in
        # feature_names -- verify against the training CSV column order.
        return np.column_stack([
            X[:, 280] >= 1.5,
            X[:, 285] >= 1.25,
            X[:, 287],
            X[:, 19] >= 128.641357,
            X[:, 89] >= 10.15,
            X[:, 308] >= 40.5,
        ])

    def transform(self, X):
        """Return the derived features of ``X`` scaled by the fitted scaler."""
        return self.ss.transform(self.feat_model(X))

# Full FEAT classifier: hard-coded feature map + scaling ('prep'), then an
# L2-regularised logistic regression fitted with liblinear ('est').
ft_lr_estimator = Pipeline(
    steps=[
        ('prep', FeatTransformer()),
        (
            'est',
            LogisticRegression(
                solver='liblinear',
                penalty='l2',
                C=1.0,
                intercept_scaling=1.0,
            ),
        ),
    ]
)

In [None]:
# Load the data used to fit the hard-coded FEAT model (Dataset101, ResHtndx,
# fold A).
# NOTE(review): the original comment called this the "test dataset (300 random
# patients)", but the file that is read is the *Train* CSV -- confirm.

# Outcome columns present in the CSV, mapped to their short target names
# (the short name is what appears in the dataset path below).
targets = dict(
    htn_dx_ia='Htndx',
    res_htn_dx_ia='ResHtndx',
    htn_hypok_dx_ia='HtnHypoKdx',
    HTN_heuristic='HtnHeuri',
    res_HTN_heuristic='ResHtnHeuri',
    hypoK_heuristic_v4='HtnHypoKHeuri',
)

# Everything that must not be fed to the model: patient id + all outcomes.
drop_cols = ['UNI_ID', *targets]

df_train = pd.read_csv('../data/Dataset101/ResHtndx/ResHtndxATrain.csv')

# Features are whatever remains after removing the id/outcome columns; the
# label is the resistant-hypertension diagnosis column.
y_train = df_train['res_htn_dx_ia'].values
X_train = df_train.drop(columns=drop_cols)

In [None]:
# Fit the pipeline on the training split: this fits the scaler inside
# FeatTransformer and the logistic-regression step.
ft_lr_estimator.fit(X_train, y_train)

In [None]:
def bootstrap(val, n=1000, fn=np.mean):
    """Bootstrap a statistic of a 1-D sample.

    Parameters
    ----------
    val : array-like
        Observations to resample (with replacement).
    n : int, default 1000
        Number of bootstrap resamples.
    fn : callable, default ``np.mean``
        Statistic computed on every resample.

    Returns
    -------
    tuple
        ``(mean, std, ci_upper, ci_lower)`` of the bootstrapped statistic,
        where the bounds are the 0.95 and 0.05 quantiles.

    Raises
    ------
    ValueError
        If ``val`` is empty.
    """
    # Positional indexing below requires an ndarray (a list would fail and a
    # pandas Series would index by label).
    values = np.asarray(val)
    size = len(values)
    if size == 0:
        raise ValueError("cannot bootstrap an empty sample")

    # randint's upper bound is exclusive, so it must be `size` (not
    # `size - 1`) for the last observation to be eligible for resampling.
    stats = [
        fn(values[np.random.randint(0, size, size=size)])
        for _ in range(n)
    ]

    m = np.mean(stats)
    sd = np.std(stats)
    ci_upper = np.quantile(stats, 0.95)
    ci_lower = np.quantile(stats, 0.05)
    return m, sd, ci_upper, ci_lower


def evaluate_all_folds(metric, feat_model, target, res_dict, folds=('A', 'B', 'C', 'D', 'E'), bootstrap=False):
    """Score a fitted model on the held-out split of each fold.

    Parameters
    ----------
    metric : callable
        Scoring function called as ``metric(y_true, y_score_or_pred)``.
        ``average_precision_score`` and ``roc_auc_score`` are fed the
        positive-class probability; any other metric is fed hard predictions.
    feat_model : estimator
        Fitted model exposing ``predict`` and ``predict_proba``.
    target : str
        Target identifier forwarded to ``read_data``.
    res_dict : dict
        Dictionary the results are written into (mutated in place).
    folds : iterable of str
        Fold identifiers to evaluate.
    bootstrap : bool, default False
        When True, the test split is resampled 1000 times and the mean, std
        and 0.95 / 0.05 quantiles of the metric are stored; otherwise a
        single score is stored. (This flag shadows the module-level
        ``bootstrap`` function inside this function.)

    Returns
    -------
    dict
        ``res_dict`` with keys ``"<metric name>_fold_out_<fold>"`` (plus the
        suffixes ``_mean``, ``_std``, ``_ci_upper``, ``_ci_lower`` when
        bootstrapping).
    """
    needs_proba = metric in (average_precision_score, roc_auc_score)

    def _score(X, y):
        # Ranking metrics need a continuous score; everything else is
        # computed from hard predictions. (Previously this branch returned
        # the raw predictions instead of the metric value.)
        if needs_proba:
            return metric(y, feat_model.predict_proba(X)[:, 1])
        return metric(y, feat_model.predict(X))

    for fold in folds:
        # The positional arguments after the fold are passed exactly as in
        # the original call; their meaning is defined by read_data --
        # TODO confirm (the trailing 1318 matches the 'random_state' used
        # elsewhere in this notebook).
        _, _, X_test, y_test = read_data(
            target, fold, 1, False,
            False, data_dir, 1318
        )

        entry = f"{metric.__name__}_fold_out_{fold}"
        if not bootstrap:
            res_dict[entry] = _score(X_test, y_test)
            continue

        n_rows = len(X_test)
        # Index labels by position regardless of whether read_data returned
        # an ndarray or a pandas Series.
        y_arr = np.asarray(y_test)
        scores = []
        for _ in tqdm(range(1_000)):
            # Upper bound is exclusive: use n_rows so the last row can be
            # drawn too.
            rows = np.random.randint(0, n_rows, size=n_rows)
            scores.append(_score(X_test.iloc[rows, :], y_arr[rows]))

        res_dict[f"{entry}_mean"] = np.mean(scores)
        res_dict[f"{entry}_std"] = np.std(scores)
        res_dict[f"{entry}_ci_upper"] = np.quantile(scores, 0.95)
        res_dict[f"{entry}_ci_lower"] = np.quantile(scores, 0.05)

    return res_dict

In [None]:
# Descriptive columns of the results row. 'RunID', 'fold' and 'random_state'
# repeat the values used when reading the data in evaluate_all_folds;
# NOTE(review): the meaning of 'size' and 'scale' is not visible in this
# notebook -- confirm.
held_out_performances = {
    'size' : 45,
    'target' : 'ResHtndx',
    'model' : 'FEAT',
    'scale' : False,
    'RunID' : 1,
    'fold' : 'A',
    'random_state' : 1318 
}

# Add bootstrapped AUPRC and AUROC (mean, std, CI bounds) for fold A.
for metric_f in [average_precision_score, roc_auc_score]:
    held_out_performances = evaluate_all_folds(
        metric_f, ft_lr_estimator, 'res_htn_dx_ia', held_out_performances,
        ['A'], bootstrap=True)


# One-row table; from_records is a classmethod, so no throwaway instance.
held_out_performances = pd.DataFrame.from_records([held_out_performances])
print("-"*120)

# Create LaTeX table.
# The column spec must cover the index plus every data column; a fixed
# "lrrr" declares only four columns and breaks the tabular environment.
# NOTE(review): escape=False leaves the underscores in the column names
# unescaped -- confirm the consuming LaTeX document handles that.
latex_table = held_out_performances.to_latex(
    index=True,
    column_format="l" + "r" * held_out_performances.shape[1],
    escape=False
)

# Save LaTeX table to file (os.path.join avoids the doubled '/' that string
# formatting produced, since paper_dir already ends with a separator).
filename = os.path.join(paper_dir, "tab_feat.tex")
with open(filename, 'w') as f:
    f.write(latex_table)

print(f"\nLaTeX table saved to {filename}\n")