In [1]:
from _load_llm_results import *

import numpy as np
import warnings
import os
import importlib
import sys; sys.path.append('../')
from evaluate_model import read_data

from sklearn.metrics import accuracy_score, precision_score, balanced_accuracy_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import roc_auc_score

warnings.filterwarnings("ignore")

%matplotlib inline

from tqdm import tqdm
from glob import glob

## Rich prompt, expert features

In [2]:
import nevergrad as ng

class Richprompt_Expertfeatures_model_wrapper():
    """Rule-based risk score whose thresholds and weights are tunable arguments.

    The instance holds a feature frame ``X``, the labels ``y`` and a scoring
    function ``metric``. ``eval`` turns the rules into one score per row of
    ``X`` and returns ``1 - metric(...)`` so that an optimiser can minimise it.
    """
    def __init__(self, X, y, metric=average_precision_score): #roc_auc_score
        self.X = X
        self.y = y
        self.metric = metric

    def eval(
        self,

        # Setting original values as default values
        high_BP_during_htn_meds_3_1: int = 2,
        sum_enc_during_htn_meds_4_plus: int = 2,
        mean_diastolic_1: int = 80,
        mean_systolic_1: int = 140,
        high_BP_during_htn_meds_2: int = 5,
        high_BP_during_htn_meds_3_2: int = 5,

        # Now for the probabilities
        high_BP_during_htn_meds_3_1_prob: float = 0.4,
        sum_enc_during_htn_meds_4_plus_prob: float = 0.4,
        mean_diastolic_1_prob: float = 0.1,
        mean_systolic_1_prob: float = 0.1,
        high_BP_during_htn_meds_2_prob: float = 0.1,
        high_BP_during_htn_meds_3_2_prob: float = 0.1,

        # the scaling conditionals
        Med_Potassium_N: int = 0,
        Dx_HypoK_N: int = 0,
        mean_systolic_2: int = 130,
        mean_diastolic_2: int = 75,

        Med_Potassium_N_Dx_HypoK_N_prob: float = 0.5,
        mean_systolic_mean_diastolic: float = 0.5
    ) -> float:
        """Score every row of ``self.X`` and return the loss ``1 - metric``.

        Parameters
        ----------
        high_BP_during_htn_meds_3_1 ... high_BP_during_htn_meds_3_2 : int
            Thresholds of the six additive rules. The first two rules fire on
            ``>=``, the remaining four on a strict ``>``.
        *_prob : float
            Amount added to a row's score when the matching rule fires.
        Med_Potassium_N, Dx_HypoK_N, mean_systolic_2, mean_diastolic_2 : int
            Thresholds of the two scaling rules (both parts of a rule must hold).
        Med_Potassium_N_Dx_HypoK_N_prob, mean_systolic_mean_diastolic : float
            Factor the (already capped) score is multiplied by when the
            corresponding scaling rule fires.

        Returns
        -------
        float
            ``1 - self.metric(self.y, scores)``. For ``average_precision_score``
            and ``roc_auc_score`` the raw scores are passed; for any other
            metric the scores are first thresholded at 0.5.
        """
        X = self.X

        # Read each column once as a plain array: the rules are then evaluated
        # for all rows at once instead of through a per-row Python loop, and the
        # result does not depend on the frame's index (bootstrap resamples carry
        # duplicated index labels).
        bp_meds_3 = X['high_BP_during_htn_meds_3'].to_numpy()
        enc_4_plus = X['sum_enc_during_htn_meds_4_plus'].to_numpy()
        diastolic = X['mean_diastolic'].to_numpy()
        systolic = X['mean_systolic'].to_numpy()
        bp_meds_2 = X['high_BP_during_htn_meds_2'].to_numpy()
        potassium = X['Med_Potassium_N'].to_numpy()
        hypo_k = X['Dx_HypoK_N'].to_numpy()

        # (rule fires?, contribution) -- kept in the original order so the
        # floating-point accumulation is the same as adding them one by one.
        additive_rules = [
            (bp_meds_3 >= high_BP_during_htn_meds_3_1, high_BP_during_htn_meds_3_1_prob),
            (enc_4_plus >= sum_enc_during_htn_meds_4_plus, sum_enc_during_htn_meds_4_plus_prob),
            (diastolic > mean_diastolic_1, mean_diastolic_1_prob),
            (systolic > mean_systolic_1, mean_systolic_1_prob),
            (bp_meds_2 > high_BP_during_htn_meds_2, high_BP_during_htn_meds_2_prob),
            (bp_meds_3 > high_BP_during_htn_meds_3_2, high_BP_during_htn_meds_3_2_prob),
        ]

        probabilities = np.zeros(len(X), dtype=float)
        for fires, contribution in additive_rules:
            probabilities = probabilities + np.where(fires, contribution, 0.0)

        # clipping: only the upper end is capped, negative sums are kept as they are
        probabilities = np.minimum(probabilities, 1.0)

        # scaling rules are applied after the cap, potassium rule first
        potassium_rule = (potassium > Med_Potassium_N) & (hypo_k > Dx_HypoK_N)
        probabilities = probabilities * np.where(
            potassium_rule, Med_Potassium_N_Dx_HypoK_N_prob, 1.0)

        low_bp_rule = (systolic < mean_systolic_2) & (diastolic < mean_diastolic_2)
        probabilities = probabilities * np.where(
            low_bp_rule, mean_systolic_mean_diastolic, 1.0)

        # making it a minimization problem
        if self.metric in [average_precision_score, roc_auc_score]:
            return 1 - self.metric(self.y, probabilities)

        return 1 - self.metric(self.y, np.where(probabilities>0.5, 1.0, 0.0))

In [3]:
# Evaluate the rich-prompt / expert-features rule model with its default
# parameters, tune its weights and scaling thresholds with nevergrad on one
# training split, and compare default vs. tuned parameters on the matching
# held-out split.
# NOTE(review): `data_dir` is not defined in any visible cell -- presumably it
# comes from `from _load_llm_results import *`; confirm.
# NOTE(review): the meaning of the positional read_data arguments (fold letter,
# run id, two booleans, trailing integer) is not visible here -- confirm against
# evaluate_model.read_data.

# data used to generate the model ----------------------------------------------
X_train, y_train, X_test, y_test = read_data(
    'res_htn_dx_ia', 'C', 1, False, True, data_dir, 24284 )

model = Richprompt_Expertfeatures_model_wrapper(X_train, y_train)

# should have same auprc as train, 0.6132813108536367
# (eval() returns a loss, i.e. 1 - metric, hence the `1 -` to get the metric back)
print("with default values, AUPRC on it's original training data is", 1-model.eval())

# held-out data ----------------------------------------------------------------
# X_train/y_train/X_test/y_test are rebound here; from this point on they refer
# to the fold 'A' / run 101 split, not to the split loaded above.
X_train, y_train, X_test, y_test = read_data(
    'res_htn_dx_ia', 'A', 101, False, False, data_dir, 1318 )    
    
# X_train = pd.concat([X_train, X_test], axis=0)
# y_train = pd.concat([y_train, y_test], axis=0)

model = Richprompt_Expertfeatures_model_wrapper(X_train, y_train)

print("with default values, AUPRC on entire training data is", 1-model.eval())
# temporarily switch the wrapper's metric to report AUROC, then restore AUPRC so
# the optimisation below minimises 1 - average precision
model.metric = roc_auc_score
print("with default values, AUROC on entire training data is", 1-model.eval())
model.metric = average_precision_score

# Search space: only the keyword arguments listed here are tuned; every argument
# of model.eval that is not listed keeps its default value.
parametrization = ng.p.Instrumentation(
    # (first-stage thresholds are commented out, so they stay at their defaults)
    # high_BP_during_htn_meds_3_1 = ng.p.Scalar(lower=0, upper=10).set_integer_casting(),
    # sum_enc_during_htn_meds_4_plus = ng.p.Scalar(lower=0, upper=10).set_integer_casting(),
    # mean_diastolic_1 = ng.p.Scalar(lower=40, upper=120).set_integer_casting(),
    # mean_systolic_1 = ng.p.Scalar(lower=40, upper=160).set_integer_casting(),
    # high_BP_during_htn_meds_2 = ng.p.Scalar(lower=0, upper=10).set_integer_casting(),
    # high_BP_during_htn_meds_3_2 = ng.p.Scalar(lower=0, upper=10).set_integer_casting(),

    # additive contributions; the lower bound of -1 lets a rule decrease the score
    high_BP_during_htn_meds_3_1_prob = ng.p.Scalar(lower=-1.0, upper=1.0),
    sum_enc_during_htn_meds_4_plus_prob = ng.p.Scalar(lower=-1.0, upper=1.0),
    mean_diastolic_1_prob = ng.p.Scalar(lower=-1.0, upper=1.0),
    mean_systolic_1_prob = ng.p.Scalar(lower=-1.0, upper=1.0),
    high_BP_during_htn_meds_2_prob = ng.p.Scalar(lower=-1.0, upper=1.0),
    high_BP_during_htn_meds_3_2_prob = ng.p.Scalar(lower=-1.0, upper=1.0),
    
    # integer thresholds of the two scaling rules
    Med_Potassium_N = ng.p.Scalar(lower=0, upper=10).set_integer_casting(),
    Dx_HypoK_N = ng.p.Scalar(lower=0, upper=10).set_integer_casting(),
    mean_systolic_2 = ng.p.Scalar(lower=40, upper=160).set_integer_casting(),
    mean_diastolic_2 = ng.p.Scalar(lower=40, upper=120).set_integer_casting(),

    # multiplicative factors of the two scaling rules (negative values allowed)
    Med_Potassium_N_Dx_HypoK_N_prob = ng.p.Scalar(lower=-1.0, upper=1.0),
    mean_systolic_mean_diastolic = ng.p.Scalar(lower=-1.0, upper=1.0),
)
# NOTE(review): no random seed is set for the optimiser in this cell, so the
# recommendation presumably changes between runs -- confirm and seed if the
# reported numbers must be reproducible.
optimizer = ng.optimizers.NGOpt(parametrization=parametrization, budget=1_000)

# optimizing on all training data
# optimizer.tell(ng.p.MultiobjectiveReference(), [1, 1])
# `recommendation` is a kernel global that evaluate_all_folds_nevergrad_wrapper
# reads later; re-running another optimisation cell rebinds it.
recommendation = optimizer.minimize(model.eval, verbosity=0)

print("="*80)
for k, v in recommendation.kwargs.items():
    print(f'{k:34s} : {v}')
print("="*80)

# Should be better than default values
print("with optimized values (using 101 training data), AUPRC on train is", 1-model.eval(**recommendation.kwargs))
model.metric = roc_auc_score
print("with optimized values (using 101 training data), AUROC on train is", 1-model.eval(**recommendation.kwargs))
model.metric = average_precision_score

# fresh wrapper bound to the held-out partition of the same split; it uses the
# class default metric (average precision)
model_final = Richprompt_Expertfeatures_model_wrapper(X_test, y_test)
print("WITHOUT using optimized values, AUPRC on 101 held-out data is", 1-model_final.eval())
print("using optimized values, AUPRC on 101 held-out data is", 1-model_final.eval(**recommendation.kwargs))

with default values, AUPRC on it's original training data is 0.6132813108536367
with default values, AUPRC on entire training data is 0.6317278522679678
with default values, AUROC on entire training data is 0.9373139971703176
high_BP_during_htn_meds_3_1_prob   : 0.36039935875308055
sum_enc_during_htn_meds_4_plus_prob : 0.2385765713385698
mean_diastolic_1_prob              : 0.19564956672171355
mean_systolic_1_prob               : 0.0139234647246416
high_BP_during_htn_meds_2_prob     : -0.16690324382529445
high_BP_during_htn_meds_3_2_prob   : 0.08002335703445942
Med_Potassium_N                    : 3
Dx_HypoK_N                         : 5
mean_systolic_2                    : 115
mean_diastolic_2                   : 88
Med_Potassium_N_Dx_HypoK_N_prob    : 0.7875239121317318
mean_systolic_mean_diastolic       : -0.2702438634934956
with optimized values (using 101 training data), AUPRC on train is 0.7421522816566817
with optimized values (using 101 training data), AUROC on train is 0.93342

In [4]:
# List every candidate on the optimiser's Pareto front, ordered by ascending
# first loss, together with its recorded losses.
print("Pareto front:")
for param in sorted(
    optimizer.pareto_front(),
    key=lambda candidate: candidate.losses[0],
):
    print(f"{param} with losses {param.losses}")

Pareto front:
Instrumentation(Tuple(),Dict(Dx_HypoK_N=Scalar{Cl(0,10,b),Int}[sigma=Scalar{exp=2.03}],Med_Potassium_N=Scalar{Cl(0,10,b),Int}[sigma=Scalar{exp=2.03}],Med_Potassium_N_Dx_HypoK_N_prob=Scalar{Cl(-1,1,b)}[sigma=Scalar{exp=2.03}],high_BP_during_htn_meds_2_prob=Scalar{Cl(-1,1,b)}[sigma=Scalar{exp=2.03}],high_BP_during_htn_meds_3_1_prob=Scalar{Cl(-1,1,b)}[sigma=Scalar{exp=2.03}],high_BP_during_htn_meds_3_2_prob=Scalar{Cl(-1,1,b)}[sigma=Scalar{exp=2.03}],mean_diastolic_1_prob=Scalar{Cl(-1,1,b)}[sigma=Scalar{exp=2.03}],mean_diastolic_2=Scalar{Cl(40,120,b),Int}[sigma=Scalar{exp=2.03}],mean_systolic_1_prob=Scalar{Cl(-1,1,b)}[sigma=Scalar{exp=2.03}],mean_systolic_2=Scalar{Cl(40,160,b),Int}[sigma=Scalar{exp=2.03}],mean_systolic_mean_diastolic=Scalar{Cl(-1,1,b)}[sigma=Scalar{exp=2.03}],sum_enc_during_htn_meds_4_plus_prob=Scalar{Cl(-1,1,b)}[sigma=Scalar{exp=2.03}])):((), {'high_BP_during_htn_meds_3_1_prob': 0.36039935875308055, 'sum_enc_during_htn_meds_4_plus_prob': 0.2385765713385698, 

In [5]:
def evaluate_all_folds_nevergrad_wrapper(
    metric, model, target, res_dict, folds=['A', 'B', 'C', 'D', 'E'],
    bootstrap=False, n_reps=1_000, seed=None):
    """Score ``model`` on the train and held-out partition of each fold.

    The model is evaluated with the keyword arguments stored in the
    notebook-global ``recommendation`` (looked up at call time), so whichever
    optimisation cell ran last decides the parameters that are used.

    Parameters
    ----------
    metric : callable
        Scoring function; assigned to ``model.metric`` and used (through its
        ``__name__``) to build the result keys.
    model : object
        Wrapper exposing ``X``, ``y``, ``metric`` and ``eval(**kwargs)`` that
        returns ``1 - metric``. Its ``X``/``y``/``metric`` attributes are
        overwritten by this function.
    target : str
        Forwarded to ``read_data`` as the first argument.
    res_dict : dict
        Dictionary that receives the results; updated in place and returned.
    folds : sequence of str
        Fold identifiers forwarded to ``read_data``. The default list is only
        iterated, never mutated.
    bootstrap : bool
        When true, each partition is resampled with replacement ``n_reps``
        times and the mean, standard deviation and the 5% / 95% quantiles of
        the metric are stored; otherwise the metric is computed once.
    n_reps : int
        Number of bootstrap resamples.
    seed : int or None
        When given, the bootstrap indices come from a private
        ``np.random.RandomState(seed)``. With ``None`` the global ``np.random``
        state is used, as before.

    Returns
    -------
    dict
        ``res_dict`` with one ``"{metric}_{partition}_{fold}"`` entry per
        partition, or the four ``_mean`` / ``_std`` / ``_ci_upper`` /
        ``_ci_lower`` entries when bootstrapping.
    """
    model.metric = metric

    sampler = np.random if seed is None else np.random.RandomState(seed)

    def score(X, y):
        # point the wrapper at the requested data before evaluating it
        model.X = X
        model.y = y

        # remove the recommendation kwargs to get the original model
        # (model.eval returns a loss, so 1 - loss recovers the metric value)
        return 1 - model.eval(**recommendation.kwargs)

    for FOLD in folds:
        X_train, y_train, X_test, y_test = read_data(
            target, FOLD, 101, False, False, data_dir, 1318 )

        for partition, X, y in [('train', X_train, y_train), ('fold_out', X_test, y_test)]:
            entry = f"{metric.__name__}_{partition}_{FOLD}"
            if bootstrap:
                n_rows = len(y)
                val_samples = []
                for _ in tqdm(range(n_reps)):
                    # randint's upper bound is exclusive: use n_rows (not
                    # n_rows - 1) so the last row can be drawn as well
                    samples = sampler.randint(0, n_rows, size=n_rows)
                    val_samples.append(score(X.iloc[samples, :], y.iloc[samples]))

                res_dict[f"{entry}_mean"] = np.mean(val_samples)
                res_dict[f"{entry}_std"] = np.std(val_samples)
                # 5% and 95% quantiles, i.e. a 90% percentile interval
                res_dict[f"{entry}_ci_upper"] = np.quantile(val_samples,0.95)
                res_dict[f"{entry}_ci_lower"] = np.quantile(val_samples,0.05)
            else:
                res_dict[entry] = score(X, y)

    return res_dict

In [6]:
# Run metadata stored next to the scores; each metric adds its own bootstrap
# summary (train and fold_out partitions of fold 'A') to the same dictionary.
held_out_performances = dict(
    size=50,
    target='ResHtndx',
    scale=False,
    RunID=101,
)
for metric_f in (average_precision_score, roc_auc_score):
    held_out_performances = evaluate_all_folds_nevergrad_wrapper(
        metric=metric_f,
        model=model_final,
        target='res_htn_dx_ia',
        res_dict=held_out_performances,
        folds=['A'],
        bootstrap=True,
        n_reps=1_000,
    )

# single-row frame: one column per metadata field / score
final_performances_df = pd.DataFrame(held_out_performances, index=[0])

100%|██████████| 1000/1000 [01:50<00:00,  9.03it/s]
100%|██████████| 1000/1000 [00:35<00:00, 28.08it/s]
100%|██████████| 1000/1000 [01:49<00:00,  9.12it/s]
100%|██████████| 1000/1000 [00:41<00:00, 24.28it/s]


In [7]:
# Show the results (transposed for readability) and export them as a LaTeX table.
display(final_performances_df.T)

# Create LaTeX table
latex_table = final_performances_df.to_latex(
    index=True,
    # one left-aligned index column plus one right-aligned column per data
    # column; a fixed "lrrr" does not match the width of this frame and yields
    # a tabular that LaTeX rejects
    column_format="l" + "r" * final_performances_df.shape[1],
    # the column names contain underscores, which must be escaped in LaTeX
    escape=True
)

# Save LaTeX table to file
filename = f"{paper_dir}/tab_parameter_optimization_richprompt_expertfeatures.tex"
with open(filename, 'w') as f:
    f.write(latex_table)

print(f"\nLaTeX table saved to {filename}\n")

Unnamed: 0,0
size,50
target,ResHtndx
scale,False
RunID,101
average_precision_score_train_A_mean,0.739439
average_precision_score_train_A_std,0.041928
average_precision_score_train_A_ci_upper,0.806424
average_precision_score_train_A_ci_lower,0.66807
average_precision_score_fold_out_A_mean,0.852941
average_precision_score_fold_out_A_std,0.038591



LaTeX table saved to ../paper_rebuttal/tab_parameter_optimization_richprompt_expertfeatures.tex



## Simple prompt, all features

In [8]:
class Simpleprompt_Allfeatures_model_wrapper():
    """Linear risk score over eight columns of ``X`` with tunable weights.

    The instance holds a feature frame ``X``, the labels ``y`` and a scoring
    function ``metric``. ``eval`` returns ``1 - metric(...)`` so that an
    optimiser can minimise it.
    """
    def __init__(self, X, y, metric=average_precision_score): # 
        self.X = X
        self.y = y
        self.metric = metric

    def eval(
        self,

        high_BP_during_htn_meds_3_weight: float = 1.1,
        high_BP_during_htn_meds_4_plus_weight: float = 1.1,
        HTN_MED_days_ACEI_ARB_weight: float = 0.0001,
        HTN_MED_days_BETA_BLOCKERS_weight: float = 0.0001,
        HTN_MED_days_THIAZIDE_weight: float = 0.0001,

        sum_I16_0_weight: float = 0.0001,
        sum_I16_1_weight: float = 0.0001,
        sum_I16_9_weight: float = 0.0001,
    ) -> float:
        """Compute the weighted risk score and return the loss ``1 - metric``.

        Each ``<column>_weight`` argument multiplies the column of the same
        name in ``self.X``; the weighted columns are summed and the sum is
        min-max scaled over the rows currently held by the wrapper.

        Returns
        -------
        float
            ``1 - self.metric(self.y, scores)``. For ``average_precision_score``
            and ``roc_auc_score`` the scaled scores are passed; for any other
            metric they are first thresholded at 0.5.
        """
        risk_score = (
            self.X['high_BP_during_htn_meds_3'] * high_BP_during_htn_meds_3_weight +
            self.X['high_BP_during_htn_meds_4_plus'] * high_BP_during_htn_meds_4_plus_weight +
            self.X['HTN_MED_days_ACEI_ARB'] * HTN_MED_days_ACEI_ARB_weight +
            self.X['HTN_MED_days_BETA_BLOCKERS'] * HTN_MED_days_BETA_BLOCKERS_weight +
            self.X['HTN_MED_days_THIAZIDE'] * HTN_MED_days_THIAZIDE_weight +
            self.X['sum_I16_0'] * sum_I16_0_weight +
            self.X['sum_I16_1'] * sum_I16_1_weight +
            self.X['sum_I16_9'] * sum_I16_9_weight
        )

        min_score = risk_score.min()
        max_score = risk_score.max()
        # print(min_score) 0.0
        # print(max_score) 95.24940000000002

        score_range = max_score - min_score
        if score_range > 0:
            probabilities = (risk_score - min_score) / score_range
        else:
            # Every row has the same score (e.g. all weights are zero or a
            # resample contains identical rows): dividing by the zero range
            # would produce NaN and make the metric fail, so report a constant
            # score of 0 for every row instead.
            probabilities = risk_score - min_score
        # probabilities = risk_score/100
        # probabilities = risk_score

        # making it a minimization problem
        if self.metric in [average_precision_score, roc_auc_score]:
            return 1 - self.metric(self.y, probabilities)
        return 1 - self.metric(self.y, np.where(probabilities>0.5, 1.0, 0.0))

In [9]:
# Evaluate the simple-prompt / all-features linear model with its default
# weights, tune the weights with nevergrad on one training split, and compare
# default vs. tuned weights on the matching held-out split.
# NOTE(review): `data_dir` is not defined in any visible cell -- presumably it
# comes from `from _load_llm_results import *`; confirm.

# data used to generate the model ----------------------------------------------
X_train, y_train, X_test, y_test = read_data(
    'res_htn_dx_ia', 'D', 1, False, False, data_dir, 14724 )

model = Simpleprompt_Allfeatures_model_wrapper(X_train, y_train)

# should have same auprc as train, 0.6420409555591419
# (eval() returns a loss, i.e. 1 - metric, hence the `1 -` to get the metric back)
print("with default values, AUPRC on it's original training data is", 1-model.eval())

# held-out data ----------------------------------------------------------------
# X_train/y_train/X_test/y_test are rebound here; from this point on they refer
# to the fold 'A' / run 101 split, not to the split loaded above.
X_train, y_train, X_test, y_test = read_data(
    'res_htn_dx_ia', 'A', 101, False, False, data_dir, 1318 )    
    
model = Simpleprompt_Allfeatures_model_wrapper(X_train, y_train)

print("with default values, AUPRC on entire training data is", 1-model.eval())

# Search space: every weight of model.eval is tuned within [-1000, 1000].
parametrization = ng.p.Instrumentation(
    # range based on the values observed in the final model
    high_BP_during_htn_meds_3_weight = ng.p.Scalar(lower=-1e+3, upper=1e+3),
    high_BP_during_htn_meds_4_plus_weight = ng.p.Scalar(lower=-1e+3, upper=1e+3),
    HTN_MED_days_ACEI_ARB_weight = ng.p.Scalar(lower=-1e+3, upper=1e+3),
    HTN_MED_days_BETA_BLOCKERS_weight = ng.p.Scalar(lower=-1e+3, upper=1e+3),
    HTN_MED_days_THIAZIDE_weight = ng.p.Scalar(lower=-1e+3, upper=1e+3),
    sum_I16_0_weight = ng.p.Scalar(lower=-1e+3, upper=1e+3),
    sum_I16_1_weight = ng.p.Scalar(lower=-1e+3, upper=1e+3),
    sum_I16_9_weight = ng.p.Scalar(lower=-1e+3, upper=1e+3),
)

# NOTE(review): no random seed is set for the optimiser in this cell, so the
# recommendation presumably changes between runs -- confirm and seed if the
# reported numbers must be reproducible.
# `optimizer` and `recommendation` rebind the globals created by the rich-prompt
# section; evaluate_all_folds_nevergrad_wrapper reads `recommendation` at call time.
optimizer = ng.optimizers.NGOpt(parametrization=parametrization, budget=1_000)

# optimizing on all training data
recommendation = optimizer.minimize(model.eval)

print("="*80)
for k, v in recommendation.kwargs.items():
    print(f'{k:34s} : {v}')
print("="*80)

# Should be better than default values
# NOTE(review): in the output saved with this notebook the tuned train AUPRC
# (0.5498) is lower than the default one (0.6564), so this expectation did not
# hold for that run -- worth checking the bounds and the budget.
print("with optimized values (using 101 training data), AUPRC on train is", 1-model.eval(**recommendation.kwargs))
model.metric = roc_auc_score
print("with optimized values (using 101 training data), AUROC on train is", 1-model.eval(**recommendation.kwargs))
model.metric = average_precision_score

# fresh wrapper bound to the held-out partition of the same split; it uses the
# class default metric (average precision)
model_final = Simpleprompt_Allfeatures_model_wrapper(X_test, y_test)
print("WITHOUT using optimized values, AUPRC on 101 held-out data is", 1-model_final.eval())
print("using optimized values, AUPRC on 101 held-out data is", 1-model_final.eval(**recommendation.kwargs))


with default values, AUPRC on it's original training data is 0.6420409555591419
with default values, AUPRC on entire training data is 0.656380143528881
high_BP_during_htn_meds_3_weight   : 956.0105800628662
high_BP_during_htn_meds_4_plus_weight : 696.3979005813599
HTN_MED_days_ACEI_ARB_weight       : 444.72396373748774
HTN_MED_days_BETA_BLOCKERS_weight  : 842.2763347625732
HTN_MED_days_THIAZIDE_weight       : 406.34989738464355
sum_I16_0_weight                   : 216.78805351257324
sum_I16_1_weight                   : -53.42525243759155
sum_I16_9_weight                   : -663.8421714305878
with optimized values (using 101 training data), AUPRC on train is 0.5498383331386929
with optimized values (using 101 training data), AUROC on train is 0.9092916036493147
WITHOUT using optimized values, AUPRC on 101 held-out data is 0.861274450135219
using optimized values, AUPRC on 101 held-out data is 0.7436184750661603


In [10]:
# Run metadata stored next to the scores; each metric adds its own bootstrap
# summary (train and fold_out partitions of fold 'A') to the same dictionary.
held_out_performances = dict(
    size=50,
    target='ResHtndx',
    scale=False,
    RunID=101,
)
for metric_f in (average_precision_score, roc_auc_score):
    held_out_performances = evaluate_all_folds_nevergrad_wrapper(
        metric=metric_f,
        model=model_final,
        target='res_htn_dx_ia',
        res_dict=held_out_performances,
        folds=['A'],
        bootstrap=True,
        n_reps=1_000,
    )

# single-row frame: one column per metadata field / score
final_performances_df = pd.DataFrame(held_out_performances, index=[0])

100%|██████████| 1000/1000 [00:59<00:00, 16.69it/s]
100%|██████████| 1000/1000 [00:50<00:00, 19.68it/s]
100%|██████████| 1000/1000 [01:08<00:00, 14.68it/s]
100%|██████████| 1000/1000 [01:04<00:00, 15.58it/s]


In [11]:
# Show the results (transposed for readability) and export them as a LaTeX table.
display(final_performances_df.T)

# Create LaTeX table
latex_table = final_performances_df.to_latex(
    index=True,
    # one left-aligned index column plus one right-aligned column per data
    # column; a fixed "lrrr" does not match the width of this frame and yields
    # a tabular that LaTeX rejects
    column_format="l" + "r" * final_performances_df.shape[1],
    # the column names contain underscores, which must be escaped in LaTeX
    escape=True
)

# Save LaTeX table to file
filename = f"{paper_dir}/tab_parameter_optimization_simpleprompt_allfeatures.tex"
with open(filename, 'w') as f:
    f.write(latex_table)

print(f"\nLaTeX table saved to {filename}\n")

Unnamed: 0,0
size,50
target,ResHtndx
scale,False
RunID,101
average_precision_score_train_A_mean,0.546601
average_precision_score_train_A_std,0.050509
average_precision_score_train_A_ci_upper,0.624898
average_precision_score_train_A_ci_lower,0.460054
average_precision_score_fold_out_A_mean,0.744379
average_precision_score_fold_out_A_std,0.050547



LaTeX table saved to ../paper_rebuttal/tab_parameter_optimization_simpleprompt_allfeatures.tex

