In [9]:
# !pip install nevergrad 

# NOTE(review): star import -- `pd` and `display` are used further down without
# a visible explicit import, so they presumably come from here (or, for
# `display`, from the IPython namespace). Verify and import them explicitly.
from _load_llm_results import *

import numpy as np
import warnings
import os
import importlib
# Make the parent directory importable so that `evaluate_model` can be found.
import sys; sys.path.append('../')
from evaluate_model import read_data

from sklearn.metrics import accuracy_score, precision_score, balanced_accuracy_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import roc_auc_score

# Silences every warning for the rest of the session (not only known-noisy ones).
warnings.filterwarnings("ignore")

%matplotlib inline

from tqdm import tqdm
from glob import glob

# Directory handed to every `read_data(...)` call in this notebook.
data_dir = '../data'
# Destination directory for the generated LaTeX tables. The value already ends
# with '/', and the export cells build paths as f"{paper_dir}/...", so the
# printed file names contain a double slash.
paper_dir = '../paper_rebuttal/'

## Rich prompt, expert features

In [10]:
import nevergrad as ng

class Richprompt_Expertfeatures_model_wrapper():
    """Rule-based scoring model wrapped as a minimisation objective.

    The model adds a fixed increment to a per-row score for every threshold
    rule that fires, clips the score at 1.0 and then multiplies it by a
    down-scaling factor for each of two compound conditions. All thresholds,
    increments and factors are keyword arguments of :meth:`eval`, so an
    optimiser can tune them.

    Attributes:
        X: DataFrame holding the feature columns read by :meth:`eval`.
        y: Ground-truth labels passed as the first argument to ``metric``.
        metric: Callable ``metric(y_true, y_score_or_pred)``.
    """

    def __init__(self, X, y, metric=average_precision_score):
        self.X = X
        self.y = y
        self.metric = metric

    def eval(
        self, 

        # Setting original values as default values
        high_BP_during_htn_meds_3_1: int = 2,
        sum_enc_during_htn_meds_4_plus: int = 2,
        mean_diastolic_1: int = 80,
        mean_systolic_1: int = 140,
        high_BP_during_htn_meds_2: int = 5,
        high_BP_during_htn_meds_3_2: int = 5,

        # Now for the probabilities
        high_BP_during_htn_meds_3_1_prob: float = 0.4,
        sum_enc_during_htn_meds_4_plus_prob: float = 0.4,
        mean_diastolic_1_prob: float = 0.1,
        mean_systolic_1_prob: float = 0.1,
        high_BP_during_htn_meds_2_prob: float = 0.1,
        high_BP_during_htn_meds_3_2_prob: float = 0.1,

        # the scaling conditionals
        Med_Potassium_N: int = 0,
        Dx_HypoK_N: int = 0,
        mean_systolic_2: int = 130,
        mean_diastolic_2: int = 75,

        Med_Potassium_N_Dx_HypoK_N_prob: float = 0.5,
        mean_systolic_mean_diastolic: float = 0.5
    ) -> float:
        """Score every row of ``self.X`` and return ``1 - metric``.

        The first six arguments are thresholds, the next six are the
        increments added when the matching threshold rule fires, and the
        remaining ones are the thresholds and multiplicative factors of the
        two scaling conditions.

        The rows are scored with vectorised NumPy operations instead of a
        Python-level ``iterrows`` loop. The rules are applied in the same
        order as before, so each row receives the same value.

        Returns:
            ``1 - self.metric(self.y, scores)``. Scores are passed as-is when
            the metric is ``average_precision_score`` or ``roc_auc_score``;
            for any other metric they are first binarised at ``> 0.5``.
        """
        features = self.X

        # Plain arrays: avoids index alignment and per-row Series construction.
        high_bp_3 = features['high_BP_during_htn_meds_3'].to_numpy()
        enc_4_plus = features['sum_enc_during_htn_meds_4_plus'].to_numpy()
        diastolic = features['mean_diastolic'].to_numpy()
        systolic = features['mean_systolic'].to_numpy()
        high_bp_2 = features['high_BP_during_htn_meds_2'].to_numpy()
        med_potassium = features['Med_Potassium_N'].to_numpy()
        dx_hypok = features['Dx_HypoK_N'].to_numpy()

        # (rule fired?, increment) pairs, in the order the rules are applied.
        # Note the first two rules use >= while the others use a strict >.
        threshold_rules = (
            (high_bp_3 >= high_BP_during_htn_meds_3_1, high_BP_during_htn_meds_3_1_prob),
            (enc_4_plus >= sum_enc_during_htn_meds_4_plus, sum_enc_during_htn_meds_4_plus_prob),
            (diastolic > mean_diastolic_1, mean_diastolic_1_prob),
            (systolic > mean_systolic_1, mean_systolic_1_prob),
            (high_bp_2 > high_BP_during_htn_meds_2, high_BP_during_htn_meds_2_prob),
            (high_bp_3 > high_BP_during_htn_meds_3_2, high_BP_during_htn_meds_3_2_prob),
        )

        probabilities = np.zeros(len(features), dtype=float)
        for fired, increment in threshold_rules:
            # Adding 0.0 where the rule did not fire leaves the value untouched.
            probabilities = probabilities + np.where(fired, increment, 0.0)

        # clipping (done before the scaling factors are applied)
        probabilities = np.minimum(probabilities, 1.0)

        potassium_and_hypok = (med_potassium > Med_Potassium_N) & (dx_hypok > Dx_HypoK_N)
        probabilities = np.where(
            potassium_and_hypok,
            probabilities * Med_Potassium_N_Dx_HypoK_N_prob,
            probabilities)

        low_blood_pressure = (systolic < mean_systolic_2) & (diastolic < mean_diastolic_2)
        probabilities = np.where(
            low_blood_pressure,
            probabilities * mean_systolic_mean_diastolic,
            probabilities)

        # making it a minimization problem

        if self.metric in [average_precision_score, roc_auc_score]:
            return 1 - self.metric(self.y, probabilities)
        return 1 - self.metric(self.y, np.where(probabilities>0.5, 1.0, 0.0))

# data used to generate the model ----------------------------------------------
# NOTE(review): the meaning of the positional arguments of `read_data` is not
# visible in this notebook -- check `evaluate_model.read_data` before editing.
X_train, y_train, X_test, y_test = read_data(
    'res_htn_dx_ia', 'C', 1, False, True, data_dir, 24284 )

model = Richprompt_Expertfeatures_model_wrapper(X_train, y_train)

# should have same auprc as train, 0.6132813108536367
# `eval()` returns 1 - metric, hence the `1 - ...` to recover the AUPRC.
print("with default values, AUPRC on it's original training data is", 1-model.eval())

# held-out data ----------------------------------------------------------------
# Rebinds X_train / y_train / X_test / y_test and `model`; everything below
# uses this second split.
X_train, y_train, X_test, y_test = read_data(
    'res_htn_dx_ia', 'A', 101, False, False, data_dir, 1318 )    
    
model = Richprompt_Expertfeatures_model_wrapper(X_train, y_train)

print("with default values, AUPRC on entire training data is", 1-model.eval())
# Search space: one entry per keyword argument of `model.eval`.
# Thresholds are cast to integers; increments and factors are floats in [0, 1].
parametrization = ng.p.Instrumentation(
    high_BP_during_htn_meds_3_1 = ng.p.Scalar(lower=1, upper=12).set_integer_casting(),
    sum_enc_during_htn_meds_4_plus = ng.p.Scalar(lower=1, upper=12).set_integer_casting(),
    mean_diastolic_1 = ng.p.Scalar(lower=40, upper=120).set_integer_casting(),
    mean_systolic_1 = ng.p.Scalar(lower=40, upper=160).set_integer_casting(),
    high_BP_during_htn_meds_2 = ng.p.Scalar(lower=1, upper=12).set_integer_casting(),
    high_BP_during_htn_meds_3_2 = ng.p.Scalar(lower=1, upper=12).set_integer_casting(),

    high_BP_during_htn_meds_3_1_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    sum_enc_during_htn_meds_4_plus_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    mean_diastolic_1_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    mean_systolic_1_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    high_BP_during_htn_meds_2_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    high_BP_during_htn_meds_3_2_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    
    # NOTE(review): the defaults of these two thresholds in `eval` are 0, which
    # lies outside the [1, 12] range searched here -- confirm this is intended.
    Med_Potassium_N = ng.p.Scalar(lower=1, upper=12).set_integer_casting(),
    Dx_HypoK_N = ng.p.Scalar(lower=1, upper=12).set_integer_casting(),
    mean_systolic_2 = ng.p.Scalar(lower=40, upper=160).set_integer_casting(),
    mean_diastolic_2 = ng.p.Scalar(lower=40, upper=120).set_integer_casting(),

    Med_Potassium_N_Dx_HypoK_N_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    mean_systolic_mean_diastolic = ng.p.Scalar(lower=0.0, upper=1.0),
)
# NOTE(review): no random seed is set anywhere in the visible code, so the
# recommendation presumably differs between runs -- TODO confirm and seed.
optimizer = ng.optimizers.NGOpt(parametrization=parametrization, budget=5_000)

# optimizing on all training data
# `recommendation` is a notebook-level global; `evaluate_all_folds` reads
# `recommendation.kwargs`, so it must still point at this model's result when
# the evaluation cell below is run.
recommendation = optimizer.minimize(model.eval)

print("="*80)
for k, v in recommendation.kwargs.items():
    print(f'{k:34s} : {v}')
print("="*80)

# Should be better than default values
print("with optimized values, AUPRC on entire training data is", 1-model.eval(**recommendation.kwargs))

# Wrapper bound to the test split; later cells pass it to `evaluate_all_folds`,
# which overwrites its X / y attributes.
model_final = Richprompt_Expertfeatures_model_wrapper(X_test, y_test)
print("with optimized values, AUPRC on exclusive held-out data is", 1-model_final.eval(**recommendation.kwargs))

with default values, AUPRC on it's original training data is 0.6132813108536367
with default values, AUPRC on entire training data is 0.6317278522679678
high_BP_during_htn_meds_3_1        : 1
sum_enc_during_htn_meds_4_plus     : 8
mean_diastolic_1                   : 77
mean_systolic_1                    : 127
high_BP_during_htn_meds_2          : 4
high_BP_during_htn_meds_3_2        : 4
high_BP_during_htn_meds_3_1_prob   : 0.21068673805789528
sum_enc_during_htn_meds_4_plus_prob : 0.4642918201322573
mean_diastolic_1_prob              : 0.01794148887962255
mean_systolic_1_prob               : 0.2986749927809628
high_BP_during_htn_meds_2_prob     : 0.14595446871434706
high_BP_during_htn_meds_3_2_prob   : 0.31259606549943786
Med_Potassium_N                    : 3
Dx_HypoK_N                         : 4
mean_systolic_2                    : 142
mean_diastolic_2                   : 80
Med_Potassium_N_Dx_HypoK_N_prob    : 0.6179528884420663
mean_systolic_mean_diastolic       : 0.796199514234630

In [11]:
def evaluate_all_folds(
    metric, model, target, res_dict, folds=['A', 'B', 'C', 'D', 'E'], 
    bootstrap=False, n_reps=1_000, params=None):
    """Score ``model`` on the train and fold-out split of every fold.

    For each fold the data are loaded with
    ``read_data(target, FOLD, 101, False, False, data_dir, 1318)`` and the
    model is scored on both splits. Results are written into ``res_dict``
    under keys of the form ``"{metric.__name__}_{train|fold_out}_{FOLD}"``.

    Args:
        metric: Scoring callable. It is assigned to ``model.metric`` and its
            ``__name__`` is used to build the result keys.
        model: Object exposing ``X``, ``y`` and ``metric`` attributes and an
            ``eval(**kwargs)`` method that returns ``1 - score``.
        target: Forwarded to ``read_data`` as its first argument.
        res_dict: Dictionary updated in place with the results.
        folds: Fold identifiers to evaluate.
        bootstrap: When true, each split is resampled with replacement
            ``n_reps`` times and ``_mean``, ``_std``, ``_ci_upper`` (0.95
            quantile) and ``_ci_lower`` (0.05 quantile) keys are stored
            instead of a single score.
        n_reps: Number of bootstrap replicates.
        params: Keyword arguments forwarded to ``model.eval``. ``None`` (the
            default) keeps the previous behaviour of reading the notebook
            global ``recommendation.kwargs``; pass ``{}`` to score the model
            with its default parameters.

    Returns:
        The same ``res_dict`` object that was passed in.

    Side effects:
        ``model.metric``, ``model.X`` and ``model.y`` are overwritten; after
        the call they hold the last data that was scored.
    """
    if params is None:
        # Backward-compatible fallback on the notebook-level optimiser result.
        params = recommendation.kwargs

    model.metric = metric

    def _score(X, y):
        # model.eval returns 1 - metric (minimisation form); undo that here.
        model.X = X
        model.y = y
        return 1 - model.eval(**params)

    # Stolen from 04. loading feat model
    for FOLD in folds:
        X_train, y_train, X_test, y_test = read_data(
            target, FOLD, 101, False, False, data_dir, 1318 )

        # Train first, then fold-out, so random draws are consumed in a fixed order.
        for split_name, X_split, y_split in (
                ("train", X_train, y_train),
                ("fold_out", X_test, y_test)):
            entry = f"{metric.__name__}_{split_name}_{FOLD}"

            if not bootstrap:
                res_dict[entry] = _score(X_split, y_split)
                continue

            n_obs = len(y_split)
            val_samples = []
            for _ in tqdm(range(n_reps)):
                # `high` is exclusive in np.random.randint, so it must be n_obs
                # (not n_obs - 1) for the last observation to be drawable.
                samples = np.random.randint(0, n_obs, size=n_obs)
                val_samples.append(
                    _score(X_split.iloc[samples, :], y_split.iloc[samples]))

            res_dict[f"{entry}_mean"] = np.mean(val_samples)
            res_dict[f"{entry}_std"] = np.std(val_samples)
            res_dict[f"{entry}_ci_upper"] = np.quantile(val_samples,0.95)
            res_dict[f"{entry}_ci_lower"] = np.quantile(val_samples,0.05)

    return res_dict

In [12]:
# Metadata columns that lead the results row; the metric columns are appended
# by `evaluate_all_folds`.
held_out_performances = dict(size=50, target='ResHtndx', scale=False, RunID=101)

# Bootstrap both ranking metrics on fold 'A', accumulating into the same dict.
for metric_f in (average_precision_score, roc_auc_score):
    held_out_performances = evaluate_all_folds(
        metric=metric_f,
        model=model_final,
        target='res_htn_dx_ia',
        res_dict=held_out_performances,
        folds=['A'],
        bootstrap=True,
        n_reps=1_000,
    )

# One-row frame: one column per metadata / metric key.
final_performances_df = pd.DataFrame(held_out_performances, index=[0])

  0%|▏                                                                | 3/1000 [00:00<00:39, 25.18it/s]

100%|██████████████████████████████████████████████████████████████| 1000/1000 [00:41<00:00, 24.30it/s]
100%|██████████████████████████████████████████████████████████████| 1000/1000 [00:14<00:00, 70.07it/s]
100%|██████████████████████████████████████████████████████████████| 1000/1000 [00:40<00:00, 24.69it/s]
100%|██████████████████████████████████████████████████████████████| 1000/1000 [00:15<00:00, 65.31it/s]


In [13]:
# Shown transposed for readability; the exported table keeps the original
# one-row orientation.
display(final_performances_df.T)

# Create LaTeX table
# The column spec is derived from the frame (index column + one right-aligned
# column per data column). A hard-coded "lrrr" declares only four columns and
# does not match the number of columns actually emitted.
# NOTE(review): escape=False leaves the underscores in the column names
# unescaped; confirm that the including LaTeX document can handle that.
latex_table = final_performances_df.to_latex(
    index=True,
    column_format="l" + "r" * final_performances_df.shape[1],
    escape=False
)

# Save LaTeX table to file
# os.path.join avoids the double slash produced by f"{paper_dir}/..." when
# paper_dir already ends with a separator.
filename = os.path.join(
    paper_dir, "tab_parameter_optimization_richprompt_expertfeatures.tex")
with open(filename, 'w') as f:
    f.write(latex_table)

print(f"\nLaTeX table saved to {filename}\n")

Unnamed: 0,0
size,50
target,ResHtndx
scale,False
RunID,101
average_precision_score_train_A_mean,0.746561
average_precision_score_train_A_std,0.041821
average_precision_score_train_A_ci_upper,0.81321
average_precision_score_train_A_ci_lower,0.676125
average_precision_score_fold_out_A_mean,0.79137
average_precision_score_fold_out_A_std,0.052784



LaTeX table saved to ../paper_rebuttal//tab_parameter_optimization_richprompt_expertfeatures.tex



## Simple prompt, all features

In [20]:
class Simpleprompt_Allfeatures_model_wrapper():
    """Weighted-sum risk score wrapped as a minimisation objective.

    ``X`` supplies the feature columns, ``y`` the labels handed to ``metric``
    and ``metric`` is a callable ``metric(y_true, y_score_or_pred)``.
    """

    def __init__(self, X, y, metric=average_precision_score):
        self.X, self.y, self.metric = X, y, metric

    def eval(
        self,

        high_BP_during_htn_meds_3_weight: float = 1.1,
        high_BP_during_htn_meds_4_plus_weight: float = 1.1,
        HTN_MED_days_ACEI_ARB_weight: float = 0.0001,
        HTN_MED_days_BETA_BLOCKERS_weight: float = 0.0001,
        HTN_MED_days_THIAZIDE_weight: float = 0.0001,

        sum_I16_0_weight: float = 0.0001,
        sum_I16_1_weight: float = 0.0001,
        sum_I16_9_weight: float = 0.0001,
    ) -> float:
        """Return ``1 - metric`` for the weighted sum of eight columns.

        Each ``*_weight`` argument multiplies the column of the same name
        (without the suffix). The summed score is divided by 100 before it is
        handed to the metric; for metrics other than
        ``average_precision_score`` / ``roc_auc_score`` it is first binarised
        at ``> 0.5``.
        """
        # (column, weight) pairs in the order the terms are summed.
        weighted_columns = (
            ('high_BP_during_htn_meds_3', high_BP_during_htn_meds_3_weight),
            ('high_BP_during_htn_meds_4_plus', high_BP_during_htn_meds_4_plus_weight),
            ('HTN_MED_days_ACEI_ARB', HTN_MED_days_ACEI_ARB_weight),
            ('HTN_MED_days_BETA_BLOCKERS', HTN_MED_days_BETA_BLOCKERS_weight),
            ('HTN_MED_days_THIAZIDE', HTN_MED_days_THIAZIDE_weight),
            ('sum_I16_0', sum_I16_0_weight),
            ('sum_I16_1', sum_I16_1_weight),
            ('sum_I16_9', sum_I16_9_weight),
        )

        # Left-to-right accumulation, starting from the first weighted term.
        terms = (self.X[column] * weight for column, weight in weighted_columns)
        risk_score = next(terms)
        for term in terms:
            risk_score = risk_score + term

        # An earlier inspection noted in this cell recorded raw scores between
        # 0.0 and 95.24940000000002; presumably that is why 100 is the divisor.
        probabilities = risk_score / 100

        # making it a minimization problem
        ranking_metrics = [average_precision_score, roc_auc_score]
        if self.metric not in ranking_metrics:
            hard_labels = np.where(probabilities > 0.5, 1.0, 0.0)
            return 1 - self.metric(self.y, hard_labels)
        return 1 - self.metric(self.y, probabilities)

# data used to generate the model ----------------------------------------------
# NOTE(review): the meaning of the positional arguments of `read_data` is not
# visible in this notebook -- check `evaluate_model.read_data` before editing.
X_train, y_train, X_test, y_test = read_data(
    'res_htn_dx_ia', 'D', 1, False, False, data_dir, 14724 )

model = Simpleprompt_Allfeatures_model_wrapper(X_train, y_train)

# should have same auprc as train, 0.6420409555591419
# `eval()` returns 1 - metric, hence the `1 - ...` to recover the AUPRC.
print("with default values, AUPRC on it's original training data is", 1-model.eval())

# held-out data ----------------------------------------------------------------
# Rebinds X_train / y_train / X_test / y_test and `model`; everything below
# uses this second split.
X_train, y_train, X_test, y_test = read_data(
    'res_htn_dx_ia', 'A', 101, False, False, data_dir, 1318 )    
    
model = Simpleprompt_Allfeatures_model_wrapper(X_train, y_train)

print("with default values, AUPRC on entire training data is", 1-model.eval())

# Search space: one unconstrained-sign weight per keyword argument of `model.eval`.
parametrization = ng.p.Instrumentation(
    # range based on the values observed in the final model
    high_BP_during_htn_meds_3_weight = ng.p.Scalar(lower=-1e3, upper=1e+3),
    high_BP_during_htn_meds_4_plus_weight = ng.p.Scalar(lower=-1e3, upper=1e+3),
    HTN_MED_days_ACEI_ARB_weight = ng.p.Scalar(lower=-1e3, upper=1e+3),
    HTN_MED_days_BETA_BLOCKERS_weight = ng.p.Scalar(lower=-1e3, upper=1e+3),
    HTN_MED_days_THIAZIDE_weight = ng.p.Scalar(lower=-1e3, upper=1e+3),
    sum_I16_0_weight = ng.p.Scalar(lower=-1e3, upper=1e+3),
    sum_I16_1_weight = ng.p.Scalar(lower=-1e3, upper=1e+3),
    sum_I16_9_weight = ng.p.Scalar(lower=-1e3, upper=1e+3),
)

# NOTE(review): no random seed is set anywhere in the visible code, so the
# recommendation presumably differs between runs -- TODO confirm and seed.
optimizer = ng.optimizers.NGOpt(parametrization=parametrization, budget=5_000)

# optimizing on all training data
# Overwrites the global `recommendation` from the previous section;
# `evaluate_all_folds` reads `recommendation.kwargs`, so re-running the earlier
# evaluation cell after this point would use these weights instead.
recommendation = optimizer.minimize(model.eval)

print("="*80)
for k, v in recommendation.kwargs.items():
    print(f'{k:34s} : {v}')
print("="*80)

# Should be better than default values
# NOTE(review): the recorded output of this cell shows 0.5508 here versus
# 0.6564 with the default weights, i.e. the optimised weights score *lower* on
# the training data. The default weights are not supplied to the search as a
# starting point -- investigate before relying on these numbers.
print("with optimized values, AUPRC on entire training data is", 1-model.eval(**recommendation.kwargs))

# Wrapper bound to the test split; later cells pass it to `evaluate_all_folds`,
# which overwrites its X / y attributes.
model_final = Simpleprompt_Allfeatures_model_wrapper(X_test, y_test)
print("with optimized values, AUPRC on exclusive held-out data is", 1-model_final.eval(**recommendation.kwargs))

with default values, AUPRC on it's original training data is 0.6420409555591419
with default values, AUPRC on entire training data is 0.656380143528881
high_BP_during_htn_meds_3_weight   : -219.14531551343913
high_BP_during_htn_meds_4_plus_weight : 615.1236775076127
HTN_MED_days_ACEI_ARB_weight       : 186.11443379075894
HTN_MED_days_BETA_BLOCKERS_weight  : 375.652129220681
HTN_MED_days_THIAZIDE_weight       : 185.41021462145048
sum_I16_0_weight                   : -63.55474805408397
sum_I16_1_weight                   : 84.93436240108295
sum_I16_9_weight                   : 764.2627016735107
with optimized values, AUPRC on entire training data is 0.5507720082899977
with optimized values, AUPRC on exclusive held-out data is 0.7419528321248432


In [21]:
# Metadata columns that lead the results row; the metric columns are appended
# by `evaluate_all_folds`.
held_out_performances = dict(size=50, target='ResHtndx', scale=False, RunID=101)

# Bootstrap both ranking metrics on fold 'A', accumulating into the same dict.
for metric_f in (average_precision_score, roc_auc_score):
    held_out_performances = evaluate_all_folds(
        metric=metric_f,
        model=model_final,
        target='res_htn_dx_ia',
        res_dict=held_out_performances,
        folds=['A'],
        bootstrap=True,
        n_reps=1_000,
    )

# One-row frame: one column per metadata / metric key.
final_performances_df = pd.DataFrame(held_out_performances, index=[0])

  4%|██▍                                                            | 38/1000 [00:00<00:02, 372.44it/s]

100%|█████████████████████████████████████████████████████████████| 1000/1000 [00:03<00:00, 290.36it/s]
100%|█████████████████████████████████████████████████████████████| 1000/1000 [00:02<00:00, 363.19it/s]
100%|█████████████████████████████████████████████████████████████| 1000/1000 [00:04<00:00, 239.04it/s]
100%|█████████████████████████████████████████████████████████████| 1000/1000 [00:03<00:00, 284.60it/s]


In [22]:
# Shown transposed for readability; the exported table keeps the original
# one-row orientation.
display(final_performances_df.T)

# Create LaTeX table
# The column spec is derived from the frame (index column + one right-aligned
# column per data column). A hard-coded "lrrr" declares only four columns and
# does not match the number of columns actually emitted.
# NOTE(review): escape=False leaves the underscores in the column names
# unescaped; confirm that the including LaTeX document can handle that.
latex_table = final_performances_df.to_latex(
    index=True,
    column_format="l" + "r" * final_performances_df.shape[1],
    escape=False
)

# Save LaTeX table to file
# os.path.join avoids the double slash produced by f"{paper_dir}/..." when
# paper_dir already ends with a separator.
filename = os.path.join(
    paper_dir, "tab_parameter_optimization_simpleprompt_allfeatures.tex")
with open(filename, 'w') as f:
    f.write(latex_table)

print(f"\nLaTeX table saved to {filename}\n")

Unnamed: 0,0
size,50
target,ResHtndx
scale,False
RunID,101
average_precision_score_train_A_mean,0.546772
average_precision_score_train_A_std,0.05198
average_precision_score_train_A_ci_upper,0.630672
average_precision_score_train_A_ci_lower,0.460735
average_precision_score_fold_out_A_mean,0.744141
average_precision_score_fold_out_A_std,0.050528



LaTeX table saved to ../paper_rebuttal//tab_parameter_optimization_simpleprompt_allfeatures.tex

