In [2]:
from _load_llm_results import *

import numpy as np
import warnings
import os
import importlib
import sys; sys.path.append('../')
from evaluate_model import read_data

from sklearn.metrics import accuracy_score, precision_score, balanced_accuracy_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import roc_auc_score

warnings.filterwarnings("ignore")

%matplotlib inline

from tqdm import tqdm
from glob import glob

In [3]:
import nevergrad as ng

class Nevergrad_wrapper():
    # res_htn_dx_ia_GPT_4o_iterative_Classifier_False_True_True_1_A_1318_program
    def __init__(self, X, y, metric=average_precision_score):
        self.X = X
        self.y = y
        self.metric = metric

    def eval(self, 
        # Setting original values as default values
        high_BP_during_htn_meds_3_1: int = 2,
        sum_enc_during_htn_meds_4_plus_1: int = 2,
        mean_systolic_1: int = 135,
        mean_diastolic: int = 80,
        low_K_N_1: int = 0,
        high_BP_during_htn_meds_4_plus_2: int = 5,
        high_BP_during_htn_meds_2: int = 5,
        mean_systolic_2: int = 140,
        high_BP_during_htn_meds_3_2: int = 1,
        low_K_N_2: int = 0,
        high_BP_during_htn_meds_4_plus_3: int = 1,

        # Now for the probabilities
        high_BP_during_htn_meds_3_1_prob: float = 0.9,
        mean_systolic_diastolic_prob: float = -0.3,
        low_K_N_1_prob: float = -0.2,
        high_BP_during_htn_meds_4_plus_2_prob: float = 0.1,
        else_init_prob: float = 0.1,
        high_BP_during_htn_meds_2_mean_systolic_prob: float = 0.4,
        high_BP_during_htn_meds_3_2_prob: float = 0.2,
        low_K_N_high_BP_during_htn_meds_4_plus_prob: float = 0.3
    ) -> float:
        probabilities = []
        for _, row in self.X.iterrows():
            if (row['high_BP_during_htn_meds_3'] >= high_BP_during_htn_meds_3_1
            or  row['high_BP_during_htn_meds_4_plus'] >= sum_enc_during_htn_meds_4_plus_1):
                prob = high_BP_during_htn_meds_3_1_prob

                if row['mean_systolic'] < mean_systolic_1 and row['mean_diastolic'] < mean_diastolic:
                    prob += mean_systolic_diastolic_prob

                if row['low_K_N'] > low_K_N_1:
                    prob += low_K_N_1_prob

                if row['high_BP_during_htn_meds_4_plus'] > high_BP_during_htn_meds_4_plus_2:
                    prob += high_BP_during_htn_meds_4_plus_2_prob

                probabilities.append(prob)
            else:
                prob = else_init_prob

                if row['high_BP_during_htn_meds_2'] >= high_BP_during_htn_meds_2 or row['mean_systolic'] >= mean_systolic_2:
                    prob += high_BP_during_htn_meds_2_mean_systolic_prob

                if row['high_BP_during_htn_meds_3'] >= high_BP_during_htn_meds_3_2:
                    prob += high_BP_during_htn_meds_3_2_prob

                if row['low_K_N'] == low_K_N_2 and row['high_BP_during_htn_meds_4_plus'] >= high_BP_during_htn_meds_4_plus_3:
                    prob += low_K_N_high_BP_during_htn_meds_4_plus_prob

                probabilities.append(prob)
                
        probabilities = np.array(probabilities)
        
        # making it a minimization problem
        if self.metric in [average_precision_score, roc_auc_score]:
            return 1 - self.metric(self.y, probabilities)
        
        return 1 - self.metric(self.y, np.where(probabilities>0.5, 1.0, 0.0))

In [4]:
# data used to generate the model ----------------------------------------------
# res_htn_dx_ia_GPT_4o_iterative_Classifier_False_True_True_1_A_1318_program
X_train, y_train, X_test, y_test = read_data(
    'res_htn_dx_ia', 'A', 1, False, True, data_dir, 1318 )

model = Nevergrad_wrapper(X_train, y_train)

# should have same auprc as train, 0.6369666860876757
print("with default values, AUPRC on it's original training data is", 1-model.eval())

# held-out data ----------------------------------------------------------------
X_train, y_train, X_test, y_test = read_data(
    'res_htn_dx_ia', 'A', 101, False, False, data_dir, 1318 )    
    
model = Nevergrad_wrapper(X_train, y_train)

print("with default values, AUPRC on entire training data (for dataset 101) is", 1-model.eval())
parametrization = ng.p.Instrumentation(
    high_BP_during_htn_meds_3_1 = ng.p.Scalar(lower=1, upper=5).set_integer_casting(),
    sum_enc_during_htn_meds_4_plus_1 = ng.p.Scalar(lower=1, upper=5).set_integer_casting(),
    mean_systolic_1 = ng.p.Scalar(lower=40, upper=160).set_integer_casting(),
    mean_diastolic = ng.p.Scalar(lower=40, upper=120).set_integer_casting(),
    low_K_N_1 = ng.p.Scalar(lower=1, upper=5).set_integer_casting(),
    high_BP_during_htn_meds_4_plus_2 = ng.p.Scalar(lower=1, upper=5).set_integer_casting(),
    high_BP_during_htn_meds_2 = ng.p.Scalar(lower=1, upper=5).set_integer_casting(),
    mean_systolic_2 = ng.p.Scalar(lower=40, upper=160).set_integer_casting(),
    high_BP_during_htn_meds_3_2 = ng.p.Scalar(lower=1, upper=5).set_integer_casting(),
    low_K_N_2 = ng.p.Scalar(lower=1, upper=5).set_integer_casting(),
    high_BP_during_htn_meds_4_plus_3 = ng.p.Scalar(lower=1, upper=5).set_integer_casting(),

    high_BP_during_htn_meds_3_1_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    mean_systolic_diastolic_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    low_K_N_1_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    high_BP_during_htn_meds_4_plus_2_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    else_init_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    high_BP_during_htn_meds_2_mean_systolic_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    high_BP_during_htn_meds_3_2_prob = ng.p.Scalar(lower=0.0, upper=1.0),
    low_K_N_high_BP_during_htn_meds_4_plus_prob = ng.p.Scalar(lower=0.0, upper=1.0)
)
optimizer = ng.optimizers.NGOpt(parametrization=parametrization, budget=5_000)

# optimizing on all training data
recommendation = optimizer.minimize(model.eval)

print("="*80)
for k, v in recommendation.kwargs.items():
    print(f'{k:34s} : {v}')
print("="*80)

# Should be better than default values
print("with optimized values (using 101 training data), AUPRC on train is", 1-model.eval(**recommendation.kwargs))

model_final = Nevergrad_wrapper(X_test, y_test)
print("WITHOUT using optimized values, AUPRC on 101 held-out data is", 1-model_final.eval())
print("using optimized values, AUPRC on 101 held-out data is", 1-model_final.eval(**recommendation.kwargs))

with default values, AUPRC on it's original training data is 0.6369666860876757
with default values, AUPRC on entire training data (for dataset 101) is 0.6459524020656409
high_BP_during_htn_meds_3_1        : 5
sum_enc_during_htn_meds_4_plus_1   : 1
mean_systolic_1                    : 134
mean_diastolic                     : 82
low_K_N_1                          : 1
high_BP_during_htn_meds_4_plus_2   : 1
high_BP_during_htn_meds_2          : 4
mean_systolic_2                    : 143
high_BP_during_htn_meds_3_2        : 1
low_K_N_2                          : 2
high_BP_during_htn_meds_4_plus_3   : 3
high_BP_during_htn_meds_3_1_prob   : 0.6983980392445647
mean_systolic_diastolic_prob       : 0.27525870220852455
low_K_N_1_prob                     : 0.4753538503386059
high_BP_during_htn_meds_4_plus_2_prob : 0.9511656486463488
else_init_prob                     : 0.20516104299879243
high_BP_during_htn_meds_2_mean_systolic_prob : 0.14810670756114538
high_BP_during_htn_meds_3_2_prob   : 0.1918

In [5]:
def evaluate_all_folds_nevergrad_wrapper(
    metric, model, target, res_dict, folds=['A', 'B', 'C', 'D', 'E'], 
    bootstrap=False, n_reps=1_000):
    
    model.metric = metric

    for FOLD in folds:
        X_train, y_train, X_test, y_test = read_data(
            target, FOLD, 101, False, False, data_dir, 1318 )
        
        def eval(model, X, y):
            model.X = X
            model.y = y

            # remove the recommendation kwargs to get the original model
            return 1 - model.eval(**recommendation.kwargs)

        entry = f"{metric.__name__}_train_{FOLD}"
        if bootstrap:
            val_samples = []
            for i in tqdm(range(n_reps)):
                samples = np.random.randint(0, len(y_train)-1, size=len(y_train))
                val_samples.append( eval(model, X_train.iloc[samples, :], y_train.iloc[samples]) )
                
            res_dict[f"{entry}_mean"] = np.mean(val_samples)
            res_dict[f"{entry}_std"] = np.std(val_samples)
            res_dict[f"{entry}_ci_upper"] = np.quantile(val_samples,0.95)
            res_dict[f"{entry}_ci_lower"] = np.quantile(val_samples,0.05)
        else:
            res_dict[entry] = eval(model, X_train, y_train)
            
        entry = f"{metric.__name__}_fold_out_{FOLD}"
        if bootstrap:
            val_samples = []
            for i in tqdm(range(n_reps)):
                samples = np.random.randint(0,len(y_test)-1, size=len(y_test))
                val_samples.append( eval(model, X_test.iloc[samples, :], y_test.iloc[samples]) )

            res_dict[f"{entry}_mean"] = np.mean(val_samples)
            res_dict[f"{entry}_std"] = np.std(val_samples)
            res_dict[f"{entry}_ci_upper"] = np.quantile(val_samples,0.95)
            res_dict[f"{entry}_ci_lower"] = np.quantile(val_samples,0.05)
        else:
            res_dict[entry] = eval(model, X_test, y_test)

    return res_dict

In [6]:
held_out_performances = {
    'size' : 50,
    'target' : 'ResHtndx',
    'scale' : False,
    'RunID' : 101,
}
for metric_f in [average_precision_score, roc_auc_score]:
    held_out_performances = evaluate_all_folds_nevergrad_wrapper(
        metric_f, model_final, 'res_htn_dx_ia', held_out_performances, ['A'],
        bootstrap=True, n_reps=1_000)

final_performances_df = pd.DataFrame(held_out_performances, index=[0])

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [00:37<00:00, 27.02it/s]
100%|██████████| 1000/1000 [00:16<00:00, 62.32it/s]
100%|██████████| 1000/1000 [00:45<00:00, 21.90it/s]
100%|██████████| 1000/1000 [00:11<00:00, 83.79it/s]


In [7]:
display(final_performances_df.T)

# Create LaTeX table
latex_table = final_performances_df.to_latex(
    index=True,
    column_format="lrrr",
    escape=False
)

# Save LaTeX table to file
filename = f"{paper_dir}/tab_parameter_optimization.tex"
with open(filename, 'w') as f:
    f.write(latex_table)
print(f"\nLaTeX table saved to {filename}\n")

Unnamed: 0,0
size,50
target,ResHtndx
scale,False
RunID,101
average_precision_score_train_A_mean,0.665864
average_precision_score_train_A_std,0.048651
average_precision_score_train_A_ci_upper,0.742817
average_precision_score_train_A_ci_lower,0.587111
average_precision_score_fold_out_A_mean,0.778305
average_precision_score_fold_out_A_std,0.053004



LaTeX table saved to ../paper/tab_parameter_optimization.tex



In [12]:
class Nevergrad_wrapper():
    # res_htn_dx_ia_GPT_4o_iterative_Classifier_False_False_False_1_C_14724_program
    def __init__(self, X, y, metric=average_precision_score):
        self.X = X
        self.y = y
        self.metric = metric

    def eval(self,
        mean_systolic_weight: float = 0.1,
        mean_diastolic_weight: float = 0.2,
        high_BP_during_htn_meds_3_weight: float = 0.25,
        high_BP_during_htn_meds_4_plus_weight: float = 0.3,
        HTN_MED_days_ACEI_ARB_weight: float = 0.05,
        HTN_MED_days_BETA_BLOCKERS_weight: float = 0.05,
        MED_HTN_N_weight: float = 0.05,
        q3_RDW_weight: float = -0.15,
        sd_enc_during_htn_meds_2_weight: float = -0.15
    ) -> float:
        probabilities = (
            mean_systolic_weight * self.X['mean_systolic'] / 200 +
            mean_diastolic_weight * self.X['mean_diastolic'] / 120 +
            high_BP_during_htn_meds_3_weight * self.X['high_BP_during_htn_meds_3'] / 10 +
            high_BP_during_htn_meds_4_plus_weight * self.X['high_BP_during_htn_meds_4_plus'] / 10 +
            HTN_MED_days_ACEI_ARB_weight * self.X['HTN_MED_days_ACEI_ARB'] / 3650 +
            HTN_MED_days_BETA_BLOCKERS_weight * self.X['HTN_MED_days_BETA_BLOCKERS'] / 3650 +
            MED_HTN_N_weight * self.X['MED_HTN_N'] / 100 +
            q3_RDW_weight * self.X['q3.RDW'] / 25 +
            sd_enc_during_htn_meds_2_weight * self.X['sd_enc_during_htn_meds_2'] / 100
        )
        probabilities = np.clip(probabilities, 0, 1)
        
        # making it a minimization problem
        if self.metric in [average_precision_score, roc_auc_score]:
            return 1 - self.metric(self.y, probabilities)
        
        return 1 - self.metric(self.y, np.where(probabilities > 0.5, 1.0, 0.0))

In [None]:
# data used to generate the model ----------------------------------------------
# res_htn_dx_ia_GPT_4o_iterative_Classifier_False_False_False_1_C_14724_program
X_train, y_train, X_test, y_test = read_data(
    'res_htn_dx_ia', 'C', 1, False, False, data_dir, 14724 )

model = Nevergrad_wrapper(X_train, y_train)

# should have same auprc as train, 0.6566308946685496
print("with default values, AUPRC on it's original training data is", 1-model.eval())

# held-out data ----------------------------------------------------------------
X_train, y_train, X_test, y_test = read_data(
    'res_htn_dx_ia', 'A', 101, False, False, data_dir, 1318 )    
    
model = Nevergrad_wrapper(X_train, y_train)

print("with default values, AUPRC on entire training data (for dataset 101) is", 1-model.eval())

parametrization = ng.p.Instrumentation(
    mean_systolic_weight = ng.p.Scalar(lower=0.0, upper=1.0),
    mean_diastolic_weight = ng.p.Scalar(lower=0.0, upper=1.0),
    high_BP_during_htn_meds_3_weight = ng.p.Scalar(lower=0.0, upper=1.0),
    high_BP_during_htn_meds_4_plus_weight = ng.p.Scalar(lower=0.0, upper=1.0),
    HTN_MED_days_ACEI_ARB_weight = ng.p.Scalar(lower=0.0, upper=1.0),
    HTN_MED_days_BETA_BLOCKERS_weight = ng.p.Scalar(lower=0.0, upper=1.0),
    MED_HTN_N_weight = ng.p.Scalar(lower=0.0, upper=1.0),
    q3_RDW_weight = ng.p.Scalar(lower=0.0, upper=1.0),
    sd_enc_during_htn_meds_2_weight = ng.p.Scalar(lower=0.0, upper=1.0),
)
optimizer = ng.optimizers.NGOpt(parametrization=parametrization, budget=5_000)

# optimizing on all training data
recommendation = optimizer.minimize(model.eval)

print("="*80)
for k, v in recommendation.kwargs.items():
    print(f'{k:34s} : {v}')
print("="*80)

# Should be better than default values
print("with optimized values (using 101 training data), AUPRC on train is", 1-model.eval(**recommendation.kwargs))

model_final = Nevergrad_wrapper(X_test, y_test)
print("WITHOUT using optimized values, AUPRC on 101 held-out data is", 1-model_final.eval())
print("using optimized values, AUPRC on 101 held-out data is", 1-model_final.eval(**recommendation.kwargs))


with default values, AUPRC on it's original training data is 0.6566308946685496
with default values, AUPRC on entire training data (for dataset 101) is 0.6710920402400595
Instrumentation(Tuple(),Dict(HTN_MED_days_ACEI_ARB_weight=Scalar{Cl(0,1,b)}[sigma=Scalar{exp=2.03}],HTN_MED_days_BETA_BLOCKERS_weight=Scalar{Cl(0,1,b)}[sigma=Scalar{exp=2.03}],MED_HTN_N_weight=Scalar{Cl(0,1,b)}[sigma=Scalar{exp=2.03}],high_BP_during_htn_meds_3_weight=Scalar{Cl(0,1,b)}[sigma=Scalar{exp=2.03}],high_BP_during_htn_meds_4_plus_weight=Scalar{Cl(0,1,b)}[sigma=Scalar{exp=2.03}],mean_diastolic_weight=Scalar{Cl(0,1,b)}[sigma=Scalar{exp=2.03}],mean_systolic_weight=Scalar{Cl(0,1,b)}[sigma=Scalar{exp=2.03}],q3_RDW_weight=Scalar{Cl(0,1,b)}[sigma=Scalar{exp=2.03}],sd_enc_during_htn_meds_2_weight=Scalar{Cl(0,1,b)}[sigma=Scalar{exp=2.03}])):((), {'mean_systolic_weight': 0.26367748447872674, 'mean_diastolic_weight': 0.15882717378847303, 'high_BP_during_htn_meds_3_weight': 0.7495114421271543, 'high_BP_during_htn_meds_4_

In [15]:
held_out_performances = {
    'size' : 50,
    'target' : 'ResHtndx',
    'scale' : False,
    'RunID' : 101,
}
for metric_f in [average_precision_score, roc_auc_score]:
    held_out_performances = evaluate_all_folds_nevergrad_wrapper(
        metric_f, model_final, 'res_htn_dx_ia', held_out_performances, ['A'],
        bootstrap=True, n_reps=1_000)

final_performances_df = pd.DataFrame(held_out_performances, index=[0])

100%|██████████| 1000/1000 [00:09<00:00, 107.85it/s]
100%|██████████| 1000/1000 [00:04<00:00, 249.71it/s]
100%|██████████| 1000/1000 [00:05<00:00, 178.75it/s]
100%|██████████| 1000/1000 [00:04<00:00, 231.65it/s]


In [16]:
display(final_performances_df.T)

# Create LaTeX table
latex_table = final_performances_df.to_latex(
    index=True,
    column_format="lrrr",
    escape=False
)

# Save LaTeX table to file
filename = f"{paper_dir}/tab_parameter_optimization.tex"
with open(filename, 'w') as f:
    f.write(latex_table)
print(f"\nLaTeX table saved to {filename}\n")

Unnamed: 0,0
size,50
target,ResHtndx
scale,False
RunID,101
average_precision_score_train_A_mean,0.66515
average_precision_score_train_A_std,0.046536
average_precision_score_train_A_ci_upper,0.739928
average_precision_score_train_A_ci_lower,0.587183
average_precision_score_fold_out_A_mean,0.827068
average_precision_score_fold_out_A_std,0.04747



LaTeX table saved to ../paper/tab_parameter_optimization.tex

