# Find best models

This notebook will load all models and re-evaluate them on all 5 folds, without re-fit. The best model for each phenotype will be selected for further experiments

In [1]:
from _load_llm_results import *

import numpy as np
import warnings
import os
import importlib
import sys; sys.path.append('../')
from evaluate_model import read_data
from sklearn.metrics import average_precision_score, roc_auc_score

warnings.filterwarnings("ignore")

%matplotlib inline

from tqdm import tqdm
from glob import glob

def evaluate_all_folds(metric, cp_function, cp_df, folds=['A', 'B', 'C', 'D', 'E'], bootstrap=False, n_reps=1_000, override_runid=False):
    
    runid = cp_df['RunID']
    if override_runid:
        runid = '101' # enforcing to use the held out dataset
        # print("Overriding RunID, Using runID", runid)
        cp_df['RunID'] = runid
    
    def eval(X, y):
        try:
            prob = np.array(cp_function(X))
        except Exception:
            prob = np.zeros(shape=len(X))
        try:
            if metric in [average_precision_score, roc_auc_score]:
                return metric(y, prob)
            else:
                return metric(y, np.where(prob> 0.5), 1, 0)
        except ValueError:
            return np.nan

    for FOLD in folds:
        X_train, y_train, X_test, y_test = read_data(
            targets_rev[cp_df['target']], FOLD, runid, cp_df['scale'],
            cp_df['icd_only'], data_dir, cp_df['random_state']
        )

        for partition, X, y in [('train', X_train, y_train), ('fold_out', X_test, y_test)]:
            entry = f"{metric.__name__}_{partition}_{FOLD}"
            if bootstrap:
                val_samples = []
                for i in tqdm(range(n_reps)):
                    samples = np.random.randint(0, len(y), size=len(y))
                    val_samples.append( eval(X.iloc[samples, :], y.iloc[samples]) )

                cp_df[f"{entry}_mean"] = np.mean(val_samples)
                cp_df[f"{entry}_std"] = np.std(val_samples)
                cp_df[f"{entry}_ci_upper"] = np.quantile(val_samples,0.95)
                cp_df[f"{entry}_ci_lower"] = np.quantile(val_samples,0.05)
            else:
                cp_df[entry] = eval(X, y)

    return cp_df

def load_results(constraints=dict(scale=[False])):
    count = 0
    for file in tqdm(glob(f"{results_path}/**/*.json", recursive=True)):
        
        try:
            cp_df = pd.read_json(file, typ="series")
        except Exception as e:
            # print("Bad results in file", file)
            continue
            
        # unifying column name
        cp_df = cp_df.rename(index={"few_feature": "icd_only"})
            
        stay = True
        for kc, vc in constraints.items():
            if cp_df[kc] not in vc:
                stay = False
                # print("skipping", file)
                break
        if not stay:
            # print('continuing')
            continue

        # Evaluating the program in the other folds
        cp_file = file.replace('.json', '_program') \
                      .replace(cp_df['target'], targets_rev[cp_df['target']]) \
                      .replace('/', '.')[3:]

        # Loading the program and counting the number of lines as a measure of interpretability
        cp_file_read = file.replace('.json', '_program.py') \
                            .replace(cp_df['target'], targets_rev[cp_df['target']])

        if 'iterative' not in file: # removing the fold from file name if not iterative
            cp_file = cp_file.replace(f"_{cp_df['fold']}_", '__')
            cp_file_read = cp_file_read.replace(f"_{cp_df['fold']}_", '__')

        # print(cp_file)
        with open(cp_file_read, 'r') as f:
            lines = f.readlines()

            if "from sklearn." in ' '.join(lines) or "np.random." in ' '.join(lines):
                print("LLM tried to cheat. ignoring this solution")
                continue

            # Writing number of lines as size. complexity still counts the number of operations
            if 'GPT' in file and False:
                cp_df['size'] = len([l for l in lines if l.strip(' \n') != ''])

        try:
            cp_module = importlib.__import__(
                cp_file,
                globals(),
                locals(),
                ["predict_hyptertension"],
            )
            cp_function = cp_module.predict_hypertension

            # Evaluating on other held-out folds. notice that the LLMs did not see the
            # entire training data even during its training!
            cp_df = evaluate_all_folds(average_precision_score, cp_function, cp_df)
        except:
            continue

        results.append(cp_df)
        count += 1

    print('loaded',count,'files')
    results_df = pd.DataFrame(data=results)\

    # Beautifying it
    results_df["model"] = results_df["model"].apply(lambda m: nice_model_labels[m])
    results_df["target"] = results_df["target"].apply(lambda t: dnames_to_nice[t])

    results_df = results_df[results_df["model"].isin(order)]

    print(results_df["model"].unique())
    print(results_df["target"].unique())
    
    return results_df
    
results_df = pd.concat([
    # GPT 4o SEDI
    # load_results( # rich prompt, expert features
    #     constraints = {
    #         'scale': [False],
    #         'model': ['GPT_4o_iterative_Classifier'], 
    #         'prompt_richness' : [True],
    #         'icd_only' : [True],
    #     }
    # ),
    # load_results( # simple prompt, all features
    #     constraints = {
    #         'scale': [False],
    #         'model': ['GPT_4o_iterative_Classifier'], 
    #         'prompt_richness' : [False],
    #         'icd_only' : [False],
    #     }
    # ),

    # GPT 4o zero-shot
    load_results( # rich prompt, expert features
        constraints = {
            'scale': [False],
            'model': ['GPT_4o_Classifier'], 
            'prompt_richness' : [True],
            'icd_only' : [True],
        }
    ),
    load_results( # simple prompt, all features
        constraints = {
            'scale': [False],
            'model': ['GPT_4o_Classifier'], 
            'prompt_richness' : [False],
            'icd_only' : [False],
        }
    )
])

# Making it the format seaborn likes
results_df_melted = pd.melt(
    results_df, 
    id_vars=['model', 'target', 'fold', 'RunID', 'random_state', 'prompt_richness', 'icd_only', 'scale']
)

print(results_df.columns)
print(results_df.shape)

100%|██████████| 7200/7200 [01:55<00:00, 62.18it/s] 


loaded 300 files
['gpt-4o']
['HTN Heuristic' 'HTN Diagnosis' 'HTN-Hypokalemia Diagnosis'
 'Htn-Hypokalemia Heuristic' 'Resistant HTN Heuristic'
 'Resistant HTN Diagnosis']


  5%|▌         | 392/7200 [00:01<00:32, 210.46it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


  6%|▌         | 414/7200 [00:07<08:32, 13.24it/s] 

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


  6%|▌         | 429/7200 [00:11<14:03,  8.03it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


  6%|▌         | 448/7200 [00:18<23:01,  4.89it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 39%|███▉      | 2800/7200 [01:01<00:22, 198.95it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 39%|███▉      | 2821/7200 [01:07<06:00, 12.13it/s] 

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 39%|███▉      | 2836/7200 [01:11<09:18,  7.82it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 40%|███▉      | 2875/7200 [01:16<07:23,  9.75it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 55%|█████▌    | 3988/7200 [01:20<00:12, 253.44it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 56%|█████▌    | 4014/7200 [01:23<01:57, 27.01it/s] 

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 56%|█████▌    | 4033/7200 [01:28<04:32, 11.60it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 56%|█████▌    | 4046/7200 [01:30<05:29,  9.56it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 57%|█████▋    | 4084/7200 [01:32<03:48, 13.63it/s]

LLM tried to cheat. ignoring this solution


 72%|███████▏  | 5194/7200 [01:37<00:08, 246.80it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 72%|███████▏  | 5219/7200 [01:39<00:48, 41.09it/s] 

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 73%|███████▎  | 5237/7200 [01:40<01:12, 27.22it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 73%|███████▎  | 5280/7200 [01:41<00:56, 34.20it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 89%|████████▊ | 6383/7200 [01:46<00:03, 231.27it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 89%|████████▉ | 6407/7200 [01:47<00:08, 90.71it/s] 

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 89%|████████▉ | 6425/7200 [01:48<00:18, 42.33it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 89%|████████▉ | 6438/7200 [01:48<00:21, 35.20it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


 90%|████████▉ | 6472/7200 [01:49<00:16, 43.74it/s]

LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution
LLM tried to cheat. ignoring this solution


100%|██████████| 7200/7200 [01:52<00:00, 63.85it/s] 

loaded 165 files
['gpt-4o']
['HTN Heuristic' 'HTN Diagnosis' 'HTN-Hypokalemia Diagnosis'
 'Htn-Hypokalemia Heuristic' 'Resistant HTN Heuristic'
 'Resistant HTN Diagnosis']
Index(['accuracy_score_train', 'precision_score_train',
       'average_precision_score_train', 'roc_auc_score_train',
       'balanced_accuracy_score_train', 'accuracy_score_test',
       'precision_score_test', 'average_precision_score_test',
       'roc_auc_score_test', 'balanced_accuracy_score_test', 'messages',
       'model', 'target', 'fold', 'RunID', 'random_state', 'representation',
       'representation_fmt', 'size', 'complexity', 'scale', 'icd_only',
       'prompt_richness', 'time', 'pred', 'pred_proba',
       'average_precision_score_train_A', 'average_precision_score_fold_out_A',
       'average_precision_score_train_B', 'average_precision_score_fold_out_B',
       'average_precision_score_train_C', 'average_precision_score_fold_out_C',
       'average_precision_score_train_D', 'average_precision_scor




In [2]:
print(results_df_melted.variable.unique())
display(results_df_melted.sample(5))

['accuracy_score_train' 'precision_score_train'
 'average_precision_score_train' 'roc_auc_score_train'
 'balanced_accuracy_score_train' 'accuracy_score_test'
 'precision_score_test' 'average_precision_score_test'
 'roc_auc_score_test' 'balanced_accuracy_score_test' 'messages'
 'representation' 'representation_fmt' 'size' 'complexity' 'time' 'pred'
 'pred_proba' 'average_precision_score_train_A'
 'average_precision_score_fold_out_A' 'average_precision_score_train_B'
 'average_precision_score_fold_out_B' 'average_precision_score_train_C'
 'average_precision_score_fold_out_C' 'average_precision_score_train_D'
 'average_precision_score_fold_out_D' 'average_precision_score_train_E'
 'average_precision_score_fold_out_E']


Unnamed: 0,model,target,fold,RunID,random_state,prompt_richness,icd_only,scale,variable,value
17448,gpt-4o,HTN Heuristic,C,1,16695,False,False,False,average_precision_score_train_C,0.438108
7785,gpt-4o,HTN-Hypokalemia Diagnosis,D,1,27690,True,True,False,messages,"[{'role': 'system', 'content': 'You are an AI ..."
17295,gpt-4o,Htn-Hypokalemia Heuristic,B,1,27690,True,True,False,average_precision_score_train_C,1.0
21326,gpt-4o,HTN Diagnosis,D,1,14724,False,False,False,average_precision_score_fold_out_E,0.961383
13455,gpt-4o,Htn-Hypokalemia Heuristic,A,1,1318,True,True,False,pred_proba,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ..."


In [3]:
for target in ['Resistant HTN Diagnosis']: # ['HTN Heuristic' 'HTN Diagnosis' 'HTN-Hypokalemia Diagnosis' 'Htn-Hypokalemia Heuristic' 'Resistant HTN Heuristic' 'Resistant HTN Diagnosis']
    for prompt_richness in [False, True]:
        for icd_only in [False, True]:
            for metric in [
                'average_precision_score_fold_out', 
            ]:
                data = (
                    results_df_melted[ ( (results_df_melted['variable'].str.contains(metric))
                                    | (results_df_melted['variable']=='size') )
                                    & (results_df_melted['prompt_richness']==prompt_richness)
                                    & (results_df_melted['icd_only']==icd_only)
                                    & (results_df_melted['target']==target)
                    ]
                    .dropna()
                )

                if len(data)==0:
                    continue

                print("prompt_richness", prompt_richness, "expert_features", icd_only)
                
                print(data.target.unique())
                # removing the fold name from the metric
                data['variable'].replace({f'{metric}{fold}' : metric for fold in ['_A', '_B', '_C', '_D', '_E']}, inplace=True)
                data['value'] = data['value'].astype(float)
                data['Strategy'] = ['SEDI / Train' if 'iter' in v or 'gpt' not in v else 'Zero Shot' for v in data['model'].values]

                # first we group each model's performance on the held out data and take the mean.
                # then, we ignore folds and random states to get the final model.
                model_metrics_all_folds = data.groupby([
                    'target', 'model', 'prompt_richness', 'icd_only', 'Strategy', 'scale',
                    'RunID', 'fold', 'random_state', 'variable']
                )['value'].mean().reset_index()

                model_metrics_all_folds = model_metrics_all_folds.pivot_table(
                    index=['target', 'model', 'prompt_richness', 'icd_only', 'Strategy', 'scale',
                        'RunID', 'fold', 'random_state'],
                    columns='variable',
                    values='value'
                ).reset_index()

                # Sorting by size, in case of ties in metric
                best_models = model_metrics_all_folds\
                    .sort_values([metric, 'size'], ascending=[False, True])\
                    .drop_duplicates(['target', 'model', 'prompt_richness', 'icd_only', 'Strategy', 'scale'])
                    
                display(best_models.T)

                best_models[f'size'] = np.nan
                for partition in ['train', 'fold_out']:
                    for metric_f in [average_precision_score, roc_auc_score]:
                        best_models[f'mean_{metric_f.__name__}_{partition}_mean'] = np.nan
                        best_models[f'mean_{metric_f.__name__}_{partition}_std'] = np.nan
                        best_models[f'mean_{metric_f.__name__}_{partition}_ci_lower'] = np.nan
                        best_models[f'mean_{metric_f.__name__}_{partition}_ci_upper'] = np.nan
                

                # Evaluating it on the held out test partition -----
                held_out_performances = []
                for i, row in best_models.iterrows():
                    # print(row)
                    # finding the correct file
                    filename = (
                        "/".join([results_path,
                                targets_rev[dnames_to_ugly[row['target']]],
                                nice_to_ugly[row['model']] ])
                        + "/"
                        + "_".join(
                            [
                                dnames_to_ugly[row['target']],
                                nice_to_ugly[row['model']],
                                str(row['scale']),
                                str(row['icd_only']),
                                str(row['prompt_richness']),
                                str(row['RunID']),
                                row['fold'],
                                str(row['random_state']),
                            ]
                        )
                        + '.json'
                    )
                    # print(filename)
                    cp_df = pd.read_json(filename, typ="series")

                    cp_df = cp_df.rename(index={"few_feature": "icd_only"})

                    # Evaluating the program in the other folds
                    cp_file = filename.replace('.json', '_program') \
                                .replace(cp_df['target'], targets_rev[cp_df['target']]) \
                                .replace('/', '.')[3:]

                    if 'GPT' in filename:
                        # Loading the program and counting the number of lines as a measure of interpretability
                        cp_file_read = filename.replace('.json', '_program.py') \
                                        .replace(cp_df['target'], targets_rev[cp_df['target']])

                        if 'iterative' not in filename: # removing the fold from file name if not iterative
                            cp_file_read = cp_file_read.replace(f"_{cp_df['fold']}_", '__')

                        with open(cp_file_read, 'r') as f:
                            # Writing number of lines as size. complexity still counts the number of operations
                            cp_df['size'] = len([l for l in f.readlines() if l.strip(' \n') != ''])

                    if 'iterative' not in filename: # removing the fold from file name if not iterative
                        cp_file = cp_file.replace(f"_{cp_df['fold']}_", '__')

                    # print(cp_file)

                    cp_function = None
                    try:
                        cp_module = importlib.__import__(
                            cp_file, # This is going to be unique for each algorithm
                            globals(),
                            locals(),
                            ["predict_hyptertension"],
                        )
                        cp_function = cp_module.predict_hypertension
                    except:
                        continue
                    
                    cp_df['RunID'] = '101'
                    best_models.loc[i, f'size'] = cp_df['size']
                    for metric_f in [average_precision_score, roc_auc_score]:
                        cp_df = evaluate_all_folds(metric_f, cp_function, cp_df, ['A'], bootstrap=True, override_runid=True)
                        
                        for partition in ['train', 'fold_out']:
                            best_models.loc[i, f'mean_{metric_f.__name__}_{partition}_mean'] = cp_df[f'{metric_f.__name__}_{partition}_A_mean']
                            best_models.loc[i, f'mean_{metric_f.__name__}_{partition}_std'] = cp_df[f'{metric_f.__name__}_{partition}_A_std']
                            best_models.loc[i, f'mean_{metric_f.__name__}_{partition}_ci_upper'] = cp_df[f'{metric_f.__name__}_{partition}_A_ci_upper']
                            best_models.loc[i, f'mean_{metric_f.__name__}_{partition}_ci_lower'] = cp_df[f'{metric_f.__name__}_{partition}_A_ci_lower']
                        
                display(best_models.T)
        
                print("-"*120)

                # Create LaTeX table
                latex_table = best_models.to_latex(
                    index=True,
                    column_format="lrrr",
                    escape=False
                )
                
                # Save LaTeX table to file
                filename = f"{paper_dir}/tab_best_model_{metric}_{prompt_richness}_{icd_only}.tex"
                with open(filename, 'w') as f:
                    f.write(latex_table)
                
                print(f"\nLaTeX table saved to {filename}\n")

prompt_richness False expert_features False
['Resistant HTN Diagnosis']


Unnamed: 0_level_0,0
variable,Unnamed: 1_level_1
target,Resistant HTN Diagnosis
model,gpt-4o
prompt_richness,False
icd_only,False
Strategy,Zero Shot
scale,False
RunID,1
fold,A
random_state,24284
average_precision_score_fold_out,0.284354


100%|██████████| 1000/1000 [00:06<00:00, 155.89it/s]
100%|██████████| 1000/1000 [00:03<00:00, 312.32it/s]
100%|██████████| 1000/1000 [00:06<00:00, 148.70it/s]
100%|██████████| 1000/1000 [00:03<00:00, 276.04it/s]


Unnamed: 0_level_0,0
variable,Unnamed: 1_level_1
target,Resistant HTN Diagnosis
model,gpt-4o
prompt_richness,False
icd_only,False
Strategy,Zero Shot
scale,False
RunID,1
fold,A
random_state,24284
average_precision_score_fold_out,0.284354


------------------------------------------------------------------------------------------------------------------------

LaTeX table saved to ../paper_rebuttal/tab_best_model_average_precision_score_fold_out_False_False.tex

prompt_richness True expert_features True
['Resistant HTN Diagnosis']


Unnamed: 0_level_0,7
variable,Unnamed: 1_level_1
target,Resistant HTN Diagnosis
model,gpt-4o
prompt_richness,True
icd_only,True
Strategy,Zero Shot
scale,False
RunID,1
fold,A
random_state,24481
average_precision_score_fold_out,0.544349


100%|██████████| 1000/1000 [00:02<00:00, 424.25it/s]
100%|██████████| 1000/1000 [00:01<00:00, 646.58it/s]
100%|██████████| 1000/1000 [00:02<00:00, 487.89it/s]
100%|██████████| 1000/1000 [00:01<00:00, 518.44it/s]


Unnamed: 0_level_0,7
variable,Unnamed: 1_level_1
target,Resistant HTN Diagnosis
model,gpt-4o
prompt_richness,True
icd_only,True
Strategy,Zero Shot
scale,False
RunID,1
fold,A
random_state,24481
average_precision_score_fold_out,0.544349


------------------------------------------------------------------------------------------------------------------------

LaTeX table saved to ../paper_rebuttal/tab_best_model_average_precision_score_fold_out_True_True.tex



In [4]:
# reminder of filename
# filename = (
#     rdir
#     + "/"
#     + "_".join(
#         [
#             targets[target],
#             name,
#             str(scale),
#             str(few_feature),
#             str(prompt_richness),
#             str(repeat),
#             str(fold),
#             str(random_state),
#         ]
#     )
# )