### OpenAI performance calculations for HCP models and data sets ###
We have updates on data sets and prompts.
Use this code as a template to run predictions for the training and test sets.

In [1]:
import os
import copy
import numpy as np
import pandas as pd
import time
import logging
from pandas.core.frame import DataFrame

# Logger named after the running module
logger = logging.getLogger(__name__)

# Notebook appearance: full-width container and wider numpy / pandas output
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
np.set_printoptions(linewidth=110)
# pandas display limits, applied one option at a time
for option_name, option_value in [('display.max_rows', 500),
                                  ('display.max_columns', 100),
                                  ('display.width', 1000)]:
    pd.set_option(option_name, option_value)

# Import this module with autoreload
%load_ext autoreload
%autoreload 2
from llmt.performance import Performance

In [2]:
# Data directory: the 'hcp' folder below the path stored in the DATA environment variable
data_root = os.environ.get('DATA')
if data_root is None:
    # Fail early with a clear message; os.path.join(None, 'hcp') would raise an opaque TypeError
    raise RuntimeError("Environment variable 'DATA' is not set; it must point to the directory that contains 'hcp'.")
data_dir = os.path.join(data_root, 'hcp')

# Training sets with predictions
data_file_names = ['hcp-train-250413-samples.parquet', 'hcp-train-250701-samples.parquet']

# Define the names of the true and predicted columns
# We need to define these names somewhere to combine the correct columns
# (key: column with the true label, value: column with the prediction)
col_dict = {'mental_health': 'pred_mh',
            'inpatient': 'pred_ip',
            'outpatient': 'pred_op'}

# We need the combination columns for the performance table
col_dict.update({'mental_health_inpatient': 'pred_mh_ip',
                 'mental_health_outpatient': 'pred_mh_op'})

# dicts keep insertion order, so keys() and values() stay aligned pair by pair
true_col_list = list(col_dict.keys())
pred_col_list = list(col_dict.values())
print(true_col_list)
print(pred_col_list)

['mental_health', 'inpatient', 'outpatient', 'mental_health_inpatient', 'mental_health_outpatient']
['pred_mh', 'pred_ip', 'pred_op', 'pred_mh_ip', 'pred_mh_op']


### Data sets ###

In [3]:
# Files listed in orig_file_name_list are read from data_dir; the label columns are counted per file
orig_file_name_list = ['hcp-train-250413.parquet', 'hcp-train-250701.parquet']
col_list = ['mental_health', 'inpatient', 'outpatient']
for data_file_name in orig_file_name_list:
    data_file = os.path.join(data_dir, data_file_name)
    df = pd.read_parquet(data_file)
    # File name without extension; used in the printed header and in every count table
    dataset_name = os.path.splitext(data_file_name)[0]
    print(f'dataset: {dataset_name}')
    for col in col_list:
        # Frequency table of the label values, sorted by label value (highest first).
        # value_counts() skips missing values by default, and the 'count' column name
        # used below is the one pandas >= 2.0 gives to the result.
        ct = (
            df[col]
            .value_counts()
            .to_frame()
            .reset_index(drop=False)
            .sort_values(by=col, ascending=False)
            .reset_index(drop=True)
        )
        ct.insert(loc=0, column='dataset', value=dataset_name)
        display(ct)
        # Outer double quotes: reusing the same quote inside an f-string is a
        # SyntaxError on Python versions before 3.12
        print(f"TOTAL: {ct['count'].sum()}")

dataset: hcp-train-250413


Unnamed: 0,dataset,mental_health,count
0,hcp-train-250413,2.0,16
1,hcp-train-250413,1.0,136
2,hcp-train-250413,0.0,35


TOTAL: 187


Unnamed: 0,dataset,inpatient,count
0,hcp-train-250413,2.0,5
1,hcp-train-250413,1.0,69
2,hcp-train-250413,0.0,113


TOTAL: 187


Unnamed: 0,dataset,outpatient,count
0,hcp-train-250413,2.0,51
1,hcp-train-250413,1.0,94
2,hcp-train-250413,0.0,42


TOTAL: 187
dataset: hcp-train-250701


Unnamed: 0,dataset,mental_health,count
0,hcp-train-250701,1,125
1,hcp-train-250701,0,62


TOTAL: 187


Unnamed: 0,dataset,inpatient,count
0,hcp-train-250701,1,71
1,hcp-train-250701,0,116


TOTAL: 187


Unnamed: 0,dataset,outpatient,count
0,hcp-train-250701,1,115
1,hcp-train-250701,0,72


TOTAL: 187


### Functions ###

In [11]:
def performance_table(data:DataFrame, true_pred_cols:dict) -> DataFrame:
    """
    Create a binary performance table with one row per true/predicted column pair.

    :param data: DataFrame handed to Performance(data=...) for every pair
    :param true_pred_cols: Mapping of true column name -> predicted column name;
        rows follow the iteration order of this mapping
    :return: DataFrame whose first column 'category' holds the true column name,
        followed by the entries of the dict returned by binary_performance.
        The index is the position of each pair in the mapping (0, 1, ...).

    NOTE: an empty mapping is not handled here; pd.concat raises on an empty list.
    """
    def _single_row(position, true_col, pred_col):
        # One-row frame for one pair, indexed by the pair's position in the mapping
        metrics = Performance(data=data).binary_performance(true_col=true_col, pred_col=pred_col)
        row_df = pd.DataFrame(metrics, index=[position])
        row_df.insert(loc=0, column='category', value=true_col)
        return row_df

    row_frames = [
        _single_row(position, true_col, pred_col)
        for position, (true_col, pred_col) in enumerate(true_pred_cols.items())
    ]
    return pd.concat(row_frames, axis=0)

### Performance tables per data set (first model and first prompt) ###

In [31]:
# One performance table per prediction file
for fidx, data_file_name in enumerate(data_file_names):
    data_file = os.path.join(data_dir, data_file_name)
    df = pd.read_parquet(data_file)
    print(f'Data set {fidx + 1} / {len(data_file_names)}: {data_file_name}')

    # Model: entry midx of the distinct model values (unique() keeps order of first appearance)
    midx = 0
    model_list = list(df['model'].unique())
    model = model_list[midx]
    print(f'Model    {midx + 1} / {len(model_list)}: {model}')

    # Prompt: entry pidx of the distinct prompt values
    pidx = 0
    prompt_list = list(df['prompt'].unique())
    prompt = prompt_list[pidx]
    print(f'Prompt   {pidx + 1} / {len(prompt_list)}')

    # Keep only the rows that belong to the selected model and prompt
    is_model = df['model'] == model
    is_prompt = df['prompt'] == prompt
    data_df = df.loc[is_model & is_prompt]

    performance_df = performance_table(data=data_df, true_pred_cols=col_dict)
    # Prepend the identifying columns in the order dataset, model, prompt
    id_columns = (('dataset', os.path.splitext(data_file_name)[0]),
                  ('model', model),
                  ('prompt', prompt))
    for position, (id_name, id_value) in enumerate(id_columns):
        performance_df.insert(loc=position, column=id_name, value=id_value)
    print()
    display(performance_df)

Data set 1 / 2: hcp-train-250413-samples.parquet
Model    1 / 2: gpt-4o-1120
Prompt   1 / 2



Unnamed: 0,dataset,model,prompt,category,p,n,tp,tn,fp,fn,recall,precision,min_precision,specificity,f_score
0,hcp-train-250413-samples,gpt-4o-1120,1,mental_health,136,35,112,29,6,24,0.8235,0.9492,0.7953,0.8286,0.8819
1,hcp-train-250413-samples,gpt-4o-1120,1,inpatient,69,113,68,104,9,1,0.9855,0.8831,0.3791,0.9204,0.9315
2,hcp-train-250413-samples,gpt-4o-1120,1,outpatient,94,42,76,31,11,18,0.8085,0.8736,0.6912,0.7381,0.8398
3,hcp-train-250413-samples,gpt-4o-1120,1,mental_health_inpatient,62,104,60,92,12,2,0.9677,0.8333,0.3735,0.8846,0.8955
4,hcp-train-250413-samples,gpt-4o-1120,1,mental_health_outpatient,87,40,72,33,7,15,0.8276,0.9114,0.685,0.825,0.8675


Data set 2 / 2: hcp-train-250701-samples.parquet
Model    1 / 2: gpt-4o-1120
Prompt   1 / 2



Unnamed: 0,dataset,model,prompt,category,p,n,tp,tn,fp,fn,recall,precision,min_precision,specificity,f_score
0,hcp-train-250701-samples,gpt-4o-1120,1,mental_health,125,62,103,42,20,22,0.824,0.8374,0.6684,0.6774,0.8306
1,hcp-train-250701-samples,gpt-4o-1120,1,inpatient,71,116,70,108,8,1,0.9859,0.8974,0.3797,0.931,0.9396
2,hcp-train-250701-samples,gpt-4o-1120,1,outpatient,115,72,91,54,18,24,0.7913,0.8349,0.615,0.75,0.8125
3,hcp-train-250701-samples,gpt-4o-1120,1,mental_health_inpatient,62,125,60,111,14,2,0.9677,0.8108,0.3316,0.888,0.8824
4,hcp-train-250701-samples,gpt-4o-1120,1,mental_health_outpatient,102,85,79,67,18,23,0.7745,0.8144,0.5455,0.7882,0.794
