### OpenAI predictions and performance for updated training set ###
We have updates on data sets and prompts.
Use this code as a template to run predictions for the trainin and test sets

In [1]:
import os
import copy
import numpy as np
import pandas as pd
import time
import logging
from collections import deque
from pandas.core.frame import DataFrame

logger = logging.getLogger(__name__)

# Appearance of the Notebook
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
np.set_printoptions(linewidth=110)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 1000)

# Import this module with autoreload
%load_ext autoreload
%autoreload 2
from llmt.openaimodel import OpenAIModel
from llmt.performance import Performance

In [2]:
data_dir = os.path.join(os.environ.get('DATA'), 'hcp')
print(data_dir)
train_file_name = 'hcp-train-250701.parquet'
train_file = os.path.join(data_dir, train_file_name)
print(train_file)
train_df = pd.read_parquet(train_file)
print(train_df.shape)
print(len(train_df['id'].unique()))
display(train_df.head(2))
company_id_list = list(train_df['id'].unique())
print(len(company_id_list))

/app/data/hcp
/app/data/hcp/hcp-train-250701.parquet
(187, 7)
187


Unnamed: 0,id,name,description,mental_health,inpatient,outpatient,dset
0,491639-77,144-Bed Hospital Facility Joint Venture in Den...,Provider of behavioral health services located...,1,1,1,train
1,10995-58,Acadia Healthcare (NAS: ACHC),Acadia Healthcare Co Inc acquires and develops...,1,1,1,train


187


In [3]:
# Define the names of the true and predicted columns
# We need to define these names somewhere to combine the correct columns
col_dict = {'mental_health': 'pred_mh',
            'inpatient': 'pred_ip',
            'outpatient': 'pred_op'}

# We need the combination columns for the performance table
stat_col_dict = copy.deepcopy(col_dict)
stat_col_dict.update({'mental_health_inpatient': 'pred_mh_ip',
                      'mental_health_outpatient': 'pred_mh_op'})

# Create a new dictionary with the values above
# We need this dictionary to run a loop over the true/pred pairs
true_col_list = list(col_dict.keys())
pred_col_list = [col_dict.get(k) for k in true_col_list]
print(true_col_list)
print(pred_col_list)

# Check environmental variables
print(os.environ.get('OPENAI_API_ENDPOINT'))
print(os.environ.get('OPENAI_API_VERSION'))

['mental_health', 'inpatient', 'outpatient']
['pred_mh', 'pred_ip', 'pred_op']
https://azure-ai.hms.edu
2025-04-01-preview


### Functions ###

In [29]:
# Run the prediction functions for a specific model
def predict_classes(model, name, description, temperature, version_dict=None):
    if version_dict is None:
        version_dict = {'mh': 3, 'ip': 1, 'op': 1}
    response_list = []
    response_mh = model.predict_mh(name=name, 
                                   description=description, 
                                   temperature=temperature, 
                                   version=version_dict.get('mh'))
    response_list.append(response_mh)
    response_ip = model.predict_ip(name=name, 
                                   description=description, 
                                   temperature=temperature, 
                                   version=version_dict.get('ip'))
    response_list.append(response_ip)
    response_op = model.predict_op(name=name, 
                                   description=description, 
                                   temperature=temperature, 
                                   version=version_dict.get('op'))
    response_list.append(response_op)
    assert len(response_list) == 3
    return response_list

# Function to send a sample to the model
def send_sample(cdf: DataFrame, deployment: str, version_dict: dict) -> DataFrame:
    assert len(cdf) == 1, 'The input data frame is one sample and should have just ONE ROW!'
    ser = cdf.dropna(axis=1).iloc[0]
    company_n = ser.get('name')
    company_d = ser.get('description')
    if None not in [company_n, company_d]:
        try:
            model = OpenAIModel(model=deployment)
            response_list = predict_classes(model=model,
                                            name=company_n,
                                            description=company_d,
                                            temperature=0,
                                            version_dict=version_dict)
            # Add the responses to the company_df dataframe
            for response in response_list:
                cdf = cdf.assign(**response)
        except Exception as e:
            logger.error(f'ERROR sending sample to model: {e}')
        else:
            # Combine the binary columns
            cdf = Performance(data=cdf). \
                combine_columns(true_col_list=['mental_health', 'inpatient'],
                                pred_col_list=['pred_mh', 'pred_ip']). \
                rename(columns={'pred_mh_pred_ip': 'pred_mh_ip'})

            cdf = Performance(data=cdf). \
                combine_columns(true_col_list=['mental_health', 'outpatient'],
                                pred_col_list=['pred_mh', 'pred_op']). \
                rename(columns={'pred_mh_pred_op': 'pred_mh_op'})
    else:
        logger.warning(f'Sample {ser.get("id")} has missing values for name or description. Skipping...')
        cdf = cdf.assign(pred_mh=np.nan, pred_ip=np.nan, pred_op=np.nan,
                         verified_op=np.nan, pred_mh_ip=np.nan, pred_mh_op=np.nan)
    return cdf

def performance_table(data, true_pred_cols):
    performance_df_list = []
    for v, (true_col, pred_col) in enumerate(true_pred_cols.items()):
        performance_dict = Performance(data=data).binary_performance(true_col=true_col, pred_col=pred_col)
        performance_df = pd.DataFrame(performance_dict, index=[v])
        performance_df.insert(loc=0, column='category', value=true_col)
        performance_df_list.append(performance_df)
    performance_df = pd.concat(performance_df_list, axis=0)
    return performance_df

def save_output(data: pd.core.frame.DataFrame, file_base: str, file_dir: str) -> bool:
    """ Save a data frame as .parquet and .csv files """
    file_base_name = os.path.basename(os.path.splitext(file_base)[0])
    assert os.path.exists(file_dir), f'output directory {file_dir} does not exist!'
    parquet_file = os.path.join(file_dir, f'{file_base_name}.parquet')
    csv_file = os.path.join(file_dir, f'{file_base_name}.csv')
    success = False
    try:
        data.to_parquet(parquet_file)
        data.to_csv(csv_file, index=False)
    except Exception as e:
        logger.error(f'ERROR saving output files: {e}')
    else:
        assert os.path.exists(parquet_file), f'Parquet file {parquet_file} not found.'
        assert os.path.exists(csv_file), f'CSV file {csv_file} not found.'
        success = True
    return success

In [30]:
file_base_name = 'hcp-train-predictions'
file_base = os.path.basename(os.path.splitext(file_base_name)[0])
print(file_base)

hcp-train-predictions


### Code to prepare model prompts and outputs ###

In [31]:
df = copy.deepcopy(train_df)
display(df.head(2))

# Run an example ID, just to make sure it's all working
idx = 25
company_id = df['id'].unique()[idx]
company_df = df.loc[df['id'] == company_id]
name = company_df.get('name').values[0]
description = company_df.get('description').values[0]
print()
print(name)
print()
print(description)

version_dict_1 = {'mh': 3, 'ip': 1, 'op': 1}
version_dict_2 = {'mh': 4, 'ip': 2, 'op': 2}
version_dict = version_dict_1

deployment_name_1 = 'gpt-4o-1120'
deployment_name_2 = 'gpt-4.1'
deployment_name = deployment_name_1

model = OpenAIModel(model=deployment_name)
temperature = 0
response_list = predict_classes(model=model, 
                                name=name, 
                                description=description, 
                                temperature=temperature, 
                                version_dict=version_dict)

# Add the responses to the company_df dataframe
for response in response_list:
    company_df = company_df.assign(**response)

# Create the combination of columns
company_df = Performance(data=company_df).\
    combine_columns(true_col_list=['mental_health', 'inpatient'],
                    pred_col_list=['pred_mh', 'pred_ip']).\
    rename(columns={'pred_mh_pred_ip': 'pred_mh_ip'})

company_df = Performance(data=company_df).\
    combine_columns(true_col_list=['mental_health', 'outpatient'],
                    pred_col_list=['pred_mh', 'pred_op']).\
    rename(columns={'pred_mh_pred_op': 'pred_mh_op'})

print()
print(response_list)
print()
display(company_df)

# Use the function
company_df = df.loc[df['id'] == company_id]
test_df = send_sample(cdf=company_df, deployment=deployment_name, version_dict=version_dict)
display(test_df)

Unnamed: 0,id,name,description,mental_health,inpatient,outpatient,dset
0,491639-77,144-Bed Hospital Facility Joint Venture in Den...,Provider of behavioral health services located...,1,1,1,train
1,10995-58,Acadia Healthcare (NAS: ACHC),Acadia Healthcare Co Inc acquires and develops...,1,1,1,train



BaseCamp Recovery Center

Provider of a comprehensive treatment facility in Columbus, Ohio. The company offers treatment of drugs addiction, alcoholism, detox therapy, hospitalization services, aftercare services, and mental health disorders, enabling patients to avail the required medical services and quit addication.

[{'pred_mh': 1}, {'pred_ip': 1}, {'pred_op': 1, 'verified_op': 1}]



Unnamed: 0,id,name,description,mental_health,inpatient,outpatient,dset,pred_mh,pred_ip,pred_op,verified_op,mental_health_inpatient,pred_mh_ip,mental_health_outpatient,pred_mh_op
0,467540-11,BaseCamp Recovery Center,Provider of a comprehensive treatment facility...,1,0,1,train,1,1,1,1,0,1,1,1


Unnamed: 0,id,name,description,mental_health,inpatient,outpatient,dset,pred_mh,pred_ip,pred_op,verified_op,mental_health_inpatient,pred_mh_ip,mental_health_outpatient,pred_mh_op
0,467540-11,BaseCamp Recovery Center,Provider of a comprehensive treatment facility...,1,0,1,train,1,1,1,1,0,1,1,1


In [32]:
# Run model for the test data
test_file_name = 'hcp-test-250701.parquet'
df_test = pd.read_parquet(os.path.join(data_dir, test_file_name))
display(df_test.head(2))
company_id_list = list(df_test['id'].unique())
print(len(company_id_list))

Unnamed: 0,id,name,description,mental_health,inpatient,outpatient,dset
0,525519-64,10-4 Medical,Provider focused on whole-person healthcare in...,,,,test
1,111309-13,12 Keys Rehab,Provider of rehabilitation services intended t...,,,,test


1838


In [33]:
# Run an example ID, just to make sure it's all working
idx = 25
company_id_list = list(df_test['id'].unique())
company_id = company_id_list[idx]
company_df = df_test.loc[df_test['id'] == company_id]
name = company_df.get('name').values[0]
description = company_df.get('description').values[0]
print()
print(name)
print()
print(description)
company_df = company_df.dropna(axis=1)
display(company_df)
result_df = send_sample(cdf=company_df,
                        deployment=deployment_name_1,
                        version_dict=version_dict_1)


Accounts Receivable Management and Data Services

Provider of Medicaid and charity care eligibility services in the state of New Jersey. The company provides management, billing and financial consulting services to hospitals. It offers charity care eligibility screening and enrollment, accounts receivable, audit\compliance, behavioral health, financial educational and consulting services, as well as Medicaid programs.


Unnamed: 0,id,name,description,dset
25,54155-80,Accounts Receivable Management and Data Services,Provider of Medicaid and charity care eligibil...,test


In [27]:
idx = 310
company_id = company_id_list[idx]
company_df = df_test.loc[df_test['id'] == company_id]
company_df = company_df.dropna(axis=1)
display(company_df)
result_df = send_sample(cdf=company_df,
                        deployment=deployment_name_1,
                        version_dict=version_dict_1)
display(result_df)

Unnamed: 0,id,name,dset
310,145425-97,Cherokee Health Systems,test


Sample 145425-97 has missing values for name or description. Skipping...


Unnamed: 0,id,name,dset,pred_mh,pred_ip,pred_op,pred_mh_ip,pred_mh_op
310,145425-97,Cherokee Health Systems,test,,,,,


In [12]:
ser = company_df.iloc[0]
company_n = ser.get('name')
company_d = ser.get('description')

In [19]:
ser.id

'145425-97'

In [39]:
print([company_n, company_d])
None in [company_n, company_d]

['Chesapeake Treatment Services', 'Provider of addiction prevention treatment intended to help patients break the cycle of addiction and get on the pathway to sustained long-term recovery. The company offers Medication-Assisted Treatment (MAT) which is a combination of therapy and medications to treat Opioid Use Disorder.']


False

In [16]:
print(c)
print(company_id)
print(company_df)

310
145425-97
            id                     name  dset
310  145425-97  Cherokee Health Systems  test


In [12]:
print(c)

0


### Run models on Training data with evaluations ###

In [6]:
# Information required to run the loops
df = copy.deepcopy(train_df)
company_id_list = sorted(list(df['id'].unique()))
deployment_name_list = ['gpt-4o-1120', 'gpt-4.1']
version_dict_list = [{'mh': 3, 'ip': 1, 'op': 1}, {'mh': 4, 'ip': 2, 'op': 2}]
file_base_name = 'hcp-train-250701'

# LOOP OVER MODELS
results_samples_df_list = []
results_stat_df_list = []
for d, deployment_name in enumerate(deployment_name_list):
    print()
    print(f'Model  {d+1}/{len(deployment_name_list)}: {deployment_name}')
    
    # LOOP OVER PROMPT VERSIONS
    for p, prompt_version_dict in enumerate(version_dict_list):   
        print()
        print(f'Prompt {p+1}/{len(version_dict_list)}: {prompt_version_dict}')

        # LOOP OVER THE SAMPLES
        results_samples_list = []
        for c, company_id in enumerate(company_id_list[:6]):
            if (c+1) % 2 == 0:
                print(f'Sample {c+1}/{len(company_id_list)}')
            company_df = df.loc[df['id'] == company_id]
            company_name = company_df.get('name').values[0]
            company_desc = company_df.get('description').values[0]
            result_df = send_sample(company_df=company_df, 
                                    deployment_name=deployment_name, 
                                    version_dict=prompt_version_dict)
            results_samples_list.append(result_df)
        # Assemble the results for this model/prompt combination
        results_samples = pd.concat(results_samples_list, axis=0, ignore_index=True)
        results_stat = performance_table(data=results_samples, true_pred_cols=stat_col_dict)
        # Add some descriptions for model and prompt to the tables
        results_samples.insert(loc=0, column='model', value=deployment_name)
        results_samples.insert(loc=1, column='prompt', value=p+1)
        results_stat.insert(loc=0, column='model', value=deployment_name)
        results_stat.insert(loc=1, column='prompt', value=p+1)
        # Save the table in a list before we run the next prompt/model combination
        results_samples_df_list.append(results_samples)
        results_stat_df_list.append(results_stat)
results_samples_df = pd.concat(results_samples_df_list, axis=0, ignore_index=True)
results_stat_df = pd.concat(results_stat_df_list, axis=0, ignore_index=True)
# Save the data as .parquet and .csv files
# save_output(data=results_samples_df, file_base=f'{file_base_name}-samples', file_dir=data_dir)
# save_output(data=results_stat_df, file_base=f'{file_base_name}-performance', file_dir=data_dir)


Model  1/2: gpt-4o-1120

Prompt 1/2: {'mh': 3, 'ip': 1, 'op': 1}
Sample 2/187
Sample 4/187
Sample 6/187


  'specificity': round(float(tn / (tn + fp)), decimals), # True negative rate



Prompt 2/2: {'mh': 4, 'ip': 2, 'op': 2}
Sample 2/187
Sample 4/187
Sample 6/187


  'specificity': round(float(tn / (tn + fp)), decimals), # True negative rate



Model  2/2: gpt-4.1

Prompt 1/2: {'mh': 3, 'ip': 1, 'op': 1}
Sample 2/187
Sample 4/187
Sample 6/187


  'specificity': round(float(tn / (tn + fp)), decimals), # True negative rate



Prompt 2/2: {'mh': 4, 'ip': 2, 'op': 2}
Sample 2/187
Sample 4/187
Sample 6/187


  'specificity': round(float(tn / (tn + fp)), decimals), # True negative rate


In [5]:
# Get the results
performance_test_file = 'hcp-train-250701-performance.parquet'
pdf = pd.read_parquet(os.path.join(data_dir, performance_test_file))
display(pdf.head())

Unnamed: 0,model,prompt,category,p,n,tp,tn,fp,fn,recall,precision,min_precision,specificity,f_score
0,gpt-4o-1120,1,mental_health,125,62,103,42,20,22,0.824,0.8374,0.6684,0.6774,0.8306
1,gpt-4o-1120,1,inpatient,71,116,70,108,8,1,0.9859,0.8974,0.3797,0.931,0.9396
2,gpt-4o-1120,1,outpatient,115,72,91,54,18,24,0.7913,0.8349,0.615,0.75,0.8125
3,gpt-4o-1120,1,mental_health_inpatient,62,125,60,111,14,2,0.9677,0.8108,0.3316,0.888,0.8824
4,gpt-4o-1120,1,mental_health_outpatient,102,85,79,67,18,23,0.7745,0.8144,0.5455,0.7882,0.794


In [7]:
for m, model in enumerate(pdf['model'].unique()):
    for p, prompt in enumerate(pdf['prompt'].unique()):
        stat = pdf.loc[(pdf['model'] == model) & (pdf['prompt'] == prompt)]
        print()
        print(f'MODEL {model} PROMPT {prompt}') 
        display(stat)


MODEL gpt-4o-1120 PROMPT 1


Unnamed: 0,model,prompt,category,p,n,tp,tn,fp,fn,recall,precision,min_precision,specificity,f_score
0,gpt-4o-1120,1,mental_health,125,62,103,42,20,22,0.824,0.8374,0.6684,0.6774,0.8306
1,gpt-4o-1120,1,inpatient,71,116,70,108,8,1,0.9859,0.8974,0.3797,0.931,0.9396
2,gpt-4o-1120,1,outpatient,115,72,91,54,18,24,0.7913,0.8349,0.615,0.75,0.8125
3,gpt-4o-1120,1,mental_health_inpatient,62,125,60,111,14,2,0.9677,0.8108,0.3316,0.888,0.8824
4,gpt-4o-1120,1,mental_health_outpatient,102,85,79,67,18,23,0.7745,0.8144,0.5455,0.7882,0.794



MODEL gpt-4o-1120 PROMPT 2


Unnamed: 0,model,prompt,category,p,n,tp,tn,fp,fn,recall,precision,min_precision,specificity,f_score
5,gpt-4o-1120,2,mental_health,125,62,91,52,10,34,0.728,0.901,0.6684,0.8387,0.8053
6,gpt-4o-1120,2,inpatient,71,116,70,108,8,1,0.9859,0.8974,0.3797,0.931,0.9396
7,gpt-4o-1120,2,outpatient,115,72,94,52,20,21,0.8174,0.8246,0.615,0.7222,0.821
8,gpt-4o-1120,2,mental_health_inpatient,62,125,51,116,9,11,0.8226,0.85,0.3316,0.928,0.8361
9,gpt-4o-1120,2,mental_health_outpatient,102,85,70,70,15,32,0.6863,0.8235,0.5455,0.8235,0.7487



MODEL gpt-4.1 PROMPT 1


Unnamed: 0,model,prompt,category,p,n,tp,tn,fp,fn,recall,precision,min_precision,specificity,f_score
10,gpt-4.1,1,mental_health,125,62,95,47,15,30,0.76,0.8636,0.6684,0.7581,0.8085
11,gpt-4.1,1,inpatient,71,116,70,110,6,1,0.9859,0.9211,0.3797,0.9483,0.9524
12,gpt-4.1,1,outpatient,115,72,92,53,19,23,0.8,0.8288,0.615,0.7361,0.8142
13,gpt-4.1,1,mental_health_inpatient,62,125,55,114,11,7,0.8871,0.8333,0.3316,0.912,0.8594
14,gpt-4.1,1,mental_health_outpatient,102,85,74,70,15,28,0.7255,0.8315,0.5455,0.8235,0.7749



MODEL gpt-4.1 PROMPT 2


Unnamed: 0,model,prompt,category,p,n,tp,tn,fp,fn,recall,precision,min_precision,specificity,f_score
15,gpt-4.1,2,mental_health,125,62,90,54,8,35,0.72,0.9184,0.6684,0.871,0.8072
16,gpt-4.1,2,inpatient,71,116,70,109,7,1,0.9859,0.9091,0.3797,0.9397,0.9459
17,gpt-4.1,2,outpatient,115,72,92,51,21,23,0.8,0.8142,0.615,0.7083,0.807
18,gpt-4.1,2,mental_health_inpatient,62,125,52,118,7,10,0.8387,0.8814,0.3316,0.944,0.8595
19,gpt-4.1,2,mental_health_outpatient,102,85,70,71,14,32,0.6863,0.8333,0.5455,0.8353,0.7527
