### Multi-Model Performance Evaluation on the training set ###
Use the manual labels as ground truth

In [61]:
import os
import copy
import numpy as np
import pandas as pd
from typing import List
import glob
import logging

logger = logging.getLogger(__name__)

# Binary perfomance metrics
from sklearn.metrics import confusion_matrix

# Import this module with autoreload
%load_ext autoreload
%autoreload 2
import llmt
from llmt.performance import Performance, binary_performance

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
data_root = os.environ.get('DATA')
data_dir = os.path.join(data_root, 'hcp')
model_dir = os.path.join(data_dir, 'models')
output_file_list = glob.glob(os.path.join(model_dir, 'hcp_predictions_*.parquet'))
print(*output_file_list, sep='\n')
pred_col_list = ['pred_mh', 'pred_ip', 'pred_op']

/app/data/hcp/models/hcp_predictions_ollama_20250520_deepseek-v3_671b.parquet
/app/data/hcp/models/hcp_predictions_ollama_20250520_llama2_70b.parquet
/app/data/hcp/models/hcp_predictions_ollama_20250520_llama3_70b.parquet
/app/data/hcp/models/hcp_predictions_ollama_20250520_llama2_13b.parquet
/app/data/hcp/models/hcp_predictions_ollama_20250520_llama3_8b.parquet
/app/data/hcp/models/hcp_predictions_20250520_gpt-4o.parquet
/app/data/hcp/models/hcp_predictions_ollama_20250520_llama4_maverick.parquet
/app/data/hcp/models/hcp_predictions_ollama_20250520_llama2_7b.parquet


In [7]:
# Helper functions
def flat_list(input_list: list) -> list:
    flattened_list = [item for sublist in input_list for item in sublist]
    return flattened_list 

In [62]:
# Data set that we sent to the HCP team
df_file_name = 'Predictions-2025-04-24.xlsx'
df_file = os.path.join(data_dir, df_file_name)
dfxl = pd.read_excel(df_file)
# Train and test data
df_train = dfxl.loc[dfxl['dset'] == 'train']
df_test = dfxl.loc[dfxl['dset'] == 'test']
display(df_train.head(2))
print(dfxl.shape)
print(len(dfxl['id'].unique()))
print(df_test.shape)
print(df_train.shape)

Unnamed: 0,id,name,description,mental_health,inpatient,outpatient,dset,pred_mh,pred_ip,pred_op,verified_op,temperature
0,491639-77,144-Bed Hospital Facility Joint Venture in Den...,Provider of behavioral health services located...,1.0,1.0,1.0,train,1,1,1,1,0
1,10995-58,Acadia Healthcare (NAS: ACHC),Acadia Healthcare Co Inc acquires and develops...,1.0,1.0,2.0,train,1,1,0,1,0


(687, 12)
687
(500, 12)
(187, 12)


In [63]:
# Column names
true_mh = 'mental_health'
true_ip = 'inpatient'
true_op = 'outpatient'

pred_mh = 'pred_mh'
pred_ip = 'pred_ip'
pred_op = 'pred_op'

# Dictionary with true:pred column names
col_dict = {true_mh: pred_mh, true_ip:pred_ip, true_op:pred_op}
display(col_dict)

{'mental_health': 'pred_mh', 'inpatient': 'pred_ip', 'outpatient': 'pred_op'}

In [85]:
# Calculate performance for training data
perf = Performance(data=df_train)
performance_df_list = []
for c, (true_col, pred_col) in enumerate(col_dict.items()):
    performance_dict = perf.binary_performance(true_col=true_col, pred_col=pred_col)
    performance_df = pd.DataFrame(performance_dict, index=[c]).\
                    assign(category=true_col)
    performance_df_list.append(performance_df)

# Mental health and inpatient services
true_combined = Performance(data=df_train).\
    combine_binary_columns(input_col_list=[true_mh, true_ip], output_col_name='true_mh_ip')
pred_combined = Performance(data=df_train).\
    combine_binary_columns(input_col_list=[pred_mh, pred_ip], output_col_name='pred_mh_ip')
df_mh_ip = true_combined.merge(pred_combined, on='id', how='left')
performance_dict = Performance(data=df_mh_ip).binary_performance(true_col='true_mh_ip', 
                                                                 pred_col='pred_mh_ip')
performance_df = pd.DataFrame(performance_dict, index=[0]).\
                assign(category='mental_health_inpatient')

21 rows removed from data!


In [86]:
display(performance_df)

Unnamed: 0,p,n,tp,tn,fp,fn,recall,precision,min_precision,specificity,f_score,category
0,62,104,58,92,12,4,0.9355,0.8286,0.3735,0.8846,0.8788,mental_health_inpatient
