### Prompt Engineering 01 MENTAL HEALTH ###

In [28]:
import os
import pandas as pd
import numpy as np

# Import this module with autoreload
%load_ext autoreload
%autoreload 2
import llmt
from llmt.llmtools import Prompt, MentalHealth
from llmt.llmtools import process_prompt
from llmt.openai import OpenAI, create_messages
from llmt.performance import Performance

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Directories and files.
# expanduser('~') resolves the home directory even when the HOME variable is
# not set; os.environ.get('HOME') would return None in that case and
# os.path.join would raise a TypeError.
data_dir = os.path.join(os.path.expanduser('~'), 'home_data', 'hcp')
test_file_name = 'hcp-alldata-250413.parquet'
test_file = os.path.join(data_dir, test_file_name)

# Read the file, keep only the labeled rows (dset == 'train') and store the
# three label columns as integers.
df = (
    pd.read_parquet(test_file)
    .loc[lambda raw: raw['dset'] == 'train']
    .astype({'mental_health': int,
             'inpatient': int,
             'outpatient': int})
)

# Quick look at the labeled subset and its size
display(df.head())
print(df.shape)

Unnamed: 0,id,name,description,mental_health,inpatient,outpatient,dset
0,431643-07,Actriv,Provider of healthcare staffing services based...,2,0,0,train
1,310749-31,Alima,Operator of a non-governmental organization in...,0,0,0,train
2,162054-28,Apothecare,Provider of pharmacy services intended to prov...,0,0,0,train
3,597285-28,April Health (Clinics/Outpatient Services),Provider of mental health services intended to...,1,0,1,train
4,373978-90,Arise Child and Family Service,Operator of independent living centers caterin...,2,0,0,train


(187, 7)


### Define the prompts and the response format ###

In [10]:
# Definition of a mental health business; it is appended to the system prompt below
mental_health_business_definition = Prompt().load(prompt_name='mental_health_definition')

# System prompt = stored base prompt + the definition text
# (assumes Prompt().load returns a plain string — TODO confirm)
system_prompt = (
    Prompt().load(prompt_name='mental_health_system_01')
    + f' according to the following definition: {mental_health_business_definition}'
)

# The user prompt carries the business name and its description
def create_user_prompt(name: str, description: str):
    """Build the user prompt for one business.

    Args:
        name: Name of the business or provider.
        description: Free-text description of the business.

    Returns:
        The result of passing the filled-in question template through
        ``process_prompt``.
    """
    return process_prompt(f"""
        The business or provider {name} is described as: {description} 
        Does the business provide mental or behavioral health care services?
        """)

In [21]:
# Build the messages for one example row to check the prompt wording
idx = 2
df_idx = df.iloc[idx]
display(df_idx)
user_prompt = create_user_prompt(
    name=df_idx['name'],
    description=df_idx['description'],
)
messages = create_messages(system_prompt=system_prompt, user_prompt=user_prompt)
# Show each message separated by a blank line
print('\n\n'.join(str(message) for message in messages))

id                                                       162054-28
name                                                    Apothecare
description      Provider of pharmacy services intended to prov...
mental_health                                                    0
inpatient                                                        0
outpatient                                                       0
dset                                                         train
Name: 2, dtype: object

{'role': 'system', 'content': 'You are an advanced AI system designed to assist a healthcare policy researcher \nin determining whether a business qualifies as a medical facility, \nsuch as a hospital or clinic, that provides mental or behavioral \nhealthcare services for human patients according to the following definition: A healthcare business that provides mental and behavioral health care services to \nhuman patients is typically a specialized facility or organization, \nsuch as a mental health clinic, psychiatric hospital, or counseling center, \nthat offers assessments, diagnoses, and evidence-based treatments for various \nmental health and behavioral disorders. \nThese services may include individual and group therapy, \npsychiatric evaluations, medication management, and crisis intervention, \ndelivered by a team of professionals such as psychiatrists, psychologists, \nlicensed counselors, and social workers. \nThe aim of such a business is to support patients in managing the

### Send messages to the model ###

In [22]:
# Request settings for the single example
model = 'gpt-4o'
temperature = 0
response_format = MentalHealth

# Rebuild the messages so this cell only needs system_prompt and user_prompt
messages = create_messages(system_prompt=system_prompt, user_prompt=user_prompt)
client = OpenAI().create_client()
output = OpenAI().send_messages(
    messages=messages,
    model=model,
    temperature=temperature,
    response_format=response_format,
    client=client,
)
# 'pred_mh' is kept exactly as returned here; the batch cell further down
# recodes it to 0/1 before storing it.

In [23]:
# Inspect the response returned for the single example
print(output)

{'pred_mh': False, 'pred_mh_score': 0.2, 'refusal': None}


### Process predictions for all samples ###

In [24]:
# Settings shared by every request of the batch run in the next cell
client = OpenAI().create_client()
response_format = MentalHealth
model = 'gpt-4o'
temperature = 0

In [25]:
# Query the model once per company id (ids are visited in sorted order) and
# collect the labeled rows together with the model output.
company_id_list = sorted(df['id'].unique())
n_companies = len(company_id_list)
results_df_list = []
# groupby splits the frame in a single pass; filtering df with a boolean mask
# for every id would re-scan all rows on each iteration.
for c, (company_id, df_id) in enumerate(df.groupby('id', sort=True), start=1):
    if c % 20 == 0:
        print(f'Sending description {c} / {n_companies} to the model')
    # The first row of an id supplies the name and description for the prompt
    user_prompt_id = create_user_prompt(name=df_id['name'].values[0],
                                        description=df_id['description'].values[0])
    messages = create_messages(system_prompt=system_prompt, user_prompt=user_prompt_id)
    output = OpenAI().send_messages(messages=messages,
                                    model=model,
                                    temperature=temperature,
                                    response_format=response_format,
                                    client=client)
    # Replace the boolean with binary outcome prediction
    key = 'pred_mh'
    output.update({key: 1 if output.get(key) == True else 0})
    # Every key of the model output becomes a column on the rows of this id
    results_df_list.append(df_id.assign(**output))
results_df = pd.concat(results_df_list, axis=0, ignore_index=True)
# Save the results
results_file_name = 'mh_prompt_01_250418.parquet'
results_file = os.path.join(data_dir, results_file_name)
results_df.to_parquet(results_file)

Sending description 20 / 187 to the model
Sending description 40 / 187 to the model
Sending description 60 / 187 to the model
Sending description 80 / 187 to the model
Sending description 100 / 187 to the model
Sending description 120 / 187 to the model
Sending description 140 / 187 to the model
Sending description 160 / 187 to the model
Sending description 180 / 187 to the model


In [26]:
# Preview the labeled rows with the model columns (pred_mh, pred_mh_score, refusal) appended
results_df.head()

Unnamed: 0,id,name,description,mental_health,inpatient,outpatient,dset,pred_mh,pred_mh_score,refusal
0,100363-69,Chilton Hospital,Operator of inpatient hospital center. The com...,1,1,1,train,1,0.95,
1,10127-62,Quest Diagnostics (NYS: DGX),Quest Diagnostics is a leading independent pro...,1,0,0,train,0,0.2,
2,10195-21,IASIS Healthcare,Owner and operator of medium-sized acute care ...,1,1,1,train,1,0.8,
3,10468-27,Behavioral Centers of America,Provider of psychiatric healthcare services. T...,1,1,1,train,1,0.99,
4,107240-50,Alvarado Parkway Institute,Operator of a psychiatric health care facility...,1,1,1,train,1,0.99,


In [29]:
# Compare the model prediction (pred_mh) with the hand label (mental_health).
# NOTE(review): the data preview shows mental_health taking the value 2 in
# addition to 0/1, and in the saved run p + n is smaller than the number of
# rows — confirm how Performance treats rows labeled 2.
mh_metrics = Performance(data=results_df).binary_performance(
    true_col='mental_health',
    pred_col='pred_mh',
)
display(mh_metrics)

{'p': 136,
 'n': 35,
 'tp': 126,
 'tn': 19,
 'fp': 16,
 'fn': 10,
 'recall': 0.9265,
 'precision': 0.8873,
 'min_precision': 0.7953,
 'specificity': 0.5429,
 'f_score': 0.9065}