### Prompt Engineering 02 MENTAL HEALTH ###

In [9]:
import os
import pandas as pd
import numpy as np
import logging

logger = logging.getLogger(__name__)

# Import this module with autoreload
%load_ext autoreload
%autoreload 2
import llmt
from llmt.llmtools import Prompt, MentalHealth
from llmt.llmtools import process_prompt
from llmt.openai import OpenAI, create_messages
from llmt.performance import Performance

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Model parameters used for the calls in this notebook
model = 'gpt-4o'
temperature = 0

# Directories and files
# expanduser('~') resolves the home directory even when $HOME is unset;
# os.environ.get('HOME') would return None in that case and os.path.join
# would fail with a TypeError.
data_dir = os.path.join(os.path.expanduser('~'), 'home_data', 'hcp')
test_file_name = 'hcp-alldata-250413.parquet'
test_file = os.path.join(data_dir, test_file_name)

# Keep only the labeled ('train') rows and store the label columns as integers
label_dtypes = {'mental_health': int,
                'inpatient': int,
                'outpatient': int}
df = (
    pd.read_parquet(test_file)
    .loc[lambda frame: frame['dset'] == 'train']
    .reset_index(drop=True)
    .astype(label_dtypes)
)
display(df.head())
print(df.shape)

Unnamed: 0,id,name,description,mental_health,inpatient,outpatient,dset
0,431643-07,Actriv,Provider of healthcare staffing services based...,2,0,0,train
1,310749-31,Alima,Operator of a non-governmental organization in...,0,0,0,train
2,162054-28,Apothecare,Provider of pharmacy services intended to prov...,0,0,0,train
3,597285-28,April Health (Clinics/Outpatient Services),Provider of mental health services intended to...,1,0,1,train
4,373978-90,Arise Child and Family Service,Operator of independent living centers caterin...,2,0,0,train


(187, 7)


### Prompt development ###

In [3]:
# Pick an example company by its labels. With both filters set to 0 this lists
# the companies labeled 0 for 'mental_health' and 'inpatient'
# (presumably the negative class -- TODO confirm against the labeling scheme).
mental_health = 0
inpatient = 0
label_mask = (df['mental_health'] == mental_health) & (df['inpatient'] == inpatient)
id_list = list(df.loc[label_mask, 'id'].unique())
print(f'Found {len(id_list)} company ids')

# Pick a company ID
# Alternative: select by position in id_list
# idx = 10
# company_id = id_list[idx]
company_id = '135432-64'
company_rows = df.loc[df['id'] == company_id]
if company_rows.empty:
    # Fail with a readable message instead of the bare IndexError from .iloc[0]
    raise ValueError(f'Company id {company_id!r} not found in the labeled data')
ser_id = company_rows.iloc[0]
display(ser_id)
name = ser_id['name']
description = ser_id['description']
print()
print(name)
print(description)

Found 25 company ids


id                                                       135432-64
name                       Chicago House And Social Service Agency
description      Non-profit organization offering housing, heal...
mental_health                                                    0
inpatient                                                        0
outpatient                                                       0
dset                                                         train
Name: 90, dtype: object


Chicago House And Social Service Agency
Non-profit organization offering housing, health, and employment support to those impacted by HIV/AIDS and the LGBTQ+ community. Services include TransLife Care, HIV testing, case management, and free rapid screenings for HIV, syphilis, and Hepatitis-C. The agency also provides essential health services to vulnerable individuals.


In [4]:
# Show the names of the stored prompts, then load the system prompt under test
available_prompts = Prompt().list()
print(available_prompts)
prompt_name = 'mental_health_system_02'
system_prompt = Prompt().load(prompt_name=prompt_name)

# The user prompt carries the business name and its description
def create_user_prompt(name: str, description: str):
    """Build the user prompt asking whether a business provides mental health care.

    The business name and description are interpolated into a fixed question
    template; the resulting text is passed through `process_prompt` and the
    result of that call is returned.

    Args:
        name: Name of the business.
        description: Free-text description of the business.
    """
    raw_prompt = f"""
        The business {name} is described as: {description} 
        Does the business provide mental or behavioral health care services to human patients?
        """
    return process_prompt(raw_prompt)

# Assemble the chat messages for the example company
user_prompt = create_user_prompt(description=description, name=name)
messages = create_messages(
    system_prompt=system_prompt,
    user_prompt=user_prompt,
)

['mental_health_system_01', 'mental_health_definition', 'mental_health_system_02']


In [5]:
# Show the system prompt and the user prompt, separated by one blank line
print(system_prompt, user_prompt, sep='\n\n')

You are an AI system assisting a healthcare policy researcher in identifying whether a business qualifies as a medical facility or organization that provides direct mental or behavioral healthcare services to human patients.
A qualifying business must meet all of the following criteria:

It is a specialized facility or organization, such as a mental health clinic, psychiatric hospital, counseling center, or behavioral health treatment center.  
It provides direct services to human patients, including assessments, diagnoses, therapy (individual, group, or family), psychiatric evaluations, medication management, and/or crisis intervention.  
Services are delivered by licensed mental health professionals such as psychiatrists, psychologists, licensed counselors, clinical social workers, or psychiatric nurse practitioners.  
The facility’s core mission is to support mental health, well-being, and recovery in a clinical setting.

Exclude any business that falls into the following categories

In [6]:
# Send the example prompt to the model.
# `model` and `temperature` are taken from the parameters cell near the top of
# the notebook instead of being redefined (and hard-coded in the call) here, so
# this single call uses the same settings as the run over all companies.
response_format = MentalHealth
client = OpenAI().create_client()

response = OpenAI().send_messages(
    messages=messages,
    model=model,
    response_format=response_format,
    temperature=temperature,
    client=client,
)
print(response)

{'pred_mh': False, 'pred_mh_score': 0.4, 'refusal': None}


In [7]:
# Run the prompt on all data
company_id_list = sorted(list(df['id'].unique()))
n_companies = len(company_id_list)
results_df_list = []

# Loop-local names (`current_id`, `messages_id`) are deliberately distinct from
# the example `company_id` and `messages` defined in earlier cells, so running
# this cell does not silently overwrite them.
for position, current_id in enumerate(company_id_list, start=1):
    # Progress message every 20 companies
    if position % 20 == 0:
        print(f'Sending description {position} / {n_companies} to the model')
    df_id = df.loc[df['id'] == current_id]
    user_prompt_id = create_user_prompt(name=df_id['name'].values[0],
                                        description=df_id['description'].values[0])
    messages_id = create_messages(system_prompt=system_prompt, user_prompt=user_prompt_id)
    output = OpenAI().send_messages(messages=messages_id,
                                    model=model,
                                    temperature=temperature,
                                    response_format=response_format,
                                    client=client)

    # Replace the boolean with a binary outcome prediction:
    # 1 for a truthy prediction, 0 otherwise (including a missing value)
    pred_key = 'pred_mh'
    output[pred_key] = int(bool(output.get(pred_key)))
    # Attach the model output as new columns on this company's row(s)
    results_df_list.append(df_id.assign(**output))

results_df = pd.concat(results_df_list, axis=0, ignore_index=True)
# Save the results
results_file_name = 'mh_prompt_02_250418.parquet'
results_file = os.path.join(data_dir, results_file_name)
results_df.to_parquet(results_file)

Sending description 20 / 187 to the model
Sending description 40 / 187 to the model
Sending description 60 / 187 to the model
Sending description 80 / 187 to the model
Sending description 100 / 187 to the model
Sending description 120 / 187 to the model
Sending description 140 / 187 to the model
Sending description 160 / 187 to the model
Sending description 180 / 187 to the model


In [8]:
# Write the predictions to a parquet file inside data_dir.
# NOTE(review): the cell that runs the prompt on all data already writes this
# same file; this cell only re-saves results_df -- confirm whether it is still needed.
results_file_name = 'mh_prompt_02_250418.parquet'
results_file = os.path.join(data_dir, results_file_name)
results_df.to_parquet(path=results_file)

In [10]:
# Compare the labels with the binary predictions and show the metrics.
# NOTE(review): 'mental_health' also contains the value 2 for some rows;
# confirm how binary_performance treats rows that are neither 0 nor 1.
performance = Performance(data=results_df)
display(performance.binary_performance(true_col='mental_health', pred_col='pred_mh'))

{'p': 136,
 'n': 35,
 'tp': 90,
 'tn': 29,
 'fp': 6,
 'fn': 46,
 'recall': 0.6618,
 'precision': 0.9375,
 'min_precision': 0.7953,
 'specificity': 0.8286,
 'f_score': 0.7759}