### Prompt Engineering INPATIENT HEALTHCARE ###

In [1]:
import os
import pandas as pd
import numpy as np
import logging

logger = logging.getLogger(__name__)

# Import this module with autoreload
%load_ext autoreload
%autoreload 2
import llmt
from llmt.llmtools import Prompt
from llmt.llmtools import MentalHealth, InpatientServices
from llmt.llmtools import process_prompt
from llmt.openai import OpenAI, create_messages
from llmt.performance import Performance

In [2]:
# Parameters
# Model identifier and sampling temperature used by the model calls further down.
model = 'gpt-4o'
temperature = 0

# Directories and files
# expanduser('~') resolves the home directory even when $HOME is not set;
# os.environ.get('HOME') returns None in that case and os.path.join(None, ...) raises TypeError.
data_dir = os.path.join(os.path.expanduser('~'), 'home_data', 'hcp')
test_file_name = 'hcp-alldata-250413.parquet'
test_file = os.path.join(data_dir, test_file_name)
df = pd.read_parquet(test_file)
# Filter the labeled data: keep the rows whose `dset` is 'train', renumber the index,
# and cast the three label columns to int.
df = (
    df.loc[df['dset'] == 'train']
    .reset_index(drop=True)
    .astype({'mental_health': int,
             'inpatient': int,
             'outpatient': int})
)
display(df.head())
print(df.shape)

Unnamed: 0,id,name,description,mental_health,inpatient,outpatient,dset
0,431643-07,Actriv,Provider of healthcare staffing services based...,2,0,0,train
1,310749-31,Alima,Operator of a non-governmental organization in...,0,0,0,train
2,162054-28,Apothecare,Provider of pharmacy services intended to prov...,0,0,0,train
3,597285-28,April Health (Clinics/Outpatient Services),Provider of mental health services intended to...,1,0,1,train
4,373978-90,Arise Child and Family Service,Operator of independent living centers caterin...,2,0,0,train


(187, 7)


### Prompt development ###

In [3]:
# Choose one labeled company to use as the worked example while developing the prompt.
# The two values below are the label values to match in the `mental_health` and
# `inpatient` columns (currently 0 for both).
mental_health = 0
inpatient = 0
label_mask = (df['mental_health'] == mental_health) & (df['inpatient'] == inpatient)
id_list = list(df.loc[label_mask, 'id'].unique())
print(f'Found {len(id_list)} company ids')

# Position of the example within the matching ids
idx = 5
company_id = id_list[idx]
# To pin a specific company instead, override the id here:
# company_id = '135432-64'

# First row for that id: show the full record, then the name and description as plain text
ser_id = df.loc[df['id'] == company_id].iloc[0]
display(ser_id)
name, description = ser_id['name'], ser_id['description']
print('', name, description, sep='\n')

Found 25 company ids


id                                                       124869-43
name                                                     Holobiome
description      Operator of a biotechnology company intended t...
mental_health                                                    0
inpatient                                                        0
outpatient                                                       0
dset                                                         train
Name: 36, dtype: object


Holobiome
Operator of a biotechnology company intended to solve the complexities of the human gut microbiome. The company offers mental health therapies that are driven by mapping and manipulating the gut-brain axis via next-generation probiotics through microbiome interventions, enabling healthcare providers to treat diseases related to the nervous system.


In [4]:
# Label being prompted for, and the revision of its system prompt to load.
variable = 'inpatient'
prompt_version = 1
# Two-digit, zero-padded version tag, e.g. 1 -> '01'
version_tag = str(prompt_version).zfill(2)
prompt_name = f'{variable}_system_{version_tag}'
system_prompt = Prompt().load(prompt_name=prompt_name)

# The user prompt carries the business name and its description
def create_user_prompt(name: str, description: str, variable: str):
    """Build the user message asking whether an organization provides a service type.

    Args:
        name: Organization name inserted into the question.
        description: Free-text description of the organization.
        variable: Service type placed before "healthcare services" (e.g. 'inpatient').

    Returns:
        The result of `process_prompt` applied to the filled-in template
        (presumably a cleaned-up string -- confirm against llmt.llmtools).
    """
    template = f"""
        The organization {name} is described as: {description} 
        Does this organization provide {variable} healthcare services?
        """
    return process_prompt(template)

# Assemble the user prompt and the message list for the selected example company
user_prompt = create_user_prompt(
    name=name,
    description=description,
    variable=variable,
)
messages = create_messages(
    system_prompt=system_prompt,
    user_prompt=user_prompt,
)

In [5]:
# Show the system prompt and the user prompt, separated by one blank line
print(system_prompt, user_prompt, sep='\n\n')

You are a specialized AI assistant supporting a healthcare policy researcher. Your task is to analyze organization names and descriptions and determine whether the organizations provide inpatient healthcare services.
Definition of Inpatient Healthcare Services:
Inpatient healthcare involves medical treatment administered to individuals admitted to a healthcare facility — such as a hospital, residential treatment center, or inpatient rehabilitation facility — where they stay overnight or for an extended duration under continuous medical supervision.
Instructions:
Carefully read the provided description of the organization. Assess whether the organization actively delivers inpatient healthcare services as defined above.
Respond based on the following criteria:


Respond with Inpatient_Services: True if the organization explicitly offers inpatient medical care, such as hospital stays, residential treatment, or 24-hour supervised care delivered at a physical facility.


Respond with Inpati

In [6]:
# Send the example prompt to the model
# NOTE: `model` and `temperature` repeat the values set in the parameters cell near the top.
model = 'gpt-4o'
temperature = 0
# Class handed to send_messages as `response_format`
response_format = InpatientServices
client = OpenAI().create_client()

# Pass the `temperature` variable rather than a literal 0, so this single-example call
# always uses the same setting as the run over all companies.
response = OpenAI().send_messages(messages=messages,
                                  model=model,
                                  response_format=response_format,
                                  temperature=temperature,
                                  client=client)
print(response)

{'pred_ip': False, 'pred_ip_score': 0.1, 'refusal': None}


In [7]:
# Run the prompt on all data
company_id_list = sorted(list(df['id'].unique()))
results_df_list = []

# Key in the model output that holds the boolean prediction
key = 'pred_ip'

# Loop-local names (`this_company_id`, `messages_id`) are used on purpose: reusing
# `company_id` / `messages` here would overwrite the example selected earlier in the
# notebook, so re-running the single-example cell afterwards would send the wrong prompt.
for c, this_company_id in enumerate(company_id_list):
    # Progress message every 20 companies
    if (c + 1) % 20 == 0:
        print(f'Sending description {c + 1} / {len(company_id_list)} to the model')
    df_id = df.loc[df['id'] == this_company_id]
    user_prompt_id = create_user_prompt(name=df_id['name'].values[0],
                                        description=df_id['description'].values[0],
                                        variable=variable)
    messages_id = create_messages(system_prompt=system_prompt, user_prompt=user_prompt_id)
    output = OpenAI().send_messages(messages=messages_id,
                                    model=model,
                                    temperature=temperature,
                                    response_format=response_format,
                                    client=client)

    # Replace the boolean with binary outcome prediction (anything other than True becomes 0)
    output.update({key: 1 if output.get(key) == True else 0})
    # Attach every field of the model output as a column on this company's row(s)
    results_df_list.append(df_id.assign(**output))

results_df = pd.concat(results_df_list, axis=0, ignore_index=True)
# Save the results
results_file_name = f'{variable}_{str(prompt_version).zfill(2)}_results.parquet'
results_file = os.path.join(data_dir, results_file_name)
results_df.to_parquet(results_file)

Sending description 20 / 187 to the model
Sending description 40 / 187 to the model
Sending description 60 / 187 to the model
Sending description 80 / 187 to the model
Sending description 100 / 187 to the model
Sending description 120 / 187 to the model
Sending description 140 / 187 to the model
Sending description 160 / 187 to the model
Sending description 180 / 187 to the model


In [9]:
# Show the file name under which the results parquet was written
print(results_file_name)

inpatient_01_results.parquet


In [11]:
# Score the binary predictions in `pred_ip` against the `inpatient` labels.
# NOTE(review): the recorded run reports p + n = 182 for a 187-row frame -- presumably rows
# whose label is neither 0 nor 1 are left out of the counts; confirm in Performance.
performance = Performance(data=results_df)
metrics = performance.binary_performance(true_col='inpatient', pred_col='pred_ip')
display(metrics)

{'p': 69,
 'n': 113,
 'tp': 68,
 'tn': 104,
 'fp': 9,
 'fn': 1,
 'recall': 0.9855,
 'precision': 0.8831,
 'min_precision': 0.3791,
 'specificity': 0.9204,
 'f_score': 0.9315}