In [None]:
import pandas as pd
from openai import AzureOpenAI
import json
import openai
from prompts import *
import pickle

In [None]:
# Load the two ground-truth annotation tables (employment first, relationship second)
# from CSV files that sit next to the notebook.
employment_ground_truth, relation_ground_truth = (
    pd.read_csv(csv_name)
    for csv_name in ('eval_employment_prompt.csv', 'eval_relationship_prompt.csv')
)

In [None]:
# Preview the first rows of the employment ground-truth table.
employment_ground_truth.head()

In [None]:
# Preview the first rows of the relationship ground-truth table.
relation_ground_truth.head()

In [None]:
# List the column labels of the employment table.
employment_ground_truth.columns

In [None]:
# List the column labels of the relationship table.
relation_ground_truth.columns

In [None]:
# Keep only the two relationship adverse / non-adverse label columns for merging.
relation_df = relation_ground_truth[['RELATIONSHIP_nonadverse', 'RELATIONSHIP_adverse']]

In [None]:
# Left-join the relationship labels onto the employment table. With `on='index'`,
# pandas matches the caller's 'index' column (or index level) against relation_df's
# own row index, which is the default index produced by read_csv above.
# NOTE(review): this assumes the values in employment_ground_truth['index'] line up
# with relation_ground_truth's row positions — confirm against the CSV files.
final_df = employment_ground_truth.join(relation_df, on='index', how='left')

In [None]:
# Preview the merged table that the extraction cells below filter on.
final_df.head()

In [None]:
# Read the Azure OpenAI connection settings from a local JSON file so that no
# secret is hard-coded in the notebook.
with open('azure_credentials.json', 'r') as credentials_file:
    azure_data = json.load(credentials_file)

api_key, api_version, azure_endpoint, azure_deployment_name = (
    azure_data[setting]
    for setting in ('API_KEY', 'API_VERSION', 'AZURE_ENDPOINT', 'AZURE_DEPLOYMENT_NAME')
)

# Single shared client used by every request in this notebook.
client = AzureOpenAI(api_key=api_key, api_version=api_version, azure_endpoint=azure_endpoint)

# Custom name chosen for the model deployment in Azure.
# NOTE(review): send_message() calls the chat-completions endpoint, so this presumably
# has to be a chat-capable deployment rather than a completions-only instruct model — confirm.
deployment_name = azure_deployment_name

def create_prompt(system_message, user_message):
    """Build the chat payload for one request.

    Args:
        system_message: instruction text placed in the "system" turn.
        user_message: query text placed in the "user" turn.

    Returns:
        A two-element list of ``{"role": ..., "content": ...}`` dicts, system turn first.
    """
    turns = (("system", system_message), ("user", user_message))
    return [{"role": role, "content": content} for role, content in turns]

def send_message(message, model_name, max_response_tokens=500):
    """Send a chat prompt through the module-level ``client`` and return the reply text.

    Args:
        message: list of chat messages (role/content dicts), e.g. as built by create_prompt.
        model_name: value passed as ``model`` (the Azure deployment name).
        max_response_tokens: upper bound on the number of generated tokens.

    Returns:
        The text of the first choice with surrounding whitespace removed, or an
        empty string when the service returned no text for that choice.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=message,
        temperature=0,  # keep the extraction as repeatable as the service allows
        max_tokens=max_response_tokens,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None
    )

    # The message content is optional in the API (it can be None, for example when the
    # completion is withheld by content filtering). Calling .strip() on None would
    # raise AttributeError and abort a whole extraction loop, so fall back to "".
    content = response.choices[0].message.content
    return (content or "").strip()

In [None]:
final_df['EMPLOYMENT_nonadverse'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['EMPLOYMENT_nonadverse'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_employment_nonadverse_step1 = pd.DataFrame({'index': index_list, 'llm_employment_nonadverse': llm_response_list})

with open('llm_employment_nonadverse_step1.pkl', 'wb') as file:
    pickle.dump(llm_employment_nonadverse_step1, file)

In [None]:
final_df['EMPLOYMENT_adverse'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['EMPLOYMENT_adverse'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_employment_adverse_step1 = pd.DataFrame({'index': index_list, 'llm_employment_adverse': llm_response_list})

with open('llm_employment_adverse_step1.pkl', 'wb') as file:
    pickle.dump(llm_employment_adverse_step1, file)

In [None]:
final_df['RELATIONSHIP_nonadverse'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['RELATIONSHIP_nonadverse'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_relationship_nonadverse_step1 = pd.DataFrame({'index': index_list, 'llm_relationship_nonadverse': llm_response_list})

with open('llm_relationship_nonadverse_step1.pkl', 'wb') as file:
    pickle.dump(llm_relationship_nonadverse_step1, file)

In [None]:
final_df['RELATIONSHIP_adverse'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['RELATIONSHIP_adverse'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_relationship_adverse_step1 = pd.DataFrame({'index': index_list, 'llm_relationship_adverse': llm_response_list})

with open('llm_relationship_adverse_step1.pkl', 'wb') as file:
    pickle.dump(llm_relationship_adverse_step1, file)

In [None]:
final_df['TRANSPORTATION_distance'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['TRANSPORTATION_distance'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_transportation_distance_step1 = pd.DataFrame({'index': index_list, 'llm_transportation_distance': llm_response_list})

with open('llm_transportation_distance_step1.pkl', 'wb') as file:
    pickle.dump(llm_transportation_distance_step1, file)

In [None]:
final_df['TRANSPORTATION_resource'].value_counts()

In [None]:
final_df['TRANSPORTATION_other'].value_counts()

In [None]:
final_df['HOUSING_poor'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['HOUSING_poor'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_housing_poor_step1 = pd.DataFrame({'index': index_list, 'llm_housing_poor': llm_response_list})

with open('llm_housing_poor_step1.pkl', 'wb') as file:
    pickle.dump(llm_housing_poor_step1, file)

In [None]:
final_df['HOUSING_undomiciled'].value_counts()

In [None]:
final_df['HOUSING_other'].value_counts()

In [None]:
final_df['RELATIONSHIP_married'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['RELATIONSHIP_married'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_relationship_married_step1 = pd.DataFrame({'index': index_list, 'llm_relationship_married': llm_response_list})

with open('llm_relationship_married_step1.pkl', 'wb') as file:
    pickle.dump(llm_relationship_married_step1, file)

In [None]:
final_df['RELATIONSHIP_partnered'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['RELATIONSHIP_partnered'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_relationship_partnered_step1 = pd.DataFrame({'index': index_list, 'llm_relationship_partnered': llm_response_list})

with open('llm_relationship_partnered_step1.pkl', 'wb') as file:
    pickle.dump(llm_relationship_partnered_step1, file)

In [None]:
final_df['RELATIONSHIP_divorced'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['RELATIONSHIP_divorced'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_relationship_divorced_step1 = pd.DataFrame({'index': index_list, 'llm_relationship_divorced': llm_response_list})

with open('llm_relationship_divorced_step1.pkl', 'wb') as file:
    pickle.dump(llm_relationship_divorced_step1, file)

In [None]:
final_df['RELATIONSHIP_widowed'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['RELATIONSHIP_widowed'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_relationship_widowed_step1 = pd.DataFrame({'index': index_list, 'llm_relationship_widowed': llm_response_list})

with open('llm_relationship_widowed_step1.pkl', 'wb') as file:
    pickle.dump(llm_relationship_widowed_step1, file)

In [None]:
final_df['RELATIONSHIP_single'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['RELATIONSHIP_single'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_relationship_single_step1 = pd.DataFrame({'index': index_list, 'llm_relationship_single': llm_response_list})

with open('llm_relationship_single_step1.pkl', 'wb') as file:
    pickle.dump(llm_relationship_single_step1, file)

In [None]:
final_df['EMPLOYMENT_employed'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['EMPLOYMENT_employed'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_employment_employed_step1 = pd.DataFrame({'index': index_list, 'llm_employment_employed': llm_response_list})

with open('llm_employment_employed_step1.pkl', 'wb') as file:
    pickle.dump(llm_employment_employed_step1, file)

In [None]:
final_df['EMPLOYMENT_underemployed'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['EMPLOYMENT_underemployed'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_employment_underemployed_step1 = pd.DataFrame({'index': index_list, 'llm_employment_underemployed': llm_response_list})

with open('llm_employment_underemployed_step1.pkl', 'wb') as file:
    pickle.dump(llm_employment_underemployed_step1, file)

In [None]:
final_df['EMPLOYMENT_unemployed'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['EMPLOYMENT_unemployed'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_employment_unemployed_step1 = pd.DataFrame({'index': index_list, 'llm_employment_unemployed': llm_response_list})

with open('llm_employment_unemployed_step1.pkl', 'wb') as file:
    pickle.dump(llm_employment_unemployed_step1, file)

In [None]:
final_df['EMPLOYMENT_disability'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['EMPLOYMENT_disability'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_employment_disability_step1 = pd.DataFrame({'index': index_list, 'llm_employment_disability': llm_response_list})

with open('llm_employment_disability_step1.pkl', 'wb') as file:
    pickle.dump(llm_employment_disability_step1, file)

In [None]:
final_df['EMPLOYMENT_retired'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['EMPLOYMENT_retired'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_employment_retired_step1 = pd.DataFrame({'index': index_list, 'llm_employment_retired': llm_response_list})

with open('llm_employment_retired_step1.pkl', 'wb') as file:
    pickle.dump(llm_employment_retired_step1, file)

In [None]:
final_df['EMPLOYMENT_student'].value_counts()

In [None]:
index_list = []
llm_response_list = []
system_message = "You are an information extract tool that follows instructions very well and is specifically trained to extract social determinants of health elements from hospital generated free-text."
for index, row in final_df[final_df['EMPLOYMENT_student'] == True].iterrows():
    free_text = row['text']
    user_message = step1_query_optimized.format(free_text=free_text)
    openai_message = create_prompt(system_message, user_message)
    response = send_message(openai_message, deployment_name)
    
    index_list.append(index)
    llm_response_list.append(response)
    print(free_text)
    print(response)
    print()

llm_employment_student_step1 = pd.DataFrame({'index': index_list, 'llm_employment_student': llm_response_list})

with open('llm_employment_student_step1.pkl', 'wb') as file:
    pickle.dump(llm_employment_student_step1, file)

### Extracting Insights

In [None]:
recall_insights = {}

In [None]:
with open('llm_employment_nonadverse_step1.pkl', 'rb') as file:
    llm_employment_nonadverse_step1 = pickle.load(file)

recall_insights['llm_employment_nonadverse_step1'] = sum(llm_employment_nonadverse_step1['llm_employment_nonadverse'] == 'YES')/len(llm_employment_nonadverse_step1)

In [None]:
with open('llm_employment_adverse_step1.pkl', 'rb') as file:
    llm_employment_adverse_step1 = pickle.load(file)
    
recall_insights['llm_employment_adverse_step1'] = sum(llm_employment_adverse_step1['llm_employment_adverse'] == 'YES')/len(llm_employment_adverse_step1)

In [None]:
with open('llm_employment_employed_step1.pkl', 'rb') as file:
    llm_employment_employed_step1 = pickle.load(file)
    
recall_insights['llm_employment_employed_step1'] = sum(llm_employment_employed_step1['llm_employment_employed'] == 'YES')/len(llm_employment_employed_step1)

In [None]:
with open('llm_employment_retired_step1.pkl', 'rb') as file:
    llm_employment_retired_step1 = pickle.load(file)
    
recall_insights['llm_employment_retired_step1'] = sum(llm_employment_retired_step1['llm_employment_retired'] == 'YES')/len(llm_employment_retired_step1)

In [None]:
with open('llm_employment_student_step1.pkl', 'rb') as file:
    llm_employment_student_step1 = pickle.load(file)
    
recall_insights['llm_employment_student_step1'] = sum(llm_employment_student_step1['llm_employment_student'] == 'YES')/len(llm_employment_student_step1)

In [None]:
with open('llm_employment_underemployed_step1.pkl', 'rb') as file:
    llm_employment_underemployed_step1 = pickle.load(file)
    
recall_insights['llm_employment_underemployed_step1'] = sum(llm_employment_underemployed_step1['llm_employment_underemployed'] == 'YES')/len(llm_employment_underemployed_step1)

In [None]:
with open('llm_employment_unemployed_step1.pkl', 'rb') as file:
    llm_employment_unemployed_step1 = pickle.load(file)
    
recall_insights['llm_employment_unemployed_step1'] = sum(llm_employment_unemployed_step1['llm_employment_unemployed'] == 'YES')/len(llm_employment_unemployed_step1)

In [None]:
with open('llm_housing_poor_step1.pkl', 'rb') as file:
    llm_housing_poor_step1 = pickle.load(file)
    
recall_insights['llm_housing_poor_step1'] = sum(llm_housing_poor_step1['llm_housing_poor'] == 'YES')/len(llm_housing_poor_step1)

In [None]:
with open('llm_relationship_adverse_step1.pkl', 'rb') as file:
    llm_relationship_adverse_step1 = pickle.load(file)
    
recall_insights['llm_relationship_adverse_step1'] = sum(llm_relationship_adverse_step1['llm_relationship_adverse'] == 'YES')/len(llm_relationship_adverse_step1)

In [None]:
with open('llm_relationship_divorced_step1.pkl', 'rb') as file:
    llm_relationship_divorced_step1 = pickle.load(file)
    
recall_insights['llm_relationship_divorced_step1'] = sum(llm_relationship_divorced_step1['llm_relationship_divorced'] == 'YES')/len(llm_relationship_divorced_step1)

In [None]:
with open('llm_relationship_married_step1.pkl', 'rb') as file:
    llm_relationship_married_step1 = pickle.load(file)
    
recall_insights['llm_relationship_married_step1'] = sum(llm_relationship_married_step1['llm_relationship_married'] == 'YES')/len(llm_relationship_married_step1)

In [None]:
with open('llm_relationship_nonadverse_step1.pkl', 'rb') as file:
    llm_relationship_nonadverse_step1 = pickle.load(file)
    
recall_insights['llm_relationship_nonadverse_step1'] = sum(llm_relationship_nonadverse_step1['llm_relationship_nonadverse'] == 'YES')/len(llm_relationship_nonadverse_step1)

In [None]:
with open('llm_relationship_partnered_step1.pkl', 'rb') as file:
    llm_relationship_partnered_step1 = pickle.load(file)
    
recall_insights['llm_relationship_partnered_step1'] = sum(llm_relationship_partnered_step1['llm_relationship_partnered'] == 'YES')/len(llm_relationship_partnered_step1)

In [None]:
with open('llm_relationship_single_step1.pkl', 'rb') as file:
    llm_relationship_single_step1 = pickle.load(file)
    
recall_insights['llm_relationship_single_step1'] = sum(llm_relationship_single_step1['llm_relationship_single'] == 'YES')/len(llm_relationship_single_step1)

In [None]:
with open('llm_relationship_widowed_step1.pkl', 'rb') as file:
    llm_relationship_widowed_step1 = pickle.load(file)
    
recall_insights['llm_relationship_widowed_step1'] = sum(llm_relationship_widowed_step1['llm_relationship_widowed'] == 'YES')/len(llm_relationship_widowed_step1)

In [None]:
with open('llm_transportation_distance_step1.pkl', 'rb') as file:
    llm_transportation_distance_step1 = pickle.load(file)
    
recall_insights['llm_transportation_distance_step1'] = sum(llm_transportation_distance_step1['llm_transportation_distance'] == 'YES')/len(llm_transportation_distance_step1)

In [None]:
recall_insights