In [1]:
import openai;
import time;
import re;
import requests;
import os;
import random;
import pandas as pd;
import numpy as np;
from dotenv import load_dotenv;
import csv;
import time;
from openai import AzureOpenAI;

# Load environment variables (python-dotenv). The Azure OpenAI endpoint and
# API key are expected there under the names ENDPOINT and OPENAI_KEY.
load_dotenv()
openai.api_type = "azure"
openai.api_version = "2023-05-15"
openai.api_base = os.getenv("ENDPOINT")
openai.api_key = os.getenv("OPENAI_KEY")

# API client built from the settings above; it is the object passed to
# query_store for every request made later in the notebook.
client = AzureOpenAI(
    azure_endpoint=openai.api_base,
    api_version=openai.api_version,
    api_key=openai.api_key,
)

In [2]:
def query_store(client, question, model_name="gpt-4-32k"):
    """Send one prompt to the chat model and return the answer with timing.

    Parameters
    ----------
    client : object
        Client exposing ``chat.completions.create``; the notebook passes the
        ``AzureOpenAI`` instance created in the configuration cell.
    question : str
        Prompt text. In this notebook it is the string produced by
        ``generate_question`` from the prompt template.
    model_name : str, optional
        Value passed as ``model`` in the request. Defaults to ``"gpt-4-32k"``,
        the name that used to be hard-coded here.

    Returns
    -------
    dict
        ``model_name``, ``runtime_seconds`` (duration of the request in
        seconds), ``question`` (the unmodified input) and ``answer`` (the
        content of the first returned choice).
    """
    # The question is wrapped in a simple "Q: ... A:" frame before sending.
    prompt = f"Q: {question} A:"

    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by system clock adjustments while the request is in flight.
    started = time.perf_counter()
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            # The prompt is the caller's request, so it is sent with the
            # "user" role; "assistant" denotes the model's own prior replies.
            {"role": "user", "content": prompt},
        ],
    )
    elapsed = time.perf_counter() - started

    return {
        'model_name': model_name,
        'runtime_seconds': elapsed,
        'question': question,
        'answer': response.choices[0].message.content,
    }

In [3]:
# Prompt template for the simulated hospital's patients. generate_question
# fills in the upper-case placeholders by plain text substitution:
#   GENDER      -> "male" or "female"
#   AGE         -> the patient's age (positive integer)
#   CONDITION   -> the name of the condition
#   X           -> number of days hospitalized (positive integer)
#   DID/DIDNOT  -> "did" or "did not"
#   ADDENDUM    -> extra sentence naming the day of the goals-of-care
#                  discussion, or nothing when no discussion took place.
#                  It is deliberately not followed by a space: the inserted
#                  sentence carries its own trailing space.
template = (
    "You are a medical scribe. "
    "There was a GENDER patient of age AGE who was hospitalized for CONDITION for X days. "
    "During their hospitalization, they DID/DIDNOT discuss their goals of care. "
    "For each day, detail their treatment and health status. "
    "ADDENDUM"
    "Do not explicitly mention \"goals of care\" in your summaries."
)

In [4]:
def _format_count(value):
    """Return ``value`` as text, writing whole-number floats without '.0'."""
    # pandas stores an integer column that contains missing values as float,
    # so a day number can arrive as 3.0; the prompt should still say "3".
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def generate_question(row, template):
    """Fill the prompt template with one patient's characteristics.

    Parameters
    ----------
    row : mapping
        One patient record, indexable by column name (for example a row from
        ``DataFrame.iterrows``). Columns read: ``Sex`` (1 -> "male", anything
        else -> "female"), ``Age``, ``Cancer`` (1 -> "Cancer", anything else
        -> "Dementia"), ``Days``, ``GOCD`` (1 -> a goals-of-care discussion
        took place) and ``DayofGOCD`` (read only when ``GOCD`` is 1).
    template : str
        Text containing the placeholders GENDER, AGE, CONDITION, X,
        DID/DIDNOT and ADDENDUM.

    Returns
    -------
    str
        The template with every placeholder substituted.
    """
    had_discussion = row['GOCD'] == 1

    gender = "male" if row['Sex'] == 1 else "female"
    age = _format_count(row['Age'])
    condition = "Cancer" if row['Cancer'] == 1 else "Dementia"
    days = _format_count(row['Days'])
    did_or_didnot = "did" if had_discussion else "did not"

    # The day of the discussion is only looked up when one took place, so
    # rows without a discussion may leave that column empty.
    if had_discussion:
        day_of_gocd = _format_count(row['DayofGOCD'])
        addendum = f"Explicitly detail the goals of care discussion, which took place on day {day_of_gocd}. "
    else:
        addendum = ""

    # Substitution order is kept as-is: each placeholder is replaced in turn.
    summary = template.replace("GENDER", gender) \
                      .replace("AGE", age) \
                      .replace("CONDITION", condition) \
                      .replace("X", days) \
                      .replace("DID/DIDNOT", did_or_didnot) \
                      .replace("ADDENDUM", addendum)

    return summary

In [5]:
# Input workbook of patient characteristics; its columns must match what
# generate_question reads from each row.
# NOTE(review): this is an absolute, machine-specific path - adjust it before
# running the notebook anywhere else.
file_path = r"C:\Users\edber\Desktop\Road to PhD\University of Washington\PPI\Palliative\2024.08.21 patients.xlsx"
df = pd.read_excel(file_path)

In [6]:
# Create the result columns up front with placeholder values; the query loop
# below overwrites them row by row.
for column, placeholder in {
    'model_name': "",
    'runtime_seconds': 0.0,
    'question': "",
    'answer': "",
}.items():
    df[column] = placeholder

In [7]:
# For every patient row: build the prompt from the row's characteristics,
# send it to the chat model, and write the returned fields back into df.
# Afterwards df is ready to be saved (Excel or CSV) and passed to R for the
# PPI and DSL packages etc.
for index, row in df.iterrows():
    question = generate_question(row, template)
    # Record the prompt before the request is sent.
    df.at[index, 'question'] = question

    query_result = query_store(client, question)
    for field in ('model_name', 'runtime_seconds', 'question', 'answer'):
        df.at[index, field] = query_result[field]

In [8]:
# Write the patient table, now including the prompts and generated answers,
# to an Excel workbook.
# NOTE(review): absolute, machine-specific output path.
df.to_excel(
    excel_writer=r"C:\Users\edber\Desktop\Road to PhD\University of Washington\PPI\Palliative\2024.08.21 narratives.xlsx"
)