# Setup

## Set API key

In [1]:
import os   # type: ignore
import openai   # type: ignore

# Read the OpenAI API key from a file kept outside the notebook so the secret
# itself never appears in a cell or its saved output.
with open("../openai-api-key") as f:
    # Strip surrounding whitespace: a key file normally ends with a newline,
    # which would otherwise become part of the key string.
    api_key = f.read().strip()

openai.api_key = api_key   # type: ignore


## Format Data

In [24]:
import pandas as pd
# Load the example model outputs. index_col=False tells pandas not to use the
# first column as the index. The boolean `pred` column is the confidence
# rounded to 0/1 and is appended as the last column.
df = pd.read_csv('../Examples/example_outputs.csv', index_col=False).assign(
    pred=lambda frame: frame['conf'].round().astype(bool)
)

# List the column names on one line, then preview the first rows.
print(*df.columns, sep=", ")
df.head()

F1, AdmitDate, Day_In_Stay, conf, Date_Of_Process, Feature_1, Feature_2, Feature_3, Feature_4, Feature_5, Feature_6, Feature_7, Feature_8, Feature_9, Feature_10, BWLoadDateTime, BWUpdateDateTime, Unnamed: 17, pred


Unnamed: 0,F1,AdmitDate,Day_In_Stay,conf,Date_Of_Process,Feature_1,Feature_2,Feature_3,Feature_4,Feature_5,Feature_6,Feature_7,Feature_8,Feature_9,Feature_10,BWLoadDateTime,BWUpdateDateTime,Unnamed: 17,pred
0,604,9/3/2020,33,0.89,8/10/2020,CBC W/ AUTO DIFF,PT EVALUATION MOD COMPLEXITY,CULTURE SCREEN MRSA,ARTERIAL BLOOD GAS,THERAPEUTIC PROCEDURE,EVAL SWALLOW/ORAL FUNCTION,VANCOMYCIN,GAIT TRAINING 15 MIN,METOPROLOL TART 25MG TAB,INSULIN HUM REGULAR 100U/ML IN,10/8/20 3:27 PM,10/8/20 3:27 PM,,True
1,605,9/3/2020,33,0.86,8/10/2020,CBC W/ AUTO DIFF,ARTERIAL BLOOD GAS,THERAPEUTIC PROCEDURE,VANCOMYCIN,METOPROLOL TART 25MG TAB,MULTIVITAMIN TAB,POTASSIUM CL 20MEQ TER,URINALYSIS AUTO W/O MICRO,MASK TRACH,ASPIRIN 81MG ECT,10/8/20 3:27 PM,10/8/20 3:27 PM,,True
2,586,9/5/2020,33,0.04,9/10/2020,CBC W/ AUTO DIFF,CULTURE SCREEN MRSA,ARTERIAL BLOOD GAS,GAIT TRAINING 15 MIN,MULTIVITAMIN TAB,ASPIRIN 81MG ECT,ATORVASTATIN CALCIUM 40MG TAB,CREAM SWEEN 24 5OZ,PT EVALUATION HIGH COMPLEXITY,IPRATR-ALBUTEROL 0.5-3MG/3ML S,10/8/20 3:27 PM,10/9/20 7:27 AM,,False
3,607,9/29/2020,9,0.04,8/10/2020,CBC W/ AUTO DIFF,CULTURE SCREEN MRSA,ARTERIAL BLOOD GAS,OT EVALUATION HIGH COMPLEXITY,THERAPEUTIC PROCEDURE,INSULIN HUM REGULAR 100U/ML IN,ACETAMINOPHEN 325MG TAB,MULTIVITAMIN TAB,GABAPENTIN 300MG CAP,URINALYSIS AUTO W/O MICRO,10/8/20 3:27 PM,10/8/20 3:27 PM,,False


## Functions

### Creating a prompt

In [3]:
from typing import Optional


def create_prompts(df:pd.DataFrame, seed_pre:Optional[str] = None, seed_post:Optional[str] = None, num_feats:int = 5):
    """Build one free-text prompt per row of ``df``.

    Each prompt states whether the patient may or is unlikely to have
    complications (from the row's ``pred`` value), the confidence (``conf``),
    and the first ``num_feats`` contributing factors (columns ``Feature_1`` ..
    ``Feature_<num_feats>``), optionally wrapped in caller-supplied text.

    Args:
        df: Frame with ``pred``, ``conf`` and ``Feature_<i>`` columns.
        seed_pre: Text placed before the generated statement, or None for none.
        seed_post: Text placed after the generated statement, or None for none.
        num_feats: How many ``Feature_<i>`` columns to list (default 5).

    Returns:
        A list of prompt strings, one per row, in row order. Empty if ``df``
        has no rows.

    Raises:
        ValueError: If ``num_feats`` is smaller than 1.
        KeyError: If a required column is missing from ``df``.
    """
    if num_feats < 1:
        raise ValueError("num_feats must be at least 1")

    # Column names are row-independent, so compute them once.
    feat_cols = ["Feature_{}".format(i) for i in range(1, num_feats + 1)]
    prefix = seed_pre if seed_pre is not None else ""
    suffix = seed_post if seed_post is not None else ""

    prompts = []
    for _, row in df.iterrows():
        if row['pred']:
            outcome = "may have complications "
        else:
            outcome = "is unlikely to have complications "

        # str() keeps non-string cells (e.g. NaN) from raising; strip() drops
        # the padding the CSV values carry, which showed up as double spaces
        # in the generated prompt.
        factors = ", ".join(str(row[col]).strip() for col in feat_cols)

        prompts.append(
            prefix
            + "It has been determined that the patient "
            + outcome
            + "with a confidence of {}. ".format(row['conf'])
            + "This is due to the following factors: "
            + factors
            + suffix
        )

    return prompts

def instruction_based_prompt(df, num_feats=5):
    """Build one instruction-style prompt per row of ``df``.

    Each prompt asks for an explanation, addressed to a nurse, of why the
    patient may or is unlikely to have complications (from the row's ``pred``
    value), followed by the feature values one per line and an
    ``Explanation:`` cue.

    Args:
        df: Frame with a ``pred`` column and ``Feature_1`` ..
            ``Feature_<num_feats>`` columns.
        num_feats: Number of features to list; must be between 1 and 10.

    Returns:
        A list of prompt strings, one per row, in row order.

    Raises:
        AssertionError: If ``num_feats`` is outside 1..10.
        KeyError: If a required column is missing from ``df``.
    """
    assert num_feats > 0 and num_feats <= 10, "Must use between 1 and 10 features"

    # Select features by column name rather than by position, so adding or
    # reordering unrelated columns cannot silently change what is listed.
    feat_cols = ["Feature_{}".format(i) for i in range(1, num_feats + 1)]

    prompts = []
    for _, row in df.iterrows():
        # str() keeps non-string cells (e.g. NaN) from raising on .strip().
        feats = [str(row[col]).strip() for col in feat_cols]
        prompt = "Explain to a nurse why a patient "
        if row['pred']:
            prompt += "may have complications "
        else:
            prompt += "is unlikely to have complications "
        prompt += "based on the following features:\n\n"
        prompt += "Features:\n" + "\n".join(feats)
        prompt += "\n\nExplanation:\n\n"
        prompts.append(prompt)
    return prompts


def print_query(prompt, response):
    """Print a prompt followed by the text of the first choice in ``response``.

    Args:
        prompt: The prompt text to show.
        response: Mapping-like completion result; only
            ``response['choices'][0]['text']`` is read.
    """
    completion_text = response['choices'][0]['text']
    layout = "[PROMPT]:\n{}\n[RESPONSE]:\n{}"
    print(layout.format(prompt, completion_text))



# Query Experiments 

## Prompt Types

### TL;DR Summarization

In [14]:
# Append a "tl;dr:" cue after each patient statement (intended to prompt a
# short summary). `prompts` is a notebook-level name that the query cell
# reads, so running this cell selects the TL;DR variant for the next query.
prompts = create_prompts(df, seed_post="\n\ntl;dr:")
print(prompts[0])

It has been determined that the patient may have complications with a confidence of 0.89. This is due to the following factors:  CBC W/ AUTO DIFF,  PT EVALUATION MOD COMPLEXITY,  CULTURE SCREEN MRSA,  ARTERIAL BLOOD GAS,  THERAPEUTIC PROCEDURE

tl;dr:


### Summarize for a 2nd grader

In [7]:
# Wrap each patient statement in triple quotes between a "my second grader
# asked" lead-in and a "rephrased in plain language" cue that leaves a quote
# block open for the completion. Rebinds the notebook-level `prompts` that
# the query cell reads.
prompts = create_prompts(df,
    seed_pre="My second grader asked me what this passage means:\n\"\"\"\n",
    seed_post="\n\"\"\"\nI rephrased it for him, in plain language a second grader can understand:\n\"\"\"\n"
)
print(prompts[0])

My second grader asked me what this passage means:
"""
It has been determined that the patient may have complications with a confidence of 0.89. This is due to the following factors:  CBC W/ AUTO DIFF,  PT EVALUATION MOD COMPLEXITY,  CULTURE SCREEN MRSA,  ARTERIAL BLOOD GAS,  THERAPEUTIC PROCEDURE
"""
I rephrased it for him, in plain language a second grader can understand:
"""



### Custom Instruction-based prompt

In [19]:
# Build the instruction-style prompts listing 10 features per patient.
# Rebinds the notebook-level `prompts` that the query cell reads.
prompts = instruction_based_prompt(df, num_feats=10)
print(prompts[0])

Explain to a nurse why a patient may have complications based on the following features:

Features:
CBC W/ AUTO DIFF
PT EVALUATION MOD COMPLEXITY
CULTURE SCREEN MRSA
ARTERIAL BLOOD GAS
THERAPEUTIC PROCEDURE
EVAL SWALLOW/ORAL FUNCTION
VANCOMYCIN
GAIT TRAINING 15 MIN
METOPROLOL TART 25MG TAB
INSULIN HUM REGULAR 100U/ML IN

Explanation:




## Get Query Response

In [22]:
# Engine used for the completion request. The commented-out lines are the
# alternative engine names; uncomment exactly one to switch.
# engine="davinci"
# engine="ada"
#engine="curie-instruct-beta"
engine="davinci-instruct-beta"

# Passed as max_tokens to the completion request.
num_token=100
#num_token=50

# Passed as temperature to the completion request.
#temp = None
temp = 0.3

# Only the first prompt is sent. `prompts` holds whatever the most recently
# executed prompt-building cell assigned, so run the desired prompt cell
# immediately before this one.
response = openai.Completion.create(engine=engine, prompt=prompts[0], max_tokens=num_token, temperature=temp)

In [6]:
# Show the first prompt next to the completion text returned for it.
print_query(prompts[0], response)

[PROMPT]:
It has been determined that the patient may have complications with a confidence of 0.89. This is due to the following factors:  CBC W/ AUTO DIFF,  PT EVALUATION MOD COMPLEXITY,  CULTURE SCREEN MRSA,  ARTERIAL BLOOD GAS,  THERAPEUTIC PROCEDURE

tl;dr:
[RESPONSE]:


The patient may have complications with a confidence of 0.89.


## Example outputs

### 2nd Grader Summarization with the curie-instruct-beta model

In [13]:
# The saved output below shows the second-grader prompt; `prompts` and
# `response` must come from re-running that prompt cell and the query cell
# (this section's heading names curie-instruct-beta as the engine).
print_query(prompts[0], response)

[PROMPT]:
My second grader asked me what this passage means:
"""
It has been determined that the patient may have complications with a confidence of 0.89. This is due to the following factors:  CBC W/ AUTO DIFF,  PT EVALUATION MOD COMPLEXITY,  CULTURE SCREEN MRSA,  ARTERIAL BLOOD GAS,  THERAPEUTIC PROCEDURE
"""
I rephrased it for him, in plain language a second grader can understand:
"""

[RESPONSE]:
It means that the doctor thinks that you might have a problem with your blood. He thinks that you might have a problem with your blood because it is not normal. He thinks that you might have a problem with your blood because you have a lot of white blood cells in your blood. He thinks that you might have a problem with your blood because you have a lot of red blood cells in your blood. He thinks that you might have a problem with your blood because you have a lot of oxygen in


### TL;DR: with davinci-instruct-beta

In [18]:
# The saved output below shows the TL;DR prompt; `prompts` and `response`
# must come from re-running that prompt cell and the query cell (this
# section's heading names davinci-instruct-beta as the engine).
print_query(prompts[0], response)

[PROMPT]:
It has been determined that the patient may have complications with a confidence of 0.89. This is due to the following factors:  CBC W/ AUTO DIFF,  PT EVALUATION MOD COMPLEXITY,  CULTURE SCREEN MRSA,  ARTERIAL BLOOD GAS,  THERAPEUTIC PROCEDURE

tl;dr:
[RESPONSE]:
 the patient has a high chance of having a complication.

The confidence is calculated by the following formula:

confidence = (probability of having the complication) / (1 - (probability of not having the complication))

In this case, the probability of having the complication is 0.89, and the probability of not having the complication is 0.11.

The confidence is calculated as 0.89 / (1 - 0.11) = 0.


### Instruction Prompt w/ Davinci-Instruct-Beta Model

In [23]:
# The saved output below shows the instruction-style prompt; `prompts` and
# `response` must come from re-running that prompt cell and the query cell
# (this section's heading names the Davinci-Instruct-Beta model).
print_query(prompts[0], response)

[PROMPT]:
Explain to a nurse why a patient may have complications based on the following features:

Features:
CBC W/ AUTO DIFF
PT EVALUATION MOD COMPLEXITY
CULTURE SCREEN MRSA
ARTERIAL BLOOD GAS
THERAPEUTIC PROCEDURE
EVAL SWALLOW/ORAL FUNCTION
VANCOMYCIN
GAIT TRAINING 15 MIN
METOPROLOL TART 25MG TAB
INSULIN HUM REGULAR 100U/ML IN

Explanation:


[RESPONSE]:


-A patient may have complications based on the following features:

-CBC with auto differential
-PT evaluation mod complexity
-Culture screen MRSA
-Arterial blood gas
-Therapeutic procedure
-Evaluate swallow/oral function
-Vancomycin
-Gait training 15 min
-Metoprolol tart 25mg tab
-Insulin human regular 100U/ml in
