In [None]:
import os
import pickle as pkl
import random

import numpy as np
from openai import OpenAI
from sklearn.metrics import f1_score

## Load Dataset

In [None]:
# Which healthcare indicator to forecast; the prompt cells further down
# branch on the values 'positive' and 'mortality'.
indicator = "positive"

In [None]:
def _read_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as fh:
        return pkl.load(fh)


data = _read_pickle(f'time_series_{indicator}.pkl')
indices = _read_pickle(f'indices_{indicator}.pkl')
labels = _read_pickle(f'labels_{indicator}.pkl')

# One pre-generated text summary per sample id, keyed by that id.
texts = {}
for sample_id in indices:
    summary_path = os.path.join('gpt_summary', f'{sample_id}_{indicator}.txt')
    with open(summary_path, 'r') as fh:
        texts[sample_id] = fh.read()

In [None]:
# Number of weeks quoted in the user prompt ("the last {window_size} weeks").
window_size = 20
# Length of `data` along its first axis.
data_size = data.shape[0]
print(data_size, data.shape, window_size)

In [None]:
# Contiguous split over positions in `indices`, in the order
# train (60%) -> validation (remainder) -> test (20%).
data_size = len(indices)

num_train, num_test = int(data_size * 0.6), int(data_size * 0.2)
num_vali = data_size - (num_train + num_test)

valid_stop = num_train + num_vali
idx_train = np.arange(0, num_train)
idx_valid = np.arange(num_train, valid_stop)
idx_test = np.arange(valid_stop, valid_stop + num_test)

In [None]:
# Pre-computed embeddings, stored in the same order as `indices`.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
embedding_path = f'../../encoder/embeddings/healthcare_{indicator}.pkl'
with open(embedding_path, 'rb') as fh:
    embs = pkl.load(fh)

# Re-key the embeddings by sample id so they can be looked up like `texts`.
text_emb = {sample_id: embs[pos] for pos, sample_id in enumerate(indices)}

## Prompt GPT-4

In [None]:
def cos(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    Computed as ``dot(a, b) / (||a|| * ||b||)`` using the Euclidean norm.

    Args:
        a: 1-D array-like vector.
        b: 1-D array-like vector of the same length as ``a``.

    Returns:
        The cosine of the angle between ``a`` and ``b``.
    """
    # Use np.linalg.norm explicitly: a bare `norm` is never imported in this
    # notebook, so the previous implementation raised NameError when called.
    cos_sim = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
    return cos_sim

In [None]:
# Number of in-context examples
k = 5

# OpenAI API key. Read from the OPENAI_API_KEY environment variable so the
# secret is never stored in the notebook; falls back to an empty string when
# the variable is not set.
API_KEY = os.environ.get('OPENAI_API_KEY', '')

In [None]:
# System prompt describing the forecasting task for the selected indicator.
# The wording (including the "comming" spelling) is kept verbatim so prompts
# stay identical to the ones used in earlier runs.
if indicator == 'positive':
    system_prompt = "Your job is to act as a professional healthcare forecaster. You will be given a healthcare summary of the past 20 weeks. Based on this information, your task is to predict whether the percentage of respiratory specimens testing positive for influenza will exceed the average threshold in the comming week."
elif indicator == 'mortality':
    system_prompt = "Your job is to act as a professional healthcare forecaster. You will be given a healthcare summary of the past 20 weeks. Based on this information, your task is to predict whether the ratio of mortality from Influenza or Pneumonia to the total number of death will exceed the average threshold in the comming week."
else:
    # Fail with a clear message instead of a NameError on `system_prompt`.
    raise ValueError(
        f"Unsupported indicator {indicator!r}; expected 'positive' or 'mortality'."
    )

print(system_prompt)

In [None]:
# OpenAI client used for the chat-completion requests below.
client = OpenAI(api_key=API_KEY)

In [None]:
# Seed Python's RNG.
# NOTE(review): nothing in this cell draws from `random`, and seeding it is
# not expected to make the API's sampling (temperature=0.7) reproducible.
random.seed(2024)

# Per-indicator wording: (quantity being forecast, its average threshold).
_PROMPT_SPECS = {
    'positive': (
        "the percentage of respiratory specimens testing positive for influenza",
        "6.26%",
    ),
    'mortality': (
        "the ratio of mortality from Influenza or Pneumonia to the total number of death",
        "7.84%",
    ),
}
if indicator not in _PROMPT_SPECS:
    raise ValueError(
        f"Unsupported indicator {indicator!r}; expected one of {sorted(_PROMPT_SPECS)}."
    )
target, threshold = _PROMPT_SPECS[indicator]

# The task description is the same for every test sample, so build it once.
prompt_header = (
    f"Your task is to predict whether {target} will:\n"
    f"(1) Exceed its average of {threshold}\n(2) Not exceed its average of {threshold}\n"
    "in the coming week. "
    f"First, review the following {k} examples of healthcare summaries and their outcomes so that you can refer to when making predictions.\n\n"
)

# Make sure the output directory exists before the first (paid) API call.
output_dir = 'gpt_predict_in-context'
os.makedirs(output_dir, exist_ok=True)

for _i in idx_test:
    i = indices[_i]
    user_prompt = prompt_header

    # Negated similarities so that argsort puts the most similar training
    # summaries first.
    sim = [-cos(text_emb[i], text_emb[indices[ii]]) for ii in idx_train]
    _j_list = np.argsort(sim)

    # Append the k nearest training summaries with their known outcomes.
    for _k in range(k):
        # argsort returns positions within `idx_train`; map them back to
        # dataset positions instead of assuming idx_train starts at 0.
        _j = idx_train[_j_list[_k]]
        j = indices[_j]

        _text = texts[j].replace('\n\n', ' ')
        user_prompt += f"Summary #{_k+1}: {_text}"

        if labels[_j] == 0:
            user_prompt += f"\nOutcome #{_k+1}: Did not exceed {threshold}\n\n"
        elif labels[_j] == 1:
            user_prompt += f"\nOutcome #{_k+1}: Exceeded {threshold}\n\n"

    # Append the summary to be predicted, leaving its outcome blank.
    user_prompt += f"The healthcare situation of the last {window_size} weeks is summarized as follows:\n\n"

    _text = texts[i].replace('\n\n', ' ')
    user_prompt += f"Summary: {_text}\n"
    user_prompt += "Outcome:\n\n"

    user_prompt += "Refer to the provided examples and predict the outcome of the current healthcare summary. "
    user_prompt += "Respond with either 'exceed' or 'not exceed'. "
    user_prompt += "Response should not include other terms."

    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.7,
        max_tokens=2048,
        top_p=1,
    )

    text = response.choices[0].message.content

    # One prediction file per test sample.
    out_path = os.path.join(output_dir, f'k{k}_{i}_{indicator}_ref_int.txt')
    with open(out_path, 'w') as f:
        f.write(f'{text}')