In [None]:
import os
import numpy as np
import pickle as pkl
from openai import OpenAI

## Load Dataset

In [None]:
# Selects which task/dataset the notebook runs on. It is interpolated into the
# pickle file names and the output file names, and the prompt-building cells
# branch on the two values 'positive' and 'mortality'.
indicator = 'positive'

In [None]:
# Both inputs are pickles whose file names are keyed by the chosen indicator.
indices_path = f'indices_{indicator}.pkl'
series_path = f'time_series_{indicator}.pkl'

# `indices`: positions later used as window start offsets into `data`.
with open(indices_path, 'rb') as f:
    indices = pkl.load(f)

# `data`: the 2-D array of weekly observations that the windows are cut from.
with open(series_path, 'rb') as f:
    data = pkl.load(f)

In [None]:
# Length (in weeks) of the history window handed to the model per prediction.
window_size = 20

# Sanity check: rows in `data`, window length, number of window start indices,
# and the full array shape.
data_size = data.shape[0]
print(data_size, window_size, len(indices), data.shape)

In [None]:
# Contiguous 60 / 20 / 20 split over the positions of `indices`
# (train first, then validation, then test). Validation takes whatever is
# left after the truncated train and test sizes, so the three parts always
# add up to len(indices).
data_size = len(indices)

num_train = int(data_size * 0.6)
num_test = int(data_size * 0.2)
num_vali = data_size - (num_train + num_test)

valid_end = num_train + num_vali  # first position belonging to the test part

idx_train = np.arange(0, num_train)
idx_valid = np.arange(num_train, valid_end)
idx_test = np.arange(valid_end, valid_end + num_test)

## Prompt GPT-4

In [None]:
# Read the OpenAI key from the environment so the secret never has to be
# pasted into (and saved/committed with) the notebook. Falls back to the
# previous empty-string placeholder when OPENAI_API_KEY is not set.
API_KEY = os.environ.get('OPENAI_API_KEY', '')

In [None]:
# Pick the system prompt that matches the selected indicator. The prompt text
# is deliberately left untouched (including its spelling) so that requests stay
# identical to earlier runs.
if indicator == 'mortality':
    system_prompt = "Your job is to act as a professional healthcare forecaster. You will be given a time-series data from the past 20 weeks. Based on this information, your task is to predict whether the ratio of mortality from Influenza or Pneumonia to the total number of death will exceed its average in the comming week."
elif indicator == 'positive':
    system_prompt = "Your job is to act as a professional healthcare forecaster. You will be given a time-series data from the past 20 weeks. Based on this information, your task is to predict whether the percentage of respiratory specimens testing positive for influenza will exceed its average in the comming week."
else:
    # Without this branch an unsupported value would leave `system_prompt`
    # undefined (NameError) or, worse, silently reuse a stale value from an
    # earlier run of this cell.
    raise ValueError(f"Unsupported indicator: {indicator!r} (expected 'mortality' or 'positive')")
print(system_prompt)

In [None]:
# OpenAI API client, authenticated with API_KEY; used by the query loop to
# send the chat-completion requests.
client = OpenAI(api_key=API_KEY)

In [None]:
def _join_column(values, two_decimals=False):
    """Render one indicator column as a '|'-separated string.

    Parameters
    ----------
    values : iterable
        The weekly values of a single column of the current window.
    two_decimals : bool, default False
        When True every value is parsed as a float and printed with two
        decimals (used for the percentage columns). When False the values are
        emitted unchanged; they are passed through ``str()`` so that numeric
        arrays work as well as arrays of strings.

    Returns
    -------
    str
        The values joined by the '|' separator announced in the prompt.
    """
    if two_decimals:
        return '|'.join(f'{float(x):.2f}' for x in values)
    return '|'.join(str(x) for x in values)


# Fail fast on an unsupported indicator instead of hitting a NameError (or a
# stale `user_prompt`) inside the loop.
if indicator not in ('positive', 'mortality'):
    raise ValueError(f"Unsupported indicator: {indicator!r} (expected 'positive' or 'mortality')")

# Make sure the output folder exists before any (paid) API call is made;
# otherwise the first write would fail after the request was already sent.
output_dir = 'gpt_predict_time'
os.makedirs(output_dir, exist_ok=True)

# One request per test example: cut the window, build the prompt, query the
# model, and store the raw answer in a text file named after the window start.
for _i in idx_test:
    i = indices[_i]  # start offset of this example's window in `data`

    data_window = data[i:i+window_size]

    if indicator == 'positive':
        # Columns 1-3 are counts, columns 4-6 are the corresponding rates.
        total_specimens = _join_column(data_window[:,1])
        total_a = _join_column(data_window[:,2])
        total_b = _join_column(data_window[:,3])
        pos_rate = _join_column(data_window[:,4], two_decimals=True)
        a_rate = _join_column(data_window[:,5], two_decimals=True)
        b_rate = _join_column(data_window[:,6], two_decimals=True)

        user_prompt = f"Your task is to predict whether the percentage of respiratory specimens testing positive for influenza will:\n"
        user_prompt += "(1) Exceed its average of 6.26%\n(2) Not exceed its average of 6.26%\n"
        user_prompt += "in the coming week. "
        user_prompt += f"Review the time-series data provided for the last {window_size} weeks. "
        user_prompt += f"Each time-series consists of weekly values separated by a \'|\' token for the following indicators:\n"
        user_prompt += f"- Number of specimens tested: {total_specimens}\n"
        user_prompt += f"- Number of positive specimens for Influenza A: {total_a}\n"
        user_prompt += f"- Number of positive specimens for Influenza B: {total_b}\n"
        user_prompt += f"- Ratio of positive specimens (%): {pos_rate}\n"
        user_prompt += f"- Ratio of positive specimens for Influenza A (%): {a_rate}\n"
        user_prompt += f"- Ratio of positive specimens for Influenza B (%): {b_rate}\n\n"
        user_prompt += f"Based on this time-series data, predict whether the percentage of respiratory specimens testing positive for influenza will exceed its average of 6.26% or not in the comming week. "
        user_prompt += "Respond with either \'exceed\' or \'not exceed\'. Do not provide any other details."

    elif indicator == 'mortality':
        # Columns 1-3 are death counts, column 4 is the mortality ratio.
        inf_death = _join_column(data_window[:,1])
        pneum_death = _join_column(data_window[:,2])
        total_death = _join_column(data_window[:,3])
        ratio = _join_column(data_window[:,4], two_decimals=True)

        user_prompt = f"Your task is to predict whether the ratio of mortality from Influenza or Pneumonia to the total number of death will:\n"
        user_prompt += "(1) Exceed its average of 7.84%\n(2) Does not exceed its average of 7.84%\n"
        user_prompt += "in the coming week. "
        user_prompt += f"Review the time-series data provided for the last {window_size} weeks. "
        user_prompt += f"Each time-series consists of weekly values separated by a \'|\' token for the following indicators:\n"
        user_prompt += f"- Total number of death: {total_death}\n"
        user_prompt += f"- Number of death from influenza: {inf_death}\n"
        user_prompt += f"- Number of death from pneumonia: {pneum_death}\n"
        user_prompt += f"- Ratio of mortality from Influenza or Pneumonia (%): {ratio}\n\n"
        user_prompt += f"Based on this time-series data, predict whether the ratio of mortality from Influenza or Pneumonia to the total number of death will exceed 7.84% or not. "
        user_prompt += "Respond with either \'exceed\' or \'not exceed\'. Do not provide any other details."

    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.7,
        max_tokens=2048,
        top_p=1
    )

    # `content` may be None (e.g. a filtered/empty completion); store an empty
    # file in that case instead of crashing on f.write(None).
    text = response.choices[0].message.content or ''

    with open(f'{output_dir}/{i}_{indicator}.txt', 'w') as f:
        f.write(text)