In [None]:
import os
import pickle as pkl
import random

import numpy as np
from openai import OpenAI
from sklearn.metrics import f1_score

## Load Dataset

In [None]:
# City code that selects which per-city data files are loaded below.
city = 'ny'

# Human-readable city name for each supported city code; the name is
# interpolated into the user prompt.
city_full_name = dict(
    ny='New York City',
    hs='Houston',
    sf='San Francisco',
)

In [None]:
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`.

    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    with open(path, 'rb') as fh:
        return pkl.load(fh)


indices = _load_pickle('indices.pkl')
dates = _load_pickle('dates.pkl')
data = _load_pickle(f'time_series_{city}.pkl')
rains = _load_pickle(f'rain_{city}.pkl')

# Read one GPT-written summary file per entry of `indices`, keyed by that entry.
texts = {}
for i in indices:
    summary_path = os.path.join('gpt_summary', f'{city}_{i}.txt')
    with open(summary_path, 'r') as fh:
        texts[i] = fh.read()

In [None]:
# Horizon in hours; interpolated into the user prompt ("in the next {window_size} hours").
window_size = 24
# Length of the loaded time series along its first axis.
data_size = data.shape[0]
print(f"{data_size} {window_size} {len(indices)}")

In [None]:
# From here on, data_size counts the entries of `indices` (not the raw series length).
data_size = len(indices)

# 60% train and 20% test (both truncated); validation takes whatever remains.
num_train = int(0.6 * data_size)
num_test = int(0.2 * data_size)
num_vali = data_size - (num_train + num_test)

seq_len_day = 1

# Split boundaries, each shifted back by seq_len_day positions.
_train_end = num_train - seq_len_day
_valid_end = _train_end + num_vali
_test_end = _valid_end + num_test

# Contiguous, non-overlapping position ranges into `indices`.
idx_train = np.arange(_train_end)
idx_valid = np.arange(_train_end, _valid_end)
idx_test = np.arange(_valid_end, _test_end)

In [None]:
# Mean of the `rains` entries at the training positions.
train_rain_labels = [rains[pos] for pos in idx_train]
rain_ratio = np.mean(train_rain_labels)
rain_ratio

In [None]:
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open(f'../../encoder/embeddings/weather_{city}.pkl', 'rb') as emb_file:
    embs = pkl.load(emb_file)

# Key each embedding by its entry in `indices`, pairing them by position.
# The final entry of `indices` is excluded, so it has no embedding.
text_emb = {idx: embs[pos] for pos, idx in enumerate(indices[:-1])}

## Prompt GPT-4

In [None]:
def cos(a, b):
    """Return the cosine similarity between vectors `a` and `b`.

    Computed as dot(a, b) / (||a|| * ||b||). The norm comes from
    `np.linalg.norm`, since no bare `norm` name is imported in this notebook.
    A zero-length vector makes the denominator zero; that case is not handled
    specially.
    """
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

In [None]:
# Number of in-context examples
k = 5

# OpenAI API key, read from the OPENAI_API_KEY environment variable so the
# secret never has to be saved in the notebook. Falls back to an empty string
# when the variable is unset.
API_KEY = os.environ.get('OPENAI_API_KEY', '')

In [None]:
# System message sent with every request; it defines the forecaster role and the task.
system_prompt = (
    "Your job is to act as a professional weather forecaster. "
    "You will be given a summary of the weather from the past 24 hours. "
    "Based on this information, your task is to predict whether it will rain in the next 24 hours."
)
print(system_prompt)

In [None]:
# OpenAI client used for the chat-completion requests below, authenticated with API_KEY.
client = OpenAI(api_key=API_KEY)

In [None]:
# Seed Python's RNG. Nothing in this cell draws from `random`, and the seed
# does not make the API responses deterministic (temperature is 0.7).
random.seed(2024)

# Create the output folder up front so a missing directory cannot raise
# FileNotFoundError after an API call has already been made.
output_dir = 'gpt_predict_in-context'
os.makedirs(output_dir, exist_ok=True)

for _i in idx_test:
    # _i is a position into `indices`; i is the key used by `texts` / `text_emb`.
    i = indices[_i]

    user_prompt = f"Your task is to predict whether it will rain or not in {city_full_name[city]} in the next {window_size} hours. "
    user_prompt += f"First, review the following {k} examples of weather summaries and outcomes so that you can refer to when making predictions.\n\n"

    # Negated cosine similarity, so an ascending argsort ranks the most
    # similar training summaries first.
    sim = [-cos(text_emb[i], text_emb[indices[ii]]) for ii in idx_train]
    _j_list = np.argsort(sim)

    # Outcome shown for each in-context example (1-based example number -> bool).
    references = {}

    for _k in range(k):
        # argsort yields positions within `sim`; map them back through
        # idx_train instead of assuming idx_train starts at 0.
        _j = idx_train[_j_list[_k]]
        j = indices[_j]

        user_prompt += f"Summary #{_k+1}: {texts[j]}"

        # The label is read one position after the summary's position —
        # presumably the rain outcome of the following window; TODO confirm.
        rained = bool(rains[_j+1])
        outcome = "It rained." if rained else "It did not rain."
        user_prompt += f"\nOutcome #{_k+1}: {outcome}\n\n"
        references[_k+1] = rained

    user_prompt += "The weather of the last 24 hours is summarized as follows:\n\n"
    user_prompt += f"Summary: {texts[i]}\n"
    user_prompt += "Outcome:\n\n"

    user_prompt += "Based on the understanding of the provided examples, predict the outcome of the current weather summary. "
    user_prompt += "Respond your prediction with either 'rain' or 'not rain'. "
    user_prompt += "Response should not include other terms."

    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.7,
        max_tokens=2048,
        top_p=1,
    )

    text = response.choices[0].message.content

    # Save the raw model reply, one file per test sample.
    with open(f'{output_dir}/{city}_k{k}_{i}_ref.txt', 'w') as f:
        f.write(f'{text}')