In [1]:
import os
import random
import torch
import pandas as pd
import numpy as np
import pickle as pkl
import matplotlib.pyplot as plt
from openai import OpenAI
from tqdm import tqdm, trange
from datetime import datetime
from sklearn.metrics import f1_score, roc_auc_score
from sentence_transformers import SentenceTransformer
from numpy.linalg import norm

In [2]:
indicator = 'mortality'

In [3]:
with open(f'time_series_{indicator}.pkl', 'rb') as f:
    data = pkl.load(f)

In [4]:
with open(f'indices_{indicator}.pkl', 'rb') as f:
    indices = pkl.load(f)

In [5]:
with open(f'labels_{indicator}.pkl', 'rb') as f:
    labels = pkl.load(f)
    
labels.count(0), labels.count(1)

(260, 115)

In [6]:
data_size = data.shape[0]
window_size = 20
print(data_size, data.shape, window_size)

395 (395, 5) 20


In [7]:
texts = {}
for i in indices:
    with open(os.path.join('chatgpt_response_summary', f'{i}_{indicator}.txt'), 'r') as f:
        text = f.read()
        texts[i] = text

In [8]:
data_size = data.shape[0]
window_size = 20
print(data_size, window_size, len(indices))

395 20 375


In [9]:
data_size = len(indices)

num_train = int(data_size * 0.6)
num_test = int(data_size * 0.2)
num_vali = data_size - num_train - num_test

seq_len_day = 1

idx_train = np.arange(num_train)
idx_valid = np.arange(num_train, num_train + num_vali)
idx_test = np.arange(num_train + num_vali, num_train + num_vali + num_test)

In [40]:
config = 'illness_mortality_PatchTST_bert_seed2020_el1_dim64_lr0.0001_bs16_do0.2_dff128_nh4_balance'

with open(f'../../GEON4/embeddings/{config}.pkl', 'rb') as f:
    embs = pkl.load(f)

trues_lm = np.load(f'../../GEON4/results/{config}/true.npy')[idx_test]
preds_lm = np.load(f'../../GEON4/results/{config}/pred.npy')[idx_test]
probs_lm = np.load(f'../../GEON4/results/{config}/prob.npy')[idx_test]

f1_micro = f1_score(trues_lm, preds_lm, average='micro')
f1_macro = f1_score(trues_lm, preds_lm, average='macro')
f1_micro, f1_macro

(0.8133333333333334, 0.7337728194726167)

In [11]:
config = 'illness_mortality_PatchTST_seed2020_el2_dim64_lr0.0001_do0.2_dff128_nh8_balance.pkl'

with open(f'../../Time-Series-Library/results/{config}', 'rb') as f:
    trues, preds, probs, embs = pkl.load(f)

true_test = trues[idx_test]
pred_test = preds[idx_test]
prob_test = probs[idx_test]

test_micro = f1_score(true_test, pred_test, average='micro')
test_macro = f1_score(true_test, pred_test, average='macro')
test_auc = roc_auc_score(true_test, prob_test[:,1])

print(test_micro, test_macro, test_auc)

0.7733333333333333 0.6950490313322172 0.9276923076923077


In [41]:
text_emb = {}

for _i, i in enumerate(indices):
    text_emb[i] = embs[_i]

In [13]:
def cos(a, b):
    cos_sim = np.dot(a, b)/(norm(a)*norm(b))
    return cos_sim

In [25]:
system_prompt = f"Your job is to act as a professional healthcare forecaster. You will be given a healthcare summary of the past 20 weeks. Based on this information, your task is to predict whether the ratio of mortality from Influenza or Pneumonia to the total number of death will exceed the average threshold in the comming week."

print(system_prompt)

Your job is to act as a professional healthcare forecaster. You will be given a healthcare summary of the past 20 weeks. Based on this information, your task is to predict whether the ratio of mortality from Influenza or Pneumonia to the total number of death will exceed the average threshold in the comming week.


In [26]:
k = 5

In [27]:
client = OpenAI(api_key='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')

In [42]:
random.seed(2024)

for _i in idx_test:
    i = indices[_i]
    if os.path.exists(f'chatgpt_predict_GEON4/k{k}_{i}_{indicator}_ref.txt'):
        print('Exists')
        #continue
    
    today = data[i][0]
    print(today)
    
    user_prompt = f"Your task is to predict whether the ratio of mortality from Influenza or Pneumonia to the total number of death will:\n"
    user_prompt += "(1) Exceed its average of 7.84%\n(2) Not exceed its average of 7.84%\n"
    user_prompt += "in the coming week. "
    user_prompt += f"First, review the following {k} examples of healthcare summaries and their outcomes so that you can refer to when making predictions.\n\n"
    
    sim = [-cos(text_emb[i], text_emb[indices[ii]]) for ii in idx_train]
    _j_list = np.argsort(sim)
    
    #random.shuffle(_j_list)
    
    #_j_list = _j_list[:k]
    #random.shuffle(_j_list)
    
    references = {}
    for _k in range(k):
        _j = _j_list[_k]
        j = indices[_j]
        
        _text = texts[j].replace('\n\n', ' ')
        user_prompt += f"Summary #{_k+1}: {_text}"
        
        if labels[_j] == 0:
            #user_prompt += f"\nOutcome #{_k+1}: {indicator2name[indicator]} price decreased by more than 1%\n\n"
            user_prompt += f"\nOutcome #{_k+1}: Did not exceed 7.84%\n\n"
            references[_k+1] = 0
        elif labels[_j] == 1:
            #user_prompt += f"\nOutcome #{_k+1}: {indicator2name[indicator]} price changed minimally between -1% and 1%\n\n"
            user_prompt += f"\nOutcome #{_k+1}: Exceeded 7.84%\n\n"
            references[_k+1] = 1
    
    user_prompt += f"The healthcare situation of the last {window_size} weeks is summarized as follows:\n\n"
    
    _text = texts[i].replace('\n\n', ' ')
    user_prompt += f"Summary: {_text}\n"
    user_prompt += f"Outcome:\n\n"
    
    #user_prompt += f"Based on the understanding of the provided examples, predict the outcome of the current financial summary. "
    user_prompt += f"Refer to the provided examples and predict the outcome of the current healthcare summary. "
    user_prompt += "Respond with either \'exceed\' or \'not exceed\'. "
    #user_prompt += "Also, which summary was most relevant to the current situation? Respond in the format 'Summary X'. "
    user_prompt += "Response should not include other terms."
    
    #print(user_prompt)
    #break
    
    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
        {
          "role": "system",
          "content": system_prompt
        },
        {
          "role": "user",
          "content": user_prompt
        }
        ],
        temperature=0.7,
        max_tokens=2048,
        top_p=1
    )

    text = response.choices[0].message.content
    print(text, labels[_i], '\n')
    
    with open(f'chatgpt_predict_GEON4/k{k}_{i}_{indicator}_ref.txt', 'w') as f:
        f.write(f'{text}')

Exists
2021-22 40
not exceed False 

Exists
2021-22 41
not exceed False 

Exists
2021-22 42
not exceed False 

Exists
2021-22 43
not exceed False 

Exists
2021-22 44
not exceed False 

Exists
2021-22 45
not exceed False 

Exists
2021-22 46
not exceed False 

Exists
2021-22 47
not exceed False 

Exists
2021-22 48
not exceed False 

Exists
2021-22 49
not exceed False 

Exists
2021-22 50
not exceed False 

Exists
2021-22 51
not exceed False 

Exists
2021-22 52
not exceed False 

Exists
2022-23 01
not exceed False 

Exists
2022-23 02
not exceed False 

Exists
2022-23 03
not exceed False 

Exists
2022-23 04
not exceed False 

Exists
2022-23 05


KeyboardInterrupt: 

In [43]:
preds, trues, probs = [], [], []
print(indicator, k)
for _i in idx_test:
    i = indices[_i]
    with open(f'chatgpt_predict_GEON4/k{k}_{i}_{indicator}_ref.txt', 'r') as f:
        pred = f.read()
    if 'not' in pred or 'Not' in pred:
        preds.append(0)
        probs.append([1., 0.])
    else:
        preds.append(1)
        probs.append([0., 1.])
    trues.append(labels[_i])
    
probs = np.array(probs)
f1_micro = f1_score(trues, preds, average='micro')
f1_macro = f1_score(trues, preds, average='macro')
f1_micro, f1_macro

mortality 5


(0.9733333333333334, 0.9467329545454545)

In [44]:
f1_micro = f1_score(trues_lm, preds_lm, average='micro')
f1_macro = f1_score(trues_lm, preds_lm, average='macro')
f1_micro, f1_macro

(0.8133333333333334, 0.7337728194726167)

In [45]:
for lmbda in np.arange(0, 1.01, 0.1):
    probs_comb = (lmbda * probs) + ((1 - lmbda) * probs_lm)
    #probs_comb = (lmbda * probs) + ((1 - lmbda) * prob_test)
    preds = np.argmax(probs_comb, 1)

    f1_micro = f1_score(trues, preds, average='micro')
    f1_macro = f1_score(trues, preds, average='macro')
    auc = roc_auc_score(trues, probs_comb[:,1])
    print(f1_macro, auc, (f1_macro+auc)/2)

0.7337728194726167 0.96 0.8468864097363084
0.7474747474747475 1.0 0.8737373737373737
0.7916666666666666 1.0 0.8958333333333333
0.8243559718969555 1.0 0.9121779859484778
0.9467329545454545 1.0 0.9733664772727273
0.9467329545454545 1.0 0.9733664772727273
0.9467329545454545 1.0 0.9733664772727273
0.9467329545454545 1.0 0.9733664772727273
0.9467329545454545 1.0 0.9733664772727273
0.9467329545454545 1.0 0.9733664772727273
0.9467329545454545 0.9846153846153847 0.9656741695804196


In [44]:
with open('../../Time-Series-Library/results/weather_ny_PatchTST_seed2020_el1_dim64_lr0.001_do0.3_dff256_nh8.pkl', 'rb') as f:
    true_val, pred_val, prob_val, true_test, pred_test, prob_test = pkl.load(f)

In [91]:
with open('../../Time-Series-Library/results/weather_la_PatchTST_seed2020_el1_dim128_lr0.001_do0.1_dff256_nh8.pkl', 'rb') as f:
    true_val, pred_val, prob_val, true_test, pred_test, prob_test = pkl.load(f)

In [208]:
f1_micro = f1_score(true_test, pred_test, average='micro')
f1_macro = f1_score(true_test, pred_test, average='macro')
f1_micro, f1_macro, len(trues)

(0.7819148936170213, 0.6186989859015583, 376)

In [108]:
trues[:10], true_test[:10]

([False, False, False, False, False, False, False, False, False, False],
 array([False, False,  True, False,  True, False,  True,  True, False,
        False]))

In [100]:
prob_test[:10]

array([[0.6181944 , 0.3818056 ],
       [0.35442752, 0.64557254],
       [0.26308966, 0.73691034],
       [0.46852008, 0.5314799 ],
       [0.24967518, 0.7503248 ],
       [0.39745495, 0.602545  ],
       [0.5422722 , 0.45772782],
       [0.28258508, 0.7174149 ],
       [0.83450246, 0.16549757],
       [0.76174814, 0.23825192]], dtype=float32)

In [22]:
k = 10

In [279]:
from collections import Counter

for k in [1, 3, 5, 7, 9]:
    preds, trues = [], []
    
    for _i in tqdm(idx_test):
        i = indices[_i]

        sim = [-cos(text_emb[i], text_emb[indices[ii]]) for ii in idx_train]
        _j_list = np.argsort(sim)

        votes = []
        for _k in range(k):
            _j = _j_list[_k]
            votes.append(labels[_j])
        
        counter = Counter(votes)
        pred, _ = counter.most_common(1)[0]
        
        preds.append(pred)
        trues.append(labels[_i])

    f1_micro = f1_score(trues, preds, average='micro')
    f1_macro = f1_score(trues, preds, average='macro')
    print(f1_micro, f1_macro, np.mean(preds))

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 247/247 [00:09<00:00, 26.99it/s]


0.6477732793522267 0.3292464678178964 1.0161943319838056


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 247/247 [00:09<00:00, 27.09it/s]


0.6842105263157895 0.31673980558361897 0.9676113360323887


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 247/247 [00:09<00:00, 26.40it/s]


0.7004048582995951 0.3228280173791875 0.97165991902834


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 247/247 [00:09<00:00, 26.88it/s]


0.7773279352226721 0.37431637244721355 1.0


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 247/247 [00:09<00:00, 26.19it/s]

0.8016194331983806 0.3656840310494907 0.979757085020243





## VALIDATION

In [66]:
k = 5

In [67]:
def cos(a, b):
    cos_sim = np.dot(a, b)/(norm(a)*norm(b))
    return cos_sim

In [77]:
random.seed(2024)

for _i in idx_test:
    i = indices[_i]
    if os.path.exists(f'chatgpt_predict_GEON4/k{k}_{i}_search.txt'):
        print('Exists')
        #continue
    
    today = data[i][0]
    print(today)
    
    user_prompt = f"Your task is to predict whether the S&P 500 price will:\n"
    user_prompt += f"(1) Decrease: decrease by more than 1%\n(2) Increase: increase by more than 1%\n(3) Neutral: change minimally, between -1% to 1%\nin the next market day. "
    user_prompt += f"First, review the following {k} examples of financial summaries and outcomes so that you can refer to when making predictions.\n\n"
    
    sim = [-cos(text_emb[i], text_emb[indices[ii]]) for ii in idx_train]
    _j_list = np.argsort(sim)
    
    outcomes = {}
    for _k in range(k):
        _j = _j_list[_k]
        j = indices[_j]
        
        user_prompt += f"Summary #{_k+1}: {texts[j]}"
        
        if labels[_j] == 0:
            user_prompt += f"\nOutcome #{_k+1}: S&P 500 price decreased by more than 1%\n\n"
            outcomes[_k+1] = 'decrease'
        elif labels[_j] == 1:
            user_prompt += f"\nOutcome #{_k+1}: S&P 500 price changed minimally between -1% to 1%\n\n"
            outcomes[_k+1] = 'neutral'
        else:
            user_prompt += f"\nOutcome #{_k+1}: S&P 500 price increased by more than 1%\n\n"
            outcomes[_k+1] = 'increase'
        
    user_prompt += f"The current financial situation is summarized as follows:\n\n"
    user_prompt += f"Summary: {texts[i]}\n"
    user_prompt += "Outcome:\n\n"
    
    user_prompt += f"From the {k} summaries, identify the weather condition most relevant to the current financial situation. "
    user_prompt += "What was the outcome of the identified summary? "
    user_prompt += "Respond with either \'decrease\', \'increase\', or \'neutral\'. "
    user_prompt += "Response should not include other terms."
    
    #print(user_prompt)
    #break
    
    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
        {
          "role": "system",
          "content": system_prompt
        },
        {
          "role": "user",
          "content": user_prompt
        }
        ],
        temperature=0.7,
        max_tokens=2048,
        top_p=1
    )

    text = response.choices[0].message.content
    
    print(text, labels[_i], '\n')
    
    with open(f'chatgpt_predict_GEON4/k{k}_{i}_search.txt', 'w') as f:
        f.write(text)

Exists
2022-12-06 00:00:00
decrease 0 

Exists
2022-12-07 00:00:00
decrease 2 

Exists
2022-12-08 00:00:00
decrease 1 

Exists
2022-12-09 00:00:00
neutral 1 

Exists
2022-12-12 00:00:00
neutral 2 

Exists
2022-12-13 00:00:00
neutral 1 

Exists
2022-12-14 00:00:00
neutral 1 

Exists
2022-12-15 00:00:00
neutral 1 

Exists
2022-12-16 00:00:00
neutral 0 

Exists
2022-12-19 00:00:00
neutral 1 

Exists
2022-12-20 00:00:00
neutral 2 

Exists
2022-12-21 00:00:00
neutral 2 

Exists
2022-12-22 00:00:00
neutral 1 

Exists
2022-12-23 00:00:00
neutral 1 

Exists
2022-12-27 00:00:00
neutral 2 

Exists
2022-12-28 00:00:00
neutral 1 

Exists
2022-12-29 00:00:00
neutral 0 

Exists
2022-12-30 00:00:00
neutral 2 

Exists
2023-01-03 00:00:00
neutral 2 

Exists
2023-01-04 00:00:00
neutral 2 

Exists
2023-01-05 00:00:00
neutral 0 

Exists
2023-01-06 00:00:00
neutral 1 

Exists
2023-01-09 00:00:00
neutral 2 

Exists
2023-01-10 00:00:00
neutral 0 

Exists
2023-01-11 00:00:00
neutral 1 

Exists
2023-01-12 00:0

In [78]:
pred2bool = {'decrease': 0, 'neutral': 1, 'increase': 2, 'Neutral': 1, 'Decrease': 0, 'Increase': 2, 'Decreased': 0}

preds, trues, probs = [], [], []

print(k)
for _i in idx_test:
    i = indices[_i]
    with open(f'chatgpt_predict_GEON4/k{k}_{i}_search.txt', 'r') as f:
        pred = f.read()
    try:
        preds.append(pred2bool[pred])
    except:
        if 'Neutral' in pred:
            preds.append(1)
        elif 'neutral' in pred:
            preds.append(1)
        elif 'increase' in pred:
            preds.append(2)
        elif 'Increase' in pred:
            preds.append(2)
        elif 'decrease' in pred:
            preds.append(0)
        elif 'Decreased' in pred:
            preds.append(0)
        else:
            print(pred, 'Neutral' in pred)
            preds.append(1)
    
    if preds[-1] == 0:
        probs.append([1., 0., 0.])
    elif preds[-1] == 1:
        probs.append([0., 1., 0.])
    else:
        probs.append([0., 0., 1.])
        
    trues.append(labels[_i])
    
probs = np.array(probs)
f1_micro = f1_score(trues, preds, average='micro')
f1_macro = f1_score(trues, preds, average='macro')
f1_micro, f1_macro

5


(0.562753036437247, 0.32593443162263386)

In [76]:
for lmbda in np.arange(0, 1.01, 0.1):
    #probs_comb = (lmbda * probs_zero) + ((1 - lmbda) * probs_lm)
    probs_comb = (lmbda * probs) + ((1 - lmbda) * probs_lm)
    #probs_comb = (lmbda * (probs + probs_zero)/2) + ((1 - lmbda) * probs_lm)
    preds = np.argmax(probs_comb, 1)

    f1_micro = f1_score(trues, preds, average='micro')
    f1_macro = f1_score(trues, preds, average='macro')
    print(f1_micro, f1_macro)

0.5910931174089069 0.39013383131030194
0.5910931174089069 0.3700378857477841
0.5910931174089069 0.3882697878702925
0.5910931174089069 0.3922390586740511
0.5789473684210527 0.3937442508643529
0.5789473684210527 0.3937442508643529
0.5789473684210527 0.3937442508643529
0.5789473684210527 0.3937442508643529
0.5789473684210527 0.3937442508643529
0.5789473684210527 0.3937442508643529
0.5789473684210527 0.3937442508643529


In [86]:
best_micro, best_macro, best_config = -1, -1, None
for lmbda1 in np.arange(0, 1.01, 0.1):
    for lmbda2 in np.arange(0, 1.01, 0.1):
        if lmbda1 + lmbda2 > 1:
            continue
        #probs_comb = (lmbda * probs_zero) + ((1 - lmbda) * probs_lm)
        #probs_comb = (lmbda * probs) + ((1 - lmbda) * probs_time)
        probs_comb = (lmbda1 * probs_zero) + (lmbda2 * probs) + ((1 - lmbda1 - lmbda2) * probs_lm)
        preds = np.argmax(probs_comb, 1)

        f1_micro = f1_score(trues, preds, average='micro')
        f1_macro = f1_score(trues, preds, average='macro')
        
        if f1_macro > best_macro:
            best_micro = f1_micro
            best_macro = f1_macro
            best_config = (lmbda1, lmbda2)

print(best_config)
print(best_micro, best_macro)

(0.0, 0.1)
0.6558704453441295 0.39433014051293397


## Pairwise Ranking

In [308]:
from collections import defaultdict

random.seed(2024)

for _i in idx_test:
    i = indices[_i]
    if os.path.exists(f'chatgpt_predict_ret4/k{k}_{i}_pair.txt'):
        print('Exists')
        #continue
    
    today = data[i][0]
    print(today)
    
    sim = [-cos(text_emb[i], text_emb[indices[ii]]) for ii in idx_train]
    _j_list = np.argsort(sim)
    
    scores = defaultdict(int)
        
    for _k1 in range(k):
        for _k2 in range(_k1+1, k):
            
            k1 = _j_list[_k1]
            k2 = _j_list[_k2]
            
            k1_index = indices[k1]
            k2_index = indices[k2]
    
            user_prompt = f"Given the financial summary of the last 20 market days:\n\n"
            user_prompt += f"Summary: {texts[i]}\n\n"
            user_prompt += "which of the following two summaries is more similar to the above summary?\n\n"

            user_prompt += f"Summary A: {texts[k1_index]}\n\n"
            user_prompt += f"Summary B: {texts[k2_index]}\n\n"
            
            user_prompt += 'Respond with either \'Summary A\' or \'Summary B\'. Response should not include other terms.'
            
            #print(rains[k1+1], rains[k2+1])
            
            response = client.chat.completions.create(
            model="gpt-4-1106-preview",
                messages=[
                {
                  "role": "user",
                  "content": user_prompt
                }
                ],
                temperature=0.7,
                max_tokens=2048,
                top_p=1
            )

            text = response.choices[0].message.content
            #print(text, rains[_i+1], '\n')
            
            if 'A' in text:
                scores[_k1] += 1
            elif 'B' in text:
                scores[_k2] += 1
                
    
    with open(f'chatgpt_predict_ret4/k{k}_{i}_pair.txt', 'w') as f:
        strs = []
        for _k1 in range(k):
            k1 = _j_list[_k1]
            k1_index = indices[k1]
            score_k = scores[_k1]
            outcome = labels[k1]
            strs.append(f'{k1},{score_k},{outcome}')
        f.write(' '.join(strs) + '\n')
        print(' '.join(strs))

    
    best_k = max(scores, key=scores.get)
    best_k = _j_list[best_k]
    best_k_index = indices[best_k]
    print('Answer:', labels[_i])
    print('Prediction:', labels[best_k], '\n')

2022-12-06 00:00:00
714,4,0 720,3,1 502,1,1 562,0,1 584,2,1
Answer: 0
Prediction: 0 

2022-12-07 00:00:00
290,3,2 439,1,1 268,0,0 75,4,1 178,2,1
Answer: 2
Prediction: 1 

2022-12-08 00:00:00
412,1,0 355,2,2 137,3,2 447,0,1 423,4,2
Answer: 1
Prediction: 2 

2022-12-09 00:00:00
72,3,1 456,1,1 75,4,1 590,0,1 355,2,2
Answer: 1
Prediction: 1 

2022-12-12 00:00:00
544,2,1 541,4,1 529,3,2 543,0,1 635,1,1
Answer: 2
Prediction: 1 

2022-12-13 00:00:00
683,4,1 312,0,1 589,3,1 89,1,1 604,2,1
Answer: 1
Prediction: 1 

2022-12-14 00:00:00
720,3,1 426,0,1 683,4,1 273,2,0 328,1,2
Answer: 1
Prediction: 1 

2022-12-15 00:00:00
16,4,1 427,0,1 232,3,1 154,1,1 391,2,1
Answer: 1
Prediction: 1 

2022-12-16 00:00:00
210,2,1 202,2,1 461,0,1 32,4,1 600,2,0
Answer: 0
Prediction: 1 

2022-12-19 00:00:00
417,0,2 152,4,1 309,1,1 612,3,1 320,2,2
Answer: 1
Prediction: 1 

2022-12-20 00:00:00
426,2,1 319,3,2 731,4,1 448,1,2 502,0,1
Answer: 2
Prediction: 1 

2022-12-21 00:00:00
394,1,2 300,3,2 90,2,1 367,4,1 399,0,1
A

In [328]:
preds, trues = [], []

for gamma in np.arange(0, 1.01, 0.1):

    for _i in idx_test:
        i = indices[_i]
        with open(f'chatgpt_predict_ret4/k{k}_{i}_pair.txt', 'r') as f:
            _scores = f.read()

        scores = {}
        for j, _score in enumerate(_scores.split()):
            index, score, _label = [int(x) for x in _score.split(',')]
            scores[j] = (_label, score)

        with open(f'chatgpt_predict_text/{i}.txt', 'r') as f:
            pred = f.read()

        if pred2bool[pred] == 0:
            label_scores_text = [1.0, 0.0, 0.0]
        elif pred2bool[pred] == 1:
            label_scores_text = [0.0, 1.0, 0.0]
        else:
            label_scores_text = [0.0, 0.0, 1.0]
        label_scores_text = np.array(label_scores_text)

        """
        # max
        best_score, best_label = -1, None
        for j in range(k):
            _label, score = scores[j]
            if score > best_score:
                best_score, best_label = score, _label
        preds.append(best_label)
        """


        # sum
        label2score = defaultdict(int)
        for j in range(k):
            _label, score = scores[j]
            label2score[_label] += score
        best_label = max(label2score, key=label2score.get)
        #preds.append(best_label)

        label_scores = np.array([float(label2score[l]) for l in range(3)])
        label_scores = label_scores / sum(label_scores)

        label_scores = (gamma * label_scores) + ((1 - gamma) * label_scores_text)

        preds.append(np.argmax(label_scores))

        trues.append(labels[_i])

    f1_micro = f1_score(trues, preds, average='micro')
    f1_macro = f1_score(trues, preds, average='macro')
    print(f1_micro, f1_macro)

0.6882591093117408 0.3434848484848485
0.6882591093117408 0.3434848484848485
0.6882591093117408 0.3434848484848485
0.6882591093117408 0.3434848484848485
0.6882591093117408 0.3434848484848485
0.6909581646423751 0.3443144243144243
0.6963562753036437 0.3492029158256098
0.7009109311740891 0.3535331653854706
0.7031039136302294 0.35643058267870115
0.7032388663967611 0.3576897012362202
0.7011409642988591 0.35749225883004687


In [321]:
pred2bool = {'decrease': 0, 'neutral': 1, 'increase': 2, 'Neutral': 1}

preds, trues = [], []

for _i in idx_test:
    i = indices[_i]
    with open(f'chatgpt_predict_text/{i}.txt', 'r') as f:
        pred = f.read()
    preds.append(pred2bool[pred])
    trues.append(labels[_i])
    
f1_micro = f1_score(trues, preds, average='micro')
f1_macro = f1_score(trues, preds, average='macro')
f1_micro, f1_macro

(0.6882591093117408, 0.3434848484848485)

In [101]:
pred2bool = {'decrease': 0, 'neutral': 1, 'increase': 2, 'Neutral': 1, 'Decrease': 0, 'Increase': 2, 'Decreased': 0}

preds, trues, probs_text = [], [], []

print(k)
for _i in idx_test:
    i = indices[_i]
    with open(f'chatgpt_predict_text/{i}.txt', 'r') as f:
        pred = f.read()
        
    try:
        preds.append(pred2bool[pred])
    except:
        if 'Neutral' in pred:
            preds.append(1)
        elif 'increase' in pred:
            preds.append(2)
        else:
            print(pred, 'Neutral' in pred)
    trues.append(labels[_i])
    
probs_text = np.array(probs_text)
f1_micro = f1_score(trues, preds, average='micro')
f1_macro = f1_score(trues, preds, average='macro')
f1_micro, f1_macro, len(trues)

5


(0.6882591093117408, 0.3434848484848485, 247)

In [102]:
for lmbda in np.arange(0, 1.01, 0.05):
    p = lmbda * probs + (1 - lmbda) * probs_text
    preds = np.argmax(p, 1)
    f1_micro = f1_score(trues, preds, average='micro')
    f1_macro = f1_score(trues, preds, average='macro')
    f1_micro, f1_macro, len(trues)
    print(f1_micro, f1_macro)

NameError: name 'probs' is not defined

In [81]:
text2bool = {'True': True, 'False': False, 'False.': False}

validate = {}
for _i in idx_train:
    i = indices[_i]
    with open(f'chatgpt_validate/{city}_{i}.txt', 'r') as f:
        validate[i] = text2bool[f.read()]

In [91]:
random.seed(2024)

system_prompt = f"Your job is to act as a professional weather forecaster. You will be given a summary of the weather from the past 24 hours. Based on this information, your task is to predict whether it will rain in the next 24 hours."
print(system_prompt, '\n')

for _i in idx_test:
    i = indices[_i]
    if os.path.exists(f'chatgpt_predict_text_rag-val/{city}_k{k}_{i}.txt'):
        print('Exists')
        continue
    
    today = format_date(dates[i])
    print(today)
    
    user_prompt = f"Your task is to predict whether it will rain or not in {city_full_name[city]} in the next {window_size} hours."
    user_prompt += f"The weather of the past 24 hours is summarized as follows:\n\n"
    user_prompt += f"{texts[i]}\n\n"
    user_prompt += f"Below are examples of weather summaries and outcomes that you can refer to when making predictions.\n\n"
    
    #sim = [-cos(embs_text[_i], embs_text[ii]) for ii in idx_train]
    
    _j_list = np.argsort(-sim[_i][:num_train-1])
    
    #_j_list = np.argsort(sim)
    
    cnt, _k = 0, 0
    while cnt < k:
        _j = _j_list[_k]
        j = indices[_j]
        _k += 1
        
        if not validate[j]:
            continue
        
        cnt += 1
        user_prompt += f"Summary #{cnt}: {texts[j]}"
        
        if rains[_j+1]:
            user_prompt += f"\nOutcome #{cnt}: It rained.\n\n"
        else:
            user_prompt += f"\nOutcome #{cnt}: It did not rain.\n\n"
    
    
    #user_prompt += f"Carefully refer to the examples and respond with either \'rain\' or \'not rain\'. Do not provide any other details. "
    user_prompt += f"Identify the example that most closely matches the current weather situation in terms of rain over the next 24 hours. "
    #user_prompt += "Compare the current weather situation with the given examples, and use them collectively to predict whether it will rain in the next 24 hours. "
    user_prompt += "Respond with either 'rain' or 'not rain'. Your response should not include other terms."
    #user_prompt += '\nAlso provide the rationale behind your prediction, focusing on how you used the examples when making predictions.'
    #print(user_prompt)
    #break
    
    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
        {
          "role": "system",
          "content": system_prompt
        },
        {
          "role": "user",
          "content": user_prompt
        }
        ],
        temperature=0.7,
        max_tokens=2048,
        top_p=1
    )

    text = response.choices[0].message.content
    print(text, '\n')

    with open(f'chatgpt_predict_text_rag-val/{city}_k{k}_{i}.txt', 'w') as f:
        f.write(text)
    
    #break

Your job is to act as a professional weather forecaster. You will be given a summary of the weather from the past 24 hours. Based on this information, your task is to predict whether it will rain in the next 24 hours. 

Exists
Exists
Exists
Exists
Exists
Exists
Exists
Exists
Exists
Exists
Exists
November 28th
rain 

November 29th
rain 

November 30th
rain 

December 1st
not rain 

December 2nd
not rain 

December 3rd
not rain 

December 4th
not rain 

December 5th
rain 

December 6th
not rain 

December 7th
not rain 

December 8th
rain 

December 9th
not rain 

December 10th
not rain 

December 11th
not rain 

December 12th
rain 

December 13th
not rain 

December 14th
not rain 

December 15th
not rain 

December 16th
not rain 

December 17th
rain 

December 18th
not rain 

December 19th
not rain 

December 20th
rain 

December 21st
not rain 

December 22nd
rain 

December 23rd
not rain 

December 24th
rain 

December 25th
not rain 

December 26th
rain 

December 27th
rain 

December 2

In [90]:
max(validate.keys())

27072

In [92]:
pred2bool = {'rain': True, 'not rain': False, 'Not rain.': False, 'Rain': True, 'Not rain': False}

preds, trues = [], []

for _i in idx_test:
    i = indices[_i]
    with open(f'chatgpt_predict_text_rag-val/{city}_k{k}_{i}.txt', 'r') as f:
        pred = f.read()
    try:
        preds.append(pred2bool[pred])
    except:
        if 'not rain' in pred:
            preds.append(False)
        else:
            preds.append(True)
    trues.append(rains[_i+1])
    
f1_micro = f1_score(trues, preds, average='micro')
f1_macro = f1_score(trues, preds, average='macro')
f1_micro, f1_macro

(0.6808510638297872, 0.6185064935064934)

In [63]:
sum(preds)/len(preds), sum(trues)/len(trues)

(0.34308510638297873, 0.18882978723404256)

In [80]:
for i in idx_test:
    s = indices[i]
    seq_x_time = time_series[s:s+seq_len]
    seq_y = rains[i+1]
    seq_x_text = texts[i]

November 18th
Prediction: Rain

Rationale: The recent weather summary for New York City exhibits a pattern of significant fluctuations in humidity, with a period of complete saturation and a sharp rise towards the evening, which could be indicative of moisture-laden air and the potential for precipitation. Additionally, the noted dip in air pressure later in the day is often associated with an approaching weather system that could result in unsettled weather, such as rain. The shift in wind direction from northwest to southerly in the evening can bring warmer, moister air from the south, which when combined with the existing conditions, increases the likelihood of precipitation. This assessment is based on comparing the provided weather summaries and outcomes, where stable pressure and humidity levels tended to indicate no rain, while fluctuations in these variables, especially increases in humidity and decreases in pressure, pointed towards the possibility of rain. 

