In [None]:
import os
import pickle as pkl
import random

import numpy as np
from openai import OpenAI
from sklearn.metrics import f1_score

## Load Dataset

In [None]:
# Key of the market index this notebook run targets.
indicator = 'sp500'

# Display name of each supported index, used when building the prompts.
indicator2name = dict(
    sp500='S&P 500',
    nikkei='Nikkei 225',
)

In [None]:
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
def _read_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pkl.load(handle)


# Sample indices, the time-series data, and the labels for the chosen
# indicator, all read from the current working directory.
indices = _read_pickle('indices.pkl')
data = _read_pickle('time_series.pkl')
labels = _read_pickle(f'labels_{indicator}.pkl')

In [None]:
# Read the summary text for every index from gpt_summary/<index>.txt and
# keep it in a dict keyed by that index.
texts = {}
for idx in indices:
    summary_path = os.path.join('gpt_summary', f'{idx}.txt')
    with open(summary_path, 'r') as summary_file:
        texts[idx] = summary_file.read()

In [None]:
# Window length quoted in the prompts ("last {window_size} market days").
window_size = 20
# Length of the first axis of the loaded series.
data_size = data.shape[0]
print(data_size, window_size, len(indices))

In [None]:
# Split the positions 0..len(indices)-1 into three consecutive ranges:
# the first 60% for training, the last 20% for testing, and whatever
# remains in between for validation.
data_size = len(indices)

num_train = int(0.6 * data_size)
num_test = int(0.2 * data_size)
num_vali = data_size - num_train - num_test

valid_start = num_train
test_start = valid_start + num_vali

idx_train = np.arange(valid_start)
idx_valid = np.arange(valid_start, test_start)
idx_test = np.arange(test_start, test_start + num_test)

In [None]:
# Load the stored embeddings for the chosen indicator.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
emb_path = f'../../encoder/embeddings/finance_{indicator}.pkl'
with open(emb_path, 'rb') as emb_file:
    embs = pkl.load(emb_file)

# Key each embedding by its sample index (position `pos` in `indices`).
# NOTE(review): indices[:-1] leaves the final index without an embedding --
# presumably `embs` holds one entry fewer than `indices`; confirm.
text_emb = {key: embs[pos] for pos, key in enumerate(indices[:-1])}

## Prompt GPT-4

In [None]:
def cos(a, b):
    """Return the cosine similarity of two 1-D vectors.

    Computed as ``dot(a, b) / (||a|| * ||b||)`` with Euclidean norms.

    Args:
        a: First vector (array-like accepted by ``np.dot``).
        b: Second vector, same length as ``a``.

    Returns:
        The cosine similarity as a NumPy float.
    """
    # The norm is referenced through the numpy namespace, so the function
    # works without a separate `from numpy.linalg import norm`.
    cos_sim = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
    return cos_sim

In [None]:
# Number of in-context examples
k = 5

# OPEN AI API Key
API_KEY = ''

In [None]:
# System message shared by every request: sets the forecaster role and the
# three-way prediction task for the selected index.
system_prompt = (
    "Your job is to act as a professional financial forecaster. "
    "You will be given a summary of the financial situation of the past 20 market days. "
    f"Based on this information, your task is to predict whether the {indicator2name[indicator]} price "
    "will decrease by more than 1%, increase by more than 1%, or change minimally in the next market day."
)
print(system_prompt)

In [None]:
random.seed(2024)

# Create the API client used below. An empty API_KEY is passed as None so the
# client falls back to the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=API_KEY or None)

# Make sure the output directory exists before any request is sent, so a
# completed API call is never lost to a failing file write.
output_dir = 'gpt_predict_in-context'
os.makedirs(output_dir, exist_ok=True)

index_name = indicator2name[indicator]

# Prompt text that is identical for every test sample, built once.
prompt_header = (
    f"Your task is to predict whether the {index_name} price will:\n"
    "(1) Decrease: decrease by more than 1%\n(2) Increase: increase by more than 1%\n(3) Neutral: change minimally, between -1% and 1%\nin the next market day. "
    f"First, review the following {k} examples of financial summaries and {index_name} outcomes so that you can refer to when making predictions.\n\n"
)
prompt_footer = (
    "Outcome:\n\n"
    "Refer to the provided examples and predict the outcome of the current financial summary. "
    "Respond your prediction with either 'decrease', 'increase' or 'neutral'. "
    "Response should not include other terms."
)

# For every test sample: pick the k most similar training summaries as
# in-context examples, ask the model for a prediction, and write the raw
# answer to one text file per sample.
for _i in idx_test:
    i = indices[_i]

    today = data[i][0]
    print(today)

    # text_emb is keyed by indices[:-1], so the final sample has no embedding.
    # Skip it explicitly instead of failing with a KeyError.
    if i not in text_emb:
        print(f"Skipping {i}: no text embedding available")
        continue

    user_prompt = prompt_header

    # Negated cosine similarity, so an ascending argsort lists the most
    # similar training samples first.
    sim = [-cos(text_emb[i], text_emb[indices[ii]]) for ii in idx_train]
    _j_list = np.argsort(sim)

    for _k, _pos in enumerate(_j_list[:k]):
        # argsort yields positions within idx_train; map them back to
        # positions in `indices` / `labels`.
        _j = idx_train[_pos]
        j = indices[_j]

        _text = texts[j].replace('\n\n', ' ')
        user_prompt += f"Summary #{_k+1}: {_text}"

        # Label encoding used here: 0 -> Decreased, 1 -> Neutral,
        # any other value -> Increased.
        if labels[_j] == 0:
            user_prompt += f"\nOutcome #{_k+1}: Decreased\n\n"
        elif labels[_j] == 1:
            user_prompt += f"\nOutcome #{_k+1}: Neutral\n\n"
        else:
            user_prompt += f"\nOutcome #{_k+1}: Increased\n\n"

    user_prompt += f"The financial situation of the last {window_size} market days is summarized as follows:\n\n"

    _text = texts[i].replace('\n\n', ' ')
    user_prompt += f"Summary: {_text}\n"
    user_prompt += prompt_footer

    response = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.7,
        max_tokens=2048,
        top_p=1
    )

    text = response.choices[0].message.content

    with open(f'{output_dir}/k{k}_{i}_{indicator}_ref.txt', 'w') as f:
        f.write(f'{text}')