In [2]:
import os
os.environ["OPENAI_API_KEY"] = "OPENAI_API_KEY"

In [3]:
from openai import OpenAI, APITimeoutError, RateLimitError
import pandas as pd, numpy as np, re, time, pickle, os, random

client = OpenAI()

In [4]:
df = pd.read_parquet("prices_feat.parquet")

TICKERS   = ["AAPL","MSFT","AMZN","GOOG","META"]
LOOKBACK  = 30
feat_cols = ["momentum_rsi", "trend_macd", "trend_sma_fast",
             "volatility_atr", "volume_obv"]

TRAIN_DF  = df[df["Date"] <= "2023-12-31"].reset_index(drop=True)
TEST_DF   = df[df["Date"].dt.year == 2024].reset_index(drop=True)

def row_to_line(date_str, row):
    return date_str + " " + ", ".join(f"{k}:{row[k]:.2g}" for k in feat_cols)

In [5]:
def make_sample(idx):
    cur  = TRAIN_DF.iloc[idx]
    hist = TRAIN_DF.iloc[idx-LOOKBACK:idx]
    prompt = (
        f"Below are {LOOKBACK} consecutive trading days of technical indicators for {cur['Ticker']}.\n"
        "Predict the percentage return for the NEXT trading day."
    )
    text = "\n".join(row_to_line(d.strftime('%Y-%m-%d'), r)
                     for d, r in zip(hist["Date"], hist.to_dict('records')))
    answer = f"{cur['return_fwd']:.5f}"
    return {"prompt": prompt + "\n\n" + text, "answer": answer}

samples = []
for tk in TICKERS:                          
    cand = TRAIN_DF[TRAIN_DF["Ticker"] == tk].index
    idx  = random.choice(cand[cand > LOOKBACK])
    samples.append(make_sample(idx))

FEWSHOT_MSGS = [{"role":"system","content":"You output ONLY the number."}]
for ex in samples:
    FEWSHOT_MSGS.append({"role":"user","content":ex["prompt"]})
    FEWSHOT_MSGS.append({"role":"assistant","content":ex["answer"]})

print("Few-shot examples ready:", len(samples))

Few-shot examples ready: 5


In [6]:
all_idx  = np.arange(LOOKBACK, len(TEST_DF))      
idx_list = sorted(np.random.choice(
    all_idx,
    size=int(len(all_idx) * 0.20),  
    replace=False
))

In [7]:
MODEL_NAME = "gpt-3.5-turbo-0125"
CACHE_FILE = "fewshot_preds.pkl"
SLEEP_SEC  = 1.3

few_preds = pickle.load(open(CACHE_FILE,"rb")) if os.path.exists(CACHE_FILE) else []
done_idx  = {i for i,_ in few_preds}

for i in idx_list:
    if i in done_idx:
        continue
    cur  = TEST_DF.iloc[i]
    hist = TEST_DF.iloc[i-LOOKBACK:i]
    if (hist["Ticker"] != cur["Ticker"]).any():
        continue

    data_block = "\n".join(row_to_line(d.strftime('%Y-%m-%d'), r)
                           for d, r in zip(hist["Date"], hist.to_dict('records')))
    user_msg = {
        "role":"user",
        "content": f"Below are {LOOKBACK} consecutive trading days of technical indicators for {cur['Ticker']}.\n"
                   "Predict the percentage return for the NEXT trading day.\n\n" + data_block
    }

    try:
        resp = client.chat.completions.create(
            model      = MODEL_NAME,
            messages   = FEWSHOT_MSGS + [user_msg],
            temperature= 0,
            timeout    = 30
        )
        txt  = resp.choices[0].message.content
        y_hat = float(re.search(r"[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?", txt).group())
    except (RateLimitError, APITimeoutError) as e:
        print("API error:", e)
        time.sleep(10)
        continue

    few_preds.append((i, y_hat))
    if len(few_preds) % 50 == 0:
        pickle.dump(few_preds, open(CACHE_FILE,"wb"))
        print(f"{len(few_preds)} / {len(TEST_DF)-LOOKBACK} done")
    time.sleep(SLEEP_SEC)

pickle.dump(few_preds, open(CACHE_FILE,"wb"))
print("Few-shot inference finished:", len(few_preds))

50 / 1220 done
100 / 1220 done
150 / 1220 done
200 / 1220 done
Few-shot inference finished: 219


In [9]:
def daily_sharpe(sig, returns, dates):
    dfp = pd.DataFrame({"Date": dates, "sig": sig, "ret": returns})
    daily = (dfp.groupby("Date", group_keys=False)
                  .apply(lambda g: g.loc[g.sig==1,"ret"].mean())
                  .dropna())
    return daily.mean() / daily.std(ddof=1) * np.sqrt(252)

idx, y_hat = zip(*few_preds)
y_pred = np.array(y_hat)
y_true = TEST_DF.loc[list(idx), "return_fwd"].values
dates  = TEST_DF.loc[list(idx), "Date"]

mse  = np.mean((y_true - y_pred)**2)
rmse = np.sqrt(mse)
sig  = np.sign(y_pred); sig[sig==0] = -1
sharpe = daily_sharpe(sig, y_true, dates)

print("\n=== Few-shot LLM · Test-2024 ===")
print({"MSE": round(mse,6),
       "RMSE": round(rmse,6),
       "Sharpe": round(sharpe,3)})


=== Few-shot LLM · Test-2024 ===
{'MSE': 0.000322, 'RMSE': 0.017941, 'Sharpe': 0.13}


  daily = (dfp.groupby("Date", group_keys=False)
