In [2]:
import os
os.environ["OPENAI_API_KEY"] = "OPENAI_API_KEY"

In [3]:
from openai import OpenAI, APITimeoutError, RateLimitError
import pandas as pd, numpy as np, re, time, pickle, os


client = OpenAI()  

df = pd.read_parquet("prices_feat.parquet")    
LOOKBACK = 30
TEST_DF  = df[df["Date"].dt.year == 2024].reset_index(drop=True)

feat_cols = ["momentum_rsi", "trend_macd", "trend_sma_fast",
             "volatility_atr", "volume_obv"]

In [4]:
def row_to_line(date_str, row):
    return date_str + " " + ", ".join(f"{k}:{row[k]:.2g}" for k in feat_cols)

_float_re = re.compile(r"[-+]?\d*\.?\d+(?:[eE][-+]?\d+)?")
def parse_float(txt, default=0.0):
    m = _float_re.search(txt)
    return float(m.group()) if m else default

In [5]:
all_idx  = np.arange(LOOKBACK, len(TEST_DF))      
idx_list = sorted(np.random.choice(
    all_idx,
    size = int(len(all_idx) * 0.20),          
    replace = False
))

In [6]:
CACHE = "plain_preds.pkl"
preds_plain = pickle.load(open(CACHE,"rb")) if os.path.exists(CACHE) else []
done_idx    = {i for i,_ in preds_plain}

for i in idx_list:
    if i in done_idx:
        continue

    cur  = TEST_DF.iloc[i]
    hist = TEST_DF.iloc[i-LOOKBACK:i]
    if (hist["Ticker"] != cur["Ticker"]).any():
        continue

    prompt = (
        f"You are a quantitative analyst.\n"
        f"Below are {LOOKBACK} consecutive trading days of technical indicators for {cur['Ticker']}.\n"
        "Predict the percentage return for the NEXT trading day. "
        "Respond with a single decimal number such as 0.0035.\n\n" +
        "\n".join(row_to_line(d.strftime('%Y-%m-%d'), r)
                  for d, r in zip(hist["Date"], hist.to_dict('records')))
    )

    try:
        resp = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            messages=[{"role":"user","content":prompt}],
            temperature=0,
            timeout=30
        )
        y_hat = parse_float(resp.choices[0].message.content)
    except (RateLimitError, APITimeoutError) as e:
        print("API error：", e)
        time.sleep(10)
        continue

    preds_plain.append((i, y_hat))

    if len(preds_plain) % 50 == 0:
        pickle.dump(preds_plain, open(CACHE,"wb"))
        print(f"Finished {len(preds_plain)} / {len(TEST_DF)-LOOKBACK}")

    time.sleep(1.3)

pickle.dump(preds_plain, open(CACHE,"wb"))
print("plain-prompt finished：", len(preds_plain))

Finished 400 / 1220
Finished 450 / 1220
plain-prompt finished： 494


In [8]:
def daily_sharpe(sig, returns, dates):
    dfp = pd.DataFrame({"Date": dates, "sig": sig, "ret": returns})
    daily = (dfp.groupby("Date", group_keys=False)
                  .apply(lambda g: g.loc[g.sig==1,"ret"].mean())
                  .dropna())
    return daily.mean() / daily.std(ddof=1) * np.sqrt(252)

idx, y_hat = zip(*preds_plain)
y_pred = np.array(y_hat)
y_true = TEST_DF.loc[list(idx), "return_fwd"].values
dates  = TEST_DF.loc[list(idx), "Date"]

mse  = np.mean((y_true - y_pred)**2)
rmse = np.sqrt(mse)
sig  = np.sign(y_pred); sig[sig==0] = -1
sharpe = daily_sharpe(sig, y_true, dates)

print("\n=== Plain-Prompt LLM • Test-2024 ===")
print({"MSE": round(mse,6),
       "RMSE": round(rmse,6),
       "Sharpe": round(sharpe,3)})


=== Plain-Prompt LLM • Test-2024 ===
{'MSE': 0.000525, 'RMSE': 0.022919, 'Sharpe': 0.808}


  daily = (dfp.groupby("Date", group_keys=False)
