# Experiment 1 - Run Trading Strategies By Prompt Version and Risk Configuration

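This notebook runs every prompt configuration (v1–v4) created during this research, for both risk configurations and every ticker, and then aggregates the resulting trading metrics for comparison.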

In [None]:
import json
import os
import sys
import warnings
from pathlib import Path
from typing import Any

import csv
import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI

# Silence every warning for the rest of the session so long runs stay readable.
# NOTE(review): this also hides deprecation and numerical warnings from pandas and
# the project helpers; consider narrowing the filter.
warnings.filterwarnings("ignore")


# One-off dependency installation, kept for reference only: the `if False` guard
# means these pinned installs never run. Flip the guard to True (or run the lines
# by hand) on a fresh environment.
if False:
    %pip install python-dotenv==1.0.1
    %pip install tqdm==4.66.5
    %pip install openai==1.58.1

# Load the dotenv IPython extension, then read variables from a .env file.
# override=False presumably leaves variables that are already set in the process
# environment untouched — confirm against the python-dotenv documentation.
%load_ext dotenv
load_dotenv(override=False)

# The notebook assumes it is launched from the project root: every relative path
# below is resolved against the current working directory.
PROJECT_ROOT = Path.cwd()

# Put the project's `utils` directory at the front of sys.path (once), so modules
# inside it can be imported by their bare names.
extra_module_paths = [(PROJECT_ROOT / "utils").resolve()]
for candidate in extra_module_paths:
    candidate_str = str(candidate)
    if candidate_str not in sys.path:
        sys.path.insert(0, candidate_str)

# DATA_PATH is optional and falls back to <project root>/data.
DATA_PATH = Path(os.environ.get("DATA_PATH", PROJECT_ROOT / "data"))
# FUNDAMENTALS_PATH is mandatory: a missing variable raises KeyError right here.
FUNDAMENTALS_PATH = Path(os.environ["FUNDAMENTALS_PATH"])
LLM_PROMPTS_PATH = DATA_PATH / "prompts"
HISTORIC_PATH = DATA_PATH / "historic"
MACRO_PATH = DATA_PATH / "macro"
OPTIONS_PATH = DATA_PATH / "options"
# NOTE(review): LLM_OUTPUT_PATH points at the same directory as LLM_PROMPTS_PATH,
# so generated results are written next to the prompt files — confirm this is intended.
LLM_OUTPUT_PATH = DATA_PATH / "prompts"
LOGS_PATH = Path("logs")

# Create the writable directories up front so later cells can write without checks.
for path in (LLM_OUTPUT_PATH, LOGS_PATH):
    path.mkdir(parents=True, exist_ok=True)

from tqdm.notebook import tqdm

from utils.data_utils import (
    HIGH_OBJECTIVES,
    HIGH_RISK_PROFILE,
    LOW_OBJECTIVES,
    LOW_RISK_PROFILE,
    PERSONA,
    evaluate_trading_metrics,
    generate_strategy_for_ticker,
    plot_llm_trade,
)


## Environment and Constants

In [None]:
# Model name and API key come from the environment (populated from .env by the
# setup cell); either may be None if the variable is not set.
OPENAI_MODEL = os.getenv("OPENAI_MODEL")  # e.g. "gpt-4.1-nano"
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Experimentation window used as the default by the loading and experiment helpers.
STARTDATE = "2018-01-01"
ENDDATE = "2020-01-01"

# Show only whether a key is configured. Echoing the key itself would store the
# secret in the saved notebook output.
"OPENAI_API_KEY is set" if OPENAI_API_KEY else "OPENAI_API_KEY is NOT set"

## LLM Setup

In [None]:
# Shared OpenAI client, built with the API key read from the environment; it is
# the default `client` argument of run_llm_trade_experiment.
OPENAI_CLIENT = OpenAI(api_key=OPENAI_API_KEY)

In [None]:
def load_stock_data(ticker: str, start: str = STARTDATE, end: str = ENDDATE) -> pd.DataFrame:
    """Return the engineered features of ``ticker`` restricted to ``start``..``end``.

    Reads ``engineered_<ticker>_data.parquet`` from ``HISTORIC_PATH``, indexes the
    frame by its ``Date`` column converted to UTC timestamps, and slices it by label
    between the two (UTC-parsed) bounds.

    Raises:
        FileNotFoundError: if the parquet file for ``ticker`` does not exist.
    """

    parquet_path = HISTORIC_PATH / f"engineered_{ticker}_data.parquet"
    if not parquet_path.exists():
        raise FileNotFoundError(f"Engineered data not found for {ticker}: {parquet_path}")

    # Both bounds are parsed as UTC so they compare cleanly with the UTC index below.
    window = slice(pd.to_datetime(start, utc=True), pd.to_datetime(end, utc=True))

    features = pd.read_parquet(parquet_path).set_index("Date")
    features.index = pd.to_datetime(features.index, utc=True)
    return features.loc[window]

In [None]:
def run_llm_trade_experiment(
    ticker_df: pd.DataFrame,
    ticker: str,
    prompt_version: str,
    risk_version: str,
    *,
    prompt_path: Path = LLM_PROMPTS_PATH,
    output_path: Path = LLM_OUTPUT_PATH,
    client: Any = OPENAI_CLIENT,
    model: str | None = OPENAI_MODEL,
    start_date: str = STARTDATE,
    end_date: str = ENDDATE,
    news_yaml_file: str | None = None,
    plot: bool = True,
) -> tuple[dict[str, float | None], pd.DataFrame]:
    """Run the end-to-end LLM trading experiment for a single ticker.

    Artifacts are stored under ``output_path`` in ``results/``, ``figures/`` and
    ``response/`` sub-trees keyed by ``risk_version`` / ``prompt_version`` (and
    ``ticker`` for results and figures). When the metrics JSON, the trades CSV and
    the two always-produced figures already exist, they are loaded from disk and
    the LLM is not called again.

    Args:
        ticker_df: Feature frame for ``ticker``, forwarded to
            ``generate_strategy_for_ticker``.
        ticker: Ticker symbol; also used as the leaf directory name.
        prompt_version: Suffix of the strategy prompt file
            (``strat_prompt_<prompt_version>.yml`` inside ``prompt_path``).
        risk_version: ``"r"`` selects the high-risk profile and objectives; any
            other value selects the low-risk ones.
        prompt_path: Directory containing the prompt YAML files.
        output_path: Root directory for responses, results and figures.
        client: LLM client forwarded to ``generate_strategy_for_ticker``.
        model: Model name forwarded to ``generate_strategy_for_ticker``.
        start_date: Window start forwarded to ``generate_strategy_for_ticker``.
        end_date: Window end forwarded to ``generate_strategy_for_ticker``.
        news_yaml_file: Optional news prompt file name inside ``prompt_path``;
            when omitted, ``max_news`` is set to 0.
        plot: Forwarded to ``plot_llm_trade``.

    Returns:
        ``(metrics, trades)``: the metrics dictionary and the trades frame. On a
        cache hit the frame is re-read from CSV, so its dtypes may differ from a
        freshly generated one.
    """

    results_dir = (output_path / "results" / risk_version / prompt_version / ticker).resolve()
    figures_dir = (output_path / "figures" / risk_version / prompt_version / ticker).resolve()
    results_dir.mkdir(parents=True, exist_ok=True)
    figures_dir.mkdir(parents=True, exist_ok=True)

    results_file = results_dir / "llm_results.json"
    data_file = results_dir / "llm_data.csv"
    figures = {
        "trades": figures_dir / "fig1_llm_trade_analysis.png",
        "signals": figures_dir / "fig2_llm_trading_signals.png",
        "distributions": figures_dir / "fig3_llm_distributions.png",
    }

    # The distributions figure is optional: it is only written when plot_llm_trade
    # returns one. Requiring it for a cache hit would make runs without that figure
    # call the LLM again on every invocation, so only the artifacts that are always
    # produced are checked.
    required_artifacts = (results_file, data_file, figures["trades"], figures["signals"])

    if all(artifact.exists() for artifact in required_artifacts):
        with results_file.open("r", encoding="utf-8") as handle:
            llm_trading_metrics = json.load(handle)
        # Mirror the escapechar used when the CSV was written so escaped
        # backslashes round-trip to their original text.
        llm_trades_df = pd.read_csv(data_file, escapechar="\\")
        return llm_trading_metrics, llm_trades_df

    output_dir = (output_path / "response" / risk_version / prompt_version).resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    high_risk = risk_version == "r"
    news_yaml_path = (prompt_path / news_yaml_file) if news_yaml_file else None
    llm_trades_df = generate_strategy_for_ticker(
        ticker_df=ticker_df,
        ticker=ticker,
        LLM_OUTPUT_PATH=output_dir,
        persona=PERSONA,
        # The helper's keyword names are fixed; the value carries the selected profile.
        HIGH_RISK_PROFILE=HIGH_RISK_PROFILE if high_risk else LOW_RISK_PROFILE,
        HIGH_OBJECTIVES=HIGH_OBJECTIVES if high_risk else LOW_OBJECTIVES,
        client=client,
        model=model,
        strategy_yaml_file=prompt_path / f"strat_prompt_{prompt_version}.yml",
        news_yaml_file=news_yaml_path,
        start_date=start_date,
        end_date=end_date,
        max_news=5 if news_yaml_path else 0,
        time_horizon="monthly",
    )

    llm_trading_metrics, llm_trades_df = evaluate_trading_metrics(llm_trades_df)
    with results_file.open("w", encoding="utf-8") as handle:
        json.dump(llm_trading_metrics, handle, indent=4)
    llm_trades_df.to_csv(data_file, index=False, quoting=csv.QUOTE_MINIMAL, escapechar="\\")

    fig_trades, fig_distributions, fig_signals = plot_llm_trade(llm_trades_df, plot=plot)
    fig_trades.savefig(figures["trades"], dpi=300, bbox_inches="tight")
    fig_signals.savefig(figures["signals"], dpi=300, bbox_inches="tight")
    if fig_distributions is not None:
        fig_distributions.savefig(figures["distributions"], dpi=300, bbox_inches="tight")

    return llm_trading_metrics, llm_trades_df

In [6]:
# Single-ticker run: prompt v4 with the analyst news prompt, using the
# high-risk ("r") configuration over the default experimentation window.
TARGET = "TSLA"
engineered_df = load_stock_data(TARGET)

llm_trading_metrics, llm_trades_df = run_llm_trade_experiment(
    engineered_df,
    TARGET,
    "v4",
    "r",
    model=OPENAI_MODEL,
    start_date=STARTDATE,
    end_date=ENDDATE,
    news_yaml_file="analyst_prompt_v1.yml",
)
llm_trading_metrics

KeyboardInterrupt: 

In [None]:
# Ticker universe shared by every prompt sweep and by the aggregation step.
TICKERS = [
    "AAPL",
    "MSFT",
    "GOOGL",
    "TSLA",
    "AMZN",
    "META",
]

## Prompt V1

In [None]:
# Prompt v1 sweep: both risk versions across every ticker, without interactive plots.
for risk in ("r", "nr"):
    for ticker in tqdm(TICKERS):
        engineered_df = load_stock_data(ticker)
        llm_trading_metrics, llm_trades_df = run_llm_trade_experiment(
            engineered_df,
            ticker,
            "v1",
            risk,
            plot=False,
        )


## Prompt V2

In [None]:
# Prompt v2 sweep: both risk versions across every ticker, without interactive plots.
for risk in ("r", "nr"):
    for ticker in tqdm(TICKERS):
        engineered_df = load_stock_data(ticker)
        llm_trading_metrics, llm_trades_df = run_llm_trade_experiment(
            engineered_df,
            ticker,
            "v2",
            risk,
            plot=False,
        )


## Prompt V3

In [None]:
# Prompt v3 sweep: both risk versions across every ticker, without interactive plots.
for risk in ("r", "nr"):
    for ticker in tqdm(TICKERS):
        engineered_df = load_stock_data(ticker)
        llm_trading_metrics, llm_trades_df = run_llm_trade_experiment(
            engineered_df,
            ticker,
            "v3",
            risk,
            plot=False,
        )


## Prompt V4

In [None]:
# Prompt v4 sweep: same grid as the earlier versions, but the analyst news
# prompt is supplied as well.
for risk in ("r", "nr"):
    for ticker in tqdm(TICKERS):
        engineered_df = load_stock_data(ticker)
        llm_trading_metrics, llm_trades_df = run_llm_trade_experiment(
            engineered_df,
            ticker,
            "v4",
            risk,
            news_yaml_file="analyst_prompt_v1.yml",
            plot=False,
        )

# Aggregate and Analyze All Results

In [None]:
# Collect every stored llm_results.json into one table, tagging each record with
# the risk profile, prompt version and ticker it belongs to. Combinations with no
# results file on disk are skipped silently.
risk_profiles = ["r", "nr"]
prompt_versions = ["v1", "v2", "v3", "v4"]
data = []

results_root = LLM_OUTPUT_PATH / "results"

for risk_profile in risk_profiles:
    for version in prompt_versions:
        for ticker in TICKERS:
            folder_path = results_root / risk_profile / version / ticker
            json_file_path = folder_path / "llm_results.json"
            if not json_file_path.exists():
                continue

            results = json.loads(json_file_path.read_text(encoding="utf-8"))
            results.update(
                {
                    "Risk Profile": "High Risk" if risk_profile == "r" else "Low Risk",
                    "Prompt Version": version,
                    "Ticker": ticker,
                }
            )
            data.append(results)

results_df = pd.DataFrame(data)
results_df.tail(1)

In [None]:
from pprint import pprint


# High-risk runs only: mean of each metric per ticker (rows) and prompt version
# (columns), with the columns ordered by prompt version.
pivot_table = (
    results_df.loc[results_df["Risk Profile"].eq("High Risk")]
    .pivot_table(
        index="Ticker",
        columns="Prompt Version",
        values=[
            "Sharpe Ratio (Annualized SR)",
            "Mean Perplexity",
            "Mean Entropy",
            "Maximum Drawdown (MDD)",
        ],
        aggfunc="mean",
    )
    .sort_index(axis=1, level=1)
)
pprint(pivot_table)

In [None]:
# High-risk runs only: average each metric over all tickers, one row per prompt version.
summary = (
    results_df.loc[results_df["Risk Profile"].eq("High Risk")]
    .groupby("Prompt Version")[
        [
            "Sharpe Ratio (Annualized SR)",
            "Mean Perplexity",
            "Mean Entropy",
            "Maximum Drawdown (MDD)",
        ]
    ]
    .mean()
    .sort_index()
)

print(summary)


In [None]:
# Low-risk runs only: mean of each metric per ticker (rows) and prompt version
# (columns), with the columns ordered by prompt version.
# NOTE(review): unlike the High Risk pivot, "Maximum Drawdown (MDD)" is not
# included here — confirm whether the omission is intentional.
pivot_table = (
    results_df.loc[results_df["Risk Profile"].eq("Low Risk")]
    .pivot_table(
        index="Ticker",
        columns="Prompt Version",
        values=["Sharpe Ratio (Annualized SR)", "Mean Perplexity", "Mean Entropy"],
        aggfunc="mean",
    )
    .sort_index(axis=1, level=1)
)
pprint(pivot_table)

In [None]:
# Low-risk runs only: average each metric over all tickers, one row per prompt version.
# NOTE(review): unlike the High Risk summary, "Maximum Drawdown (MDD)" is not
# included here — confirm whether the omission is intentional.
summary = (
    results_df.loc[results_df["Risk Profile"].eq("Low Risk")]
    .groupby("Prompt Version")[
        [
            "Sharpe Ratio (Annualized SR)",
            "Mean Perplexity",
            "Mean Entropy",
        ]
    ]
    .mean()
    .sort_index()
)

print(summary)
