# Experiment 1 - LLM Strategies

In [1]:
import os
import json
import sys
import logging
import warnings
import numpy as np
import pandas as pd
warnings.filterwarnings("ignore")


if False:
    %pip install python-dotenv==1.0.1
    %pip install tqdm==4.66.5
    %pip install openai==1.58.1

%load_ext dotenv

FUNDAMENTALS_PATH = os.getenv("FUNDAMENTALS_PATH", '/fundamentals')
LLM_PROMPTS_PATH = os.getenv("LLM_PROMPTS_PATH", '/prompts')
FUNDAMENTALS_PATH = os.getenv("FUNDAMENTALS_PATH", '/fundamentals')
HISTORIC_PATH = os.getenv("HISTORIC_PATH", '/historic')
MACRO_PATH = os.getenv("MACRO_PATH", '/macro')
OPTIONS_PATH = os.getenv("OPTIONS_PATH", '/options')
LLM_OUTPUT_PATH = os.getenv("LLM_OUTPUT_PATH", '/llm_data')
LLM_OUTPUT_PATH = f"{LLM_OUTPUT_PATH}/gated"
LOGS_PATH = os.getenv("LOGS_PATH", '/logs')
paths = [LLM_OUTPUT_PATH, LOGS_PATH]
for path in paths:
    if path and not os.path.exists(path):
        os.makedirs(path)

if "KAGGLE_KERNEL_RUN_TYPE" in os.environ:
    logging.info("Running in Kaggle...")

    DATA_PATH = "/kaggle/input/thesis/data"
    FUNDAMENTALS_PATH = DATA_PATH + FUNDAMENTALS_PATH
    HISTORIC_PATH = DATA_PATH + HISTORIC_PATH
    MACRO_PATH = DATA_PATH + MACRO_PATH
    OPTIONS_PATH = DATA_PATH + OPTIONS_PATH
    LLM_PROMPTS_PATH = DATA_PATH + LLM_PROMPTS_PATH
    sys.path.insert(1, "/kaggle/usr/lib/thesis_utils")
    sys.path.insert(1, "/kaggle/usr/lib/data_utils")
else:
    DATA_PATH = './data'
    module_path = os.path.abspath(os.path.join(os.getcwd(), 'utils'))
    if module_path not in sys.path:
        sys.path.append(module_path)

from tqdm.notebook import tqdm

from openai import OpenAI
from data_utils import generate_strategy_for_ticker, evaluate_trading_metrics, expert_trades, plot_llm_trade, PERSONA, HIGH_RISK_PROFILE, HIGH_OBJECTIVES, LOW_RISK_PROFILE, LOW_OBJECTIVES

## Environment and Constants

In [2]:
# Model name and API key are read from the environment; os.getenv yields None when unset.
OPENAI_MODEL = os.getenv("OPENAI_MODEL")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Date window used by load_stock_data and as the default start/end of run_llm_trade_experiment.
STARTDATE = '2018-01-01'
ENDDATE = '2020-01-01'

## LLM Setup

In [3]:
# Shared OpenAI client; it is the default `client` argument of run_llm_trade_experiment.
OPENAI_CLIENT = OpenAI(api_key=OPENAI_API_KEY)


In [4]:
def load_stock_data(ticker, start_date=None, end_date=None, historic_path=None):
    """Load the engineered data of one ticker, restricted to a date window.

    Reads ``{historic_path}/engineered_{ticker}_data.parquet``, indexes the
    frame by its ``Date`` column and keeps the rows whose index lies between
    ``start_date`` and ``end_date`` (both bounds inclusive).

    Args:
        ticker: Ticker symbol used to build the parquet file name.
        start_date: Lower bound of the window; defaults to ``STARTDATE``.
        end_date: Upper bound of the window; defaults to ``ENDDATE``.
        historic_path: Directory holding the parquet files; defaults to
            ``HISTORIC_PATH``.

    Returns:
        A new DataFrame indexed by ``Date`` containing only the rows inside
        the window.
    """
    # Resolve defaults at call time so later changes to the notebook constants
    # are picked up without re-defining the function.
    if start_date is None:
        start_date = STARTDATE
    if end_date is None:
        end_date = ENDDATE
    if historic_path is None:
        historic_path = HISTORIC_PATH

    input_file = f"{historic_path}/engineered_{ticker}_data.parquet"

    # Bounds are made UTC-aware before being compared with the index.
    window_start = pd.to_datetime(start_date, utc=True)
    window_end = pd.to_datetime(end_date, utc=True)

    # Chained set_index returns a new frame instead of mutating in place.
    engineered_df = pd.read_parquet(input_file).set_index('Date')
    in_window = (engineered_df.index >= window_start) & (engineered_df.index <= window_end)

    return engineered_df[in_window]


In [5]:
def run_llm_trade_experiment(ticker_df,
                              ticker,
                              prompt_version,
                              risk_version,
                              prompt_path=LLM_PROMPTS_PATH,
                              output_path=LLM_OUTPUT_PATH,
                              client=OPENAI_CLIENT,
                              model=OPENAI_MODEL,
                              start_date=STARTDATE,
                              end_date=ENDDATE,
                              eval_yaml_file="evaluator_prompt_v1.yml",
                              news_yaml_file=None,
                              plot=True,
                              max_news=0):
    """Run (or reload) one LLM trading experiment for a ticker.

    Outputs are stored under ``output_path`` in ``results/``, ``figures/`` and
    ``response/`` sub-trees keyed by ``risk_version/prompt_version``. When the
    metrics JSON, the data CSV and figures 1-3 already exist, they are treated
    as a cache: the metrics and data are loaded from disk and nothing is
    regenerated. Otherwise the strategy is generated, evaluated, persisted and
    plotted.

    Args:
        ticker_df: Data frame passed to ``generate_strategy_for_ticker``.
        ticker: Ticker symbol; also the leaf folder name of the outputs.
        prompt_version: Selects ``strat_prompt_{prompt_version}.yml``.
        risk_version: ``'r'`` selects the HIGH_* profile/objectives; any other
            value selects the LOW_* ones.
        prompt_path: Directory containing the prompt YAML files.
        output_path: Root directory for results, figures and responses.
        client: Client object forwarded to the strategy generator.
        model: Model name forwarded to the strategy generator.
        start_date: Forwarded to the strategy generator.
        end_date: Forwarded to the strategy generator.
        eval_yaml_file: Evaluator prompt file name, or a falsy value for none.
        news_yaml_file: News prompt file name, or a falsy value for none.
        plot: Forwarded to ``plot_llm_trade``.
        max_news: Forwarded to the strategy generator.

    Returns:
        Tuple ``(llm_trading_metrics, llm_trades_df)``. On a cache hit the
        frame is whatever ``pd.read_csv`` yields for the stored CSV.
    """
    run_key = f'{risk_version}/{prompt_version}'
    results_dir = f'{output_path}/results/{run_key}/{ticker}'
    figures_dir = f'{output_path}/figures/{run_key}/{ticker}'
    for directory in (results_dir, figures_dir):
        os.makedirs(directory, exist_ok=True)

    results_file = os.path.join(results_dir, 'llm_results.json')
    data_file = os.path.join(results_dir, 'llm_data.csv')
    fig1_file = f"{figures_dir}/fig1_llm_trade_analysis.png"
    fig2_file = f"{figures_dir}/fig2_llm_trading_signals.png"
    fig3_file = f"{figures_dir}/fig3_llm_distributions.png"
    fig4_file = f"{figures_dir}/fig4_llm_evaluations.png"

    # Cache hit: figure 4 is optional, so it is not part of the check.
    required_outputs = (results_file, data_file, fig1_file, fig2_file, fig3_file)
    if all(os.path.exists(candidate) for candidate in required_outputs):
        with open(results_file, 'r') as f:
            cached_metrics = json.load(f)
        return cached_metrics, pd.read_csv(data_file)

    # Cache miss: generate the strategy, evaluate it and persist everything.
    response_dir = f'{output_path}/response/{run_key}'
    os.makedirs(response_dir, exist_ok=True)

    is_high_risk = risk_version == 'r'
    generated_df = generate_strategy_for_ticker(
        ticker_df=ticker_df,
        ticker=ticker,
        LLM_OUTPUT_PATH=response_dir,
        persona=PERSONA,
        HIGH_RISK_PROFILE=HIGH_RISK_PROFILE if is_high_risk else LOW_RISK_PROFILE,
        HIGH_OBJECTIVES=HIGH_OBJECTIVES if is_high_risk else LOW_OBJECTIVES,
        client=client,
        model=model,
        strategy_yaml_file=f'{prompt_path}/strat_prompt_{prompt_version}.yml',
        eval_yaml_file=f'{prompt_path}/{eval_yaml_file}' if eval_yaml_file else None,
        news_yaml_file=f'{prompt_path}/{news_yaml_file}' if news_yaml_file else None,
        start_date=start_date,
        end_date=end_date,
        max_news=max_news
    )
    llm_trading_metrics, llm_trades_df = evaluate_trading_metrics(generated_df)

    with open(results_file, 'w') as f:
        json.dump(llm_trading_metrics, f, indent=4)
    llm_trades_df.to_csv(data_file, index=False)

    first_fig, second_fig, third_fig, fourth_fig = plot_llm_trade(llm_trades_df, plot=plot)
    # NOTE(review): the 2nd returned figure is written to the fig3 file and the
    # 3rd to the fig2 file, exactly as before; confirm this matches the return
    # order of plot_llm_trade.
    save_plan = [
        (first_fig, fig1_file),
        (third_fig, fig2_file),
        (second_fig, fig3_file),
    ]
    if fourth_fig is not None:
        save_plan.append((fourth_fig, fig4_file))
    for figure, target_file in save_plan:
        figure.savefig(target_file, dpi=300, bbox_inches='tight')

    return llm_trading_metrics, llm_trades_df

In [6]:
# Tickers iterated by every experiment loop and by the results aggregation in this notebook.
TICKERS = ["AAPL", "MSFT", "GOOGL", "TSLA", "AMZN", "META"]

# Prompt V1

In [7]:
# Strategy prompt v1, risk_version 'r' (HIGH_* profile and objectives).
for ticker in tqdm(TICKERS):
    engineered_df = load_stock_data(ticker)
    llm_trading_metrics, llm_trades_df = run_llm_trade_experiment(
        ticker_df=engineered_df,
        ticker=ticker,
        prompt_version='v1',
        risk_version='r',
        plot=False,
    )

  0%|          | 0/6 [00:00<?, ?it/s]

In [8]:
# Strategy prompt v1, risk_version 'nr' (LOW_* profile and objectives).
for ticker in tqdm(TICKERS):
    engineered_df = load_stock_data(ticker)
    llm_trading_metrics, llm_trades_df = run_llm_trade_experiment(
        ticker_df=engineered_df,
        ticker=ticker,
        prompt_version='v1',
        risk_version='nr',
        plot=False,
    )

  0%|          | 0/6 [00:00<?, ?it/s]

# Prompt V2

In [9]:
# Strategy prompt v2, risk_version 'r' (HIGH_* profile and objectives).
for ticker in tqdm(TICKERS):
    engineered_df = load_stock_data(ticker)
    llm_trading_metrics, llm_trades_df = run_llm_trade_experiment(
        ticker_df=engineered_df,
        ticker=ticker,
        prompt_version='v2',
        risk_version='r',
        plot=False,
    )

  0%|          | 0/6 [00:00<?, ?it/s]

In [10]:
# Strategy prompt v2, risk_version 'nr' (LOW_* profile and objectives).
for ticker in tqdm(TICKERS):
    engineered_df = load_stock_data(ticker)
    llm_trading_metrics, llm_trades_df = run_llm_trade_experiment(
        ticker_df=engineered_df,
        ticker=ticker,
        prompt_version='v2',
        risk_version='nr',
        plot=False,
    )

  0%|          | 0/6 [00:00<?, ?it/s]

# Prompt V3

In [11]:
# Strategy prompt v3, risk_version 'r' (HIGH_* profile and objectives).
for ticker in tqdm(TICKERS):
    engineered_df = load_stock_data(ticker)
    llm_trading_metrics, llm_trades_df = run_llm_trade_experiment(
        ticker_df=engineered_df,
        ticker=ticker,
        prompt_version='v3',
        risk_version='r',
        plot=False,
    )

  0%|          | 0/6 [00:00<?, ?it/s]

In [12]:
# Strategy prompt v3, risk_version 'nr' (LOW_* profile and objectives).
for ticker in tqdm(TICKERS):
    engineered_df = load_stock_data(ticker)
    llm_trading_metrics, llm_trades_df = run_llm_trade_experiment(
        ticker_df=engineered_df,
        ticker=ticker,
        prompt_version='v3',
        risk_version='nr',
        plot=False,
    )

  0%|          | 0/6 [00:00<?, ?it/s]

# Prompt V4

In [13]:
# Strategy prompt v4 with the analyst news prompt, risk_version 'r'
# (HIGH_* profile and objectives).
for ticker in tqdm(TICKERS):
    engineered_df = load_stock_data(ticker)
    llm_trading_metrics, llm_trades_df = run_llm_trade_experiment(
        ticker_df=engineered_df,
        ticker=ticker,
        prompt_version='v4',
        risk_version='r',
        news_yaml_file="analyst_prompt_v1.yml",
        plot=False,
    )

  0%|          | 0/6 [00:00<?, ?it/s]

In [14]:
# Strategy prompt v4 with the analyst news prompt, risk_version 'nr'
# (LOW_* profile and objectives).
for ticker in tqdm(TICKERS):
    engineered_df = load_stock_data(ticker)
    llm_trading_metrics, llm_trades_df = run_llm_trade_experiment(
        ticker_df=engineered_df,
        ticker=ticker,
        prompt_version='v4',
        risk_version='nr',
        news_yaml_file="analyst_prompt_v1.yml",
        plot=False,
    )

  0%|          | 0/6 [00:00<?, ?it/s]

In [15]:
# Shows the trades frame left in scope by whichever experiment loop ran last
# (its final ticker only); results of earlier iterations live on disk.
llm_trades_df

Unnamed: 0,Open,High,Low,Close,Volume,IV_Open,IV_High,IV_Low,IV_Close,IV_Volume,...,Total_Tokens,Total_Costs,log_scaled_perplexity,mean_normalized_entropy,max_normalized_entropy,mean_evaluation_score,max_evaluation_score,mean_acceptance_rate,max_acceptance_rate,month
0,42.00,43.00,38.00,38.20,247725500,0.644092,0.681381,0.555497,0.618804,1.0,...,9730884.0,1.888283,0.397405,0.267421,0.496011,0.731221,1.0,0.607882,1.0,2012-05
1,36.53,36.66,33.00,34.03,47316900,0.644092,0.681381,0.555497,0.618804,1.0,...,9730884.0,1.888283,0.397405,0.267421,0.496011,0.731221,1.0,0.607882,1.0,2012-05
2,32.66,33.57,30.95,31.00,21220800,0.644092,0.681381,0.555497,0.618804,1.0,...,9730884.0,1.888283,0.397405,0.267421,0.496011,0.731221,1.0,0.607882,1.0,2012-05
3,31.37,32.50,31.36,32.00,13180800,0.644092,0.681381,0.555497,0.618804,1.0,...,9730884.0,1.888283,0.397405,0.267421,0.496011,0.731221,1.0,0.607882,1.0,2012-05
4,32.98,33.20,31.78,33.03,8407700,0.644092,0.681381,0.555497,0.618804,1.0,...,9730884.0,1.888283,0.397405,0.267421,0.496011,0.731221,1.0,0.607882,1.0,2012-05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1912,206.30,206.79,205.00,205.12,665022,0.204432,0.207527,0.203654,0.205003,1.0,...,9730884.0,1.888283,0.024454,0.267421,0.496011,0.731221,1.0,0.607882,1.0,2019-12
1913,205.50,207.80,205.34,207.79,1169572,0.206734,0.232562,0.206734,0.231117,1.0,...,9730884.0,1.888283,0.024454,0.267421,0.496011,0.731221,1.0,0.607882,1.0,2019-12
1914,208.42,208.90,206.59,208.10,1532322,0.237737,0.250262,0.237737,0.247230,1.0,...,9730884.0,1.888283,0.024454,0.267421,0.496011,0.731221,1.0,0.607882,1.0,2019-12
1915,207.86,207.88,203.92,204.41,1684756,0.274343,0.297425,0.274343,0.293964,1.0,...,9730884.0,1.888283,0.024454,0.267421,0.496011,0.731221,1.0,0.607882,1.0,2019-12


# All Results

In [16]:
risk_profiles = ["r", "nr"]
prompt_versions = ["v1", "v2", "v3", "v4"]
data = []

# Gather one metrics record per (risk profile, prompt version, ticker) run that
# has a saved llm_results.json; combinations without a file are skipped.
for risk_profile in risk_profiles:
    risk_label = "High Risk" if risk_profile == "r" else "Low Risk"
    for version in prompt_versions:
        for ticker in TICKERS:
            folder_path = os.path.join(LLM_OUTPUT_PATH, "results", risk_profile, version, ticker)
            json_file_path = os.path.join(folder_path, "llm_results.json")

            if not os.path.exists(json_file_path):
                continue

            with open(json_file_path, 'r') as file:
                results = json.load(file)

            # Tag the record with the run it belongs to.
            results['Risk Profile'] = risk_label
            results['Prompt Version'] = version
            results['Ticker'] = ticker
            data.append(results)

results_df = pd.DataFrame(data)
results_df.tail(1)


Unnamed: 0,Sharpe Ratio (Annualized SR),Portfolio Turnover (PTR),Maximum Drawdown (MDD),Mean Drawdown Duration (MDDur),Cumulative Returns,Mean Perplexity,Mean Entropy,Max Perplexity,Max Entropy,Mean Normalized Entropy,...,Total Costs,Mean Evaluation Iterations,Max Evaluation Iterations,Mean Evaluation Score,Max Evaluation Score,Mean Acceptance Rate,Max Acceptance Rate,Risk Profile,Prompt Version,Ticker
47,0.565357,4.0,0.687735,100.322309,2.376839,1.220883,0.37556,1.526496,0.687617,0.267421,...,1.888283,1.166406,3.0,0.731221,1.0,0.607882,1.0,Low Risk,v4,META


In [17]:
from pprint import pprint


# Per-ticker metrics of the "High Risk" runs: rows are tickers, columns are
# (metric, prompt version) pairs ordered by prompt version.
pivot_table = (
    results_df[results_df["Risk Profile"] == "High Risk"]
    .pivot_table(
        values=[
            "Sharpe Ratio (Annualized SR)",
            "Mean Perplexity",
            "Mean Evaluation Score",
            "Mean Acceptance Rate",
            "Mean Normalized Entropy",
        ],
        index="Ticker",
        columns="Prompt Version",
        aggfunc="mean",
    )
    .sort_index(axis=1, level=1)
)

# A bare last expression uses the notebook's rich table display; pprint only
# produced the wrapped, truncated plain-text repr.
pivot_table

               Mean Acceptance Rate Mean Evaluation Score  \
Prompt Version                   v1                    v1   
Ticker                                                      
AAPL                       0.412894              0.591046   
AMZN                       0.368416              0.534205   
GOOGL                      0.448455              0.685614   
META                       0.336843              0.519384   
MSFT                       0.516297              0.663481   
TSLA                       0.184015              0.425050   

               Mean Normalized Entropy Mean Perplexity  \
Prompt Version                      v1              v1   
Ticker                                                   
AAPL                          0.466802        1.476994   
AMZN                          0.482704        1.508255   
GOOGL                         0.487170        1.508483   
META                          0.483964        1.506999   
MSFT                          0.478222      

In [18]:
# Mean of each metric across tickers for the "High Risk" runs, per prompt version.
summary = (
    results_df[results_df["Risk Profile"] == "High Risk"]
    .groupby("Prompt Version")[[
        "Sharpe Ratio (Annualized SR)",
        "Mean Perplexity",
        "Mean Evaluation Score",
        "Mean Acceptance Rate",
        "Mean Normalized Entropy",
    ]]
    .mean()
    .sort_index()
)

# A bare last expression uses the notebook's rich table display instead of the
# wrapped plain text that print() produced.
summary


                Sharpe Ratio (Annualized SR)  Mean Perplexity  \
Prompt Version                                                  
v1                                  0.580814         1.505366   
v2                                  0.757520         1.277695   
v3                                  0.757520         1.327281   
v4                                  0.725905         1.298759   

                Mean Evaluation Score  Mean Acceptance Rate  \
Prompt Version                                                
v1                           0.569797              0.377820   
v2                           0.725691              0.705482   
v3                           0.743169              0.783386   
v4                           0.731280              0.652071   

                Mean Normalized Entropy  
Prompt Version                           
v1                             0.481416  
v2                             0.301060  
v3                             0.360060  
v4                  

In [19]:
# Per-ticker metrics of the "Low Risk" runs: rows are tickers, columns are
# (metric, prompt version) pairs ordered by prompt version.
pivot_table = (
    results_df[results_df["Risk Profile"] == "Low Risk"]
    .pivot_table(
        values=[
            "Sharpe Ratio (Annualized SR)",
            "Mean Perplexity",
            "Mean Evaluation Score",
            "Mean Acceptance Rate",
            "Mean Normalized Entropy",
        ],
        index="Ticker",
        columns="Prompt Version",
        aggfunc="mean",
    )
    .sort_index(axis=1, level=1)
)

# A bare last expression uses the notebook's rich table display; pprint only
# produced the wrapped, truncated plain-text repr.
pivot_table

               Mean Acceptance Rate Mean Evaluation Score  \
Prompt Version                   v1                    v1   
Ticker                                                      
AAPL                       0.277254              0.504527   
AMZN                       0.178862              0.443662   
GOOGL                      0.392398              0.524648   
META                       0.153262              0.447813   
MSFT                       0.287050              0.505030   
TSLA                       0.227134              0.386318   

               Mean Normalized Entropy Mean Perplexity  \
Prompt Version                      v1              v1   
Ticker                                                   
AAPL                          0.444660        1.444730   
AMZN                          0.462874        1.472045   
GOOGL                         0.459240        1.465960   
META                          0.467292        1.484553   
MSFT                          0.473051      

In [20]:
# Mean of each metric across tickers for the "Low Risk" runs, per prompt version.
summary = (
    results_df[results_df["Risk Profile"] == "Low Risk"]
    .groupby("Prompt Version")[[
        "Sharpe Ratio (Annualized SR)",
        "Mean Perplexity",
        "Mean Evaluation Score",
        "Mean Acceptance Rate",
        "Mean Normalized Entropy",
    ]]
    .mean()
    .sort_index()
)

# A bare last expression uses the notebook's rich table display instead of the
# wrapped plain text that print() produced.
summary


                Sharpe Ratio (Annualized SR)  Mean Perplexity  \
Prompt Version                                                  
v1                                  0.742208         1.485100   
v2                                  0.757520         1.256229   
v3                                  0.757520         1.313148   
v4                                  0.818880         1.252679   

                Mean Evaluation Score  Mean Acceptance Rate  \
Prompt Version                                                
v1                           0.468666              0.252660   
v2                           0.731490              0.744295   
v3                           0.743124              0.670551   
v4                           0.731043              0.709531   

                Mean Normalized Entropy  
Prompt Version                           
v1                             0.464286  
v2                             0.280853  
v3                             0.349610  
v4                  