# Experiment - KLD for Reward Regularization

## Prepare Notebook

In [None]:
if False:
    %pip install gym==0.23.0
    %pip install matplotlib==3.8.2
    %pip install numpy==2.2.1
    %pip install openai==1.59.8
    %pip install pandas==2.2.3
    %pip install polars==1.20.0
    %pip install protobuf==3.20.3
    %pip install pydantic==2.10.5
    %pip install python-dotenv==1.0.1
    %pip install PyYAML==6.0.2
    %pip install scikit_learn==1.4.0
    %pip install scipy==1.15.1
    %pip install statsmodels==0.14.1
    %pip install tabulate==0.9.0
    %pip install torch==2.4.1
    %pip install tqdm==4.66.5


In [None]:
import os
import sys
import logging
import warnings
warnings.filterwarnings("ignore")

%load_ext dotenv


MODELS_PATH = os.getenv("MODELS_PATH")
HISTORIC_PATH = os.getenv("HISTORIC_PATH")
LLM_PROMPTS_PATH = os.getenv("LLM_PROMPTS_PATH", '/prompts')
LLM_OUTPUT_PATH = os.getenv("LLM_OUTPUT_PATH", '/llm_data')
LLM_OUTPUT_PATH = f"{LLM_OUTPUT_PATH}/gated"
LOGS_PATH = os.getenv("LOGS_PATH")
MACRO_PATH = os.getenv("MACRO_PATH", '/macro')
OPTIONS_PATH = os.getenv("OPTIONS_PATH", '/options')
RL_OUTPUT_PATH = os.getenv("RL_OUTPUT_PATH", '/rl_data')

paths = [LLM_OUTPUT_PATH, LOGS_PATH, MODELS_PATH]
for path in paths:
    if path and not os.path.exists(path):
        os.makedirs(path)

if "KAGGLE_KERNEL_RUN_TYPE" in os.environ:
    logging.info("Running in Kaggle...")
    for dirname, _, filenames in os.walk("/kaggle/input"):
        for filename in filenames:
            print(os.path.join(dirname, filename))
    DATA_PATH = "/kaggle/input/drl-dataset-quant"
    HISTORIC_PATH = DATA_PATH + HISTORIC_PATH
    LLM_PROMPTS_PATH = DATA_PATH + LLM_PROMPTS_PATH

    sys.path.insert(1, "/kaggle/usr/lib/drlutil")

## Initialize LLM

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from openai import OpenAI

# Put the notebook-local ``utils`` directory on the import path (added at most
# once, so re-running the cell does not duplicate the entry).
module_path = os.path.abspath('utils')
if module_path not in sys.path:
    sys.path += [module_path]

# Project helpers are pulled in wholesale; later cells use names from these modules.
from thesis_utils import *
from data_utils import *

# OpenAI settings are read from the environment rather than hard-coded.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL")
OPENAI_CLIENT = OpenAI(api_key=OPENAI_API_KEY)

## Run LLM + RL

In [None]:
# --- Experiment selection ----------------------------------------------------
TARGET = 'AAPL'
STARTDATE = '2012-01-01'
SPLITDATE = '2018-01-01'   # boundary between the training and test periods
ENDDATE = '2020-01-01'
# 'r' makes the strategy-generation cell pick the HIGH_* risk profile and
# objectives; any other value makes it pick the LOW_* ones.
RISK_EXPERIMENT = 'r'
PROMPT_VERSION = 'v3'

# --- Simulation settings passed to simulateNewStrategy -----------------------
money = 100_000.0
stateLength = 30
observationSpace = 5 * (stateLength - 1)
actionSpace = 2
bounds = [1, 30]
step = 1
numberOfEpisodes = 50

# Candidate cost levels; the middle entry is used, divided by 100.
percentageCosts = [0, 0.1, 0.2]
transactionCosts = percentageCosts[1] / 100

simulator = TradingSimulator()

In [None]:
# Result files for this (risk experiment, prompt version) combination.
rl_output_dir = f'{RL_OUTPUT_PATH}/{RISK_EXPERIMENT}/{PROMPT_VERSION}'
os.makedirs(rl_output_dir, exist_ok=True)

train_file = f'{rl_output_dir}/{TARGET}_train_results.pkl'
test_file = f'{rl_output_dir}/{TARGET}_test_results.pkl'
sharpe_train_file = f'{rl_output_dir}/{TARGET}_sharpe_train_results.pkl'
sharpe_test_file = f'{rl_output_dir}/{TARGET}_sharpe_test_results.pkl'
time_file = f'{rl_output_dir}/{TARGET}_time_results.pkl'

# Engineered data for the target ticker, indexed by its 'Date' column.
input_file = f"{HISTORIC_PATH}/engineered_{TARGET}_data.parquet"
engineered_df = pd.read_parquet(input_file).set_index('Date')

# Folder passed to the generator as its LLM_OUTPUT_PATH for this configuration.
output_dir = f'{LLM_OUTPUT_PATH}/response/{RISK_EXPERIMENT}/{PROMPT_VERSION}'

# RISK_EXPERIMENT == 'r' selects the HIGH_* settings, anything else the LOW_* ones.
use_high_risk = RISK_EXPERIMENT == 'r'
risk_profile = HIGH_RISK_PROFILE if use_high_risk else LOW_RISK_PROFILE
objectives = HIGH_OBJECTIVES if use_high_risk else LOW_OBJECTIVES

engineered_df = generate_strategy_for_ticker(
    ticker_df=engineered_df,
    ticker=TARGET,
    LLM_OUTPUT_PATH=output_dir,
    persona=PERSONA,
    HIGH_RISK_PROFILE=risk_profile,
    HIGH_OBJECTIVES=objectives,
    client=OPENAI_CLIENT,
    model=OPENAI_MODEL,
    strategy_yaml_file=f'{LLM_PROMPTS_PATH}/strat_prompt_{PROMPT_VERSION}.yml',
    eval_yaml_file=f'{LLM_PROMPTS_PATH}/evaluator_prompt_v1.yml',
    news_yaml_file=f'{LLM_PROMPTS_PATH}/analyst_prompt_v1.yml',
    start_date=STARTDATE,
    end_date=ENDDATE,
    max_news=5,
)

# Preview the last rows of the frame returned by the generator.
engineered_df.tail(3)

In [None]:
# Train the RL strategy, or reuse the cached results of a previous run.
#
# Local imports: no earlier cell imports ``pickle`` explicitly, so importing it
# here keeps the cell runnable on a fresh kernel.
import pickle
import time
from types import SimpleNamespace

# Extra cache artefact: the test environment's data, which later cells read
# through ``test_env.data``. Without it a cache hit would leave ``test_env``
# undefined and break those cells.
test_env_data_file = f'{rl_output_dir}/{TARGET}_test_env_data.pkl'
cache_files = [train_file, test_file, time_file, test_env_data_file]


def _load_pickle(file_path):
    """Return the object stored in the pickle file at ``file_path``."""
    with open(file_path, 'rb') as f:
        return pickle.load(f)


def _dump_pickle(obj, file_path):
    """Write ``obj`` to ``file_path`` as a pickle, replacing any existing file."""
    with open(file_path, 'wb') as f:
        pickle.dump(obj, f)


if all(os.path.exists(cache_file) for cache_file in cache_files):
    # SECURITY: pickle.load can execute arbitrary code; only reuse cache files
    # that were produced by this notebook.
    train_results = _load_pickle(train_file)
    test_results = _load_pickle(test_file)
    time_results = _load_pickle(time_file)
    # Lightweight stand-in exposing only ``.data``, the one attribute the later
    # cells of this notebook use. ``strat`` and ``train_env`` are not restored.
    test_env = SimpleNamespace(data=_load_pickle(test_env_data_file))
else:
    training_started = time.perf_counter()
    strat, train_env, test_env = simulator.simulateNewStrategy(engineered_df.copy(),
                                                                startingDate=STARTDATE,
                                                                endingDate=ENDDATE,
                                                                splitingDate=SPLITDATE,
                                                                verbose=True,
                                                                plotTraining=True,
                                                                rendering=True,
                                                                showPerformance=True,
                                                                models_path=MODELS_PATH,
                                                                saveStrategy=True,
                                                                money=money,
                                                                observationSpace=observationSpace,
                                                                actionSpace=actionSpace,
                                                                stateLength=stateLength,
                                                                bounds=bounds,
                                                                step=step,
                                                                numberOfEpisodes=numberOfEpisodes,
                                                                transactionCosts=transactionCosts,
                                                                ticker_symbol=TARGET)
    # NOTE(review): the intended format of ``time_results`` is not shown in this
    # notebook; elapsed wall-clock seconds of the simulation call are stored.
    # Adjust if another consumer of ``time_file`` expects something else.
    time_results = time.perf_counter() - training_started

    analyser = PerformanceEstimator(train_env.data)
    train_results = analyser.getComputedPerformance()
    analyser = PerformanceEstimator(test_env.data)
    test_results = analyser.getComputedPerformance()

    # Persist everything the cache branch needs. Previously ``time_file`` was
    # required by the cache check but never written, so the cache never hit.
    _dump_pickle(train_results, train_file)
    _dump_pickle(test_results, test_file)
    _dump_pickle(time_results, time_file)
    # Assumes ``test_env.data`` is picklable (presumably a DataFrame) -- TODO confirm.
    _dump_pickle(test_env.data, test_env_data_file)

In [None]:
# Show the test-period performance results loaded or computed in the previous cell.
test_results

In [None]:
# Inspect the test environment's data; its 'action' and 'returns' columns are used by the next cell.
test_env.data

In [None]:
# Evaluate the RL agent's decisions over the test window, keeping the LLM's own
# signal alongside for comparison.
env_frame = test_env.data

test_engineered_df = engineered_df[SPLITDATE:ENDDATE].copy()
# Preserve the LLM signal before 'trade_action' is overwritten below.
test_engineered_df['LLM_Trade_Action'] = test_engineered_df['trade_action']
# 'trade_action' becomes a boolean flag: True where the environment action is 0.
test_engineered_df['trade_action'] = env_frame['action'].eq(0)
test_engineered_df['reward'] = env_frame['returns']

llm_trading_metrics, llm_trades_df = evaluate_trading_metrics(test_engineered_df)

# Compound the environment's returns into a cumulative-return series.
compounded_growth = env_frame['returns'].add(1).cumprod()
llm_trades_df['cumulative_returns'] = compounded_growth - 1
llm_trading_metrics

In [None]:
# Build the figures without showing them yet; note the unpacking order (1, 3, 2, 4).
fig1, fig3, fig2, fig4 = plot_llm_trade(llm_trades_df, plot=False)

# Keep only the rows where the LLM signal differs from the previous row. The
# first row always qualifies because the shifted series starts with NaN.
llm_signal = llm_trades_df['LLM_Trade_Action']
change_points = llm_signal.ne(llm_signal.shift(1))
llm_changes = llm_trades_df[change_points]

longs = llm_changes[llm_changes['LLM_Trade_Action'] == 1]
flats = llm_changes[llm_changes['LLM_Trade_Action'] == 0]

# Overlay the LLM signal changes on the first axes of the first figure.
# NOTE(review): the variable is called ``flats`` but its legend entry reads
# 'LLM Short' -- confirm which meaning of action 0 is intended.
price_axis = fig1.axes[0]
overlay_specs = (
    (longs, '^', 'purple', 'LLM Long'),
    (flats, 'v', 'gray', 'LLM Short'),
)
for points, marker_shape, edge_color, legend_label in overlay_specs:
    price_axis.scatter(
        points.index,
        points['Close'],
        marker=marker_shape,
        facecolors='none',
        edgecolors=edge_color,
        label=legend_label,
        s=200,
    )

price_axis.legend(loc='upper right')

# Render every figure that was actually produced (falsy entries are skipped).
for fig in (fig1, fig2, fig3, fig4):
    if not fig:
        continue
    fig.show()
    display(fig)