# Experiment 2: LLM+RL Individual Test Episode For Security and Risk Profiles

In this notebook we test our hybrid LLM+RL architecture on a single equity, which lets us observe the learning process and troubleshoot the RL agent.

## Notebook Setup



In [None]:
import os
import sys
import logging
import warnings
import pickle
import numpy as np
import pandas as pd
from pprint import pprint
from matplotlib import pyplot as plt
import time
warnings.filterwarnings("ignore")

%load_ext dotenv

FUNDAMENTALS_PATH = os.getenv("FUNDAMENTALS_PATH", '/fundamentals')
LLM_PROMPTS_PATH = os.getenv("LLM_PROMPTS_PATH", '/prompts')
FUNDAMENTALS_PATH = os.getenv("FUNDAMENTALS_PATH", '/fundamentals')
HISTORIC_PATH = os.getenv("HISTORIC_PATH", '/historic')
MACRO_PATH = os.getenv("MACRO_PATH", '/macro')
OPTIONS_PATH = os.getenv("OPTIONS_PATH", '/options')
LLM_OUTPUT_PATH = os.getenv("LLM_OUTPUT_PATH", '/llm_data')
RL_OUTPUT_PATH = os.getenv("RL_OUTPUT_PATH", '/rl_data')
LOGS_PATH = os.getenv("LOGS_PATH", '/logs')
paths = [LLM_OUTPUT_PATH, LOGS_PATH, RL_OUTPUT_PATH]

for path in paths:
    if path and not os.path.exists(path):
        os.makedirs(path)

DATA_PATH = './data'
module_path = os.path.abspath(os.path.join(os.getcwd(), 'utils'))
if module_path not in sys.path:
    sys.path.append(module_path)

from tqdm.notebook import tqdm
from openai import OpenAI
from rl_agent_utils import *
from data_utils import *

## Environment and Constants

In [None]:
# OpenAI settings come from the environment; either value is None when unset.
OPENAI_MODEL = os.environ.get("OPENAI_MODEL")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Experiment window: data start, train/test split point, and data end.
STARTDATE, SPLITDATE, ENDDATE = '2012-01-01', '2018-01-01', '2020-01-01'

In [None]:
# Redirect RL artefacts for this spot test to a local folder, overriding the value
# read from the environment in the setup cell.
RL_OUTPUT_PATH = "./spottest/"
# Create the directory that was just configured (not whatever a previous loop
# variable happened to point at).
os.makedirs(RL_OUTPUT_PATH, exist_ok=True)

In [None]:
# Run selection for this spot test.
TARGET = 'TSLA'          # ticker whose engineered data is loaded
PROMPT_VERSION = 'v4'    # selects the strategy prompt file and output sub-folder
RISK = 'r'               # 'r' selects the high-risk profile/objectives; any other value selects the low-risk ones

# Client used for the LLM strategy generation calls.
OPENAI_CLIENT = OpenAI(api_key=OPENAI_API_KEY)

# Generate LLM Strategy

In [None]:
# Input: engineered per-ticker features; output: LLM responses grouped by risk profile and prompt version.
input_file = f"{HISTORIC_PATH}/engineered_{TARGET}_data.parquet"
llm_output_dir = f'{LLM_OUTPUT_PATH}/response/{RISK}/{PROMPT_VERSION}'

# Compare the risk code by value. `is` tests object identity, which is not guaranteed
# for equal strings and raises a SyntaxWarning on Python 3.8+.
is_high_risk = RISK == 'r'

# Load the engineered frame and index it by its 'Date' column.
engineered_df = pd.read_parquet(input_file).set_index('Date')

# The keyword names below belong to generate_strategy_for_ticker's signature; the
# low-risk variants are passed through the same keywords when RISK is not 'r'.
engineered_df = generate_strategy_for_ticker(ticker_df=engineered_df,
                                            ticker=TARGET,
                                            LLM_OUTPUT_PATH=llm_output_dir,
                                            persona=PERSONA,
                                            HIGH_RISK_PROFILE=HIGH_RISK_PROFILE if is_high_risk else LOW_RISK_PROFILE,
                                            HIGH_OBJECTIVES=HIGH_OBJECTIVES if is_high_risk else LOW_OBJECTIVES,
                                            client=OPENAI_CLIENT,
                                            model=OPENAI_MODEL,
                                            strategy_yaml_file=f'{LLM_PROMPTS_PATH}/strat_prompt_{PROMPT_VERSION}.yml',
                                            # The analyst/news prompt is only supplied for prompt version 'v4'.
                                            news_yaml_file=f'{LLM_PROMPTS_PATH}/analyst_prompt_v1.yml' if PROMPT_VERSION in ['v4'] else None,
                                            start_date=STARTDATE,
                                            end_date=ENDDATE)

engineered_df.tail(50)

In [None]:
# Summary statistics of the signal/action columns present after strategy generation.
pprint(
    engineered_df.loc[
        :,
        ["strat_signal_long", "strat_signal_short", "trade_signal", "trade_action", "action_confidence"],
    ].describe()
)

In [None]:
# Evaluate the LLM-only strategy on the held-out window.
# Label slicing is inclusive at both ends (SPLITDATE and ENDDATE).
test_engineered_df = engineered_df.loc[SPLITDATE:ENDDATE].copy()

llm_trading_metrics, llm_trades_df = evaluate_trading_metrics(test_engineered_df)
pprint(llm_trading_metrics)

# Guide RL

In [None]:
# --- RL experiment settings (all forwarded to simulator.simulateNewStrategy) ---
money = 100_000.0
stateLength = 30
actionSpace = 2
bounds = [1, 30]
step = 1
numberOfEpisodes = 50

# Cost levels expressed in percent; the middle entry (0.1 %) is used, converted to a fraction.
percentageCosts = [0, 0.1, 0.2]
transactionCosts = percentageCosts[1] / 100

simulator = TradingSimulator()


In [None]:
# Directory for this run's RL artefacts, keyed by risk profile and prompt version.
# os.path.join avoids the doubled separator that string formatting produces when
# RL_OUTPUT_PATH already ends in '/'.
rl_output_dir = os.path.join(RL_OUTPUT_PATH, RISK, PROMPT_VERSION)
os.makedirs(rl_output_dir, exist_ok=True)

train_file = os.path.join(rl_output_dir, f'{TARGET}_train_results.pkl')
test_file = os.path.join(rl_output_dir, f'{TARGET}_test_results.pkl')
train_env_file = os.path.join(rl_output_dir, f'{TARGET}_train_env.pkl')
test_env_file = os.path.join(rl_output_dir, f'{TARGET}_test_env.pkl')
q_train_file = os.path.join(rl_output_dir, f'{TARGET}_train_q_values.pkl')
q_test_file = os.path.join(rl_output_dir, f'{TARGET}_test_q_values.pkl')

# Order matters: it is the order in which artefacts are unpacked on load and written on save.
cache_files = [train_file, test_file, train_env_file, test_env_file, q_train_file, q_test_file]


def _load_pickle(file_path):
    """Return the object stored in the pickle file at ``file_path``."""
    with open(file_path, 'rb') as f:
        return pickle.load(f)


def _dump_pickle(obj, file_path):
    """Pickle ``obj`` to ``file_path``, overwriting any existing file."""
    with open(file_path, 'wb') as f:
        pickle.dump(obj, f)


if all(os.path.exists(p) for p in cache_files):
    # Reuse a previous run only when every artefact is present.
    # SECURITY: unpickling can execute arbitrary code; only load cache files that
    # were produced by this notebook.
    (train_results, test_results, train_env, test_env,
     (qt0, qt1), (q0, q1)) = [_load_pickle(p) for p in cache_files]
else:
    # No complete cache: train and test the RL agent on the LLM-annotated frame.
    strat, train_env, qt0, qt1, test_env, q0, q1 = simulator.simulateNewStrategy(
        stock_df=engineered_df.copy(),
        startingDate=STARTDATE,
        endingDate=ENDDATE,
        splitingDate=SPLITDATE,
        verbose=True,
        plotTraining=True,
        rendering=True,
        showPerformance=True,
        saveStrategy=True,
        money=money,
        actionSpace=actionSpace,
        stateLength=stateLength,
        bounds=bounds,
        step=step,
        numberOfEpisodes=numberOfEpisodes,
        transactionCosts=transactionCosts,
        ticker_symbol=TARGET
    )
    # Performance summaries computed from each environment's recorded data.
    train_results = PerformanceEstimator(train_env.data).getComputedPerformance()
    test_results = PerformanceEstimator(test_env.data).getComputedPerformance()

    # Persist everything so the next run can skip training.
    artefacts = [train_results, test_results, train_env, test_env, (qt0, qt1), (q0, q1)]
    for artefact, cache_path in zip(artefacts, cache_files):
        _dump_pickle(artefact, cache_path)

pprint(test_results)

# Analyze Results

In [None]:
# Training window of the LLM-annotated frame.
# NOTE(review): label slicing is inclusive at both ends, so a row dated exactly
# SPLITDATE would appear in both the train and the test slice — confirm intended.
train_engineered_df = engineered_df.loc[STARTDATE:SPLITDATE].copy()

# Keep the LLM's original action, then overwrite 'trade_action' with the RL agent's
# action collapsed to 1 (action == 1) or 0 (anything else).
train_engineered_df['LLM_Trade_Action'] = train_engineered_df['trade_action']
train_engineered_df['trade_action'] = train_env.data['action'].apply(lambda act: int(act == 1))

# Copy the reward columns recorded by the training environment.
for reward_col in ('reward', 'other_reward'):
    train_engineered_df[reward_col] = train_env.data[reward_col]
# Left disabled in the original notebook:
# train_engineered_df['unshaped_reward'] = train_env.data['unshaped_reward']

llm_trading_metrics, llm_trades_df = evaluate_trading_metrics(train_engineered_df, rl_env=train_env)
# Compounded return series from the environment's per-step returns.
llm_trades_df['cumulative_returns'] = train_env.data['returns'].add(1).cumprod().sub(1)

pprint(llm_trading_metrics)

In [None]:
# Display the return and reward columns of the current llm_trades_df.
llm_trades_df.loc[:, ['cumulative_returns', 'returns', 'reward']]

In [None]:
# Plot the trades held in llm_trades_df; when the notebook is run top to bottom this
# is the training-split frame returned by the preceding evaluate_trading_metrics call.
plot_llm_trade(llm_trades_df, plot=True)

In [None]:
# Held-out window of the LLM-annotated frame (label slicing is inclusive at both ends).
test_engineered_df = engineered_df.loc[SPLITDATE:ENDDATE].copy()

# Keep the LLM's original action, then overwrite 'trade_action' with the RL agent's
# action collapsed to 1 (action == 1) or 0 (anything else).
test_engineered_df['LLM_Trade_Action'] = test_engineered_df['trade_action']
test_engineered_df['trade_action'] = test_env.data['action'].apply(lambda act: int(act == 1))

# Copy the reward columns recorded by the test environment.
for reward_col in ('reward', 'other_reward'):
    test_engineered_df[reward_col] = test_env.data[reward_col]
# Left disabled in the original notebook:
# test_engineered_df['unshaped_reward'] = test_env.data['unshaped_reward']

llm_trading_metrics, llm_trades_df = evaluate_trading_metrics(test_engineered_df, rl_env=test_env)
# Compounded return series from the environment's per-step returns.
llm_trades_df['cumulative_returns'] = test_env.data['returns'].add(1).cumprod().sub(1)

pprint(llm_trading_metrics)

In [None]:
# Summary statistics of the entropy, confidence and strategy-signal columns of the current llm_trades_df.
pprint(
    llm_trades_df.loc[
        :,
        ['entropy', 'action_confidence', "strat_signal_long", "strat_signal_short"],
    ].describe()
)


In [None]:
# Summary statistics of the return and reward columns of the current llm_trades_df.
pprint(
    llm_trades_df.loc[
        :,
        ['cumulative_returns', 'returns', 'reward', 'other_reward'],
    ].describe()
)

In [None]:
# Plot the trades held in llm_trades_df; when the notebook is run top to bottom this
# is the test-split frame returned by the preceding evaluate_trading_metrics call.
plot_llm_trade(llm_trades_df, plot=True)

In [None]:
# Inspect the test environment's recorded rows for the 2018-10-01 .. 2019-01-01 window
# (label slice, inclusive at both ends).
test_env.data.loc['2018-10-01':'2019-01-01']

In [None]:
# Rows of the test environment where the RL agent's 'action' differs from 'trade_action'.
# NOTE(review): 'action' is compared raw here, whereas the analysis cells first collapse
# it to 1/0 — confirm both columns share the same encoding.
t = test_env.data.loc[:, ['action', 'trade_action', 'trade_signal', 'returns']]
d = t.loc[t['action'].ne(t['trade_action'])]
d