# Benchmark Paper Experiment Test Bench Replication
```BibTeX
@article{theate2021application,
  title={An application of deep reinforcement learning to algorithmic trading},
  author={Th{\'e}ate, Thibaut and Ernst, Damien},
  journal={Expert Systems with Applications},
  volume={173},
  pages={114632},
  year={2021},
  publisher={Elsevier}
}
```

In this notebook we replicate the architecture of our benchmark paper.

## Setup Notebook

In [None]:
import os
import sys
import warnings
import time
import yaml
import pickle
from textwrap import dedent
from datetime import datetime, timedelta

import pandas as pd
import numpy as np
from scipy import stats
from pprint import pprint


# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings("ignore")

# Runtime environment switches used by the rest of the notebook.
IN_KAGGLE = False
DATA_PATH = "./data/"

# Make the local helper modules importable: first the `utils` directory
# under the current working directory, then the parent directory.
# `module_path` ends up holding the parent directory, as before.
for _candidate in (os.path.join(os.getcwd(), 'utils'), os.path.join('..')):
    module_path = os.path.abspath(_candidate)
    if module_path not in sys.path:
        sys.path.append(module_path)

from tqdm.notebook import tqdm
import logging

from dotenv import load_dotenv
load_dotenv()

from paper_utils import TradingSimulator, PerformanceEstimator
from data_utils import safe_pickle_load

## Experiment Parameters

In [None]:
# Simulator object whose simulateNewStrategy() method is called once per
# experiment in the run cell below.
simulator = TradingSimulator()

# Name of the strategy handed to the simulator.
strategy = "TDQN"

# Starting capital forwarded to the simulator.
money = 100000.

# Length of the state window forwarded to the simulator.
stateLength = 30

# `observationSpace` is passed to simulateNewStrategy() in the run cell but
# was never defined, which raised a NameError on the first experiment.
# NOTE(review): the formula below is the one used by the benchmark paper's
# reference implementation (one position flag plus four price/volume
# features per step) -- confirm it matches what paper_utils expects.
observationSpace = 1 + (stateLength - 1) * 4
actionSpace = 2

# Forwarded unchanged to simulateNewStrategy().
# NOTE(review): presumably the parameter sweep range/step of the paper's
# classical strategies -- verify against paper_utils.
bounds = [1, 30]
step = 2

# Number of training episodes per experiment.
N_EPISODES = 50

# Data window and train/test split date (the `splitingDate` spelling matches
# the keyword argument expected by simulateNewStrategy()).
startingDate = '2012-01-01'
endingDate = '2020-01-01'
splitingDate = '2018-01-01'

# Candidate transaction costs in percent; the second entry (0.1 %) is
# selected and converted to a fraction.
percentageCosts = [0, 0.1, 0.2]
transactionCosts = percentageCosts[1]/100

# Display name -> ticker. The run cell passes the display name to the
# simulator and uses the ticker in cache file names and result keys.
STOCKS = {
    'Meta': 'META',
    'Amazon': 'AMZN',
    'Google': 'GOOGL',
    'Microsoft': 'MSFT',
    'Tesla': 'TSLA',
    'Apple': 'AAPL',
}

## Run Experiments

In [None]:
# Number of independent train/evaluate repetitions run for each stock.
N_EXPERIMENTS = 25
# Results collected by the run cell, keyed by ticker symbol.
STOCK_RESULTS = {}
# Directory holding the per-stock pickle files written/read by the cells below.
RL_OUTPUT_DIR = "./papers/bm_experiment"

In [None]:
# Run the benchmark strategy N_EXPERIMENTS times per stock, collecting the
# train/test performance tables and the average wall-clock times, and cache
# everything as pickles under RL_OUTPUT_DIR so reruns can skip the training.

# Create the cache directory up front so a long training run cannot fail at
# the very end when the results are written.
os.makedirs(RL_OUTPUT_DIR, exist_ok=True)

for stock_name, stock in tqdm(STOCKS.items(), disable=True):
    train_file = f'{RL_OUTPUT_DIR}/{stock}_bm_train_results.pkl'
    test_file = f'{RL_OUTPUT_DIR}/{stock}_bm_test_results.pkl'
    time_file = f'{RL_OUTPUT_DIR}/{stock}_bm_time_results.pkl'

    # Reuse the cache only when it is complete. The aggregation cell also
    # reads the timing file, so a cache without it has to be regenerated.
    cache_files = (train_file, test_file, time_file)
    if all(os.path.exists(path) for path in cache_files):
        train_results = safe_pickle_load(train_file)
        test_results = safe_pickle_load(test_file)
        time_results = safe_pickle_load(time_file)
        # Same 3-tuple shape as the freshly computed branch below.
        STOCK_RESULTS[stock] = (train_results, test_results, time_results)
        continue  # Skip the experiment loop if cache exists

    train_results = []
    test_results = []
    train_times = []
    test_times = []
    print(stock_name)
    for _ in tqdm(range(N_EXPERIMENTS), desc="Running test episodes...", disable=True):
        start_train_time = time.time()
        strat, train_env, test_env = simulator.simulateNewStrategy(
            strategy,
            stock_name,
            startingDate=startingDate,
            endingDate=endingDate,
            splitingDate=splitingDate,
            verbose=True,
            plotTraining=False,
            rendering=False,
            showPerformance=False,
            saveStrategy=False,
            money=money,
            observationSpace=observationSpace,
            actionSpace=actionSpace,
            stateLength=stateLength,
            bounds=bounds,
            step=step,
            numberOfEpisodes=N_EPISODES,
            transactionCosts=transactionCosts,
        )
        end_train_time = time.time()
        train_times.append(end_train_time - start_train_time)

        # Performance table computed from the training environment's data.
        analyser = PerformanceEstimator(train_env.data)
        train_perf = analyser.getComputedPerformance()
        train_results.append(train_perf)

        # NOTE(review): test_env is already returned by simulateNewStrategy(),
        # so this interval only covers the performance computation on the
        # test data -- confirm that is the intended "test time".
        start_test_time = time.time()
        analyser = PerformanceEstimator(test_env.data)
        test_perf = analyser.getComputedPerformance()
        test_results.append(test_perf)
        end_test_time = time.time()
        test_times.append(end_test_time - start_test_time)

    avg_train_time = sum(train_times) / N_EXPERIMENTS
    avg_test_time = sum(test_times) / N_EXPERIMENTS

    time_results = {
        'avg_train_time': avg_train_time,
        'avg_test_time': avg_test_time
    }

    STOCK_RESULTS[stock] = (train_results, test_results, time_results)

    # Persist all three artefacts so the next run hits the cache.
    with open(train_file, 'wb') as f:
        pickle.dump(train_results, f)
    with open(test_file, 'wb') as f:
        pickle.dump(test_results, f)
    with open(time_file, 'wb') as f:
        pickle.dump(time_results, f)

## Aggregate Results and T-Tests

In [None]:
# Aggregate the cached test results of every stock into one summary table:
# per metric the mean, standard deviation and the p-value of a one-sample
# t-test, plus the average train/test times. The table is written to
# final_summary.csv and printed transposed.

# Per-stock one-row frames; concatenated once after the loop instead of
# growing a DataFrame inside it.
summary_frames = []
sharpe_df = pd.read_csv("./papers/results.csv")
sharpe_df.set_index('Stock', inplace=True)

for stock_name, stock in tqdm(STOCKS.items(), disable=False, desc="Testing stock workbench..."):
    print(stock_name)
    test_results = safe_pickle_load(f'{RL_OUTPUT_DIR}/{stock}_bm_test_results.pkl')
    time_results = safe_pickle_load(f'{RL_OUTPUT_DIR}/{stock}_bm_time_results.pkl')

    # Group the values of every experiment by metric name. Each result is
    # read through its 'Metric' and 'Value' columns.
    metrics_data = {}
    for df in test_results:
        for metric, value in zip(df['Metric'], df['Value']):
            metrics_data.setdefault(metric, []).append(value)

    stock_summary = {}

    for metric, values in metrics_data.items():
        # Missing values are counted as 0 in the statistics.
        data = pd.Series(values).fillna(0)
        mean_val = round(data.mean(), 2)
        std_val = round(data.std(), 2)

        # TDQN only has Sharpe ratio! Paper reports only this.
        # Every other metric is tested against a population mean of 0.
        if metric.lower() == "sharpe ratio":
            tdqn_value = sharpe_df.loc[stock_name, 'TDQN'] if stock_name in sharpe_df.index else 0
        else:
            tdqn_value = 0

        t_stat, p_value = stats.ttest_1samp(data, popmean=tdqn_value)
        p_value = round(p_value, 2)

        stock_summary[metric] = [mean_val, std_val, p_value]

    # One row per stock with (metric, statistic) column pairs.
    stock_df = (
        pd.DataFrame(stock_summary, index=["Mean", "+/-", "P-Value"])
        .unstack()
        .to_frame()
        .T
    )
    stock_df.index = [stock]

    stock_df['Avg Train Time (s)'] = time_results['avg_train_time']
    stock_df['Avg Test Time (s)'] = time_results['avg_test_time']

    summary_frames.append(stock_df)

# pd.concat() rejects an empty list, so keep the empty-frame result when
# there are no stocks to summarise.
final_summary_df = pd.concat(summary_frames) if summary_frames else pd.DataFrame()

final_summary_df.to_csv('final_summary.csv', index=True)
pprint(final_summary_df.T)
