The following can only be run once per kernel restart

In [3]:
import multiprocessing as mp
mp.set_start_method('fork')

In [4]:
import pandas as pd
import numpy as np
import logging
from backtesting import Backtest
from backtesting.lib import resample_apply, plot_heatmaps, TRADES_AGG
logging.basicConfig(level=logging.WARNING, format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')

from strategies import LONG_SHORT_Underwater_w_decay_and_deleverage


In [13]:
OUTPUT_DIR                = "html_files"
OUTPUT_FILE_NAME_PREFIX   = "strat"
OUTPUT_FILE_NAME_PATTERN  = f'{OUTPUT_DIR}/{OUTPUT_FILE_NAME_PREFIX}'
OUTPUT_FILE_EXTENSION     = ".html"

In [14]:
import os

# This will be handy when we want to plot so it doesn't store all the html files in the root
def create_filename(stats_df):
    # Check if the folder html_files exists, if not create it
    if not os.path.exists(OUTPUT_DIR):
      os.makedirs(OUTPUT_DIR)
    
    filename  = str(OUTPUT_FILE_NAME_PATTERN)
    params    = stats_df._strategy.__dict__['_params']

    # unpack the best params and add them to a filename
    for key, value in params.items():
        # keep the first 5 letters from each key
        key       = key[:5]
        # just keep the first 4 digits from each value
        value     = str(value)[:4]
        filename += f'_{key}_{value}_'

    filename += OUTPUT_FILE_EXTENSION
    
    return filename


# Add in our signals
Run the strategies long and short based on our own AI buy and sell signals. Use it as an opportunity to improve the results of our existing AI models.
#### Read in a dataframe of our signals
prep the dataframe so it has the `Close` price, the `signal` column. 
For this we can create an `entries` column and an `exits` column.

| trigger (from CSV)  | entries          | exits            | signal |
| --------            | --------         | --------         | --     |
| 1 day passed        | None             | 1 day passed     | 0      |
| lstm_open-long      | lstm_open-long   | None             | 1      |
| lstm_open-short     | lstm_open-short  | None             | -1     |
| NA                  | None             | None             | 0      |
| stop loss           | None             | stop loss        | 0      |
| take profit         | None             | take profit      | 0      |

                      

We can then tweak our code above to read from the `entries` column instead of the `signal` column. We will be ignoring the LSTM exits for now because the backtest strategy takes over once we are in a trade. Perhaps this is something we will change in the future but for now we can keep it as is.


# Import the trades dataframe and manipulate it.

## Step 1 - Read in the CSV files

In [6]:
from typing import List
lstm_csv_files = ["test_data/2dc0f4b2_minutely.csv",
"test_data/3dca0a12_minutely.csv",
"test_data/9295144c_minutely.csv",
"test_data/b2d47ab1_minutely.csv",
"test_data/b9962cd1_minutely.csv"]

def _read_lstm_files() -> List:
  lstm_dfs = []

  for file in lstm_csv_files:
    lstm_dfs.append(pd.read_csv(file))

  return lstm_dfs
    

## Define constants and mappings

In [7]:
from dataclasses import dataclass
from typing import Optional
from collections import defaultdict

@dataclass
class BackTestColumnValues:
  entries : Optional[str]
  exits   : Optional[str]
  signal  : int

LSTM_TRIGGER_MAP = defaultdict(
  lambda            : BackTestColumnValues(None             , None            , 0 ), 
{
  "1 day passed"    : BackTestColumnValues(None             , "1 day passed"  , 0 ),
  "lstm_open-long"  : BackTestColumnValues("lstm_open-long" , None            , 1 ),
  "lstm_open-short" : BackTestColumnValues("lstm_open-short", None            , -1),  
  "N/A"             : BackTestColumnValues(None             , None            , 0 ),
  "stop loss"       : BackTestColumnValues(None             , "stop loss"     , 0 ),
  "take profit"     : BackTestColumnValues(None             , "take profile"  , 0 ),
})



## Define the transformation 

In [8]:
def _map_lstm_trigger_to_signal(trigger: str) -> Optional[int]:
  return LSTM_TRIGGER_MAP[trigger].signal


def _map_lstm_trigger_to_entries(trigger: str) -> Optional[str]:
  return LSTM_TRIGGER_MAP[trigger].entries


def _map_lstm_trigger_to_exits(trigger: str) -> Optional[str]:
  return LSTM_TRIGGER_MAP[trigger].exits


def _transform_lstm_dfs(lstm_dfs: List):
  for lstm_df in lstm_dfs:    
    lstm_df.rename(columns={'price': 'Close'}, inplace=True)

    lstm_df['trigger']  = lstm_df['trigger'].astype(str)
    lstm_df['entries']  = lstm_df['trigger'].apply(_map_lstm_trigger_to_entries)
    lstm_df['exits']    = lstm_df['trigger'].apply(_map_lstm_trigger_to_exits)
    lstm_df['signal']   = lstm_df['trigger'].apply(_map_lstm_trigger_to_signal)    
    lstm_df['time']     = pd.to_datetime(lstm_df['time'], unit='s').dt.strftime('%Y-%m-%dT%H:%M:%S.%fZ')

## The starting point of backtesting LSTM CSV files

In [9]:
lstm_dfs = _read_lstm_files()
_transform_lstm_dfs(lstm_dfs)

# Run the backtest on the lstm entries from a single dataframe
Based on looking at the stats 1 and 4 had the best results and were fairly similar. I'm going to run this on 1. You can choose to optimize it on any of them.

In [15]:
test_df = lstm_dfs[1] # <-- Change this to the index of the dataframe 0,1,2,3,4 you want to plot
test_df.index = pd.to_datetime(test_df['time']) 
test_df.rename(columns={'open': 'Open', 'high': 'High', 'low': 'Low', 'close': 'Close'}, inplace=True)
test_df.sort_index(inplace=True)

In [17]:
print(f'The first index in the dataframe is {test_df.index[0]}, the second index is {test_df.index[-1]}')

The first index in the dataframe is 2021-01-09 00:01:00+00:00, the second index is 2023-05-10 22:44:00+00:00


In [18]:
start = "2021-01-09"  # Note the strategy requires a warmup period for the ATR to calculate first trades begin after 14 days
end = "2022-05-10"  # It will always close any open trades at the end of the backtest
bt = Backtest(
    test_df.loc[start:end],
    LONG_SHORT_Underwater_w_decay_and_deleverage,
    cash=100_000_000,
    exclusive_orders=False,
    trade_on_close=True,
    margin=1,
)
stats = bt.run()
filename = create_filename(stats)
print(stats)
bt.plot(resample=False, filename=filename)

# Optimize the parameters

In [None]:
stats, heatmap = bt.optimize(
    initial_position_size = np.arange(0.3, 0.6, 0.1).tolist(),
    percent_invested_threshold = np.arange(0.3, 0.8, 0.1).tolist(),
    atr_length = np.arange(12,20,2).tolist(), # 14 days
    atr_multiplier = np.arange(0.3, 0.5, 0.1).tolist(),
    add_size = np.arange(0.05,0.20, 0.05).tolist(),
    delay_period = np.arange(250,750,250).tolist(),
    delta_time = np.arange(500,1000,100).tolist(),
    upper_bound_profit_target = np.arange(0.005, 0.04, 0.005).tolist(),
    lower_bound_loss_threshold = np.arange(-0.10, 0.00, 0.02).tolist(),
    # take_profit_loss_reduction = np.arange(-0.15, -0.05, 0.05).tolist(), # This is the amount that the take profit is reduced by if the position is highly leveraged and we wish to trim
    deleverage_pct = np.arange(0.25, 0.75, 0.25).tolist(), # This is the amount that the position is reduced by if the position is highly leveraged and we wish to trim
    max_loss_threshold = np.arange(-0.15, -0.05, 0.05).tolist(),
    max_hold_length = np.arange(60*8, 60*48, 60*4).tolist(), # 8 hours to 48 hours
    maximize='Equity Final [$]', 
    # maximize='Max. Drawdown [%]',# this can be any of the column names from the stats table the output of the backtest
    # maximize='Win Rate [%]',
    max_tries=200,
    random_state=0,
    
    return_heatmap=True)
best_params = stats._strategy.__dict__["_params"] # This will print out all the parameters used for the best backtest
print(f'Best Parameters: {best_params}')
heatmap.sort_values(ascending=False).iloc[:5] # print the top 5 parameter sets

Backtest.optimize:   0%|          | 0/13 [00:00<?, ?it/s]

Best Parameters: {'initial_position_size': 0.4, 'percent_invested_threshold': 0.5, 'atr_length': 12, 'atr_multiplier': 0.4, 'add_size': 0.05, 'delay_period': 250, 'delta_time': 900, 'upper_bound_profit_target': 0.02, 'lower_bound_loss_threshold': -0.1, 'deleverage_pct': 0.5, 'max_loss_threshold': -0.09999999999999999, 'max_hold_length': 1680}


initial_position_size  percent_invested_threshold  atr_length  atr_multiplier  add_size  delay_period  delta_time  upper_bound_profit_target  lower_bound_loss_threshold  deleverage_pct  max_loss_threshold  max_hold_length
0.4                    0.5                         12          0.4             0.05      250           900         0.020                      -0.10                       0.50            -0.10               1680               1.354023e+08
                       0.4                         12          0.4             0.10      500           600         0.025                      -0.06                       0.50            -0.10               1680               1.298580e+08
0.5                    0.6                         18          0.3             0.05      250           600         0.030                      -0.06                       0.25            -0.10               2400               1.293923e+08
0.3                    0.6                         18          0

In [None]:
stats

Start                     2021-01-09 00:01...
End                       2022-03-10 23:59...
Duration                    425 days 23:58:00
Exposure Time [%]                   80.743318
Equity Final [$]                 135402323.09
Equity Peak [$]                  149505592.19
Return [%]                          35.402323
Buy & Hold Return [%]               -3.068876
Return (Ann.) [%]                   29.651709
Volatility (Ann.) [%]               39.092791
Sharpe Ratio                         0.758496
Sortino Ratio                        1.479336
Calmar Ratio                          1.19009
Max. Drawdown [%]                  -24.915526
Avg. Drawdown [%]                   -0.503028
Max. Drawdown Duration      113 days 10:42:00
Avg. Drawdown Duration        0 days 23:10:00
# Trades                                 1540
Win Rate [%]                        58.376623
Best Trade [%]                      13.150986
Worst Trade [%]                    -26.565973
Avg. Trade [%]                    

In [None]:
# Plot the heatmaps
plot_heatmaps(heatmap, agg='mean')


Run the best strategy

In [None]:
bt.run(**best_params)
bt.plot(resample='2h', filename=f'{filename}_optimized.html')

In [None]:
best_params

{'initial_position_size': 0.4,
 'percent_invested_threshold': 0.5,
 'atr_length': 12,
 'atr_multiplier': 0.4,
 'add_size': 0.05,
 'delay_period': 250,
 'delta_time': 900,
 'upper_bound_profit_target': 0.02,
 'lower_bound_loss_threshold': -0.1,
 'deleverage_pct': 0.5,
 'max_loss_threshold': -0.09999999999999999,
 'max_hold_length': 1680}

Run the full backtest on the entire period but use the params that were optimized on 2021 data

In [None]:
# run it on the full period
start = "2021-01-09"  # Note the strategy requires a warmup period for the ATR to calculate first trades begin after 14 days
end = "2022-03-10"  # It will always close any open trades at the end of the backtest
bt = Backtest(
    test_df, #.loc[start:end],
    LONG_SHORT_Underwater_w_decay_and_deleverage,
    cash=100_000_000,
    exclusive_orders=False,
    trade_on_close=True,
    margin=1,
)
stats = bt.run(**best_params)
filename = create_filename(stats)
print(stats)
bt.plot(resample='2h', filename=filename)

Start                     2021-01-09 00:01...
End                       2023-05-10 23:06...
Duration                    851 days 23:05:00
Exposure Time [%]                   86.873118
Equity Final [$]                 104256409.31
Equity Peak [$]                  187790963.81
Return [%]                           4.256409
Buy & Hold Return [%]              -31.967011
Return (Ann.) [%]                    1.801761
Volatility (Ann.) [%]               37.270378
Sharpe Ratio                         0.048343
Sortino Ratio                        0.071419
Calmar Ratio                         0.036554
Max. Drawdown [%]                  -49.290922
Avg. Drawdown [%]                   -0.755592
Max. Drawdown Duration      325 days 17:40:00
Avg. Drawdown Duration        1 days 23:24:00
# Trades                                 2098
Win Rate [%]                        53.813155
Best Trade [%]                      10.776911
Worst Trade [%]                    -32.653414
Avg. Trade [%]                    

# Test on different periods of time Possibly set up a cross validation
Nice youtube video here https://www.youtube.com/watch?v=9m987swadQU&t=2154s&ab_channel=ChadThackray