The following can only be run once per kernel restart

In [3]:
import multiprocessing as mp
mp.set_start_method('fork')

In [4]:
import pandas as pd
import numpy as np
import logging
from backtesting import Backtest
from backtesting.lib import resample_apply, plot_heatmaps, TRADES_AGG
logging.basicConfig(level=logging.WARNING, format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')

from strategies import UnderwaterStrategy


In [13]:
OUTPUT_DIR                = "html_files"
OUTPUT_FILE_NAME_PREFIX   = "strat"
OUTPUT_FILE_NAME_PATTERN  = f'{OUTPUT_DIR}/{OUTPUT_FILE_NAME_PREFIX}'
OUTPUT_FILE_EXTENSION     = ".html"

In [14]:
import os

# This will be handy when we want to plot so it doesn't store all the html files in the root
def create_filename(stats_df):
    # Check if the folder html_files exists, if not create it
    if not os.path.exists(OUTPUT_DIR):
      os.makedirs(OUTPUT_DIR)
    
    filename  = str(OUTPUT_FILE_NAME_PATTERN)
    params    = stats_df._strategy.__dict__['_params']

    # unpack the best params and add them to a filename
    for key, value in params.items():
        # keep the first 5 letters from each key
        key       = key[:5]
        # just keep the first 4 digits from each value
        value     = str(value)[:4]
        filename += f'_{key}_{value}_'

    filename += OUTPUT_FILE_EXTENSION
    
    return filename


# Add in our signals
Run the strategies long and short based on our own AI buy and sell signals. Use it as an opportunity to improve the results of our existing AI models.
#### Read in a dataframe of our signals
prep the dataframe so it has the `Close` price, the `signal` column. 
For this we can create an `entries` column and an `exits` column.

| trigger (from CSV)  | entries          | exits            | signal |
| --------            | --------         | --------         | --     |
| 1 day passed        | None             | 1 day passed     | 0      |
| lstm_open-long      | lstm_open-long   | None             | 1      |
| lstm_open-short     | lstm_open-short  | None             | -1     |
| NA                  | None             | None             | 0      |
| stop loss           | None             | stop loss        | 0      |
| take profit         | None             | take profit      | 0      |

                      

We can then tweak our code above to read from the `entries` column instead of the `signal` column. We will be ignoring the LSTM exits for now because the backtest strategy takes over once we are in a trade. Perhaps this is something we will change in the future but for now we can keep it as is.


# Import the trades dataframe and manipulate it.

## Step 1 - Read in the CSV files

In [6]:
from typing import List
lstm_csv_files = ["test_data/2dc0f4b2_minutely.csv",
"test_data/3dca0a12_minutely.csv",
"test_data/9295144c_minutely.csv",
"test_data/b2d47ab1_minutely.csv",
"test_data/b9962cd1_minutely.csv"]

def _read_lstm_files() -> List:
  lstm_dfs = []

  for file in lstm_csv_files:
    lstm_dfs.append(pd.read_csv(file))

  return lstm_dfs
    

## Define constants and mappings

In [7]:
from dataclasses import dataclass
from typing import Optional
from collections import defaultdict

@dataclass
class BackTestColumnValues:
  entries : Optional[str]
  exits   : Optional[str]
  signal  : int

LSTM_TRIGGER_MAP = defaultdict(
  lambda            : BackTestColumnValues(None             , None            , 0 ), 
{
  "1 day passed"    : BackTestColumnValues(None             , "1 day passed"  , 0 ),
  "lstm_open-long"  : BackTestColumnValues("lstm_open-long" , None            , 1 ),
  "lstm_open-short" : BackTestColumnValues("lstm_open-short", None            , -1),  
  "N/A"             : BackTestColumnValues(None             , None            , 0 ),
  "stop loss"       : BackTestColumnValues(None             , "stop loss"     , 0 ),
  "take profit"     : BackTestColumnValues(None             , "take profile"  , 0 ),
})



## Define the transformation 

In [8]:
def _map_lstm_trigger_to_signal(trigger: str) -> Optional[int]:
  return LSTM_TRIGGER_MAP[trigger].signal


def _map_lstm_trigger_to_entries(trigger: str) -> Optional[str]:
  return LSTM_TRIGGER_MAP[trigger].entries


def _map_lstm_trigger_to_exits(trigger: str) -> Optional[str]:
  return LSTM_TRIGGER_MAP[trigger].exits


def _transform_lstm_dfs(lstm_dfs: List):
  for lstm_df in lstm_dfs:    
    lstm_df.rename(columns={'price': 'Close'}, inplace=True)

    lstm_df['trigger']  = lstm_df['trigger'].astype(str)
    lstm_df['entries']  = lstm_df['trigger'].apply(_map_lstm_trigger_to_entries)
    lstm_df['exits']    = lstm_df['trigger'].apply(_map_lstm_trigger_to_exits)
    lstm_df['signal']   = lstm_df['trigger'].apply(_map_lstm_trigger_to_signal)    
    lstm_df['time']     = pd.to_datetime(lstm_df['time'], unit='s').dt.strftime('%Y-%m-%dT%H:%M:%S.%fZ')

## The starting point of backtesting LSTM CSV files

In [9]:
lstm_dfs = _read_lstm_files()
_transform_lstm_dfs(lstm_dfs)

# Run the backtest on the lstm entries from a single dataframe
Based on looking at the stats 1 and 4 had the best results and were fairly similar. I'm going to run this on 1. You can choose to optimize it on any of them.

In [15]:
test_df = lstm_dfs[1] # <-- Change this to the index of the dataframe 0,1,2,3,4 you want to plot
test_df.index = pd.to_datetime(test_df['time']) 
test_df.rename(columns={'open': 'Open', 'high': 'High', 'low': 'Low', 'close': 'Close'}, inplace=True)
test_df.sort_index(inplace=True)

In [17]:
print(f'The first index in the dataframe is {test_df.index[0]}, the second index is {test_df.index[-1]}')

The first index in the dataframe is 2021-01-09 00:01:00+00:00, the second index is 2023-05-10 22:44:00+00:00


In [18]:
start = "2021-01-09"  # Note the strategy requires a warmup period for the ATR to calculate first trades begin after 14 days
end = "2022-05-10"  # It will always close any open trades at the end of the backtest
bt = Backtest(
    test_df.loc[start:end],
    UnderwaterStrategy,
    cash=100_000_000,
    exclusive_orders=False,
    trade_on_close=True,
    margin=1,
)
stats = bt.run()
filename = create_filename(stats)
print(stats)
bt.plot(resample=False, filename=filename)

Start                     2021-01-09 00:01...
End                       2022-05-10 23:59...
Duration                    486 days 23:58:00
Exposure Time [%]                   95.717824
Equity Final [$]                 113415930.01
Equity Peak [$]                  114930581.51
Return [%]                           13.41593
Buy & Hold Return [%]              -23.764112
Return (Ann.) [%]                    9.894885
Volatility (Ann.) [%]               27.848392
Sharpe Ratio                         0.355313
Sortino Ratio                        0.576707
Calmar Ratio                         0.418321
Max. Drawdown [%]                  -23.653836
Avg. Drawdown [%]                   -0.969412
Max. Drawdown Duration      212 days 12:25:00
Avg. Drawdown Duration        3 days 22:34:00
# Trades                                  825
Win Rate [%]                        27.878788
Best Trade [%]                      20.022618
Worst Trade [%]                    -77.879826
Avg. Trade [%]                    

# Optimize the parameters

In [25]:
stats, heatmap = bt.optimize(
    initial_position_size = np.arange(0.3, 0.6, 0.1).tolist(),
    percent_invested_threshold = np.arange(0.3, 0.8, 0.1).tolist(),
    atr_length = np.arange(12,20,2).tolist(), # 14 days
    atr_multiplier = np.arange(0.3, 0.5, 0.1).tolist(),
    add_size = np.arange(0.05,0.20, 0.05).tolist(),
    delay_period = np.arange(250,750,250).tolist(),
    delta_time = np.arange(500,1000,100).tolist(),
    upper_bound_profit_target = np.arange(0.005, 0.04, 0.005).tolist(),
    lower_bound_loss_threshold = np.arange(-0.10, 0.00, 0.02).tolist(),
    # take_profit_loss_reduction = np.arange(-0.15, -0.05, 0.05).tolist(), # This is the amount that the take profit is reduced by if the position is highly leveraged and we wish to trim
    deleverage_pct = np.arange(0.25, 0.75, 0.25).tolist(), # This is the amount that the position is reduced by if the position is highly leveraged and we wish to trim
    max_loss_threshold = np.arange(-0.15, -0.05, 0.05).tolist(),
    # max_hold_length = np.arange(60*8, 60*48, 60*4).tolist(), # 8 hours to 48 hours
    # maximize='Equity Final [$]', 
    # maximize='Max. Drawdown [%]',# this can be any of the column names from the stats table the output of the backtest
    # maximize='Win Rate [%]',
    maximize = 'Sortino Ratio',
    max_tries=200,
    random_state=0,
    
    return_heatmap=True)
best_params = stats._strategy.__dict__["_params"] # This will print out all the parameters used for the best backtest
print(f'Best Parameters: {best_params}')
heatmap.sort_values(ascending=False).iloc[:5] # print the top 5 parameter sets

Backtest.optimize:   0%|          | 0/13 [00:00<?, ?it/s]

Best Parameters: {'initial_position_size': 0.3, 'percent_invested_threshold': 0.7000000000000002, 'atr_length': 14, 'atr_multiplier': 0.4, 'add_size': 0.1, 'delay_period': 500, 'delta_time': 800, 'upper_bound_profit_target': 0.02, 'lower_bound_loss_threshold': -0.1, 'deleverage_pct': 0.25, 'max_loss_threshold': -0.15}


initial_position_size  percent_invested_threshold  atr_length  atr_multiplier  add_size  delay_period  delta_time  upper_bound_profit_target  lower_bound_loss_threshold  deleverage_pct  max_loss_threshold
0.3                    0.7                         14          0.4             0.10      500           800         0.020                      -0.10                       0.25            -0.15                 1.152340
0.5                    0.5                         16          0.3             0.05      250           800         0.005                      -0.08                       0.25            -0.15                 0.872284
                       0.4                         12          0.4             0.15      250           800         0.005                      -0.08                       0.25            -0.10                 0.817457
0.3                    0.7                         18          0.4             0.10      250           700         0.040                      -0

In [26]:
stats

Start                     2021-01-09 00:01...
End                       2023-05-10 22:44...
Duration                    851 days 22:43:00
Exposure Time [%]                   84.593056
Equity Final [$]                 143810876.65
Equity Peak [$]                  179890794.05
Return [%]                          43.810877
Buy & Hold Return [%]              -32.222499
Return (Ann.) [%]                   16.841889
Volatility (Ann.) [%]               26.568483
Sharpe Ratio                         0.633905
Sortino Ratio                         1.15234
Calmar Ratio                         0.765313
Max. Drawdown [%]                  -22.006543
Avg. Drawdown [%]                   -0.484056
Max. Drawdown Duration      181 days 23:37:00
Avg. Drawdown Duration        1 days 04:33:00
# Trades                                 2435
Win Rate [%]                        57.782341
Best Trade [%]                      15.084097
Worst Trade [%]                    -17.398172
Avg. Trade [%]                    

In [27]:
# Plot the heatmaps
plot_heatmaps(heatmap, agg='mean')


Run the best strategy

In [28]:
bt.run(**best_params)
bt.plot(resample='2h', filename=f'{filename}_optimized.html')

The best parameters. NOTE if you find some good ones place them in the `best_params.py` file

In [29]:
best_params

{'initial_position_size': 0.3,
 'percent_invested_threshold': 0.7000000000000002,
 'atr_length': 14,
 'atr_multiplier': 0.4,
 'add_size': 0.1,
 'delay_period': 500,
 'delta_time': 800,
 'upper_bound_profit_target': 0.02,
 'lower_bound_loss_threshold': -0.1,
 'deleverage_pct': 0.25,
 'max_loss_threshold': -0.15}

Run the full backtest on the entire period but use the params that were optimized on 2021 data

In [32]:
# run it on the full period
start = "2021-01-09"  # Note the strategy requires a warmup period for the ATR to calculate first trades begin after 14 days
end = "2023-05-10"  # It will always close any open trades at the end of the backtest
bt = Backtest(
    test_df, #.loc[start:end],
    UnderwaterStrategy,
    cash=100_000_000,
    exclusive_orders=False,
    trade_on_close=True,
    margin=1, # 0.5=2x leverage 1 = 1x leverage, .1 = 10x leverage
)
stats = bt.run(**best_params) # Here we are using the best parameters from the optimization
filename = create_filename(stats)
print(stats)
bt.plot(resample='2h', filename=filename) # Note we are resampling to 2hours feel free to set it to FALSE to see how it really trades

Start                     2021-01-09 00:01...
End                       2023-05-10 22:44...
Duration                    851 days 22:43:00
Exposure Time [%]                   84.611152
Equity Final [$]                   1147357.54
Equity Peak [$]                   101523447.4
Return [%]                         -98.852642
Buy & Hold Return [%]              -32.222499
Return (Ann.) [%]                  -85.250827
Volatility (Ann.) [%]              222.529001
Sharpe Ratio                          -0.3831
Sortino Ratio                        -0.55802
Calmar Ratio                        -0.860171
Max. Drawdown [%]                  -99.109167
Avg. Drawdown [%]                  -20.237501
Max. Drawdown Duration      833 days 21:05:00
Avg. Drawdown Duration      166 days 18:41:00
# Trades                                 2446
Win Rate [%]                        56.950123
Best Trade [%]                      26.195105
Worst Trade [%]                    -25.935411
Avg. Trade [%]                    

You can also run the strategy with a `LONG_ONLY` or `SHORT_ONLY` mode. 

In [None]:
UnderwaterStrategy.trade_type = 'LONG_ONLY'
# run it on the full period
start = "2021-01-09"  # Note the strategy requires a warmup period for the ATR to calculate first trades begin after 14 days
end = "2023-05-10"  # It will always close any open trades at the end of the backtest
bt = Backtest(
    test_df, #.loc[start:end],
    UnderwaterStrategy,
    cash=100_000_000,
    exclusive_orders=False,
    trade_on_close=True,
    margin=1, # 0.5=2x leverage 1 = 1x leverage, .1 = 10x leverage
)
stats = bt.run(**best_params) # Here we are using the best parameters from the optimization
filename = create_filename(stats)
print(stats)
bt.plot(resample='2h', filename=filename) # Note we are resampling to 2hours feel free to set it to FALSE to see how it really trades

#### Go up a few cells and try optimizing the backtest as a long only or short only

# Test on different periods of time Possibly set up a cross validation
Nice youtube video here https://www.youtube.com/watch?v=9m987swadQU&t=2154s&ab_channel=ChadThackray