### **Backtesting: Incorporating a _Tailored Designed_ algo as an ENTRY signal.**
**Features**
- Resamples 1-minute data from a local CSV/h5 file to 15 minutes (currently using h5 files saved on Google Drive).
- Uses Numba, TA-Lib (and for the cherry on top, joblib), to further speed up the computation.
- VectorBT Indicator Factory to comprise all the smaller indicators into one super indicator (Numba, TA-Lib and Joblib for speed) as we deal with various ranges of parameters.
- Dynamic take profit - Generating exit signals using Moving Average cloud cross and MACD Histogram.
- Trailing stop loss.
- Cross-validates by optimizing on entries filtered to odd dates and comparing the best optimized result with entries filtered to even dates.
- Plotting with Seaborn.

In [None]:
# Entry filter: if True, entries require both the TD signal and the
# Stochastic RSI oversold signal; if False, only the TD signal is used.
ENABLE_STOCHASTIC = True
# Stop type: if True the portfolio is simulated with a trailing stop loss
# (tsl_stop); if False a basic stop loss (sl_stop) is used instead.
ENABLE_TRAILING_SL = False

# If True, fixed take-profit values are passed to the simulation (tp_stop);
# if False, tp_stop is NaN and only the dynamic exits (if any) take profit.
ENABLE_TP_VALUE = False
# Dynamic take-profit mode: 'ma_cloud', 'macd', 'both' or False.
DYNAMIC_TP_SETTING = 'both'
"""
if 'ma_cloud', exit signals are generated using cloud strategy
if 'macd' exit signals are generated when histogram crosses below 0
if 'both' exit signals are generated using ma_cloud + macd
if False exit only based on stop loss or trailing stop loss
"""
# ----------------------------------------------------------------
# Note: If both take-profit values and a dynamic TP setting are enabled, the
# signals are combined: take-profit values + cloud, take-profit values + both,
# etc.
# ----------------------------------------------------------------

# Format of the stop values, forwarded to `delta_format`: 'absolute' or 'percent'.
STOP_ORDER_SETTING = 'percent'
"""
if 'absolute' use absolute terms
if 'percent' use percentage where 0.01 == 1%
"""

In [None]:
# Importing libraries
import vectorbtpro as vbt
import numpy as np
import pandas as pd

import seaborn  # conda install -y -c anaconda seaborn
import talib

from datetime import datetime
from joblib import Memory  # conda install -y joblib
from numba import njit

import gdown  # conda install -y -c conda-forge gdown
import os
# Ensure widgets are installed to see the progress bar:
# conda install -y -c conda-forge ipywidgets=7.7.2

# On-disk joblib cache (directory 'joblib_cache_dir') used by the
# `@memory.cache`-decorated indicator functions in this notebook.
memory = Memory(location='joblib_cache_dir', verbose=0)

In [None]:
# Use vectorbt's dark theme for all plots produced in this notebook.
vbt.settings.set_theme("dark")

In [None]:
# Data set selection. Both values are combined into the CSV/h5 file names,
# and the year is also the lookup key for the per-year Drive file id.
SELECT_DATA_TYPE = 'spx'  # select from 'spx', 'wti'
SELECT_DATA_YEAR = 2018  # select from 2014-2021


In [None]:
# TODO: add to git ignore
# How to obtain the h5 file when it is missing locally:
#   'all'    -> download every h5 file from Drive (warning: large file > 300MB)
#   'select' -> download only the SELECT_DATA_YEAR file
#   any other value -> build the h5 file from a local CSV in the current directory
SELECT_DOWNLOAD_DATA = 'all'
MY_FILENAME_CSV = f'{SELECT_DATA_TYPE}_{SELECT_DATA_YEAR}.csv'
MY_FILENAME_H5 = f'{SELECT_DATA_TYPE}_{SELECT_DATA_YEAR}_data.h5'

# Data folder: https://drive.google.com/drive/u/0/folders/1iyHMZoMDNrqNenjEECNPpGlb_k5AKbhn
# Google Drive file ids keyed by year.
# NOTE(review): the ids are keyed by year only, not by SELECT_DATA_TYPE --
# confirm that a 'select' download fetches the right instrument.
DRIVE_LINKS = {
    2014: '1Na9UJiDabeKdVSuvk6B_5VASYpqrtmHm',
    2015: '1ppCGhAqFnn-08jGnafdnYE6lrsKUscBX',
    2016: '1sYXweNNBRyx1_EEdELEpw1sTBJo5-jdB',
    2017: '1Cf08sqBuYFaE1zqJGC3QdU2wHY0ukZGS',
    2018: '1R1ZNr7P2d0U-Re_Xg0zp7EVwZgpk4zHm',
    2019: '1zqpRGZFmAlhjT-VyJtRcoXY5MTW3jGsU',
    2020: '1JVEEXRd8J1EOv2DZmm-fiB7FpJTdIsm6',
    2021: '1THZnlU65a_v4Q72zN-yFh3UNspbegCax',
}

# Location of the h5 file inside the local data folder
output = 'data/' + MY_FILENAME_H5

# Check if filename hdf exists in local directory
if os.path.isfile(output):
    print(f"The file '{MY_FILENAME_H5}' exists in the directory.")

else:
    print(f"The file '{MY_FILENAME_H5}' does not exist in the directory")

    if SELECT_DOWNLOAD_DATA == 'select':
        print("Downloading one year dataset")
        # The target folder must exist before a file can be written into it
        os.makedirs('data', exist_ok=True)
        drive_file_id = DRIVE_LINKS[SELECT_DATA_YEAR]
        gdown.download(id=drive_file_id, output=output, quiet=False)

    elif SELECT_DOWNLOAD_DATA == 'all':
        print("Downloading entire dataset")
        drive_folder_id = '1Bdasv20K1jlf6pKJe7TR80kFlakBHagy'
        gdown.download_folder(id=drive_folder_id,
                              quiet=True,
                              use_cookies=False)

    else:
        print("Checking for CSV file")

        if not os.path.isfile(MY_FILENAME_CSV):
            print(
                f"The file '{MY_FILENAME_CSV}' does not exist in the current directory. Please download manually"
            )
        else:
            # Read from local csv and store it as h5 for the next cell to load
            csv = pd.read_csv(MY_FILENAME_CSV, index_col=0, parse_dates=True)
            data = vbt.Data.from_data({'close': csv})
            os.makedirs('data', exist_ok=True)
            # The result of to_hdf is not bound to a name; the file is
            # re-read from disk in the next cell.
            data.to_hdf(output)


In [None]:
# Load the stored data and resample it to 15-minute bars,
# dropping every row that contains a NaN in any column
h1_data = vbt.HDFData.fetch('data/' + MY_FILENAME_H5)
m15_data = h1_data.resample('15T').get().dropna()

# Day-of-month parity per bar (0 for even days, 1 for odd days)
day_parity = pd.Series(m15_data.index.day % 2).values

# Append two bool columns: `even` is True on even days, `odd` on odd days
m15_data = m15_data.assign(even=day_parity == 0, odd=day_parity != 0)

high, low, close = (m15_data.get(column)
                    for column in ('high', 'low', 'close'))

In [None]:
# Example "Tailored Designed" indicator: signals on runs of consecutive moves
@memory.cache  # For faster computation and memory caching to restore from memory
@njit(nogil=True)  # Using Numba for compiling
def tds_nb(source, max_count=3, signal_count=3):
    """Flag bars that complete a run of consecutive falling or rising values.

    Each element from index 4 onwards is compared with the element before
    it. A run counter grows by one for every consecutive lower value (buy
    side) or higher value (sell side), and wraps back to 1 once it exceeds
    ``max_count``. A signal is set on the bar where the counter equals
    ``signal_count``.

    Args:
        source: 1-D array of values to scan.
        max_count: Run length after which the counter wraps back to 1.
        signal_count: Run length that triggers a signal.

    Returns:
        Tuple ``(buy_sig, sell_sig)`` of boolean arrays shaped like
        ``source``; ``buy_sig`` marks falling runs, ``sell_sig`` rising runs.
    """
    buy_sig = np.zeros(source.shape, dtype=np.bool_)
    sell_sig = np.zeros(source.shape, dtype=np.bool_)

    falling_run = 0  # consecutive lower values ending at the current bar
    rising_run = 0  # consecutive higher values ending at the current bar

    for bar in range(4, source.shape[0]):
        current = source[bar]
        previous = source[bar - 1]

        if current < previous:
            rising_run = 0
            falling_run += 1
            if falling_run > max_count:
                falling_run = 1  # wrap count back to 1, on max count
            if falling_run == signal_count:
                buy_sig[bar] = True

        elif current > previous:
            falling_run = 0
            rising_run += 1
            if rising_run > max_count:
                rising_run = 1  # wrap count back to 1, on max count
            if rising_run == signal_count:
                sell_sig[bar] = True

        else:
            # Unchanged (or non-comparable) value breaks both runs
            falling_run = 0
            rising_run = 0

    return buy_sig, sell_sig


In [None]:
@memory.cache
def stochastic_rsi(source, rsi_period, slowd_period, rsi_oversold_level):
    """Return a boolean oversold mask from a Stochastic oscillator of the RSI.

    The RSI of ``source`` is fed to ``talib.STOCH`` as high, low and close.
    ``rsi_period`` is used for both the RSI and the fast %K window, and
    ``slowd_period`` for both the slow %K and slow %D windows.

    Args:
        source: 1-D array of prices the RSI is computed on.
        rsi_period: RSI period, also used as the Stochastic fast %K period.
        slowd_period: Slow %D period, also used as the slow %K period.
        rsi_oversold_level: Threshold; bars with slow %K at or below it are
            flagged.

    Returns:
        Boolean array, True where slow %K <= ``rsi_oversold_level``.
    """
    # Compute on `source` (not the notebook-level `close`) so the result
    # always matches the argument that joblib uses as the cache key.
    rsi = talib.RSI(source, timeperiod=rsi_period)

    # Keyword arguments are required here: positionally, the argument after
    # `slowk_period` is `slowk_matype`, so passing `slowd_period` there would
    # select a moving-average type instead of setting the %D period.
    slowk, _ = talib.STOCH(rsi,
                           rsi,
                           rsi,
                           fastk_period=rsi_period,
                           slowk_period=slowd_period,
                           slowd_period=slowd_period)

    buy_strsi = slowk <= rsi_oversold_level

    return buy_strsi


In [None]:
# Create more indicators, these can be used for smarter/dynamic exits for optimal take-profit
@memory.cache
def ma_indicator(source, fastma_period, slowma_period):
    """Return the fast and slow simple moving averages of ``source``.

    Args:
        source: 1-D array of prices.
        fastma_period: Window of the fast SMA.
        slowma_period: Window of the slow SMA.

    Returns:
        Tuple ``(fast_ma, slow_ma)``.
    """
    # Compute on `source` (not the notebook-level `close`) so the result
    # always matches the argument that joblib uses as the cache key.
    fast_ma = talib.SMA(source, timeperiod=fastma_period)
    slow_ma = talib.SMA(source, timeperiod=slowma_period)

    return fast_ma, slow_ma


@memory.cache
def macd_indicator(source, fastmacd_period, slowmacd_period,
                   signalmacd_period):
    """Return the MACD histogram of ``source``.

    Args:
        source: 1-D array of prices.
        fastmacd_period: Fast period passed to ``talib.MACD``.
        slowmacd_period: Slow period passed to ``talib.MACD``.
        signalmacd_period: Signal period passed to ``talib.MACD``.

    Returns:
        The histogram output of ``talib.MACD``; the MACD and signal lines
        are discarded.
    """
    # Compute on `source` (not the notebook-level `close`) so the result
    # always matches the argument that joblib uses as the cache key.
    _, _, macdhist = talib.MACD(source,
                                fastperiod=fastmacd_period,
                                slowperiod=slowmacd_period,
                                signalperiod=signalmacd_period)

    return macdhist


In [None]:
# Apply function for the indicator factory: bundles the TD signal,
# Stochastic RSI, moving averages and MACD histogram into one call


def super_indicator(source, max_count, signal_count, rsi_period, slowd_period,
                    rsi_oversold_level, fastma_period, slowma_period,
                    fastmacd_period, slowmacd_period, signalmacd_period):
    """Compute every entry/exit building block for one parameter combination.

    Returns:
        Tuple ``(buy_tds, buy_strsi, fast_ma, slow_ma, macdh)``: the buy
        signal of ``tds_nb`` (its sell signal is dropped), the oversold mask
        of ``stochastic_rsi``, both outputs of ``ma_indicator`` and the
        histogram of ``macd_indicator``.
    """
    td_signals = tds_nb(source, max_count, signal_count)
    oversold_mask = stochastic_rsi(source, rsi_period, slowd_period,
                                   rsi_oversold_level)
    moving_averages = ma_indicator(source, fastma_period, slowma_period)
    histogram = macd_indicator(source, fastmacd_period, slowmacd_period,
                               signalmacd_period)

    fast_line, slow_line = moving_averages
    return td_signals[0], oversold_mask, fast_line, slow_line, histogram


In [None]:
# Create IF template, so we can generate many parameters later.
# The default value of every parameter; the key order is also the
# parameter order of the indicator.
super_indicator_param_defaults = dict(
    max_count=3,
    signal_count=3,
    rsi_period=14,
    slowd_period=3,
    rsi_oversold_level=30,
    fastma_period=30,
    slowma_period=100,
    fastmacd_period=12,
    slowmacd_period=26,
    signalmacd_period=9,
)

super_indicator_factory = vbt.IF(
    class_name='super_indicator_vbt',
    short_name='super_indicator_vbt',
    prepend_name=True,
    input_names=['close'],
    param_names=list(super_indicator_param_defaults),
    output_names=['buy_tds', 'buy_strsi', 'fast_ma', 'slow_ma', 'macdh'],
)

# Bind the apply function; it is called on one column at a time (takes_1d)
super_indicator_vbt = super_indicator_factory.with_apply_func(
    super_indicator, takes_1d=True, **super_indicator_param_defaults)

In [None]:
# Compute all math/indicators needed.
# Scalars are fixed settings; the three MACD periods are swept over ranges
# and combined as a full product (param_product=True).
# NOTE: this rebinds the name `super_indicator` from the apply function to
# the indicator result, so the factory cell cannot be re-run after this one
# without first re-running the cell that defines the function.
sweep_params = dict(
    max_count=3,
    signal_count=3,
    rsi_period=40,
    slowd_period=4,
    rsi_oversold_level=30,
    fastma_period=10,
    slowma_period=21,
    fastmacd_period=np.arange(8, 13),
    slowmacd_period=np.arange(13, 28),
    signalmacd_period=np.arange(7, 30),
)

super_indicator = super_indicator_vbt.run(
    close,
    param_product=True,
    execute_kwargs={'show_progress': True},
    **sweep_params)


In [None]:
# Entries: Find best parameters for entries on ODD days
# Filter out stochastic entries based on setting input
if ENABLE_STOCHASTIC:
    entries = super_indicator.buy_tds_equal(
        1) & super_indicator.buy_strsi_equal(1)

else:
    entries = super_indicator.buy_tds_equal(1)

# Keep entries on odd days only: clear every row that falls on an even day.
# One row mask covers all parameter columns at once, instead of rewriting
# the frame column by column in a Python loop.
odd_day_mask = m15_data['odd'].to_numpy()
entries.loc[~odd_day_mask, :] = False

# Exits using indicators, instead of hard take-profit levels.
# MA cloud exit: close crosses below whichever moving average is on top.
exits_ma = (super_indicator.close_crossed_below(super_indicator.fast_ma)
            & super_indicator.fast_ma_above(super_indicator.slow_ma)) | (
                super_indicator.close_crossed_below(super_indicator.slow_ma)
                & super_indicator.slow_ma_above(super_indicator.fast_ma))

# MACD exit: histogram crosses below zero
exits_macd = super_indicator.macdh_crossed_below(0)

# Filter out dynamic take profit conditions based on setting input
if DYNAMIC_TP_SETTING == 'both':
    exits = exits_ma | exits_macd

elif DYNAMIC_TP_SETTING == 'ma_cloud':
    exits = exits_ma

elif DYNAMIC_TP_SETTING == 'macd':
    exits = exits_macd

else:
    # No dynamic exits: an all-False array with the shape of entries, so
    # positions are only closed by the stop orders
    exits = np.full(entries.shape, False)


In [None]:
# Add Trailing Stop Loss, to reduce drawdown
# Filter out stop order values based on setting input

# Fixed take-profit levels, or NaN to disable them.
# NOTE(review): these levels are the same for both STOP_ORDER_SETTING
# values; under 'percent' (0.01 == 1%) they look very large -- confirm.
tp = [20, 40, 60] if ENABLE_TP_VALUE else np.nan

# Stop distances to test; the same candidates serve the trailing and the
# basic stop loss. Other candidates tried before:
#   percent: [0.001, 0.0005], [0.001, 0.0005, 0.0001], [0.01, 0.02, 0.03]
#   absolute: [1, 2, 3]
if STOP_ORDER_SETTING == 'percent':
    stop_candidates = [0.000333]
else:
    stop_candidates = [1, 2]

# Exactly one of the two stop types is active; the other one is NaN
if ENABLE_TRAILING_SL:
    tsl, sl = stop_candidates, np.nan
else:
    tsl, sl = np.nan, stop_candidates

pf = vbt.Portfolio.from_signals(
    close=close,
    entries=entries,
    exits=exits,
    size=2700 * 50,
    size_type='value',
    init_cash='auto',
    sl_stop=vbt.Param(sl),
    tsl_stop=vbt.Param(tsl),
    tp_stop=vbt.Param(tp),
    delta_format=STOP_ORDER_SETTING,
    fixed_fees=20,
    freq='15m',
)

In [None]:
# Save results to file

# The current timestamp becomes the suffix of the results file name
now = datetime.now()
timestamp_str = now.strftime("%Y-%m-%d-%H-%M-%S")
file_path = f'results {timestamp_str}'

# One row per parameter combination (agg_func=None), best Sortino ratio first
result_metrics = ['total_return', 'max_dd', 'win_rate', 'sortino_ratio']
param_result_matrix = pf.stats(result_metrics, agg_func=None).sort_values(
    'Sortino Ratio', ascending=False)

# Optional filter, currently disabled -- keep only profitable combinations:
# param_result_matrix = param_result_matrix[
#     param_result_matrix['Total Return [%]'] > 1]

param_result_matrix.to_csv(file_path + '.csv')
param_result_matrix

In [None]:
# Plot a pair plot of the saved results against the Sortino ratio.
# The results are re-read from the CSV written by the previous cell, and
# each panel is drawn with a regression fit (kind='reg').

re_read_csv = pd.read_csv(f'{file_path}.csv')
seaborn.pairplot(re_read_csv, x_vars=['Sortino Ratio'], kind='reg')

In [None]:
# Show the best result: the row with the highest Sortino ratio.
# Its `.name` holds the parameter combination that produced it.
best_result = param_result_matrix.sort_values('Sortino Ratio',
                                              ascending=False).iloc[0]
best_result

In [None]:
# Use best parameters from ODD days, and test on EVEN days (blind test!)
# If EVEN days are also profitable, we might have a good strategy

# The row label of the best result is the parameter combination: the three
# stop parameters first, followed by the ten indicator parameters.
(best_sl, best_tsl, best_tp, best_td_max, best_td_signal, best_rsi_period,
 best_sto_rsi_slowd, best_oversold_level, best_fastma_period,
 best_slowma_period, best_fastmacd_period, best_slowmacd_period,
 best_signalmacd_period) = best_result.name[:13]

super_indicator = super_indicator_vbt.run(
    close,
    max_count=best_td_max,
    signal_count=best_td_signal,
    rsi_period=best_rsi_period,
    slowd_period=best_sto_rsi_slowd,
    rsi_oversold_level=best_oversold_level,
    fastma_period=best_fastma_period,
    slowma_period=best_slowma_period,
    fastmacd_period=best_fastmacd_period,
    slowmacd_period=best_slowmacd_period,
    signalmacd_period=best_signalmacd_period)

# Build the entries exactly like the optimization run did, so the blind
# test evaluates the same strategy: the stochastic filter is applied only
# when ENABLE_STOCHASTIC is set.
best_entries = super_indicator.buy_tds_equal(1)
if ENABLE_STOCHASTIC:
    best_entries = best_entries & super_indicator.buy_strsi_equal(1)

# Filter out entries for even days only
best_entries = (best_entries & m15_data['even']).to_numpy()

# MA cloud exit: close crosses below whichever moving average is on top
best_exits_ma = (
    super_indicator.close_crossed_below(super_indicator.fast_ma)
    & super_indicator.fast_ma_above(super_indicator.slow_ma)) | (
        super_indicator.close_crossed_below(super_indicator.slow_ma)
        & super_indicator.slow_ma_above(super_indicator.fast_ma))
# MACD exit: histogram crosses below zero
best_exits_macd = super_indicator.macdh_crossed_below(0)

# Dynamic Take Profit setting
if DYNAMIC_TP_SETTING == 'both':
    best_exits = best_exits_ma | best_exits_macd

elif DYNAMIC_TP_SETTING == 'ma_cloud':
    best_exits = best_exits_ma

elif DYNAMIC_TP_SETTING == 'macd':
    best_exits = best_exits_macd

else:
    # No dynamic exits: an all-False array with the shape of the entries
    best_exits = np.full(best_entries.shape, False)

pf1 = vbt.Portfolio.from_signals(close=close,
                                 entries=best_entries,
                                 exits=best_exits,
                                 size=2700 * 50,
                                 size_type='value',
                                 init_cash='auto',
                                 sl_stop=best_sl,
                                 tsl_stop=best_tsl,
                                 tp_stop=best_tp,
                                 delta_format=STOP_ORDER_SETTING,
                                 fixed_fees=20,
                                 freq='15m')

print(pf1.stats())

In [None]:
# Fast plot of the even-day portfolio
vbt.settings.plotting["use_resampler"] = True  # fast plots, but resampling :(
pf1.plot().show()

# Detailed plot (disabled)
# vbt.settings.plotting["use_resampler"] = False
# fig = pf1.plot()  # 45s
# fig.update_layout(showlegend=False,
#                   hovermode=False)  # faster plots without re-sampling :)
# fig.write_html(file_path + '.html', config={'responsive': True})  # 18s

In [None]:
# Compare the best result for entries taken on odd days with the result for
# entries taken on even days.
# Odd days - the best row of the optimization run
print(f'Entries filtered on Odd Days \n{best_result}\n')

# Even days - Based on the best combination of parameters from odd days simulation;
# the same four metrics are requested so both printouts line up.
even_days_result = pf1.stats(
    ['total_return', 'max_dd', 'win_rate', 'sortino_ratio'])
print(f'Entries filtered on Even Days \n{even_days_result}')