# SINGLE INDICATOR THRESHOLD BACKTESTING PSEUDO-EXPERIMENTS

## The code performs backtesting using a single indicator with a threshold-crossing strategy:

* if indicator crosses under buy_threshold -> buy
* if indicator crosses over sell_threshold -> sell

All the available money is used for every trade, starting with $100: the full proceeds of the first trade are injected into the second trade, and so on.

This iteration of the code does not allow looping over different currencies, indicators, timeframes, or indicator time periods. Pseudo-experiments are CPU-time-consuming by nature, so focusing on one configuration at a time is recommended.

Data is obtained from Coinbase Pro via their API and a Python wrapper module.

Data should be available prior to the trade start datetime so that the first traded data point has a meaningful indicator value (the indicator requires past data). This is why the code applies 2-day shifts in a few places.

## Various module imports (extra modules required: numpy, pandas, talib, tqdm)

In [1]:
import itertools
import os
import time
from datetime import datetime, timedelta
from random import randrange

import numpy as np
import pandas as pd
import talib as ta
from tqdm import tqdm

from coinbase_market import CryptoMarket

# Raise the pandas console display limits so wide/long result tables are printed in full.
for _display_option, _display_limit in (
    ('display.max_columns', 5000),
    ('display.max_rows', 5000),
    ('display.width', 10000),
):
    pd.set_option(_display_option, _display_limit)

## Moving to whichever directory containing the data and output folders: 'data/', 'backtesting_output/'

In [2]:
cd D:\crypto\

D:\crypto


In [3]:
# Show the contents of the working directory; the rest of the notebook reads from
# 'data/' and writes to 'backtesting_output/' relative to it.
os.listdir('.')

['backtesting_output', 'data', 'metamask', 'theta']

## Various backtesting configurations

In [4]:
# --- Market and fees ---------------------------------------------------------
currency = 'link'  # see https://www.coingecko.com/en for currency symbol
buy_fee = 0.1  # in percent
sell_fee = 0.1  # in percent
initial_cash = 100  # cash available before the first trade of each backtest

verbose = 0  # print-out verbosity: 0, 1, 2, 3

timeframe = 5  # timeframe in minute of one candle

# --- Indicator and threshold scan --------------------------------------------
# The scan grids are built with np.arange, so the *_max values are exclusive.
indicator = 'RSI'  # see TA-lib documentation: https://mrjbq7.github.io/ta-lib/
ind_timeperiod = 50  # number of periods used to compute the indicator
ind_buy_min = 30  # scan lower buy limit
ind_buy_max = 40  # scan upper buy limit
ind_sell_min = 35  # scan lower sell limit
ind_sell_max = 55  # scan upper sell limit
ind_step_size = 0.5  # scan step size common to both buy/sell
n_ensemble = 10  # number of pseudo-experiments to run

minimum_nday_trade = 15  # minimum number of days to trade

# --- Date range --------------------------------------------------------------
data_first_datetime = datetime(2019, 7, 1, 0, 0, 0)  # as far back as possible on Coinbase Pro

run_pseudo_experiment = True  # if False, use all the data minus first 2 days, not a random subset

trade_start_datetime = data_first_datetime + timedelta(days=2)  # by default start with earliest available data with 2 days shift
trade_end_datetime = datetime.utcnow()  # UTC-0 (Iceland) is the time zone of the data

# Length of the full tradable window, expressed in (fractional) days.
nday_trade = trade_end_datetime-trade_start_datetime
nday_trade = nday_trade.days + nday_trade.seconds/3600/24

# Fetch market data

In [5]:
def fetch_data(curr, start_date):
    """Ask CryptoMarket for the candle history of *curr* from *start_date* to 'now'.

    The trading pair is '<CURR>-USDC' for 'bat' and 'zec', and '<CURR>-USD'
    for every other currency symbol. Nothing is returned; whatever
    CryptoMarket.get_currency_candle_history does with the data (presumably
    it stores it on disk -- confirm in coinbase_market) is the only effect.
    """
    quote_currency = 'USDC' if curr in ('bat', 'zec') else 'USD'
    pair_name = f'{curr.upper()}-{quote_currency}'

    CryptoMarket(pair_name).get_currency_candle_history(start_date, 'now')
    
fetch_data(currency, data_first_datetime)    

File already exists. Fetching 0 day(s) 3 minute(s) of missing data since last download
2020-11-05 23:24:00 2020-11-05 23:28:00


# Trading function

In [6]:
def _elapsed_minutes(delta):
    """Return the duration of the timedelta *delta* in minutes (days + seconds parts)."""
    return delta.seconds / 60 + delta.days * (24 * 60)


def _signal_rows(df, start_date, ind_buy_th, ind_sell_th):
    """Return the candles of *df* at which the buy/sell signal changes.

    A copy of *df* gets a 'trigger' column: 1 where the indicator is below
    *ind_buy_th* and the candle is later than *start_date*, -1 where the
    indicator is above *ind_sell_th*, 0 elsewhere. Neutral candles are
    removed, then only the first candle of each run of identical triggers is
    kept, and finally rows containing NaN are dropped.
    """
    df_copy = df.copy()
    # np.zeros rather than np.empty(...) * 0: uninitialised memory may hold
    # inf/NaN, and those stay NaN after multiplying by zero.
    df_copy['trigger'] = np.zeros(len(df_copy))

    is_buy = (df_copy[indicator] < ind_buy_th) & (df_copy.index > start_date)
    df_copy.loc[is_buy, 'trigger'] = 1
    # Assigned after the buy flag on purpose: when both conditions hold
    # (possible if the sell threshold is below the buy threshold) sell wins.
    df_copy.loc[df_copy[indicator] > ind_sell_th, 'trigger'] = -1

    df_copy = df_copy.loc[df_copy['trigger'] != 0]
    df_copy = df_copy.loc[df_copy['trigger'].shift() != df_copy['trigger']]
    return df_copy.dropna()


def trade(df, start_date, stop_date, ind_pair_iterator, n_day):
    """Backtest the threshold strategy for every (buy, sell) threshold pair.

    For each pair the whole cash balance is used to buy on the first buy
    signal and everything is sold on the next sell signal, repeatedly, with
    `buy_fee` / `sell_fee` (percent) applied to each side. A position still
    open after the last signal is valued at the last close of *df*, counted
    as a transaction and flagged with ``left_unsold = 1``.

    Reads the module-level settings `initial_cash`, `buy_fee`, `sell_fee`,
    `indicator`, `verbose`, `currency`, `timeframe` and `ind_timeperiod`.

    Parameters
    ----------
    df : pandas.DataFrame
        Candles indexed by datetime with a 'close' column and a column named
        after `indicator`.
    start_date : datetime
        Buy signals are only honoured for candles strictly after this date.
    stop_date : datetime
        Only recorded in the output; *df* is expected to be already cut.
    ind_pair_iterator : iterable
        Pairs whose item 0 is the buy threshold and item 1 the sell threshold.
    n_day : float
        Length of the trading window in days, used to normalise per-day stats.

    Returns
    -------
    pandas.DataFrame
        One row per threshold pair that completed at least one buy+sell round
        trip; pairs without one are skipped (a message is printed when
        `verbose` > 0). Empty if no pair qualified.
    """
    rows = []

    for ind_pair in tqdm(list(ind_pair_iterator), desc="Looping over indicator pairs..."):

        ind_buy_th = ind_pair[0]
        ind_sell_th = ind_pair[1]

        df_copy = _signal_rows(df, start_date, ind_buy_th, ind_sell_th)

        if verbose > 2:
            print(df_copy.head(10))

        cash = initial_cash
        coin = 0
        profit_list = []      # per-trade price change in percent (fees excluded)
        max_price_drop = []   # per completed trade: lowest close while holding vs buy price, in percent
        delta_t = []          # per-trade holding time in minutes
        buy_price = 0
        buy_date = None
        tot_volume = 0
        n_trans = 0
        left_unsold = 0
        bought = False

        for idx, row in df_copy.iterrows():

            if not bought and row['trigger'] == 1:
                buy_price = row['close']
                buy_date = idx
                coin = (1 - buy_fee / 100) * cash / buy_price
                tot_volume += cash
                bought = True
                if verbose > 3:
                    print('cash', round(cash, 2), idx, 'buy price', row['close'], 'ind:', row[indicator])
                cash = 0

            elif bought and row['trigger'] == -1:
                sell_price = row['close']
                sell_date = idx
                cash = sell_price * (1 - sell_fee / 100) * coin
                tot_volume += cash
                coin = 0
                bought = False

                # Closes strictly between buy and sell; min() is NaN when the
                # two candles are adjacent.
                held_close = df.loc[(df.index > buy_date) & (df.index < sell_date)]['close']
                max_price_drop.append((held_close.min() - buy_price) / buy_price * 100)
                if verbose > 3:
                    print('cash', round(cash, 2), idx, 'sell price', sell_price, 'ind:', row[indicator])

                profit_list.append((sell_price - buy_price) / buy_price * 100)
                n_trans += 1
                delta_t.append(_elapsed_minutes(sell_date - buy_date))

        if bought:
            # Position still open at the end of the data: value it at the last close.
            last_close = df['close'].iloc[-1]
            sell_date = df.index[-1]
            n_trans += 1
            cash = (1 - sell_fee / 100) * coin * last_close
            # Same minutes conversion as for completed trades (the days part
            # used to be divided by 24*60 instead of multiplied).
            delta_t.append(_elapsed_minutes(sell_date - buy_date))
            left_unsold = 1
            profit_list.append((last_close - buy_price) / buy_price * 100)
            if verbose > 3:
                print('unsold', sell_date, 'sell price', last_close)

        try:
            # The numpy reductions raise ValueError on empty lists, i.e. when
            # the pair never completed a buy+sell round trip; such pairs are
            # skipped. ZeroDivisionError covers n_day == 0.
            new_row = {
                'currency': currency,
                'timeframe': timeframe,
                'ind_timeperiod': ind_timeperiod,
                'indicator': indicator,
                'ind_buy': ind_buy_th,
                'ind_sell': ind_sell_th,
                'ttime_mean': np.mean(np.array(delta_t)) / 60,  # from minute to hour
                'ttime_max': np.max(np.array(delta_t)) / 60,  # from minute to hour
                'ttime_min': np.min(np.array(delta_t)) / 60,  # from minute to hour
                'ttime_std': np.std(np.array(delta_t)) / 60,  # from minute to hour
                'tot_volume': tot_volume,
                'profit_per_day': (cash - initial_cash) / initial_cash * 100 / n_day,  # in percent per day
                'profit_ind_mean': np.mean(np.array(profit_list)),
                'profit_ind_max': np.max(np.array(profit_list)),
                'profit_ind_min': np.min(np.array(profit_list)),
                'profit_ind_std': np.std(np.array(profit_list)),
                'cash_final': cash,
                'n_trans': n_trans,
                'max_price_drop': np.min(np.array(max_price_drop)),
                'n_trans_per_day': n_trans / n_day,
                'start_date': start_date,
                'stop_date': stop_date,
                'n_day': n_day,
                'left_unsold': left_unsold,
            }
        except (ValueError, ZeroDivisionError) as exc:
            if verbose > 0:
                print("Exception occured:", exc.args[0])
            continue

        if verbose > 1:
            print(profit_list)
        rows.append(new_row)

    # Built in one go because DataFrame.append was removed in pandas 2.0.
    # NOTE(review): columns follow the dict order above; pandas <= 1.3's
    # append is believed to have sorted them alphabetically -- check the
    # column order of any existing header-less results CSV before appending.
    return pd.DataFrame(rows)

# Pseudo-experiments loop

In [None]:
df_results = pd.DataFrame()

# Load the close prices of the selected currency, indexed by candle time.
df_data_raw = pd.read_csv('data/' + currency + '.csv', index_col='time', usecols=['time', 'close'])
df_data_raw.index = pd.to_datetime(df_data_raw.index)

# Put the data on a regular 1-minute grid (interpolating gaps), then keep the
# last close of every `timeframe`-minute candle.
df_data_raw = df_data_raw.resample('1Min').interpolate()
resample_logic = {'close': 'last'}
sampling_interval = str(timeframe) + 'Min'
df_data_raw = df_data_raw.resample(sampling_interval).apply(resample_logic)

# Compute the TA-lib indicator once on the full history; the leading rows
# without enough past data come out as NaN and are dropped.
df_data_raw[indicator] = getattr(ta, indicator)(df_data_raw['close'], ind_timeperiod)
df_data_raw.dropna(inplace=True)

# Threshold grids to scan (upper bounds excluded by np.arange).
ind_buy_list = np.arange(ind_buy_min, ind_buy_max, ind_step_size)
ind_sell_list = np.arange(ind_sell_min, ind_sell_max, ind_step_size)

for idx_ens in range(n_ensemble):

    # itertools.product returns a one-shot iterator, so build a fresh one for every pseudo-experiment
    ind_pair_iterator = itertools.product(ind_buy_list, ind_sell_list)

    if run_pseudo_experiment:
        # draw random start datetime
        ens_start_date = trade_start_datetime + \
                            timedelta(days=randrange(0, int(nday_trade-minimum_nday_trade), 1)) + \
                            timedelta(hours=randrange(-12, 12, 1)) + \
                            timedelta(minutes=randrange(-30, 30, 1)) + \
                            timedelta(seconds=randrange(-30, 30, 1))

        ndays_to_end = trade_end_datetime-ens_start_date
        ndays_to_end = ndays_to_end.days + ndays_to_end.seconds/3600/24
        # The random hour/minute/second offsets can leave fewer than
        # minimum_nday_trade + 1 whole days before the end of the data, which
        # made randrange() fail on an empty range; in that case trade for
        # exactly minimum_nday_trade days.
        max_trade_days = max(int(ndays_to_end), minimum_nday_trade + 1)
        ens_end_date = ens_start_date + timedelta(days=randrange(minimum_nday_trade, max_trade_days, 1))
    else:
        ens_start_date = trade_start_datetime
        ens_end_date = trade_end_datetime

    ens_nday = ens_end_date - ens_start_date
    ens_nday = ens_nday.days + ens_nday.seconds/3600/24

    print('Ens:', idx_ens, ', start date:', ens_start_date, ', end date:', ens_end_date)

    df_data_trade = df_data_raw.copy()
    df_data_trade = df_data_trade[df_data_trade.index>ens_start_date-timedelta(days=2)]  # -2 day for meaningful indicator
    df_data_trade = df_data_trade[df_data_trade.index<ens_end_date]

    df_trade = trade(df_data_trade,
                     ens_start_date,
                     ens_end_date,
                     ind_pair_iterator,
                     ens_nday)

    # Append to the results file, writing the header only when the file is created.
    results_file_exists = os.path.isfile('backtesting_output/trade_results.csv')
    df_trade.to_csv('backtesting_output/trade_results.csv',
                    mode='a' if results_file_exists else 'w',
                    index=False,
                    header=not results_file_exists)

Looping over indicator pairs...:   1%|▍                                                | 7/800 [00:00<00:11, 68.38it/s]

Ens: 0 , start date: 2020-08-28 05:55:36 , end date: 2020-09-26 05:55:36


Looping over indicator pairs...: 100%|███████████████████████████████████████████████| 800/800 [00:17<00:00, 46.92it/s]
Looping over indicator pairs...:   0%|▏                                                | 4/800 [00:00<00:20, 38.19it/s]

Ens: 1 , start date: 2020-04-14 23:04:49 , end date: 2020-10-10 23:04:49


Looping over indicator pairs...: 100%|███████████████████████████████████████████████| 800/800 [00:46<00:00, 17.36it/s]
Looping over indicator pairs...:   1%|▎                                                | 5/800 [00:00<00:17, 44.75it/s]

Ens: 2 , start date: 2019-08-20 04:06:59 , end date: 2020-05-01 04:06:59


Looping over indicator pairs...: 100%|███████████████████████████████████████████████| 800/800 [00:58<00:00, 13.75it/s]
Looping over indicator pairs...:   1%|▎                                                | 6/800 [00:00<00:14, 53.22it/s]

Ens: 3 , start date: 2019-10-15 06:13:49 , end date: 2020-04-17 06:13:49


Looping over indicator pairs...:  46%|█████████████████████▊                         | 371/800 [00:10<00:18, 22.83it/s]