Adapted from https://towardsdatascience.com/backtest-trading-strategies-with-pandas-vectorized-backtesting-26001b0ba3a5

Also see https://github.com/gylx/Financial-Machine-Learning-Articles/

In [40]:
import numpy as np
import pandas as pd
import os
import sys
import glob
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import datetime as datetime
import time

import plotly.express as px
from ipywidgets import widgets

In [2]:
# Folder that holds the "*-data.csv" exports read by the cells below.
file_path = r'C:\Users\torku\Documents\crypto_data'
# glob.glob returns matches in arbitrary, filesystem-dependent order; sorting
# keeps files[0] and the resulting column order reproducible between runs.
files = sorted(glob.glob(file_path + r'\*.csv'))

In [3]:
# Build one frame of closing prices: one column per CSV, rows keyed by the
# parsed 'timestamp' column.
if not files:
    raise FileNotFoundError('glob matched no CSV files; check file_path')

df = None
col_names = []
for file in files:
    print(file)
    df_sub = pd.read_csv(file)
    idx = pd.to_datetime(df_sub['timestamp'])
    df_sub = df_sub.set_index(idx)
    # Column name is the part of the file's base name before the first '-'
    # (e.g. "BTCUSDT" from "BTCUSDT-1m-data.csv"). Using the base name avoids
    # depending on how many directories deep the data folder sits.
    col_name = os.path.basename(file).split('-')[0]
    col_names.append(col_name)
    if df is None:
        # The first file's timestamps define the rows; every later column is
        # aligned to this index on assignment. Each file is read only once.
        df = pd.DataFrame(index=df_sub.index)
    df[col_name] = df_sub['close']
df = df[col_names]

C:\Users\torku\Documents\crypto_data\BTCUSDT-1m-data.csv
C:\Users\torku\Documents\crypto_data\ETHBTC-1m-data.csv
C:\Users\torku\Documents\crypto_data\XRPBTC-1m-data.csv


In [4]:
# Keep only the rows stamped after 01:00 on 1 Jan 2021.
cutoff = pd.Timestamp('2021-1-1 01:00:00')
prices = df.loc[df.index > cutoff]

# Row-to-row log returns for every column; NaNs (such as the first row, which
# has no predecessor) are replaced with 0.
rs = np.log(prices).diff().fillna(0)

# Moving-average windows, counted in rows (1440 minutes per day).
w1 = 150   # short-term window (alternative tried: 2 * 1440)
w2 = 6500  # long-term window (alternative tried: 14 * 1440)

x = prices.index

# Crossover signal: short-term rolling mean minus long-term rolling mean.
short_ma = prices.rolling(w1).mean()
long_ma = prices.rolling(w2).mean()
ma_x = short_ma - long_ma

In [5]:
# Sign of the crossover signal per column: -1, 0 or 1.
pos = np.sign(ma_x)
# Variants that were tried and left disabled:
#   pos = pos.apply(lambda x: 1 if x > 0 else 0, axis = 1)   # not short selling
#   pos /= pos.abs().sum(1).values#.reshape(-1,1)
# Preview: absolute positions summed across columns for the last five rows,
# shaped as a column vector.
latest = pos.tail()
latest.abs().sum(axis=1).to_numpy().reshape(-1, 1)

array([[3.],
       [3.],
       [3.],
       [3.],
       [3.]])

In [6]:
# Long-only crossover backtest: positions, per-row returns and equity curves.
raw_signal = np.sign(ma_x)  # -1, 0 or 1 per column (NaN while a window is still filling)
# No short selling: negative signals become flat (0). clip() is the vectorised
# equivalent of the former element-wise applymap call, which pandas deprecates.
pos = raw_signal.clip(lower=0)

# Shift by one row so each return is earned with the position chosen on the
# previous row (avoids look-ahead bias).
my_rs = pos.shift(1) * rs  # log returns, costs not included
my_rs_cumsum = np.exp(my_rs.cumsum())

tc_perc = 0.001  # transaction cost per unit of position change (0.1 %, noted as the Binance fee)
# A cost is charged on every row where the position changes and is subtracted
# from that row's log return.
delta_pos = pos.diff(1).abs()
my_tcs = tc_perc * delta_pos
my_rs_w_cost = my_rs - my_tcs
my_rs_w_cost_cumsum = np.exp(my_rs_w_cost.cumsum())

# Baseline: fully invested whenever the signal is non-zero. It is taken from
# the unclipped signal because abs() of the long-only positions equals the
# positions themselves, which would make the baseline a copy of the strategy.
pos_baseline = raw_signal.abs()
base_rs = pos_baseline.shift(1) * rs
base_rs_cumsum = np.exp(base_rs.cumsum())

In [28]:
# Min-max scale each column of prices to the range [0, 1].
# The scaling is computed from prices directly: the earlier form read
# norm_prices on the right-hand side while the line defining it was commented
# out, which raises NameError on a fresh kernel. A min-max rescale gives the
# same values whether or not the columns were z-scored first.
norm_prices = (prices - prices.min()) / (prices.max() - prices.min())

plotly.graph_objs._figure.Figure

In [34]:
# Bare expression: lets the notebook render the prices frame as the cell output.
prices

Unnamed: 0_level_0,BTCUSDT,ETHBTC,XRPBTC
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-01-01 01:01:00,28972.26,0.025318,0.000008
2021-01-01 01:02:00,28998.94,0.025316,0.000008
2021-01-01 01:03:00,28988.15,0.025338,0.000008
2021-01-01 01:04:00,29056.95,0.025321,0.000008
2021-01-01 01:05:00,29089.33,0.025338,0.000008
...,...,...,...
2021-04-25 03:35:00,49885.84,0.044362,0.000021
2021-04-25 03:36:00,49888.22,0.044384,0.000021
2021-04-25 03:37:00,49841.88,0.044366,0.000021
2021-04-25 03:38:00,49864.44,0.044365,0.000021


In [71]:
thinner = 735  # plot only every 735th row
# px doesn't support subplots, so each px.line figure is built on its own and
# its traces are moved into a shared make_subplots figure. Consider matplotlib
# for future minimum viable product exploration.
# Only rows 1-3 are filled below; row 4 is referenced solely by the disabled
# snippet further down.
fig = make_subplots(rows = 4, cols = 1, shared_xaxes=True,
                    vertical_spacing = 0.02)


def _move_traces(px_fig, target, row):
    """Add every trace of px_fig to target at (row, 1) with its legend entry hidden.

    Returns the list of px_fig's trace objects, in their original order.
    """
    traces = list(px_fig.data)
    for trace in traces:
        trace.showlegend = False
        # add_trace with row/col is the supported replacement for the
        # deprecated append_trace.
        target.add_trace(trace, row = row, col = 1)
    return traces


# Row 1: raw prices.
sub_prices = px.line(prices[::thinner], x = x[::thinner], y=prices.columns)
sub_prices_traces = _move_traces(sub_prices, fig, row = 1)

# Row 2: normalised prices.
sub_norm = px.line(norm_prices[::thinner], x = x[::thinner], y=norm_prices.columns)
sub_norm_traces = _move_traces(sub_norm, fig, row = 2)

# Row 3: moving-average difference. The column names come from ma_x itself
# rather than from a rolling mean recomputed only to list them.
sub_ma = px.line(ma_x[::thinner], x = x[::thinner], y=ma_x.columns)
sub_ma_traces = _move_traces(sub_ma, fig, row = 3)

# Disabled earlier attempt, kept for reference. It is a bare string literal
# and has no effect when the cell runs.
'''
fig.add_scatter(x = x[::thinner], y = prices[::thinner].columns, mode = 'lines',
    name = 'actual', row = 1, col = 1)

fig.add_scatter(x = x[::thinner], y = prices.rolling(w1).mean()[::thinner], mode = 'lines',
    name = 'ma_short', row = 1, col = 1)

fig.add_scatter(x = x[::thinner], y = prices.rolling(w2).mean()[::thinner], mode = 'lines',
    name = 'ma long',row = 1, col = 1)

fig.add_scatter(x = x[::thinner], y = ma_x[::thinner], mode = 'lines',
    name = 'moving average difference', row = 2, col = 1)

fig.add_scatter(x = x[::thinner], y = pos[::thinner], mode = 'lines',
    name = 'position', row = 3, col = 1)

fig.add_scatter(x = x[::thinner], y = my_rs_cumsum[::thinner], mode = 'lines',
    name = 'my_rs_cumsum', row = 4, col = 1)

fig.add_scatter(x = x[::thinner], y = my_rs_w_cost_cumsum[::thinner], mode = 'lines',
    name = 'my_rs_cumsum_with_fee', row = 4, col = 1)

fig.add_scatter(x = x[::thinner], y = base_rs_cumsum[::thinner], mode = 'lines',
    name = 'base_rs_cumsum', row = 4, col = 1)
    '''

fig.show()

In [61]:
# Disabled snippet kept from an earlier experiment:
#   df1 = df.melt(id_vars=['Date']+list(df.keys()[5:]), var_name='AAPL')
#   px.line(df1, x='Date', y='value', color='AAPL'
# Hide the legend entry of the first price trace.
first_trace = sub_prices_traces[0]
first_trace.showlegend = False

In [None]:
# Grid search: run the long-only crossover strategy for every (w1, w2) window
# pair and record the final with-cost equity value of each run.
# NOTE(review): this cell needs df to carry a 'close' column with a datetime
# index. The frame built earlier in this file only keeps one column per
# trading pair - confirm which frame is meant before running.
df_sub = df[['close']]
df_sub = df_sub.loc[df_sub.index > pd.Timestamp('2021-1-1 01:00:00')]

#price for single coin
prices = df_sub['close']

#log difference for prices along all time steps
rs = np.log(prices).diff(1).fillna(0)

# Candidate windows in rows: 15, 30, ..., 1485 (short) and 120, 240, ..., 11880 (long).
w1_list = [15 * k for k in range(1, 100)]
w2_list = [120 * k for k in range(1, 100)]

tc_perc = 0.001

x = df_sub.index


set_of_w1 = []
set_of_w2 = []
set_of_w_rs = []
st = time.time()
for w1 in w1_list:
    et = time.time()
    print('Started ' + str(w1) + ' at ' + str(et - st))

    # The short-term mean depends only on w1, so compute it once per outer
    # iteration instead of once per (w1, w2) pair.
    short_ma = prices.rolling(w1).mean()

    for w2 in w2_list:
        ma_x = short_ma - prices.rolling(w2).mean()

        # Sign of the signal (-1, 0, 1) with shorts flattened to 0. The
        # vectorised clip replaces a per-element Python lambda.
        pos = np.sign(ma_x).clip(lower=0)

        #shift +1 to avoid look ahead bias
        my_rs = pos.shift(1) * rs #doesn't include costs

        #at every point where we're trading, want to mark the transaction
        #this should be the portfolio balance * transaction cost percent
        delta_pos = pos.diff(1).abs()
        my_tcs = tc_perc * delta_pos # compute transaction costs
        my_rs_w_cost = my_rs - my_tcs
        my_rs_w_cost_cumsum = np.exp(my_rs_w_cost.cumsum())

        set_of_w1.append(w1)
        set_of_w2.append(w2)
        # .iloc[-1] picks the last row by position; plain [-1] on a
        # datetime-indexed Series relies on a deprecated fallback.
        set_of_w_rs.append(my_rs_w_cost_cumsum.iloc[-1])

In [None]:
import plotly.express as px

# One row per tested window pair: w1, w2 and the value recorded for it (z).
grid = {'w1': set_of_w1, 'w2': set_of_w2, 'z': set_of_w_rs}
df = pd.DataFrame(grid)
# Density heatmap of z over the (w1, w2) grid.
fig = px.density_heatmap(df, x='w1', y='w2', z='z')
fig.show()

In [None]:
# Show the rows ordered from the highest z to the lowest.
ranked = df.sort_values('z', ascending=False)
ranked