In [1]:
# work with saved historic data to minimize the number of api requests

import pandas as pd
import os

symbol = 'btc'

tick_data = pd.read_csv(symbol+'_5min20210128.v.txt', index_col=2, infer_datetime_format=True, parse_dates=True)

#tick_data.iloc[:, 0] = pd.to_datetime(tick_data.iloc[:, 0], infer_datetime_format=True, unit='s')

#tick_data.set_index('time', inplace=True)

tick_data.sort_index(inplace=True)

# implement re-sampling
'''
bar lengths are in seconds
10 min: 600s, 20 min: 1200s, 40 min: 2400s, 60min: 3600s
3 hr: 10800s, 6hr: 21600s, 9hr: 32400s, 12hr: 43200s
'''
df = pd.DataFrame()
bar = '600s'
df = tick_data.resample(bar, label='right').last().ffill()

df.head

<bound method NDFrame.head of                     <TICKER>  <PER>    <TIME>   <OPEN>   <HIGH>    <LOW>  \
<DATE>                                                                     
2020-12-08 00:10:00    BTC.V    5.0  235500.0  18626.8  18629.7  18472.0   
2020-12-08 00:20:00    BTC.V    5.0  235500.0  18626.8  18629.7  18472.0   
2020-12-08 00:30:00    BTC.V    5.0  235500.0  18626.8  18629.7  18472.0   
2020-12-08 00:40:00    BTC.V    5.0  235500.0  18626.8  18629.7  18472.0   
2020-12-08 00:50:00    BTC.V    5.0  235500.0  18626.8  18629.7  18472.0   
...                      ...    ...       ...      ...      ...      ...   
2021-01-26 23:30:00    BTC.V    5.0  235500.0  32342.2  32736.1  32342.2   
2021-01-26 23:40:00    BTC.V    5.0  235500.0  32342.2  32736.1  32342.2   
2021-01-26 23:50:00    BTC.V    5.0  235500.0  32342.2  32736.1  32342.2   
2021-01-27 00:00:00    BTC.V    5.0  235500.0  32342.2  32736.1  32342.2   
2021-01-27 00:10:00    BTC.V    5.0  234500.0  30574.1  30

In [2]:
import time
import numpy as np
import pandas as pd
import datetime as dt
from pylab import mpl, plt

sub = df.copy()

# analyze data sub-set

i = 20 # Number of ticks to use for simple moving average (SMA)
t = str(time.time()) #datetime stamp

sub['Mid'] = sub.loc[:, ('<OPEN>','<CLOSE>')].mean(axis=1)
sub['SMA'] = sub.loc[:, 'Mid'].rolling(i).mean().shift(i)
sub[['Mid', 'SMA']].plot(figsize=(10,6), lw=0.75, title='BTC-USD', ylabel='USD/BTC')
plt.savefig('1-BTC-USD_SMA_{}.png'.format(t))
plt.show()

In [66]:
import pandas as pd
import numpy as np

# vectorized backtesting

ptc = 0.005 # proportional transactional costs - Coinbase Pro charges 0.5% per transaction

data = pd.DataFrame()
data['returns'] = np.log(sub['Mid']/sub['Mid'].shift(1))

data.dropna(inplace=True)

lags = 2

cols = []

for lag in range(1, lags+1):
    col = 'lag_{}'.format(lag)
    data[col] = data['returns'].shift(lag)
    cols.append(col)

data.dropna(inplace=True)

data[cols] = np.where(data[cols] > 0, 1, 0)

data['direction'] = np.where(data['returns'] > 0, 1, -1)

data[cols + ['direction']].head()

Unnamed: 0_level_0,lag_1,lag_2,direction
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-12-08 00:40:00,0.0,0.0,-1
2020-12-08 00:50:00,0.0,0.0,-1
2020-12-08 01:00:00,0.0,0.0,-1
2020-12-08 01:10:00,0.0,0.0,-1
2020-12-08 01:20:00,0.0,0.0,-1


In [67]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from datetime import datetime
import pickle

model = SVC(C=1, kernel='linear', gamma='auto')

split = int(len(data) * 0.80)

train = data.iloc[:split].copy()

model.fit(train[cols], train['direction'])

# persisting the model object
t = str(time.time()) #datetime stamp
pickle.dump(model, open('algorithm-{}.pkl'.format(t), 'wb'))

In [68]:
accuracy_score(train['direction'], model.predict(train[cols]))

0.9951372004168114

In [69]:
test = data.iloc[split:].copy()

test['position'] = model.predict(test[cols])

In [70]:
accuracy_score(test['direction'], test['position'])

0.9958333333333333

In [71]:
test['strategy'] = test['position'] * test['returns']
sum(test['position'].diff() != 0)

1

In [72]:
test['strategy_tc'] = np.where(test['position'].diff() != 0, test['strategy'] - ptc, test['strategy'])

In [73]:
test[['returns', 'strategy', 'strategy_tc']].sum().apply(np.exp)

returns        0.843608
strategy       1.185385
strategy_tc    1.179473
dtype: float64

In [74]:
t = str(time.time())
test[['returns', 'strategy', 'strategy_tc']].cumsum().apply(np.exp).plot(figsize=(10,6))
plt.savefig('1-BTC-USD_StratCompare_{}.png'.format(t))
plt.show()

In [75]:
import pandas as pd
from datetime import datetime
t = str(time.time()) #datetime stamp #datetime stamp
test.to_csv('backtest_{}.csv'.format(t))