In [88]:
# Work with saved historical data to minimize the number of API requests.

import pandas as pd
import os

symbol = 'btc'

# Alternative data sources (disabled, kept for reference):
#tick_data = pd.read_csv('online_trading_data.csv', index_col=1, infer_datetime_format=True, parse_dates=True)
#tick_data = pd.read_csv(symbol+'_hourly.v.txt', index_col=2, infer_datetime_format=True, parse_dates=True)

# Load the saved price history; CSV column 1 becomes the index and is parsed as dates.
tick_data = pd.read_csv(symbol+'_USD_2021-05-05.csv', index_col=1, infer_datetime_format=True, parse_dates=True)

# Alternative index construction from epoch seconds (disabled):
#tick_data.iloc[:, 0] = pd.to_datetime(tick_data.iloc[:, 0], infer_datetime_format=True, unit='s')
#tick_data.set_index('time', inplace=True)

# Order the rows by their index before re-sampling.
tick_data.sort_index(inplace=True)

# implement re-sampling
#
# bar lengths are in seconds
#   5 min: 300s, 10 min: 600s, 20 min: 1200s, 40 min: 2400s, 60min: 3600s
#   3 hr: 10800s, 6hr: 21600s, 9hr: 32400s, 12hr: 43200s, 15hr: 54000, 18hr: 64800
#   24 hr: 86400s, 48 hr: 172800s, 72 hr: 259200s, 120 hr: 432000s
#
# NOTE(review): the saved output of the following `df.head()` cell reports
# `Freq: 172800S` (48 hr), so it was presumably produced with a different `bar`
# than the one set here -- re-run the notebook to refresh the outputs.
bar = '259200s'  # 72 hr bars

# Last close seen in each bar, labelled with the bar's right edge; bars without
# any rows inherit the previous bar's value via forward-fill.
df = tick_data['<CLOSE>'].resample(bar, label='right').last().ffill()

# Autocorrelation of the 60 most recent bars (kept as the cell's last expression
# so the plot is rendered).
pd.plotting.autocorrelation_plot(df[-60:])

<AxesSubplot:xlabel='Lag', ylabel='Autocorrelation'>

In [69]:
# Preview the first five resampled closing prices.
df.head(n=5)

Date
2021-01-05    31431.61228
2021-01-07    36275.75635
2021-01-09    40519.44860
2021-01-11    38709.76537
2021-01-13    34214.61026
Freq: 172800S, Name: <CLOSE>, dtype: float64

In [70]:
import time
import numpy as np
import pandas as pd
import datetime as dt
from pylab import mpl, plt

# analyze data sub-set

# Wrap the resampled close series in a frame so further columns can be added.
sub = pd.DataFrame(data=df)

i = 3  # number of bars in the simple moving average (SMA) window
t = str(time.time())  # epoch-seconds stamp for output file names

# Rolling mean over `i` bars, then lagged by a further `i` bars.
sub['SMA'] = (
    sub['<CLOSE>']
    .rolling(window=i)
    .mean()
    .shift(periods=i)
)

sub.plot(
    figsize=(10, 6),
    lw=0.75,
    title='BTC-USD',
    ylabel='USD/BTC',
)
#plt.savefig('1-BTC-USD_SMA_{}.png'.format(t))
#plt.show()
sub.head(n=5)

Unnamed: 0_level_0,<CLOSE>,SMA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-05,31431.61228,
2021-01-07,36275.75635,
2021-01-09,40519.4486,
2021-01-11,38709.76537,
2021-01-13,34214.61026,


In [97]:
import pandas as pd
import numpy as np

# vectorized backtesting

# proportional transactional costs - Coinbase Pro charges 0.5% per transaction
ptc = 0.005

# Close prices and their one-bar log returns; the first row has no return and is dropped.
data = pd.DataFrame({'close': sub['<CLOSE>']})
data['returns'] = np.log(data['close'] / data['close'].shift(periods=1))
data.dropna(inplace=True)

# Restrict the study to the 60 most recent bars.
data = data.iloc[-60:]

# One feature column per lag: lag_k holds the return observed k bars earlier.
lags = 5
cols = ['lag_{}'.format(n) for n in range(1, lags + 1)]
for n, name in enumerate(cols, start=1):
    data[name] = data['returns'].shift(n)

# The first `lags` rows lack a full set of lagged returns.
data.dropna(inplace=True)

# Binarise the features: 1 where the lagged return is >= 0, otherwise 0.
data[cols] = np.where(data[cols] >= 0, 1, 0)

# Target label: +1 where the bar's return is strictly positive, otherwise -1.
# (Note the thresholds differ: a return of exactly 0 maps to feature 1 but label -1.)
data['direction'] = np.where(data['returns'] > 0, 1, -1)

data[cols + ['direction']].head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,lag_5,direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-01-19,0.0,1.0,0.0,0.0,1.0,1
2021-01-21,1.0,0.0,1.0,0.0,0.0,-1
2021-01-23,0.0,1.0,0.0,1.0,0.0,-1
2021-01-25,0.0,0.0,1.0,0.0,1.0,-1
2021-01-27,0.0,0.0,0.0,1.0,0.0,1


In [39]:
# Save the engineered feature/label table for inspection outside the notebook.
data.to_csv(path_or_buf='backtest_data.csv')

In [98]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from datetime import datetime
import pickle
import time  # used for the file-name stamp below; previously relied on an earlier cell's import

# Support-vector classifier with a linear kernel.
# NOTE(review): per the scikit-learn docs `gamma` is the coefficient for the
# 'rbf', 'poly' and 'sigmoid' kernels, so it should have no effect here; it is
# kept only to leave the original call unchanged.
model = SVC(C=1, kernel='linear', gamma='auto')

# The first 80% of the rows are used for fitting; the remainder is left for testing.
split = int(len(data) * 0.80)

train = data.iloc[:split].copy()

model.fit(train[cols], train['direction'])

# persisting the model object
t = str(time.time())  # epoch seconds, makes the file name unique per run
# Use a context manager so the file handle is flushed and closed deterministically
# (the previous bare open() call never closed it).
with open('algorithm-{}.pkl'.format(t), 'wb') as model_file:
    pickle.dump(model, model_file)

In [99]:
# In-sample hit ratio: share of training rows whose direction label is predicted correctly.
accuracy_score(
    y_true=train['direction'],
    y_pred=model.predict(train[cols]),
)

0.6363636363636364

In [100]:
# Hold-out rows: everything from position `split` onwards, copied so that new
# columns are added to an independent frame.
test = data.iloc[split:, :].copy()

# Direction label predicted by the fitted classifier for each hold-out row.
test['position'] = model.predict(test.loc[:, cols])
#test['position'] = test['direction']         # naive prediction

test.loc[:, ['position', 'direction']].tail(n=5)

Unnamed: 0_level_0,position,direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-04-29,1,1
2021-05-01,1,1
2021-05-03,1,-1
2021-05-05,-1,-1
2021-05-07,1,1


In [101]:
# Out-of-sample hit ratio: predicted position vs. realised direction label.
accuracy_score(
    y_true=test['direction'],
    y_pred=test['position'],
)

0.5454545454545454

In [102]:
# Per-bar strategy log return: the predicted position label (+1/-1) times the bar's log return.
test['strategy'] = test['returns'] * test['position']

# Number of bars on which the position differs from the previous bar. The first
# bar is included in the count because its diff is NaN and NaN != 0 is True.
(test['position'].diff() != 0).sum()

5

In [103]:
# Strategy log returns net of transaction costs.
test['strategy_tc'] = np.where(
    test['position'].diff() != 0,  # position differs from the previous bar (first bar included: NaN != 0)
    test['strategy'] - ptc,        # subtract the proportional cost on that bar
    test['strategy'],              # otherwise the return is unchanged
)

In [104]:
# Gross performance over the hold-out window: exp of the summed log returns,
# for the passive benchmark, the strategy, and the strategy net of costs.
np.exp(test.loc[:, ['returns', 'strategy', 'strategy_tc']].sum())

returns        0.909834
strategy       1.195320
strategy_tc    1.165808
dtype: float64

In [48]:
# Epoch-seconds stamp so each saved figure gets a distinct file name.
t = str(time.time())

# Cumulative performance curves: exp of the running sum of log returns.
(
    test[['returns', 'strategy', 'strategy_tc']]
    .cumsum()
    .apply(np.exp)
    .plot(figsize=(10, 6))
)
plt.savefig('1-BTC-USD_StratCompare_{}.png'.format(t))
plt.show()

In [94]:
import pandas as pd
from datetime import datetime
# Write the hold-out frame (including the strategy columns) to an epoch-stamped CSV.
t = str(time.time())
test.to_csv(path_or_buf='backtest_{}.csv'.format(t))