In [35]:
# Work with saved historic data to minimize the number of API requests.

import pandas as pd
import os

symbol = 'btc'

# The CSV column at zero-based position 3 becomes the index and is parsed as
# datetimes. `infer_datetime_format` is deprecated since pandas 2.0 (format
# inference is the parser's default behaviour), so it is no longer passed.
tick_data = pd.read_csv('bitcoin_30sec_0422-0425.csv', index_col=3, parse_dates=True)

# Alternative inputs, kept for reference:
#tick_data = pd.read_csv('online_trading_data.csv', index_col=1, infer_datetime_format=True, parse_dates=True)

#tick_data = pd.read_csv(symbol+'_USD_2021-05-14.csv', index_col=1, infer_datetime_format=True, parse_dates=True)

#tick_data.iloc[:, 0] = pd.to_datetime(tick_data.iloc[:, 0], infer_datetime_format=True, unit='s')

#tick_data.set_index('time', inplace=True)

# Order the rows by index so that resampling and tail slices are chronological.
tick_data = tick_data.sort_index()

# Re-sampling.
# Bar lengths are given in seconds:
#   5 min: 300s, 10 min: 600s, 20 min: 1200s, 40 min: 2400s, 60 min: 3600s
#   3 hr: 10800s, 6 hr: 21600s, 9 hr: 32400s, 12 hr: 43200s
bar = '30s'

# One value per bar: the last '<CLOSE>' observed in the bar, stamped with the
# bar's right edge; bars without any observation inherit the previous value.
df = tick_data['<CLOSE>'].resample(bar, label='right').last().ffill()

# Autocorrelation of the 60 most recent bars (kept as the cell's last
# expression so the plot is the displayed output).
pd.plotting.autocorrelation_plot(df.iloc[-60:])

<AxesSubplot:xlabel='Lag', ylabel='Autocorrelation'>

In [44]:
import time
import numpy as np
import pandas as pd
import datetime as dt
from pylab import mpl, plt

# Analyze the resampled close series next to a simple moving average (SMA).
sub = pd.DataFrame(df)

i = 30  # number of bars in the SMA window
t = str(time.time())  # timestamp string for the (currently disabled) savefig file name

# Mean over an i-bar window, then moved a further i rows down the index.
# NOTE(review): the extra shift by the full window length delays the SMA by
# another i bars on top of the averaging lag - confirm this is intended.
window_mean = sub['<CLOSE>'].rolling(window=i).mean()
sub['SMA'] = window_mean.shift(periods=i)

sub.plot(title='BTC-USD', ylabel='USD/BTC', figsize=(10, 6), lw=0.75)
#plt.savefig('1-BTC-USD_SMA_{}.png'.format(t))
plt.show()
sub.head()

Unnamed: 0_level_0,<CLOSE>,SMA
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-04-22 07:52:00+00:00,54171.44,
2021-04-22 07:52:30+00:00,54129.78,
2021-04-22 07:53:00+00:00,54144.1,
2021-04-22 07:53:30+00:00,54198.09,
2021-04-22 07:54:00+00:00,54222.74,


In [63]:
import pandas as pd
import numpy as np

# Vectorized backtesting: build a feature matrix of lagged return signs.

ptc = 0.005 # proportional transactional costs - Coinbase Pro charges 0.5% per transaction

# Close prices and their bar-to-bar log returns.
data = pd.DataFrame({'close': sub['<CLOSE>']})
data['returns'] = np.log(data['close'] / data['close'].shift(1))

# Drop the first row (no previous close), then keep only the last 60 bars.
data = data.dropna()
data = data.iloc[-60:]

lags = 30

# Column names lag_1 ... lag_<lags>; lag_k holds the return from k bars earlier.
cols = ['lag_{}'.format(n) for n in range(1, lags + 1)]
for lag, col in enumerate(cols, start=1):
    data[col] = data['returns'].shift(lag)

# Rows without a full lag history are discarded.
data = data.dropna()

# Binarize the lagged returns: 1 for a non-negative return, 0 otherwise.
data[cols] = np.where(data[cols] >= 0, 1, 0)

# Label: +1 for a strictly positive return, -1 otherwise.
# NOTE(review): a zero return counts as "up" in the features (>= 0) but as
# "down" in the label (> 0) - confirm the asymmetry is intended.
data['direction'] = np.where(data['returns'] > 0, 1, -1)

data[cols + ['direction']].head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,lag_8,lag_9,lag_10,...,lag_22,lag_23,lag_24,lag_25,lag_26,lag_27,lag_28,lag_29,lag_30,direction
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-04-25 01:30:00+00:00,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1
2021-04-25 01:30:30+00:00,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1
2021-04-25 01:31:00+00:00,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,-1
2021-04-25 01:31:30+00:00,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1
2021-04-25 01:32:00+00:00,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-1


In [39]:
# Write the current `data` frame to disk as CSV.
data.to_csv(path_or_buf='backtest_data.csv')

In [64]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from datetime import datetime
import pickle
import time  # used for the file-name timestamp below; imported here so the cell does not rely on another cell

# Support vector classifier with a linear kernel.
model = SVC(C=1, kernel='linear', gamma='auto')

# The leading 20% of the rows of `data` form the training set.
# NOTE(review): this leaves 80% of the rows for testing - confirm the
# proportion is not meant to be the other way round.
split = int(len(data) * 0.20)

train = data.iloc[:split].copy()

# Fit on the lag columns to predict the direction label.
model.fit(train[cols], train['direction'])

# Persist the fitted model object. The context manager guarantees the file
# handle is flushed and closed even if pickling raises.
t = str(time.time())  # timestamp string used in the file name
with open('algorithm-{}.pkl'.format(t), 'wb') as model_file:
    pickle.dump(model, model_file)

In [65]:
# In-sample accuracy: the model is scored on the same rows it was fitted on.
train_pred = model.predict(train[cols])
accuracy_score(train['direction'], train_pred)

1.0

In [66]:
# Out-of-sample rows: everything from position `split` to the end of `data`.
test = data.iloc[split:].copy()

# The model's predicted class for each row is stored as that row's position.
predicted = model.predict(test[cols])
test['position'] = predicted
#test['position'] = test['direction']         # naive prediction

# Show predictions next to the realised directions for the last rows.
test[['position', 'direction']].tail()

Unnamed: 0_level_0,position,direction
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-04-25 01:42:30+00:00,-1,-1
2021-04-25 01:43:00+00:00,1,1
2021-04-25 01:43:30+00:00,1,1
2021-04-25 01:44:00+00:00,-1,-1
2021-04-25 01:44:30+00:00,-1,1


In [58]:
# Share of test rows where the stored position equals the realised direction.
accuracy_score(y_true=test['direction'], y_pred=test['position'])

0.45454545454545453

In [59]:
# Per-row strategy return: the row's return multiplied by the row's position.
test['strategy'] = test['returns'] * test['position']

# Count the rows whose position differs from the previous row. The first
# row's diff is NaN, which also compares unequal to 0 and is therefore counted.
position_changed = test['position'].diff() != 0
position_changed.sum()

1

In [60]:
# Strategy return net of costs: `ptc` is subtracted on every row where the
# position differs from the previous row (the first row included, because its
# diff is NaN); all other rows keep the gross strategy return.
is_trade = test['position'].diff() != 0
after_costs = test['strategy'] - ptc
test['strategy_tc'] = np.where(is_trade, after_costs, test['strategy'])

In [61]:
# Sum each return column over the test rows, then exponentiate the totals.
summed = test[['returns', 'strategy', 'strategy_tc']].sum()
np.exp(summed)

returns        1.001090
strategy       0.998911
strategy_tc    0.993929
dtype: float64

In [62]:
# Plot the exponentiated running sums of the three return columns and save
# the figure under a timestamped file name.
t = str(time.time())
running_total = test[['returns', 'strategy', 'strategy_tc']].cumsum()
performance = running_total.apply(np.exp)
performance.plot(figsize=(10, 6))
plt.savefig('1-BTC-USD_StratCompare_{}.png'.format(t))
plt.show()

In [20]:
import pandas as pd
from datetime import datetime

# Export the `test` frame to a CSV whose name carries a timestamp.
# `time` is not imported in this cell; it must already be in the kernel namespace.
t = str(time.time())  # timestamp string used in the file name
export_path = 'backtest_{}.csv'.format(t)
test.to_csv(export_path)