In [1]:
# work with saved historic data to minimize the number of api requests

import pandas as pd
import os

symbol = 'btc'

tick_data = pd.read_csv('online_trading_data.csv', index_col=1, infer_datetime_format=True, parse_dates=True)

#tick_data = pd.read_csv(symbol+'_5min20210128.v.txt', index_col=2, infer_datetime_format=True, parse_dates=True)

#tick_data.iloc[:, 0] = pd.to_datetime(tick_data.iloc[:, 0], infer_datetime_format=True, unit='s')

#tick_data.set_index('time', inplace=True)

tick_data.sort_index(inplace=True)

# implement re-sampling
'''
bar lengths are in seconds
5 min: 300s, 10 min: 600s, 20 min: 1200s, 40 min: 2400s, 60min: 3600s
3 hr: 10800s, 6hr: 21600s, 9hr: 32400s, 12hr: 43200s
'''
df = pd.DataFrame()
bar = '300s'
df = tick_data.resample(bar, label='right').last().ffill()

df.head

<bound method NDFrame.head of                           date       Mid       Returns  Direction
time                                                             
2021-02-11 03:55:00  1/29/2021  33674.95           NaN       -1.0
2021-02-11 04:00:00  1/29/2021  33674.95           NaN       -1.0
2021-02-11 04:05:00  1/29/2021  33827.12  4.509000e-03        1.0
2021-02-11 04:10:00  1/29/2021  33827.12  4.509000e-03        1.0
2021-02-11 04:15:00  1/29/2021  33415.55 -1.224100e-02       -1.0
...                        ...       ...           ...        ...
2021-02-11 14:35:00  1/29/2021  38168.05  6.578000e-03        1.0
2021-02-11 14:40:00  1/29/2021  38168.05  6.578000e-03        1.0
2021-02-11 14:45:00  1/29/2021  38168.04 -2.620000e-07       -1.0
2021-02-11 14:50:00  1/29/2021  38168.04 -2.620000e-07       -1.0
2021-02-11 14:55:00  1/29/2021  37346.28 -5.800000e-05       -1.0

[133 rows x 4 columns]>

In [3]:
import time
import numpy as np
import pandas as pd
import datetime as dt
from pylab import mpl, plt

sub = df.copy()

# analyze data sub-set

i = 2 # Number of ticks to use for simple moving average (SMA)
t = str(time.time()) #datetime stamp

sub['SMA'] = sub.loc[:, 'Mid'].rolling(i).mean().shift(i)
sub[['Mid', 'SMA']].plot(figsize=(10,6), lw=0.75, title='BTC-USD', ylabel='USD/BTC')
#plt.savefig('1-BTC-USD_SMA_{}.png'.format(t))
plt.show()

In [4]:
import pandas as pd
import numpy as np

# vectorized backtesting

ptc = 0.005 # proportional transactional costs - Coinbase Pro charges 0.5% per transaction

data = pd.DataFrame()
data['Mid'] = sub['Mid']
data['returns'] = np.log(data['Mid']/data['Mid'].shift(1))

data.dropna(inplace=True)

lags = 5

cols = []

for lag in range(1, lags+1):
    col = 'lag_{}'.format(lag)
    data[col] = data['returns'].shift(lag)
    cols.append(col)

data.dropna(inplace=True)

data[cols] = np.where(data[cols] >= 0, 1, 0)

data['direction'] = np.where(data['returns'] > 0, 1, -1)

data[cols + ['direction']].head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,lag_5,direction
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-02-11 04:25:00,1.0,0.0,1.0,1.0,1.0,-1
2021-02-11 04:30:00,0.0,1.0,0.0,1.0,1.0,-1
2021-02-11 04:35:00,1.0,0.0,1.0,0.0,1.0,-1
2021-02-11 04:40:00,0.0,1.0,0.0,1.0,0.0,-1
2021-02-11 04:45:00,1.0,0.0,1.0,0.0,1.0,-1


In [39]:
data.to_csv('backtest_data.csv')

In [5]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from datetime import datetime
import pickle

model = SVC(C=1, kernel='linear', gamma='auto')

split = int(len(data) * 0.80)

train = data.iloc[:split].copy()

model.fit(train[cols], train['direction'])

# persisting the model object
t = str(time.time()) #datetime stamp
pickle.dump(model, open('algorithm-{}.pkl'.format(t), 'wb'))

In [6]:
accuracy_score(train['direction'], model.predict(train[cols]))

0.7623762376237624

In [14]:
test = data.iloc[split:].copy()

test['position'] = model.predict(test[cols])
#test['position'] = test['direction']         # naive prediction

test[['position','direction']].tail()

Unnamed: 0_level_0,position,direction
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-02-11 14:35:00,1,1
2021-02-11 14:40:00,-1,-1
2021-02-11 14:45:00,1,-1
2021-02-11 14:50:00,-1,-1
2021-02-11 14:55:00,-1,-1


In [15]:
accuracy_score(test['direction'], test['position'])

0.8461538461538461

In [16]:
test['strategy'] = test['position'] * test['returns']
sum(test['position'].diff() != 0)

9

In [17]:
test['strategy_tc'] = np.where(test['position'].diff() != 0, test['strategy'] - ptc, test['strategy'])

In [18]:
test[['returns', 'strategy', 'strategy_tc']].sum().apply(np.exp)

returns        1.003741
strategy       1.033448
strategy_tc    0.987974
dtype: float64

In [19]:
t = str(time.time())
test[['returns', 'strategy', 'strategy_tc']].cumsum().apply(np.exp).plot(figsize=(10,6))
plt.savefig('1-BTC-USD_StratCompare_{}.png'.format(t))
plt.show()

In [20]:
import pandas as pd
from datetime import datetime
t = str(time.time()) #datetime stamp #datetime stamp
test.to_csv('backtest_{}.csv'.format(t))