In [1]:
# work with saved historic data to minimize the number of api requests

import pandas as pd
import os

symbol = 'btc'

tick_data = pd.read_csv('online_trading_data.csv', index_col=1, infer_datetime_format=True, parse_dates=True)

#tick_data = pd.read_csv(symbol+'_5min20210128.v.txt', index_col=2, infer_datetime_format=True, parse_dates=True)

#tick_data.iloc[:, 0] = pd.to_datetime(tick_data.iloc[:, 0], infer_datetime_format=True, unit='s')

#tick_data.set_index('time', inplace=True)

tick_data.sort_index(inplace=True)

# implement re-sampling
'''
bar lengths are in seconds
5 min: 300s, 10 min: 600s, 20 min: 1200s, 40 min: 2400s, 60min: 3600s
3 hr: 10800s, 6hr: 21600s, 9hr: 32400s, 12hr: 43200s
'''
df = pd.DataFrame()
bar = '1200s'
df = tick_data.resample(bar, label='right').last().ffill()

df.head

<bound method NDFrame.head of                           date       Mid   Returns  Direction
time                                                         
2021-01-31 04:00:00  1/29/2021  33674.95       NaN         -1
2021-01-31 04:20:00  1/29/2021  33415.55 -0.012241         -1
2021-01-31 04:40:00  1/29/2021  33053.60 -0.004268         -1
2021-01-31 05:00:00  1/29/2021  33016.22  0.003980          1
2021-01-31 05:20:00  1/29/2021  33060.23 -0.001761         -1
2021-01-31 05:40:00  1/29/2021  32952.07 -0.004236         -1
2021-01-31 06:00:00  1/29/2021  33070.80  0.000592          1
2021-01-31 06:20:00  1/29/2021  32766.30 -0.008706         -1
2021-01-31 06:40:00  1/29/2021  32900.04  0.005081          1
2021-01-31 07:00:00  1/29/2021  32484.04 -0.005684         -1
2021-01-31 07:20:00  1/29/2021  32576.91  0.000855          1
2021-01-31 07:40:00  1/29/2021  32226.52 -0.002359         -1
2021-01-31 08:00:00  1/29/2021  32278.46  0.002792          1
2021-01-31 08:20:00  1/29/2021  32621.67

In [9]:
import time
import numpy as np
import pandas as pd
import datetime as dt
from pylab import mpl, plt

sub = df.copy()

# analyze data sub-set

i = 2 # Number of ticks to use for simple moving average (SMA)
t = str(time.time()) #datetime stamp

sub['SMA'] = sub.loc[:, 'Mid'].rolling(i).mean().shift(i)
sub[['Mid', 'SMA']].plot(figsize=(10,6), lw=0.75, title='BTC-USD', ylabel='USD/BTC')
#plt.savefig('1-BTC-USD_SMA_{}.png'.format(t))
plt.show()

In [10]:
import pandas as pd
import numpy as np

# vectorized backtesting

ptc = 0.005 # proportional transactional costs - Coinbase Pro charges 0.5% per transaction

data = pd.DataFrame()
data['Mid'] = sub['Mid']
data['returns'] = np.log(data['Mid']/data['Mid'].shift(1))

data.dropna(inplace=True)

lags = 3

cols = []

for lag in range(1, lags+1):
    col = 'lag_{}'.format(lag)
    data[col] = data['returns'].shift(lag)
    cols.append(col)

data.dropna(inplace=True)

data[cols] = np.where(data[cols] >= 0, 1, 0)

data['direction'] = np.where(data['returns'] > 0, 1, -1)

data[cols + ['direction']].head()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,direction
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-30 05:20:00,0.0,0.0,0.0,1
2021-01-30 05:40:00,1.0,0.0,0.0,-1
2021-01-30 06:00:00,0.0,1.0,0.0,1
2021-01-30 06:20:00,1.0,0.0,1.0,-1
2021-01-30 06:40:00,0.0,1.0,0.0,1


In [39]:
data.to_csv('backtest_data.csv')

In [11]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from datetime import datetime
import pickle

model = SVC(C=1, kernel='linear', gamma='auto')

split = int(len(data) * 0.80)

train = data.iloc[:split].copy()

model.fit(train[cols], train['direction'])

# persisting the model object
t = str(time.time()) #datetime stamp
pickle.dump(model, open('algorithm-{}.pkl'.format(t), 'wb'))

In [12]:
accuracy_score(train['direction'], model.predict(train[cols]))

0.75

In [13]:
test = data.iloc[split:].copy()

#test['position'] = model.predict(test[cols])
test['position'] = test['direction']         # naive prediction

test[['position','direction']].tail()

Unnamed: 0_level_0,position,direction
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-30 13:40:00,1,1
2021-01-30 14:00:00,1,1
2021-01-30 14:20:00,-1,-1
2021-01-30 14:40:00,1,1
2021-01-30 15:00:00,-1,-1


In [14]:
accuracy_score(test['direction'], test['position'])

1.0

In [15]:
test['strategy'] = test['position'] * test['returns']
sum(test['position'].diff() != 0)

5

In [16]:
test['strategy_tc'] = np.where(test['position'].diff() != 0, test['strategy'] - ptc, test['strategy'])

In [17]:
test[['returns', 'strategy', 'strategy_tc']].sum().apply(np.exp)

returns        0.995851
strategy       1.051261
strategy_tc    1.025305
dtype: float64

In [18]:
t = str(time.time())
test[['returns', 'strategy', 'strategy_tc']].cumsum().apply(np.exp).plot(figsize=(10,6))
plt.savefig('1-BTC-USD_StratCompare_{}.png'.format(t))
plt.show()

In [19]:
import pandas as pd
from datetime import datetime
t = str(time.time()) #datetime stamp #datetime stamp
test.to_csv('backtest_{}.csv'.format(t))