In [106]:
import pandas as pd
import matplotlib.pyplot as plt

def plot_selected(df, columns, start_index, end_index):
    """Plot the chosen columns over a label-based slice of the index.

    Rows are taken with ``df.loc[start_index:end_index]`` and only ``columns``
    are kept; the result is passed to ``plot_data`` with the title
    'Prices Normalized'.
    """
    window = df.loc[start_index:end_index, columns]
    plot_data(window, title='Prices Normalized')
    
def normalize_data(df, start_index):
    """Rescale each column by its value in the row at position ``start_index``.

    The reference row is looked up positionally (``iloc``), so after the
    division that row equals 1.0 in every column and all other rows are
    expressed as multiples of it.
    """
    base_row = df.iloc[start_index, :]
    return df.div(base_row, axis='columns')

def plot_data(df, title='Crypto Prices', fontsize=12):
    """Draw every column of ``df`` as a line against its index and show the figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot, one line per column.
    title : str
        Chart title.
    fontsize : int
        Tick-label font size forwarded to ``DataFrame.plot``. The value used
        to be hard-coded to 2, which renders the tick labels unreadably small.
    """
    ax = df.plot(title=title, fontsize=fontsize)
    ax.set_xlabel('Date')
    ax.set_ylabel('Price')
    plt.show()
    
       
# Shared read_csv options: the '<DATE>' column becomes a parsed datetime index.
read_opts = dict(index_col='<DATE>', parse_dates=True, infer_datetime_format=True)

btc_df = pd.read_csv('btc.v.txt', **read_opts)
eth_df = pd.read_csv('eth.v.txt', **read_opts)
matic_df = pd.read_csv('matic.v.txt', **read_opts)
xtz_df = pd.read_csv('xtz.v.txt', **read_opts)

# Align every other coin to the BTC index.
# NOTE(review): method='nearest' also fills BTC dates that lie outside a coin's
# own history with that coin's closest available quote (the head() output
# further down shows ETH/MATIC/XTZ values for 2010, presumably before those
# coins traded). Curves normalized at such dates are not meaningful - confirm
# whether NaN would be the better fill here.
eth_df = eth_df.reindex_like(btc_df, method='nearest')
matic_df = matic_df.reindex_like(btc_df, method='nearest')
xtz_df = xtz_df.reindex_like(btc_df, method='nearest')

# Combine the close prices into one frame, one column per coin.
crypto_df = btc_df[['<CLOSE>']].rename(columns={'<CLOSE>': 'BTC'})
crypto_df['ETH'] = eth_df['<CLOSE>']
crypto_df['MATIC'] = matic_df['<CLOSE>']
crypto_df['XTZ'] = xtz_df['<CLOSE>']

crypto_df.to_csv('crypto_data20210606.csv')

# Normalize to the first row so all series start at 1.0, then plot.
crypto_norm_df = normalize_data(crypto_df, 0)

plot_data(crypto_norm_df)


In [74]:
# Preview the first rows of the combined close-price frame.
crypto_df.head()

Unnamed: 0_level_0,BTC,ETH,MATIC,XTZ
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-07-17,0.04951,3.0,0.004441,2.0
2010-07-18,0.08584,3.0,0.004441,2.0
2010-07-19,0.0808,3.0,0.004441,2.0
2010-07-20,0.07474,3.0,0.004441,2.0
2010-07-21,0.07921,3.0,0.004441,2.0


In [75]:
# Preview the last rows of the combined close-price frame.
crypto_df.tail()

Unnamed: 0_level_0,BTC,ETH,MATIC,XTZ
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-06-01,36389.27,2594.63,1.811,3.641
2021-06-02,37809.74,2721.36,1.813,3.844
2021-06-03,38779.5,2819.25,1.802,3.838
2021-06-04,37220.06,2732.5,1.675,3.617
2021-06-05,34946.84,2560.4,1.474,3.301


In [76]:
# Re-normalize so that the row at position 1000 equals 1.0 in every column,
# then plot the whole history on that basis.
crypto_norm_df = normalize_data(crypto_df, 1000)
plot_data(crypto_norm_df)

In [77]:
# Latest values expressed as multiples of the normalization row.
crypto_norm_df.tail()

Unnamed: 0_level_0,BTC,ETH,MATIC,XTZ
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-06-01,311.019402,864.876667,407.791038,1.8205
2021-06-02,323.160171,907.12,408.241387,1.922
2021-06-03,331.448718,939.75,405.764467,1.919
2021-06-04,318.120171,910.833333,377.167305,1.8085
2021-06-05,298.69094,853.466667,331.907228,1.6505


In [78]:
# Redraw whatever normalized frame is currently bound to crypto_norm_df.
plot_data(crypto_norm_df)

In [113]:
# Use row 3917 as the 1.0 reference and plot only from that row onwards.
base_row = 3917
crypto_norm_df = normalize_data(crypto_df, base_row)
crypto_norm_df.iloc[base_row:].plot()

<AxesSubplot:xlabel='<DATE>'>

In [99]:
# Latest values relative to the current normalization row.
crypto_norm_df.tail()

Unnamed: 0_level_0,BTC,ETH,MATIC,XTZ
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-06-01,0.64766,1.29968,5.309293,0.647289
2021-06-02,0.672942,1.363161,5.315157,0.683378
2021-06-03,0.690202,1.412195,5.282908,0.682311
2021-06-04,0.662447,1.368741,4.910583,0.643022
2021-06-05,0.621988,1.282534,4.321313,0.586844


In [100]:
# Raw prices (and date) of the row at position 3917.
crypto_df.iloc[3917]

BTC      56185.7200
ETH       1996.3600
MATIC        0.3411
XTZ          5.6250
Name: 2021-04-07 00:00:00, dtype: float64

In [93]:
# Raw prices (and date) of the row at position 3000.
crypto_df.iloc[3000]

BTC      6446.350000
ETH       217.350000
MATIC       0.004441
XTZ         1.320000
Name: 2018-10-03 00:00:00, dtype: float64

In [112]:
# Use row 3947 as the 1.0 reference and plot only from that row onwards.
base_row = 3947
crypto_norm_df = normalize_data(crypto_df, base_row)
crypto_norm_df.iloc[base_row:].plot()

<AxesSubplot:xlabel='<DATE>'>

In [114]:
# Use row 3612 as the 1.0 reference and plot only from that row onwards.
base_row = 3612
crypto_norm_df = normalize_data(crypto_df, base_row)
crypto_norm_df.iloc[base_row:].plot()

<AxesSubplot:xlabel='<DATE>'>

In [104]:
import pandas as pd
# Re-read the raw ETH file (rebinding eth_df) and order it by ascending date.
eth_df = pd.read_csv(
    'eth.v.txt',
    index_col='<DATE>',
    parse_dates=True,
    infer_datetime_format=True,
).sort_index()

In [105]:
# Inspect the most recent rows of the raw ETH file.
eth_df.tail()

Unnamed: 0_level_0,<TICKER>,<PER>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<VOL>,<OPENINT>
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-06-01,ETH.V,D,0,2628.17,2739.66,2529.51,2594.63,0,0
2021-06-02,ETH.V,D,0,2597.34,2799.89,2553.68,2721.36,0,0
2021-06-03,ETH.V,D,0,2721.05,2889.62,2667.11,2819.25,0,0
2021-06-04,ETH.V,D,0,2818.33,2871.82,2555.5,2732.5,0,0
2021-06-05,ETH.V,D,0,2734.01,2815.83,2556.39,2560.4,0,0


In [107]:
# ETH close series, used as-is (no resampling applied).
df = eth_df['<CLOSE>']
# Autocorrelation plot of the last 60 observations (positional slice).
pd.plotting.autocorrelation_plot(df.iloc[-60:])

<AxesSubplot:xlabel='Lag', ylabel='Autocorrelation'>

In [108]:
# Number of observations in the ETH close series.
len(df)

2130

In [109]:
import pandas as pd
import numpy as np

# Vectorized backtesting: build binary lagged-return features and a direction label.

sub = pd.DataFrame(df)
ptc = 0.005  # cost deducted from the strategy's log return on each position change
data = pd.DataFrame({'close': sub['<CLOSE>']})
# Log return of each close relative to the previous one.
data['returns'] = np.log(data['close'].div(data['close'].shift(1)))
data.dropna(inplace=True)

lags = 20
cols = ['lag_{}'.format(lag) for lag in range(1, lags + 1)]

# lag_k holds the return observed k rows earlier.
for lag, col in enumerate(cols, start=1):
    data[col] = data['returns'].shift(lag)

# Drop the leading rows that do not yet have all lags available.
data.dropna(inplace=True)

# Features: 1 when the lagged return was >= 0, else 0.
data[cols] = np.where(data[cols] >= 0, 1, 0)

# Label: +1 for a strictly positive return, -1 otherwise (zero counts as -1).
data['direction'] = np.where(data['returns'] > 0, 1, -1)

data[cols + ['direction']].tail()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,lag_8,lag_9,lag_10,...,lag_12,lag_13,lag_14,lag_15,lag_16,lag_17,lag_18,lag_19,lag_20,direction
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-06-01,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,-1
2021-06-02,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1
2021-06-03,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1
2021-06-04,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,-1
2021-06-05,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,-1


In [110]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from datetime import datetime
import pickle
import time

# Linear support-vector classifier on the binary lag features.
# NOTE(review): gamma is presumably unused with kernel='linear' - confirm against the SVC docs.
model = SVC(C=1, kernel='linear', gamma='auto')

# Chronological split: the first half of the rows is the training set.
split = int(len(data)*0.50)

train = data.iloc[:split].copy()

model.fit(train[cols], train['direction'])

# Timestamp used to give each saved model a distinct file name.
t = str(time.time())

# Persist the fitted model; the context manager closes the file handle,
# which the previous bare open(...) call never did.
with open('ETH_algorithm_{}.pkl'.format(t), 'wb') as model_file:
    pickle.dump(model, model_file)

In [111]:
# In-sample accuracy: predictions on the same rows the model was fitted on.
accuracy_score(train['direction'], model.predict(train[cols]))

0.571157495256167

In [112]:
# Hold-out set: every row from the split point onwards.
test = data.iloc[split:].copy()

# NOTE(review): the position is the NEGATED prediction here, whereas the
# training-accuracy cell and the later pickle-based evaluation use the raw
# prediction. Confirm the sign flip is intentional and was not chosen after
# looking at the test results.
test['position'] = -1*model.predict(test[cols])

test[['position', 'direction']].tail()

Unnamed: 0_level_0,position,direction
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-06-01,1,-1
2021-06-02,-1,1
2021-06-03,1,1
2021-06-04,1,-1
2021-06-05,-1,-1


In [113]:
# Share of hold-out rows where the position matches the realized direction.
accuracy_score(test['direction'], test['position'])

0.5014218009478673

In [114]:
# Strategy log return per row: position (+1/-1) times the market log return.
test['strategy'] = test['position'] * test['returns']
# Count rows where the position differs from the previous row; the first
# row's diff is NaN, which compares unequal to 0 and is therefore counted.
test['position'].diff().ne(0).sum()

579

In [115]:
# Deduct ptc from the strategy log return on every row where the position changed.
position_changed = test['position'].diff() != 0
test['strategy_tc'] = np.where(
    position_changed,
    test['strategy'] - ptc,
    test['strategy'],
)

In [116]:
# exp of the summed log returns = total performance multiple over the hold-out period.
test[['returns', 'strategy', 'strategy_tc']].sum().apply(np.exp)

returns        5.378879
strategy       1.625703
strategy_tc    0.089900
dtype: float64

In [117]:
# Cumulative performance over time: exp of the running sum of log returns.
test[['returns', 'strategy', 'strategy_tc']].cumsum().apply(np.exp).plot(figsize=(10,6))

<AxesSubplot:xlabel='<DATE>'>

In [120]:
# Reload the ETH history, sorted by ascending date, for the model evaluation below.
# NOTE(review): this reads the same 'eth.v.txt' file used for training earlier,
# so the evaluation that follows is not strictly out of sample - confirm.
eth_df = pd.read_csv(
    'eth.v.txt',
    index_col='<DATE>',
    parse_dates=True,
    infer_datetime_format=True,
).sort_index()

In [121]:
# Rebuild the close/returns frame from the freshly loaded ETH data.
df = eth_df['<CLOSE>']
data = pd.DataFrame(df)
# Take 'close' from this cell's own data; it used to be read from the global
# `sub` created in an earlier cell, which silently goes stale (or is missing
# on a fresh kernel) whenever that cell has not been re-run.
data['close'] = data['<CLOSE>']
# Log return of each close relative to the previous one.
data['returns'] = np.log(data['close'] / data['close'].shift(1))
data.dropna(inplace=True)
data.head()

Unnamed: 0_level_0,<CLOSE>,close,returns
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-08-08,1.2,1.2,-0.916291
2015-08-09,1.2,1.2,0.0
2015-08-10,1.2,1.2,0.0
2015-08-11,0.99,0.99,-0.192372
2015-08-12,1.29,1.29,0.264693


In [122]:
# Same feature construction as for training: 20 binary lagged-return columns.
lags = 20
cols = ['lag_{}'.format(lag) for lag in range(1, lags + 1)]

# lag_k holds the return observed k rows earlier.
for lag, col in enumerate(cols, start=1):
    data[col] = data['returns'].shift(lag)

# Drop the leading rows that do not yet have all lags available.
data.dropna(inplace=True)

# Features: 1 when the lagged return was >= 0, else 0.
data[cols] = np.where(data[cols] >= 0, 1, 0)

# Label: +1 for a strictly positive return, -1 otherwise (zero counts as -1).
data['direction'] = np.where(data['returns'] > 0, 1, -1)

data[cols + ['direction']].tail()

Unnamed: 0_level_0,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,lag_8,lag_9,lag_10,...,lag_12,lag_13,lag_14,lag_15,lag_16,lag_17,lag_18,lag_19,lag_20,direction
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-06-01,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,-1
2021-06-02,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,1
2021-06-03,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1
2021-06-04,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,-1
2021-06-05,0.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,-1


In [123]:
# Evaluate on the full feature frame.
# NOTE(review): this covers every row, including the first half that the SVC
# above was trained on - confirm whether only unseen rows should be scored.
test = data.copy()
  
# NOTE(review): 'ETH_algorithm3.pkl' does not match the 'ETH_algorithm_<time>.pkl'
# name written by the training cell; verify which model this file contains.
# Unpickling executes code from the file, so only load trusted pickles.
algorithm = pd.read_pickle('ETH_algorithm3.pkl')

# Raw model prediction is used as the position here (no sign flip).
test['position'] = algorithm.predict(test[cols])

test[['position', 'direction']].tail()

Unnamed: 0_level_0,position,direction
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-06-01,-1,-1
2021-06-02,1,1
2021-06-03,-1,1
2021-06-04,-1,-1
2021-06-05,1,-1


In [124]:
# Share of rows where the loaded model's position matches the realized direction.
accuracy_score(test['direction'], test['position'])

0.534850640113798

In [125]:
# Strategy log return per row: position (+1/-1) times the market log return.
test['strategy'] = test['position'] * test['returns']
# Deduct ptc on every row where the position differs from the previous row
# (the first row's diff is NaN, so it is charged as well). The stand-alone
# trade count that used to sit here was computed and thrown away, because a
# cell only displays its last expression.
test['strategy_tc'] = np.where(test['position'].diff() != 0, test['strategy'] - ptc, test['strategy'])
# exp of the summed log returns = total performance multiple over the period.
test[['returns', 'strategy', 'strategy_tc']].sum().apply(np.exp)

returns        2245.964912
strategy       9490.177796
strategy_tc      41.596358
dtype: float64

In [126]:
# Cumulative performance over time: exp of the running sum of log returns.
test[['returns', 'strategy', 'strategy_tc']].cumsum().apply(np.exp).plot(figsize=(10,6))

<AxesSubplot:xlabel='<DATE>'>