In [12]:
from pathlib import Path
from time import time
import datetime

import numpy as np
import pandas as pd

from scipy.stats import spearmanr

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns

In [13]:
# Global Settings

# Session-wide helpers: seaborn theme, fixed NumPy seed, MultiIndex slicer alias
sns.set_style('whitegrid')
np.random.seed(42)
idx = pd.IndexSlice

# Input directories; the trailing slash matters because file names are
# appended to these strings directly when the data is loaded
model_dataDir = 'DATA/MODELDATA/'
bench_dataDir = 'DATA/COINHISTDATA/'

# Year bounds
# NOTE(review): START/END are not referenced by any cell visible here -- confirm they are still needed
START = 2020
END = 2022


In [14]:
# Import Data

# Model predictions (one HDF5 store per regression model)
lr_predictions = pd.read_hdf(f'{model_dataDir}lr_model_predictions.h5', 'lr/predictions')
ridge_predictions = pd.read_hdf(f'{model_dataDir}Ridge_Predictions.h5', 'ridge/predictions')
lasso_predictions = pd.read_hdf(f'{model_dataDir}Lasso_Predictions.h5', 'lasso/predictions')

# Price data: the unnamed first CSV column holds the dates; index by (Dates, Coin)
model_mdf = (
    pd.read_csv(f'{model_dataDir}ModelData.csv')
    .rename(columns={'Unnamed: 0': 'Dates'})
    .assign(Dates=lambda frame: pd.to_datetime(frame['Dates']))
    .set_index(['Dates', 'Coin'])
)
prices = model_mdf.loc[:, 'Open':'Close']

# Benchmark: day-over-day percentage change of the bitcoin 'Open' column.
# The index keeps the raw CSV values here (no datetime conversion yet).
bench = (
    pd.read_csv(f'{bench_dataDir}bitcoin.csv')
    .rename(columns={'Unnamed: 0': 'Dates'})
    .set_index('Dates')
    .loc[:, ['Open']]
    .pct_change()
)

# Restrict the benchmark to the span of the ridge predictions, plus one extra day
ridge_dates = ridge_predictions.index.get_level_values('Dates')
bstart = ridge_dates.min().strftime('%Y-%m-%d')
bstop = (ridge_dates.max() + pd.DateOffset(1)).strftime('%Y-%m-%d')
bench = bench[bstart:bstop].rename(columns={'Open': 'Bench'})

In [15]:
#bench.info()

In [16]:
#bench.head()

In [21]:
def get_backtest_data(predictions, prices, restrict_prices=False):
    """Combine regression model predictions with price data.

    Parameters
    ----------
    predictions : pd.DataFrame
        Model output with a two-level index (coin first, date second) and at
        least a ``predicted`` column. When an ``alpha`` column is present,
        ``actuals`` must be present too: only the rows of the alpha with the
        highest Spearman rank correlation between ``actuals`` and
        ``predicted`` are kept. Frames without an ``alpha`` column are used
        as-is.
    prices : pd.DataFrame
        Price columns indexed by levels named ``Coin`` and ``Dates``.
    restrict_prices : bool, default False
        If True, keep only price rows whose coin appears in the predictions
        and whose date lies between the first prediction date and one day
        after the last one. The default keeps every price row, so rows
        outside the prediction window carry NaN in ``predicted``.

    Returns
    -------
    pd.DataFrame
        ``predicted`` followed by the price columns, with one row per
        (retained) price row (right join on the index).
    """
    if 'alpha' in predictions.columns:
        # Information coefficient per regularisation strength; keep the best one.
        ic_by_alpha = predictions.groupby('alpha').apply(
            lambda grp: spearmanr(grp.actuals, grp.predicted)[0])
        predictions = predictions[predictions.alpha == ic_by_alpha.idxmax()]

    # rename_axis returns a new frame, so the caller's index names are untouched.
    predictions = predictions[['predicted']].rename_axis(['Coin', 'Dates'])

    if restrict_prices:
        pred_dates = predictions.index.get_level_values('Dates')
        tickers = predictions.index.get_level_values('Coin').unique()
        start = pred_dates.min()
        stop = pred_dates.max() + pd.DateOffset(1)

        # Filter by level name so the order of the price index levels is irrelevant.
        price_dates = prices.index.get_level_values('Dates')
        price_coins = prices.index.get_level_values('Coin')
        in_window = (price_coins.isin(tickers)
                     & (price_dates >= start)
                     & (price_dates <= stop))
        prices = prices[in_window]

    return predictions.join(prices, how='right')


# Join the Lasso predictions with the price data.
data = get_backtest_data(lasso_predictions, prices)
# DataFrame.info() prints its summary itself and returns None, so it is not
# wrapped in print() (which would emit a stray "None" line).
data.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 44104 entries, ('basic-attention-token', Timestamp('2019-01-01 00:00:00')) to ('zcash', Timestamp('2022-02-06 00:00:00'))
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   predicted  28392 non-null  float64
 1   Open       44104 non-null  float64
 2   High       44104 non-null  float64
 3   Low        44104 non-null  float64
 4   Close      44104 non-null  float64
dtypes: float64(5)
memory usage: 1.9+ MB
None


In [22]:
# Preview the last rows of the joined predictions/price frame.
# (Disabled: swapping the two index levels before display.)
#data = data.swaplevel()
data.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,predicted,Open,High,Low,Close
Coin,Dates,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
zcash,2022-02-02,,100.509081,103.084153,98.24341,98.24341
zcash,2022-02-03,,98.182877,100.833935,95.995052,100.833935
zcash,2022-02-04,,99.948187,114.882213,99.948187,114.882213
zcash,2022-02-05,,115.028496,122.081184,114.873152,121.22363
zcash,2022-02-06,,124.280985,124.993579,121.797601,124.907266


In [None]:
# Day-over-day percentage change of the Open price, one column per coin,
# covering every date present in `data`
open_by_coin = data['Open'].unstack(level='Coin').sort_index()
daily_returns = open_by_coin.pct_change()
daily_returns.info(verbose=False)

In [None]:
#daily_returns.head()

In [None]:
# Forward returns: each row holds the return recorded on the following row
fwd_returns = daily_returns.shift(periods=-1)
fwd_returns.head()

In [None]:
# Pivot the model predictions into a dates x coins table
predictions = data['predicted'].unstack(level='Coin')
predictions.info()

In [None]:
#predictions.head(50)

In [None]:
# VECTORIZED BACKTEST

# Maximum number of long and short positions per day
N_LONG = N_SHORT = 20

# Get the trading signals from the model predictions.
# Long: among positive predictions, rank descending (largest = rank 1) and keep
# ranks up to N_LONG. Short: among negative predictions, rank ascending (most
# negative = rank 1) and keep ranks up to N_SHORT.
# The comparison must be `<=`: `>` would select everything *outside* the top N.
# Masked (NaN) entries compare False and therefore get signal 0.
long_signals = ((predictions
                 .where(predictions > 0)
                 .rank(axis=1, ascending=False) <= N_LONG)
                .astype(int))
short_signals = ((predictions
                  .where(predictions < 0)
                  .rank(axis=1) <= N_SHORT)
                 .astype(int))


# Compute the portfolio returns for the backtest.
# mean(axis=1) averages over every coin with a non-missing forward return, so
# coins with signal 0 contribute a zero return to the daily average.
long_returns = long_signals.mul(fwd_returns).mean(axis=1)
short_returns = short_signals.mul(-fwd_returns).mean(axis=1)
strategy = long_returns.add(short_returns).to_frame('Strategy')

In [None]:
#strategy.join(bench).head(100)

In [None]:
#bench.index.dtype

In [None]:
#bench.head(100)

In [None]:
#strategy.index.dtype

In [None]:
#strategy.head(100)

In [None]:
# Results
# The benchmark index must be datetime so it aligns with the strategy's dates in the join
bench.index = pd.to_datetime(bench.index)
fig, axes = plt.subplots(ncols=2, figsize=(14,5))

# Left panel: compounded return of the strategy and the benchmark
strategy.join(bench).add(1).cumprod().sub(1).plot(ax=axes[0], title='Cumulative Return')

# Right panel: kernel density estimate of the daily returns.
# sns.distplot is deprecated; with hist=False it only drew the KDE, which
# sns.kdeplot produces directly. Series are passed so each call draws one curve.
sns.kdeplot(strategy['Strategy'].dropna(), ax=axes[1], label='Strategy')
sns.kdeplot(bench['Bench'].dropna(), ax=axes[1], label='BTC')
axes[1].set_xlabel('')
axes[1].legend()  # the label= arguments only show up once a legend is drawn
axes[1].set_title('Daily Standard Deviation')

# Show both return axes as percentages
axes[0].yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y))) 
axes[1].xaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y))) 
sns.despine()
fig.tight_layout();

In [None]:
# EVENT-DRIVEN BACKTEST with Backtester