In [46]:
from pathlib import Path
from time import time
import datetime

import numpy as np
import pandas as pd

from scipy.stats import spearmanr

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns

In [47]:
# Global Settings

# Reproducibility and plot styling
np.random.seed(42)
sns.set_style('whitegrid')

# Shorthand for label-based slicing on MultiIndex frames
idx = pd.IndexSlice

# Time bounds
START, END = 2020, 2022

# Data directory. Keep the trailing slash: file paths below are built by
# plain string concatenation with this prefix.
model_dataDir = 'DATA/MODELDATA/'


In [48]:
# Import Data

# Model predictions: one HDF5 file per linear model, each with its own key
lr_predictions = pd.read_hdf(f'{model_dataDir}lr_model_predictions.h5', key='lr/predictions')
ridge_predictions = pd.read_hdf(f'{model_dataDir}Ridge_Predictions.h5', key='ridge/predictions')
lasso_predictions = pd.read_hdf(f'{model_dataDir}Lasso_Predictions.h5', key='lasso/predictions')

# Model data: the unnamed first CSV column holds the dates; parse it and
# index the frame by (Dates, Coin)
model_mdf = (
    pd.read_csv(model_dataDir + 'ModelData.csv')
    .rename(columns={'Unnamed: 0': 'Dates'})
    .assign(Dates=lambda frame: pd.to_datetime(frame['Dates']))
    .set_index(['Dates', 'Coin'])
)

# Prices: the contiguous column slice from 'Open' through 'Close'
prices = model_mdf.loc[:, 'Open':'Close']

In [49]:
# Preview the first rows of the ridge predictions (columns: actuals, predicted, alpha)
ridge_predictions.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,actuals,predicted,alpha
Dates,Coin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-08,algorand,-0.017524,-0.000269,0.0001
2022-01-09,algorand,0.020682,0.004376,0.0001
2022-01-10,algorand,0.025327,0.012404,0.0001
2022-01-11,algorand,-0.007643,0.007684,0.0001
2022-01-12,algorand,-0.044458,-0.003803,0.0001


In [50]:
# Preview the first rows of the price frame, indexed by (Dates, Coin)
prices.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Open,High,Low,Close
Dates,Coin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01,algorand,0.218748,0.222235,0.216208,0.217082
2020-01-02,algorand,0.21711,0.22355,0.21262,0.22113
2020-01-03,algorand,0.225794,0.228268,0.223328,0.22719
2020-01-04,algorand,0.228031,0.235819,0.22705,0.235536
2020-01-05,algorand,0.234223,0.237087,0.229622,0.231797


In [51]:
def get_backtest_data(predictions, prices, verbose=True):
    """Attach the best-alpha model predictions to the price data.

    The regularisation strength (``alpha``) is chosen as the one whose
    predictions have the highest Spearman rank correlation with the actuals,
    computed over all rows sharing that alpha. Only the ``predicted`` column
    for that alpha is kept and right-joined onto ``prices``, so every price
    row is preserved and rows without a prediction get NaN in ``predicted``.

    Parameters
    ----------
    predictions : pandas.DataFrame
        Must contain the columns ``actuals``, ``predicted`` and ``alpha`` and
        have a two-level index; the levels are labelled ``('Dates', 'Coin')``
        on a copy, the caller's frame is not modified.
    prices : pandas.DataFrame
        Price columns to join onto. The join is on the index, so it is
        expected to be indexed by ``('Dates', 'Coin')`` as well.
    verbose : bool, default True
        When True, print the head of the selected predictions and of
        ``prices`` as a quick sanity check. Pass False to silence this.

    Returns
    -------
    pandas.DataFrame
        ``predicted`` followed by the columns of ``prices``, one row per row
        of ``prices``.
    """
    # Score each alpha by rank correlation. Selecting the two columns first
    # keeps the grouping column out of the applied frame, which newer pandas
    # versions warn about.
    alpha_scores = predictions.groupby('alpha')[['actuals', 'predicted']].apply(
        lambda grp: spearmanr(grp.actuals, grp.predicted)[0]
    )
    best_alpha = alpha_scores.idxmax()

    # Single filter + column selection; rename_axis returns a new object, so
    # the caller's index names are left untouched.
    best_predictions = (
        predictions.loc[predictions.alpha == best_alpha, ['predicted']]
        .rename_axis(['Dates', 'Coin'])
    )

    if verbose:
        print(best_predictions.head())
        print('-----------------')
        print(prices.head())

    # Right join: keep the full price history, including dates that precede
    # the first available prediction.
    return best_predictions.join(prices, how='right')


# Build the backtest frame: ridge predictions joined onto the price data
data = get_backtest_data(ridge_predictions, prices)

# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit a stray "None" after the summary).
data.info()

                     predicted
Dates      Coin               
2022-01-08 algorand  -0.003448
2022-01-09 algorand   0.001322
2022-01-10 algorand   0.003637
2022-01-11 algorand   0.001584
2022-01-12 algorand  -0.001862
-----------------
                         Open      High       Low     Close
Dates      Coin                                            
2020-01-01 algorand  0.218748  0.222235  0.216208  0.217082
2020-01-02 algorand  0.217110  0.223550  0.212620  0.221130
2020-01-03 algorand  0.225794  0.228268  0.223328  0.227190
2020-01-04 algorand  0.228031  0.235819  0.227050  0.235536
2020-01-05 algorand  0.234223  0.237087  0.229622  0.231797
<class 'pandas.core.frame.DataFrame'>
MultiIndex: 30522 entries, (Timestamp('2020-01-01 00:00:00'), 'algorand') to (Timestamp('2022-01-14 00:00:00'), 'zcash')
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   predicted  29273 non-null  float64
 1   Open       30522 non-null  fl

In [52]:
# Preview the joined frame; `predicted` is NaN on price rows that have no matching prediction
data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,predicted,Open,High,Low,Close
Dates,Coin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-01,algorand,,0.218748,0.222235,0.216208,0.217082
2020-01-02,algorand,,0.21711,0.22355,0.21262,0.22113
2020-01-03,algorand,,0.225794,0.228268,0.223328,0.22719
2020-01-04,algorand,,0.228031,0.235819,0.22705,0.235536
2020-01-05,algorand,,0.234223,0.237087,0.229622,0.231797


In [53]:
# Show the predictions with the index levels swapped to (Coin, Dates).
# The result is only displayed; `data` itself is not reassigned.
data.predicted.swaplevel()

Coin      Dates     
algorand  2020-01-01         NaN
          2020-01-02         NaN
          2020-01-03         NaN
          2020-01-04         NaN
          2020-01-05         NaN
                          ...   
zcash     2022-01-10    0.000830
          2022-01-11    0.000353
          2022-01-12   -0.003752
          2022-01-13   -0.004153
          2022-01-14   -0.003561
Name: predicted, Length: 30522, dtype: float64

In [None]:
# Vectorized backtest

In [None]:
# Event-driven backtest with Backtester