In [1]:
from pathlib import Path
from time import time
import datetime

import numpy as np
import pandas as pd

from scipy.stats import spearmanr

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns

In [2]:
# Global Settings

# Helpers
idx = pd.IndexSlice          # shorthand for building MultiIndex slices
np.random.seed(42)           # fix NumPy's global RNG seed
sns.set_style('whitegrid')   # seaborn plot style used for all figures


# Time
# NOTE(review): presumably the first/last calendar year of the sample — confirm
START, END = 2020, 2022


# Directories (plain strings with a trailing slash: file names are appended
# to them directly when loading)
bench_dataDir = 'DATA/COINHISTDATA/'
model_dataDir = 'DATA/MODELDATA/'


In [3]:
# Import Data

# Predictions: one HDF5 store per linear model.
lr_predictions = pd.read_hdf(f'{model_dataDir}lr_model_predictions.h5', 'lr/predictions')
ridge_predictions = pd.read_hdf(f'{model_dataDir}Ridge_Predictions.h5', 'ridge/predictions')
lasso_predictions = pd.read_hdf(f'{model_dataDir}Lasso_Predictions.h5', 'lasso/predictions')

# Prices: the unnamed first CSV column holds the dates; parse them and index
# the frame by (Dates, Coin). Built as one chain (no inplace mutation) so the
# cell gives the same result when it is re-run.
model_mdf = (
    pd.read_csv(model_dataDir + 'ModelData.csv')
    .rename(columns={'Unnamed: 0': 'Dates'})
    .assign(Dates=lambda frame: pd.to_datetime(frame['Dates']))
    .set_index(['Dates', 'Coin'])
)
prices = model_mdf.loc[:, 'Open':'Close']

# Benchmark: percentage change of the Open column of bitcoin.csv.
# The dates are parsed here as well, so the benchmark carries a DatetimeIndex
# like the price frame instead of raw strings; the slice below is then a real
# date-range selection rather than a lexicographic string comparison.
bench = (
    pd.read_csv(bench_dataDir + 'bitcoin.csv')
    .rename(columns={'Unnamed: 0': 'Dates'})
    .assign(Dates=lambda frame: pd.to_datetime(frame['Dates']))
    .set_index('Dates')
    .loc[:, 'Open']
    .pct_change()
)

# Restrict the benchmark to the span of the ridge predictions, extended by
# one day past the last prediction date.
prediction_dates = ridge_predictions.index.get_level_values('Dates')
bstart = prediction_dates.min().strftime('%Y-%m-%d')
bstop = (prediction_dates.max() + pd.DateOffset(1)).strftime('%Y-%m-%d')
bench = bench.loc[bstart:bstop]

In [4]:
# Preview the last rows of the benchmark series (percentage change of Open).
bench.tail()

Dates
2022-01-10    0.005876
2022-01-11   -0.001062
2022-01-12    0.006762
2022-01-13    0.024422
2022-01-14   -0.022079
Name: Open, dtype: float64

In [5]:
# Preview the first rows of the ridge predictions loaded from the HDF5 store.
ridge_predictions.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,actuals,predicted,alpha
Dates,Coin,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-08,algorand,-0.017524,-0.000269,0.0001
2022-01-09,algorand,0.020682,0.004376,0.0001
2022-01-10,algorand,0.025327,0.012404,0.0001
2022-01-11,algorand,-0.007643,0.007684,0.0001
2022-01-12,algorand,-0.044458,-0.003803,0.0001


In [None]:
# Preview the first rows of the price columns (Open through Close).
prices.head()

In [None]:
# Preview the first rows of the benchmark series (percentage change of Open).
bench.head()

In [None]:
def get_backtest_data(predictions, prices, verbose=True):
    """Attach the best-alpha model predictions to the price data.

    For every value of ``alpha`` the Spearman rank correlation between the
    ``actuals`` and ``predicted`` columns is computed; the alpha with the
    highest correlation is kept and its ``predicted`` column is joined onto
    ``prices``.

    Parameters
    ----------
    predictions : pd.DataFrame
        Frame with a two-level index (relabelled to ``['Dates', 'Coin']``)
        and the columns ``actuals``, ``predicted`` and ``alpha``. The input
        frame itself is not modified.
    prices : pd.DataFrame
        Price data to join on. The join is on the index, so it is presumably
        indexed by ``(Dates, Coin)`` as well — verify against the caller.
    verbose : bool, default True
        When true, print the head of the selected predictions and of
        ``prices`` before joining (the function's previous, unconditional
        behaviour).

    Returns
    -------
    pd.DataFrame
        ``predicted`` followed by the columns of ``prices``. The join is a
        right join: every row of ``prices`` is retained, and rows without a
        matching prediction carry NaN in ``predicted``.
    """
    # Rank correlation of predicted vs. realised values, one value per alpha.
    rank_corr_by_alpha = predictions.groupby('alpha').apply(
        lambda grp: spearmanr(grp.actuals, grp.predicted)[0]
    )
    best_alpha = rank_corr_by_alpha.idxmax()

    # Keep only the winning alpha's predictions; rename_axis returns a new
    # object, so the caller's index names are left untouched.
    best_predictions = (
        predictions
        .loc[predictions.alpha == best_alpha, ['predicted']]
        .rename_axis(['Dates', 'Coin'])
    )

    if verbose:
        print(best_predictions.head())
        print('-----------------')
        print(prices.head())

    return best_predictions.join(prices, how='right')


# Build the backtest frame from the ridge predictions and the price columns.
data = get_backtest_data(ridge_predictions, prices)
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
data.info()

In [None]:
# Preview the first rows of the joined predictions/price frame.
data.head()

In [None]:
#data.swaplevel()

In [None]:
# Put Coin first in the index. The target order is spelled out (rather than
# swapping the two levels) so the cell is idempotent: re-running it leaves
# the frame as (Coin, Dates) instead of flipping it back to (Dates, Coin).
data = data.reorder_levels(['Coin', 'Dates'])
data.head()

In [None]:
# Vectorized backtest

In [None]:
# Event-driven backtest with Backtester