In [1]:
import imp
import importlib
import logging

from dateutil.relativedelta import relativedelta

In [2]:
import pandas as pd
from sklearn.linear_model import LinearRegression

In [3]:
from helpers.dataset import read_quote_dataset, preprocess_quotes
from helpers.backtest import train_model_and_backtest_regressor, get_backtest_performance_metrics

In [4]:
# Configure the logging module for the Jupyter notebook.
# logging.basicConfig() does nothing when the root logger already has handlers
# (which may be the case inside a notebook kernel), so the module is reloaded
# first to reset its state.
# importlib.reload is used instead of imp.reload: the imp module is deprecated
# and was removed in Python 3.12.
importlib.reload(logging)
logging_format = '%(asctime)s - %(levelname)s - %(process)s - %(message)s'
logging.basicConfig(level=logging.DEBUG, format=logging_format)

In [5]:
# Relative path to the gzip-compressed CSV of SPY quotes read by this notebook.
# NOTE(review): the "postprocess_adj" suffix suggests the file is produced by an
# earlier processing step -- confirm where it comes from.
PARAM_DATASET = '../data/SPY_postprocess_adj.csv.gz'

In [6]:
# Load the quote dataset from PARAM_DATASET using the project helper.
df = read_quote_dataset(PARAM_DATASET)

In [7]:
# Preview the first rows of the loaded dataset.
df.head()

Unnamed: 0,date,open,high,low,close,close_adj,volume,open_adj,low_adj,high_adj,...,ratio_close_adj_000_close_adj_005_norm,ratio_close_adj_000_close_adj_020_norm,ratio_close_adj_000_ema_005_norm,ratio_close_adj_000_ema_010_norm,ratio_close_adj_000_ema_020_norm,ratio_close_adj_000_ema_050_norm,ratio_close_adj_000_sma_005_norm,ratio_close_adj_000_sma_010_norm,ratio_close_adj_000_sma_020_norm,ratio_close_adj_000_sma_050_norm
0,2000-01-03,148.25,148.25,143.875,145.4375,101.425385,8164300,103.38677,100.335727,103.38677,...,,,,,,,,,,
1,2000-01-04,143.531204,144.0625,139.640594,139.75,97.459068,8089800,100.09601,97.38277,100.466526,...,,,,,,,,,,
2,2000-01-05,139.9375,141.531204,137.25,140.0,97.633377,12177900,97.589791,95.715579,98.70121,...,,,,,,,,,,
3,2000-01-06,139.625,141.5,137.75,137.75,96.064301,6227200,97.371891,96.064301,98.679482,...,,,0.48663,,,,,,,
4,2000-01-07,140.3125,145.75,140.0625,145.75,101.643333,8066500,97.851322,97.676977,101.643333,...,,,0.815422,,,,0.740588,,,


In [8]:
# Columns to shift and the horizons to shift them by (passed to preprocess_quotes).
vars_to_shift = [
    'close_adj',
    'close_adj_norm',
    'close_adj_std',
]
shift_periods = [1, 5, 10, 20]

# Columns to compute returns for and the corresponding horizons.
vars_for_return = [
    'close_adj',
]
return_periods = [1, 5, 10, 20]

In [9]:
# Add the shifted and return columns configured above, plus shifted dates.
# NOTE: `df` is overwritten with the preprocessed result, so re-running this
# cell feeds already-preprocessed data back in; re-run the load cell first.
df = preprocess_quotes(
    df,
    vars_to_shift=vars_to_shift,
    shift_periods=shift_periods,
    vars_for_return=vars_for_return,
    return_periods=return_periods,
    shift_date=True,
)

In [10]:
# Sanity check: compare each row's date/price with its 1- and 5-period
# shifted counterparts and the derived returns.
df[[
    'date', 'close_adj',
    'date_shift_1', 'close_adj_shift_1', 'close_adj_ret_1',
    'date_shift_5', 'close_adj_shift_5', 'close_adj_ret_5',
]].head(10)

Unnamed: 0,date,close_adj,date_shift_1,close_adj_shift_1,close_adj_ret_1,date_shift_5,close_adj_shift_5,close_adj_ret_5
0,2000-01-03,101.425385,2000-01-04,97.459068,-0.039106,2000-01-10,101.992004,0.005587
1,2000-01-04,97.459068,2000-01-05,97.633377,0.001789,2000-01-11,100.771645,0.033989
2,2000-01-05,97.633377,2000-01-06,96.064301,-0.016071,2000-01-12,99.76915,0.021875
3,2000-01-06,96.064301,2000-01-07,101.643333,0.058076,2000-01-13,101.120308,0.052631
4,2000-01-07,101.643333,2000-01-10,101.992004,0.00343,2000-01-14,102.493233,0.008362
5,2000-01-10,101.992004,2000-01-11,100.771645,-0.011965,2000-01-18,101.686958,-0.002991
6,2000-01-11,100.771645,2000-01-12,99.76915,-0.009948,2000-01-19,102.51506,0.017301
7,2000-01-12,99.76915,2000-01-13,101.120308,0.013543,2000-01-20,100.945953,0.011795
8,2000-01-13,101.120308,2000-01-14,102.493233,0.013577,2000-01-21,100.727989,-0.00388
9,2000-01-14,102.493233,2000-01-18,101.686958,-0.007867,2000-01-24,97.873047,-0.045078


Check that we get the same CAGR as in part 02.

In [11]:
# Model inputs and target: predict the adjusted close 20 periods ahead
# from the current adjusted open/low/high.
x_vars = ['open_adj', 'low_adj', 'high_adj']
y_var = 'close_adj_shift_20'

# Columns handed to the backtest as buy and sell prices.
buy_price_col, sell_price_col = 'close_adj', 'close_adj_shift_1'

# Estimator class and its constructor arguments.
model_class, model_params = LinearRegression, dict(fit_intercept=True)

In [12]:
# Baseline backtest: monthly model updates, one month of training history.
df_backtest = train_model_and_backtest_regressor(
    df,
    x_vars=x_vars,
    y_var=y_var,
    buy_price_col=buy_price_col,
    sell_price_col=sell_price_col,
    model_class=model_class,
    model_params=model_params,
    backtest_start='2000-02-01',
    backtest_end='2018-12-31',
    model_update_frequency='M',
    train_history_period=relativedelta(months=1),
)

2019-05-05 16:12:18,672 - DEBUG - 22988 - 228 periods to backtest: ['2000-02-01', '2000-02-29', '2000-03-31', '2000-04-30', '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31', '2000-09-30', '2000-10-31', '2000-11-30', '2000-12-31', '2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30', '2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31', '2001-09-30', '2001-10-31', '2001-11-30', '2001-12-31', '2002-01-31', '2002-02-28', '2002-03-31', '2002-04-30', '2002-05-31', '2002-06-30', '2002-07-31', '2002-08-31', '2002-09-30', '2002-10-31', '2002-11-30', '2002-12-31', '2003-01-31', '2003-02-28', '2003-03-31', '2003-04-30', '2003-05-31', '2003-06-30', '2003-07-31', '2003-08-31', '2003-09-30', '2003-10-31', '2003-11-30', '2003-12-31', '2004-01-31', '2004-02-29', '2004-03-31', '2004-04-30', '2004-05-31', '2004-06-30', '2004-07-31', '2004-08-31', '2004-09-30', '2004-10-31', '2004-11-30', '2004-12-31', '2005-01-31', '2005-02-28', '2005-03-31', '2005-04-30', '2005-05-31', '2005-06-30', '2005-07

2019-05-05 16:12:19,004 - INFO - 22988 - Training dataset is between 2001-12-31 and 2002-01-30.
2019-05-05 16:12:19,013 - INFO - 22988 - Training a model to be tested between 2002-02-28 and 2002-03-31.
2019-05-05 16:12:19,016 - INFO - 22988 - Training dataset is between 2002-01-28 and 2002-02-27.
2019-05-05 16:12:19,027 - INFO - 22988 - Training a model to be tested between 2002-03-31 and 2002-04-30.
2019-05-05 16:12:19,029 - INFO - 22988 - Training dataset is between 2002-02-28 and 2002-03-28.
2019-05-05 16:12:19,039 - INFO - 22988 - Training a model to be tested between 2002-04-30 and 2002-05-31.
2019-05-05 16:12:19,041 - INFO - 22988 - Training dataset is between 2002-04-01 and 2002-04-29.
2019-05-05 16:12:19,051 - INFO - 22988 - Training a model to be tested between 2002-05-31 and 2002-06-30.
2019-05-05 16:12:19,054 - INFO - 22988 - Training dataset is between 2002-04-30 and 2002-05-30.
2019-05-05 16:12:19,063 - INFO - 22988 - Training a model to be tested between 2002-06-30 and 20

2019-05-05 16:12:19,451 - INFO - 22988 - Training dataset is between 2005-05-31 and 2005-06-29.
2019-05-05 16:12:19,459 - INFO - 22988 - Training a model to be tested between 2005-07-31 and 2005-08-31.
2019-05-05 16:12:19,462 - INFO - 22988 - Training dataset is between 2005-06-30 and 2005-07-29.
2019-05-05 16:12:19,470 - INFO - 22988 - Training a model to be tested between 2005-08-31 and 2005-09-30.
2019-05-05 16:12:19,472 - INFO - 22988 - Training dataset is between 2005-08-01 and 2005-08-30.
2019-05-05 16:12:19,481 - INFO - 22988 - Training a model to be tested between 2005-09-30 and 2005-10-31.
2019-05-05 16:12:19,483 - INFO - 22988 - Training dataset is between 2005-08-30 and 2005-09-29.
2019-05-05 16:12:19,491 - INFO - 22988 - Training a model to be tested between 2005-10-31 and 2005-11-30.
2019-05-05 16:12:19,493 - INFO - 22988 - Training dataset is between 2005-09-30 and 2005-10-28.
2019-05-05 16:12:19,503 - INFO - 22988 - Training a model to be tested between 2005-11-30 and 20

2019-05-05 16:12:19,920 - INFO - 22988 - Training a model to be tested between 2008-11-30 and 2008-12-31.
2019-05-05 16:12:19,922 - INFO - 22988 - Training dataset is between 2008-10-30 and 2008-11-28.
2019-05-05 16:12:19,930 - INFO - 22988 - Training a model to be tested between 2008-12-31 and 2009-01-31.
2019-05-05 16:12:19,933 - INFO - 22988 - Training dataset is between 2008-12-01 and 2008-12-30.
2019-05-05 16:12:19,941 - INFO - 22988 - Training a model to be tested between 2009-01-31 and 2009-02-28.
2019-05-05 16:12:19,943 - INFO - 22988 - Training dataset is between 2008-12-31 and 2009-01-30.
2019-05-05 16:12:19,951 - INFO - 22988 - Training a model to be tested between 2009-02-28 and 2009-03-31.
2019-05-05 16:12:19,953 - INFO - 22988 - Training dataset is between 2009-01-28 and 2009-02-27.
2019-05-05 16:12:19,961 - INFO - 22988 - Training a model to be tested between 2009-03-31 and 2009-04-30.
2019-05-05 16:12:19,963 - INFO - 22988 - Training dataset is between 2009-03-02 and 20

2019-05-05 16:12:20,354 - INFO - 22988 - Training a model to be tested between 2012-04-30 and 2012-05-31.
2019-05-05 16:12:20,356 - INFO - 22988 - Training dataset is between 2012-03-30 and 2012-04-27.
2019-05-05 16:12:20,364 - INFO - 22988 - Training a model to be tested between 2012-05-31 and 2012-06-30.
2019-05-05 16:12:20,366 - INFO - 22988 - Training dataset is between 2012-04-30 and 2012-05-30.
2019-05-05 16:12:20,374 - INFO - 22988 - Training a model to be tested between 2012-06-30 and 2012-07-31.
2019-05-05 16:12:20,376 - INFO - 22988 - Training dataset is between 2012-05-30 and 2012-06-29.
2019-05-05 16:12:20,384 - INFO - 22988 - Training a model to be tested between 2012-07-31 and 2012-08-31.
2019-05-05 16:12:20,385 - INFO - 22988 - Training dataset is between 2012-07-02 and 2012-07-30.
2019-05-05 16:12:20,394 - INFO - 22988 - Training a model to be tested between 2012-08-31 and 2012-09-30.
2019-05-05 16:12:20,396 - INFO - 22988 - Training dataset is between 2012-07-31 and 20

2019-05-05 16:12:20,783 - INFO - 22988 - Training a model to be tested between 2015-09-30 and 2015-10-31.
2019-05-05 16:12:20,785 - INFO - 22988 - Training dataset is between 2015-08-31 and 2015-09-29.
2019-05-05 16:12:20,794 - INFO - 22988 - Training a model to be tested between 2015-10-31 and 2015-11-30.
2019-05-05 16:12:20,796 - INFO - 22988 - Training dataset is between 2015-09-30 and 2015-10-30.
2019-05-05 16:12:20,806 - INFO - 22988 - Training a model to be tested between 2015-11-30 and 2015-12-31.
2019-05-05 16:12:20,808 - INFO - 22988 - Training dataset is between 2015-10-30 and 2015-11-27.
2019-05-05 16:12:20,818 - INFO - 22988 - Training a model to be tested between 2015-12-31 and 2016-01-31.
2019-05-05 16:12:20,820 - INFO - 22988 - Training dataset is between 2015-11-30 and 2015-12-30.
2019-05-05 16:12:20,828 - INFO - 22988 - Training a model to be tested between 2016-01-31 and 2016-02-29.
2019-05-05 16:12:20,830 - INFO - 22988 - Training dataset is between 2015-12-31 and 20

In [13]:
# Summarise strategy vs. benchmark performance, including the difference column.
get_backtest_performance_metrics(
    df_backtest.ret,
    df_backtest.benchmark_ret,
    with_benchmark=True,
    with_delta=True,
)

Unnamed: 0,main,benchmark,delta
alpha,0.549535,-3.322314e-16,
beta,-0.055703,1.0,
cagr,0.694489,0.05047234,0.644017
max_drawdown,-0.194069,-0.5518942,0.357825
return,21152.394548,1.534235,21150.860313
sharpe,2.885469,0.3527034,2.532765
var,-0.014543,-0.01925716,0.004714
volatility,0.189144,0.1916436,-0.002499


Yes, the CAGR is about 69.4%, so we are on the same page.
For future comparison, let's start the backtest in March and use 2 months of history
to train the model.

In [14]:
# Same model configuration as the baseline run, restated so this cell is
# self-contained.
x_vars = ['open_adj', 'low_adj', 'high_adj']
y_var = 'close_adj_shift_20'
buy_price_col, sell_price_col = 'close_adj', 'close_adj_shift_1'
model_class, model_params = LinearRegression, dict(fit_intercept=True)

# Backtest starting one month later, with two months of training history.
df_backtest = train_model_and_backtest_regressor(
    df,
    x_vars=x_vars,
    y_var=y_var,
    buy_price_col=buy_price_col,
    sell_price_col=sell_price_col,
    model_class=model_class,
    model_params=model_params,
    backtest_start='2000-03-01',
    backtest_end='2018-12-31',
    model_update_frequency='M',
    train_history_period=relativedelta(months=2),
)

2019-05-05 16:12:21,425 - DEBUG - 22988 - 227 periods to backtest: ['2000-03-01', '2000-03-31', '2000-04-30', '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31', '2000-09-30', '2000-10-31', '2000-11-30', '2000-12-31', '2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30', '2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31', '2001-09-30', '2001-10-31', '2001-11-30', '2001-12-31', '2002-01-31', '2002-02-28', '2002-03-31', '2002-04-30', '2002-05-31', '2002-06-30', '2002-07-31', '2002-08-31', '2002-09-30', '2002-10-31', '2002-11-30', '2002-12-31', '2003-01-31', '2003-02-28', '2003-03-31', '2003-04-30', '2003-05-31', '2003-06-30', '2003-07-31', '2003-08-31', '2003-09-30', '2003-10-31', '2003-11-30', '2003-12-31', '2004-01-31', '2004-02-29', '2004-03-31', '2004-04-30', '2004-05-31', '2004-06-30', '2004-07-31', '2004-08-31', '2004-09-30', '2004-10-31', '2004-11-30', '2004-12-31', '2005-01-31', '2005-02-28', '2005-03-31', '2005-04-30', '2005-05-31', '2005-06-30', '2005-07-31', '2005-08

2019-05-05 16:12:21,726 - INFO - 22988 - Training dataset is between 2001-12-28 and 2002-02-27.
2019-05-05 16:12:21,737 - INFO - 22988 - Training a model to be tested between 2002-03-31 and 2002-04-30.
2019-05-05 16:12:21,740 - INFO - 22988 - Training dataset is between 2002-01-31 and 2002-03-28.
2019-05-05 16:12:21,753 - INFO - 22988 - Training a model to be tested between 2002-04-30 and 2002-05-31.
2019-05-05 16:12:21,756 - INFO - 22988 - Training dataset is between 2002-02-28 and 2002-04-29.
2019-05-05 16:12:21,765 - INFO - 22988 - Training a model to be tested between 2002-05-31 and 2002-06-30.
2019-05-05 16:12:21,767 - INFO - 22988 - Training dataset is between 2002-04-01 and 2002-05-30.
2019-05-05 16:12:21,780 - INFO - 22988 - Training a model to be tested between 2002-06-30 and 2002-07-31.
2019-05-05 16:12:21,783 - INFO - 22988 - Training dataset is between 2002-04-30 and 2002-06-28.
2019-05-05 16:12:21,795 - INFO - 22988 - Training a model to be tested between 2002-07-31 and 20

2019-05-05 16:12:22,213 - INFO - 22988 - Training dataset is between 2005-05-31 and 2005-07-29.
2019-05-05 16:12:22,222 - INFO - 22988 - Training a model to be tested between 2005-08-31 and 2005-09-30.
2019-05-05 16:12:22,226 - INFO - 22988 - Training dataset is between 2005-06-30 and 2005-08-30.
2019-05-05 16:12:22,236 - INFO - 22988 - Training a model to be tested between 2005-09-30 and 2005-10-31.
2019-05-05 16:12:22,240 - INFO - 22988 - Training dataset is between 2005-08-01 and 2005-09-29.
2019-05-05 16:12:22,250 - INFO - 22988 - Training a model to be tested between 2005-10-31 and 2005-11-30.
2019-05-05 16:12:22,252 - INFO - 22988 - Training dataset is between 2005-08-31 and 2005-10-28.
2019-05-05 16:12:22,262 - INFO - 22988 - Training a model to be tested between 2005-11-30 and 2005-12-31.
2019-05-05 16:12:22,264 - INFO - 22988 - Training dataset is between 2005-09-30 and 2005-11-29.
2019-05-05 16:12:22,274 - INFO - 22988 - Training a model to be tested between 2005-12-31 and 20

2019-05-05 16:12:22,695 - INFO - 22988 - Training dataset is between 2008-10-31 and 2008-12-30.
2019-05-05 16:12:22,706 - INFO - 22988 - Training a model to be tested between 2009-01-31 and 2009-02-28.
2019-05-05 16:12:22,708 - INFO - 22988 - Training dataset is between 2008-12-01 and 2009-01-30.
2019-05-05 16:12:22,717 - INFO - 22988 - Training a model to be tested between 2009-02-28 and 2009-03-31.
2019-05-05 16:12:22,720 - INFO - 22988 - Training dataset is between 2008-12-29 and 2009-02-27.
2019-05-05 16:12:22,730 - INFO - 22988 - Training a model to be tested between 2009-03-31 and 2009-04-30.
2019-05-05 16:12:22,733 - INFO - 22988 - Training dataset is between 2009-02-02 and 2009-03-30.
2019-05-05 16:12:22,742 - INFO - 22988 - Training a model to be tested between 2009-04-30 and 2009-05-31.
2019-05-05 16:12:22,745 - INFO - 22988 - Training dataset is between 2009-03-02 and 2009-04-29.
2019-05-05 16:12:22,755 - INFO - 22988 - Training a model to be tested between 2009-05-31 and 20

2019-05-05 16:12:23,167 - INFO - 22988 - Training dataset is between 2012-04-02 and 2012-05-30.
2019-05-05 16:12:23,176 - INFO - 22988 - Training a model to be tested between 2012-06-30 and 2012-07-31.
2019-05-05 16:12:23,178 - INFO - 22988 - Training dataset is between 2012-04-30 and 2012-06-29.
2019-05-05 16:12:23,186 - INFO - 22988 - Training a model to be tested between 2012-07-31 and 2012-08-31.
2019-05-05 16:12:23,189 - INFO - 22988 - Training dataset is between 2012-05-31 and 2012-07-30.
2019-05-05 16:12:23,198 - INFO - 22988 - Training a model to be tested between 2012-08-31 and 2012-09-30.
2019-05-05 16:12:23,200 - INFO - 22988 - Training dataset is between 2012-07-02 and 2012-08-30.
2019-05-05 16:12:23,208 - INFO - 22988 - Training a model to be tested between 2012-09-30 and 2012-10-31.
2019-05-05 16:12:23,211 - INFO - 22988 - Training dataset is between 2012-07-30 and 2012-09-28.
2019-05-05 16:12:23,218 - INFO - 22988 - Training a model to be tested between 2012-10-31 and 20

2019-05-05 16:12:23,619 - INFO - 22988 - Training dataset is between 2015-08-31 and 2015-10-30.
2019-05-05 16:12:23,630 - INFO - 22988 - Training a model to be tested between 2015-11-30 and 2015-12-31.
2019-05-05 16:12:23,632 - INFO - 22988 - Training dataset is between 2015-09-30 and 2015-11-27.
2019-05-05 16:12:23,641 - INFO - 22988 - Training a model to be tested between 2015-12-31 and 2016-01-31.
2019-05-05 16:12:23,643 - INFO - 22988 - Training dataset is between 2015-11-02 and 2015-12-30.
2019-05-05 16:12:23,654 - INFO - 22988 - Training a model to be tested between 2016-01-31 and 2016-02-29.
2019-05-05 16:12:23,656 - INFO - 22988 - Training dataset is between 2015-11-30 and 2016-01-29.
2019-05-05 16:12:23,664 - INFO - 22988 - Training a model to be tested between 2016-02-29 and 2016-03-31.
2019-05-05 16:12:23,666 - INFO - 22988 - Training dataset is between 2015-12-29 and 2016-02-26.
2019-05-05 16:12:23,677 - INFO - 22988 - Training a model to be tested between 2016-03-31 and 20

In [15]:
# Performance of the two-month-history run against the benchmark.
get_backtest_performance_metrics(
    df_backtest.ret,
    df_backtest.benchmark_ret,
    with_benchmark=True,
    with_delta=True,
)

Unnamed: 0,main,benchmark,delta
alpha,0.43136,4.190292e-16,
beta,0.03959,1.0,
cagr,0.515458,0.05169112,0.463767
max_drawdown,-0.194069,-0.5518942,0.357825
return,2483.437082,1.580001,2481.857081
sharpe,2.284602,0.3588021,1.9258
var,-0.015838,-0.01920933,0.003371
volatility,0.190003,0.1915877,-0.001585


Using 2 months of training history reduced the CAGR from 69% to 51%. Nevertheless, it is still unrealistically high: the look-ahead bias
is still present.

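Now let's remove the look-ahead bias by keeping only training data whose target return is already known at evaluation time.
The target `close_adj_shift_20` is the price 20 rows ahead, so the most recent training rows have targets that are not yet known when the test period starts. To exclude them we pass the parameter `col_date_shift='date_shift_20'`. TODO: explain this in more detail.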

In [16]:
# Same model configuration as the previous runs, restated so this cell is
# self-contained.
x_vars = ['open_adj', 'low_adj', 'high_adj']
y_var = 'close_adj_shift_20'
buy_price_col, sell_price_col = 'close_adj', 'close_adj_shift_1'
model_class, model_params = LinearRegression, dict(fit_intercept=True)

# Identical to the two-month-history run except for col_date_shift, which
# names the shifted-date column matching the 20-period target.
df_backtest = train_model_and_backtest_regressor(
    df,
    x_vars=x_vars,
    y_var=y_var,
    buy_price_col=buy_price_col,
    sell_price_col=sell_price_col,
    model_class=model_class,
    model_params=model_params,
    backtest_start='2000-03-01',
    backtest_end='2018-12-31',
    model_update_frequency='M',
    train_history_period=relativedelta(months=2),
    col_date_shift='date_shift_20',
)

2019-05-05 16:12:26,118 - DEBUG - 22988 - 227 periods to backtest: ['2000-03-01', '2000-03-31', '2000-04-30', '2000-05-31', '2000-06-30', '2000-07-31', '2000-08-31', '2000-09-30', '2000-10-31', '2000-11-30', '2000-12-31', '2001-01-31', '2001-02-28', '2001-03-31', '2001-04-30', '2001-05-31', '2001-06-30', '2001-07-31', '2001-08-31', '2001-09-30', '2001-10-31', '2001-11-30', '2001-12-31', '2002-01-31', '2002-02-28', '2002-03-31', '2002-04-30', '2002-05-31', '2002-06-30', '2002-07-31', '2002-08-31', '2002-09-30', '2002-10-31', '2002-11-30', '2002-12-31', '2003-01-31', '2003-02-28', '2003-03-31', '2003-04-30', '2003-05-31', '2003-06-30', '2003-07-31', '2003-08-31', '2003-09-30', '2003-10-31', '2003-11-30', '2003-12-31', '2004-01-31', '2004-02-29', '2004-03-31', '2004-04-30', '2004-05-31', '2004-06-30', '2004-07-31', '2004-08-31', '2004-09-30', '2004-10-31', '2004-11-30', '2004-12-31', '2005-01-31', '2005-02-28', '2005-03-31', '2005-04-30', '2005-05-31', '2005-06-30', '2005-07-31', '2005-08

2019-05-05 16:12:26,430 - INFO - 22988 - Training dataset is between 2001-12-28 and 2002-01-29.
2019-05-05 16:12:26,440 - INFO - 22988 - Training a model to be tested between 2002-03-31 and 2002-04-30.
2019-05-05 16:12:26,442 - INFO - 22988 - Training dataset is between 2002-01-31 and 2002-02-28.
2019-05-05 16:12:26,451 - INFO - 22988 - Training a model to be tested between 2002-04-30 and 2002-05-31.
2019-05-05 16:12:26,453 - INFO - 22988 - Training dataset is between 2002-02-28 and 2002-04-01.
2019-05-05 16:12:26,462 - INFO - 22988 - Training a model to be tested between 2002-05-31 and 2002-06-30.
2019-05-05 16:12:26,464 - INFO - 22988 - Training dataset is between 2002-04-01 and 2002-05-01.
2019-05-05 16:12:26,474 - INFO - 22988 - Training a model to be tested between 2002-06-30 and 2002-07-31.
2019-05-05 16:12:26,476 - INFO - 22988 - Training dataset is between 2002-04-30 and 2002-05-31.
2019-05-05 16:12:26,484 - INFO - 22988 - Training a model to be tested between 2002-07-31 and 20

2019-05-05 16:12:26,888 - INFO - 22988 - Training dataset is between 2005-05-31 and 2005-06-30.
2019-05-05 16:12:26,896 - INFO - 22988 - Training a model to be tested between 2005-08-31 and 2005-09-30.
2019-05-05 16:12:26,899 - INFO - 22988 - Training dataset is between 2005-06-30 and 2005-08-02.
2019-05-05 16:12:26,907 - INFO - 22988 - Training a model to be tested between 2005-09-30 and 2005-10-31.
2019-05-05 16:12:26,909 - INFO - 22988 - Training dataset is between 2005-08-01 and 2005-08-31.
2019-05-05 16:12:26,918 - INFO - 22988 - Training a model to be tested between 2005-10-31 and 2005-11-30.
2019-05-05 16:12:26,920 - INFO - 22988 - Training dataset is between 2005-08-31 and 2005-09-30.
2019-05-05 16:12:26,928 - INFO - 22988 - Training a model to be tested between 2005-11-30 and 2005-12-31.
2019-05-05 16:12:26,930 - INFO - 22988 - Training dataset is between 2005-09-30 and 2005-10-31.
2019-05-05 16:12:26,939 - INFO - 22988 - Training a model to be tested between 2005-12-31 and 20

2019-05-05 16:12:27,332 - INFO - 22988 - Training dataset is between 2008-10-31 and 2008-12-01.
2019-05-05 16:12:27,340 - INFO - 22988 - Training a model to be tested between 2009-01-31 and 2009-02-28.
2019-05-05 16:12:27,341 - INFO - 22988 - Training dataset is between 2008-12-01 and 2008-12-31.
2019-05-05 16:12:27,352 - INFO - 22988 - Training a model to be tested between 2009-02-28 and 2009-03-31.
2019-05-05 16:12:27,354 - INFO - 22988 - Training dataset is between 2008-12-29 and 2009-01-29.
2019-05-05 16:12:27,362 - INFO - 22988 - Training a model to be tested between 2009-03-31 and 2009-04-30.
2019-05-05 16:12:27,365 - INFO - 22988 - Training dataset is between 2009-02-02 and 2009-03-02.
2019-05-05 16:12:27,374 - INFO - 22988 - Training a model to be tested between 2009-04-30 and 2009-05-31.
2019-05-05 16:12:27,376 - INFO - 22988 - Training dataset is between 2009-03-02 and 2009-03-31.
2019-05-05 16:12:27,384 - INFO - 22988 - Training a model to be tested between 2009-05-31 and 20

2019-05-05 16:12:27,778 - INFO - 22988 - Training dataset is between 2012-04-02 and 2012-05-01.
2019-05-05 16:12:27,785 - INFO - 22988 - Training a model to be tested between 2012-06-30 and 2012-07-31.
2019-05-05 16:12:27,787 - INFO - 22988 - Training dataset is between 2012-04-30 and 2012-06-01.
2019-05-05 16:12:27,796 - INFO - 22988 - Training a model to be tested between 2012-07-31 and 2012-08-31.
2019-05-05 16:12:27,798 - INFO - 22988 - Training dataset is between 2012-05-31 and 2012-06-29.
2019-05-05 16:12:27,807 - INFO - 22988 - Training a model to be tested between 2012-08-31 and 2012-09-30.
2019-05-05 16:12:27,809 - INFO - 22988 - Training dataset is between 2012-07-02 and 2012-08-02.
2019-05-05 16:12:27,817 - INFO - 22988 - Training a model to be tested between 2012-09-30 and 2012-10-31.
2019-05-05 16:12:27,820 - INFO - 22988 - Training dataset is between 2012-07-30 and 2012-08-30.
2019-05-05 16:12:27,829 - INFO - 22988 - Training a model to be tested between 2012-10-31 and 20

2019-05-05 16:12:28,248 - INFO - 22988 - Training a model to be tested between 2015-10-31 and 2015-11-30.
2019-05-05 16:12:28,251 - INFO - 22988 - Training dataset is between 2015-08-31 and 2015-10-02.
2019-05-05 16:12:28,258 - INFO - 22988 - Training a model to be tested between 2015-11-30 and 2015-12-31.
2019-05-05 16:12:28,260 - INFO - 22988 - Training dataset is between 2015-09-30 and 2015-10-29.
2019-05-05 16:12:28,269 - INFO - 22988 - Training a model to be tested between 2015-12-31 and 2016-01-31.
2019-05-05 16:12:28,271 - INFO - 22988 - Training dataset is between 2015-11-02 and 2015-12-01.
2019-05-05 16:12:28,279 - INFO - 22988 - Training a model to be tested between 2016-01-31 and 2016-02-29.
2019-05-05 16:12:28,281 - INFO - 22988 - Training dataset is between 2015-11-30 and 2015-12-30.
2019-05-05 16:12:28,290 - INFO - 22988 - Training a model to be tested between 2016-02-29 and 2016-03-31.
2019-05-05 16:12:28,292 - INFO - 22988 - Training dataset is between 2015-12-29 and 20

In [17]:
# Performance of the run that uses col_date_shift, against the benchmark.
get_backtest_performance_metrics(
    df_backtest.ret,
    df_backtest.benchmark_ret,
    with_benchmark=True,
    with_delta=True,
)

Unnamed: 0,main,benchmark,delta
alpha,-0.032984,4.190292e-16,
beta,0.22799,1.0,
cagr,-0.035045,0.05169112,-0.086736
max_drawdown,-0.63705,-0.5518942,-0.085156
return,-0.488736,1.580001,-2.068737
sharpe,-0.090189,0.3588021,-0.448991
var,-0.018687,-0.01920933,0.000522
volatility,0.191946,0.1915877,0.000359


Now that the look-ahead bias is gone, the CAGR is -3.5%.
TODO: explain how the training dataset is built at test time.
Apparently, happiness doesn't last long.