In [1]:
%matplotlib inline

import pandas as pd
pd.core.common.is_list_like = pd.api.types.is_list_like
from matplotlib import style
from pandas_datareader import data
import random
from SALib.sample import latin
from functions.stylizedfacts import *
import scipy.stats as stats
from functions.evolutionaryalgo import *
from pandas_datareader import data
from functions.helpers import hurst, organise_data, div_by_hundred, discounted_value_cash_flow, find_horizon, calculate_npv
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.tsa.stattools as ts
import seaborn as sns
import json

In [2]:
# Apply matplotlib's 'ggplot' style sheet to the figures drawn in this notebook.
style.use('ggplot')

# Estimate model
Following the procedure presented by [Franke & Westerhoff (2012)](https://www.sciencedirect.com/science/article/pii/S0165188912000802).

## 1 Get data

In [14]:
# Sample window handed to the FRED request below.
# NOTE(review): the original trailing note on start_date read "1933" - presumably an earlier start year.
start_date = '2008-12-31'
end_date = '2018-12-31'

# Download the "SP500" series from FRED and drop missing observations.
spy_nom_price = data.DataReader(
    "SP500", data_source='fred', start=start_date, end=end_date
)["SP500"].dropna()

# Period-over-period percentage changes; the first entry has no predecessor
# (it is NaN), so it is skipped.
spy_nom_returns = spy_nom_price.pct_change().iloc[1:]

## 2 Bootstrap data for both short and long-term moments

In [16]:
# Block lengths (number of observations) used to slice the return series into bootstrap blocks.
small_block_size = 250 # blocks for the short-term moments
large_block_size = 625 # blocks for the long-term moments; NOTE(review): the earlier trailing note said 750 - confirm 625 is intended

**Small data blocks returns**
For short-term moments, I produce bootstrapped series from 250-day (= 1 year) data blocks. This means that there are 25 unique blocks. 

In [18]:
# Slice the return series into consecutive blocks of small_block_size observations
# (the last block is shorter when the length is not an exact multiple).
small_data_blocks = [
    list(spy_nom_returns[start:start + small_block_size])
    for start in range(0, len(spy_nom_returns), small_block_size)
]

# Draw 5000 random series: each one concatenates as many blocks as exist,
# sampled with replacement.
bootstrapped_small_series = []
for draw in range(5000):
    resampled_returns = []
    for slot in small_data_blocks:
        resampled_returns.extend(random.choice(small_data_blocks))
    bootstrapped_small_series.append(resampled_returns)

**Large data blocks returns**

For the longer moments, I produce a bootstrapped series of data blocks of 625 days. To accommodate this, I cut 250 observations from the data set, leaving 6000 observations. 

In [19]:
# Consecutive blocks of large_block_size observations over the full return series
# (an earlier version iterated over len(spy_nom_returns[:-250]) instead).
large_data_blocks = [
    list(spy_nom_returns[offset:offset + large_block_size])
    for offset in range(0, len(spy_nom_returns), large_block_size)
]

# Draw 5000 random series; every series is a flat list built from
# len(large_data_blocks) blocks sampled with replacement.
bootstrapped_long_series = [
    [
        ret
        for block in (random.choice(large_data_blocks) for slot in large_data_blocks)
        for ret in block
    ]
    for draw in range(5000)
]

## 3 Choose moments

For returns, I use the following **short-term moments**: 

1. mean first-order autocorrelation of the raw returns (no predictability),
2. autocorrelation at lag t = 1,
3. autocorrelation at lag t = 5,
4. mean first-order autocorrelation of the absolute returns (volatility clustering),
5. kurtosis (fat tails).

In [20]:
# One list per short-term moment; each receives one value per bootstrapped series.
first_order_autocors, mean_abs_autocor = [], []
autocors1, autocors5, kurtoses = [], [], []

for raw_rets in bootstrapped_small_series:
    # The project helpers receive the plain list together with the argument 25
    # (presumably a lag count - verify against functions.stylizedfacts).
    first_order_autocors.append(autocorrelation_returns(raw_rets, 25))
    mean_abs_autocor.append(autocorrelation_abs_returns(raw_rets, 25))

    # The remaining moments are computed on a pandas Series.
    ret_series = pd.Series(raw_rets)
    for lag, target in ((1, autocors1), (5, autocors5)):
        target.append(ret_series.autocorr(lag=lag))
    kurtoses.append(kurtosis(ret_series))

For long-term moments, I use the autocorrelation of absolute returns at lags 10, 25, 50, 100, 150 and 200.

In [26]:
# Autocorrelation of absolute returns at several long lags, one value per
# bootstrapped series and lag.
long_term_lags = (10, 25, 50, 100, 150, 200)
abs_autocors_by_lag = {lag: [] for lag in long_term_lags}

for rets in bootstrapped_long_series:
    # Take absolute values once per series instead of once per lag.
    abs_rets = pd.Series(rets).abs()
    for lag in long_term_lags:
        autocor = abs_rets.autocorr(lag=lag)
        # autocorr can return NaN (e.g. when a resampled series is too short
        # for the lag). Store 0 right here so that no NaN reaches the moment
        # averages or the covariance matrix, whatever order cells are run in.
        abs_autocors_by_lag[lag].append(0.0 if pd.isnull(autocor) else autocor)

# Keep the per-lag names used by the rest of the notebook.
spy_abs_auto10 = abs_autocors_by_lag[10]
spy_abs_auto25 = abs_autocors_by_lag[25]
spy_abs_auto50 = abs_autocors_by_lag[50]
spy_abs_auto100 = abs_autocors_by_lag[100]
spy_abs_auto150 = abs_autocors_by_lag[150]
spy_abs_auto200 = abs_autocors_by_lag[200]

In [29]:
# Bootstrap distributions of all eleven moments, one list per moment.
# The order matches the entries of emp_moments: the five short-term moments
# first, then the absolute-return autocorrelations at lags 10 to 200.
all_bootstrapped_moments = [first_order_autocors,
                            autocors1,
                            autocors5,
                            mean_abs_autocor,
                            kurtoses,
                            spy_abs_auto10,
                            spy_abs_auto25,
                            spy_abs_auto50,
                            spy_abs_auto100,
                            spy_abs_auto150,
                            spy_abs_auto200
                           ]

In [22]:
def confidence_interval(sample, emp_value, q=0.99):
    """Return a symmetric interval around ``emp_value`` based on the spread of ``sample``.

    The half-width is the standard-normal quantile at ``q`` (a z-value, not a
    t-value) multiplied by the sample standard deviation as computed by
    ``pandas.Series.std``.

    Parameters
    ----------
    sample : sequence of numbers
        Values whose standard deviation determines the width of the interval.
    emp_value : number
        Centre of the interval.
    q : float, optional
        Quantile of the standard normal distribution used as critical value.
        The default 0.99 reproduces the previously hard-coded setting; because
        the quantile is applied on both sides, it leaves 1% in each tail.

    Returns
    -------
    tuple
        ``(lower, upper)`` bounds of the interval.
    """
    z_critical = stats.norm.ppf(q=q)
    stdev = pd.Series(sample).std()
    margin_of_error = z_critical * stdev
    # Return the bounds directly; binding them to a local called
    # ``confidence_interval`` would shadow the function's own name.
    return (emp_value - margin_of_error, emp_value + margin_of_error)

In [30]:
def get_specific_bootstraps_moments(full_series, bootstrap_number):
    """Collect the moments belonging to one bootstrap replication.

    ``full_series`` holds one sequence per moment; the element at position
    ``bootstrap_number`` is taken from each of them and the picks are
    returned as a numpy array (one entry per moment, in the same order).
    """
    picked = [moment_values[bootstrap_number] for moment_values in full_series]
    return np.array(picked)

In [31]:
# Bootstrap mean of every moment, in the order of all_bootstrapped_moments.
av_moments = list(map(np.mean, all_bootstrapped_moments))

# One moment vector per bootstrap replication.
moments_b = [
    get_specific_bootstraps_moments(all_bootstrapped_moments, bootstrap_number)
    for bootstrap_number in range(len(bootstrapped_long_series))
]

In [33]:
# Display the bootstrap means of the moments.
av_moments

[-0.0085403959215959377,
 -0.059373447733413083,
 -0.046486740997376243,
 0.1916297308552411,
 4.6083558405710994,
 0.21680205832111227,
 0.11504999107652002,
 0.066271071482962823,
 0.011122122838318035,
 0.028435498879957846,
 0.0060635341640177974]

## 5 Estimate weighting matrix:

Here, I follow [Franke & Westerhoff 2016](https://link.springer.com/article/10.1007/s11403-014-0140-6#Sec8) in that I use the inverse of the bootstrap estimate of the moment covariance matrix as my weights.

In [24]:
# Moments of the observed return series, in the same order as the
# bootstrapped moment lists: two helper-based return moments interleaved with
# raw autocorrelations and kurtosis, then absolute-return autocorrelations.
emp_moments = np.array(
    [autocorrelation_returns(spy_nom_returns, 25)]
    + [spy_nom_returns.autocorr(lag=lag) for lag in (1, 5)]
    + [autocorrelation_abs_returns(spy_nom_returns, 25),
       kurtosis(spy_nom_returns)]
    + [spy_nom_returns.abs().autocorr(lag=lag) for lag in (10, 25, 50, 100, 150, 200)]
)

In [55]:
# Display the moments of the observed series.
emp_moments

array([ -7.91632942e-03,  -6.44109792e-02,  -5.17149408e-02,
         2.15757804e-01,   4.99915089e+00,   2.29239806e-01,
         1.36705815e-01,   8.99171488e-02,   3.97109985e-02,
         4.56905198e-02,   3.40685479e-03])

In [27]:
# Prevent NA errors: replace missing autocorrelations by 0 for the four longest lags.
# NOTE(review): this rebinds the names to new lists. A container that already
# holds the previous list objects (all_bootstrapped_moments is built in an
# earlier cell in file order) is not updated by it - confirm the cell order.
spy_abs_auto50, spy_abs_auto100, spy_abs_auto150, spy_abs_auto200 = [
    list(pd.Series(lag_autocors).fillna(0))
    for lag_autocors in (spy_abs_auto50, spy_abs_auto100, spy_abs_auto150, spy_abs_auto200)
]

Then, I estimate the moment covariance matrix of the bootstrapped data as: 

$\hat{W} = \frac{1}{B} \sum_{b=1}^{B} (m^b - \hat{m})(m^b - \hat{m})'$

In [34]:
# Bootstrap estimate of the moment covariance matrix: the outer products of
# each replication's deviation from the average moments, summed and divided
# by the number of bootstrapped series.
moment_deviations = [mb - av_moments for mb in moments_b]
W_hat = 1.0 / len(bootstrapped_long_series) * sum(
    [np.outer(deviation, deviation) for deviation in moment_deviations]
)

And take the inverse so that 

$W = \hat{W}^{-1}$

In [36]:
# Weighting matrix: inverse of the bootstrap moment covariance matrix.
# np.linalg.inv raises LinAlgError when W_hat is singular.
W = np.linalg.inv(W_hat)

In [52]:
# Save the weighting matrix to the working directory
# (np.save appends the '.npy' extension to the given file name).
np.save('distr_weighting_matrix', W)

In [53]:
# Read the stored weighting matrix back and display it as a check on the export.
np.load('distr_weighting_matrix.npy')

array([[  5.40115835e+05,  -2.68094151e+04,  -1.54140820e+04,
         -1.95997713e+04,   5.15715823e+02,  -6.90929363e+02,
          2.83187460e+02,  -4.43014033e+01,  -7.25902562e+02,
          3.92289690e+02,   7.85196438e+01],
       [ -2.68094151e+04,   9.02487430e+03,  -4.15819453e+01,
          5.88554858e+03,  -1.02567209e+02,   1.37733987e+02,
         -1.31811278e+02,   6.94931439e+01,  -8.00463185e+01,
          1.96732927e+01,   5.58659522e+00],
       [ -1.54140820e+04,  -4.15819453e+01,   1.37453662e+03,
          1.15527815e+02,  -9.31130052e+00,   1.09947142e+01,
         -2.68475390e+01,   3.82182165e+01,   4.52543066e+01,
         -6.47109626e+01,   5.55455107e+01],
       [ -1.95997713e+04,   5.88554858e+03,   1.15527815e+02,
          5.21197171e+03,  -1.14637237e+02,   1.55453511e+02,
         -1.33940211e+02,   6.75360882e+01,  -6.39660820e+01,
         -6.59512529e+00,   2.44945961e+01],
       [  5.15715823e+02,  -1.02567209e+02,  -9.31130052e+00,
         -1.14

In [38]:
# One interval per moment: centred on the empirical value, with a width taken
# from the spread of that moment's bootstrap distribution.
confidence_intervals = [confidence_interval(m, emp) for m, emp in zip(all_bootstrapped_moments, emp_moments)]

In [39]:
# Export the confidence intervals of the bootstrapped data as JSON
# (the file is written to the working directory and overwritten if present).
with open('distr_bootstrapped_confidence_intervals.json', 'w') as fp:
    json.dump(confidence_intervals, fp)

Then, I apply the cost function to the bootstrapped series to get a distribution of J-values

In [40]:
# Loss (J-value) of every bootstrap moment vector measured against the
# empirical moments, using the weighting matrix W.
j_values = [
    quadratic_loss_function(bootstrap_moments, emp_moments, W)
    for bootstrap_moments in moments_b
]

In [41]:
# Export the J-values of the bootstrapped data as JSON
# (the file is written to the working directory and overwritten if present).
with open('distr_bootstrapped_j_values.json', 'w') as fp:
    json.dump(j_values, fp)