- Redo replication but use RNN instead of LASSO subset selection followed by linear regression on selected predictors
- Add 3m, 6m, 12m moving averages
- send 12 months of data to GRU (simplified LSTM)
- Use walk-forward cross-validation to tune hyperparameters, i.e. find best network depth, size, regularization, dropout.

- After identifying most promising network structure, backtest: 

1) train GRU each month on historical data up to that month

2) use GRU to predict following month

3) go long top 6 industries and short bottom 6 industries.


In [1]:
import os
import sys
import warnings
import numpy as np
import pandas as pd
import time 
import copy
from itertools import product

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' #Hide messy TensorFlow warnings
warnings.filterwarnings("ignore") #Hide messy Numpy warnings

from sklearn.model_selection import train_test_split
from sklearn.metrics import explained_variance_score, r2_score
from sklearn.linear_model import LinearRegression, Lasso, lasso_path, lars_path, LassoLarsIC

import keras
from keras.layers.core import Dense, Activation, Dropout
from keras.layers import Input
from keras.models import Model

from keras.layers.recurrent import LSTM, GRU
from keras.regularizers import l1
from keras.models import Sequential
from keras.models import load_model

import ffn
%matplotlib inline

import plotly as py
# print (py.__version__) # requires version >= 1.9.0
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import *
import plotly.figure_factory as ff

init_notebook_mode(connected=True)


Using TensorFlow backend.


In [2]:
print("Loading data...")
data = pd.read_csv("30_Industry_Portfolios.csv")
data = data.set_index('yyyymm')
industries = list(data.columns)
# map industry names to col nums
ind_reverse_dict = dict([(industries[i], i) for i in range(len(industries))])

rfdata = pd.read_csv("F-F_Research_Data_Factors.csv")
rfdata = rfdata.set_index('yyyymm')
data['rf'] = rfdata['RF']

# subtract risk-free rate
# create a response variable led by 1 period to predict
for ind in industries:
    data[ind] = data[ind] - data['rf']

for ind in industries:
    data[ind+".3m"] = pd.rolling_mean(data[ind],3)

for ind in industries:
    data[ind+".6m"] = pd.rolling_mean(data[ind],6)

for ind in industries:
    data[ind+".12m"] = pd.rolling_mean(data[ind],12)
    
for ind in industries:
    data[ind+".lead"] = data[ind].shift(-1)

allcols = list(data.columns[:120])
all_reverse_dict = dict([(allcols[i], i) for i in range(len(allcols))])

data = data.drop(columns=['rf'])    
data = data.dropna(axis=0, how='any')
    
data


Loading data...


Unnamed: 0_level_0,Food,Beer,Smoke,Games,Books,Hshld,Clths,Hlth,Chems,Txtls,...,Telcm.lead,Servs.lead,BusEq.lead,Paper.lead,Trans.lead,Whlsl.lead,Rtail.lead,Meals.lead,Fin.lead,Other.lead
yyyymm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
192708,2.05,-4.26,3.55,5.92,5.18,0.24,1.28,0.00,1.82,0.19,...,6.32,1.94,3.96,3.16,4.10,5.39,7.40,5.73,5.96,0.22
192709,5.83,6.80,4.42,3.97,9.83,2.32,4.44,5.44,5.76,1.68,...,-2.34,4.72,-4.91,-0.11,-4.84,-22.03,-5.28,-2.00,3.85,-3.87
192710,-2.71,-1.05,-0.57,0.01,2.38,-2.09,9.40,4.88,-7.71,-2.86,...,2.62,2.09,9.17,16.66,3.70,-1.51,11.90,2.80,7.79,10.98
192711,6.96,10.08,6.48,3.37,16.41,2.52,1.85,3.47,8.83,5.55,...,1.11,-8.05,1.08,0.82,-0.12,11.23,-0.88,-1.78,10.86,0.85
192712,3.31,12.50,0.81,2.59,3.05,10.09,-0.37,-0.68,-0.45,2.45,...,0.05,0.31,0.93,1.26,-1.48,-1.20,-1.10,-1.83,1.20,-3.63
192801,2.29,0.37,-2.81,-0.62,4.91,3.42,6.82,2.44,-1.33,-1.35,...,-0.63,-0.85,1.69,0.53,-1.68,-8.62,-1.37,-2.92,-3.12,-3.01
192802,-3.29,-5.55,-6.30,-1.22,-2.08,-0.34,-2.78,-1.71,0.40,-5.64,...,1.67,-2.73,2.94,10.79,5.49,7.83,8.40,3.18,9.28,8.56
192803,4.82,14.18,2.05,8.53,4.97,9.70,3.70,10.25,12.87,8.44,...,3.00,2.20,1.38,-0.47,4.15,-6.07,-0.25,0.25,8.65,10.73
192804,2.47,4.65,-6.17,5.18,21.01,3.37,9.74,2.13,-0.48,-4.63,...,6.29,0.46,3.56,5.81,-0.78,7.63,3.12,13.80,1.96,-1.12
192805,1.28,5.16,-0.35,1.53,17.36,-2.84,-1.31,8.95,2.66,-6.68,...,-5.53,-5.56,-3.37,-6.05,-4.15,-7.72,-3.71,-3.12,-10.22,-12.26


In [3]:
# use data >= 195912 for consistency with paper
# but keep data > 2016 to be as current and have as much data as possible
data = data.loc[data.index[data.index > 195911]]

data

Unnamed: 0_level_0,Food,Beer,Smoke,Games,Books,Hshld,Clths,Hlth,Chems,Txtls,...,Telcm.lead,Servs.lead,BusEq.lead,Paper.lead,Trans.lead,Whlsl.lead,Rtail.lead,Meals.lead,Fin.lead,Other.lead
yyyymm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
195912,2.01,0.35,-3.02,1.64,7.29,0.67,1.87,-1.97,3.08,0.74,...,0.62,-6.18,-7.93,-9.41,-4.31,-5.33,-6.09,-10.08,-4.68,-3.98
196001,-4.49,-5.71,-2.05,1.21,-5.47,-7.84,-8.53,-6.68,-10.03,-4.77,...,8.07,9.13,5.09,3.00,-0.94,1.42,4.00,1.81,-0.98,6.32
196002,3.35,-2.14,2.27,4.23,2.39,9.31,1.44,-0.02,-0.74,0.32,...,-0.21,-0.31,3.34,-2.43,-4.99,-1.37,-0.13,-3.88,0.05,-2.43
196003,-1.67,-2.94,-0.18,-0.65,2.18,-0.56,-2.59,1.26,-2.75,-6.79,...,-1.24,7.14,1.77,0.41,-2.13,0.45,-0.53,8.86,-0.64,0.55
196004,1.17,-2.16,1.35,6.46,-1.17,-1.27,0.21,1.49,-5.53,-1.10,...,3.05,-1.75,11.90,2.85,0.90,1.65,3.11,0.80,-0.45,1.02
196005,8.20,-0.52,2.44,7.28,11.67,7.74,1.74,13.50,3.40,2.10,...,-0.58,-8.07,2.39,3.50,2.17,5.96,3.41,1.03,3.72,6.41
196006,5.39,0.47,4.73,2.24,0.02,6.38,-1.59,-0.40,0.45,4.04,...,-0.03,2.84,-2.02,-4.10,-3.11,-6.16,-2.99,-1.25,0.09,-5.95
196007,-2.11,-0.79,4.60,-4.72,0.23,-0.60,-1.10,-3.99,-6.80,-3.14,...,6.94,5.69,2.71,1.18,1.98,4.51,2.85,2.05,3.47,3.48
196008,4.57,3.24,5.20,7.16,3.63,5.09,3.34,2.29,1.17,-0.84,...,-6.07,-3.53,-7.61,-7.37,-7.07,-8.44,-8.57,-1.90,-5.78,-4.21
196009,-3.88,-5.00,-2.09,-2.33,-6.20,-9.18,-4.23,-8.87,-6.70,-5.25,...,-0.08,4.62,-3.40,-1.85,-1.02,-4.22,0.31,-4.54,-0.40,0.38


In [41]:
# roll up X so each row contains 12 previous months * 120 cols
# create training, CV, test sets
# multi-output: simultaneously train all estimators, forcing 1st layer look for common patterns and inputs
# add regularization
# add dropout

npredictors=120
nresponses = 30
lookback = 12

X_raw = data.values[:,:npredictors]
Y = data.values[12:, -nresponses:]
numrows = Y.shape[0]

# each input will have shape 12 * npredictors
X = np.zeros([numrows, lookback, npredictors])
for row in range(numrows):
    prev12 = []
    for i in range(lookback):
        prev12.append(X_raw[row + i])
    X[row] = np.vstack(prev12)

print(X.shape)
print(Y.shape)

(685, 12, 120)
(685, 30)


In [37]:
# build LSTM model
# input (360)
# -> LSTM
# -> Dropout
# -> LSTM
# -> Dropout
# -> 30 Dense linear outputs (1 per industry)

INPUT_DIM = npredictors
OUTPUT_DIM = nresponses

def build_model(hidden_layers = [[16, 0.0001, 0.2], 
                                 [16, 0.0001, 0.2], 
                                 [1, 0.0, 0.0]],
                verbose=True):
    """Take list of [layer_size, reg_penalty, dropout], last layer is linear, rest LSTM"""

    main_input = Input(shape=(None, 
                              INPUT_DIM), 
                       dtype='float32', 
                       name='main_input')
    lastlayer=main_input

    n_lstms = len(hidden_layers)-1
    for i in range(n_lstms):
        layer_size, reg_penalty, dropout = hidden_layers[i]
        if verbose:
            print("layer %d size %d, reg_penalty %.8f, dropout %.3f" % (i, layer_size, reg_penalty, dropout))

        # first n-1 layers are GRU, return_sequences=True
        # nth layer is GRU, return_sequences=False
        return_sequences = True
        if i == n_lstms-1:
            return_sequences= False
        lastlayer = GRU(layer_size, 
                        return_sequences=return_sequences,
                        kernel_regularizer=keras.regularizers.l1(reg_penalty),
                        dropout=dropout, 
                        recurrent_dropout=dropout,
                        name = "GRU%02d" % i
                       )(lastlayer)
        
    layer_size, reg_penalty, dropout = hidden_layers[-1]
    if dropout:
        lastlayer = Dropout(dropout, name="Dropout%02d" % (i+1))(lastlayer)

    # OUTPUT_DIM outputs
    outputs = []
    for i in range(OUTPUT_DIM):
        outputs.append(Dense(1, 
                             activation='linear', 
                             kernel_regularizer=keras.regularizers.l1(reg_penalty),
                             name='output%02d' % i)(lastlayer)
                      )
    
    model = Model(inputs=[main_input], outputs=outputs)
    if verbose:
        print(model.summary())
    model.compile(loss="mae", metrics=['mae'], optimizer="rmsprop", loss_weights=[1.]*OUTPUT_DIM)
    return model


In [38]:
MODELPREFIX = "GRU"
EPOCHS = 160
#VAL_SPLIT = 0.2
BATCH_SIZE = 32
LOOKBACK = 128
BATCH_SIZE = 64
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

def run_experiment (layer1_size = 16, 
                    layer2_size = 16, 
                    layer1_reg_penalty=0.0,
                    layer2_reg_penalty=0.0,
                    layer1_dropout=0.25,
                    layer2_dropout=0.25,
                    layer3_reg_penalty=0.001,
                    layer3_dropout=0.25
                   ):

    start = time.time()

    # generate k-folds
    n_splits = 5
    kf = KFold(n_splits=n_splits)
    kf.get_n_splits(X)
    last_indexes = []
    for train_index, test_index in kf.split(X):
        # use test_index as last index to train
        last_index = test_index[-1] + 1
        last_indexes.append(last_index)

    print("%s Generate splits %s" % (time.strftime("%H:%M:%S"), str([i for i in last_indexes])))
    
    avg_bests = []
    
    print("%s Build model" % (time.strftime("%H:%M:%S")))
    
    model = build_model([[layer1_size, layer1_reg_penalty, layer1_dropout],
                         [layer2_size, layer2_reg_penalty, layer2_dropout],
                         [1, layer3_reg_penalty, layer3_dropout],
                        ])
    print("Compile time : %s" % str(time.time() - start))
    print("Starting to train : %s" % (time.strftime("%H:%M:%S")))

    for i in range(1, n_splits-1):
        
        models = []
        losses = []
        scores = []
        count = 0        
        # skip kfold 0 so you start with train 2x size of eval set
        last_train_index = last_indexes[i]
        last_xval_index = last_indexes[i+1]

        # set up train, xval
        # train from beginning to last_train_index
        print("Training indexes 0 to %d" % (last_train_index-1))
        X_fit = X[:last_train_index]
        Y_fit = Y[:last_train_index]
        # xval from last_train_index to last_xval_index
        print("Cross-validating indexes %d to %d" % (last_train_index, last_xval_index -1 ))
        X_xval = X[last_train_index:last_xval_index]
        Y_xval = Y[last_train_index:last_xval_index]

        responses = []
        for i in range(nresponses):
            responses.append(Y_fit[:,i])
        # train for EPOCHS
        for epoch in range(EPOCHS):
            fit = model.fit(
                X_fit,
                responses,
                batch_size=BATCH_SIZE,
                #validation_split=VAL_SPLIT,
                epochs=1,
                verbose=0)
            
            train_loss = fit.history['loss'][-1]
            # evaluate ... run prediction, calc MSE by industry, and average
            y_xval_pred = np.array(model.predict(X_xval))
            y_xval_pred = y_xval_pred.reshape(Y_xval.T.shape)
            y_xval_pred = y_xval_pred.T
            mse_list = []
            for i in range(len(industries)):
                mse_list.append(mean_squared_error(Y_xval[:,i], y_xval_pred[:,i]))
            xval_score = np.mean(np.array(mse_list))            
            
            losses.append(train_loss)
            scores.append(xval_score)
            models.append(copy.copy(model))

            bestloss_index = np.argmin(scores)
            bestloss_value = scores[bestloss_index]

            sys.stdout.write('.')
            count += 1
            if count % 80 == 0:
                print("")
                print("%s Still training" % (time.strftime("%H:%M:%S")))
            sys.stdout.flush()            
            
            # stop if loss rises by 20% from best
            if xval_score / bestloss_value > 1.2:
                print("Stopping early..." )
                break

        # choose model with lowest xval loss
        print("")
        print ("%s Best Xval loss epoch %d, value %f" % (time.strftime("%H:%M:%S"), bestloss_index, bestloss_value))
        avg_bests.append(bestloss_value)
        model = models[bestloss_index]
    
    print ("Last Xval loss %f" % (bestloss_value))
    avg_loss = np.mean(np.array(avg_bests))
    print ("Avg Xval loss %f" % avg_loss)
    print("--------------------------------------------------------------------------------")
    return (avg_loss, model)


In [39]:
score, model = run_experiment()
print(score)

22:23:17 Generate splits [137, 274, 411, 548, 685]
22:23:17 Build model
layer 0 size 16, reg_penalty 0.00000000, dropout 0.250
layer 1 size 16, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, None, 120)    0                                            
__________________________________________________________________________________________________
GRU00 (GRU)                     (None, None, 16)     6576        main_input[0][0]                 
__________________________________________________________________________________________________
GRU01 (GRU)                     (None, 16)           1584        GRU00[0][0]                      
__________________________________________________________________________________________________
Dropout02 (Dropout)       

................................................................................
22:27:35 Still training
................................................................................
22:28:34 Still training

22:28:34 Best Xval loss epoch 145, value 41.869454
Last Xval loss 41.869454
Avg Xval loss 39.038349
--------------------------------------------------------------------------------
39.038349197569545


In [17]:
# run in big xval loop
# do predictions
# compute mse
# save data to pick best hyperparameters

layer1_sizes = [8, 16, 32]
layer2_sizes = [4, 8, 16]
layer1_reg_penalties = [0.0]
layer2_reg_penalties = [0.0]
layer3_reg_penalties = [0.001, 0.0001]
layer1_dropouts = [0.25]
layer2_dropouts = [0.25]
layer3_dropouts = [0.25]

hyperparameter_combos = list(product(layer1_sizes,
                                     layer2_sizes,
                                     layer1_reg_penalties,
                                     layer2_reg_penalties,
                                     layer3_reg_penalties,
                                     layer1_dropouts,
                                     layer2_dropouts,
                                     layer3_dropouts,
                                    ))

print("%s Running %d experiments" % (time.strftime("%H:%M:%S"), len(hyperparameter_combos)))

experiments = {}

for counter, param_list in enumerate(hyperparameter_combos):
    layer1_size, layer2_size, layer1_reg_penalty, layer2_reg_penalty, layer3_reg_penalty, layer1_dropout, layer2_dropout, layer3_dropout = param_list
    print("%s Running experiment %d of %d" % (time.strftime("%H:%M:%S"), counter+1, len(hyperparameter_combos)))
    key = (layer1_size, layer1_reg_penalty, layer1_dropout, layer2_size, layer2_reg_penalty, layer2_dropout, layer3_reg_penalty, layer3_dropout)
    experiments[key], model = run_experiment(layer1_size=layer1_size,
                                             layer1_reg_penalty=layer1_reg_penalty,
                                             layer1_dropout=layer1_dropout,
                                             layer2_size=layer2_size,
                                             layer2_reg_penalty=layer2_reg_penalty,
                                             layer2_dropout=layer2_dropout,
                                             layer3_reg_penalty=layer3_reg_penalty,
                                             layer3_dropout=layer3_dropout
                                            )
    modelname = "%s_%d_%.6f" % (MODELPREFIX, counter, experiments[key])
    print("%s Saving %s.h5" % (time.strftime("%H:%M:%S"), modelname))
    model.save("%s.h5" % modelname)
    model.save_weights("%s_weights.h5" % modelname)    
                            

21:22:11 Running 18 experiments
21:22:11 Running experiment 1 of 18
21:22:11 Generate splits [137, 274, 411, 548, 685]
21:22:11 Build model
layer 0 size 8, reg_penalty 0.00000000, dropout 0.250
layer 1 size 4, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, None, 120)    0                                            
__________________________________________________________________________________________________
GRU00 (GRU)                     (None, None, 8)      3096        main_input[0][0]                 
__________________________________________________________________________________________________
GRU01 (GRU)                     (None, 4)            156         GRU00[0][0]                      
___________________________________________________________

layer 1 size 4, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, None, 120)    0                                            
__________________________________________________________________________________________________
GRU00 (GRU)                     (None, None, 8)      3096        main_input[0][0]                 
__________________________________________________________________________________________________
GRU01 (GRU)                     (None, 4)            156         GRU00[0][0]                      
__________________________________________________________________________________________________
Dropout02 (Dropout)             (None, 4)            0           GRU01[0][0]                      
_______________________________________________________

Compile time : 1.1610066890716553
Starting to train : 21:25:23
Training indexes 0 to 273
Cross-validating indexes 274 to 410
..................................................
21:25:57 Best Xval loss epoch 48, value 32.831582
Training indexes 0 to 410
Cross-validating indexes 411 to 547
..................................................
21:26:25 Best Xval loss epoch 45, value 41.049425
Training indexes 0 to 547
Cross-validating indexes 548 to 684
..................................................
21:27:00 Best Xval loss epoch 14, value 41.957562
Last Xval loss 41.957562
Avg Xval loss 38.612856
--------------------------------------------------------------------------------
21:27:00 Running experiment 4 of 18
21:27:00 Generate splits [137, 274, 411, 548, 685]
21:27:00 Build model
layer 0 size 8, reg_penalty 0.00000000, dropout 0.250
layer 1 size 8, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Lay

..................................................
21:27:36 Best Xval loss epoch 48, value 32.784666
Training indexes 0 to 410
Cross-validating indexes 411 to 547
..................................................
21:28:04 Best Xval loss epoch 38, value 41.024682
Training indexes 0 to 547
Cross-validating indexes 548 to 684
..................................................
21:28:39 Best Xval loss epoch 5, value 41.664234
Last Xval loss 41.664234
Avg Xval loss 38.491194
--------------------------------------------------------------------------------
21:28:39 Running experiment 5 of 18
21:28:39 Generate splits [137, 274, 411, 548, 685]
21:28:39 Build model
layer 0 size 8, reg_penalty 0.00000000, dropout 0.250
layer 1 size 16, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)      

..................................................
21:29:45 Best Xval loss epoch 2, value 40.985676
Training indexes 0 to 547
Cross-validating indexes 548 to 684
..................................................
21:30:21 Best Xval loss epoch 3, value 41.952545
Last Xval loss 41.952545
Avg Xval loss 38.476597
--------------------------------------------------------------------------------
21:30:21 Running experiment 6 of 18
21:30:21 Generate splits [137, 274, 411, 548, 685]
21:30:21 Build model
layer 0 size 8, reg_penalty 0.00000000, dropout 0.250
layer 1 size 16, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, None, 120)    0                                            
______________________________________________________________________________________________

..................................................
21:32:04 Best Xval loss epoch 15, value 43.123092
Last Xval loss 43.123092
Avg Xval loss 39.146831
--------------------------------------------------------------------------------
21:32:04 Running experiment 7 of 18
21:32:04 Generate splits [137, 274, 411, 548, 685]
21:32:04 Build model
layer 0 size 16, reg_penalty 0.00000000, dropout 0.250
layer 1 size 4, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, None, 120)    0                                            
__________________________________________________________________________________________________
GRU00 (GRU)                     (None, None, 16)     6576        main_input[0][0]                 
_________________________________________________________

layer 1 size 4, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, None, 120)    0                                            
__________________________________________________________________________________________________
GRU00 (GRU)                     (None, None, 16)     6576        main_input[0][0]                 
__________________________________________________________________________________________________
GRU01 (GRU)                     (None, 4)            252         GRU00[0][0]                      
__________________________________________________________________________________________________
Dropout02 (Dropout)             (None, 4)            0           GRU01[0][0]                      
_______________________________________________________

Compile time : 1.1762802600860596
Starting to train : 21:35:32
Training indexes 0 to 273
Cross-validating indexes 274 to 410
..................................................
21:36:13 Best Xval loss epoch 45, value 32.841647
Training indexes 0 to 410
Cross-validating indexes 411 to 547
..................................................
21:36:42 Best Xval loss epoch 8, value 41.033472
Training indexes 0 to 547
Cross-validating indexes 548 to 684
..................................................
21:37:19 Best Xval loss epoch 3, value 42.366848
Last Xval loss 42.366848
Avg Xval loss 38.747322
--------------------------------------------------------------------------------
21:37:19 Running experiment 10 of 18
21:37:19 Generate splits [137, 274, 411, 548, 685]
21:37:19 Build model
layer 0 size 16, reg_penalty 0.00000000, dropout 0.250
layer 1 size 8, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Lay

..................................................
21:38:01 Best Xval loss epoch 49, value 33.063317
Training indexes 0 to 410
Cross-validating indexes 411 to 547
..................................................
21:38:30 Best Xval loss epoch 32, value 40.853438
Training indexes 0 to 547
Cross-validating indexes 548 to 684
..................................................
21:39:05 Best Xval loss epoch 0, value 42.338338
Last Xval loss 42.338338
Avg Xval loss 38.751698
--------------------------------------------------------------------------------
21:39:05 Running experiment 11 of 18
21:39:05 Generate splits [137, 274, 411, 548, 685]
21:39:05 Build model
layer 0 size 16, reg_penalty 0.00000000, dropout 0.250
layer 1 size 16, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)    

..................................................
21:40:15 Best Xval loss epoch 4, value 41.062251
Training indexes 0 to 547
Cross-validating indexes 548 to 684
..................................................
21:40:50 Best Xval loss epoch 9, value 41.762761
Last Xval loss 41.762761
Avg Xval loss 38.581952
--------------------------------------------------------------------------------
21:40:50 Running experiment 12 of 18
21:40:50 Generate splits [137, 274, 411, 548, 685]
21:40:50 Build model
layer 0 size 16, reg_penalty 0.00000000, dropout 0.250
layer 1 size 16, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, None, 120)    0                                            
____________________________________________________________________________________________

..................................................
21:42:38 Best Xval loss epoch 15, value 41.111046
Last Xval loss 41.111046
Avg Xval loss 38.468910
--------------------------------------------------------------------------------
21:42:38 Running experiment 13 of 18
21:42:38 Generate splits [137, 274, 411, 548, 685]
21:42:38 Build model
layer 0 size 32, reg_penalty 0.00000000, dropout 0.250
layer 1 size 4, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, None, 120)    0                                            
__________________________________________________________________________________________________
GRU00 (GRU)                     (None, None, 32)     14688       main_input[0][0]                 
________________________________________________________

layer 1 size 4, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, None, 120)    0                                            
__________________________________________________________________________________________________
GRU00 (GRU)                     (None, None, 32)     14688       main_input[0][0]                 
__________________________________________________________________________________________________
GRU01 (GRU)                     (None, 4)            444         GRU00[0][0]                      
__________________________________________________________________________________________________
Dropout02 (Dropout)             (None, 4)            0           GRU01[0][0]                      
_______________________________________________________

Compile time : 1.2021899223327637
Starting to train : 21:46:20
Training indexes 0 to 273
Cross-validating indexes 274 to 410
..................................................
21:47:04 Best Xval loss epoch 49, value 32.724735
Training indexes 0 to 410
Cross-validating indexes 411 to 547
..................................................
21:47:35 Best Xval loss epoch 0, value 41.932191
Training indexes 0 to 547
Cross-validating indexes 548 to 684
..................................................
21:48:13 Best Xval loss epoch 10, value 42.865423
Last Xval loss 42.865423
Avg Xval loss 39.174116
--------------------------------------------------------------------------------
21:48:13 Running experiment 16 of 18
21:48:13 Generate splits [137, 274, 411, 548, 685]
21:48:13 Build model
layer 0 size 32, reg_penalty 0.00000000, dropout 0.250
layer 1 size 8, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
La

..................................................
21:49:00 Best Xval loss epoch 42, value 32.512065
Training indexes 0 to 410
Cross-validating indexes 411 to 547
..................................................
21:49:30 Best Xval loss epoch 5, value 41.242924
Training indexes 0 to 547
Cross-validating indexes 548 to 684
..................................................
21:50:09 Best Xval loss epoch 13, value 41.946548
Last Xval loss 41.946548
Avg Xval loss 38.567179
--------------------------------------------------------------------------------
21:50:09 Running experiment 17 of 18
21:50:09 Generate splits [137, 274, 411, 548, 685]
21:50:09 Build model
layer 0 size 32, reg_penalty 0.00000000, dropout 0.250
layer 1 size 16, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)    

..................................................
21:51:26 Best Xval loss epoch 0, value 41.807466
Training indexes 0 to 547
Cross-validating indexes 548 to 684
..................................................
21:52:03 Best Xval loss epoch 1, value 41.326265
Last Xval loss 41.326265
Avg Xval loss 38.463512
--------------------------------------------------------------------------------
21:52:03 Running experiment 18 of 18
21:52:03 Generate splits [137, 274, 411, 548, 685]
21:52:03 Build model
layer 0 size 32, reg_penalty 0.00000000, dropout 0.250
layer 1 size 16, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, None, 120)    0                                            
____________________________________________________________________________________________

..................................................
21:53:59 Best Xval loss epoch 3, value 44.187688
Last Xval loss 44.187688
Avg Xval loss 39.489706
--------------------------------------------------------------------------------


In [18]:
experiments

{(8, 0.0, 0.25, 4, 0.0, 0.25, 0.0001, 0.25): 38.40307148621994,
 (8, 0.0, 0.25, 4, 0.0, 0.25, 0.001, 0.25): 38.72559272263172,
 (8, 0.0, 0.25, 8, 0.0, 0.25, 0.0001, 0.25): 38.49119367961723,
 (8, 0.0, 0.25, 8, 0.0, 0.25, 0.001, 0.25): 38.61285636867233,
 (8, 0.0, 0.25, 16, 0.0, 0.25, 0.0001, 0.25): 39.146831413538415,
 (8, 0.0, 0.25, 16, 0.0, 0.25, 0.001, 0.25): 38.47659704043105,
 (16, 0.0, 0.25, 4, 0.0, 0.25, 0.0001, 0.25): 38.84508341226216,
 (16, 0.0, 0.25, 4, 0.0, 0.25, 0.001, 0.25): 38.52322376703327,
 (16, 0.0, 0.25, 8, 0.0, 0.25, 0.0001, 0.25): 38.75169783360641,
 (16, 0.0, 0.25, 8, 0.0, 0.25, 0.001, 0.25): 38.747322424780755,
 (16, 0.0, 0.25, 16, 0.0, 0.25, 0.0001, 0.25): 38.468909621847,
 (16, 0.0, 0.25, 16, 0.0, 0.25, 0.001, 0.25): 38.58195176391808,
 (32, 0.0, 0.25, 4, 0.0, 0.25, 0.0001, 0.25): 38.846487146766954,
 (32, 0.0, 0.25, 4, 0.0, 0.25, 0.001, 0.25): 38.8439647648184,
 (32, 0.0, 0.25, 8, 0.0, 0.25, 0.0001, 0.25): 38.56717915891477,
 (32, 0.0, 0.25, 8, 0.0, 0.25, 0.0

In [19]:
# make a dataframe with columns as loss keys + loss value
flatlist = [list(l[0]) + [l[1]] for l in experiments.items()]
lossframe = pd.DataFrame(flatlist, columns=["layer1_size",
                                            "layer1_reg_penalty",
                                            "layer1_dropout",
                                            "layer2_size",
                                            "layer2_reg_penalty",
                                            "layer2_dropout",
                                            "layer3_reg_penalty",
                                            "layer3_dropout",
                                            "loss"
                                           ])
lossframe = lossframe.sort_values(['loss'])
lossframe


Unnamed: 0,layer1_size,layer1_reg_penalty,layer1_dropout,layer2_size,layer2_reg_penalty,layer2_dropout,layer3_reg_penalty,layer3_dropout,loss
1,8,0.0,0.25,4,0.0,0.25,0.0001,0.25,38.403071
16,32,0.0,0.25,16,0.0,0.25,0.001,0.25,38.463512
11,16,0.0,0.25,16,0.0,0.25,0.0001,0.25,38.46891
4,8,0.0,0.25,16,0.0,0.25,0.001,0.25,38.476597
3,8,0.0,0.25,8,0.0,0.25,0.0001,0.25,38.491194
6,16,0.0,0.25,4,0.0,0.25,0.001,0.25,38.523224
15,32,0.0,0.25,8,0.0,0.25,0.0001,0.25,38.567179
10,16,0.0,0.25,16,0.0,0.25,0.001,0.25,38.581952
2,8,0.0,0.25,8,0.0,0.25,0.001,0.25,38.612856
0,8,0.0,0.25,4,0.0,0.25,0.001,0.25,38.725593


In [21]:
#sort each column by mean loss
pd.DataFrame(lossframe.groupby(['layer1_size'])['loss'].mean())

Unnamed: 0_level_0,loss
layer1_size,Unnamed: 1_level_1
8,38.64269
16,38.653031
32,38.897494


In [22]:
pd.DataFrame(lossframe.groupby(['layer2_size'])['loss'].mean())

Unnamed: 0_level_0,loss
layer2_size,Unnamed: 1_level_1
4,38.697904
8,38.724061
16,38.771251


In [25]:
pd.DataFrame(lossframe.groupby(['layer3_reg_penalty'])['loss'].mean())

Unnamed: 0_level_0,loss
layer3_reg_penalty,Unnamed: 1_level_1
0.0001,38.778907
0.001,38.683237


In [26]:
pd.DataFrame(lossframe.groupby(['layer3_dropout'])['loss'].mean())

Unnamed: 0_level_0,loss
layer3_dropout,Unnamed: 1_level_1
0.25,38.731072


In [27]:
lossframe.pivot_table(index=['layer1_size'], columns=['layer2_size'], values=['loss'])


Unnamed: 0_level_0,loss,loss,loss
layer2_size,4,8,16
layer1_size,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
8,38.564332,38.552025,38.811714
16,38.684154,38.74951,38.525431
32,38.845226,38.870648,38.976609


In [28]:
def plot_matrix(lossframe, x_labels, y_labels, x_suffix="", y_suffix=""):

    pivot = lossframe.pivot_table(index=[x_labels], columns=[y_labels], values=['loss'])
    # specify labels as strings, to force it to use a discrete axis
    if lossframe[x_labels].dtype == np.float64 or lossframe[x_labels].dtype == np.float32:
        xaxis = ["%f %s" % (i, x_suffix) for i in pivot.columns.levels[1].values]
    else:
        xaxis = ["%d %s" % (i, x_suffix) for i in pivot.columns.levels[1].values]
    if lossframe[y_labels].dtype == np.float64 or lossframe[y_labels].dtype == np.float32:
        yaxis = ["%f %s" % (i, y_suffix) for i in pivot.index.values]
    else:
        yaxis = ["%d %s" % (i, y_suffix) for i in pivot.index.values]
        
    print(xaxis, yaxis)
    """plot a heat map of a matrix"""
    chart_width=640
    chart_height=480
    
    layout = Layout(
        title="%s v. %s" % (x_labels, y_labels),
        height=chart_height,
        width=chart_width,     
        margin=dict(
            l=150,
            r=30,
            b=120,
            t=100,
        ),
        xaxis=dict(
            title=y_labels,
            tickfont=dict(
                family='Arial, sans-serif',
                size=10,
                color='black'
            ),
        ),
        yaxis=dict(
            title=x_labels,
            tickfont=dict(
                family='Arial, sans-serif',
                size=10,
                color='black'
            ),
        ),
    )
    
    data = [Heatmap(z=pivot.values,
                    x=xaxis,
                    y=yaxis,
                    colorscale=[[0, 'rgb(0,0,255)', [1, 'rgb(255,0,0)']]],
                   )
           ]

    fig = Figure(data=data, layout=layout)
    return iplot(fig, link_text="")

plot_matrix(lossframe, "layer1_size", "layer2_size", x_suffix=" units", y_suffix=" units")



['4  units', '8  units', '16  units'] ['8  units', '16  units', '32  units']


In [46]:
EPOCHS=160

def fit_predict(X, Y, model):
    """for backtest, train model using Ys v. X using n-1 rows
    predict Ys on X using nth row
    return a prediction for month n+1 using X for final month"""
    
    # keep last row to predict against
    X_predict = X[-1]
    X_predict = X_predict.reshape(1,X.shape[1],X.shape[2])
    
    # fit on remaining rows
    X_fit = X[:-1]
    Y_fit = Y[:-1]
    
    Ys = []
    for i in range(OUTPUT_DIM):
        Ys.append(Y_fit[:,i])
        
    fit = model.fit(
        X_fit,
        Ys,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        verbose=0)
    
    return [z[0][0] for z in model.predict(X_predict)]    

predictions = fit_predict(X, Y, model)
predictions

[array([[2.0668411]], dtype=float32), array([[2.1671436]], dtype=float32), array([[2.6684165]], dtype=float32), array([[2.998097]], dtype=float32), array([[2.3361874]], dtype=float32), array([[2.152578]], dtype=float32), array([[2.7692204]], dtype=float32), array([[2.1950886]], dtype=float32), array([[2.6096902]], dtype=float32), array([[2.7966287]], dtype=float32), array([[2.7754967]], dtype=float32), array([[2.6230006]], dtype=float32), array([[3.0043914]], dtype=float32), array([[2.9483125]], dtype=float32), array([[2.6848085]], dtype=float32), array([[3.4327831]], dtype=float32), array([[2.2238753]], dtype=float32), array([[2.5416346]], dtype=float32), array([[2.1236176]], dtype=float32), array([[1.853823]], dtype=float32), array([[1.7496694]], dtype=float32), array([[2.994804]], dtype=float32), array([[2.684042]], dtype=float32), array([[2.1574578]], dtype=float32), array([[2.8792164]], dtype=float32), array([[2.6844234]], dtype=float32), array([[2.5600595]], dtype=float32), array

[2.0668411,
 2.1671436,
 2.6684165,
 2.998097,
 2.3361874,
 2.152578,
 2.7692204,
 2.1950886,
 2.6096902,
 2.7966287,
 2.7754967,
 2.6230006,
 3.0043914,
 2.9483125,
 2.6848085,
 3.4327831,
 2.2238753,
 2.5416346,
 2.1236176,
 1.853823,
 1.7496694,
 2.994804,
 2.684042,
 2.1574578,
 2.8792164,
 2.6844234,
 2.5600595,
 3.1260774,
 2.8187757,
 2.4286833]

In [47]:
# fit and predict all months starting STARTMONTH using data up to that month
# compute predictions matrix P
# compute returns matrix R using mean(top 6, (-bot 6))

def run_backtest(X, Y, arg_dict, startmonth=0):
    global P
    global R 
    
    print("%s Starting backtest" % (time.strftime("%H:%M:%S")))
    print(arg_dict)
    P = np.zeros((X.shape[0],OUTPUT_DIM))
    count = 0
    for month_index in range(startmonth, X.shape[0]+1):
        model = build_model([[arg_dict["layer1_size"], arg_dict["layer1_reg_penalty"], arg_dict["layer1_dropout"]],
                             [arg_dict["layer2_size"], arg_dict["layer2_reg_penalty"], arg_dict["layer2_dropout"]],
                             [1, arg_dict["layer3_reg_penalty"], arg_dict["layer3_dropout"]],
                            ])
        predictions = fit_predict(X[:month_index, :], 
                                  Y[:month_index], 
                                  model)
        try:
            P[month_index]= predictions
            sys.stdout.write('.')
            count += 1
            if count % 80 == 0:
                print("")
                print("%s Still training" % (time.strftime("%H:%M:%S")))
            sys.stdout.flush()
        except IndexError:
            # I want to run the fit and see the R-squared on full dataset
            # but we are storing the predictions in row of the month predicted
            # so we have no row to store the last prediction (2017-01)
            print("\nlast prediction not stored")
                
    R = np.zeros(P.shape[0])
    numstocks = 6 # top quintile (and bottom)

    for month_index in range(startmonth, X.shape[0]):
        # get indexes of sorted smallest to largest
        select_array = np.argsort(P[month_index])
        # leftmost 6
        short_indexes = select_array[:numstocks]
        # rightmost 6
        long_indexes = select_array[-numstocks:]
        # compute equal weighted long/short return
        R[month_index] = np.mean(X[month_index, long_indexes])/2 - np.mean(X[month_index, short_indexes])/2

    results = R[startmonth:]

    index = pd.date_range('01/01/1970',periods=results.shape[0], freq='M')
    perfdata = pd.DataFrame(results,index=index,columns=['Returns'])
    perfdata['Equity'] = 100 * np.cumprod(1 + results / 100)

    stats = perfdata['Equity'].calc_stats()

    retframe = pd.DataFrame([stats.stats.loc['start'],
                             stats.stats.loc['end'],
                             stats.stats.loc['cagr'],
                             stats.stats.loc['yearly_vol'],
                             stats.stats.loc['yearly_sharpe'],
                             stats.stats.loc['max_drawdown'],
                             ffn.core.calc_sortino_ratio(perfdata.Returns, rf=0, nperiods=564, annualize=False),
                            ],
                            index = ['start',
                                     'end',
                                     'cagr',
                                     'yearly_vol',
                                     'yearly_sharpe',
                                     'max_drawdown',
                                     'sortino',
                                    ],
                            columns=['Value'])   
    return retframe


In [None]:
STARTMONTH=121
arg_dict = {"layer1_size" : 16,
            "layer1_reg_penalty" : 0.0,
            "layer1_dropout": 0.25,
            "layer2_size": 16,
            "layer2_reg_penalty" : 0.0,
            "layer2_dropout" : 0.25,
            "layer3_reg_penalty" : 0.0,
            "layer3_dropout" : 0.001,
            'verbose' : False
           }
     
#model = build_model(**arg_dict)
run_backtest(X, Y, arg_dict, startmonth=STARTMONTH)

22:38:38 Starting backtest
{'layer1_size': 16, 'layer1_reg_penalty': 0.0, 'layer1_dropout': 0.25, 'layer2_size': 16, 'layer2_reg_penalty': 0.0, 'layer2_dropout': 0.25, 'layer3_reg_penalty': 0.0, 'layer3_dropout': 0.001, 'verbose': False}
layer 0 size 16, reg_penalty 0.00000000, dropout 0.250
layer 1 size 16, reg_penalty 0.00000000, dropout 0.250
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, None, 120)    0                                            
__________________________________________________________________________________________________
GRU00 (GRU)                     (None, None, 16)     6576        main_input[0][0]                 
__________________________________________________________________________________________________
GRU01 (GRU)                     (None, 16)           1584 

In [None]:
X.shape

In [None]:
# train model for EPOCHS on X[:-1]
# predict Y[-1] using X[-1], return prediction

BATCH_SIZE=32
EPOCHS=15
OUTPUT_DIM=30

def fit_predict(model, X, Y, epochs=EPOCHS):
    """fit Ys v. X using n-1 rows
    predict Ys on X using nth row
    return predictions for last month"""
    
    # keep last row to predict against
    X_predict = X[-1]
    X_predict = X_predict.reshape(1,X.shape[1],X.shape[2])
    # fit on remaining rows
    X_fit = X[:-1]
    Y_fit = Y[:-1]

    model.compile(loss="mae", metrics=['mae'], optimizer="rmsprop", loss_weights=[1.]*OUTPUT_DIM)
    #model.compile(loss="mse", metrics=['mse'], optimizer="rmsprop")

    Ys = []
    for i in range(OUTPUT_DIM):
        Ys.append(Y_fit[:,i])
        
    fit = model.fit(
        X_fit,
        Ys,
        batch_size=BATCH_SIZE,
        epochs=epochs,
        verbose=0)
    
    Y_predict = model.predict(X_predict)
        
    return Y_predict

In [None]:
# initialize predictions matrix
STARTMONTH=121
P = np.zeros(Y.shape)

model = build_model([[32, 0.0001, 0.25],
                     [32, 0.0001, 0.25],
                     [1, 0.0001, 0.25],
                    ])
print("%s Start backtest training" % (time.strftime("%H:%M:%S")))

count = 0
for month_index in range(STARTMONTH, X.shape[0]+1):
    predictions = fit_predict(model, X[:month_index, :], Y[:month_index])
    predictions = [pred[0,0] for pred in predictions]
    
    try:
        P[month_index]= predictions
        sys.stdout.write('.')
        count += 1
        if count % 80 == 0:
            print("")
            print("%s Still training" % (time.strftime("%H:%M:%S")))
        sys.stdout.flush()
    except IndexError:
        # I want to run the fit and see the expected R-squared
        # but we are storing the predictions in row of the month predicted
        # so we have no row to store the last prediction (2017-01)
        print("\nlast prediction not stored")

print("%s End backtest training" % (time.strftime("%H:%M:%S")))


In [None]:
R = np.zeros(P.shape[0])
numstocks = 6 # top quintile (and bottom)

for month_index in range(STARTMONTH, X.shape[0]):
        # get indexes of sorted smallest to largest
        select_array = np.argsort(P[month_index])
        # leftmost 6
        short_indexes = select_array[:numstocks]
        # rightmost 6
        long_indexes = select_array[-numstocks:]
        # compute equal weighted long/short return
        R[month_index] = np.mean(X[month_index, long_indexes])/2 - np.mean(X[month_index, short_indexes])/2

results = R[STARTMONTH:]
index = pd.date_range('01/01/1970',periods=564, freq='M')
perfdata = pd.DataFrame(results,index=index,columns=['Returns'])
perfdata['Equity'] = 100 * np.cumprod(1 + results / 100)

stats = perfdata['Equity'].calc_stats()

pd.DataFrame([stats.stats.loc['start'],
              stats.stats.loc['end'],
              stats.stats.loc['cagr'],
              stats.stats.loc['yearly_vol'],
              stats.stats.loc['yearly_sharpe'],
              stats.stats.loc['max_drawdown'],
              ffn.core.calc_sortino_ratio(perfdata.Returns, rf=0, nperiods=564, annualize=False),
             ],
            index = ['start',
                     'end',
                     'cagr',
                     'yearly_vol',
                     'yearly_sharpe',
                     'max_drawdown',
                     'sortino',
                    ],
            columns=['Value'])


In [None]:
perf = 100 * np.cumprod(1 + results / 100)

x_coords = np.linspace(1970, 2016, perf.shape[0])

trace1 = Scatter(
    x = x_coords,
    y = perf,
    name = 'Growth of $1',    
)

layout = Layout(
    yaxis=dict(
        type='log',
        autorange=True
    )
)
data = [trace1]

fig = Figure(data=data, layout=layout)

iplot(fig)

In [None]:
curr_frame = X[0]
predictions = [z[0] for z in curr_frame]

for i in range(X.shape[0]):
    predictions.append(model.predict(curr_frame[newaxis,:,:])[0,0])
    curr_frame = np.array(predictions[-window_size:]).reshape([window_size,1])    

# truncate the ones which aren't predictions
predictions = np.array(predictions[window_size:])


In [None]:
x_coords = np.linspace(0, 1, y.shape[0])

trace1 = Scatter(
    x = x_coords,
    y = y,
    name = 'Training data',    
)
trace2 = Scatter(    
    x = x_coords,
    y = predictions,
    name = 'Predictions',

)

data = [trace1, trace2]

iplot(data, filename='basic-line')