<a href="https://colab.research.google.com/github/ignacioaranguren1/bd_2/blob/main/bd_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [126]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os
import keras_tuner
import datetime as dt

from tqdm import tqdm
from datetime import datetime
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from keras_tuner.tuners import RandomSearch

In [50]:
%matplotlib inline

datapath = '/Users/ignacioaranguren/bd_2/data'
os.chdir(datapath)

1.In the data used by Gu, Kelly and Xiu (RFS 2019 – provided in class), use a similar procedure to theirs to predict stock returns with neural networks. Start by finding a suitable baseline configuration, and use a validation procedure to pick optimal hyperparameters for three neural network models: One with 2 hidden layers, one with 3 hidden layers, and one with 4 hidden layers. 

In [151]:
data = pd.read_pickle('returns_chars_panel.pkl')
macro = pd.read_pickle('macro_timeseries.pkl')

In [168]:
def train_validation_test_split(data,train_end_date,validation_end_date):
  tmp = data.reset_index()
  train = tmp[tmp.date<=train_end_date].set_index(['date','permno'],drop=True)
  validation = tmp[(tmp.date>train_end_date) & (tmp.date<=validation_end_date)].set_index(['date','permno'],drop=True)
  test = tmp[tmp.date>validation_end_date].set_index(['date','permno'],drop=True)
  return train,validation,test

In [169]:
data_merged = pd.merge(data,macro,on=['date'])
datelist = list(set(data_merged['date']))
datelist.sort()
data_merged.set_index(['date','permno'],drop=True,inplace=True)

In [170]:
train_ratio = 0.5
validation_ratio = 0.25
train_date = datelist[int(len(datelist)*train_ratio)]
validation_date = datelist[int(len(datelist)*(train_ratio+validation_ratio))]
X = data_merged.iloc[:,3:].copy()
y = data_merged['excess_ret'].copy()

In [172]:
X_train,X_validation,X_test = train_validation_test_split(X,train_date,validation_date)
y_train,y_validation,y_test = train_validation_test_split(y,train_date,validation_date)

In [56]:
def keras_model(n_layers, units, dropout_rate, learning_rate):
    # Build model
    model = Sequential()
    model.add(layers.Input(shape=(105,)))
    for i in range(n_layers):
        model.add(layers.Dense(units=units[0][i], activation='relu'))
    model.add(layers.Dropout(rate=dropout_rate[0]))
    model.add(layers.Dense(1, activation='linear'))
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mse')
    return model

class HyperRegressor(keras_tuner.HyperModel):
    def __init__(self, n_layers, *args, **kwargs):
        self.n_layers = n_layers
        super().__init__(*args, **kwargs)

    def build(self, hp):
        units=[hp.Int(f'units_{i + 1}',min_value=16,max_value=160,step=16) for i in range(self.n_layers)],
        dropout_rate=hp.Choice('dropout_rate', values=[0.2,0.4,0.6,0.8]),
        learning_rate = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2, sampling="log")
        return keras_model(self.n_layers,
                           units, 
                           dropout_rate, 
                           learning_rate
                          )

    def fit(self, hp, model, x, y, validation_data, **kwargs):
        model.fit(x, y, **kwargs)
        x_val, y_val = validation_data
        y_pred = model.predict(x_val)
        # Return a single float to minimize.
        return np.mean((y_pred - y_val)**2)

In [57]:
MAX_TRIALS = 10
EXECUTION_PER_TRIAL = 3
EPOCHS = 10
BATCH_SIZE = 256

def tune_model(n_layers=2):
  tuner = RandomSearch(
        hypermodel=HyperRegressor(n_layers),
        max_trials=MAX_TRIALS,
        executions_per_trial=EXECUTION_PER_TRIAL,
        overwrite=True,
        directory='bd_2',
        project_name=f'NN_new_{n_layers}'
  )
  tuner.search(
      X_train.values, 
      y_train.values,
      validation_data=(X_validation.values, y_validation.values),
      batch_size=BATCH_SIZE,
      epochs=EPOCHS
  )
  return tuner

In [58]:
models=[]
parameters=[]
tuners=[]
for n in range(2,5):
    tuner = tune_model(n)
    parameters.append(tuner.get_best_hyperparameters)
    models.append(tuner.get_best_models(1)[0])
    tuners.append(tuner)

Trial 10 Complete [00h 05m 01s]
default_objective: 0.040514956096232325

Best default_objective So Far: 0.04012584269145073
Total elapsed time: 00h 49m 58s
INFO:tensorflow:Oracle triggered exit


In [59]:
models

[<keras.engine.sequential.Sequential at 0x140734ee0>,
 <keras.engine.sequential.Sequential at 0x140eb4ee0>,
 <keras.engine.sequential.Sequential at 0x1408ed8e0>]

In [60]:
results = {}
for i in range(3):
    train_result = models[i].evaluate(X_train.values,y_train.values,batch_size=256)
    test_result = models[i].evaluate(X_test.values,y_test.values,batch_size=256)
    results[f'model_{i}'] = {'train': train_result,'test': test_result}



In [74]:
results

{'model_0': {'train': 0.022159937769174576, 'test': 0.025516396388411522},
 'model_1': {'train': 0.021902702748775482, 'test': 0.025462862104177475},
 'model_2': {'train': 0.02213214337825775, 'test': 0.025467146188020706}}

In [73]:
for i in range(3):
  print(tuners[i].get_best_hyperparameters()[0].values)

{'units_1': 32, 'units_2': 32, 'dropout_rate': 0.6, 'learning_rate': 0.00012474684203056348}
{'units_1': 96, 'units_2': 128, 'units_3': 64, 'dropout_rate': 0.8, 'learning_rate': 0.0002967584080441718}
{'units_1': 32, 'units_2': 32, 'units_3': 32, 'units_4': 16, 'dropout_rate': 0.8, 'learning_rate': 0.0002796941031491936}


In [69]:
with open('parameters.pkl','wb') as f:
  pickle.dump(parameters,f)
with open('models.pkl','wb') as f:
  pickle.dump(models,f)
with open('tuners.pkl','wb') as f:
  pickle.dump(tuners,f)

INFO:tensorflow:Assets written to: ram://71713393-ea6b-4006-a0e3-c627dadb352b/assets
INFO:tensorflow:Assets written to: ram://a47e28e8-de34-4e4e-8c53-89cf37802bd9/assets
INFO:tensorflow:Assets written to: ram://f1cb8e24-4df9-4bf3-8ea3-b89ce9606319/assets


2.Use test data to get an idea of the out of sample performance of each model. Convert the standard MSE metric for out of sample performance to the “R2 out of sample” metric that was discussed in class. Compare your results to those in Gu-Kelly-Xiu and comment on the differences. 

In [75]:
with open('models.pkl','rb') as f:
  models = pickle.load(f)
with open('parameters.pkl','rb') as f:
  parameters = pickle.load(f)
with open('tuners.pkl','rb') as f:
  tuners = pickle.load(f)

In [175]:
def r_squared(y_pred, y_test):
    return 1 - np.sum((y_test - y_pred)**2) / np.sum(y_test**2)

rankings = X_test['mvel1'].groupby(['date']).rank()
top_X_test = X_test.loc[rankings<=1000,:].values
top_y_test = y_test.loc[rankings<=1000,:].values
rankings_reverse = X_test['mvel1'].groupby(['date']).rank(ascending=False)
bottom_X_test = X_test.loc[rankings_reverse<=1000,:].values
bottom_y_test = y_test.loc[rankings_reverse<=1000,:].values

R2_oos_df = pd.DataFrame(columns = ['R2_OOS','R2_OOS_top1000','R2_OOS_low1000'],index = ['NN2','NN3','NN4'])
for i in range(3):
    y_pred_all = models[i].predict(X_test,batch_size=256).reshape(-1,1)
    y_pred_top = models[i].predict(top_X_test,batch_size=256).reshape(-1,1)
    y_pred_bottom = models[i].predict(bottom_X_test,batch_size=256).reshape(-1,1)
    
    row = [r_squared(y_pred_all, y_test)[0],
           r_squared(y_pred_top, top_y_test),
           r_squared(y_pred_bottom, bottom_y_test)]
    
    R2_oos_df.iloc[i]= row



In [176]:
R2_oos_df * 100

Unnamed: 0,R2_OOS,R2_OOS_top1000,R2_OOS_low1000
NN2,0.741033,0.822783,0.883065
NN3,0.949141,1.173593,0.534166
NN4,0.932509,1.037264,0.865441


In [183]:
# X_test.groupby(pd.Grouper(freq='M'))['mvel1'].rank()
best_r_squared = max(R2_oos_df.iloc[:,0])
best_r_squared

0.009491411722372889

3.Pick the model that performs the best out of sample, and interpret its output by doing the following analysis of variable importance:
a.	First, for all stock characteristics, get variable importance by setting one predictor at a time to zero and finding the decrease in out of sample R2. Show a table of the 10 most important variables according to this measure, and give an economic interpretation. 


In [186]:
feature_importance = {}
for column_name in tqdm(X_train.columns):
    X_tmp = X_test.copy()
    X_tmp[column_name] = 0
    y_pred = models[i].predict(X_tmp,batch_size=256).reshape(-1,1)
    feature_importance[column_name] = best_r_squared - r_squared(y_pred_all, y_test)[0]
    

  0%|                                                   | 0/105 [00:00<?, ?it/s]



  1%|▍                                          | 1/105 [00:07<12:48,  7.39s/it]



  2%|▊                                          | 2/105 [00:13<11:45,  6.85s/it]



  3%|█▏                                         | 3/105 [00:20<11:33,  6.80s/it]



  4%|█▋                                         | 4/105 [00:27<11:25,  6.79s/it]



  5%|██                                         | 5/105 [00:34<11:17,  6.77s/it]



  6%|██▍                                        | 6/105 [00:41<11:46,  7.14s/it]



  7%|██▊                                        | 7/105 [00:47<11:02,  6.76s/it]



  8%|███▎                                       | 8/105 [00:53<10:19,  6.39s/it]



  9%|███▋                                       | 9/105 [00:59<09:50,  6.15s/it]



 10%|████                                      | 10/105 [01:04<09:30,  6.01s/it]



 10%|████▍                                     | 11/105 [01:10<09:28,  6.04s/it]



 11%|████▊                                     | 12/105 [01:17<09:22,  6.05s/it]



 12%|█████▏                                    | 13/105 [01:23<09:36,  6.27s/it]



 13%|█████▌                                    | 14/105 [01:31<10:09,  6.70s/it]



 14%|██████                                    | 15/105 [01:38<09:57,  6.64s/it]



 15%|██████▍                                   | 16/105 [01:45<10:01,  6.76s/it]



 16%|██████▊                                   | 17/105 [01:52<10:05,  6.88s/it]



 17%|███████▏                                  | 18/105 [01:59<10:08,  6.99s/it]



 18%|███████▌                                  | 19/105 [02:06<09:51,  6.88s/it]



 19%|████████                                  | 20/105 [02:12<09:37,  6.79s/it]



 20%|████████▍                                 | 21/105 [02:19<09:23,  6.71s/it]



 21%|████████▊                                 | 22/105 [02:25<09:14,  6.68s/it]



 22%|█████████▏                                | 23/105 [02:33<09:26,  6.91s/it]



 23%|█████████▌                                | 24/105 [02:39<09:15,  6.86s/it]



 24%|██████████                                | 25/105 [02:46<09:10,  6.88s/it]



 25%|██████████▍                               | 26/105 [02:54<09:23,  7.13s/it]



 26%|██████████▊                               | 27/105 [03:00<08:40,  6.68s/it]



 27%|███████████▏                              | 28/105 [03:06<08:13,  6.40s/it]



 28%|███████████▌                              | 29/105 [03:11<07:47,  6.15s/it]



 29%|████████████                              | 30/105 [03:17<07:28,  5.99s/it]



 30%|████████████▍                             | 31/105 [03:22<07:18,  5.93s/it]



 30%|████████████▊                             | 32/105 [03:29<07:19,  6.02s/it]



 31%|█████████████▏                            | 33/105 [03:35<07:15,  6.05s/it]



 32%|█████████████▌                            | 34/105 [03:42<07:42,  6.51s/it]



 33%|██████████████                            | 35/105 [03:49<07:28,  6.40s/it]



 34%|██████████████▍                           | 36/105 [03:55<07:20,  6.38s/it]



 35%|██████████████▊                           | 37/105 [04:01<07:14,  6.40s/it]



 36%|███████████████▏                          | 38/105 [04:08<07:07,  6.38s/it]



 37%|███████████████▌                          | 39/105 [04:14<07:09,  6.50s/it]



 38%|████████████████                          | 40/105 [04:21<06:59,  6.45s/it]



 39%|████████████████▍                         | 41/105 [04:27<06:52,  6.45s/it]



 40%|████████████████▊                         | 42/105 [04:34<06:58,  6.65s/it]



 41%|█████████████████▏                        | 43/105 [04:42<07:05,  6.86s/it]



 42%|█████████████████▌                        | 44/105 [04:47<06:36,  6.50s/it]



 43%|██████████████████                        | 45/105 [04:53<06:13,  6.23s/it]



 44%|██████████████████▍                       | 46/105 [04:59<05:58,  6.08s/it]



 45%|██████████████████▊                       | 47/105 [05:04<05:43,  5.93s/it]



 46%|███████████████████▏                      | 48/105 [05:10<05:31,  5.82s/it]



 47%|███████████████████▌                      | 49/105 [05:16<05:25,  5.81s/it]



 48%|████████████████████                      | 50/105 [05:22<05:24,  5.91s/it]



 49%|████████████████████▍                     | 51/105 [05:28<05:31,  6.13s/it]



 50%|████████████████████▊                     | 52/105 [05:35<05:29,  6.22s/it]



 50%|█████████████████████▏                    | 53/105 [05:41<05:22,  6.21s/it]



 51%|█████████████████████▌                    | 54/105 [05:48<05:32,  6.51s/it]



 52%|██████████████████████                    | 55/105 [05:55<05:27,  6.54s/it]



 53%|██████████████████████▍                   | 56/105 [06:01<05:19,  6.51s/it]



 54%|██████████████████████▊                   | 57/105 [06:08<05:13,  6.53s/it]



 55%|███████████████████████▏                  | 58/105 [06:15<05:12,  6.66s/it]



 56%|███████████████████████▌                  | 59/105 [06:21<05:01,  6.56s/it]



 57%|████████████████████████                  | 60/105 [06:30<05:24,  7.21s/it]



 58%|████████████████████████▍                 | 61/105 [06:42<06:22,  8.70s/it]



 59%|████████████████████████▊                 | 62/105 [06:50<06:05,  8.50s/it]



 60%|█████████████████████████▏                | 63/105 [07:00<06:14,  8.92s/it]



 61%|█████████████████████████▌                | 64/105 [07:06<05:30,  8.06s/it]



 62%|██████████████████████████                | 65/105 [07:12<04:53,  7.34s/it]



 63%|██████████████████████████▍               | 66/105 [07:17<04:26,  6.83s/it]



 64%|██████████████████████████▊               | 67/105 [07:23<04:05,  6.46s/it]



 65%|███████████████████████████▏              | 68/105 [07:29<03:49,  6.20s/it]



 66%|███████████████████████████▌              | 69/105 [07:35<03:44,  6.24s/it]



 67%|████████████████████████████              | 70/105 [07:41<03:34,  6.12s/it]



 68%|████████████████████████████▍             | 71/105 [07:48<03:43,  6.57s/it]



 69%|████████████████████████████▊             | 72/105 [07:55<03:37,  6.58s/it]



 70%|█████████████████████████████▏            | 73/105 [08:02<03:32,  6.65s/it]



 70%|█████████████████████████████▌            | 74/105 [08:08<03:23,  6.58s/it]



 71%|██████████████████████████████            | 75/105 [08:15<03:18,  6.61s/it]



 72%|██████████████████████████████▍           | 76/105 [08:21<03:11,  6.59s/it]



 73%|██████████████████████████████▊           | 77/105 [08:30<03:22,  7.22s/it]



 74%|███████████████████████████████▏          | 78/105 [08:37<03:10,  7.06s/it]



 75%|███████████████████████████████▌          | 79/105 [08:43<02:57,  6.83s/it]



 76%|████████████████████████████████          | 80/105 [08:51<02:56,  7.05s/it]



 77%|████████████████████████████████▍         | 81/105 [08:56<02:40,  6.69s/it]



 78%|████████████████████████████████▊         | 82/105 [09:02<02:26,  6.37s/it]



 79%|█████████████████████████████████▏        | 83/105 [09:08<02:15,  6.16s/it]



 80%|█████████████████████████████████▌        | 84/105 [09:13<02:06,  6.01s/it]



 81%|██████████████████████████████████        | 85/105 [09:19<01:57,  5.87s/it]



 82%|██████████████████████████████████▍       | 86/105 [09:25<01:49,  5.79s/it]



 83%|██████████████████████████████████▊       | 87/105 [09:31<01:45,  5.87s/it]



 84%|███████████████████████████████████▏      | 88/105 [09:38<01:49,  6.41s/it]



 85%|███████████████████████████████████▌      | 89/105 [09:45<01:42,  6.41s/it]



 86%|████████████████████████████████████      | 90/105 [09:51<01:35,  6.35s/it]



 87%|████████████████████████████████████▍     | 91/105 [09:58<01:30,  6.44s/it]



 88%|████████████████████████████████████▊     | 92/105 [10:04<01:24,  6.53s/it]



 89%|█████████████████████████████████████▏    | 93/105 [10:11<01:17,  6.48s/it]



 90%|█████████████████████████████████████▌    | 94/105 [10:17<01:12,  6.57s/it]



 90%|██████████████████████████████████████    | 95/105 [10:25<01:09,  6.97s/it]



 91%|██████████████████████████████████████▍   | 96/105 [10:32<01:02,  6.93s/it]



 92%|██████████████████████████████████████▊   | 97/105 [10:39<00:55,  6.92s/it]



 93%|███████████████████████████████████████▏  | 98/105 [10:46<00:47,  6.76s/it]



 94%|███████████████████████████████████████▌  | 99/105 [10:52<00:40,  6.67s/it]



 95%|███████████████████████████████████████  | 100/105 [11:00<00:35,  7.16s/it]



 96%|███████████████████████████████████████▍ | 101/105 [11:06<00:27,  6.77s/it]



 97%|███████████████████████████████████████▊ | 102/105 [11:12<00:19,  6.43s/it]



 98%|████████████████████████████████████████▏| 103/105 [11:17<00:12,  6.18s/it]



 99%|████████████████████████████████████████▌| 104/105 [11:23<00:06,  6.01s/it]



100%|█████████████████████████████████████████| 105/105 [11:29<00:00,  6.56s/it]


b.	Second, get a measure of the joint importance of all our “macro predictors” (i.e., those taken from Welch and Goyal 2008), by setting them all to zero and finding the decrease in out of sample R2. Comment on how important macroeconomic variables are relative to stock characteristics in predicting returns. 

c.	Repeat the two steps above, but by using a measure of the sensitivity of predictions to each input variable, as outlined in the lectures.

4.Fit a penalised linear model (LASSO) to the same data, using validation data to pick the best penalty (e.g., you can use the “sklearn” package in Python to do this easily). Compare its test data performance to the neural network. 

5.Suppose somebody tells you to collect 10 more micro or macro variables that can predict returns and are not in our current dataset. How would you choose those variables, based on the intuitions you have gained in this project?