In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import optuna


import tensorflow as tf
from tensorflow.keras.regularizers import L1
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,BatchNormalization,Activation
from tensorflow.keras import regularizers



panel = pd.read_pickle('../Data/returns_chars_panel.pkl') 
macro = pd.read_pickle('../Data/macro_timeseries.pkl')

In [2]:
# combine micro and macro data
df = pd.merge(panel,macro,on='date',how='left',suffixes=['','_macro']) 

# features + targets 
X = df.drop(columns=['ret','excess_ret','rfree','permno','date']) # everything except return info and IDs
y = df['excess_ret'] 

# Split Train Test 

In [3]:
# make 20 years of training data
date = df['date']
training = (date <= '1977-03') # selects 
X_train, y_train = X.loc[training].values, y.loc[training].values 

# make 10 years of validation data
validation = (date > '1977-03') & (date <= '1987-03') 
X_val, y_val = X.loc[validation].values, y.loc[validation].values 

# make test data
test = (date > '1987-03') 
X_test, y_test = X.loc[test].values, y.loc[test].values 

# Function to create NN model 

In [29]:
# create the final model 
def create_nn(n_layers, input_dim, lamda, learning_rate):
    
    # max nodes in first layer 
    num_layers = 32 
    optimizer = Adam(learning_rate=learning_rate)

    # init model 
    model = Sequential()
    model.add(Dense(32, input_dim=input_dim, 
                kernel_regularizer=regularizers.L1(lamda), 
                kernel_initializer = 'he_normal'))
    model.add(BatchNormalization())

    # add extra hidden layers 
    for i in range(n_layers - 1): 
        num_layers = int(num_layers / 2)
        model.add(Dense(num_layers,
                kernel_regularizer=regularizers.L1(lamda), 
                kernel_initializer = 'he_normal'))
        model.add(BatchNormalization())
        
    # output layer 
    model.add(Dense(1, activation='linear',
                    kernel_regularizer=regularizers.L1(lamda), 
                    kernel_initializer = 'he_normal'))

    model.compile(loss='mse', 
                optimizer=optimizer,
                metrics = ['mse']) 
    return model 


## Cross Validation for Lamda for L2 Regularization 

In [96]:
# Hyperparameters 
epochs = 100
patience = 5
batch_size = 10000

# Using Optuna to cross validate hyper parameter 
input_dim = X_train.shape[1]
n_layers = 4
def create_model(trial):

    num_layers = 32 
    # Suggest hyperparameters
    learning_rate = trial.suggest_categorical('learning_rate', [0.001, 0.01])
    l1_reg = trial.suggest_float('l1_reg', 1e-5, 1e-3, log=True)

    optimizer = Adam(learning_rate=learning_rate)
    model = Sequential()
    model.add(Dense(32, input_dim=input_dim, 
                kernel_regularizer=regularizers.L1(l1_reg), 
                kernel_initializer = 'he_normal'))
    model.add(BatchNormalization())

    # add extra hidden layers 
    for i in range(n_layers - 1): 
        num_layers = int(num_layers / 2)
        model.add(Dense(num_layers,
                kernel_regularizer=regularizers.L1(l1_reg), 
                kernel_initializer = 'he_normal'))
        model.add(BatchNormalization())
        
    # output layer 
    model.add(Dense(1, activation='linear',
                    kernel_regularizer=regularizers.L1(0.01), 
                    kernel_initializer = 'he_normal'))

    model.compile(loss='mse', 
                optimizer=optimizer,
                metrics = ['mse']) 
    return model

# Objective function for Optuna
def objective(trial):
    model = create_model(trial)
    
    # Use early stopping
    early_stopping = EarlyStopping(patience=patience, restore_best_weights=True)

    history = model.fit(
        X_train, y_train,
        epochs,
        batch_size,
        validation_split=0.2,
        callbacks=[early_stopping],
        verbose=0
    )

    # Evaluate the model
    score = model.evaluate(X_val, y_val, verbose=0)
    return score[0]

# Create a study and optimize the objective function
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=10)

# Get the best trial
best_trial = study.best_trial

print("Best trial:")
print(f"  Value: {best_trial.value}")
print("  Params: ")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")



[I 2024-05-24 10:18:42,183] A new study created in memory with name: no-name-4ea5b825-f68d-477d-873a-ea743ec109b7


16
8
4


[I 2024-05-24 10:19:06,213] Trial 0 finished with value: 0.026645848527550697 and parameters: {'learning_rate': 0.001, 'l1_reg': 0.0005436366706974076}. Best is trial 0 with value: 0.026645848527550697.


16
8
4


[I 2024-05-24 10:19:52,434] Trial 1 finished with value: 0.02643212489783764 and parameters: {'learning_rate': 0.001, 'l1_reg': 3.274073920839725e-05}. Best is trial 1 with value: 0.02643212489783764.


16
8
4


[I 2024-05-24 10:20:18,923] Trial 2 finished with value: 0.026593651622533798 and parameters: {'learning_rate': 0.001, 'l1_reg': 0.0001586046427887883}. Best is trial 1 with value: 0.02643212489783764.


16
8
4


[I 2024-05-24 10:20:42,599] Trial 3 finished with value: 0.028688626363873482 and parameters: {'learning_rate': 0.01, 'l1_reg': 0.0005548669578422475}. Best is trial 1 with value: 0.02643212489783764.


16
8
4


[I 2024-05-24 10:21:07,792] Trial 4 finished with value: 0.027741363272070885 and parameters: {'learning_rate': 0.01, 'l1_reg': 0.0003561459577263908}. Best is trial 1 with value: 0.02643212489783764.


16
8
4


[I 2024-05-24 10:21:42,681] Trial 5 finished with value: 0.02658342570066452 and parameters: {'learning_rate': 0.001, 'l1_reg': 0.0003823760995679974}. Best is trial 1 with value: 0.02643212489783764.


16
8
4


[I 2024-05-24 10:22:31,030] Trial 6 finished with value: 0.026645097881555557 and parameters: {'learning_rate': 0.001, 'l1_reg': 7.585546665114775e-05}. Best is trial 1 with value: 0.02643212489783764.


16
8
4


[I 2024-05-24 10:23:08,125] Trial 7 finished with value: 0.026483960449695587 and parameters: {'learning_rate': 0.001, 'l1_reg': 3.6002518380953646e-05}. Best is trial 1 with value: 0.02643212489783764.


16
8
4


[I 2024-05-24 10:23:50,436] Trial 8 finished with value: 0.02637159638106823 and parameters: {'learning_rate': 0.001, 'l1_reg': 1.6762375688023723e-05}. Best is trial 8 with value: 0.02637159638106823.


16
8
4


[I 2024-05-24 10:24:19,073] Trial 9 finished with value: 0.02843630127608776 and parameters: {'learning_rate': 0.01, 'l1_reg': 0.000497867338072132}. Best is trial 8 with value: 0.02637159638106823.


Best trial:
  Value: 0.02637159638106823
  Params: 
    learning_rate: 0.001
    l1_reg: 1.6762375688023723e-05


# Best Hyperparameters 
#### NN2 
- learning_rate = 0.001 
- l1_reg = 1.76e-05

#### NN3 
- learning_rate = 0.001 
- lamda = 2.91e-05

#### N4 
- learning_rate = 0.001 
- l1_reg = 1.67e-05

# Question 2

## Expanding Window R^2_OOS 

#### [ Code to test the R^2 OOS calculation only ]

In [31]:
lamda = 1e-05
epochs = 100
learning_rate = 0.001
patience = 5
batch_size = 10000
model = create_nn(3, X_test.shape[1], lamda , learning_rate)

history = model.fit(X_train, y_train, 
                        epochs=100, 
                        batch_size=batch_size, 
                        verbose=False,
                        validation_data = (X_val, y_val),
                        callbacks = [EarlyStopping(patience = patience, restore_best_weights=True)])
predictions = model.predict(X_test)
df_predictions = pd.DataFrame(predictions, columns=['Prediction'])
df_predictions['Actual'] = y_test
df_predictions['dif_squared'] = (df_predictions['Prediction'] - df_predictions['Actual'])**2
df_predictions['actual_sqaured'] = df_predictions['Actual']**2
R_OOS = 1 - (df_predictions['dif_squared'].sum()/df_predictions['actual_sqaured'].sum()) 
print(R_OOS)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m77295/77295[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 237us/step
-0.07850356207538467


# Code to calculate R^2 OOS 

In [32]:
import time

end_year = date.max()

# Hyperparameters 
lamda = 1e-05
epochs = 100
learning_rate = 0.001
patience = 5
batch_size = 10000
ensemble = 10 
hidden_layers = 3

# Size of rolling window in years 
val_length = 12 
test_length = 1

total_R_2_OOS = [] 
model = create_nn(hidden_layers, X.shape[1], lamda , learning_rate)

for i in range(1988, end_year.year - val_length - test_length):
    print(i)
    predictions = [] 
    training_window = (date < datetime(i,1,1)) 
    validation_window = (date >= datetime(i,1,1)) & (date < datetime(i+val_length,1,1)) 
    test_window = (date >= datetime(i+val_length,1,1)) & (date < datetime(i+val_length+test_length,1,1))

    X_train_expanding, y_train_expanding = X.loc[training_window].values, y.loc[training_window].values
    X_val_expanding , y_val_expanding = X.loc[validation_window].values, y.loc[validation_window].values
    X_test_expanding, y_test_expanding =  X.loc[test_window].values, y.loc[test_window].values

    # take the final output as the average of ensemble number of NN 
    for j in range(ensemble): 
        # set random seed each time the model is trained 
        seed = int(time.time())
        tf.random.set_seed(seed)
        history = model.fit(X_train_expanding, y_train_expanding, 
                            epochs=epochs, 
                            batch_size=batch_size, 
                            verbose=False,
                            validation_data = (X_val_expanding, y_val_expanding),
                            callbacks = [EarlyStopping(patience = patience, restore_best_weights=True)])
        current_prediction = model.predict(X_test_expanding)
        # Average the predictions 10 times from 10 different nueral network models 
        if len(predictions) == 0:
            predictions = current_prediction
        else: 
            predictions = (predictions + current_prediction) /2 

    df_predictions = pd.DataFrame(predictions, columns=['Prediction'])
    df_predictions['Actual'] = y_test_expanding
    df_predictions['dif_squared'] = (df_predictions['Prediction'] - df_predictions['Actual'])**2
    df_predictions['actual_sqaured'] = df_predictions['Actual']**2
    R_OOS = 1 - (df_predictions['dif_squared'].sum()/df_predictions['actual_sqaured'].sum()) 
    # print("***** R^2_OOS ", i, R_OOS)
    total_R_2_OOS.append(R_OOS)


# calculate the mean OOS for all time periods 
print("Final R^2 OOS :  ", np.mean(total_R_2_OOS)) 
    
    


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3088/3088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 231us/step
[1m3088/3088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 237us/step
[1m3088/3088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 227us/step
[1m3088/3088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 226us/step
[1m3088/3088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 227us/step
[1m3088/3088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 229us/step
[1m3088/3088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 226us/step
[1m3088/3088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 229us/step
[1m3088/3088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 235us/step
[1m3088/3088[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 229us/step
[1m2884/2884[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 236us/step
[1m2884/2884[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 244us/step
[1m2884/2884[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m