Autoencoder. For illustration purposes only. Copy the necessary code into DL_project_modeling_v3 to run it.

In [None]:
import gc
import os

import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint,ReduceLROnPlateau
from tensorflow.keras.layers import LSTM, Dense, Concatenate, Input, Dropout, BatchNormalization

In [None]:
# Autoencoder. Features of the whole time period are fed here (no rolling window
# is needed, unlike the LSTM+MLP training).
#
# Builds three models that share the same four Dense layers:
#   autoencoder : inputs -> 64 -> ENCODING_DIM -> 64 -> N_AE_FEATURES
#   encoder     : inputs -> 64 -> ENCODING_DIM
#   decoder     : ENCODING_DIM -> 64 -> N_AE_FEATURES
ENCODING_DIM = 30  # encoding dims below ~20 may have greater difficulty converging in training
N_AE_FEATURES = 58  # width of the autoencoder input/output; presumably len(cross_section_cols) — confirm


def _ae_initializer():
    """Return a new RandomNormal(mean=0, stddev=0.05) kernel initializer.

    A separate instance is created on every call so that each Dense layer
    gets its own initializer object.
    """
    return tf.keras.initializers.RandomNormal(mean=0., stddev=0.05)


inputs = Input(shape=(N_AE_FEATURES,))

encoded_layer1 = Dense(
    64,
    activation=tf.keras.layers.LeakyReLU(alpha=0.2),
    kernel_initializer=_ae_initializer(),
)
encoded_layer2 = Dense(
    ENCODING_DIM,
    # Leaky ReLU gives smoother convergence, though ReLU yields better sparsity.
    activation=tf.keras.layers.LeakyReLU(alpha=0.2),
    # Surprisingly, the initializer parameters make a significant difference
    # in training performance.
    kernel_initializer=_ae_initializer(),
    # L1 penalty on the code activations adds sparsity (1e-3 == 10e-4).
    activity_regularizer=tf.keras.regularizers.l1(1e-3),
)

decoded_layer1 = Dense(
    64,
    activation=tf.keras.layers.LeakyReLU(alpha=0.2),
    kernel_initializer=_ae_initializer(),
)
decoded_layer2 = Dense(
    N_AE_FEATURES,
    # A non-linear activation could be used, but linear turned out best here.
    activation='linear',
    kernel_initializer=_ae_initializer(),
)

encoded = encoded_layer2(encoded_layer1(inputs))
outputs = decoded_layer2(decoded_layer1(encoded))

autoencoder = tf.keras.Model(inputs=inputs, outputs=outputs)

encoder = tf.keras.Model(inputs=inputs, outputs=encoded)

# The shape must be a tuple, matching the `inputs` definition above; a bare
# integer is not a valid shape specification.
encoded_inputs = Input(shape=(ENCODING_DIM,))
decoder = tf.keras.Model(
    inputs=encoded_inputs,
    outputs=decoded_layer2(decoded_layer1(encoded_inputs)),
)



In [None]:
# Train the autoencoder to reconstruct its own input, then encode the sample.
# Only the cross-sectional features are encoded; the sector dummies and the
# time-series data are NOT.
x_train_ae = df[cross_section_cols]

# RMSprop is best. Adam is good sometimes. Others have difficulty converging.
opt = tf.keras.optimizers.RMSprop(learning_rate=0.05)

autoencoder.compile(
    optimizer=opt, loss='mse', metrics=['mse', 'binary_crossentropy']
)

# Both callbacks monitor the *training* loss ('loss'), not 'val_loss', even
# though a 20% validation split is held out by fit() below.
# The whole sample is very large, so a small patience suffices.
reduce_lr_loss = ReduceLROnPlateau(monitor='loss', factor=0.5, patience=2, verbose=1, min_delta=1e-3, mode='min')
earlyStopping = EarlyStopping(monitor='loss', patience=5, verbose=0, min_delta=1e-4, mode='min')

history_encoder = autoencoder.fit(
    x=x_train_ae, y=x_train_ae, batch_size=256, epochs=80, verbose=1, shuffle=True,
    callbacks=[reduce_lr_loss, earlyStopping],
    validation_split=0.2
)

# Encoded features. predict() runs in batches and returns a NumPy array, so
# the whole sample is never pushed through the network in one eager forward
# pass and `transformed` can be stacked directly with other NumPy features.
transformed = encoder.predict(x_train_ae, batch_size=4096, verbose=0)
gc.collect()

In [None]:
# Reconstruction MSE of the autoencoder over the whole sample.
# Should be around 0.025~0.027 for the above configuration.
# Inference is batched and the arithmetic is done on plain NumPy arrays so the
# result is always a single scalar, regardless of how tensor/DataFrame
# operators would dispatch.
reconstructed = autoencoder.predict(x_train_ae, batch_size=4096, verbose=0)
reconstruction_mse = float(np.mean((reconstructed - np.asarray(x_train_ae)) ** 2))
print(reconstruction_mse)

In [None]:
# Model 2: LSTM + MLP + autoencoder.
#
# Two inputs: a time-series branch processed by two stacked LSTMs, and a
# cross-sectional branch (encoded features + sector dummies). The branches are
# concatenated and passed through a three-layer MLP with a sigmoid output.
N_LAGS = 12            # time steps per sample in the time-series input
N_SECTOR_DUMMIES = 13  # number of sector dummy columns

time_series_inputs = Input(shape=(N_LAGS, 1))
# Width = encoded features + sector dummies (30 + 13 = 43 with the default
# ENCODING_DIM); derived so it stays in sync if ENCODING_DIM is changed.
cross_sectional_inputs = Input(shape=(ENCODING_DIM + N_SECTOR_DUMMIES,))

# First LSTM returns the full sequence so the second LSTM can consume it;
# the second returns only its final state.
h1_rets = LSTM(units=50, return_sequences=True)(time_series_inputs)
time_series_output = LSTM(units=30, return_sequences=False)(h1_rets)

combined_features = Concatenate()([cross_sectional_inputs, time_series_output])

# Seems leaky ReLU performs slightly better?
h1 = Dense(128, activation=tf.keras.layers.LeakyReLU(alpha=0.1))(combined_features)
h2 = Dense(64, activation=tf.keras.layers.LeakyReLU(alpha=0.1))(h1)
h3 = Dense(32, activation=tf.keras.layers.LeakyReLU(alpha=0.1))(h2)
outputs = Dense(1, activation='sigmoid')(h3)

# Input order matters: fit()/predict() must be given
# [cross-sectional features, time-series features].
hybrid_model = tf.keras.Model(inputs=[cross_sectional_inputs, time_series_inputs], outputs=outputs)

opt = tf.keras.optimizers.Adam(
    learning_rate=0.005,
)
hybrid_model.compile(
    optimizer=opt,
    loss='mse',
    metrics=['binary_crossentropy', 'mean_squared_error', 'mean_absolute_error', 'mean_absolute_percentage_error'],
)


In [None]:
# Split the sample by date and train the hybrid model.
# Train window: [start_date, end_date); test window: [end_date, test_end_date).
start_date = '1971-01-31'
end_date = '1981-01-31'
test_end_date = '1981-02-28'

mask_train = (df[['DATE']] >= start_date) & (df[['DATE']] < end_date)
mask_test = (df[['DATE']] >= end_date) & (df[['DATE']] < test_end_date)

# 1-D boolean NumPy masks used to index the feature arrays row-wise.
train_rows = mask_train['DATE'].to_numpy()
test_rows = mask_test['DATE'].to_numpy()

# Model inputs, in the order expected by hybrid_model:
#   x[0]: encoded features stacked with sector dummies, shape (N, 30 + 13)
#   x[1]: time-series features, shape (N, 12)
# NOTE(review): x[1] is 2-D while time_series_inputs is declared as (12, 1);
# this relies on Keras adding the trailing axis — confirm.
x = [
    np.hstack([np.asarray(transformed), df[sector_cols].values]),
    df[time_series_cols].values,
]
y = df[Y].values[:, 0]

x_train = [features[train_rows, :] for features in x]
x_test = [features[test_rows, :] for features in x]
y_train, y_test = y[train_rows], y[test_rows]

# Make sure the checkpoint directory exists before training starts, so the
# first "best model" save cannot fail on a missing folder.
checkpoint_path = 'models/example3_hybrid_reg_model.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min', restore_best_weights=True)
mcp_save = ModelCheckpoint(checkpoint_path, save_best_only=True, monitor='val_loss', mode='min')
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.25, patience=5, verbose=1, min_delta=1e-4, mode='min')

history_hybrid = hybrid_model.fit(
    x=x_train, y=y_train, batch_size=256, epochs=100, verbose=1,
    callbacks=[earlyStopping, mcp_save, reduce_lr_loss],
    validation_split=0.1
)
gc.collect()