In [None]:
'''
Project
Forecasting Stock Market Indices Using the Recurrent Neural Network Based Hybrid Models: CNN-LSTM, GRU-CNN, and Ensemble Models
'''

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import warnings, os, shutil, random, sys

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import Adam, RMSprop, Adadelta, Adagrad, Ftrl
from tensorflow.keras import initializers, metrics
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.layers import BatchNormalization, Dropout
from tensorflow.keras.layers import MaxPool1D, GlobalMaxPooling1D
from tensorflow.keras import layers


# --- Reproducibility and environment report ---------------------------------
SEED = 777

# NOTE: PYTHONHASHSEED is read at interpreter start-up, so setting it here only
# affects processes launched after this point, not the running kernel.
os.environ["PYTHONHASHSEED"] = str(SEED)

random.seed(SEED)
np.random.seed(SEED)
# TensorFlow has its own global seed; without it, any TF-side randomness that
# is not explicitly seeded differs between runs.
tf.random.set_seed(SEED)

# Silences every Python warning, including deprecation notices from TF/Keras.
warnings.filterwarnings(action='ignore')

print("python version:",sys.version)
print("numpy version:", np.__version__)
print("tensorflow version:", tf.__version__)

In [None]:
!nvidia-smi

In [None]:
# Seeded weight initializers used by the model builders in this notebook:
# Re_INIT for recurrent kernels, KERNEL_INIT for input/dense kernels.
Re_INIT = initializers.Orthogonal(seed=SEED, gain=1.0)
KERNEL_INIT = initializers.GlorotUniform(seed=SEED)
        
def last_time_step_mse(y_true, y_pred):
    """Keras metric: mean squared error restricted to the last index of axis 1.

    Both tensors are sliced with ``[:, -1]`` before the error is computed, so
    only the final entry along the second axis contributes.
    """
    true_last = y_true[:, -1]
    pred_last = y_pred[:, -1]
    return metrics.mean_squared_error(true_last, pred_last)
def last_time_step_mae(y_true, y_pred):
    """Keras metric: mean absolute error restricted to the last index of axis 1.

    Both tensors are sliced with ``[:, -1]`` before the error is computed, so
    only the final entry along the second axis contributes.
    """
    true_last = y_true[:, -1]
    pred_last = y_pred[:, -1]
    return metrics.mean_absolute_error(true_last, pred_last)

In [None]:
import pickle

def save_data(data, path):
    """Pickle ``data`` into the file at ``path``, overwriting any existing file.

    Args:
        data: Any picklable Python object.
        path: Destination file path; opened in binary write mode.
    """
    with open(path, mode='wb') as sink:
        pickle.dump(data, sink)

In [None]:
'''
    Load Pickled Data
'''
import pickle

def load_data(path):
    """Return the object unpickled from the file at ``path``.

    The file is opened in binary read mode. Unpickling can execute arbitrary
    code, so only point this at files you trust.
    """
    with open(path, mode='rb') as source:
        return pickle.load(source)

# Unpickle the preprocessed train/test collections and the fitted scalers once;
# get_preprocess_data() indexes into these module-level objects.
PREPROC_ALL_TRAIN_DATA, PREPROC_ALL_TEST_DATA, SCALER = map(
    load_data,
    (
        './save_files3/PREPROC_ALL_TRAIN_DATA.pickle',
        './save_files3/PREPROC_ALL_TEST_DATA.pickle',
        './save_files3/SCALER.pickle',
    ),
)

In [None]:
def make_dataset(x, y, input_wd, pred_wd):
    """Cut aligned sliding windows out of ``x`` and ``y``.

    For every start index ``i`` the input window is ``x[i:i+input_wd]`` and the
    target window is the ``pred_wd`` entries of ``y`` that immediately follow it.

    Args:
        x: Sliceable sequence of inputs; its length fixes the window count.
        y: Sliceable sequence of targets, indexed in step with ``x``.
        input_wd: Length of each input window.
        pred_wd: Length of each target window.

    Returns:
        Tuple ``(inputs, targets)`` of NumPy arrays with
        ``len(x) - input_wd - pred_wd + 1`` windows each (empty arrays when
        that count is not positive).
    """
    n_windows = len(x) - input_wd - pred_wd + 1
    inputs = [x[start:start + input_wd] for start in range(n_windows)]
    targets = [
        y[start + input_wd:start + input_wd + pred_wd]
        for start in range(n_windows)
    ]
    return np.array(inputs), np.array(targets)

In [None]:
'''
    Load preprocessed data
'''
def get_preprocess_data(feature_code = "OHMLVC", data_code = "DJI", duration = "P0019", dataset_split="T10"):
    """Look up one train/test split and its scalers in the loaded pickles.

    Keys are built as ``{data_code}_{duration}_{dataset_split}_{suffix}`` with
    suffix ``TRX``/``TRY`` (train) and ``TEX``/``TEY`` (test); the scalers are
    stored under ``{data_code}_{duration}``.

    Args:
        feature_code: First-level key in all three module-level containers.
        data_code: Index/ticker code, first part of every key.
        duration: Period code, second part of every key.
        dataset_split: Split code, third part of the train/test keys.

    Returns:
        Tuple ``(train_x, train_y, test_x, test_y, scaler_x, scaler_y)``.

    Raises:
        KeyError: If any of the requested keys is missing.
    """
    scaler_key = f"{data_code}_{duration}"
    split_prefix = f"{scaler_key}_{dataset_split}"

    # Lookups stay in the order of the returned tuple.
    train_x = PREPROC_ALL_TRAIN_DATA[feature_code][f"{split_prefix}_TRX"]
    train_y = PREPROC_ALL_TRAIN_DATA[feature_code][f"{split_prefix}_TRY"]
    test_x = PREPROC_ALL_TEST_DATA[feature_code][f"{split_prefix}_TEX"]
    test_y = PREPROC_ALL_TEST_DATA[feature_code][f"{split_prefix}_TEY"]
    scaler_x = SCALER[feature_code][scaler_key]["x"]
    scaler_y = SCALER[feature_code][scaler_key]["y"]

    return train_x, train_y, test_x, test_y, scaler_x, scaler_y

In [None]:
'''
    CNN_LSTM
'''
def cnn_lstm(in_window, n_feature, x, y, x_test, y_test, pred_wd, EPOCHS, OPT, lr, verbose):
    """Build, compile and train the Conv1D -> LSTM hybrid model.

    Layers: Input -> Conv1D(32, k=3, causal, relu) -> LSTM(128)
    -> BatchNormalization -> Dropout(0.2) -> Dense(pred_wd, relu).

    Args:
        in_window: Number of time steps in each input window.
        n_feature: Number of features per time step.
        x, y: Training inputs and targets passed to ``model.fit``.
        x_test, y_test: Unused; kept so the signature matches ``gru_cnn``.
        pred_wd: Number of output units (prediction window length).
        EPOCHS: Maximum number of training epochs.
        OPT: ``"ADAM"`` selects Adam; any other value selects RMSprop.
        lr: Optimizer learning rate.
        verbose: Verbosity forwarded to ``model.fit``.

    Returns:
        Tuple ``(model, filename)``: the model as left by the final training
        epoch, and the checkpoint path holding the weights with the lowest
        ``val_loss``. The best weights are NOT loaded here; callers load them.
    """
    model = Sequential([
        Input(shape=(in_window, n_feature)),
        Conv1D(
            filters=32,
            kernel_size=3,
            strides=1,
            padding="causal",
            activation="relu",
        ),
        LSTM(128, return_sequences=False, stateful=False,
             kernel_initializer=KERNEL_INIT,
             recurrent_initializer=Re_INIT,
             bias_initializer='zeros'),
        BatchNormalization(),
        Dropout(0.2, seed=777),
        # NOTE(review): a relu output cannot predict negative values;
        # presumably the targets are scaled to be non-negative -- confirm.
        Dense(pred_wd, kernel_initializer=KERNEL_INIT,
              bias_initializer="zeros", activation='relu'),
    ])

    # `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
    # optimizers reject.
    optimizer_cls = Adam if OPT == "ADAM" else RMSprop
    opt = optimizer_cls(learning_rate=lr)

    model.compile(loss=tf.keras.losses.Huber(), optimizer=opt,
                  metrics=[last_time_step_mse, last_time_step_mae])

    earlystopping = EarlyStopping(monitor='val_loss', patience=10, mode='min')

    # NOTE(review): newer Keras releases may require weight files to end in
    # `.weights.h5`; the path is kept unchanged here.
    filename = os.path.join('cnn_lstm', 'ckeckpointer.ckpt')
    # Make sure the checkpoint directory exists before the callback writes.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    checkpoint = ModelCheckpoint(filename,
                                 save_weights_only=True,
                                 save_best_only=True,
                                 monitor='val_loss',
                                 verbose=0)

    # shuffle=False keeps the samples in their original order; validation_split
    # holds out the trailing 10% of that order.
    model.fit(x, y, batch_size=32, epochs=EPOCHS, validation_split=0.1,
              shuffle=False, verbose=verbose,
              callbacks=[earlystopping, checkpoint])
    return model, filename
    

In [None]:
'''
    GRU_CNN Model
'''
def gru_cnn(in_window, n_feature,x,y,x_test, y_test, pred_wd, EPOCHS, OPT, lr, verbose):
    """Build, compile and train the GRU -> Conv1D hybrid model.

    Layers: Input -> GRU(128, return_sequences) -> Conv1D(32, k=3, causal, relu)
    -> GlobalMaxPooling1D -> Dense(10, relu) -> Dropout(0.2)
    -> Dense(pred_wd, relu).

    Args:
        in_window: Number of time steps in each input window.
        n_feature: Number of features per time step.
        x, y: Training inputs and targets passed to ``model.fit``.
        x_test, y_test: Unused; kept so the signature matches ``cnn_lstm``.
        pred_wd: Number of output units (prediction window length).
        EPOCHS: Maximum number of training epochs.
        OPT: ``"ADAM"`` selects Adam; any other value selects RMSprop.
        lr: Optimizer learning rate.
        verbose: Verbosity forwarded to ``model.fit``.

    Returns:
        Tuple ``(model, filename)``: the model as left by the final training
        epoch, and the checkpoint path holding the weights with the lowest
        ``val_loss``. The best weights are NOT loaded here; callers load them.
    """
    model = Sequential([
        # Explicit Input layer, matching how cnn_lstm declares its input.
        Input(shape=(in_window, n_feature)),
        GRU(128,
            kernel_initializer=KERNEL_INIT,
            recurrent_initializer=Re_INIT,
            return_sequences=True,
            bias_initializer='zeros'),
        Conv1D(
            filters=32,
            kernel_size=3,
            strides=1,
            padding="causal",
            activation="relu",
        ),
        GlobalMaxPooling1D(),
        Dense(10, kernel_initializer=KERNEL_INIT,
              bias_initializer="zeros", activation='relu'),
        Dropout(0.2, seed=777),
        # NOTE(review): a relu output cannot predict negative values;
        # presumably the targets are scaled to be non-negative -- confirm.
        Dense(pred_wd, kernel_initializer=KERNEL_INIT,
              bias_initializer="zeros", activation='relu'),
    ])

    # `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
    # optimizers reject.
    optimizer_cls = Adam if OPT == "ADAM" else RMSprop
    opt = optimizer_cls(learning_rate=lr)

    model.compile(loss=tf.keras.losses.Huber(), optimizer=opt,
                  metrics=[last_time_step_mse, last_time_step_mae])

    earlystopping = EarlyStopping(monitor='val_loss', patience=10, mode='min')

    # NOTE(review): newer Keras releases may require weight files to end in
    # `.weights.h5`; the path is kept unchanged here.
    filename = os.path.join('gru_cnn', 'ckeckpointer.ckpt')
    # Make sure the checkpoint directory exists before the callback writes.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    checkpoint = ModelCheckpoint(filename,
                                 save_weights_only=True,
                                 save_best_only=True,
                                 monitor='val_loss',
                                 verbose=0)

    # shuffle=False keeps the samples in their original order; validation_split
    # holds out the trailing 10% of that order.
    model.fit(x, y, batch_size=32, epochs=EPOCHS, validation_split=0.1,
              shuffle=False, verbose=verbose,
              callbacks=[earlystopping, checkpoint])

    return model, filename

In [None]:
def train_model(model_type, name, iw, pw, n_features, trd_x, trd_y, ted_x, ted_y, lr, epochs, verbose):
    """Train one model, restore its best checkpoint and predict on the test set.

    Args:
        model_type: ``'gru_cnn'`` or ``'cnn_lstm'``.
        name: Run label; currently unused, kept for interface compatibility.
        iw: Input window length.
        pw: Prediction window length.
        n_features: Number of features per time step.
        trd_x, trd_y: Training inputs and targets.
        ted_x, ted_y: Test inputs and targets.
        lr: Optimizer learning rate.
        epochs: Maximum number of training epochs.
        verbose: Verbosity forwarded to training and prediction.

    Returns:
        Tuple ``(ted_y[:, -1], y_pred[:, -1])``: the last index along axis 1 of
        the test targets and of the predictions.

    Raises:
        ValueError: If ``model_type`` is not a supported model.
    """
    builders = {
        'gru_cnn': gru_cnn,
        'cnn_lstm': cnn_lstm,
    }
    if model_type not in builders:
        # The original fell through to `None.load_weights(...)` here.
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected one of {sorted(builders)}"
        )

    # Both builders use RMSprop for any optimizer code other than "ADAM".
    model, filename = builders[model_type](
        iw, n_features, trd_x, trd_y, ted_x, ted_y, pw, epochs, 'RMSprop', lr, verbose
    )

    # The builder returns last-epoch weights; load the best-val_loss checkpoint.
    model.load_weights(filename)
    y_pred = model.predict(ted_x, verbose=verbose)

    return ted_y[:,-1], y_pred[:,-1]

In [None]:
# Experiment grid: the training loop runs every combination of these lists.
# Alternatives that were commented out in the grid are listed for reference.

# Feature-set codes (disabled: "OHLV", "MV", "MVC").
FEATURES = ["OHMLVC"]

# Period codes (disabled: "P0019", "P1719").
DURATIONS = ["P1921"]

# Index codes.
DATA = ["DJI", "US500", "DAX"]

# Input window lengths (disabled: 21, 42).
IW = [5]

# Prediction window lengths (disabled: 5).
PW = [1]

# Model types handled by train_model:
#   gru_cnn  : GRU -> Conv1d -> GMP1D -> Dense -> Dropout -> Dense
#   cnn_lstm : Conv1d -> LSTM -> BatchNorm -> Dropout -> Dense
# (disabled: "lstm_cnn", "lstm_cnn_gmp")
MODELS = ["gru_cnn", "cnn_lstm"]

# Run the full experiment grid. For every combination, the last-step test
# targets and predictions are appended to true_result / pred_result in order.
true_result = []
pred_result = []
for feature in FEATURES:
    for duration in DURATIONS:
        for data in DATA:
            # The raw split depends only on (feature, data, duration), so load
            # it once instead of once per window/model combination.
            trd_x, trd_y, ted_x, ted_y, scaler_x, scaler_y = get_preprocess_data(
                feature_code=feature,
                data_code=data,
                duration=duration,
                dataset_split='T20'
            )

            for iw in IW:
                for pw in PW:
                    # Windowing depends on (iw, pw) but not on the model type.
                    train_x_dataset, train_y_dataset = make_dataset(trd_x, trd_y, iw, pw)
                    test_x_dataset, test_y_dataset = make_dataset(ted_x, ted_y, iw, pw)

                    for model in MODELS:
                        model_name = f"{iw}_{pw}_{feature}_{data}_{duration}_{model}"

                        # Drop graph/state left over from the previous model.
                        tf.keras.backend.clear_session()

                        # Bind the results to fresh names. The original assigned
                        # into `ted_y[:,-1], y_pred[:,-1]`, which raises NameError
                        # on a fresh kernel (`y_pred` undefined) and would
                        # overwrite the raw test targets.
                        y_true_last, y_pred_last = train_model(
                            model,
                            model_name,
                            iw,
                            pw,
                            # NOTE(review): assumes the feature code has one
                            # letter per input column -- confirm.
                            len(feature),
                            train_x_dataset,
                            train_y_dataset,
                            test_x_dataset,
                            test_y_dataset,
                            lr=0.0005,
                            epochs=1,
                            verbose=0)

                        true_result.append(y_true_last)
                        pred_result.append(y_pred_last)