In [2]:
import os 
import sys
import tensorflow as tf
import numpy as np
import pickle
from tensorflow.keras.models import load_model

sys.path.append("../")

from src.data.make_dataset import INDEX_SHEET_NAME,load_data
from src.features.build_features import generate_features
from src.models.LSTM import build_lstm_model, generate_train_val_data
from src.models.metrics import LinearCorrelation, MeanAbsolutePercentageError, TheilU

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Display the sheet names imported from src.data.make_dataset; the training
# cells further down iterate over this list, one entry per stock index.
INDEX_SHEET_NAME

['HangSeng Index Data',
 'S&P500 Index Data',
 'CSI300 Index Data',
 'DJIA index Data',
 'Nikkei 225 index Data',
 'Nifty 50 index Data']

In [3]:
# Load the sheet named by the last entry of INDEX_SHEET_NAME and preview its
# first rows as a quick sanity check of the raw columns.
raw = load_data(sheet_name=INDEX_SHEET_NAME[-1])
raw.head()

Unnamed: 0,Date,Ntime,Open Price,High Price,Low price,Closing Price,Volume,MACD,CCI,ATR,...,EMA20,MA5,MA10,MTM6,MTM12,ROC,SMI,WVAD,US dollar Index,Interbank Offered Rate
0,20080701,733590,4039.75,4075.4,3878.2,3896.75,164469220,-201.56,-180.03,197.2,...,4385.058946,4128.49,4253.415,-369.65,-620.35,-13.733369,-0.149627,-828023961.0,72.34,8.706
1,20080702,733591,3895.3,4107.15,3848.25,4093.35,199920144,-199.63,-131.91,258.9,...,4357.277142,4096.63,4204.51,-97.75,-479.15,-10.47895,-0.091225,-561033032.0,71.99,7.73
2,20080703,733592,4094.6,4097.35,3874.85,3925.75,154573765,-209.21,-129.55,222.5,...,4316.179319,4018.61,4146.66,-326.9,-727.25,-15.629701,-0.09702,-726253640.3,72.73,6.4
3,20080704,733593,3926.65,4033.5,3896.4,4016.0,152045352,-207.14,-111.08,137.1,...,4287.590812,3994.48,4113.505,-299.85,-566.4,-12.360335,-0.058691,-548766472.1,72.71,6.21
4,20080707,733596,4002.0,4114.5,4002.0,4030.0,125737237,-202.03,-80.86,112.5,...,4263.058354,3992.37,4089.865,-106.65,-474.25,-10.528945,-0.038439,-486195115.1,72.71,9.0


## The following sections cover the training process of each model.

### WSAE-LSTM

In [4]:
# Train (or reload from disk) one WSAE-LSTM model per index sheet and per
# window folder, evaluate it on that window's test split, and pickle the
# evaluation results under ./wsae-lstm/.
result_dict = dict()
EPOCHS = 500  # NOTE(review): the original carried a trailing "#0" (5000 at some point?) - confirm
past_history = 4
batch_size = 60
n_features = 10  # second entry of inputs_shape; presumably features per time step - confirm
learning_rate = 0.05

result_dir = './wsae-lstm'
# Create the output folder up front so the pickle dumps below cannot fail on
# a missing directory after hours of training.
os.makedirs(result_dir, exist_ok=True)

# 'wase' (sic) is kept in the file names so that models saved by earlier runs
# are still found by the cache check below.
config_filename = '/wase-lstm_config.json'
weight_filename = '/wase-lstm_weights.h5'

for index in INDEX_SHEET_NAME:
    print(f"Start {index} part!")
    result_dict[index] = dict()

    data_dir = f'../data/processed/wsae/{index}'
    if not os.path.exists(data_dir):
        raw = load_data(sheet_name=index)
        generate_features(raw, index)

    # Every window is read from its own sub-folder; skip stray files
    # (e.g. .DS_Store) that would otherwise break the np.load calls.
    train_lst = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )
    for name in train_lst:
        x_train = np.load(data_dir + f'/{name}/X_train.npy')
        y_train = np.load(data_dir + f'/{name}/Y_train.npy')
        x_val = np.load(data_dir + f'/{name}/X_val.npy')
        y_val = np.load(data_dir + f'/{name}/Y_val.npy')
        x_test = np.load(data_dir + f'/{name}/X_test.npy')
        y_test = np.load(data_dir + f'/{name}/Y_test.npy')

        train_data, val_data, test_data = generate_train_val_data(
            x_train, y_train, x_val, y_val, x_test, y_test,
            past_history=past_history, batch_size=batch_size
        )

        model_save_dir = f'../models/{index}/{name}'
        os.makedirs(model_save_dir, exist_ok=True)
        config_path = model_save_dir + config_filename
        weight_path = model_save_dir + weight_filename

        # Train only when either cached artefact is missing.
        if not (os.path.exists(config_path) and os.path.exists(weight_path)):
            print("No existing model, start to train!")
            lstm = build_lstm_model(inputs_shape=[past_history, n_features],
                                    layers=5,
                                    units=[64, 64, 64, 64, 64],
                                    learning_rate=learning_rate)
            lstm.fit(train_data,
                     epochs=EPOCHS,
                     steps_per_epoch=(y_train.shape[0] // batch_size),
                     validation_data=val_data,
                     validation_steps=1,
                     verbose=0)

            # Persist the architecture as JSON and the weights as HDF5.
            with open(config_path, 'w') as json_file:
                json_file.write(lstm.to_json())
            lstm.save_weights(weight_path)
            print("Model Saved!")
        else:
            with open(config_path) as json_file:
                json_config = json_file.read()
            lstm = tf.keras.models.model_from_json(json_config)
            # Model.compile has no `lr` argument; the learning rate belongs
            # to the optimizer instance.
            lstm.compile(loss='mse',
                         optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                         metrics=[
                             tf.keras.metrics.MeanAbsolutePercentageError(),
                             MeanAbsolutePercentageError(),
                             LinearCorrelation(),
                             TheilU()])
            lstm.load_weights(weight_path)

            print("Model Loaded!")

        # steps=1: a single batch of test_data is evaluated.
        result_dict[index][name] = lstm.evaluate(test_data, steps=1)
        # Drop the Keras graph/state before the next window's model is built.
        tf.keras.backend.clear_session()

        print(f">>>>{index} {name} done!<<<<")

    # Best-effort checkpoint of this index's results; report the reason on
    # failure instead of swallowing everything (including KeyboardInterrupt).
    try:
        with open(f'{result_dir}/{index}_train_result.pickle', 'wb') as handle:
            pickle.dump(result_dict[index], handle, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as err:
        print('fail to save!', repr(err))

with open(f'{result_dir}/train_result.pickle', 'wb') as handle:
    pickle.dump(result_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

Start DJIA index Data part!
>>>>201010 finished!<<<<
>>>>201101 finished!<<<<
>>>>201104 finished!<<<<
>>>>201107 finished!<<<<
>>>>201110 finished!<<<<
>>>>201201 finished!<<<<
>>>>201204 finished!<<<<
>>>>201207 finished!<<<<
>>>>201210 finished!<<<<
>>>>201301 finished!<<<<
>>>>201304 finished!<<<<
>>>>201307 finished!<<<<
>>>>201310 finished!<<<<
>>>>201401 finished!<<<<
>>>>201404 finished!<<<<
>>>>201407 finished!<<<<
>>>>201410 finished!<<<<
>>>>201501 finished!<<<<
>>>>201504 finished!<<<<
>>>>201507 finished!<<<<
>>>>201510 finished!<<<<
>>>>201601 finished!<<<<
>>>>201604 finished!<<<<
>>>>201607 finished!<<<<
>>>> Feature generation complete! <<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201010 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201101 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201104 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201107 don

### WLSTM

In [6]:
# Train (or reload from disk) one WLSTM model per index sheet and per window
# folder, evaluate it on that window's test split, and pickle the evaluation
# results under ./wlstm/.
result_dict = dict()
EPOCHS = 500  # NOTE(review): the original carried a trailing "#0" (5000 at some point?) - confirm
past_history = 4
batch_size = 60
n_features = 19  # second entry of inputs_shape; presumably features per time step - confirm
learning_rate = 0.05

result_dir = './wlstm'
# Create the output folder up front so the pickle dumps below cannot fail on
# a missing directory after hours of training.
os.makedirs(result_dir, exist_ok=True)

config_filename = '/wlstm_config.json'
weight_filename = '/wlstm_weights.h5'

for index in INDEX_SHEET_NAME:
    print(f"Start {index} part!")
    result_dict[index] = dict()

    data_dir = f'../data/processed/wavelet/{index}'
    if not os.path.exists(data_dir):
        raw = load_data(sheet_name=index)
        generate_features(raw, index)

    # Every window is read from its own sub-folder; skip stray files
    # (e.g. .DS_Store) that would otherwise break the np.load calls.
    train_lst = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )
    for name in train_lst:
        x_train = np.load(data_dir + f'/{name}/X_train.npy')
        y_train = np.load(data_dir + f'/{name}/Y_train.npy')
        x_val = np.load(data_dir + f'/{name}/X_val.npy')
        y_val = np.load(data_dir + f'/{name}/Y_val.npy')
        x_test = np.load(data_dir + f'/{name}/X_test.npy')
        y_test = np.load(data_dir + f'/{name}/Y_test.npy')

        train_data, val_data, test_data = generate_train_val_data(
            x_train, y_train, x_val, y_val, x_test, y_test,
            past_history=past_history, batch_size=batch_size
        )

        model_save_dir = f'../models/{index}/{name}'
        os.makedirs(model_save_dir, exist_ok=True)
        config_path = model_save_dir + config_filename
        weight_path = model_save_dir + weight_filename

        # Train only when either cached artefact is missing.
        if not (os.path.exists(config_path) and os.path.exists(weight_path)):
            print("No existing model, start to train!")
            lstm = build_lstm_model(inputs_shape=[past_history, n_features],
                                    layers=5,
                                    units=[64, 64, 64, 64, 64],
                                    learning_rate=learning_rate)
            lstm.fit(train_data,
                     epochs=EPOCHS,
                     steps_per_epoch=(y_train.shape[0] // batch_size),
                     validation_data=val_data,
                     validation_steps=1,
                     verbose=0)

            # Persist the architecture as JSON and the weights as HDF5.
            with open(config_path, 'w') as json_file:
                json_file.write(lstm.to_json())
            lstm.save_weights(weight_path)
            print("Model Saved!")
        else:
            with open(config_path) as json_file:
                json_config = json_file.read()
            lstm = tf.keras.models.model_from_json(json_config)
            # Model.compile has no `lr` argument; the learning rate belongs
            # to the optimizer instance.
            lstm.compile(loss='mse',
                         optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                         metrics=[
                             tf.keras.metrics.MeanAbsolutePercentageError(),
                             MeanAbsolutePercentageError(),
                             LinearCorrelation(),
                             TheilU()])
            lstm.load_weights(weight_path)

            print("Model Loaded!")

        # steps=1: a single batch of test_data is evaluated.
        result_dict[index][name] = lstm.evaluate(test_data, steps=1)
        # Drop the Keras graph/state before the next window's model is built.
        tf.keras.backend.clear_session()

        print(f">>>>{index} {name} done!<<<<")

    # Best-effort checkpoint of this index's results; report the reason on
    # failure instead of swallowing everything (including KeyboardInterrupt).
    try:
        with open(f'{result_dir}/{index}_train_result.pickle', 'wb') as handle:
            pickle.dump(result_dict[index], handle, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as err:
        print('fail to save!', repr(err))

with open(f'{result_dir}/train_result.pickle', 'wb') as handle:
    pickle.dump(result_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

Start DJIA index Data part!
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201010 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201101 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201104 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201107 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201110 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201201 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201204 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201207 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201210 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201301 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201304 done!<<<<
No existing model, start to train!
Mo

### SAE-LSTM

In [4]:
# Train one SAE-LSTM model per index sheet and per window folder, evaluate it
# on that window's test split, and pickle the evaluation results under
# ./sae-lstm/.
result_dict = dict()
EPOCHS = 500  # NOTE(review): the original carried a trailing "#0" (5000 at some point?) - confirm
past_history = 4
batch_size = 60
n_features = 10  # second entry of inputs_shape; presumably features per time step - confirm
learning_rate = 0.05

# This cell used to hard-code `if True:` in place of the cache check, i.e. it
# always retrained. The flag keeps that behaviour explicit; set it to False to
# reuse saved models when both artefacts exist.
FORCE_RETRAIN = True

result_dir = './sae-lstm'
# Create the output folder up front so the pickle dumps below cannot fail on
# a missing directory after hours of training.
os.makedirs(result_dir, exist_ok=True)

config_filename = '/sae-lstm_config.json'
weight_filename = '/sae-lstm_weights.h5'

for index in INDEX_SHEET_NAME:
    print(f"Start {index} part!")
    result_dict[index] = dict()

    data_dir = f'../data/processed/sae/{index}'
    if not os.path.exists(data_dir):
        raw = load_data(sheet_name=index)
        generate_features(raw, index)

    # Every window is read from its own sub-folder; skip stray files
    # (e.g. .DS_Store) that would otherwise break the np.load calls.
    train_lst = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )
    for name in train_lst:
        x_train = np.load(data_dir + f'/{name}/X_train.npy')
        y_train = np.load(data_dir + f'/{name}/Y_train.npy')
        x_val = np.load(data_dir + f'/{name}/X_val.npy')
        y_val = np.load(data_dir + f'/{name}/Y_val.npy')
        x_test = np.load(data_dir + f'/{name}/X_test.npy')
        y_test = np.load(data_dir + f'/{name}/Y_test.npy')

        train_data, val_data, test_data = generate_train_val_data(
            x_train, y_train, x_val, y_val, x_test, y_test,
            past_history=past_history, batch_size=batch_size
        )

        model_save_dir = f'../models/{index}/{name}'
        os.makedirs(model_save_dir, exist_ok=True)
        config_path = model_save_dir + config_filename
        weight_path = model_save_dir + weight_filename

        cached = os.path.exists(config_path) and os.path.exists(weight_path)
        if FORCE_RETRAIN or not cached:
            print("No existing model, start to train!")
            lstm = build_lstm_model(inputs_shape=[past_history, n_features],
                                    layers=5,
                                    units=[64, 64, 64, 64, 64],
                                    learning_rate=learning_rate)
            lstm.fit(train_data,
                     epochs=EPOCHS,
                     steps_per_epoch=(y_train.shape[0] // batch_size),
                     validation_data=val_data,
                     validation_steps=1,
                     verbose=0)

            # Persist the architecture as JSON and the weights as HDF5.
            with open(config_path, 'w') as json_file:
                json_file.write(lstm.to_json())
            lstm.save_weights(weight_path)
            print("Model Saved!")
        else:
            with open(config_path) as json_file:
                json_config = json_file.read()
            lstm = tf.keras.models.model_from_json(json_config)
            # Model.compile has no `lr` argument; the learning rate belongs
            # to the optimizer instance.
            lstm.compile(loss='mse',
                         optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                         metrics=[
                             tf.keras.metrics.MeanAbsolutePercentageError(),
                             MeanAbsolutePercentageError(),
                             LinearCorrelation(),
                             TheilU()])
            lstm.load_weights(weight_path)

            print("Model Loaded!")

        # steps=1: a single batch of test_data is evaluated.
        result_dict[index][name] = lstm.evaluate(test_data, steps=1)
        # Drop the Keras graph/state before the next window's model is built.
        tf.keras.backend.clear_session()

        print(f">>>>{index} {name} done!<<<<")

    # Best-effort checkpoint of this index's results; report the reason on
    # failure instead of swallowing everything (including KeyboardInterrupt).
    try:
        with open(f'{result_dir}/{index}_train_result.pickle', 'wb') as handle:
            pickle.dump(result_dict[index], handle, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as err:
        print('fail to save!', repr(err))

with open(f'{result_dir}/train_result.pickle', 'wb') as handle:
    pickle.dump(result_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

Start Nifty 50 index Data part!
No existing model, start to train!
Model Saved!
>>>>Nifty 50 index Data 201010 done!<<<<
No existing model, start to train!
Model Saved!
>>>>Nifty 50 index Data 201101 done!<<<<
No existing model, start to train!
Model Saved!
>>>>Nifty 50 index Data 201104 done!<<<<
No existing model, start to train!
Model Saved!
>>>>Nifty 50 index Data 201107 done!<<<<
No existing model, start to train!
Model Saved!
>>>>Nifty 50 index Data 201110 done!<<<<
No existing model, start to train!
Model Saved!
>>>>Nifty 50 index Data 201201 done!<<<<
No existing model, start to train!
Model Saved!
>>>>Nifty 50 index Data 201204 done!<<<<
No existing model, start to train!
Model Saved!
>>>>Nifty 50 index Data 201207 done!<<<<
No existing model, start to train!
Model Saved!
>>>>Nifty 50 index Data 201210 done!<<<<
No existing model, start to train!
Model Saved!
>>>>Nifty 50 index Data 201301 done!<<<<
No existing model, start to train!
Model Saved!
>>>>Nifty 50 index Data 201304

### LSTM

In [5]:
# Train (or reload from disk) one plain LSTM model per index sheet and per
# window folder, evaluate it on that window's test split, and pickle the
# evaluation results under ./lstm/.
result_dict = dict()
EPOCHS = 500  # NOTE(review): the original carried a trailing "#0" (5000 at some point?) - confirm
past_history = 4
batch_size = 60
n_features = 19  # second entry of inputs_shape; presumably features per time step - confirm
learning_rate = 0.05

result_dir = './lstm'
# Create the output folder up front so the pickle dumps below cannot fail on
# a missing directory after hours of training.
os.makedirs(result_dir, exist_ok=True)

config_filename = '/lstm_config.json'
weight_filename = '/lstm_weights.h5'

for index in INDEX_SHEET_NAME:
    print(f"Start {index} part!")
    result_dict[index] = dict()

    data_dir = f'../data/interim/{index}'
    if not os.path.exists(data_dir):
        raw = load_data(sheet_name=index)
        generate_features(raw, index)

    # Every window is read from its own sub-folder; skip stray files
    # (e.g. .DS_Store) that would otherwise break the np.load calls.
    train_lst = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )
    for name in train_lst:
        x_train = np.load(data_dir + f'/{name}/X_train.npy')
        y_train = np.load(data_dir + f'/{name}/Y_train.npy')
        x_val = np.load(data_dir + f'/{name}/X_val.npy')
        y_val = np.load(data_dir + f'/{name}/Y_val.npy')
        x_test = np.load(data_dir + f'/{name}/X_test.npy')
        y_test = np.load(data_dir + f'/{name}/Y_test.npy')

        train_data, val_data, test_data = generate_train_val_data(
            x_train, y_train, x_val, y_val, x_test, y_test,
            past_history=past_history, batch_size=batch_size
        )

        model_save_dir = f'../models/{index}/{name}'
        os.makedirs(model_save_dir, exist_ok=True)
        config_path = model_save_dir + config_filename
        weight_path = model_save_dir + weight_filename

        # Train only when either cached artefact is missing.
        if not (os.path.exists(config_path) and os.path.exists(weight_path)):
            print("No existing model, start to train!")
            lstm = build_lstm_model(inputs_shape=[past_history, n_features],
                                    layers=5,
                                    units=[64, 64, 64, 64, 64],
                                    learning_rate=learning_rate)
            lstm.fit(train_data,
                     epochs=EPOCHS,
                     steps_per_epoch=(y_train.shape[0] // batch_size),
                     validation_data=val_data,
                     validation_steps=1,
                     verbose=0)

            # Persist the architecture as JSON and the weights as HDF5.
            with open(config_path, 'w') as json_file:
                json_file.write(lstm.to_json())
            lstm.save_weights(weight_path)
            print("Model Saved!")
        else:
            with open(config_path) as json_file:
                json_config = json_file.read()
            lstm = tf.keras.models.model_from_json(json_config)
            # Model.compile has no `lr` argument; the learning rate belongs
            # to the optimizer instance.
            lstm.compile(loss='mse',
                         optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                         metrics=[
                             tf.keras.metrics.MeanAbsolutePercentageError(),
                             MeanAbsolutePercentageError(),
                             LinearCorrelation(),
                             TheilU()])
            lstm.load_weights(weight_path)

            print("Model Loaded!")

        # steps=1: a single batch of test_data is evaluated.
        result_dict[index][name] = lstm.evaluate(test_data, steps=1)
        # Drop the Keras graph/state before the next window's model is built.
        tf.keras.backend.clear_session()

        print(f">>>>{index} {name} done!<<<<")

    # Best-effort checkpoint of this index's results; report the reason on
    # failure instead of swallowing everything (including KeyboardInterrupt).
    try:
        with open(f'{result_dir}/{index}_train_result.pickle', 'wb') as handle:
            pickle.dump(result_dict[index], handle, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as err:
        print('fail to save!', repr(err))

with open(f'{result_dir}/train_result.pickle', 'wb') as handle:
    pickle.dump(result_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

Start DJIA index Data part!
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201010 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201101 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201104 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201107 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201110 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201201 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201204 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201207 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201210 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201301 done!<<<<
No existing model, start to train!
Model Saved!
>>>>DJIA index Data 201304 done!<<<<
No existing model, start to train!
Mo