In [1]:
import os 
import sys
import tensorflow as tf
import numpy as np
import pickle
from tensorflow.keras.models import load_model

sys.path.append("../")

from src.data.make_dataset import INDEX_SHEET_NAME,load_data
from src.features.build_features import generate_features
from src.models.LSTM import build_lstm_model, generate_train_val_data
from src.models.metrics import LinearCorrelation, MeanAbsolutePercentageError, TheilU

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
# Display the index sheet names imported from src.data.make_dataset.
INDEX_SHEET_NAME

['HangSeng Index Data',
 'S&P500 Index Data',
 'CSI300 Index Data',
 'DJIA index Data',
 'Nikkei 225 index Data',
 'Nifty 50 index Data']

In [3]:
# Load the sheet for the first entry of INDEX_SHEET_NAME and preview its
# first rows as the cell output.
raw = load_data(sheet_name=INDEX_SHEET_NAME[0])
raw.head()

Unnamed: 0,Ntime,Time,Closing Price,Open Price,High price,Low Price,Volume,MACD,CCI,ATR,...,EMA20,MA10,MTM6,MA5,MTM12,ROC,SMI,WVAD,US Dollar Index,HIBOR
0,20080702,733591,21704.4492,21785.3906,21938.1992,21555.5293,254858.26,-646.0532,-193.115118,546.4805,...,22956.709411,22497.96286,-751.5703,22187.92768,-887.8516,-5.754486,-0.069509,-401619.369837,71.99,1.6
1,20080703,733592,21242.7793,21389.4902,21742.0703,21163.5703,272528.48,-702.28262,-186.955957,578.5,...,22793.477972,22289.66071,-1392.3806,21909.45156,-1786.9102,-7.872373,-0.068201,-507305.257079,72.73,1.6
2,20080704,733593,21423.8203,21402.1699,21534.0508,21344.8496,179634.42,-723.89177,-133.742667,291.2715,...,22663.034384,22152.2818,-1031.8496,21703.08164,-1634.1699,-8.153977,-0.058111,-398848.614015,72.71,1.6
3,20080707,733596,21913.0605,21402.6992,21916.2109,21402.6992,196457.92,-693.54483,-77.445777,513.5117,...,22591.6083,22069.02789,-129.2891,21677.22382,-1412.7403,-3.880007,-0.035303,-264228.169678,72.71,1.6
4,20080708,733597,21220.8105,21632.6992,21684.2109,21098.8398,201519.68,-717.087314,-108.634123,814.2207,...,22461.056128,21919.61285,-881.1993,21500.98396,-1576.7989,-6.703666,-0.055006,-518512.844172,72.96,1.6


### LSTM

In [14]:
# Train (or reload) the LSTM for one index / one data split, then evaluate it
# on the test set and store the metrics in result_dict[index][name].
result_dict = dict()

# --- configuration -----------------------------------------------------------
EPOCHS = 10  # NOTE(review): the original read "10# 00", which hints at 1000 for a full run - confirm
past_history = 4       # passed to generate_train_val_data and used as the first model input dimension
BATCH_SIZE = 60        # passed to generate_train_val_data and used to derive steps_per_epoch
LEARNING_RATE = 0.05   # used both when building a new model and when re-compiling a loaded one

index = INDEX_SHEET_NAME[0]
#for index in INDEX_SHEET_NAME:
print(f"Start {index} part!")
result_dict[index] = dict()

# --- data --------------------------------------------------------------------
data_dir = f'../data/processed/wsae/{index}'
if not os.path.exists(data_dir):
    # Processed features are missing: build them from the raw sheet.
    raw = load_data(sheet_name=index)
    generate_features(raw, index)

train_lst = os.listdir(data_dir)
name = train_lst[0]
# for name in train_lst:
split_dir = os.path.join(data_dir, name)
x_train = np.load(os.path.join(split_dir, 'X_train.npy'))
y_train = np.load(os.path.join(split_dir, 'Y_train.npy'))
x_val = np.load(os.path.join(split_dir, 'X_val.npy'))
y_val = np.load(os.path.join(split_dir, 'Y_val.npy'))
x_test = np.load(os.path.join(split_dir, 'X_test.npy'))
y_test = np.load(os.path.join(split_dir, 'Y_test.npy'))

train_data, val_data, test_data = generate_train_val_data(
    x_train, y_train, x_val, y_val, x_test, y_test,
    past_history=past_history, batch_size=BATCH_SIZE
)

# --- model persistence paths -------------------------------------------------
model_save_dir = f'../models/{index}/{name}'
os.makedirs(model_save_dir, exist_ok=True)

config_path = os.path.join(model_save_dir, 'wase-lstmt_config.json')
weights_path = os.path.join(model_save_dir, 'wase-lstmt_weights.h5')
# Earlier versions of this cell concatenated directory and file name without a
# separator, so previously saved weights may sit next to the model directory
# (e.g. "<name>wase-lstmt_weights.h5"). Keep reading those files.
legacy_weights_path = model_save_dir + 'wase-lstmt_weights.h5'

if os.path.exists(weights_path):
    saved_weights = weights_path
elif os.path.exists(legacy_weights_path):
    saved_weights = legacy_weights_path
else:
    saved_weights = None

# --- train or load -----------------------------------------------------------
# A saved model is usable only if both its config and its weights are present;
# otherwise (re)train so a half-written save cannot break the load branch.
if saved_weights is None or not os.path.exists(config_path):
    print("No existing model, start to train!")
    lstm = build_lstm_model(inputs_shape=[past_history, 10],
                            layers=5,
                            units=[64, 64, 64, 64, 64],
                            learning_rate=LEARNING_RATE)
    lstm.fit(train_data,
             epochs=EPOCHS,
             steps_per_epoch=(y_train.shape[0] // BATCH_SIZE),
             validation_data=val_data,
             validation_steps=1,
             verbose=0)

    # Save the weights first: the config file then only exists when the
    # weights were written successfully.
    lstm.save_weights(weights_path)
    with open(config_path, 'w') as json_file:
        json_file.write(lstm.to_json())
    print("Model Saved!")
else:
    with open(config_path) as json_file:
        json_config = json_file.read()
    lstm = tf.keras.models.model_from_json(json_config)
    # The learning rate belongs to the optimizer, not to compile().
    lstm.compile(loss='mse',
                 optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
                 metrics=[
                     tf.keras.metrics.MeanAbsolutePercentageError(),
                     MeanAbsolutePercentageError(),
                     LinearCorrelation(),
                     TheilU()])
    lstm.load_weights(saved_weights)

    print("Model Loaded!")

# --- evaluation --------------------------------------------------------------
result_dict[index][name] = lstm.evaluate(test_data, steps=1)

print(f">>>>{index} {name} done!<<<<")

Start HangSeng Index Data part!
Model Loaded!
>>>>HangSeng Index Data 201510 done!<<<<


In [None]:
# Persist the evaluation results: one pickle for the current index
# (best-effort) and one pickle for the whole result_dict.
results_dir = './WSAE-LSTM'
# Create the output directory up front so neither dump fails just because it
# does not exist yet.
os.makedirs(results_dir, exist_ok=True)

try:
    with open(os.path.join(results_dir, f'{index}_train_result.pickle'), 'wb') as handle:
        pickle.dump(result_dict[index], handle, protocol=pickle.HIGHEST_PROTOCOL)
except Exception as err:
    # Still best-effort, but report why it failed and let KeyboardInterrupt
    # propagate instead of being swallowed by a bare except.
    print('fail to save!', repr(err))

with open(os.path.join(results_dir, 'train_result.pickle'), 'wb') as handle:
    pickle.dump(result_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [7]:
# Reload previously pickled results into result_dict (pickle is imported in
# the first cell of the notebook).
# NOTE(review): the saving cell writes './WSAE-LSTM/train_result.pickle',
# while this reads 'train_result.pickle' from the working directory - confirm
# which file is intended.
# Only unpickle files you produced yourself: pickle.load can execute
# arbitrary code from the file.
with open('train_result.pickle', 'rb') as handle:
    result_dict = pickle.load(handle)