In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow import keras as keras
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import math
from statistics import mean

In [2]:
# Load the AAPL daily price history; the frame displayed below has
# Date/Open/High/Low/Close/Volume/OpenInt columns.
data = pd.read_csv("data/Stocks/aapl.us.txt")

In [3]:
# Show the loaded frame (pandas truncates the middle rows in the rendered output).
data

Unnamed: 0,Date,Open,High,Low,Close,Volume,OpenInt
0,1984-09-07,0.42388,0.42902,0.41874,0.42388,23220030,0
1,1984-09-10,0.42388,0.42516,0.41366,0.42134,18022532,0
2,1984-09-11,0.42516,0.43668,0.42516,0.42902,42498199,0
3,1984-09-12,0.42902,0.43157,0.41618,0.41618,37125801,0
4,1984-09-13,0.43927,0.44052,0.43927,0.43927,57822062,0
...,...,...,...,...,...,...,...
8359,2017-11-06,171.75000,174.36000,171.10000,173.63000,34901241,0
8360,2017-11-07,173.29000,174.51000,173.29000,174.18000,24424877,0
8361,2017-11-08,174.03000,175.61000,173.71000,175.61000,24451166,0
8362,2017-11-09,174.48000,175.46000,172.52000,175.25000,29533086,0


In [5]:
# Work on day-over-day differences of the closing price; keep the raw closes
# as an (n, 1) column alongside them.
diff_data = data['Close'].diff()   # element 0 is NaN (no previous day)
raw_data = data['Close'].values.reshape(-1, 1)

# Chronological 60 / 20 / 20 split into train / validation / test.
n_rows = len(data)
train_end = int(n_rows * 0.60)
val_end = int(n_rows * 0.80)

train_data = diff_data[0:train_end]
val_data = diff_data[train_end:val_end]
test_data = diff_data[val_end:n_rows]
# Starts one row before the test split -- presumably so the first test
# difference can be added back onto the preceding close; confirm against the
# cell that consumes raw_test.
raw_test = raw_data[val_end - 1:n_rows]

training_set = train_data.values.reshape(-1, 1)
val_set = val_data.values.reshape(-1, 1)

# The scaler is fitted on the training differences only and then applied to
# both the training and validation sets.
scaler = MinMaxScaler(feature_range=(0, 1))
# `scalar` holds whatever fit() returns; the name is kept in case later cells
# refer to it.
scalar = scaler.fit(training_set)
scaled_training_set = scaler.transform(training_set)
scaled_val_set = scaler.transform(val_set)

In [15]:
def prepare_train(timestep):
    """Slice the scaled training series into supervised (window, next value) pairs.

    Reads the notebook globals ``training_set`` and ``scaled_training_set``.

    Args:
        timestep: Number of consecutive past values in each input window.

    Returns:
        Tuple ``(x_train, y_train)`` where ``x_train`` has shape
        ``(n_samples, timestep, 1)`` and ``y_train`` has shape ``(n_samples,)``.
    """
    # Targets start at timestep + 1, so no window ever includes row 0 of the
    # series. Row 0 is the NaN left by .diff() -- presumably that is why the
    # start is offset by one; confirm before changing.
    target_positions = range(timestep + 1, len(training_set))

    windows = np.array(
        [scaled_training_set[pos - timestep:pos, 0] for pos in target_positions]
    )
    y_train = np.array([scaled_training_set[pos, 0] for pos in target_positions])

    # Add the trailing feature axis that the LSTM input expects.
    x_train = windows.reshape(windows.shape[0], windows.shape[1], 1)
    return x_train, y_train

In [16]:
def prepare_val(timestep):
    """Slice the scaled validation series into supervised (window, next value) pairs.

    Reads the notebook globals ``val_set`` and ``scaled_val_set``.

    Args:
        timestep: Number of consecutive past values in each input window.

    Returns:
        Tuple ``(x_val, y_val)`` where ``x_val`` has shape
        ``(n_samples, timestep, 1)`` and ``y_val`` has shape ``(n_samples,)``.
    """
    target_positions = range(timestep, len(val_set))

    windows = np.array(
        [scaled_val_set[pos - timestep:pos, 0] for pos in target_positions]
    )
    y_val = np.array([scaled_val_set[pos, 0] for pos in target_positions])

    # Add the trailing feature axis that the LSTM input expects.
    x_val = windows.reshape(windows.shape[0], windows.shape[1], 1)
    return x_val, y_val

In [17]:
def measure_rmse(actual, predicted):
    """Return the root-mean-squared error between ``actual`` and ``predicted``.

    Both arguments are passed unchanged to scikit-learn's
    ``mean_squared_error``; the square root of its result is returned.
    """
    mse = mean_squared_error(actual, predicted)
    return math.sqrt(mse)

In [18]:
def model_fit(config):
    """Build and train a stacked-LSTM regressor for one hyper-parameter pair.

    Args:
        config: Two-element sequence ``(timestep, n_nodes)``: the input window
            length and the width used for both LSTM layers and the hidden
            Dense layer.

    Returns:
        Tuple ``(model, history)``: the fitted Keras model and the ``History``
        object returned by ``model.fit``.
    """
    timestep, n_nodes = config
    x_train, y_train = prepare_train(timestep)
    x_val, y_val = prepare_val(timestep)

    layers = keras.layers
    model = keras.models.Sequential([
        # The first LSTM returns the full sequence so the second can consume it.
        layers.LSTM(units=n_nodes, return_sequences=True,
                    input_shape=(x_train.shape[1], 1)),
        layers.LSTM(units=n_nodes),
        layers.Dense(units=n_nodes),
        layers.Dense(1),
    ])
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Stop once the validation loss has not improved for 5 consecutive epochs.
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
    history = model.fit(
        x_train,
        y_train,
        validation_data=(x_val, y_val),
        epochs=40,
        batch_size=256,
        verbose=0,
        callbacks=[early_stop],
    )
    return model, history

In [19]:
def model_predict(config):
    """Train one model for ``config`` and return its final validation loss.

    Args:
        config: Two-element sequence ``(timestep, n_nodes)`` forwarded to
            ``model_fit``.

    Returns:
        The validation loss recorded for the last epoch that ran. Because
        ``model_fit`` uses early stopping, this is the loss at the stopping
        epoch, not necessarily the lowest one seen during training.
    """
    model, history = model_fit(config)
    val_loss = history.history['val_loss'][-1]

    # The grid search calls this once per repeat of every configuration and
    # only keeps the score. Drop the model and reset Keras' global state so
    # memory does not accumulate across the hundreds of throw-away models.
    del model
    keras.backend.clear_session()
    return val_loss

In [20]:
def repeat_evaluate(config, n_repeats=10):
    """Train ``config`` several times and report the mean validation loss.

    Args:
        config: Hyper-parameter pair forwarded to ``model_predict``.
        n_repeats: Number of independent trainings to average over.

    Returns:
        Tuple ``(config, mean_score)``.
    """
    losses = []
    for _ in range(n_repeats):
        losses.append(model_predict(config))

    result = mean(losses)
    print('> Model[%s] %.9f' % (config, result))
    return (config, result)

In [21]:
def grid_search(cfg_list):
    """Evaluate every configuration and rank the results by mean loss.

    Args:
        cfg_list: Iterable of configurations accepted by ``repeat_evaluate``.

    Returns:
        List of ``(config, mean_score)`` tuples sorted by score, lowest first.
    """
    results = []
    for cfg in cfg_list:
        results.append(repeat_evaluate(cfg))
    # Ascending by error, so the best configuration comes first.
    return sorted(results, key=lambda pair: pair[1])
 

In [24]:
def model_configs(timesteps=None, n_nodes=None):
    """Build the hyper-parameter grid as a list of ``[timestep, n_nodes]`` pairs.

    Args:
        timesteps: Iterable of window lengths to try. Defaults to
            ``[30, 35, 40, 45, 50, 55, 60]``.
        n_nodes: Iterable of layer widths to try. Defaults to
            ``[20, 25, 30, 35, 40, 45]``.

    Returns:
        List of ``[timestep, n_nodes]`` lists, ordered with ``timesteps`` as
        the outer loop and ``n_nodes`` as the inner loop. The number of
        configurations is also printed.
    """
    if timesteps is None:
        timesteps = [30, 35, 40, 45, 50, 55, 60]
    if n_nodes is None:
        n_nodes = [20, 25, 30, 35, 40, 45]

    # Materialise the inner axis: it is iterated once per timestep, so a
    # one-shot iterator would otherwise be exhausted after the first pass.
    n_nodes = list(n_nodes)

    configs = [[step, width] for step in timesteps for width in n_nodes]
    print('Total configs: %d' % len(configs))
    return configs

In [25]:
# Build the (timestep, n_nodes) grid and evaluate every combination.
# Long-running: with the default n_repeats=10 each of the 42 configurations is
# trained 10 times.
cfg_list = model_configs()
# grid search
scores = grid_search(cfg_list)

Total configs: 42
> Model[[30, 20]] 0.033783221
> Model[[30, 25]] 0.033915004
> Model[[30, 30]] 0.033942702
> Model[[30, 35]] 0.034053292
> Model[[30, 40]] 0.034091226
> Model[[30, 45]] 0.034064770
> Model[[35, 20]] 0.033987318
> Model[[35, 25]] 0.033959962
> Model[[35, 30]] 0.034036282
> Model[[35, 35]] 0.034094409
> Model[[35, 40]] 0.034176888
> Model[[35, 45]] 0.034173197
> Model[[40, 20]] 0.033969628
> Model[[40, 25]] 0.034078548
> Model[[40, 30]] 0.034147567
> Model[[40, 35]] 0.034317990
> Model[[40, 40]] 0.034291252
> Model[[40, 45]] 0.034310182
> Model[[45, 20]] 0.034099816
> Model[[45, 25]] 0.034271905
> Model[[45, 30]] 0.034289877
> Model[[45, 35]] 0.034294938
> Model[[45, 40]] 0.034368554
> Model[[45, 45]] 0.034424908
> Model[[50, 20]] 0.034210833
> Model[[50, 25]] 0.034273899
> Model[[50, 30]] 0.034368247
> Model[[50, 35]] 0.034412097
> Model[[50, 40]] 0.034471647
> Model[[50, 45]] 0.034488676
> Model[[55, 20]] 0.034223896
> Model[[55, 25]] 0.034367745
> Model[[55, 30]] 0.03

In [14]:
# Ranked (config, mean validation loss) pairs, lowest loss first.
# NOTE(review): this cell's execution count (14) is lower than the grid-search
# cell's (25), and the values displayed below (~1e-5) do not match the ~0.034
# losses printed by that run -- the output appears to come from an earlier
# execution. Re-run after the grid search to refresh it.
scores

[([55, 45], 1.8594361154100626e-05),
 ([40, 45], 1.908317608467769e-05),
 ([45, 45], 2.3069215058058034e-05),
 ([30, 30], 2.4106969158310675e-05),
 ([55, 40], 2.57116237662558e-05),
 ([40, 30], 2.623627524371841e-05),
 ([30, 20], 2.6778622213896597e-05),
 ([30, 40], 2.7507651520863873e-05),
 ([30, 45], 2.773977239485248e-05),
 ([30, 35], 2.7765418781200425e-05),
 ([45, 30], 3.0388454069907313e-05),
 ([55, 25], 3.0440280079346847e-05),
 ([35, 40], 3.111043806711677e-05),
 ([45, 40], 3.134401686111232e-05),
 ([60, 35], 3.172740334775881e-05),
 ([50, 40], 3.178510323778028e-05),
 ([30, 25], 3.223334015274304e-05),
 ([35, 20], 3.2875748911465055e-05),
 ([40, 20], 3.321805406812928e-05),
 ([45, 35], 3.36800759214384e-05),
 ([35, 45], 3.3881416857184374e-05),
 ([60, 45], 3.4214837523904865e-05),
 ([50, 30], 3.464116753093549e-05),
 ([40, 35], 3.4682147543207975e-05),
 ([60, 40], 3.49417562574672e-05),
 ([50, 45], 3.5890101844415764e-05),
 ([35, 30], 3.7502321356441824e-05),
 ([55, 20], 3.809