In [7]:
import math
import warnings
from typing import Tuple, Union

import pandas as pd
import matplotlib.pyplot as plt
import keras_tuner as kt
import plotly.express as px
import numpy as np
import tensorflow as tf
import plotly.graph_objects as go
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import save_model, model_from_json, Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow import keras
from keras_tuner.tuners import RandomSearch
from keras.models import load_model
from plotly.subplots import make_subplots
from keras_tuner.engine.hyperparameters import HyperParameters
warnings.simplefilter("ignore", UserWarning)

In [8]:
# Read the cleaned dataset and keep only the 'close' column, shaped as an
# (n_samples, 1) column array.
df = pd.read_csv('../../data/cleaned_data.csv')
data = df['close'].values.reshape(-1, 1)

In [9]:
def min_max_scale(train: np.ndarray, val: np.ndarray, test: np.ndarray) -> Tuple[MinMaxScaler, np.ndarray, np.ndarray, np.ndarray]:
    """Min-max scale the three splits using statistics from the training split only.

    The scaler is fitted on ``train`` and then applied unchanged to ``val`` and
    ``test``, so no information from the held-out splits enters the scaling.

    Args:
        train: Training split; the only data the scaler is fitted on.
        val: Validation split, transformed with the fitted scaler.
        test: Test split, transformed with the fitted scaler.

    Returns:
        A tuple ``(scaler, train_normalized, val_normalized, test_normalized)``.
        The fitted scaler is returned so callers can invert the transform later.
    """
    scaler = MinMaxScaler().fit(train)
    train_normalized, val_normalized, test_normalized = (
        scaler.transform(split) for split in (train, val, test)
    )
    return scaler, train_normalized, val_normalized, test_normalized

def data_divider(data: np.ndarray, threshold: float) -> Tuple[np.ndarray, np.ndarray]:
    """Split ``data`` into a leading and a trailing part at a fractional position.

    Args:
        data: Sequence to split; order is preserved (no shuffling).
        threshold: Fraction of ``len(data)`` that goes into the first part,
            e.g. ``0.8`` for an 80/20 split. The split index is
            ``int(len(data) * threshold)``, i.e. truncated toward zero.

    Returns:
        A tuple ``(first_part, second_part)`` of slices of ``data``.
    """
    split_index = int(len(data) * threshold)
    return data[:split_index], data[split_index:]

def sliding_window(data: np.ndarray, window_length: int, pred_len: int = 1) -> Tuple[np.ndarray, np.ndarray]:
    """Build (input window, target) pairs by sliding a fixed-length window over ``data``.

    Each sample takes ``window_length`` consecutive rows as input and the
    ``pred_len`` rows that immediately follow as target. Windows whose target
    would run past the end of ``data`` are not produced.

    For example:
    Data = [[1],[2],[3],[4],[5],[6]]
    sliding window = 2
    pred_len = 1
    X = [[[1],[2]],[[2],[3]],[[3],[4]],[[4],[5]]]
    Y = [[3],[4],[5],[6]]

    Args:
        data: Sequence to window (sliceable, supports ``len``).
        window_length: Number of rows in each input window.
        pred_len: Number of rows in each target.

    Returns:
        A tuple ``(X, Y)`` of NumPy arrays built from the collected slices.
        Both are empty arrays when ``data`` is too short for a single pair.
    """
    # Number of valid start positions: the base bound (len - window_length),
    # reduced so that a target of pred_len rows never overruns the data.
    n_windows = len(data) - window_length - max(pred_len - 1, 0)
    starts = range(n_windows)  # empty when n_windows <= 0

    X = [data[start: start + window_length] for start in starts]
    Y = [data[start + window_length: start + window_length + pred_len] for start in starts]

    return np.array(X), np.array(Y)

def model_builder(hp):
    """Build and compile a stacked-LSTM regressor for the Keras Tuner search.

    The search space, in registration order:
      * ``dropout``: 0.05, 0.1 or 0.2 (one Dropout layer before the last LSTM)
      * ``first_layer_neurons``: 32..512 in steps of 32
      * ``n_layers``: 1..4 additional sequence-returning LSTM layers, each with
        its own ``lstm_{i}_units`` (32..512 in steps of 32)
      * ``second_layer_neurons``: 32..512 in steps of 32 (final LSTM)
      * ``learning_rate``: 1e-2, 1e-3 or 1e-4 for the Adam optimizer

    Args:
        hp: The hyperparameter container supplied by the tuner.

    Returns:
        A compiled Sequential model with a single-unit Dense output, MSE loss
        and an ``mse`` metric.
    """
    hp_dropout = hp.Choice('dropout', values=[0.05, 0.1, 0.2])
    model = Sequential()
    model.add(LSTM(hp.Int('first_layer_neurons', min_value=32, max_value=512, step=32), return_sequences=True))
    for i in range(hp.Int('n_layers', 1, 4)):
        model.add(LSTM(hp.Int(f'lstm_{i}_units', min_value=32, max_value=512, step=32), return_sequences=True))
    model.add(Dropout(hp_dropout))
    model.add(LSTM(hp.Int('second_layer_neurons', min_value=32, max_value=512, step=32)))
    model.add(Dense(1))
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    # `metrics` is passed as a list: a bare string is not part of the documented
    # compile() contract and is rejected by newer Keras releases.
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss='mse',
                  metrics=['mse'])

    return model

def show_best_hyperparamters(best_hps):
    """Print the headline hyperparameters of a tuned configuration.

    Args:
        best_hps: Object exposing ``get(name)`` for the hyperparameter names
            registered in ``model_builder`` (``first_layer_neurons``,
            ``second_layer_neurons``, ``n_layers``, ``learning_rate``,
            ``dropout``).
    """
    print('Best Hyper Parameters\n')
    print('Layer 1 neuron: ', best_hps.get('first_layer_neurons'))
    print('Layer 2 neuron: ' , best_hps.get('second_layer_neurons'))
    # The hyperparameter is registered as 'n_layers' (plural); looking up
    # 'n_layer' fails because no such name exists.
    print('n_layers: ' , best_hps.get('n_layers'))
    print('learning_rate: ', best_hps.get('learning_rate'))
    print('Dropout rate: ', best_hps.get('dropout'))
    
def calculate_metrics(test: np.ndarray, predict: np.ndarray) -> None:
    """Print MSE, MAE, RMSE, MAPE and R2 for a set of predictions.

    Both arrays are flattened first, so every element is treated as one
    observation regardless of the original shape.

    Args:
        test: Ground-truth values.
        predict: Predicted values, with the same number of elements as ``test``.

    Returns:
        None. The metrics are printed on a single line.
    """
    y_true = test.flatten()
    y_pred = predict.flatten()

    MSE = mean_squared_error(y_true, y_pred)
    # RMSE is derived from MSE directly; the `squared=False` keyword of
    # mean_squared_error is deprecated in recent scikit-learn releases.
    RMSE = math.sqrt(MSE)
    MAE = mean_absolute_error(y_true, y_pred)
    MAPE = mean_absolute_percentage_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    print('mse: {}, mae: {}, rmse: {}, mape: {}, R2: {}'.format(MSE, MAE, RMSE, MAPE, r2))
    

In [11]:
# Window configuration: `lookback` past rows are the model input,
# the following `pred_len` rows are the target.
lookback = 3
pred_len = 1

# Contiguous split: the first 80% is used for training, the remaining 20% is
# halved into validation and test.
train, holdout = data_divider(data, 0.8)
val, test = data_divider(holdout, 0.5)

# Scale every split with a scaler fitted on the training split.
scaler, train_normalized, val_normalized, test_normalized = min_max_scale(train, val, test)

# Turn each scaled split into (input window, target) pairs.
(x_train, y_train), (x_val, y_val), (x_test, y_test) = (
    sliding_window(split, lookback, pred_len)
    for split in (train_normalized, val_normalized, test_normalized)
)

In [None]:
# Random search over the model_builder search space, ranked by validation MSE.
tuner = kt.RandomSearch(
    model_builder,
    objective='val_mse',
    max_trials=30,
    directory='my_dir',
    project_name='intro_to_kt2',
)

# Stop a trial once validation loss has failed to improve for 3 epochs.
stop_early = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

tuner.search(
    x_train,
    y_train,
    epochs=100,
    validation_data=(x_val, y_val),
    callbacks=[stop_early],
)

Trial 2 Complete [00h 24m 02s]
val_mse: 0.000210985861485824

Best val_mse So Far: 4.985070336260833e-05
Total elapsed time: 00h 24m 02s

Search: Running Trial #3

Value             |Best Value So Far |Hyperparameter
tanh              |tanh              |activationfunc
0.2               |0.05              |dropout
256               |192               |first_layer_neurons
2                 |3                 |n_layers
224               |64                |lstm_0_units
64                |256               |second_layer_neurons
0.01              |0.01              |learning_rate
384               |32                |lstm_1_units
352               |32                |lstm_2_units

Epoch 1/100
  67/1032 [>.............................] - ETA: 3:02 - loss: 0.1886 - mse: 0.1886

In [None]:
# Take the top-ranked hyperparameter set from the search and print it.
top_candidates = tuner.get_best_hyperparameters(num_trials=1)
best_hps = top_candidates[0]
show_best_hyperparamters(best_hps)

In [14]:
def model():
    """Build and compile the fixed two-layer LSTM network.

    Architecture: LSTM(100, relu, returns sequences) -> Dropout(0.2) ->
    LSTM(100, relu) -> Dense(pred_len). The output width is read from the
    module-level ``pred_len`` at call time. Compiled with Adam
    (learning rate 1e-5) and MSE loss.

    NOTE(review): the notebook later runs ``model = model()``, which replaces
    this function with the built network under the same name.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    # Seeding (np.random.seed / tf.random.set_seed with 1234) is currently
    # disabled, so weight initialisation differs between runs.
    net = keras.Sequential([
        LSTM(100, activation='relu', return_sequences=True),
        Dropout(0.2),
        LSTM(100, activation='relu'),
        Dense(pred_len),
    ])

    optimizer = keras.optimizers.Adam(learning_rate=0.00001)
    net.compile(optimizer=optimizer, loss='mse')

    return net

In [15]:
# Build the fixed-architecture network (the tuned alternative via
# model_builder(best_hps) is currently not used).
# NOTE(review): this rebinds `model` from the builder function to the built
# network, so re-running this cell calls the network instead of the builder.
model = model()

# NOTE(review): with epochs=1 below, a patience of 2 can never be exhausted.
stop_training_early = keras.callbacks.EarlyStopping(monitor="val_loss", patience=2)

history = model.fit(
    x_train,
    y_train,
    epochs=1,
    verbose=1,
    shuffle=False,
    validation_data=(x_val, y_val),
    callbacks=[stop_training_early],
)

ValueError: in user code:

    File "C:\Users\Athra\anaconda3\envs\lstm\lib\site-packages\keras\engine\training.py", line 853, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\Athra\anaconda3\envs\lstm\lib\site-packages\keras\engine\training.py", line 842, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Athra\anaconda3\envs\lstm\lib\site-packages\keras\engine\training.py", line 835, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\Athra\anaconda3\envs\lstm\lib\site-packages\keras\engine\training.py", line 787, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\Athra\anaconda3\envs\lstm\lib\site-packages\keras\engine\base_layer.py", line 1020, in __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    File "C:\Users\Athra\anaconda3\envs\lstm\lib\site-packages\keras\engine\input_spec.py", line 266, in assert_input_compatibility
        raise ValueError('Input ' + str(input_index) +

    ValueError: Input 0 is incompatible with layer sequential: expected shape=(None, None, 4), found shape=(None, 3, 1)


In [None]:
# Print the layer-by-layer summary (output shapes, parameter counts) of `model`.
model.summary()

In [None]:
# Build the fixed-architecture network (the tuned alternative via
# model_builder(best_hps) is currently not used).
# NOTE(review): this rebinds `model` from the builder function to the built
# network, so re-running this cell calls the network instead of the builder.
model = model()

# NOTE(review): with epochs=1 below, a patience of 2 can never be exhausted.
stop_training_early = keras.callbacks.EarlyStopping(monitor="val_loss", patience=2)

history = model.fit(
    x_train,
    y_train,
    epochs=1,
    verbose=1,
    shuffle=False,
    validation_data=(x_val, y_val),
    callbacks=[stop_training_early],
)

In [None]:
# Print the layer-by-layer summary (output shapes, parameter counts) of `model`.
model.summary()

In [None]:
# Build the fixed-architecture network (the tuned alternative via
# model_builder(best_hps) is currently not used).
# NOTE(review): this rebinds `model` from the builder function to the built
# network, so re-running this cell calls the network instead of the builder.
model = model()

# NOTE(review): with epochs=1 below, a patience of 2 can never be exhausted.
stop_training_early = keras.callbacks.EarlyStopping(monitor="val_loss", patience=2)

history = model.fit(
    x_train,
    y_train,
    epochs=1,
    verbose=1,
    shuffle=False,
    validation_data=(x_val, y_val),
    callbacks=[stop_training_early],
)

In [None]:
# Print the layer-by-layer summary (output shapes, parameter counts) of `model`.
model.summary()

In [9]:
# Build the fixed-architecture network (the tuned alternative via
# model_builder(best_hps) is currently not used).
# NOTE(review): this rebinds `model` from the builder function to the built
# network, so re-running this cell calls the network instead of the builder.
model = model()

# NOTE(review): with epochs=1 below, a patience of 2 can never be exhausted.
stop_training_early = keras.callbacks.EarlyStopping(monitor="val_loss", patience=2)

history = model.fit(
    x_train,
    y_train,
    epochs=1,
    verbose=1,
    shuffle=False,
    validation_data=(x_val, y_val),
    callbacks=[stop_training_early],
)



In [12]:
# Print the layer-by-layer summary (output shapes, parameter counts) of `model`.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 6, 100)            40800     
_________________________________________________________________
dropout_1 (Dropout)          (None, 6, 100)            0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 100)               80400     
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 202       
Total params: 121,402
Trainable params: 121,402
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Overlay the per-epoch training and validation loss recorded by model.fit().
for history_key, curve_label in (('loss', 'train loss'), ('val_loss', 'validation loss')):
    plt.plot(history.history[history_key], label=curve_label)
plt.legend()

In [None]:
# model = load_model('saved_model/hourly_model')
# model.save('saved_model/hourly_100_model')
# model.summary()

## Prediction

In [None]:
# Run the trained network on the test windows. x_test was built from the
# min-max scaled test split, so the predictions are on that scaled range too.
y_predict = model.predict(x_test)

In [None]:
# Shape of the test targets produced by sliding_window.
y_test.shape

In [None]:
# Shape of the model predictions, for comparison with y_test.shape.
y_predict.shape

In [None]:
from math import sqrt
def evaluate_rmse(actual, predicted):
    """Return the overall RMSE and the RMSE of each forecast step.

    Both inputs are reshaped to ``(n_samples, n_steps)`` before comparison, so
    targets carrying a trailing singleton axis (e.g. ``(n, steps, 1)``) can be
    compared with predictions of shape ``(n, steps)``.

    Args:
        actual: Ground-truth values, first axis indexing samples.
        predicted: Predicted values with the same number of elements.

    Returns:
        A tuple ``(score, scores)``: ``score`` is the RMSE over all elements as
        a float, ``scores`` is a list with one RMSE per forecast step (column).

    Raises:
        ValueError: If an input is empty, contains NaN/inf, or the two inputs
            do not have the same shape once reshaped to two dimensions.
    """
    actual = np.atleast_1d(np.asarray(actual, dtype=float))
    predicted = np.atleast_1d(np.asarray(predicted, dtype=float))
    if actual.size == 0 or predicted.size == 0:
        raise ValueError('actual and predicted must not be empty')

    # Collapse everything after the sample axis into a single "step" axis.
    actual = actual.reshape(actual.shape[0], -1)
    predicted = predicted.reshape(predicted.shape[0], -1)
    if actual.shape != predicted.shape:
        raise ValueError(
            'shape mismatch: actual {} vs predicted {}'.format(actual.shape, predicted.shape)
        )
    if not (np.isfinite(actual).all() and np.isfinite(predicted).all()):
        raise ValueError('actual and predicted must not contain NaN or infinity')

    squared_error = (actual - predicted) ** 2
    # One RMSE per forecast step (column-wise mean over samples).
    scores = np.sqrt(squared_error.mean(axis=0)).tolist()
    # Overall RMSE across every sample and step.
    score = float(np.sqrt(squared_error.mean()))
    return score, scores

def evaluate_mae(actual, predicted):
    """Return the overall MAE and the MAE of each forecast step.

    Both inputs are reshaped to ``(n_samples, n_steps)`` before comparison, so
    targets carrying a trailing singleton axis (e.g. ``(n, steps, 1)``) can be
    compared with predictions of shape ``(n, steps)``.

    Args:
        actual: Ground-truth values, first axis indexing samples.
        predicted: Predicted values with the same number of elements.

    Returns:
        A tuple ``(score, scores)``: ``score`` is the MAE over all elements as
        a float, ``scores`` is a list with one MAE per forecast step (column).

    Raises:
        ValueError: If an input is empty, contains NaN/inf, or the two inputs
            do not have the same shape once reshaped to two dimensions.
    """
    actual = np.atleast_1d(np.asarray(actual, dtype=float))
    predicted = np.atleast_1d(np.asarray(predicted, dtype=float))
    if actual.size == 0 or predicted.size == 0:
        raise ValueError('actual and predicted must not be empty')

    # Collapse everything after the sample axis into a single "step" axis.
    actual = actual.reshape(actual.shape[0], -1)
    predicted = predicted.reshape(predicted.shape[0], -1)
    if actual.shape != predicted.shape:
        raise ValueError(
            'shape mismatch: actual {} vs predicted {}'.format(actual.shape, predicted.shape)
        )
    if not (np.isfinite(actual).all() and np.isfinite(predicted).all()):
        raise ValueError('actual and predicted must not contain NaN or infinity')

    absolute_error = np.abs(actual - predicted)
    # One MAE per forecast step (column-wise mean over samples).
    scores = absolute_error.mean(axis=0).tolist()
    # Overall MAE across every sample and step.
    score = float(absolute_error.mean())
    return score, scores

In [None]:
# Overall and per-step RMSE of the test predictions.
evaluate_rmse(y_test, y_predict)

In [None]:
# Overall and per-step MAE of the test predictions.
evaluate_mae(y_test, y_predict)

In [None]:
# Print MSE, MAE, RMSE, MAPE and R2 computed over the flattened test targets and predictions.
calculate_metrics(y_test, y_predict)

In [None]:
# # plt.plot(y_test.flatten(), label='test')
# # plt.plot(y_predict.flatten(), 'r-', label='predict')
# # plt.legend()
# fig = go.Figure()
# fig.add_trace(go.Scatter(y = y_test.flatten(), name = 'Actual'))
# fig.add_trace(go.Scatter(y = y_predict.flatten(), name = 'Predict'))