# Stock market predictor
[link to tutorial](https://www.thepythoncode.com/article/stock-price-prediction-in-python-using-tensorflow-2-and-keras)

In [14]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Dropout, Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from yahoo_fin import stock_info as si
from collections import deque

import os
import numpy as np
import pandas as pd
import random

In [15]:
# fix every random seed so that rerunning the notebook reproduces the same results
_SEED = 314
for _set_seed in (np.random.seed, tf.random.set_seed, random.seed):
    _set_seed(_SEED)


In [16]:
def shuffle_in_unison(a, b):
    """
    Shuffle `a` and `b` in place using the same random permutation.

    The global NumPy RNG state is captured once and restored before each
    shuffle, so both arrays are reordered identically along their first axis
    (they are expected to have the same length).
    """
    rng_snapshot = np.random.get_state()
    for arr in (a, b):
        # rewinding to the snapshot makes every shuffle draw the same permutation
        np.random.set_state(rng_snapshot)
        np.random.shuffle(arr)

In [28]:
def shuffle_in_unison(a, b):
    """
    Apply one shared random permutation to both `a` and `b`, in place.

    Works by replaying the global NumPy RNG from the same saved state for
    each array, so rows that were paired before the call stay paired after it.
    """
    saved_state = np.random.get_state()
    for target in (a, b):
        # restore the saved state so this shuffle matches the previous one
        np.random.set_state(saved_state)
        np.random.shuffle(target)

def load_data(ticker, n_steps=50, scale=True, shuffle=True, lookup_step=1, split_by_date=True,
                test_size=0.2, feature_columns=['adjclose', 'volume', 'open', 'high', 'low']):
    """
    Loads data from Yahoo Finance source, as well as scaling, shuffling, normalizing and splitting.
    Params:
        ticker (str/pd.DataFrame): the ticker you want to load, examples include AAPL, TSLA, etc.
            A dataframe that is passed in is copied first and never modified.
        n_steps (int): the historical sequence length (i.e window size) used to predict, default is 50
        scale (bool): whether to scale prices from 0 to 1, default is True
        shuffle (bool): whether to shuffle the dataset (both training & testing), default is True
        lookup_step (int): the future lookup step to predict, default is 1 (e.g next day)
        split_by_date (bool): whether we split the dataset into training/testing by date, setting it
            to False will split datasets in a random way
        test_size (float): ratio for test data, default is 0.2 (20% testing data)
        feature_columns (list): the list of features to use to feed into the model, default is everything grabbed from yahoo_fin
    Returns:
        dict with the keys:
            'df': copy of the loaded dataframe, taken before any scaling
            'column_scaler': {column: fitted MinMaxScaler}, only present when `scale` is True
            'last_sequence': float32 array holding the last `n_steps` usable rows followed by the
                `lookup_step` rows that have no known future price
            'X_train', 'X_test': float32 arrays of shape (samples, n_steps, len(feature_columns))
            'y_train', 'y_test': the matching target ('future') values
            'test_df': rows of 'df' for the last date of each test sequence, duplicates removed
    Raises:
        TypeError: if `ticker` is neither a str nor a `pd.DataFrame`
        AssertionError: if a feature column is missing from the dataframe
        ValueError: if there are not enough rows to build a single `n_steps` sequence
    """
    # see if ticker is already a loaded stock from yahoo finance
    if isinstance(ticker, str):
        # load it from yahoo_fin library
        df = si.get_data(ticker)
    elif isinstance(ticker, pd.DataFrame):
        # already loaded; copy it because the steps below add columns, scale and drop rows in place
        df = ticker.copy()
    else:
        raise TypeError("ticker can be either a str or a `pd.DataFrame` instances")
    # private list so the shared default argument can never be altered and tuples are accepted
    feature_columns = list(feature_columns)
    n_features = len(feature_columns)
    # this will contain all the elements we want to return from this function
    result = {}
    # we will also return the original (unscaled) dataframe itself
    result['df'] = df.copy()
    # make sure that the passed feature_columns exist in the dataframe
    for col in feature_columns:
        assert col in df.columns, f"'{col}' does not exist in the dataframe."
    # add date as a column
    if "date" not in df.columns:
        df["date"] = df.index
    if scale:
        column_scaler = {}
        # scale the data (prices) from 0 to 1, one independent scaler per column
        for column in feature_columns:
            scaler = preprocessing.MinMaxScaler()
            scaled = scaler.fit_transform(np.expand_dims(df[column].values, axis=1))
            # fit_transform returns an (n, 1) array; store it back as a flat column
            df[column] = scaled[:, 0]
            column_scaler[column] = scaler
        # add the MinMaxScaler instances to the result returned
        result["column_scaler"] = column_scaler
    # add the target column (label) by shifting by `lookup_step`
    df['future'] = df['adjclose'].shift(-lookup_step)
    # the last `lookup_step` rows contain NaN in the future column,
    # keep their features before dropping NaNs
    last_sequence = np.array(df[feature_columns].tail(lookup_step))
    # drop NaNs
    df.dropna(inplace=True)
    sequence_data = []
    # sliding window over the rows; the deque discards the oldest row once it is full
    sequences = deque(maxlen=n_steps)
    for entry, target in zip(df[feature_columns + ["date"]].values, df['future'].values):
        sequences.append(entry)
        if len(sequences) == n_steps:
            sequence_data.append([np.array(sequences), target])
    if not sequence_data:
        raise ValueError(
            f"not enough rows to build a sequence: got {len(df)} usable rows, need at least n_steps={n_steps}"
        )
    # get the last sequence by appending the last `n_step` sequence with `lookup_step` sequence
    # for instance, if n_steps=50 and lookup_step=10, last_sequence should be of 60 (that is 50+10) length
    # this last_sequence will be used to predict future stock prices that are not available in the dataset
    last_sequence = [s[:n_features] for s in sequences] + list(last_sequence)
    result['last_sequence'] = np.array(last_sequence).astype(np.float32)
    # construct the X's and y's as numpy arrays
    X = np.array([seq for seq, _ in sequence_data])
    y = np.array([target for _, target in sequence_data])
    if split_by_date:
        # split the dataset into training & testing sets by date (not randomly splitting)
        train_samples = int((1 - test_size) * len(X))
        result["X_train"] = X[:train_samples]
        result["y_train"] = y[:train_samples]
        result["X_test"]  = X[train_samples:]
        result["y_test"]  = y[train_samples:]
        if shuffle:
            # shuffle the datasets for training (if shuffle parameter is set)
            shuffle_in_unison(result["X_train"], result["y_train"])
            shuffle_in_unison(result["X_test"], result["y_test"])
    else:
        # split the dataset randomly
        result["X_train"], result["X_test"], result["y_train"], result["y_test"] = train_test_split(
            X, y, test_size=test_size, shuffle=shuffle)
    # the date is the last column of every row; take it from the last row of each test sequence
    dates = result["X_test"][:, -1, -1]
    # retrieve test features from the original dataframe
    result["test_df"] = result["df"].loc[dates]
    # remove duplicated dates in the testing dataframe
    result["test_df"] = result["test_df"][~result["test_df"].index.duplicated(keep='first')]
    # remove dates from the training/testing sets & convert to float32
    result["X_train"] = result["X_train"][:, :, :n_features].astype(np.float32)
    result["X_test"] = result["X_test"][:, :, :n_features].astype(np.float32)
    return result

In [33]:
def create_model(sequence_length, n_features, units=256, cell=LSTM, n_layers=2, dropout=0.3,
                loss="mean_absolute_error", optimizer="rmsprop", bidirectional=False):
    """
    Builds and compiles a stacked recurrent regression model.
    Params:
        sequence_length (int): number of timesteps in each input sequence
        n_features (int): number of features per timestep
        units (int): number of units in every recurrent layer, default is 256
        cell: recurrent layer class to stack, default is LSTM
        n_layers (int): number of recurrent layers, default is 2
        dropout (float): dropout rate applied after every recurrent layer, default is 0.3
        loss (str): loss passed to `model.compile`, default is "mean_absolute_error"
        optimizer (str): optimizer passed to `model.compile`, default is "rmsprop"
        bidirectional (bool): whether to wrap every recurrent layer in `Bidirectional`
    Returns:
        the compiled `Sequential` model, ending in a single linear `Dense(1)` output
    """
    model = Sequential()
    for i in range(n_layers):
        # every recurrent layer except the last one must hand a full sequence to the next
        # recurrent layer; the last one (also when it is the only one, n_layers=1) returns
        # just its final state so that Dense(1) yields one value per sample
        return_sequences = i < n_layers - 1
        # only the first layer declares the input shape
        input_kwargs = {"batch_input_shape": (None, sequence_length, n_features)} if i == 0 else {}
        if bidirectional:
            model.add(Bidirectional(cell(units, return_sequences=return_sequences), **input_kwargs))
        else:
            model.add(cell(units, return_sequences=return_sequences, **input_kwargs))
        # add dropout after each layer
        model.add(Dropout(dropout))
    model.add(Dense(1, activation="linear"))
    model.compile(loss=loss, metrics=["mean_absolute_error"], optimizer=optimizer)
    return model

In [34]:
import os
import time
from tensorflow.keras.layers import LSTM

### data parameters
# number of past days in each input window (the sequence length)
N_STEPS = 50
# how many days ahead the target price lies (1 would be the next day)
LOOKUP_STEP = 15
# scale the feature columns (and with them the output price)?
SCALE = True
# shuffle the dataset?
SHUFFLE = True
# split the training/testing set by date instead of randomly?
SPLIT_BY_DATE = False
# share of the samples kept for testing, 0.2 is 20%
TEST_SIZE = 0.2
# dataframe columns fed to the model
FEATURE_COLUMNS = ["adjclose", "volume", "open", "high", "low"]

### model parameters
# number of stacked recurrent layers
N_LAYERS = 2
# recurrent layer type: LSTM
CELL = LSTM
# units per recurrent layer
UNITS = 256
# dropout rate (40%)
DROPOUT = 0.4
# wrap the recurrent layers in Bidirectional?
BIDIRECTIONAL = False

### training parameters
# huber loss (the alternative tried before was "mae", mean absolute error)
LOSS = "huber_loss"
OPTIMIZER = "adam"
BATCH_SIZE = 64
EPOCHS = 500

### naming
# Amazon stock market
ticker = "AMZN"
# today's date, used to stamp the saved files
date_now = time.strftime("%Y-%m-%d")
# short tags that encode the boolean switches above as 0/1
scale_str = "sc-{}".format(int(SCALE))
shuffle_str = "sh-{}".format(int(SHUFFLE))
split_by_date_str = "sbd-{}".format(int(SPLIT_BY_DATE))
# where the downloaded price history is stored
ticker_data_filename = os.path.join("data", f"{ticker}_{date_now}.csv")
# model name to save, made as unique as possible by spelling out the parameters
model_name = "-".join([
    f"{date_now}_{ticker}",
    shuffle_str,
    scale_str,
    split_by_date_str,
    LOSS,
    OPTIMIZER,
    CELL.__name__,
    "seq", str(N_STEPS),
    "step", str(LOOKUP_STEP),
    "layers", str(N_LAYERS),
    "units", str(UNITS),
])
if BIDIRECTIONAL:
    model_name = model_name + "-b"

In [35]:
# make sure the output folders are present before anything is written to them
for folder in ("results", "logs", "data"):
    if not os.path.isdir(folder):
        os.mkdir(folder)

In [36]:
# fetch and preprocess the price history for the configured ticker
data = load_data(
    ticker,
    n_steps=N_STEPS,
    scale=SCALE,
    shuffle=SHUFFLE,
    lookup_step=LOOKUP_STEP,
    split_by_date=SPLIT_BY_DATE,
    test_size=TEST_SIZE,
    feature_columns=FEATURE_COLUMNS,
)
# keep a copy of the raw dataframe on disk
data["df"].to_csv(ticker_data_filename)

# build the network from the model parameters
model = create_model(
    sequence_length=N_STEPS,
    n_features=len(FEATURE_COLUMNS),
    units=UNITS,
    cell=CELL,
    n_layers=N_LAYERS,
    dropout=DROPOUT,
    loss=LOSS,
    optimizer=OPTIMIZER,
    bidirectional=BIDIRECTIONAL,
)

# callbacks: save the weights whenever the monitored loss reaches a new best,
# and write TensorBoard logs
checkpointer = ModelCheckpoint(
    filepath=os.path.join("results", model_name + ".h5"),
    save_weights_only=True,
    save_best_only=True,
    verbose=1,
)
tensorboard = TensorBoard(log_dir=os.path.join("logs", model_name))

# train, validating on the test split after every epoch
history = model.fit(
    x=data["X_train"],
    y=data["y_train"],
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(data["X_test"], data["y_test"]),
    callbacks=[checkpointer, tensorboard],
    verbose=1,
)

Epoch 1/500
Instructions for updating:
use `tf.profiler.experimental.stop` instead.
Epoch 00001: val_loss improved from inf to 0.00022, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 2/500
Epoch 00002: val_loss improved from 0.00022 to 0.00014, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 3/500
Epoch 00003: val_loss did not improve from 0.00014
Epoch 4/500
Epoch 00004: val_loss did not improve from 0.00014
Epoch 5/500
Epoch 00005: val_loss did not improve from 0.00014
Epoch 6/500
Epoch 00006: val_loss did not improve from 0.00014
Epoch 7/500
Epoch 00007: val_loss did not improve from 0.00014
Epoch 8/500
Epoch 00008: val_loss did not improve from 0.00014
Epoch 9/500
Epoch 00009: val_loss did not improve from 0.00014
Epoch 10/500
Epoch 00010: val_loss did not improve from 0.00014
Epoch 11/500
Epoch 00011: val_loss did not improve from 0.00014

Epoch 00023: val_loss did not improve from 0.00013
Epoch 24/500
Epoch 00024: val_loss did not improve from 0.00013
Epoch 25/500
Epoch 00025: val_loss did not improve from 0.00013
Epoch 26/500
Epoch 00026: val_loss did not improve from 0.00013
Epoch 27/500
Epoch 00027: val_loss improved from 0.00013 to 0.00013, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 28/500
Epoch 00028: val_loss did not improve from 0.00013
Epoch 29/500
Epoch 00029: val_loss did not improve from 0.00013
Epoch 30/500
Epoch 00030: val_loss did not improve from 0.00013
Epoch 31/500
Epoch 00031: val_loss did not improve from 0.00013
Epoch 32/500
Epoch 00032: val_loss did not improve from 0.00013
Epoch 33/500
Epoch 00033: val_loss improved from 0.00013 to 0.00012, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 34/500
Epoch 00034: val_loss did not improve from 0.00012
Epoch 3

Epoch 47/500
Epoch 00047: val_loss did not improve from 0.00012
Epoch 48/500
Epoch 00048: val_loss did not improve from 0.00012
Epoch 49/500
Epoch 00049: val_loss did not improve from 0.00012
Epoch 50/500
Epoch 00050: val_loss did not improve from 0.00012
Epoch 51/500
Epoch 00051: val_loss did not improve from 0.00012
Epoch 52/500
Epoch 00052: val_loss improved from 0.00012 to 0.00011, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 53/500
Epoch 00053: val_loss did not improve from 0.00011
Epoch 54/500
Epoch 00054: val_loss improved from 0.00011 to 0.00011, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 55/500
Epoch 00055: val_loss did not improve from 0.00011
Epoch 56/500
Epoch 00056: val_loss did not improve from 0.00011
Epoch 57/500
Epoch 00057: val_loss did not improve from 0.00011
Epoch 58/500
Epoch 00058: val_loss did not improve from 0.

Epoch 71/500
Epoch 00071: val_loss improved from 0.00009 to 0.00009, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 72/500
Epoch 00072: val_loss did not improve from 0.00009
Epoch 73/500
Epoch 00073: val_loss improved from 0.00009 to 0.00009, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 74/500
Epoch 00074: val_loss did not improve from 0.00009
Epoch 75/500
Epoch 00075: val_loss did not improve from 0.00009
Epoch 76/500
Epoch 00076: val_loss did not improve from 0.00009
Epoch 77/500
Epoch 00077: val_loss did not improve from 0.00009
Epoch 78/500
Epoch 00078: val_loss improved from 0.00009 to 0.00009, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 79/500
Epoch 00079: val_loss did not improve from 0.00009
Epoch 80/500
Epoch 00080: val_loss did not improve from 0.00009
Epo

Epoch 95/500
Epoch 00095: val_loss did not improve from 0.00009
Epoch 96/500
Epoch 00096: val_loss did not improve from 0.00009
Epoch 97/500
Epoch 00097: val_loss did not improve from 0.00009
Epoch 98/500
Epoch 00098: val_loss did not improve from 0.00009
Epoch 99/500
Epoch 00099: val_loss did not improve from 0.00009
Epoch 100/500
Epoch 00100: val_loss did not improve from 0.00009
Epoch 101/500
Epoch 00101: val_loss did not improve from 0.00009
Epoch 102/500
Epoch 00102: val_loss did not improve from 0.00009
Epoch 103/500
Epoch 00103: val_loss did not improve from 0.00009
Epoch 104/500
Epoch 00104: val_loss did not improve from 0.00009
Epoch 105/500
Epoch 00105: val_loss did not improve from 0.00009
Epoch 106/500
Epoch 00106: val_loss did not improve from 0.00009
Epoch 107/500
Epoch 00107: val_loss did not improve from 0.00009
Epoch 108/500
Epoch 00108: val_loss did not improve from 0.00009
Epoch 109/500
Epoch 00109: val_loss did not improve from 0.00009
Epoch 110/500
Epoch 00110: val

Epoch 00120: val_loss did not improve from 0.00009
Epoch 121/500
Epoch 00121: val_loss did not improve from 0.00009
Epoch 122/500
Epoch 00122: val_loss did not improve from 0.00009
Epoch 123/500
Epoch 00123: val_loss improved from 0.00009 to 0.00009, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 124/500
Epoch 00124: val_loss did not improve from 0.00009
Epoch 125/500
Epoch 00125: val_loss did not improve from 0.00009
Epoch 126/500
Epoch 00126: val_loss did not improve from 0.00009
Epoch 127/500
Epoch 00127: val_loss did not improve from 0.00009
Epoch 128/500
Epoch 00128: val_loss did not improve from 0.00009
Epoch 129/500
Epoch 00129: val_loss did not improve from 0.00009
Epoch 130/500
Epoch 00130: val_loss did not improve from 0.00009
Epoch 131/500
Epoch 00131: val_loss did not improve from 0.00009
Epoch 132/500
Epoch 00132: val_loss did not improve from 0.00009
Epoch 133/500
Epoch 00133: val_loss did not improv

Epoch 145/500
Epoch 00145: val_loss did not improve from 0.00009
Epoch 146/500
Epoch 00146: val_loss did not improve from 0.00009
Epoch 147/500
Epoch 00147: val_loss did not improve from 0.00009
Epoch 148/500
Epoch 00148: val_loss did not improve from 0.00009
Epoch 149/500
Epoch 00149: val_loss did not improve from 0.00009
Epoch 150/500
Epoch 00150: val_loss did not improve from 0.00009
Epoch 151/500
Epoch 00151: val_loss did not improve from 0.00009
Epoch 152/500
Epoch 00152: val_loss did not improve from 0.00009
Epoch 153/500
Epoch 00153: val_loss did not improve from 0.00009
Epoch 154/500
Epoch 00154: val_loss did not improve from 0.00009
Epoch 155/500
Epoch 00155: val_loss did not improve from 0.00009
Epoch 156/500
Epoch 00156: val_loss did not improve from 0.00009
Epoch 157/500
Epoch 00157: val_loss did not improve from 0.00009
Epoch 158/500
Epoch 00158: val_loss did not improve from 0.00009
Epoch 159/500
Epoch 00159: val_loss did not improve from 0.00009
Epoch 160/500
Epoch 00160

Epoch 00170: val_loss did not improve from 0.00009
Epoch 171/500
Epoch 00171: val_loss improved from 0.00009 to 0.00008, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 172/500
Epoch 00172: val_loss did not improve from 0.00008
Epoch 173/500
Epoch 00173: val_loss did not improve from 0.00008
Epoch 174/500
Epoch 00174: val_loss did not improve from 0.00008
Epoch 175/500
Epoch 00175: val_loss did not improve from 0.00008
Epoch 176/500
Epoch 00176: val_loss did not improve from 0.00008
Epoch 177/500
Epoch 00177: val_loss did not improve from 0.00008
Epoch 178/500
Epoch 00178: val_loss did not improve from 0.00008
Epoch 179/500
Epoch 00179: val_loss did not improve from 0.00008
Epoch 180/500
Epoch 00180: val_loss did not improve from 0.00008
Epoch 181/500
Epoch 00181: val_loss did not improve from 0.00008
Epoch 182/500
Epoch 00182: val_loss did not improve from 0.00008
Epoch 183/500
Epoch 00183: val_loss did not improv

Epoch 195/500
Epoch 00195: val_loss did not improve from 0.00008
Epoch 196/500
Epoch 00196: val_loss did not improve from 0.00008
Epoch 197/500
Epoch 00197: val_loss did not improve from 0.00008
Epoch 198/500
Epoch 00198: val_loss did not improve from 0.00008
Epoch 199/500
Epoch 00199: val_loss did not improve from 0.00008
Epoch 200/500
Epoch 00200: val_loss did not improve from 0.00008
Epoch 201/500
Epoch 00201: val_loss improved from 0.00008 to 0.00008, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 202/500
Epoch 00202: val_loss did not improve from 0.00008
Epoch 203/500
Epoch 00203: val_loss did not improve from 0.00008
Epoch 204/500
Epoch 00204: val_loss did not improve from 0.00008
Epoch 205/500
Epoch 00205: val_loss did not improve from 0.00008
Epoch 206/500
Epoch 00206: val_loss did not improve from 0.00008
Epoch 207/500
Epoch 00207: val_loss did not improve from 0.00008
Epoch 208/500
Epoch 00208: val_loss 

Epoch 220/500
Epoch 00220: val_loss did not improve from 0.00008
Epoch 221/500
Epoch 00221: val_loss did not improve from 0.00008
Epoch 222/500
Epoch 00222: val_loss did not improve from 0.00008
Epoch 223/500
Epoch 00223: val_loss did not improve from 0.00008
Epoch 224/500
Epoch 00224: val_loss did not improve from 0.00008
Epoch 225/500
Epoch 00225: val_loss did not improve from 0.00008
Epoch 226/500
Epoch 00226: val_loss did not improve from 0.00008
Epoch 227/500
Epoch 00227: val_loss did not improve from 0.00008
Epoch 228/500
Epoch 00228: val_loss did not improve from 0.00008
Epoch 229/500
Epoch 00229: val_loss did not improve from 0.00008
Epoch 230/500
Epoch 00230: val_loss did not improve from 0.00008
Epoch 231/500
Epoch 00231: val_loss did not improve from 0.00008
Epoch 232/500
Epoch 00232: val_loss did not improve from 0.00008
Epoch 233/500
Epoch 00233: val_loss did not improve from 0.00008
Epoch 234/500
Epoch 00234: val_loss did not improve from 0.00008
Epoch 235/500
Epoch 00235

Epoch 245/500
Epoch 00245: val_loss did not improve from 0.00008
Epoch 246/500
Epoch 00246: val_loss did not improve from 0.00008
Epoch 247/500
Epoch 00247: val_loss did not improve from 0.00008
Epoch 248/500
Epoch 00248: val_loss did not improve from 0.00008
Epoch 249/500
Epoch 00249: val_loss did not improve from 0.00008
Epoch 250/500
Epoch 00250: val_loss did not improve from 0.00008
Epoch 251/500
Epoch 00251: val_loss did not improve from 0.00008
Epoch 252/500
Epoch 00252: val_loss did not improve from 0.00008
Epoch 253/500
Epoch 00253: val_loss did not improve from 0.00008
Epoch 254/500
Epoch 00254: val_loss did not improve from 0.00008
Epoch 255/500
Epoch 00255: val_loss did not improve from 0.00008
Epoch 256/500
Epoch 00256: val_loss did not improve from 0.00008
Epoch 257/500
Epoch 00257: val_loss did not improve from 0.00008
Epoch 258/500
Epoch 00258: val_loss did not improve from 0.00008
Epoch 259/500
Epoch 00259: val_loss did not improve from 0.00008
Epoch 260/500
Epoch 00260

Epoch 00270: val_loss did not improve from 0.00008
Epoch 271/500
Epoch 00271: val_loss did not improve from 0.00008
Epoch 272/500
Epoch 00272: val_loss did not improve from 0.00008
Epoch 273/500
Epoch 00273: val_loss did not improve from 0.00008
Epoch 274/500
Epoch 00274: val_loss did not improve from 0.00008
Epoch 275/500
Epoch 00275: val_loss did not improve from 0.00008
Epoch 276/500
Epoch 00276: val_loss did not improve from 0.00008
Epoch 277/500
Epoch 00277: val_loss did not improve from 0.00008
Epoch 278/500
Epoch 00278: val_loss improved from 0.00008 to 0.00008, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 279/500
Epoch 00279: val_loss did not improve from 0.00008
Epoch 280/500
Epoch 00280: val_loss did not improve from 0.00008
Epoch 281/500
Epoch 00281: val_loss did not improve from 0.00008
Epoch 282/500
Epoch 00282: val_loss did not improve from 0.00008
Epoch 283/500
Epoch 00283: val_loss did not improv

Epoch 295/500
Epoch 00295: val_loss did not improve from 0.00008
Epoch 296/500
Epoch 00296: val_loss did not improve from 0.00008
Epoch 297/500
Epoch 00297: val_loss did not improve from 0.00008
Epoch 298/500
Epoch 00298: val_loss did not improve from 0.00008
Epoch 299/500
Epoch 00299: val_loss did not improve from 0.00008
Epoch 300/500
Epoch 00300: val_loss did not improve from 0.00008
Epoch 301/500
Epoch 00301: val_loss did not improve from 0.00008
Epoch 302/500
Epoch 00302: val_loss did not improve from 0.00008
Epoch 303/500
Epoch 00303: val_loss did not improve from 0.00008
Epoch 304/500
Epoch 00304: val_loss did not improve from 0.00008
Epoch 305/500
Epoch 00305: val_loss did not improve from 0.00008
Epoch 306/500
Epoch 00306: val_loss improved from 0.00008 to 0.00008, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 307/500
Epoch 00307: val_loss did not improve from 0.00008
Epoch 308/500
Epoch 00308: val_loss 

Epoch 320/500
Epoch 00320: val_loss did not improve from 0.00008
Epoch 321/500
Epoch 00321: val_loss did not improve from 0.00008
Epoch 322/500
Epoch 00322: val_loss did not improve from 0.00008
Epoch 323/500
Epoch 00323: val_loss did not improve from 0.00008
Epoch 324/500
Epoch 00324: val_loss did not improve from 0.00008
Epoch 325/500
Epoch 00325: val_loss did not improve from 0.00008
Epoch 326/500
Epoch 00326: val_loss did not improve from 0.00008
Epoch 327/500
Epoch 00327: val_loss did not improve from 0.00008
Epoch 328/500
Epoch 00328: val_loss did not improve from 0.00008
Epoch 329/500
Epoch 00329: val_loss did not improve from 0.00008
Epoch 330/500
Epoch 00330: val_loss did not improve from 0.00008
Epoch 331/500
Epoch 00331: val_loss did not improve from 0.00008
Epoch 332/500
Epoch 00332: val_loss did not improve from 0.00008
Epoch 333/500
Epoch 00333: val_loss did not improve from 0.00008
Epoch 334/500
Epoch 00334: val_loss did not improve from 0.00008
Epoch 335/500
Epoch 00335

Epoch 345/500
Epoch 00345: val_loss did not improve from 0.00008
Epoch 346/500
Epoch 00346: val_loss did not improve from 0.00008
Epoch 347/500
Epoch 00347: val_loss did not improve from 0.00008
Epoch 348/500
Epoch 00348: val_loss did not improve from 0.00008
Epoch 349/500
Epoch 00349: val_loss did not improve from 0.00008
Epoch 350/500
Epoch 00350: val_loss did not improve from 0.00008
Epoch 351/500
Epoch 00351: val_loss did not improve from 0.00008
Epoch 352/500
Epoch 00352: val_loss did not improve from 0.00008
Epoch 353/500
Epoch 00353: val_loss did not improve from 0.00008
Epoch 354/500
Epoch 00354: val_loss did not improve from 0.00008
Epoch 355/500
Epoch 00355: val_loss improved from 0.00008 to 0.00008, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 356/500
Epoch 00356: val_loss did not improve from 0.00008
Epoch 357/500
Epoch 00357: val_loss did not improve from 0.00008
Epoch 358/500
Epoch 00358: val_loss 

Epoch 370/500
Epoch 00370: val_loss did not improve from 0.00008
Epoch 371/500
Epoch 00371: val_loss did not improve from 0.00008
Epoch 372/500
Epoch 00372: val_loss did not improve from 0.00008
Epoch 373/500
Epoch 00373: val_loss did not improve from 0.00008
Epoch 374/500
Epoch 00374: val_loss did not improve from 0.00008
Epoch 375/500
Epoch 00375: val_loss did not improve from 0.00008
Epoch 376/500
Epoch 00376: val_loss improved from 0.00008 to 0.00008, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 377/500
Epoch 00377: val_loss did not improve from 0.00008
Epoch 378/500
Epoch 00378: val_loss did not improve from 0.00008
Epoch 379/500
Epoch 00379: val_loss did not improve from 0.00008
Epoch 380/500
Epoch 00380: val_loss did not improve from 0.00008
Epoch 381/500
Epoch 00381: val_loss did not improve from 0.00008
Epoch 382/500
Epoch 00382: val_loss did not improve from 0.00008
Epoch 383/500
Epoch 00383: val_loss 

Epoch 395/500
Epoch 00395: val_loss did not improve from 0.00008
Epoch 396/500
Epoch 00396: val_loss did not improve from 0.00008
Epoch 397/500
Epoch 00397: val_loss did not improve from 0.00008
Epoch 398/500
Epoch 00398: val_loss did not improve from 0.00008
Epoch 399/500
Epoch 00399: val_loss did not improve from 0.00008
Epoch 400/500
Epoch 00400: val_loss did not improve from 0.00008
Epoch 401/500
Epoch 00401: val_loss improved from 0.00008 to 0.00008, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 402/500
Epoch 00402: val_loss did not improve from 0.00008
Epoch 403/500
Epoch 00403: val_loss did not improve from 0.00008
Epoch 404/500
Epoch 00404: val_loss did not improve from 0.00008
Epoch 405/500
Epoch 00405: val_loss improved from 0.00008 to 0.00007, saving model to results\2021-02-23_AMZN-sh-1-sc-1-sbd-0-huber_loss-adam-LSTM-seq-50-step-15-layers-2-units-256.h5
Epoch 406/500
Epoch 00406: val_loss did not imp

Epoch 420/500
Epoch 00420: val_loss did not improve from 0.00007
Epoch 421/500
Epoch 00421: val_loss did not improve from 0.00007
Epoch 422/500
Epoch 00422: val_loss did not improve from 0.00007
Epoch 423/500
Epoch 00423: val_loss did not improve from 0.00007
Epoch 424/500
Epoch 00424: val_loss did not improve from 0.00007
Epoch 425/500
Epoch 00425: val_loss did not improve from 0.00007
Epoch 426/500
Epoch 00426: val_loss did not improve from 0.00007
Epoch 427/500
Epoch 00427: val_loss did not improve from 0.00007
Epoch 428/500
Epoch 00428: val_loss did not improve from 0.00007
Epoch 429/500
Epoch 00429: val_loss did not improve from 0.00007
Epoch 430/500
Epoch 00430: val_loss did not improve from 0.00007
Epoch 431/500
Epoch 00431: val_loss did not improve from 0.00007
Epoch 432/500
Epoch 00432: val_loss did not improve from 0.00007
Epoch 433/500
Epoch 00433: val_loss did not improve from 0.00007
Epoch 434/500
Epoch 00434: val_loss did not improve from 0.00007
Epoch 435/500
Epoch 00435

Epoch 00445: val_loss did not improve from 0.00007
Epoch 446/500
Epoch 00446: val_loss did not improve from 0.00007
Epoch 447/500
Epoch 00447: val_loss did not improve from 0.00007
Epoch 448/500
Epoch 00448: val_loss did not improve from 0.00007
Epoch 449/500
Epoch 00449: val_loss did not improve from 0.00007
Epoch 450/500
Epoch 00450: val_loss did not improve from 0.00007
Epoch 451/500
Epoch 00451: val_loss did not improve from 0.00007
Epoch 452/500
Epoch 00452: val_loss did not improve from 0.00007
Epoch 453/500
Epoch 00453: val_loss did not improve from 0.00007
Epoch 454/500
Epoch 00454: val_loss did not improve from 0.00007
Epoch 455/500
Epoch 00455: val_loss did not improve from 0.00007
Epoch 456/500
Epoch 00456: val_loss did not improve from 0.00007
Epoch 457/500
Epoch 00457: val_loss did not improve from 0.00007
Epoch 458/500
Epoch 00458: val_loss did not improve from 0.00007
Epoch 459/500
Epoch 00459: val_loss did not improve from 0.00007
Epoch 460/500
Epoch 00460: val_loss did

Epoch 00470: val_loss did not improve from 0.00007
Epoch 471/500
Epoch 00471: val_loss did not improve from 0.00007
Epoch 472/500
Epoch 00472: val_loss did not improve from 0.00007
Epoch 473/500
Epoch 00473: val_loss did not improve from 0.00007
Epoch 474/500
Epoch 00474: val_loss did not improve from 0.00007
Epoch 475/500
Epoch 00475: val_loss did not improve from 0.00007
Epoch 476/500
Epoch 00476: val_loss did not improve from 0.00007
Epoch 477/500
Epoch 00477: val_loss did not improve from 0.00007
Epoch 478/500
Epoch 00478: val_loss did not improve from 0.00007
Epoch 479/500
Epoch 00479: val_loss did not improve from 0.00007
Epoch 480/500
Epoch 00480: val_loss did not improve from 0.00007
Epoch 481/500
Epoch 00481: val_loss did not improve from 0.00007
Epoch 482/500
Epoch 00482: val_loss did not improve from 0.00007
Epoch 483/500
Epoch 00483: val_loss did not improve from 0.00007
Epoch 484/500
Epoch 00484: val_loss did not improve from 0.00007
Epoch 485/500
Epoch 00485: val_loss did

Epoch 495/500
Epoch 00495: val_loss did not improve from 0.00007
Epoch 496/500
Epoch 00496: val_loss did not improve from 0.00007
Epoch 497/500
Epoch 00497: val_loss did not improve from 0.00007
Epoch 498/500
Epoch 00498: val_loss did not improve from 0.00007
Epoch 499/500
Epoch 00499: val_loss did not improve from 0.00007
Epoch 500/500
Epoch 00500: val_loss did not improve from 0.00007


In [37]:
import matplotlib.pyplot as plt

def plot_graph(test_df):
    """
    Draw the true future price (blue) and the predicted future price (red)
    stored in `test_df` on one chart and show it.
    """
    # (column to draw, line color), in the same order as the legend entries below
    series_colors = (
        (f'true_adjclose_{LOOKUP_STEP}', 'b'),
        (f'adjclose_{LOOKUP_STEP}', 'r'),
    )
    for column, color in series_colors:
        plt.plot(test_df[column], c=color)
    plt.xlabel("Days")
    plt.ylabel("Price")
    plt.legend(["Actual Price", "Predicted Price"])
    plt.show()

In [38]:
def get_final_df(model, data):
    """
    This function takes the `model` and `data` dict to
    construct a final dataframe that includes the features along
    with true and predicted prices of the testing dataset.

    Reads the module-level `SCALE` and `LOOKUP_STEP` settings.

    Params:
        model: trained model exposing `predict`
        data (dict): the dict returned by `load_data`; `data["test_df"]` is left untouched,
            so the function can be called repeatedly with consistent results
    Returns:
        a new dataframe, sorted by date, made of the test rows plus the columns
        `adjclose_{LOOKUP_STEP}` (predicted future price), `true_adjclose_{LOOKUP_STEP}`
        (true future price), `buy_profit` and `sell_profit`
    """
    X_test = data["X_test"]
    y_test = data["y_test"]
    # perform prediction and get prices
    y_pred = model.predict(X_test)
    if SCALE:
        scaler = data["column_scaler"]["adjclose"]
        # the scaler was fitted on a single column, so it expects (n_samples, 1) input
        y_test = scaler.inverse_transform(np.reshape(y_test, (-1, 1)))
        y_pred = scaler.inverse_transform(y_pred)
    # flatten to 1-D so both can be stored as plain columns
    y_test = np.ravel(y_test)
    y_pred = np.ravel(y_pred)
    # copy: `data["test_df"]` is ordered like X_test and must keep that order for later calls
    final_df = data["test_df"].copy()
    pred_col = f"adjclose_{LOOKUP_STEP}"
    true_col = f"true_adjclose_{LOOKUP_STEP}"
    # add predicted and true future prices to the dataframe
    final_df[pred_col] = y_pred
    final_df[true_col] = y_test
    # sort the dataframe by date
    final_df.sort_index(inplace=True)
    current = final_df["adjclose"]
    predicted = final_df[pred_col]
    actual = final_df[true_col]
    # the trade is decided from the PREDICTED price and its profit is measured with the TRUE price:
    # predicted rise -> buy now, profit is true future price minus current price
    final_df["buy_profit"] = np.where(predicted > current, actual - current, 0)
    # predicted fall -> sell now, profit is current price minus true future price
    final_df["sell_profit"] = np.where(predicted < current, current - actual, 0)
    return final_df

In [39]:
def predict(model, data):
    """
    Predict the future price from the most recent `N_STEPS` rows kept in
    `data["last_sequence"]`.

    Returns the model output converted back to a price when the module-level
    `SCALE` is set, otherwise the raw model output.
    """
    # most recent window, with a leading batch axis of size 1
    window = data["last_sequence"][-N_STEPS:]
    batch = np.expand_dims(window, axis=0)
    raw_prediction = model.predict(batch)
    if not SCALE:
        return raw_prediction[0][0]
    # undo the 0-1 scaling to get back an actual price
    return data["column_scaler"]["adjclose"].inverse_transform(raw_prediction)[0][0]

In [40]:
# load optimal model weights from results folder
model_path = os.path.join("results", model_name) + ".h5"
model.load_weights(model_path)

# evaluate the model
loss, mae = model.evaluate(data["X_test"], data["y_test"], verbose=0)
# express the mean absolute error in price units
if SCALE:
    # an error is a difference between two scaled prices, so only the scale factor
    # (data max - data min) applies; `inverse_transform` would also add the minimum
    # price as an offset and overstate the error
    mean_absolute_error = mae * data["column_scaler"]["adjclose"].data_range_[0]
else:
    mean_absolute_error = mae

# get the final dataframe for the testing set
final_df = get_final_df(model, data)

# predict the future price
future_price = predict(model, data)

# we calculate the accuracy by counting the number of positive profits
accuracy_score = (len(final_df[final_df['sell_profit'] > 0]) + len(final_df[final_df['buy_profit'] > 0])) / len(final_df)
# calculating total buy & sell profit
total_buy_profit  = final_df["buy_profit"].sum()
total_sell_profit = final_df["sell_profit"].sum()
# total profit by adding sell & buy together
total_profit = total_buy_profit + total_sell_profit
# dividing total profit by number of testing samples (number of trades)
profit_per_trade = total_profit / len(final_df)

# printing metrics
print(f"Future price after {LOOKUP_STEP} days is {future_price:.2f}$")
print(f"{LOSS} loss:", loss)
print("Mean Absolute Error:", mean_absolute_error)
print("Accuracy score:", accuracy_score)
print("Total buy profit:", total_buy_profit)
print("Total sell profit:", total_sell_profit)
print("Total profit:", total_profit)
print("Profit per trade:", profit_per_trade)

plot_graph(final_df)

print(final_df.tail(10))
# save the final dataframe to csv-results folder
csv_results_folder = "csv-results"
if not os.path.isdir(csv_results_folder):
    os.mkdir(csv_results_folder)
csv_filename = os.path.join(csv_results_folder, model_name + ".csv")
final_df.to_csv(csv_filename)

Future price after 15 days is 3186.36$
huber_loss loss: 7.185461436165497e-05


NameError: name 'mean_absolute_error' is not defined