In [135]:
#!pip install hvplot

In [136]:
# Imports to get show started

import numpy as np
import pandas as pd
import hvplot.pandas
from numpy.random import seed
from pathlib import Path
import matplotlib.pyplot as plt

# Import required preprocessing and Keras modules
from sklearn.preprocessing import MinMaxScaler
from tensorflow import random
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


In [137]:
# TODO after model experimentation
# Define random seed for reproducibility

# seed(1)
# random.set_seed(2)

In [138]:
# Load the prepared model dataset created in the data_prep notebook.
# "Date_Time" becomes the index and is parsed into datetimes. The deprecated
# `infer_datetime_format` flag is intentionally not passed: pandas >= 2.0
# infers the format on its own and emits a warning when the flag is supplied.
model_df = pd.read_csv(
    Path('./ModelData/model_dataset.csv'),
    index_col="Date_Time",
    parse_dates=True,
)

# Full-column copy taken before model_df is pruned, so columns dropped for the
# model (e.g. "Close") remain available for the evaluation plots.
eval_df = model_df.copy()

In [139]:
# Last minute pruning of unwanted columns

#####################################################################
# CRITICAL TO MATCH SAME COLUMNS AS USED IN TRAIN NOTEBOOK FOR THIS 
# MODEL. THE SAME PROCESS IS USED HERE TO RECREATE THE TEST DATASET
#####################################################################

# Columns removed before building the model inputs: Close, Volume and
# US_Holiday.
column_2drop_list = ['Close', 'Volume', 'US_Holiday']

model_df = model_df.drop(column_2drop_list, axis="columns")
model_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 35513 entries, 2017-01-01 00:00:00 to 2021-01-15 00:00:00
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   US_Market_Open        35513 non-null  float64
 1   Trail24hr_CloseRatio  35513 non-null  float64
 2   Trail12Wk_CloseRatio  35513 non-null  float64
 3   Trail52Wk_CloseRatio  35513 non-null  float64
 4   Hr_Return             35513 non-null  float64
 5   Trail24hr_Return      35513 non-null  float64
 6   Trail24hr_Std         35513 non-null  float64
 7   Trail12Wk_Return      35513 non-null  float64
 8   Trail12Wk_Std         35513 non-null  float64
 9   Trail52Wk_Return      35513 non-null  float64
 10  Trail52Wk_Std         35513 non-null  float64
 11  Trail24hr_VolRatio    35513 non-null  float64
 12  Trail12Wk_VolRatio    35513 non-null  float64
 13  Trail52Wk_VolRatio    35513 non-null  float64
 14  Vol_PctDelta          35513 non-nul

In [140]:
def data_window_chopper(df, window_len, feature_col_numlist, target_col_num):
    """
    Chop a dataframe into rolling-window feature rows (X) and targets (y).

    One sample is produced for every start row ``i`` in
    ``range(len(df) - window_len)``:

    * X row: for each column position in ``feature_col_numlist`` (in the
      order given), the ``window_len`` consecutive values found in rows
      ``i`` .. ``i + window_len - 1`` of that column. The per-column windows
      are laid end to end (feature-major order).
    * y row: the value of column ``target_col_num`` at row ``i + window_len``,
      i.e. the row immediately after the window.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data, one row per step.
    window_len : int
        Number of consecutive rows per window; must be at least 1.
    feature_col_numlist : sequence of int
        Positional (0-based) indices of the feature columns.
    target_col_num : int
        Positional (0-based) index of the target column.

    Returns
    -------
    X : numpy.ndarray
        Shape ``(n_windows, len(feature_col_numlist) * window_len)``.
    y : numpy.ndarray
        Shape ``(n_windows, 1)``.

    ``n_windows`` is ``max(len(df) - window_len, 0)``.

    Raises
    ------
    ValueError
        If ``window_len`` is smaller than 1.
    """
    if window_len < 1:
        raise ValueError("window_len must be at least 1")

    feature_cols = list(feature_col_numlist)
    n_windows = max(len(df) - window_len, 0)

    # Leave pandas once up front; slicing a Series per window per feature in a
    # Python loop is orders of magnitude slower than a single NumPy gather.
    feature_values = df.iloc[:, feature_cols].to_numpy()   # (n_rows, n_features)
    target_values = df.iloc[:, target_col_num].to_numpy()  # (n_rows,)

    # row_idx[i, k] == i + k: the source row of the k-th step of window i.
    row_idx = np.arange(n_windows)[:, None] + np.arange(window_len)[None, :]

    # Gather -> (n_windows, window_len, n_features); swap the last two axes so
    # that flattening yields all steps of feature 0, then feature 1, and so on.
    windows = feature_values[row_idx].transpose(0, 2, 1)
    X = windows.reshape(n_windows, len(feature_cols) * window_len)

    # The target of window i sits at row i + window_len.
    y = target_values[window_len:].reshape(-1, 1)
    return X, y

In [141]:
# Build the rolling-window feature (X) and target (y) arrays.

window_size = 12  # dataset is hourly, so start with half-day windows
n_columns = model_df.shape[1]
target_col_num = n_columns - 1                     # last column, 0-based position
feature_col_numlist = list(range(target_col_num))  # every column before the target
X, y = data_window_chopper(
    df=model_df,
    window_len=window_size,
    feature_col_numlist=feature_col_numlist,
    target_col_num=target_col_num,
)

#print(f"X sample values:\n {X[:2]}")

In [142]:
# Ordered (unshuffled) 70/30 split: the first 70% of the samples are used for
# training, the remaining 30% for testing.
split = int(len(X) * 0.7)
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

In [143]:
# Scale every feature column into the 0..1 range with MinMaxScaler.
# NOTE(review): the scaler is fitted on the full X (train + test), so test-set
# minima/maxima influence the scaling (data leakage). Left unchanged because
# this notebook has to reproduce the train notebook's preprocessing for the
# saved model -- confirm how the scaler is fitted there before changing it.
scaler = MinMaxScaler().fit(X)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

In [144]:
# Keras recurrent layers take 3-D input, so append a trailing axis of length 1:
# (samples, values) -> (samples, values, 1).

X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

#print (f"X_train sample values:\n{X_train[:2]} \n")
#print (f"X_test sample values:\n{X_test[:5]}")

In [145]:
# Load the model saved by the train notebook so it can be evaluated here.
# The relative path "./Model1" is resolved against the current working directory.

saved_model = keras.models.load_model("./Model1")

In [146]:
# Evaluate model performance with test data.
# The cell output is a two-element list -- presumably [loss, metric] as
# configured when the model was compiled in the train notebook; TODO confirm
# which metric the second value is.

saved_model.evaluate(X_test, y_test)



[0.004344658460468054, 0.9997183084487915]

In [196]:
# Model predictions for the test windows. Use the model's output here, not
# y_test: y_test holds the true labels, so plotting it under the name
# "Predicted" would show the actuals instead of what the model produced.
# assumes the model emits one value per sample (output shape (n, 1)) -- TODO confirm
#
# Test sample k has its target at row (window_size + split + k) of model_df,
# so that slice of the index gives each prediction its Date_Time stamp and
# lets the frame be merged with the price data.
predicted = pd.DataFrame(
    {"Predicted": saved_model.predict(X_test).ravel()},
    index=model_df.index[window_size + split:],
)

# Scaled by 40000 -- presumably so the series is visible next to the Close
# price when plotted together; confirm the intended factor.
predicted["Predicted"] = predicted["Predicted"] * 40000
predicted.hvplot()

In [181]:
# Separate out the series to graph: the actual Significant_Drawdown values,
# multiplied by 40000.
# TODO: figure out how to export the results of the model.
close = eval_df["Significant_Drawdown"].mul(40000)
close.head()

Date_Time
2017-01-01 00:00:00    0
2017-01-01 01:00:00    0
2017-01-01 02:00:00    0
2017-01-01 03:00:00    0
2017-01-01 04:00:00    0
Name: Significant_Drawdown, dtype: int64

In [199]:
# Combine the Close price with the scaled drawdown series, matched on
# Date_Time, and plot both in one chart.
# TODO: also merge the model predictions into this frame on Date_Time.
graph_model = eval_df[["Close"]].merge(
    close,
    left_on="Date_Time",
    right_on="Date_Time",
)
graph_model.hvplot()

In [None]:
# Preview the first rows of model_df (the frame left after the column pruning).
model_df.head()

In [None]:
# TODO Create graphs and eval here.
# NOTE: the Close price was dropped from model_df before the train/test vectors were created;
# take it from eval_df (copied before the drop) or reload it from the CSV.

"""
Graphs to show:
1- loss reduction?


2- closing price vs actual drawdowns vs predicted drawdowns
    great if this can be shown all in one graph with predicted of course starting later on in time
    
"""


