Using a TF LSTM with a Sequential model to predict a time window of a stock

#  1. Frame The Problem 
As a stock investor, I need an easier way to determine stock predictions and better tools to assist in stock forecasting.


In [1]:
#Import the libraries
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
from pickle import load, dump
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM # Long Short-Term Memory layer
import keras
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from timeit import default_timer as timer

# Plot styling shared by every figure drawn below.
plt.style.use('fivethirtyeight')

# Sampling window: starts on the first day of the month that lies 360 days
# back (the format string pins the day-of-month to "01") and ends today.
_window_anchor = datetime.now() - timedelta(days=360)
sample_start_date = _window_anchor.strftime("%Y-%m-01")
sample_stop_date = datetime.now().strftime("%Y-%m-%d")
print("{} -> {}".format(sample_start_date, sample_stop_date))

# Ticker to model. Other tickers that were tried here:
# CPRX, NCLH, ISEE, GME, LCID, TLRY, NKLA
STOCK = 'AMD'

# Length of the look-back window: how many consecutive closing prices make up
# one model input sample.
n_days_to_trend = 30

def report(results, n_top=3):
    """Print the best-ranked entries of a search-results mapping.

    For every rank from 1 to ``n_top`` (inclusive), each entry whose
    ``rank_test_score`` equals that rank is printed with:

    * the square root of its negated ``mean_test_score``,
    * its ``std_test_score``,
    * its ``params``.

    Args:
        results: Mapping with ``rank_test_score``, ``mean_test_score``,
            ``std_test_score`` and ``params`` entries, indexable by
            candidate position.
            # presumably scikit-learn ``cv_results_`` - verify against caller
        n_top: Highest rank to print.
    """
    for rank in range(1, n_top + 1):
        # Several candidates can share one rank (ties), so print all of them.
        for idx in np.flatnonzero(results['rank_test_score'] == rank):
            score = np.sqrt(-results['mean_test_score'][idx])
            spread = results['std_test_score'][idx]
            print("Model with rank: {0}".format(rank))
            print("Mean validation score: {0:.3f} (std: {1:.3f})"
                  .format(score, spread))
            print("Parameters: {0}".format(results['params'][idx]))
            print("")


class TimingCallback(keras.callbacks.Callback):
    """Keras callback that records how long each epoch takes.

    After training, ``logs`` holds one elapsed-time entry (in seconds, as
    measured by ``timeit.default_timer``) per completed epoch, in order.
    """

    def __init__(self, logs=None):
        # Run the base-class initialiser so the callback state that Keras
        # sets up in ``Callback.__init__`` exists on this instance.
        super().__init__()
        # The ``logs`` argument is accepted only for backward compatibility;
        # it was never used. The per-epoch durations always start empty.
        self.logs = []
        # Start time of the epoch currently running (set in on_epoch_begin).
        self.starttime = None

    def on_epoch_begin(self, epoch, logs=None):
        """Remember when the epoch started."""
        self.starttime = timer()

    def on_epoch_end(self, epoch, logs=None):
        """Append the elapsed time of the epoch that just finished."""
        self.logs.append(timer() - self.starttime)
        

ModuleNotFoundError: No module named 'pandas_datareader'

# 2. Get the Data 

Using the online Yahoo stock data source, I will automatically fetch the data needed to train the model.

In [None]:
# Download the price history for STOCK from the 'yahoo' data source,
# limited to the sampling window defined above.
df = web.DataReader(
    STOCK,
    data_source='yahoo',
    start=sample_start_date,
    end=sample_stop_date,
)
# Print the downloaded data so it can be inspected.
print(df)

# 3. Explore The Data 

Identify insights into what the data shows, remembering to document any bias I might see or have.


In [None]:
# Dimensions of the downloaded data (shown as the cell output in a notebook).
df.shape

In [None]:
# Draw the closing price over the whole sampling window.
plt.figure(figsize=(16, 8))
plt.plot(df['Close'])
plt.title('Close Price History {}'.format(STOCK))
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.show()

In [None]:
# Keep only the 'Close' column; everything below works on closing prices.
data = df.filter(['Close'])
# Raw (unscaled) closing prices as a NumPy array.
dataset = data.values
print(len(dataset))
# The first 80% of the rows (rounded up) are used to train the model.
training_data_len = math.ceil(0.8 * len(dataset))

# 4. Prepare the Data
When using a NN, the data must be scaled into a proper bounded range.


In [None]:

# Scale all closing prices to values between 0 and 1.
# NOTE(review): the scaler is fitted on the complete dataset, so the min/max
# of the rows later used for testing also shape the scaling of the training
# rows. Consider fitting on dataset[:training_data_len] only.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)
# Persist the fitted scaler so it can be reloaded at prediction time. The
# context manager guarantees the file is flushed and closed.
with open('mmscaler.pkl', 'wb') as scaler_file:
    dump(scaler, scaler_file)

In [None]:
# Training portion of the scaled series.
train_data = scaled_data[:training_data_len, :]
# Sliding windows: each sample is n_days_to_trend consecutive scaled closes,
# and its target is the scaled close that immediately follows the window.
window_ends = range(n_days_to_trend, len(train_data))
x_train = [train_data[end - n_days_to_trend:end, 0] for end in window_ends]
y_train = [train_data[end, 0] for end in window_ends]
print(len(x_train))
print(len(y_train))

In [None]:
# Turn the Python lists of windows and targets into NumPy arrays.
x_train = np.array(x_train)
y_train = np.array(y_train)

In [None]:
# Add a trailing axis of size 1 so the array is (samples, timesteps, 1),
# matching the input_shape given to the first LSTM layer.
x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))

# 5. Model the Data 

Using a Sequential LSTM network, apply the layers and determine the densities.


In [None]:
# Stacked network: two LSTM layers followed by two Dense layers, ending in a
# single output unit. The first LSTM returns the full sequence so the second
# LSTM can consume it.
model = Sequential([
    LSTM(units=113, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    LSTM(units=50, return_sequences=False),
    Dense(units=25),
    Dense(units=1),
])

# Adam optimizer with a mean-squared-error loss.
model.compile(optimizer='adam', loss='mean_squared_error')

# Print the layer-by-layer summary.
model.summary()

**Train The Model**

In [None]:
# Train the model, recording the duration of every epoch.
cb = TimingCallback()
# `workers` is intentionally not passed: it only applied to generator /
# keras.utils.Sequence input (not NumPy arrays) and newer Keras releases no
# longer accept it in Model.fit.
model.fit(x_train, y_train, batch_size=1, epochs=30, callbacks=[cb])
# Total training time is the sum of the per-epoch durations.
print("{} Seconds".format(sum(cb.logs)))
# Save the trained model so it can be reloaded at prediction time.
model.save('my_stock_model.h5')

In [None]:
# Scaled rows used for testing. The slice starts n_days_to_trend rows before
# the split so that the first test window is complete.
test_data = scaled_data[training_data_len - n_days_to_trend:, :]
# Unscaled targets: every row from the split index to the end of the dataset.
y_test = dataset[training_data_len:, :]
# One window of n_days_to_trend scaled closes per test target.
x_test = [
    test_data[end - n_days_to_trend:end, 0]
    for end in range(n_days_to_trend, len(test_data))
]
    

In [None]:
# Turn the Python list of test windows into a NumPy array.
x_test = np.asarray(x_test)

In [None]:
# Add a trailing axis of size 1 so the array is (samples, timesteps, 1),
# the same layout the model was trained on.
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

In [None]:
# Predict in scaled space, then map the output back through the fitted scaler
# to undo the 0-1 scaling.
scaled_predictions = model.predict(x_test)
predictions = scaler.inverse_transform(scaled_predictions)

In [None]:
# Root-mean-squared error between the predictions and the unscaled targets.
residuals = predictions - y_test
rmse = np.sqrt(np.mean(np.square(residuals)))
rmse

# 6. Fine-Tune the Model 
Display the results of the predictions and adjust.

# 7. Present the solution

In [None]:
# Split the Close frame at the same row index used for the train/test split.
train = data.iloc[:training_data_len]
valid = data.iloc[training_data_len:]
# Place the model output next to the actual closes as a second column.
valid.insert(1, "Predictions", predictions, allow_duplicates=True)

# Training closes, validation closes and predictions on one figure.
plt.figure(figsize=(16, 8))
plt.title('Model for {}'.format(STOCK))
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='upper right')
# plt.show()

In [None]:
#Show the valid and predicted prices
# valid

# 8. Launch the ML System
In a true production system you would not want to load the model each time, as that would be slow and would be a performance bottleneck.

In [None]:
def GetValue(High, Low, Open, Close, Volume, AdjClose):
    """Load the saved model and scaler and run a prediction on one quote.

    The six arguments are stacked into a (6, 1) column, scaled with the
    pickled scaler, reshaped to (6, 1, 1) and passed to the model. The model
    output is mapped back through the scaler's inverse transform.

    NOTE(review): the scaler and model saved earlier in this file are fitted
    on closing prices only, in windows of ``n_days_to_trend`` time steps.
    Here each of the six quote fields (including Volume) becomes its own
    one-step sample, which does not match that layout. Confirm the intended
    input before relying on the result.

    Args:
        High, Low, Open, Close, Volume, AdjClose: Numeric fields of a single
            quote.

    Returns:
        The inverse-scaled model output as returned by
        ``scaler.inverse_transform``.
    """
    model = keras.models.load_model('my_stock_model.h5')
    # Load the scaler. The context manager closes the file handle, which the
    # previous bare open() call leaked.
    # NOTE: pickle must only be used on trusted files; this one is written by
    # this notebook itself.
    with open('mmscaler.pkl', 'rb') as scaler_file:
        scaler = load(scaler_file)
    x_test = np.array([[High], [Low], [Open], [Close], [Volume], [AdjClose]])
    x_test = scaler.transform(x_test)
    # Add the trailing feature axis expected by the LSTM input.
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
    predictions = model.predict(x_test)
    # Undo the 0-1 scaling.
    predictions = scaler.inverse_transform(predictions)
    return predictions

# print("Predicted Value is: ", str(GetValue(66.91, 63.39, 64.46, 66.11, 83734300.0, 66.11)))
# Example call; arguments are passed in the order
# High, Low, Open, Close, Volume, AdjClose.
GetValue(58.51, 57.10, 57.35, 57.96, 76650225, 57.96)

In [None]:
# Number of rows in the downloaded data.
len(df)
# Row at integer position 209.
# NOTE(review): the position is hard-coded, so this fails when df has fewer
# than 210 rows - confirm which row is actually meant (e.g. the last one).
df.iloc[209]