In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from keras.models import load_model

In [2]:
# Loading different models
close_model=load_model('built_models/predict_next_day_close.h5')
open_model=load_model('built_models/predict_next_day_open.h5')
high_model=load_model('built_models/predict_next_day_high.h5')
low_model=load_model('built_models/predict_next_day_low.h5')
volume_model=load_model('built_models/predict_next_day_volume.h5')

#### Normalizing historical data as X

In [13]:
data=pd.read_csv("SPY_daily.csv")
data.rename(columns={"1. open":"open","2. high":"high","3. low":"low","4. close":"close","5. volume":"volume"}, inplace=True)

In [14]:
# Flipping the dataset
data = data[::-1].reset_index(drop=True)
data.tail()

Unnamed: 0,date,open,high,low,close,volume
5291,2020-11-10,353.49,355.18,350.51,354.04,85552022.0
5292,2020-11-11,356.4,357.56,355.06,356.67,58649048.0
5293,2020-11-12,355.58,356.7182,351.26,353.21,68118563.0
5294,2020-11-13,355.27,358.9,354.71,358.1,62959429.0
5295,2020-11-16,360.98,362.78,359.59,362.57,72203007.0


In [15]:
data=data.drop("date", axis=1)
data.tail()

Unnamed: 0,open,high,low,close,volume
5291,353.49,355.18,350.51,354.04,85552022.0
5292,356.4,357.56,355.06,356.67,58649048.0
5293,355.58,356.7182,351.26,353.21,68118563.0
5294,355.27,358.9,354.71,358.1,62959429.0
5295,360.98,362.78,359.59,362.57,72203007.0


In [16]:
# Number of datapoints to use in predictions
history_points=50
# Scaling data
MinMaxScaler = preprocessing.MinMaxScaler()
X = MinMaxScaler.fit_transform(data)

#### Creating y_scaler for each variable

In [7]:
# Retriving the real next day open values
next_day_close_values = np.array([data.iloc[:,3][i + history_points].copy() for i in range(len(data) - history_points)])
next_day_open_values = np.array([data.iloc[:,0][i + history_points].copy() for i in range(len(data) - history_points)])
next_day_high_values = np.array([data.iloc[:,1][i + history_points].copy() for i in range(len(data) - history_points)])
next_day_low_values = np.array([data.iloc[:,2][i + history_points].copy() for i in range(len(data) - history_points)])
next_day_volume_values = np.array([data.iloc[:,4][i + history_points].copy() for i in range(len(data) - history_points)])
# Expanding the dimentions of next_day_open_values_normalised (5246, 1, 1)
unscaled_close_y = np.expand_dims(next_day_close_values, -1)
unscaled_open_y = np.expand_dims(next_day_open_values, -1)
unscaled_high_y = np.expand_dims(next_day_high_values, -1)
unscaled_low_y = np.expand_dims(next_day_low_values, -1)
unscaled_volume_y = np.expand_dims(next_day_volume_values, -1)

In [8]:
y_normaliser = preprocessing.MinMaxScaler()
y_normaliser_close=y_normaliser.fit(unscaled_close_y)
y_normaliser = preprocessing.MinMaxScaler()
y_normaliser_open=y_normaliser.fit(unscaled_open_y)
y_normaliser = preprocessing.MinMaxScaler()
y_normaliser_high=y_normaliser.fit(unscaled_high_y)
y_normaliser = preprocessing.MinMaxScaler()
y_normaliser_low=y_normaliser.fit(unscaled_low_y)
y_normaliser = preprocessing.MinMaxScaler()
y_normaliser_volume=y_normaliser.fit(unscaled_volume_y)

### Making predictions

In [9]:
# Predict close value
def make_prediction(d, model, y_norm):
    # Normalize the input data
    normalized_data=MinMaxScaler.fit_transform(d)
    # Predict next day's close
    today=model.predict(np.expand_dims(normalized_data, 0))
    # Convert the close to real terms
    last=y_norm.inverse_transform(today)
    return float(last[0][0])

In [10]:
def predict_actual_values(data, variable_to_predict, model, y_normaliser):    
    # make prediction
    prediction=make_prediction(data[-50:], model, y_normaliser)
    # converting to actual stock market price
    last_prediction=make_prediction(data[-51:-1], model, y_normaliser)
    # calculating percent change of from last predicted value
    prediction_percent_change=((prediction-last_prediction)/last_prediction)
    # getting last actual value
    last_actual = data[-3:].iloc[2][variable_to_predict]
    # calculating predicted percent change on actual value
    predicted_actual_close=last_actual+(last_actual*prediction_percent_change)
    return float(predicted_actual_close)

#### Not a reliable predictor of multiple days

In [17]:
days_to_predict = 10

for i in range(days_to_predict):
    new_row=pd.DataFrame({
        "open":predict_actual_values(data[-50:], "open", open_model, y_normaliser_open),
        "high":predict_actual_values(data[-50:], "high", high_model, y_normaliser_high),
        "low":predict_actual_values(data[-50:], "low", low_model, y_normaliser_low),
        "close":predict_actual_values(data[-50:], "close", close_model, y_normaliser_close),
        "volume":predict_actual_values(data[-50:], "volume", volume_model, y_normaliser_volume)
    }, index=[max(data.index)+1])
    data=data.append(new_row)

In [19]:
data[-11:]

Unnamed: 0,open,high,low,close,volume
5295,360.98,362.78,359.59,362.57,72203010.0
5296,355.485659,358.04142,353.651344,357.386311,93557870.0
5297,348.410718,352.868278,345.033087,351.041453,188632100.0
5298,317.38579,325.77216,309.997024,320.716215,462347600.0
5299,242.500528,256.165891,231.321714,248.115577,524977000.0
5300,238.662624,255.360076,224.850449,244.436935,598505300.0
5301,181.427742,200.508527,166.535903,188.131377,590729100.0
5302,180.487608,199.434091,165.851859,186.78464,581039200.0
5303,139.593736,159.103053,125.315663,145.94809,559518700.0
5304,141.720965,160.218865,128.376074,147.461868,543290500.0
