In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from keras.models import load_model

In [2]:
# Load the saved Keras model that the prediction cells below pass around as `close_model`
CLOSE_MODEL_PATH = 'predict_hourly_close.h5'
close_model = load_model(CLOSE_MODEL_PATH)

### Load and normalize incoming data

In [4]:
# Read the hourly price history; the path is resolved relative to the notebook's working directory
hourly_csv_path = "../hourly_close.csv"
data = pd.read_csv(hourly_csv_path)
# data.rename(columns={"Change":"Close"}, inplace=True)
data

Unnamed: 0.1,Unnamed: 0,Time,Open,High,Low,Last,Volume
0,28,5/5/08 9:59,141.2000,141.260,141.140,141.1800,451000.0
1,88,5/5/08 10:59,140.5900,140.600,140.530,140.5400,446700.0
2,148,5/5/08 11:59,140.9000,140.900,140.830,140.8500,190900.0
3,208,5/5/08 12:59,140.8300,140.860,140.800,140.8600,40600.0
4,268,5/5/08 13:59,140.4900,140.530,140.440,140.5200,409400.0
...,...,...,...,...,...,...,...
22010,1402558,11/13/20 11:59,356.2399,356.355,356.220,356.2700,100894.0
22011,1402618,11/13/20 12:59,356.6400,356.740,356.635,356.7399,42318.0
22012,1402678,11/13/20 13:59,356.6900,356.700,356.560,356.6000,97078.0
22013,1402738,11/13/20 14:59,357.8000,357.810,357.640,357.6600,116020.0


In [5]:
# Keep only the five numeric price/volume columns; the unnamed index-like columns and Time are dropped
feature_columns = ["Open", "High", "Low", "Last", "Volume"]
data = data[feature_columns]
data

Unnamed: 0,Open,High,Low,Last,Volume
0,141.2000,141.260,141.140,141.1800,451000.0
1,140.5900,140.600,140.530,140.5400,446700.0
2,140.9000,140.900,140.830,140.8500,190900.0
3,140.8300,140.860,140.800,140.8600,40600.0
4,140.4900,140.530,140.440,140.5200,409400.0
...,...,...,...,...,...
22010,356.2399,356.355,356.220,356.2700,100894.0
22011,356.6400,356.740,356.635,356.7399,42318.0
22012,356.6900,356.700,356.560,356.6000,97078.0
22013,357.8000,357.810,357.640,357.6600,116020.0


In [6]:
# Scaler instance used on the model inputs.
# NOTE: the variable shares its name with the sklearn class; it is kept because
# later cells reference this exact name.
MinMaxScaler = preprocessing.MinMaxScaler()
# Length of the look-back window: how many past rows are skipped before the first target value
history_points = 50

### Creating y_scaler for predicted variable

In [7]:
# Retrieve the real next-step close ("Last") values: the target for window i is the
# row at position i + history_points, i.e. every row after the first history_points rows.
# The column is sliced once instead of rebuilding list(data["Last"]) on every loop
# iteration, which made the original comprehension quadratic in the number of rows.
next_day_close_values = data["Last"].to_numpy()[history_points:]
# Add a trailing axis so the targets form a 2-D column of shape (n_samples, 1),
# the layout the scaler fitted in the next cell works on.
unscaled_close_y = np.expand_dims(next_day_close_values, -1)
# Targets for other columns (Open, High, Low, Volume) can be built with the same slice
# if models for them are added to this notebook.

In [8]:
# Fit a dedicated min-max scaler on the close targets; it is used later to map
# model outputs back to price units via inverse_transform.
y_normaliser = preprocessing.MinMaxScaler()
y_normaliser.fit(unscaled_close_y)
# fit() returns the scaler itself, so both names refer to the same fitted object
y_normaliser_close = y_normaliser
# Scalers for other targets (open, high, low, volume) would each need their own
# MinMaxScaler instance fitted the same way on the matching unscaled_*_y array.

### Making predictions

In [9]:
# Predict close value
def make_prediction(data, model, y_norm, window=50):
    """Predict the next target value from the most recent rows of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame or 2-D array-like
        Feature rows in chronological order; only the last ``window`` rows are used.
    model : object
        Trained model exposing ``predict``. It is called with one batch of shape
        ``(1, window, n_features)``.
    y_norm : object
        Fitted scaler exposing ``inverse_transform``; maps the model output back
        to the units of the target variable.
    window : int, default 50
        Number of trailing rows fed to the model (must be positive). The default
        keeps the behaviour of the previously hard-coded value.

    Returns
    -------
    float
        The first element of the inverse-transformed model output.
    """
    recent_rows = data[-window:]
    # Use a private scaler per call so predictions never depend on, or alter,
    # the fitted state of a scaler shared between notebook cells.
    # NOTE(review): inputs are scaled by the min/max of this window only, while
    # y_norm is fitted separately -- confirm this matches how the model was trained.
    input_scaler = preprocessing.MinMaxScaler()
    normalized_data = input_scaler.fit_transform(recent_rows)
    # The model expects a batch axis in front: (1, window, n_features)
    model_output = model.predict(np.expand_dims(normalized_data, 0))
    # Convert the scaled output back to real terms
    real_terms = y_norm.inverse_transform(model_output)
    return float(real_terms[0][0])

In [10]:
def predict_actual_values(data, variable_to_predict, model, y_normaliser):
    """Project the model's predicted relative move onto the latest observed value.

    The model is evaluated on the newest 50-row window and on the window ending
    one row earlier. The relative change between those two outputs is then
    applied to the last actual value of ``variable_to_predict``.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature rows in chronological order, containing ``variable_to_predict``.
    variable_to_predict : str
        Column whose latest value anchors the projection (e.g. ``"Last"``).
    model, y_normaliser
        Passed through unchanged to ``make_prediction``.

    Returns
    -------
    tuple
        ``(predicted_actual, last_prediction, prediction_percent_change, last_actual)``.
        ``prediction_percent_change`` is a fraction (0.01 means 1 %), not a percentage.
    """
    # model output for the newest window (rows -50 .. -1)
    prediction = make_prediction(data[-50:], model, y_normaliser)
    # model output for the previous window (rows -51 .. -2); still in model terms,
    # it is only used as the baseline for the relative change
    last_prediction = make_prediction(data[-51:-1], model, y_normaliser)
    # relative change between the two consecutive predictions
    prediction_percent_change = (prediction - last_prediction) / last_prediction
    # last actual value: read the final row directly instead of indexing into a
    # 3-row tail, which failed on frames shorter than three rows
    last_actual = data[variable_to_predict].iloc[-1]
    # apply the predicted relative change to the actual value
    predicted_actual_close = last_actual + (last_actual * prediction_percent_change)
    return float(predicted_actual_close), last_prediction, prediction_percent_change, last_actual

### Buy, sell or hold

In [11]:
# Raw model output for the newest window
predicted_close = make_prediction(data, close_model, y_normaliser_close)
# The same forecast expressed relative to the latest actual value, plus the intermediate figures
(
    predicted_actual,
    last_prediction,
    percent_change,
    last_actual,
) = predict_actual_values(data, "Last", close_model, y_normaliser_close)
# One print call, one line per figure
print(
    f"Last Predicted Close: {last_prediction}",
    f"Next Predicted Close: {predicted_close}",
    f"Last Actual Close: {last_actual}",
    f"Predicted Actual Close: {predicted_actual}",
    sep="\n",
)

Last Predicted Close: 293.0664978027344
Next Predicted Close: 303.0126953125
Last Actual Close: 358.1
Predicted Actual Close: 370.2533281864395


#### Trade based off *price* predicted

In [12]:
# Containers for recorded trades (nothing is appended to them in this cell)
buys, sells = [], []

# Minimum predicted move, in price units, before a trade signal is emitted
thresh = 5.00
diff = predicted_close - last_prediction
print(f"Predicted Price Change: {diff}")

# Above +thresh -> buy, below -thresh -> sell, anything in between -> hold
print("buy" if diff > thresh else ("sell" if diff < -thresh else "hold"))

Predicted Price Change: 9.946197509765625
buy


#### Trade based off *percent change* predicted

In [13]:
# Threshold expressed in percent, compared against the predicted change in percent
thresh = .5
# percent_change is a fraction; scale it to percent for display and comparison
change = percent_change * 100
print(f"Predicted Percent Change: {change}")

# Above +thresh -> buy, below -thresh -> sell, anything in between -> hold
print("buy" if change > thresh else ("sell" if change < -thresh else "hold"))

Predicted Percent Change: 3.393836410622581
buy


In [14]:
# NOTE(review): disabled multi-step forecast experiment, kept for reference only.
# It cannot run as written against the visible cells of this notebook:
#   - open_model, high_model, low_model, volume_model and the matching
#     y_normaliser_open/high/low/volume scalers are never created;
#   - predict_actual_values returns a 4-tuple, not a single value per column;
#   - the lowercase names ("open", "close", ...) do not match the capitalised
#     columns of `data` ("Open", "Last", ...);
#   - DataFrame.append was removed in pandas 2.0 (pd.concat is the replacement).
# days_to_predict = 10
# index=max(data.index)

# for i in range(days_to_predict):
#     new_row=pd.DataFrame({
#         "open":predict_actual_values(data[-50:], "open", open_model, y_normaliser_open),
#         "high":predict_actual_values(data[-50:], "high", high_model, y_normaliser_high),
#         "low":predict_actual_values(data[-50:], "low", low_model, y_normaliser_low),
#         "close":predict_actual_values(data[-50:], "close", close_model, y_normaliser_close),
#         "volume":predict_actual_values(data[-50:], "volume", volume_model, y_normaliser_volume)
#     }, index=[index+1])
#     data=data.append(new_row)