# **Project: Predict an equity price trend**
The goal is to predict the price of a Hong Kong stock based on its Close Price.
A Long Short-Term Memory (LSTM) network is used to predict the closing price of one company, HSBC, based on the past 60 days of its stock price.

In [1]:
# Import the libraries required for the tasks
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense,LSTM
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

In [2]:
# Download the daily quotes for HSBC (ticker 0005.HK) from Yahoo,
# covering 1 Jan 2000 through 7 Dec 2020.
# NOTE: the recorded output of this cell shows a RemoteDataError.
df = web.DataReader(
    '0005.HK',
    data_source='yahoo',
    start='2000-01-01',
    end='2020-12-07',
)
# Show the downloaded quotes
df


RemoteDataError: ignored

In [3]:
# Display the shape of the dataset as a (rows, columns) tuple
df.shape

NameError: ignored

In [None]:
# Plot the closing-price history of HSBC Plc. as a line chart
plt.figure(figsize=(16, 8))
plt.plot(df['Close'])
plt.title('Close Price History (HSBC)')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price (HK$)', fontsize=18)
plt.show()


## **Data Preprocessing**

In [None]:
# Keep only the "Close" column in a new dataframe
data = df.filter(items=['Close'])
# Underlying values of that dataframe as a numpy array
dataset = data.values
# Number of rows reserved for training: 90% of the dataset, rounded up
training_data_len = math.ceil(0.9 * len(dataset))


In [None]:
# Data normalising
# Fit the scaler on the training rows only, so that the minimum/maximum of the
# held-out rows cannot leak into the features the model is trained on, then
# apply that same scaling to the whole series. Training values land in [0, 1];
# values from the held-out rows may fall slightly outside that range.
scaler=MinMaxScaler(feature_range=(0,1))
scaler.fit(dataset[:training_data_len])
scaled_data=scaler.transform(dataset)

In [None]:
# Scaled rows reserved for training
train_data = scaled_data[:training_data_len, :]
# Build inputs and targets with a sliding window: each x_train entry holds
# 60 consecutive scaled closes, and the matching y_train entry is the close that follows
window_ends = range(60, len(train_data))
x_train = [train_data[end - 60:end, 0] for end in window_ends]
y_train = [train_data[end, 0] for end in window_ends]



In [None]:
# Turn the x_train and y_train lists into numpy arrays
x_train = np.array(x_train)
y_train = np.array(y_train)


In [None]:
# The LSTM takes 3-dimensional input (no of samples, no of time steps, no of features);
# add a trailing feature axis of size 1 to the 2-dimensional x_train
sample_count = x_train.shape[0]
step_count = x_train.shape[1]
x_train = x_train.reshape((sample_count, step_count, 1))

# Building the Model
Build an LSTM model with two LSTM layers of 50 neurons each and two Dense layers, one with 25 neurons and the other with 1 neuron.

In [None]:
# Assemble the network: two stacked 50-unit LSTM layers, then Dense(25) and Dense(1)
model = Sequential([
    # The first LSTM returns the full sequence so the second LSTM can consume it
    LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    LSTM(units=50, return_sequences=False),
    Dense(units=25),
    Dense(units=1),
])
model.summary()


In [None]:
# Configure training: Adam optimiser with mean-squared-error loss
model.compile(loss='mean_squared_error', optimizer='adam')


In [None]:
# Fit the network on the training windows: 10 epochs, batches of 5 samples
model.fit(x=x_train, y=y_train, epochs=10, batch_size=5)


In [None]:
# Scaled rows for testing, starting 60 rows before the split so that the
# first test window has a full 60-step history
test_data = scaled_data[training_data_len - 60:, :]
# Targets: the unscaled closes after the training range
y_test = dataset[training_data_len:, :]
# Inputs: one 60-step window per row of test_data from index 60 onwards
x_test = [test_data[stop - 60:stop, 0] for stop in range(60, len(test_data))]

# Stack the windows into a numpy array for use in LSTM model testing
x_test = np.array(x_test)
# Add the trailing feature axis so the shape is the one accepted by the LSTM model
x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

# Model Testing

In [None]:
# Get the model's predicted (scaled) values for the test windows
predictions=model.predict(x_test)
# Undo the scaling (normalising) so the predictions are comparable with y_test
predictions=scaler.inverse_transform(predictions)
# Assess the model with the Root Mean Square Error (RMSE), one of many possible metrics.
# A value of 0 means the predictions match the actual values of the test dataset
# perfectly; the lower the value, the better the model performed.
rmse=np.sqrt(np.mean((predictions-y_test)**2))
print("RMSE value :",rmse)

# Data Testing
Model validation is complete with acceptable prediction accuracy; data testing starts now.

In [None]:
# Split the Close prices into the training part and the validation part
train=data[:training_data_len]
# Take an explicit copy: adding a column to a plain slice of `data` triggers
# pandas' SettingWithCopyWarning and may not behave as intended
valid=data[training_data_len:].copy()
# Attach the model's predictions next to the actual closing prices
valid['Predictions']=predictions

# Visualise the training prices, the actual validation prices and the predictions
plt.figure(figsize=(16,8))
plt.title('Stock Price Prediction : HSBC Plc')
plt.xlabel('Date',fontsize=18)
plt.ylabel('Close Price (HK$)',fontsize=18)
plt.plot(train['Close'])
plt.plot(valid[['Close','Predictions']])
plt.legend(['Train','Val','Predictions'],loc='lower right')
plt.show()

In [None]:
# Display the actual ('Close') and predicted ('Predictions') prices side by side
valid

In [None]:
# Fetch the HSBC (0005.HK) quotes for 1 Jul 2020 through 31 Dec 2020
hsbc_quote=web.DataReader('0005.HK',data_source='yahoo',start='2020-07-01',end='2020-12-31')
# Keep only the closing prices
recent_close=hsbc_quote.filter(['Close'])
# Take the last 60 closing prices as a 2-D array with one column
last_60_days=recent_close[-60:].values
# Apply the already-fitted scaler (no re-fitting), so the values are on the
# same scale the model was trained on
last_60_days_scaled=scaler.transform(last_60_days)
# Arrange the window as a single sample of shape (1, time steps, 1) for the LSTM
x_test1=last_60_days_scaled.reshape(1,-1,1)
# Get the predicted scaled price
pred_price=model.predict(x_test1)
# Undo the scaling / normalisation
pred_price=scaler.inverse_transform(pred_price)
print(pred_price)

In [None]:
# Fetch the HSBC (0005.HK) quote for one particular day and print its closing price
# NOTE(review): the `apple_` prefix looks like a leftover name; the ticker queried is HSBC's
quote_date = '2020-04-01'
apple_quote_now = web.DataReader(
    '0005.HK',
    data_source='yahoo',
    start=quote_date,
    end=quote_date,
)
print("HSBC Plc for the date (HK$):", apple_quote_now['Close'])