___

<a href='http://www.dourthe.tech'> <img src='Dourthe_Technologies_Headers.png' /></a>
___
<center><em>For more information, visit <a href='http://www.dourthe.tech'>www.dourthe.tech</a></em></center>

# Stock Market Forecasting using Deep Recurrent Neural Network

___
## Objective
Train a Deep Recurrent Neural Network (RNN) to predict the next-day closing price of a selected stock, using multivariate historical data and time series segmentation.
___
## Next Day Prediction on Single Stock [Multivariate-Single time window-One Script]
This section trains a model on a single stock (selected by the user) with no train/test split. The code imports historical data up to the current day and tries to predict the closing price for the next day.

The difference with sections 3 and 4 is that a single time window is used. Also, the user can select the period during which predictions should be made. For each prediction, the model is trained using historical data up to the day before the predicted day, and it is retrained with one additional day of data for each new prediction.

The script ends with a chart showing Real vs. Prediction and the possibility to download a DataFrame containing real and predicted values for the defined period.

___
# Libraries import

In [1]:
# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
import logging
logging.getLogger('tensorflow').disabled = True

# Computation time monitoring
import time

# Data processing
import pandas as pd
import numpy as np
import datetime
from datetime import datetime
from datetime import timedelta
from pandas_datareader import data, wb

# Data visualization
import matplotlib.pyplot as plt
%matplotlib inline

# Data normalization
from sklearn.preprocessing import MinMaxScaler

# Neural network architecture
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

# Callbacks
from tensorflow.keras.callbacks import EarlyStopping

# Evaluation
from sklearn.metrics import mean_squared_error

# Download from Google Colab
from google.colab import files

ModuleNotFoundError: No module named 'google.colab'

___
# Next Day Prediction on Single Stock [Multivariate-Single time window-One Script]

In [None]:
# Start the wall-clock timer for the whole run
total_start_time = time.time()

# Stock to model
ticker = 'GOOG'

# First and last day of the historical data to download
start, end = datetime(2012, 1, 1), datetime(2020, 8, 2)

# Download the price history from Yahoo Finance and discard the columns that
# should not be used as model features
# (for univariate -> drop(['High', 'Low', 'Open', 'Volume', 'Adj Close'], axis=1))
stock_full_history = (
    data.DataReader(ticker, 'yahoo', start, end)
    .drop(columns=['Adj Close'])
)
# Column position of the closing price among the remaining features
close_idx = stock_full_history.columns.get_loc('Close')

# Days of the history for which a next-day prediction is generated
period = stock_full_history.loc['2020-06-30':'2020-07'].index

# Length (in rows of the history) of the window fed to the network
win = 90

# How far into the future to predict (i.e. lag)
days_into_the_future = 1

# Accumulators filled once per predicted day
real_list, prediction_list, change_list = [], [], []

print('MODEL with HISTORICAL WINDOW of', win, 'DAYS')

# For every day of `period`: train a fresh model on all history up to and
# including that day, then predict the closing price `days_into_the_future`
# rows later. Results are appended to real_list, prediction_list and
# change_list (one entry per day of `period`).
for p, last_known_day in enumerate(period):

    print('\n\tTraining in progress...')

    # Select historical data up to (and including) the current day
    stock = stock_full_history.loc[:last_known_day]

    # At least one full window plus its target row is needed to build a
    # training sample; fail early with an explicit message otherwise
    if len(stock) < win + days_into_the_future:
        raise ValueError(
            f'Not enough history ({len(stock)} rows) for a {win}-day window '
            f'and a {days_into_the_future}-day lag'
        )

    # Normalize every feature column to [0, 1]
    scaler = MinMaxScaler()
    scaled_stock = scaler.fit_transform(stock)
    n_features = scaled_stock.shape[1]

    start_time = time.time()

    # Segment data: each sample is `win` consecutive rows of all features and
    # its target is the scaled Close value `days_into_the_future` rows after
    # the start of the next window position
    target_rows = range(win, len(stock) - days_into_the_future + 1)
    X = np.array([scaled_stock[i - win:i, :] for i in target_rows])
    y = np.array([scaled_stock[i + days_into_the_future - 1, close_idx] for i in target_rows])

    # Four stacked LSTM layers, each followed by Dropout regularisation, and a
    # fully connected output layer
    model = Sequential([
        LSTM(units=100, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
        Dropout(0.2),
        LSTM(units=100, return_sequences=True),
        Dropout(0.2),
        LSTM(units=100, return_sequences=True),
        Dropout(0.2),
        LSTM(units=100),
        Dropout(0.2),
        Dense(units=days_into_the_future),
    ])

    # Compile model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Fit the model to the training set and keep the per-epoch loss
    history = model.fit(X, y, epochs=50, batch_size=32, verbose=0)
    loss = pd.DataFrame(history.history)

    # Print computation time
    current_time = time.time()-start_time
    print(f'\tTraining completed in {current_time//3600:3.0f} hrs {(current_time - current_time//3600*3600)//60:3.0f} mins {current_time%60:3.0f} secs\n')

    # Date the prediction refers to: the next day of the period when there is
    # one, otherwise the last known day shifted by the lag
    if p == len(period) - 1:
        tomorrow = stock.index[-1] + timedelta(days=days_into_the_future)
    else:
        tomorrow = period[p + 1]
    # Roll a weekend date forward to Monday. weekday() is used instead of
    # strftime("%a") so the check does not depend on the locale.
    # NOTE: market holidays are not taken into account.
    if tomorrow.weekday() == 5:
        tomorrow = tomorrow + timedelta(days=2)
    elif tomorrow.weekday() == 6:
        tomorrow = tomorrow + timedelta(days=1)

    # Feed the most recent window, shaped (1, win, n_features), to the network
    last_segment = scaled_stock[-win:][np.newaxis, :, :]
    scaled_prediction = model.predict(last_segment)

    # Unscale prediction. The network was trained on the scaled Close column,
    # so the value must be inverted with the Close column's scaling: place it
    # at close_idx in an otherwise empty feature row and read the same column
    # back (reading column 0 would apply another feature's min/max).
    scaled_row = np.zeros((1, n_features))
    scaled_row[0, close_idx] = scaled_prediction[0, 0]
    prediction = scaler.inverse_transform(scaled_row)
    predicted_close = prediction[0, close_idx]

    # Calculate increase/decrease from the last known close (in %)
    previous_day = stock['Close'].iloc[-1]
    change = (predicted_close - previous_day)*100/previous_day

    # Print prediction (a zero change is reported with the down arrow)
    arrow = '🡕' if change > 0 else '🡖'
    print(f'\033[1m\t\tPredicted Closing Price for {ticker} on {tomorrow.strftime("%a. %b. %d, %Y")} \033[0m\t{predicted_close:8.2f} USD\t({arrow} by {change:3.2f} %)')

    # Append real price (unknown for the last day of the period), prediction
    # and corresponding predicted change
    if p != len(period) - 1:
        real_list.append(stock_full_history.loc[period[p + 1], 'Close'])
    else:
        real_list.append(np.nan)
    prediction_list.append(predicted_close)
    change_list.append(change)

# Label every row with the day its values refer to. The real value stored for
# period[p] is the close of period[p+1], so the index is the period shifted by
# one row (not by one calendar day, which would land on weekends); the last
# row targets the first weekday after the end of the period.
# NOTE: market holidays are not taken into account for that last date.
last_target = period[-1] + timedelta(days=days_into_the_future)
last_target = last_target + timedelta(days={5: 2, 6: 1}.get(last_target.weekday(), 0))
prediction_period = period[1:].append(pd.DatetimeIndex([last_target], name=period.name))

# Generate dataframe
df = pd.DataFrame(
    {
        'real [$]': real_list,
        'predicted [$]': prediction_list,
        'predicted change [%]': change_list,
    },
    index=prediction_period,
)

# Plot Real vs. Predictions
plt.figure(figsize=(16,6))
plt.plot(df['real [$]'], label='Real')
plt.plot(df['predicted [$]'], label='Predicted')
plt.title(ticker + ' Real vs. Predicted')
plt.ylabel('Stock Price [USD]')
plt.legend()
plt.show()

# Save the results next to the notebook; the filename encodes the ticker, the
# window length and the first/last predicted day
first_day = prediction_period[0].strftime("%Y-%b-%d")
last_day = prediction_period[-1].strftime("%Y-%b-%d")
filename = f'{ticker}_{win}-day-model_{first_day}_to_{last_day}.csv'
df.to_csv(filename)

# Download file (only possible when the script runs in Google Colab; anywhere
# else the CSV simply stays on disk)
try:
    from google.colab import files
except ImportError:
    print(f'\nGoogle Colab not available, results saved locally to {filename}')
else:
    files.download(filename)

# Print total computation time
total_time = time.time() - total_start_time
print(f'\nTotal computation time: {total_time//3600:3.0f} hrs {(total_time - total_time//3600*3600)//60:3.0f} mins {total_time%60:3.0f} secs')