<a href="https://colab.research.google.com/github/abhinandankatoch/Stock-Market-Prediction/blob/master/Stock_Price_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Stock Market Prediction Model using TensorFlow**

## FETCHING DATA

In [3]:
!pip install yfinance

Collecting yfinance
  Downloading yfinance-0.1.67-py2.py3-none-any.whl (25 kB)
Collecting lxml>=4.5.1
  Downloading lxml-4.6.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 17.0 MB/s 
Installing collected packages: lxml, yfinance
  Attempting uninstall: lxml
    Found existing installation: lxml 4.2.6
    Uninstalling lxml-4.2.6:
      Successfully uninstalled lxml-4.2.6
Successfully installed lxml-4.6.4 yfinance-0.1.67


In [4]:
#Importing libraries
import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf
import plotly.graph_objects as go

In [5]:
#Downloading the dataset: daily price bars for one ticker from a fixed start date.
#No end date is passed, so the number of rows depends on when this cell is run.
ticker_symbol = "GOOGL"
first_day = "2018-11-15"
stock_data = yf.download(ticker_symbol, start = first_day, interval = '1d')

[*********************100%***********************]  1 of 1 completed


In [6]:
#(rows, columns) of the downloaded frame
stock_data.shape

(768, 6)

In [7]:
#Preview the earliest rows to confirm the expected price/volume columns are present
stock_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-11-15,1051.459961,1078.869995,1041.290039,1071.050049,1071.050049,2079200
2018-11-16,1065.22998,1072.439941,1054.900024,1068.27002,1068.27002,1768300
2018-11-19,1063.390015,1068.0,1022.869995,1027.420044,1027.420044,2284200
2018-11-20,1007.289978,1037.349976,1002.210022,1030.449951,1030.449951,2722900
2018-11-21,1045.310059,1054.709961,1039.459961,1043.430054,1043.430054,1484900


In [8]:
#Sort the rows by their date index so later positional slicing follows time order
stock_data = stock_data.sort_index()

In [9]:
#Keep only the first row for any date that appears more than once in the index
repeated_dates = stock_data.index.duplicated(keep = 'first')
stock_data = stock_data.loc[~repeated_dates]

In [10]:
#Preview the most recent rows (the frame is sorted by date at this point)
stock_data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-11-29,2880.0,2925.949951,2877.870117,2910.610107,2910.610107,1629800
2021-11-30,2900.169922,2920.0,2832.030029,2837.949951,2837.949951,2103400
2021-12-01,2880.0,2919.01001,2818.23999,2821.030029,2821.030029,1701000
2021-12-02,2825.0,2877.290039,2804.290039,2859.320068,2859.320068,1459700
2021-12-03,2873.620117,2888.0,2806.02002,2840.030029,2840.030029,2060800


In [11]:
#Count missing values per column; every count should be 0 before windows are built
stock_data.isnull().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [12]:
#Column dtypes, non-null counts and the date range covered by the index
stock_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 768 entries, 2018-11-15 to 2021-12-03
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       768 non-null    float64
 1   High       768 non-null    float64
 2   Low        768 non-null    float64
 3   Close      768 non-null    float64
 4   Adj Close  768 non-null    float64
 5   Volume     768 non-null    int64  
dtypes: float64(5), int64(1)
memory usage: 42.0 KB


In [13]:
#Summary statistics per column (count, mean, std, quartiles, min/max)
stock_data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,768.0,768.0,768.0,768.0,768.0,768.0
mean,1661.180937,1677.896716,1645.055849,1662.380052,1662.380052,1704924.0
std,581.689509,585.004992,577.498754,581.398314,581.398314,805839.9
min,984.320007,1012.119995,977.659973,984.669983,984.669983,465600.0
25%,1199.425049,1208.257477,1189.869995,1201.910004,1201.910004,1183725.0
50%,1447.224976,1455.025024,1429.590027,1446.26001,1446.26001,1500050.0
75%,2056.502441,2080.997559,2032.920044,2055.549927,2055.549927,1917000.0
max,2999.51001,3019.330078,2977.97998,2996.77002,2996.77002,6658900.0


In [14]:
#Line chart of the closing price over time
close_trace = go.Scatter(x = stock_data.index, y = stock_data['Close'], mode = 'lines')
fig = go.Figure(data = [close_trace])
fig.update_layout(height = 500, width = 900, xaxis_title = 'Date', yaxis_title = 'Close')
fig.show()

In [15]:
#Line chart of the traded volume over time
volume_trace = go.Scatter(x = stock_data.index, y = stock_data['Volume'], mode = 'lines')
fig = go.Figure(data = [volume_trace])
fig.update_layout(height = 500, width = 900, xaxis_title = 'Date', yaxis_title = 'Volume')
fig.show()

## PROCESSING DATA

In [16]:
#Importing libraries
from sklearn.preprocessing import MinMaxScaler
import pickle
from tqdm.notebook import tnrange

In [17]:
#Keep only the two columns that are fed to the model: closing price and traded volume
model_columns = ['Close', 'Volume']
stock_data = stock_data[model_columns]
stock_data.head()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-11-15,1071.050049,2079200
2018-11-16,1068.27002,1768300
2018-11-19,1027.420044,2284200
2018-11-20,1030.449951,2722900
2018-11-21,1043.430054,1484900


In [18]:
#Size of the held-out tail: the number of rows dated on or after the cut-off
#NOTE(review): with this cut-off the recorded run holds out 580 of 736 windows and trains on 156 - confirm the date is intended
in_test_period = stock_data.index >= '2019-08-17'
test_length = stock_data[in_test_period].shape[0]

In [19]:
def CreateFeatures_and_Targets(data, feature_length, target_column = "Close"):
  """Turn a time-ordered DataFrame into sliding-window features and next-step targets.

  Window i holds rows i .. i + feature_length - 1 (all columns) and its target
  is the `target_column` value of row i + feature_length.

  Parameters:
    data: DataFrame whose rows are in time order; all columns become features.
    feature_length: number of consecutive rows per window.
    target_column: column whose next value is predicted (defaults to "Close").

  Returns:
    (X, Y) where X has shape (len(data) - feature_length, feature_length, n_columns)
    and Y has shape (len(data) - feature_length,). Both are empty 1-D arrays when
    `data` has no more than `feature_length` rows.
  """
  # Read from the `data` argument (not a notebook global), and convert to NumPy once
  # instead of slicing the DataFrame on every iteration.
  values = data.values
  targets = data[target_column].values
  window_count = len(data) - feature_length

  # Plain range: with the arrays hoisted the loop is cheap enough not to need a progress bar.
  X = [values[i : i + feature_length, :] for i in range(window_count)]
  Y = [targets[i + feature_length] for i in range(window_count)]

  return np.array(X), np.array(Y)

In [20]:
#Build sliding windows of 32 consecutive rows; each target is the Close value of the row right after its window
X , Y = CreateFeatures_and_Targets(stock_data, 32)

  0%|          | 0/736 [00:00<?, ?it/s]

In [21]:
#Check the shapes: X is (windows, rows per window, columns), Y holds one target per window
X.shape, Y.shape

((736, 32, 2), (736,))

In [25]:
#Chronological split: the last `test_length` windows are held out, everything before them is used for training
train_part = slice(None, -test_length)
test_part = slice(-test_length, None)
Xtrain, Ytrain = X[train_part], Y[train_part]
Xtest, Ytest = X[test_part], Y[test_part]

In [26]:
#Shapes of the training windows and their targets
Xtrain.shape, Ytrain.shape

((156, 32, 2), (156,))

In [27]:
#Shapes of the held-out windows and their targets
Xtest.shape, Ytest.shape

((580, 32, 2), (580,))

In [28]:
#Scalers to scale Vectors with Multiple Dimensions
class MultiDimensionScaler():
  """Min-max scale a 3-D array using one MinMaxScaler per index of the last axis.

  For every index i of the last axis, the 2-D slice X[:, :, i] is passed to its
  own MinMaxScaler. MinMaxScaler scales each column of the 2-D matrix it is given,
  so statistics are learned separately for each position along axis 1.

  Both methods return a scaled copy and leave the array passed in untouched, so
  callers must use the return value.
  """
  def __init__ (self):
    # One fitted MinMaxScaler per index of the last axis, stored in axis order.
    self.scalers = []

  @staticmethod
  def _float_copy(X):
    """Return a floating-point copy of X so scaled values are never truncated or written into the caller's array."""
    copied = np.array(X)  # np.array copies by default
    if not np.issubdtype(copied.dtype, np.floating):
      copied = copied.astype(float)
    return copied

  def fit_transform(self, X):
    """Fit a fresh scaler for each last-axis index of X and return the scaled copy.

    X is indexed as X[:, :, i], so it must be 3-D. Scalers from any earlier fit
    are discarded, which keeps `transform` in step with the most recent fit.
    """
    scaled = self._float_copy(X)
    self.scalers = []  # reset: a re-fit must not leave stale scalers at the front of the list
    for i in range(scaled.shape[2]):
      Scaler = MinMaxScaler()
      scaled[:, :, i] = Scaler.fit_transform(scaled[:, :, i])
      self.scalers.append(Scaler)
    return scaled

  def transform(self, X):
    """Scale X with the scalers learned by the last `fit_transform` and return the scaled copy.

    Raises IndexError if X has more last-axis entries than there are fitted scalers
    (including the case where `fit_transform` was never called).
    """
    scaled = self._float_copy(X)
    for i in range(scaled.shape[2]):
      scaled[:, :, i] = self.scalers[i].transform(scaled[:, :, i])
    return scaled

In [29]:
#Fit the feature scalers on the training windows only, then apply the same scaling to the test windows
#Note: Xtrain/Xtest are overwritten with their scaled versions, so re-running this cell scales already-scaled data
Feature_Scaler = MultiDimensionScaler()
Xtrain = Feature_Scaler.fit_transform(Xtrain)
Xtest = Feature_Scaler.transform(Xtest)

In [30]:
#Scale the targets with a scaler fitted on the training targets only
#reshape(-1,1) turns each 1-D target vector into the single-column 2-D array that MinMaxScaler is given
#Note: Ytrain/Ytest are overwritten with their scaled versions, so re-running this cell scales already-scaled data
Target_Scaler = MinMaxScaler()
Ytrain = Target_Scaler.fit_transform(Ytrain.reshape(-1,1))
Ytest = Target_Scaler.transform(Ytest.reshape(-1,1))

In [31]:
def save_object(obj, name : str):
  """Pickle `obj` into the file "<name>.pck", overwriting any existing file.

  Parameters:
    obj: any picklable object.
    name: output path without the ".pck" extension.

  Raises whatever `open` or `pickle.dump` raise (e.g. OSError, pickle.PicklingError).
  """
  # The context manager closes the file even when pickling fails part-way through.
  with open(f"{name}.pck","wb") as pickle_out:
    pickle.dump(obj, pickle_out)

def load_object(name : str):
  """Load and return the object pickled in the file "<name>.pck".

  Parameters:
    name: input path without the ".pck" extension.

  Raises whatever `open` or `pickle.load` raise (e.g. FileNotFoundError).
  """
  # SECURITY: unpickling can execute arbitrary code - only load files you created yourself.
  # The context manager closes the handle, which the previous version left open.
  with open(f"{name}.pck","rb") as pickle_in:
    return pickle.load(pickle_in)

## BUILDING AND TRAINING THE MODEL

In [32]:
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

#Write the weights to disk whenever the validation loss reaches a new best
save_best = ModelCheckpoint(
  "best_weights.h5",
  monitor = 'val_loss',
  save_best_only = True,
  save_weights_only = True,
)

#Multiply the learning rate by 0.25 after 5 epochs without a better validation loss, never going below 1e-5
reduce_lr = ReduceLROnPlateau(
  monitor = 'val_loss',
  factor = 0.25,
  patience = 5,
  min_lr = 0.00001,
  verbose = 1,
)

In [33]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Bidirectional

#Recurrent regressor: a bidirectional LSTM over each window, a second LSTM that
#collapses the sequence, then a small dense head ending in a single linear output.
#input_shape = (32,2) is 32 rows per window and 2 columns per row.
model = Sequential([
  Bidirectional(LSTM(512, return_sequences = True, recurrent_dropout = 0.1, input_shape = (32,2))),
  LSTM(256, recurrent_dropout = 0.1),
  Dropout(0.3),
  Dense(64, activation = 'elu'),
  Dropout(0.3),
  Dense(32, activation = 'elu'),
  Dense(1, activation = 'linear'), #Final Layer
])



In [34]:
#Plain SGD with a fixed starting learning rate (reduce_lr may lower it during training); mean squared error on the scaled targets
optimizer = tf.keras.optimizers.SGD(learning_rate = 0.002)
model.compile(loss = 'mse', optimizer=optimizer)

In [35]:
#Train one window at a time (batch_size = 1) without shuffling the training windows
#NOTE(review): Xtest/Ytest double as the validation set, so save_best and reduce_lr (both monitoring val_loss)
#are driven by the same data that is later predicted and plotted - the final chart is not an unbiased evaluation
history = model.fit(Xtrain, Ytrain,
                    epochs = 10,
                    batch_size = 1,
                    verbose = 1,
                    shuffle = False,
                    validation_data = (Xtest, Ytest),
                    callbacks = [reduce_lr, save_best])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## VISUALIZING PREDICTION

In [36]:
#Loading the best weights: restore the checkpoint written by save_best (lowest validation loss seen during training)
model.load_weights("best_weights.h5")

In [37]:
#Predict the (still scaled) next-step Close value for every held-out window
Predictions = model.predict(Xtest)

In [39]:
#Undo the target scaling so predictions and ground truth are back in the original Close units
#Note: Predictions is overwritten, so re-running this cell without re-running the prediction cell inverse-scales twice
Predictions = Target_Scaler.inverse_transform(Predictions)
Actual = Target_Scaler.inverse_transform(Ytest)

In [42]:
#One prediction per held-out window, still carrying a trailing length-1 axis
Predictions.shape

(580, 1)

In [44]:
#Drop the trailing length-1 axis so both series are 1-D for plotting
#Note: both names are overwritten, so this cell cannot be re-run on its own (axis 1 no longer exists afterwards)
Predictions = np.squeeze(Predictions, axis = 1)
Actual = np.squeeze(Actual, axis = 1)

In [47]:
#Plotting the Prediction vs Actual Graph
#The held-out targets correspond to the last `test_length` dates of the frame
test_dates = stock_data.index[-test_length:]

fig = go.Figure()
for series_name, series_values in (('Actual', Actual), ('Predictions', Predictions)):
  fig.add_trace(go.Scatter(x = test_dates, y = series_values, mode = 'lines', name = series_name))
fig.show()