<a href="https://colab.research.google.com/github/abhinandankatoch/Stock-Market-Prediction/blob/master/Stock_Price_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Stock Market Prediction Model using TensorFlow**

Studying the Data

In [42]:
!pip install yfinance



In [43]:
#Importing libraries

import yfinance as yf
import numpy as np
import pandas as pd
import tensorflow as tf
import plotly.graph_objects as go

In [44]:
#Downloading the dataset
stock_data = yf.download("GOOGL", start = "2020-01-01", interval = '1d')

[*********************100%***********************]  1 of 1 completed


In [45]:
stock_data.shape

(486, 6)

In [46]:
stock_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,1348.410034,1368.680054,1346.48999,1368.680054,1368.680054,1363900
2020-01-03,1348.0,1373.75,1347.319946,1361.52002,1361.52002,1170400
2020-01-06,1351.630005,1398.319946,1351.0,1397.810059,1397.810059,2338400
2020-01-07,1400.459961,1403.5,1391.560059,1395.109985,1395.109985,1716500
2020-01-08,1394.819946,1411.849976,1392.630005,1405.040039,1405.040039,1765700


In [47]:
#Indexing the data
stock_data.sort_index(inplace = True)

In [48]:
#Removing any duplicate index
stock_data = stock_data.loc[~stock_data.index.duplicated(keep = 'first')]

In [49]:
stock_data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-11-29,2880.0,2925.949951,2877.870117,2910.610107,2910.610107,1629800
2021-11-30,2900.169922,2920.0,2832.030029,2837.949951,2837.949951,2103400
2021-12-01,2880.0,2919.01001,2818.23999,2821.030029,2821.030029,1701000
2021-12-02,2825.0,2877.290039,2804.290039,2859.320068,2859.320068,1459700
2021-12-03,2873.620117,2888.0,2806.02002,2840.030029,2840.030029,2060800


In [50]:
#Checking for missing value

stock_data.isnull().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [51]:
stock_data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,486.0,486.0,486.0,486.0,486.0,486.0
mean,1942.935864,1963.365613,1923.068892,1944.305639,1944.305639,1778004.0
std,560.428685,560.909345,558.487424,559.691344,559.691344,827451.0
min,1056.369995,1066.910034,1008.869995,1054.130005,1054.130005,465600.0
25%,1464.295013,1484.837463,1454.089996,1467.110046,1467.110046,1222000.0
50%,1763.265015,1777.13501,1741.744995,1761.25,1761.25,1551700.0
75%,2413.464966,2437.289978,2399.717529,2425.154907,2425.154907,2076725.0
max,2999.51001,3019.330078,2977.97998,2996.77002,2996.77002,5417900.0


In [62]:
#Check the trend in closing values

fig = go.Figure()
fig.add_trace(go.Scatter(x = stock_data.index, y = stock_data['Close'], mode = 'lines'))
fig.update_layout(height = 500, width = 900, xaxis_title = 'Date', yaxis_title = 'Close')
fig.show()

In [63]:
#Check the trend in volume

fig = go.Figure()
fig.add_trace(go.Scatter(x = stock_data.index, y = stock_data['Volume'], mode = 'lines'))
fig.update_layout(height = 500, width = 900, xaxis_title = 'Date', yaxis_title = 'Volume')
fig.show()

Processing the data and making prediction model.

In [54]:
#Importing Libraries

from sklearn.preprocessing import MinMaxScaler
import pickle
from tqdm.notebook import tnrange

In [55]:
#Filtering required data

stock_data = stock_data[['Close', 'Volume']]
stock_data.head()

Unnamed: 0_level_0,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-02,1368.680054,1363900
2020-01-03,1361.52002,1170400
2020-01-06,1397.810059,2338400
2020-01-07,1395.109985,1716500
2020-01-08,1405.040039,1765700


In [56]:
#Confirm the testing set length

test_length = stock_data[(stock_data.index >= '2022-01-01')].shape[0]

In [57]:
def CreateFeatures_and_Targets(data, feature_length):
  X = []
  Y = []

  for i in tnrange(len(stock_data) - feature_length):
    X.append(stock_data.iloc[i : i + feature_length,:].values)
    Y.append(stock_data["Close"].values[i + feature_length])

  X = np.array(X)
  Y = np.array(Y)

  return X,Y

In [58]:
X , Y = CreateFeatures_and_Targets(stock_data, 32)

  0%|          | 0/454 [00:00<?, ?it/s]

In [64]:
#Check the shapes
X.shape, Y.shape

((454, 32, 2), (454,))

In [65]:
Xtrain, Xtest, Ytrain, Ytest = X[:-test_length], X[-test_length:], Y[:-test_length], Y[-test_length:]

In [66]:
Xtrain.shape, Ytrain.shape

((0, 32, 2), (0,))

In [67]:
Xtest.shape, Ytest.shape

((454, 32, 2), (454,))