# Google Stock Price Prediction with LSTM

# Introduction
#### Predict Google stock prices by using LSTM

## Content
* [Read Data](#1)
* [Preprocessing](#2)
* [LSTM Model](#3)
* [Predictions and Visualization](#4)

In [None]:
# Install the third-party packages used by this notebook.
# The leading "!" passes each line to the system shell, so these lines only work
# inside IPython/Jupyter and are not valid in a plain Python script.
# NOTE(review): tensorflow is presumably installed to provide the Keras API used
# for the model further down — confirm against the target environment.
! pip install plotly
! pip install tensorflow

In [None]:
# import libraries 
import numpy as np # Import Numpy for data statistical analysis - linear algebra
import pandas as pd # Import Pandas for data manipulation and CSV file I/O (e.g. pd.read_csv) - DataFrames
import matplotlib.pyplot as plt # Import matplotlib for data visualisation
import plotly.graph_objects as go # interactive data visualization library for Python

<a id="1"></a>
## Read Data

In [None]:
# Load the training set from "trainset.csv" (path is relative to the working directory).
train_data = pd.read_csv("trainset.csv")
# A bare expression as the last line of a cell makes the notebook render it.
train_data

In [None]:
# Preview the first few rows of the training data.
train_data.head()

In [None]:
# Show summary statistics for the training data.
train_data.describe()

In [None]:
# Print the column names, non-null counts and dtypes of the training data.
train_data.info()

In [None]:
# Show the (rows, columns) dimensions of the training data.
train_data.shape

## VISUALIZATION OF THE DATASET.

In [None]:
from plotly.subplots import make_subplots

# Two stacked panels that share the date axis: price candlesticks and traded volume.
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.3,
    subplot_titles=('Google', 'Volume'),
    row_width=[0.2, 0.7],
)

# Candlestick trace in row 1: the Open/High/Low/Close columns are passed as the
# open/high/low/close keyword arguments.
ohlc = {column.lower(): train_data[column] for column in ("Open", "High", "Low", "Close")}
fig.add_trace(
    go.Candlestick(x=train_data["Date"], name="Google", **ohlc),
    row=1,
    col=1,
)

# Volume bars in row 2, kept out of the legend.
fig.add_trace(
    go.Bar(x=train_data['Date'], y=train_data['Volume'], showlegend=False),
    row=2,
    col=1,
)

# Figure title and y-axis label, then enable the range slider on the x-axis.
fig.update_layout(title="Google Stock", yaxis_title="Google Stock Price")
fig.update(layout_xaxis_rangeslider_visible=True)
fig.show()

<a id="2"></a>
## Preprocessing

In [None]:
# Keep only the "Open" column as a NumPy array. Selecting with a list of column
# names keeps the column axis, so the result is 2-D with a single column.
train = train_data[["Open"]].to_numpy()
train

In [None]:
# Feature scaling: rescale the opening prices into the range [0, 1].
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0, 1))
# Fit the scaler on the training prices, then transform those same prices.
# The fitted scaler is reused later for the test inputs and for inverting predictions.
train_scaled = scaler.fit(train).transform(train)
train_scaled

In [None]:
# Line chart of the scaled opening prices against their row position.
plt.plot(train_scaled)
plt.title("Google Stock Data")
plt.xlabel("Time(Days)")
plt.ylabel("Price")
plt.show()

In [None]:
# Build supervised samples with a sliding window: each input holds the previous
# `timesteps` scaled prices and the target is the scaled price that follows them.
X_train = []
y_train = []
timesteps = 1  # window length; also reused when the test inputs are built
# Iterate up to the actual number of rows instead of a hard-coded row count, so
# the cell neither raises IndexError on a shorter dataset nor silently drops
# rows on a longer one.
for i in range(timesteps, len(train_scaled)):
    X_train.append(train_scaled[i - timesteps:i, 0])
    y_train.append(train_scaled[i, 0])
# Convert the Python lists to arrays: X_train is (samples, timesteps), y_train is (samples,).
X_train, y_train = np.array(X_train), np.array(y_train)

In [None]:
# Append a trailing axis of size 1 so X_train becomes (samples, timesteps, 1),
# matching the (timesteps, 1) input shape declared for the LSTM layer.
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_train

In [None]:
# Display the training targets (the scaled price that follows each input window).
y_train

In [None]:
# Sanity check: print the array shapes of the training inputs and targets.
print(f"Shape of X_train: {X_train.shape}\nShape of y_train: {y_train.shape}")

<a id="3"></a>
## TRAINING LSTM Model

In [None]:
from keras.models import Sequential  # plain stack of layers, each with one input and one output tensor
from keras.layers import Dense  # regular densely-connected NN layer
from keras.layers import Dropout  # applies dropout to its input
# Import LSTM from the public `keras.layers` namespace. The private
# `keras.layers.recurrent` module is not available in current Keras releases and
# importing from it raises ImportError there.
from keras.layers import LSTM  # Long Short-Term Memory layer

# Initialize the model.
model = Sequential()

# First (and only) LSTM layer, regularized with dropout, followed by a single
# output unit that predicts the next scaled price.
model.add(LSTM(128, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(Dense(1))

# Compile: minimize mean squared error with the Adam optimizer, a stochastic
# gradient descent method based on adaptive estimation of first-order and
# second-order moments.
model.compile(loss="mean_squared_error", optimizer="adam")

# Train on the windowed data; the targets are reshaped to a column to match the
# model's single output unit.
model.fit(X_train, y_train.reshape(-1, 1), epochs=100)

<a id="4"></a>
## Predictions and Visualization

In [None]:
# Load the test set from 'testset.csv' (path is relative to the working directory)
# and preview its first few rows.
test_data = pd.read_csv('testset.csv')
test_data.head()

In [None]:
# Actual opening prices of the test period as a single-column 2-D array; these
# are the ground truth the predictions are plotted against.
real_stock_price = test_data[["Open"]].to_numpy()
real_stock_price

In [None]:
# Stack the train and test opening prices into one series so that the first test
# windows can reach back into the end of the training period.
dataset_total = pd.concat([train_data['Open'], test_data['Open']], axis=0)

# Keep the test rows plus the `timesteps` rows immediately before them, as a column vector.
window_start = len(dataset_total) - len(test_data) - timesteps
inputs = dataset_total.iloc[window_start:].to_numpy().reshape(-1, 1)

# Scale with the scaler already fitted on the training data (transform only, no refit).
inputs = scaler.transform(inputs)
inputs

In [None]:
# Build one input window per test row: each window holds the `timesteps` scaled
# prices that precede the row being predicted.
X_test = []
# `inputs` holds the test rows plus `timesteps` leading rows, so iterating up to
# len(inputs) yields exactly one window per test row for any `timesteps` value.
# The previous hard-coded bound only matched one dataset size and window length.
for i in range(timesteps, len(inputs)):
    X_test.append(inputs[i - timesteps:i, 0])
X_test = np.array(X_test)
# Add the trailing feature axis: (samples, timesteps, 1).
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

# Get the predictions for the test data and map them back from the scaled
# range to actual price units.
predicted_stock_price = model.predict(X_test)
predicted_stock_price = scaler.inverse_transform(predicted_stock_price)

# Visualise the real and predicted prices on the same axes.
plt.figure(figsize=(12, 9))
plt.plot(real_stock_price, color='red', label='Real Google Stock Price')
plt.plot(predicted_stock_price, color='blue', alpha=0.7, label='Predicted Google Stock Price')
plt.title('Google Stock Price Prediction')
plt.xlabel('Time(Days)')
plt.ylabel('Google Stock Price')
plt.legend()
plt.show()