## Import Libraries


In [15]:
import numpy as np 
import pandas as pd 
import os

import datetime as dt
from datetime import datetime

import matplotlib.pyplot as plt
plt.style.use('ggplot')

import numpy as np

from sklearn.preprocessing import MinMaxScaler

### Create the Stacked LSTM model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from keras.layers import Dropout

## Loading Stock Data

The dataset is taken from https://in.finance.yahoo.com/. Data for any other stock you want to add to the website can be downloaded from the same site; the model is then trained on that dataset.

In [None]:
# Relative path to the AMZN stock CSV; it is read with pandas in the next cell,
# and the notebook later uses its `Date` and `Close` columns.
data = './data/AMZN.csv'

In [None]:
# Load the stock CSV into a DataFrame and preview its first rows.
df = pd.read_csv(data)
df.head()

In [None]:
# Parse the ISO-formatted date strings and use them as the index so the
# closing price is plotted against calendar time.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df.index = df['Date']

# Draw the closing-price history on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(20, 8))
ax.plot(df['Close'], label='Close Price')
ax.set_xlabel('Year')
ax.set_ylabel('Close Price')
ax.set_title('Close Price')
ax.legend()

In [None]:
# Keep only the closing price, indexed by date.
# The frame is built in one non-mutating chain: the previous version dropped
# the `Date` column with `inplace=True` on a frame derived from `df`, which
# pandas can flag as writing to a copy of a slice.
features = ["Date", "Close"]
all_data = df[features].set_index("Date")

In [None]:
# A notebook cell only renders its last expression, so the shape is printed
# explicitly and the preview is left as the cell's final value.
print(all_data.shape)
all_data.head()

## Loading sentiment data and augmenting training data

In [11]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
import pickle

In [2]:
# Restore the tokenizer that was pickled alongside the sentiment model.
# (No fresh Tokenizer() is created first: it would be overwritten immediately.)
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only load this file if it comes from a trusted source.
with open("sentiment_analysis_models/tokenizer.pkl", "rb") as handle:
    tokenizer = pickle.load(handle)

In [20]:
# Sentiment model, loaded lazily by get_score() and then reused across calls.
_sentiment_model = None


def get_score(text):
    """Return the sentiment model's prediction for a single piece of text.

    The text is turned into an integer sequence with the module-level
    ``tokenizer``, padded to length 300 with ``pad_sequences``, and passed
    to the Keras model stored at ``sentiment_analysis_models/model.h5``.

    The model is read from disk on the first call only and cached afterwards;
    loading it on every call is very slow when this function is applied to
    each row of a DataFrame.

    Parameters
    ----------
    text : str
        Raw text to score.

    Returns
    -------
    float
        The single value the model predicts for ``text``.

    Raises
    ------
    ValueError
        If the prediction for the sample does not contain exactly one value.
    """
    global _sentiment_model
    if _sentiment_model is None:
        _sentiment_model = load_model("sentiment_analysis_models/model.h5")

    # Tokenize text (a batch containing just this one sample)
    tokenized_text = pad_sequences(tokenizer.texts_to_sequences([text]), maxlen=300)
    # Predict; [0] selects the output for the only sample in the batch
    score = _sentiment_model.predict([tokenized_text])[0]
    # .item() extracts the scalar explicitly: calling float() directly on a
    # 1-element array is deprecated in recent NumPy releases.
    return float(np.asarray(score).item())

# Example: get_score("idk what to do")

In [17]:
# Load the filings text. The CSV's first column is an unnamed, previously
# saved row index (it shows up as "Unnamed: 0" when read with defaults), so it
# is used as the index instead of being kept as a stray data column.
sec_data = pd.read_csv('output/out.csv', index_col=0)
sec_data.head()

Unnamed: 0,dates,data
0,2022-04-28,of contentsunited statessecurities and exchang...
1,2022-04-14,form false inc xbrlishares xbrlishares table o...
2,2022-04-13,false united states securities and exchange co...
3,2022-03-09,of contentsunited statessecurities and exchang...
4,2022-02-03,of contentsunited statessecurities and exchang...


In [19]:
# Score every document's text with the sentiment model and store the result
# in a new "sentiment" column, then preview the frame.
sec_data["sentiment"] = sec_data["data"].apply(get_score)
sec_data.head()


NameError: name 'get_score' is not defined

## Pre-processing Stock Data

In [None]:
# Work on the raw NumPy values: rows 2000-4499 are used for training and
# everything from row 4500 onwards is held out for validation.
dataset = all_data.to_numpy()
train = dataset[2000:4500]
valid = dataset[4500:]

In [None]:
# Learn the min/max from the training rows only, then apply that scaling to
# the whole series. Fitting on the full dataset would leak validation-period
# statistics into training.
# Note: values outside the training range map outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(train)
scaled_data = scaler.transform(dataset)

In [None]:
# Build supervised samples from the training slice: each input is a window of
# 90 consecutive scaled closes and the target is the close of the following
# (91st) day.
# `train` is rows 2000:4500 of `dataset`, so it is scaled directly here.
# Indexing `scaled_data` from position 0 would instead take the first
# len(train) rows of the full series, i.e. not the training slice.
scaled_train = scaler.transform(train)
x_train = np.array([scaled_train[i - 90:i, 0] for i in range(90, len(scaled_train))])
y_train = scaled_train[90:, 0]
# The code that writes these results to CSV is shown in the presentation part,
# where a front end and back end were created to use the model built here.

In [None]:
# Add a trailing axis of size 1 so the array is (samples, timesteps, 1),
# matching the LSTM's input_shape of (timesteps, 1).
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)

In [None]:
# Take the validation rows plus the 90 rows before them, so that the first
# validation day already has a full 90-day look-back window, and scale them
# with the already-fitted scaler.
window_start = len(all_data) - len(valid) - 90
inputs = scaler.transform(all_data[window_start:].values.reshape(-1, 1))

# One 90-step window per validation day.
X_test = np.array([inputs[stop - 90:stop, 0] for stop in range(90, inputs.shape[0])])

## Making the model


The model is initialized with a first LSTM layer followed by the 2nd, 3rd and 4th LSTM layers, each paired with a Dropout layer. Every LSTM layer contains 50 neurons; with a Dropout rate of 30%, thirty percent of the 50 neurons are randomly ignored during each training iteration.

Finally, an output layer is added with 1 as an output dimension (as we are predicting the close price)


We compile the model with the Adam optimizer and use mean squared error as the loss function.

In [None]:
# Stacked LSTM: four 50-unit LSTM layers, each followed by 30% dropout, and a
# single-unit Dense output. The first three LSTM layers return full sequences
# so the next LSTM receives one vector per timestep; the last returns only
# its final output.
# NOTE(review): Dropout is imported from `keras.layers` while the other layers
# come from `tensorflow.keras` - confirm the two packages are compatible in
# this environment.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    Dropout(rate=0.3),
    LSTM(units=50, return_sequences=True),
    Dropout(rate=0.3),
    LSTM(units=50, return_sequences=True),
    Dropout(rate=0.3),
    LSTM(units=50, return_sequences=False),
    Dropout(rate=0.3),
    Dense(1),
])

# Standard Adam optimizer with a mean-squared-error loss.
model.compile(optimizer='adam', loss='mean_squared_error')

## Training the model

In [None]:
# Train for 100 passes over the training windows in mini-batches of 128,
# printing per-epoch progress.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=100,
    verbose=1,
)

In [None]:
# Give the test windows the same (samples, timesteps, 1) layout as the
# training input, predict, and map the scaled predictions back to prices.
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
preds = scaler.inverse_transform(model.predict(X_test))

In [None]:
# Shape of the de-scaled predictions array.
print(preds.shape)

In [None]:
# Shape of the held-out actual values, for comparison with preds.shape above.
print(valid.shape)

In [None]:
# Spot check: last actual value next to the last predicted value.
print(valid[-1],preds[-1])

## Results

In [None]:
# Root-mean-square error between the actual and predicted values.
# It equals the standard deviation of the residuals (prediction errors) only
# when their mean is zero.
rms = np.sqrt(np.mean((valid - preds) ** 2))
rms

In [None]:
# Re-slice the date-indexed frame so the curves are plotted against dates.
# NOTE: this rebinds `train` and `valid` from NumPy arrays to DataFrames.
train = all_data[2000:4500]
# Explicit copy, so adding a column does not write to a slice of `all_data`
# (which triggers pandas' SettingWithCopyWarning).
valid = all_data[4500:].copy()
valid['Predictions'] = preds

plt.figure(figsize=(20,8))
plt.plot(train['Close'], label = 'Training Price')
plt.plot(valid['Close'], color = 'blue', label = 'Real Price')
plt.plot(valid['Predictions'], color = 'red', label = 'Predicted Price')
# The data loaded in this notebook is AMZN, so the title names that ticker.
plt.title('AMZN price prediction')
plt.legend()
plt.show()

In [None]:
# NOTE(review): `load_model` is not used in this cell, and this import rebinds
# the name previously imported from `tensorflow.keras.models` - confirm it is
# still needed.
from keras.models import load_model
# Persist the trained price model to disk.
model.save('AMZN.h5')  # creates a HDF5 file 