In [29]:
#Importing all libraries 

from pandas_datareader import data
import matplotlib.pyplot as plt #visualization of data
import pandas as pd #used to read the datset 
import datetime as dt
import datetime
import urllib.request, json
import os
import numpy as np #perform basic array operations
import tensorflow as tf 
from sklearn.preprocessing import MinMaxScaler #used to scale the data



# API KEY: RWL0I1BNSUZDQFC7

### Importing data by using API

In [30]:
data_source = 'alphavantage'

if data_source == 'alphavantage':
    # Loading data from Alphavantage 

    api_key = 'RWL0I1BNSUZDQFC7'

    ticker = "GOOG"

    # JSON file with all the stock market data
    url_string = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=%s&outputsize=full&apikey=%s"%(ticker,api_key)

    # Saving the data to this file
    file_to_save = 'stock_market_data-%s.csv'%ticker

    # If you haven't already saved data,
    # Go ahead and grab the data from the url
    # And store date, low, high, volume, close, open values to a Pandas DataFrame
    if not os.path.exists(file_to_save):
        with urllib.request.urlopen(url_string) as url:
            data = json.loads(url.read().decode())
            # extract stock market data
            data = data['Time Series (Daily)']
            df = pd.DataFrame(columns=['Date','Low','High','Close','Open'])
            for k,v in data.items():
                date = dt.datetime.strptime(k, '%Y-%m-%d')
                data_row = [date.date(),float(v['3. low']),float(v['2. high']),
                            float(v['4. close']),float(v['1. open'])]
                df.loc[-1,:] = data_row
                df.index = df.index + 1
        print('Data saved to : %s'%file_to_save)        
        df.to_csv(file_to_save)

    # If the data is already there, just load it from the CSV
    else:
        print('File already exists. Loading data from CSV')
        df = pd.read_csv(file_to_save)

else:

    # Loading localy saved data
    df = pd.read_csv("GOOG.csv")
    print('Loaded data from localy saved data')

File already exists. Loading data from CSV


In [31]:
df.head()

Unnamed: 0.1,Unnamed: 0,Date,Low,High,Close,Open
0,1646,2020-10-08,1465.09,1490.0,1485.93,1465.09
1,1645,2020-10-07,1436.0,1468.96,1460.29,1464.29
2,1644,2020-10-06,1448.5901,1486.76,1453.44,1475.58
3,1643,2020-10-05,1464.27,1488.21,1486.02,1466.21
4,1642,2020-10-02,1450.92,1483.2,1458.42,1462.03


In [32]:
df.tail()

Unnamed: 0.1,Unnamed: 0,Date,Low,High,Close,Open
1642,4,2014-04-02,562.19,604.83,567.0,565.106
1643,3,2014-04-01,558.71,568.45,567.16,558.71
1644,2,2014-03-31,556.93,567.0,556.97,566.89
1645,1,2014-03-28,558.67,566.43,559.99,561.2
1646,0,2014-03-27,552.92,568.0,558.46,568.0


### Spliting data into training and testing sets 


The training set will have data from 2014 to 2019 
and the test set will have data from 2020.

In [33]:

date = datetime.datetime(2020,1,1)

df['Date'] = pd.to_datetime(df['Date'])
data_train = df[df['Date'] < date]
data_test = df[df['Date'] >= date]
print(data_train)
data_train.shape



      Unnamed: 0       Date      Low     High    Close      Open
195         1451 2019-12-31  1329.09  1338.00  1337.02  1330.110
196         1450 2019-12-30  1334.02  1353.00  1336.14  1350.000
197         1449 2019-12-27  1349.31  1364.53  1351.89  1362.990
198         1448 2019-12-26  1344.47  1361.33  1360.40  1346.170
199         1447 2019-12-24  1342.78  1350.26  1343.56  1348.500
...          ...        ...      ...      ...      ...       ...
1642           4 2014-04-02   562.19   604.83   567.00   565.106
1643           3 2014-04-01   558.71   568.45   567.16   558.710
1644           2 2014-03-31   556.93   567.00   556.97   566.890
1645           1 2014-03-28   558.67   566.43   559.99   561.200
1646           0 2014-03-27   552.92   568.00   558.46   568.000

[1452 rows x 6 columns]


(1452, 6)

In [34]:
data_train = data_train.drop(['Date'], axis = 1)


In [35]:
#to make the data in the training set lie in the same range of 0 to 1 in order to improve accuracy 
scaler = MinMaxScaler()
data_train = scaler.fit_transform(data_train)
print(data_train)

[[1.00000000e+00 9.72743351e-01 9.68930662e-01 9.72197278e-01
  9.61735927e-01]
 [9.99310820e-01 9.78442048e-01 9.86191406e-01 9.71184177e-01
  9.84632209e-01]
 [9.98621640e-01 9.96116101e-01 9.99459163e-01 9.89316387e-01
  9.99585588e-01]
 ...
 [1.37835975e-03 8.01863347e-02 8.17284678e-02 7.41636159e-02
  8.31587429e-02]
 [6.89179876e-04 8.21976396e-02 8.10725596e-02 7.76403951e-02
  7.66087257e-02]
 [0.00000000e+00 7.55510860e-02 8.28791840e-02 7.58789805e-02
  8.44365143e-02]]


In [36]:
#We will read the data for first 60 days and then predict for the 61st day. 
#Then we will hop ahead by one day and read the next chunk of data for next sixty days.
X_train = []
y_train = []

for i in range(60, data_train.shape[0]):
    X_train.append(data_train[i-60:i])
    y_train.append(data_train[i, 0])
    
X_train, y_train = np.array(X_train), np.array(y_train)

In [37]:
X_train.shape

(1392, 60, 5)

### Building LSTM

In [38]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout

In [39]:
regressor = Sequential()

regressor.add(LSTM(units = 60, activation = 'relu', return_sequences = True, input_shape = (X_train.shape[1], 5)))
regressor.add(Dropout(0.2))

regressor.add(LSTM(units = 60, activation = 'relu', return_sequences = True))
regressor.add(Dropout(0.2))

regressor.add(LSTM(units = 80, activation = 'relu', return_sequences = True))
regressor.add(Dropout(0.2))

regressor.add(LSTM(units = 120, activation = 'relu'))
regressor.add(Dropout(0.2))

regressor.add(Dense(units = 1))

In [40]:
regressor.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 60, 60)            15840     
_________________________________________________________________
dropout_4 (Dropout)          (None, 60, 60)            0         
_________________________________________________________________
lstm_5 (LSTM)                (None, 60, 60)            29040     
_________________________________________________________________
dropout_5 (Dropout)          (None, 60, 60)            0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 60, 80)            45120     
_________________________________________________________________
dropout_6 (Dropout)          (None, 60, 80)            0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 120)              

In [41]:
regressor.compile(optimizer='adam', loss = 'mean_squared_error')
regressor.fit(X_train, y_train, epochs=20, batch_size=32)

Train on 1392 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fe81c6f83d0>

### Preparing Data set 

In [51]:
data_test.head()


Unnamed: 0.1,Unnamed: 0,Date,Low,High,Close,Open
0,1646,2020-10-08,1465.09,1490.0,1485.93,1465.09
1,1645,2020-10-07,1436.0,1468.96,1460.29,1464.29
2,1644,2020-10-06,1448.5901,1486.76,1453.44,1475.58
3,1643,2020-10-05,1464.27,1488.21,1486.02,1466.21
4,1642,2020-10-02,1450.92,1483.2,1458.42,1462.03
