In [192]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [193]:
# Import modules and packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from datetime import datetime

from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

%matplotlib inline

In [194]:
# Importing Training Set
dataset_train = pd.read_csv('/content/gdrive/My Drive/datasets/GOOG.csv')

print(dataset_train.head())

# Select features (columns) to be involved intro training and predictions
cols = list(dataset_train)[1:6]

# Extract dates (will be used in visualization)
datelist_train = list(dataset_train['Date'])
datelist_train = [dt.datetime.strptime(date, '%Y-%m-%d').date() for date in datelist_train]

print(datelist_train)

print('Training set shape == {}'.format(dataset_train.shape))
print('All timestamps == {}'.format(len(datelist_train)))
print('Featured selected: {}'.format(cols))

         Date        Open        High  ...       Close   Adj Close   Volume
0  2008-08-22  244.832306  246.515991  ...  244.379013  244.379013  4611500
1  2008-08-25  242.147369  247.572037  ...  240.603165  240.603165  4043600
2  2008-08-26  240.827316  240.827316  ...  236.194687  236.194687  6641100
3  2008-08-27  235.980484  236.528427  ...  233.415100  233.415100  8807000
4  2008-08-28  235.362808  237.335403  ...  236.005386  236.005386  6082000

[5 rows x 7 columns]
[datetime.date(2008, 8, 22), datetime.date(2008, 8, 25), datetime.date(2008, 8, 26), datetime.date(2008, 8, 27), datetime.date(2008, 8, 28), datetime.date(2008, 8, 29), datetime.date(2008, 9, 2), datetime.date(2008, 9, 3), datetime.date(2008, 9, 4), datetime.date(2008, 9, 5), datetime.date(2008, 9, 8), datetime.date(2008, 9, 9), datetime.date(2008, 9, 10), datetime.date(2008, 9, 11), datetime.date(2008, 9, 12), datetime.date(2008, 9, 15), datetime.date(2008, 9, 16), datetime.date(2008, 9, 17), datetime.date(2008, 9, 

In [195]:
dataset_train = dataset_train[cols].astype(str)
for i in cols:
    for j in range(0, len(dataset_train)):
        dataset_train[i][j] = dataset_train[i][j].replace(',', '')

dataset_train = dataset_train.astype(float)

# Using multiple features (predictors)
training_set = dataset_train.values

print('Shape of training set == {}.'.format(training_set.shape))
training_set

Shape of training set == (2998, 5).


array([[ 244.832306,  246.515991,  243.82608 ,  244.379013,  244.379013],
       [ 242.147369,  247.572037,  239.850983,  240.603165,  240.603165],
       [ 240.827316,  240.827316,  234.416351,  236.194687,  236.194687],
       ...,
       [1521.619995, 1523.439941, 1498.420044, 1515.550049, 1515.550049],
       [1515.26001 , 1570.290039, 1503.599976, 1565.719971, 1565.719971],
       [1586.98999 , 1586.98999 , 1554.280029, 1558.420044, 1558.420044]])

In [196]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
training_set_scaled = sc.fit_transform(training_set)

training_set_scaled


array([[-1.03988439, -1.04063569, -1.03585083, -1.04020883, -1.04020883],
       [-1.04718024, -1.03779417, -1.04674835, -1.05045548, -1.05045548],
       [-1.05076726, -1.05594234, -1.06164711, -1.06241891, -1.06241891],
       ...,
       [ 2.42956569,  2.39521192,  2.40355288,  2.40940922,  2.40940922],
       [ 2.41228353,  2.52127252,  2.41775339,  2.54555696,  2.54555696],
       [ 2.60719737,  2.56620745,  2.5566901 ,  2.52574691,  2.52574691]])

In [197]:
# Creating a data structure with 90 timestamps and 1 output
X_train = []
y_train = []

n_future = 60   # Number of days we want to predict into the future (lag size)
n_past = 90     # Number of past days we want to use to predict the future

for i in range(n_past, len(training_set_scaled) - n_future +1):
    X_train.append(training_set_scaled[i - n_past:i, 0:dataset_train.shape[1] - 1])
    y_train.append(training_set_scaled[i + n_future - 1:i + n_future, 0])

X_train, y_train = np.array(X_train), np.array(y_train)



print('X_train shape == {}.'.format(X_train.shape))
print('y_train shape == {}.'.format(y_train.shape))

X_train shape == (2849, 90, 4).
y_train shape == (2849, 1).


In [198]:
# Import Libraries and packages from Keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.optimizers import Adam
from keras.layers import Bidirectional

In [199]:
# (window size, features)
# Initializing the Neural Network based on LSTM
model = Sequential()

# Adding 1st LSTM layer
                                                              #(window size, 1)
model.add(Bidirectional(LSTM(units=512, return_sequences=True, kernel_initializer='random_normal', input_shape=(n_past, dataset_train.shape[1]-1))))

# Adding 2nd LSTM layer
model.add(Bidirectional(LSTM(units=128, kernel_initializer='random_normal', return_sequences=True)))

# Adding Dropout
model.add(Dropout(0.25))


# # Adding 2nd LSTM layer
# model.add(LSTM(units=64, kernel_initializer='random_normal', return_sequences=True))

# # # Adding Dropout
# # model.add(Dropout(0.25))


# # Adding 2nd LSTM layer
# model.add(LSTM(units=32, return_sequences=False))

# # # Adding Dropout
# # model.add(Dropout(0.25))


# Output layer
model.add(Dense(units=1, activation='linear'))

# Compiling the Neural Network
model.compile(optimizer = Adam(learning_rate=0.001), loss='mean_squared_error')

In [200]:
%%time
es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=10, verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
mcp = ModelCheckpoint(filepath='weights.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True)

tb = TensorBoard('logs')

history = model.fit(X_train, y_train, shuffle=True, epochs=30, callbacks=[es, rlr, mcp, tb], validation_split=0.3, verbose=1, batch_size=32)

Epoch 1/30
Epoch 00001: val_loss improved from inf to 0.25338, saving model to weights.h5
Epoch 2/30
Epoch 00002: val_loss did not improve from 0.25338
Epoch 3/30
Epoch 00003: val_loss did not improve from 0.25338
Epoch 4/30
Epoch 00004: val_loss improved from 0.25338 to 0.23686, saving model to weights.h5
Epoch 5/30
Epoch 00005: val_loss improved from 0.23686 to 0.21072, saving model to weights.h5
Epoch 6/30
Epoch 00006: val_loss improved from 0.21072 to 0.20139, saving model to weights.h5
Epoch 7/30
Epoch 00007: val_loss improved from 0.20139 to 0.14650, saving model to weights.h5
Epoch 8/30
Epoch 00008: val_loss did not improve from 0.14650
Epoch 9/30
Epoch 00009: val_loss did not improve from 0.14650
Epoch 10/30
Epoch 00010: val_loss did not improve from 0.14650
Epoch 11/30
Epoch 00011: val_loss did not improve from 0.14650
Epoch 12/30
Epoch 00012: val_loss did not improve from 0.14650
Epoch 13/30
Epoch 00013: val_loss did not improve from 0.14650
Epoch 14/30
Epoch 00014: val_loss 

In [201]:
# Generate list of sequence of days for predictions
datelist_future = pd.date_range(datelist_train[-1], periods=n_future, freq='1d').tolist()
print(datelist_future)
'''
Remeber, we have datelist_train from begining.
'''
print("___________________________________________________________________________________________")
# Convert Pandas Timestamp to Datetime object (for transformation) --> FUTURE
datelist_future_ = []
for this_timestamp in datelist_future:
    datelist_future_.append(this_timestamp.date())


[Timestamp('2020-07-21 00:00:00', freq='D'), Timestamp('2020-07-22 00:00:00', freq='D'), Timestamp('2020-07-23 00:00:00', freq='D'), Timestamp('2020-07-24 00:00:00', freq='D'), Timestamp('2020-07-25 00:00:00', freq='D'), Timestamp('2020-07-26 00:00:00', freq='D'), Timestamp('2020-07-27 00:00:00', freq='D'), Timestamp('2020-07-28 00:00:00', freq='D'), Timestamp('2020-07-29 00:00:00', freq='D'), Timestamp('2020-07-30 00:00:00', freq='D'), Timestamp('2020-07-31 00:00:00', freq='D'), Timestamp('2020-08-01 00:00:00', freq='D'), Timestamp('2020-08-02 00:00:00', freq='D'), Timestamp('2020-08-03 00:00:00', freq='D'), Timestamp('2020-08-04 00:00:00', freq='D'), Timestamp('2020-08-05 00:00:00', freq='D'), Timestamp('2020-08-06 00:00:00', freq='D'), Timestamp('2020-08-07 00:00:00', freq='D'), Timestamp('2020-08-08 00:00:00', freq='D'), Timestamp('2020-08-09 00:00:00', freq='D'), Timestamp('2020-08-10 00:00:00', freq='D'), Timestamp('2020-08-11 00:00:00', freq='D'), Timestamp('2020-08-12 00:00:00'

In [202]:
sc_predict = StandardScaler()
sc_predict.fit_transform(training_set[:, 0:1])

array([[-1.03988439],
       [-1.04718024],
       [-1.05076726],
       ...,
       [ 2.42956569],
       [ 2.41228353],
       [ 2.60719737]])

In [203]:
# Perform predictions
predictions_future = model.predict(X_train[-n_future:])

predictions_train = model.predict(X_train[n_past:])

In [204]:

# Inverse the predictions to original measurements

# ---> Special function: convert <datetime.date> to <Timestamp>
def datetime_to_timestamp(x):
    '''
        x : a given datetime value (datetime.date)
    '''
    return datetime.strptime(x.strftime('%Y%m%d'), '%Y%m%d')


y_pred_future = sc_predict.inverse_transform(predictions_future)
y_pred_train = sc_predict.inverse_transform(predictions_train)

PREDICTIONS_FUTURE = pd.DataFrame(y_pred_future, columns=['Open']).set_index(pd.Series(datelist_future))
PREDICTION_TRAIN = pd.DataFrame(y_pred_train, columns=['Open']).set_index(pd.Series(datelist_train[2 * n_past + n_future -1:]))

# Convert <datetime.date> to <Timestamp> for PREDCITION_TRAIN
PREDICTION_TRAIN.index = PREDICTION_TRAIN.index.to_series().apply(datetime_to_timestamp)


ValueError: ignored

In [None]:
# Parse training set timestamp for better visualization
dataset_train = pd.DataFrame(dataset_train, columns=cols)
dataset_train.index = datelist_train
dataset_train.index = pd.to_datetime(dataset_train.index)

In [None]:
# Set plot size 
from pylab import rcParams
rcParams['figure.figsize'] = 14, 5

# Plot parameters
START_DATE_FOR_PLOTTING = '2012-06-01'

plt.plot(PREDICTIONS_FUTURE.index, PREDICTIONS_FUTURE['Open'], color='r', label='Predicted Stock Price')
plt.plot(PREDICTION_TRAIN.loc[START_DATE_FOR_PLOTTING:].index, PREDICTION_TRAIN.loc[START_DATE_FOR_PLOTTING:]['Open'], color='orange', label='Training predictions')
plt.plot(dataset_train.loc[START_DATE_FOR_PLOTTING:].index, dataset_train.loc[START_DATE_FOR_PLOTTING:]['Open'], color='b', label='Actual Stock Price')

plt.axvline(x = min(PREDICTIONS_FUTURE.index), color='green', linewidth=2, linestyle='--')

plt.grid(which='major', color='#cccccc', alpha=0.5)

plt.legend(shadow=True)
plt.title('Predcitions and Acutal Stock Prices', family='Arial', fontsize=12)
plt.xlabel('Timeline', family='Arial', fontsize=10)
plt.ylabel('Stock Price Value', family='Arial', fontsize=10)
plt.xticks(rotation=45, fontsize=8)
plt.show()