In [1]:
import sys
import warnings

if not sys.warnoptions:
    warnings.simplefilter('ignore')

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from datetime import timedelta
from tqdm import tqdm
sns.set()
tf.compat.v1.random.set_random_seed(1234)
import datetime as dt
from datetime import datetime

from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

In [2]:
df = pd.read_csv('AAPL.csv')
df.head()



Unnamed: 0,symbol,date,close,high,low,open,volume,adjClose,adjHigh,adjLow,adjOpen,adjVolume,divCash,splitFactor
0,GOOG,2016-02-12 00:00:00+00:00,682.4,693.75,678.6,690.26,2141373,682.4,693.75,678.6,690.26,2141373,0.0,1.0
1,GOOG,2016-02-16 00:00:00+00:00,691.0,698.0,685.05,692.98,2520021,691.0,698.0,685.05,692.98,2520021,0.0,1.0
2,GOOG,2016-02-17 00:00:00+00:00,708.4,709.75,691.38,699.0,2492634,708.4,709.75,691.38,699.0,2492634,0.0,1.0
3,GOOG,2016-02-18 00:00:00+00:00,697.35,712.35,696.03,710.0,1883248,697.35,712.35,696.03,710.0,1883248,0.0,1.0
4,GOOG,2016-02-19 00:00:00+00:00,700.91,703.0805,694.05,695.03,1589281,700.91,703.0805,694.05,695.03,1589281,0.0,1.0


In [3]:
cols = list(df)[2:7]
datelist_train = list(df['date'])
datelist_train = [dt.datetime.strptime(date, '%Y-%m-%d %H:%M:%S+00:00')
                  .date() for date in datelist_train]

print('Training set shape == {}'.format(df.shape))
print('All timestamps == {}'.format(len(datelist_train)))
print('Featured selected: {}'.format(cols))

Training set shape == (1257, 14)
All timestamps == 1257
Featured selected: ['close', 'high', 'low', 'open', 'volume']


In [35]:
df = df[cols].astype(str)
for i in cols:
    for j in range(0, len(df)):
        df[i][j] = df[i][j].replace(',', '')

df = df.astype(float)

# Using multiple features (predictors)
training_set = df.as_matrix()

print('Shape of training set == {}.'.format(training_set.shape))
training_set

Shape of training set == (1257, 5).


array([[6.8357000e+02, 7.0399000e+02, 6.8015000e+02, 7.0387000e+02,
        5.1057250e+06],
       [6.8274000e+02, 6.8403000e+02, 6.6306000e+02, 6.6785000e+02,
        4.2473550e+06],
       [6.7811000e+02, 6.9990000e+02, 6.6877000e+02, 6.7232000e+02,
        3.6089140e+06],
       ...,
       [1.8357400e+03, 1.8572800e+03, 1.8102000e+03, 1.8461700e+03,
        1.6125520e+06],
       [1.9013500e+03, 1.9223918e+03, 1.8509300e+03, 1.8535700e+03,
        1.6021820e+06],
       [1.9275100e+03, 1.9557600e+03, 1.9144900e+03, 1.9225600e+03,
        2.1038640e+06]])

In [36]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
training_set_scaled = sc.fit_transform(training_set)

sc_predict = StandardScaler()
sc_predict.fit_transform(training_set[:, 0:1])

array([[-1.51768105],
       [-1.52064207],
       [-1.53715955],
       ...,
       [ 2.59267512],
       [ 2.82673821],
       [ 2.92006377]])

In [38]:
X_train = []
y_train = []

n_future = 60   # Number of days we want top predict into the future
n_past = 90     # Number of past days we want to use to predict the future

for i in range(n_past, len(training_set_scaled) - n_future +1):
    X_train.append(training_set_scaled[i - n_past:i, 0:df.shape[1] - 1])
    y_train.append(training_set_scaled[i + n_future - 1:i + n_future, 0])

X_train, y_train = np.array(X_train), np.array(y_train)

print('X_train shape == {}.'.format(X_train.shape))
print('y_train shape == {}.'.format(y_train.shape))

X_train shape == (1108, 90, 4).
y_train shape == (1108, 1).


In [39]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.optimizers import Adam

In [41]:
model = Sequential()

# Adding 1st LSTM layer
model.add(LSTM(units=64, return_sequences=True, input_shape=(n_past, df.shape[1]-1)))

# Adding 2nd LSTM layer
model.add(LSTM(units=10, return_sequences=False))

# Adding Dropout
model.add(Dropout(0.25))

# Output layer
model.add(Dense(units=1, activation='linear'))

# Compiling the Neural Network
model.compile(optimizer = Adam(learning_rate=0.01), loss='mean_squared_error')

In [44]:
%%time
es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=10, verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
mcp = ModelCheckpoint(filepath='weights.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True)

tb = TensorBoard('logs')

history = model.fit(X_train, y_train, shuffle=True, epochs=30, callbacks=[es, rlr, mcp, tb], validation_split=0.2, verbose=1, batch_size=256)

Epoch 1/30
Epoch 00001: val_loss improved from inf to 1.16924, saving model to weights.h5
Epoch 2/30
Epoch 00002: val_loss improved from 1.16924 to 0.96965, saving model to weights.h5
Epoch 3/30
Epoch 00003: val_loss did not improve from 0.96965
Epoch 4/30
Epoch 00004: val_loss improved from 0.96965 to 0.95924, saving model to weights.h5
Epoch 5/30
Epoch 00005: val_loss improved from 0.95924 to 0.95788, saving model to weights.h5
Epoch 6/30
Epoch 00006: val_loss improved from 0.95788 to 0.86567, saving model to weights.h5
Epoch 7/30
Epoch 00007: val_loss did not improve from 0.86567
Epoch 8/30
Epoch 00008: val_loss did not improve from 0.86567
Epoch 9/30
Epoch 00009: val_loss did not improve from 0.86567
Epoch 10/30
Epoch 00010: val_loss did not improve from 0.86567
Epoch 11/30
Epoch 00011: val_loss did not improve from 0.86567
Epoch 12/30
Epoch 00012: val_loss did not improve from 0.86567
Epoch 13/30
Epoch 00013: val_loss did not improve from 0.86567
Epoch 14/30
Epoch 00014: val_loss 

In [51]:
# Generate list of sequence of days for predictions
datelist_future = pd.date_range(datelist_train[-1], periods=n_future, freq='1d').tolist()

'''
Remeber, we have datelist_train from begining.
'''

# Convert Pandas Timestamp to Datetime object (for transformation) --> FUTURE
datelist_future_ = []
for this_timestamp in datelist_future:
    datelist_future_.append(this_timestamp.date())

In [52]:
# Perform predictions
predictions_future = model.predict(X_train[-n_future:])

predictions_train = model.predict(X_train[n_past:])

In [53]:

# ---> Special function: convert <datetime.date> to <Timestamp>
def datetime_to_timestamp(x):
    '''
        x : a given datetime value (datetime.date)
    '''
    return datetime.strptime(x.strftime('%Y%m%d'), '%Y%m%d')


y_pred_future = sc_predict.inverse_transform(predictions_future)
y_pred_train = sc_predict.inverse_transform(predictions_train)

PREDICTIONS_FUTURE = pd.DataFrame(y_pred_future, columns=['open']).set_index(pd.Series(datelist_future))
PREDICTION_TRAIN = pd.DataFrame(y_pred_train, columns=['open']).set_index(pd.Series(datelist_train[2 * n_past + n_future -1:]))

# Convert <datetime.date> to <Timestamp> for PREDCITION_TRAIN
PREDICTION_TRAIN.index = PREDICTION_TRAIN.index.to_series().apply(datetime_to_timestamp)

PREDICTION_TRAIN.head(3)

Unnamed: 0,open
2017-01-18,805.858521
2017-01-19,807.364929
2017-01-20,809.761475


In [57]:
# Set plot size 
from pylab import rcParams
rcParams['figure.figsize'] = 14, 5

# Plot parameters
START_DATE_FOR_PLOTTING = '2017-01-18'

plt.plot(PREDICTIONS_FUTURE.index, PREDICTIONS_FUTURE['open'], color='r', label='Predicted Stock Price')
plt.plot(PREDICTION_TRAIN.loc[START_DATE_FOR_PLOTTING:].index, PREDICTION_TRAIN.loc[START_DATE_FOR_PLOTTING:]['open'], color='orange', label='Training predictions')
plt.plot(df.loc[START_DATE_FOR_PLOTTING:].index, df.loc[START_DATE_FOR_PLOTTING:]['open'], color='b', label='Actual Stock Price')

plt.axvline(x = min(PREDICTIONS_FUTURE.index), color='green', linewidth=2, linestyle='--')

plt.grid(which='major', color='#cccccc', alpha=0.5)

plt.legend(shadow=True)
plt.title('Predcitions and Acutal Stock Prices', family='Arial', fontsize=12)
plt.xlabel('Timeline', family='Arial', fontsize=10)
plt.ylabel('Stock Price Value', family='Arial', fontsize=10)
plt.xticks(rotation=45, fontsize=8)
plt.show()

ValueError: view limit minimum -36682.0 is less than 1 and is an invalid Matplotlib date value. This often happens if you pass a non-datetime value to an axis that has datetime units

Error in callback <function install_repl_displayhook.<locals>.post_execute at 0x000001D377E1D678> (for post_execute):


ValueError: view limit minimum -36682.0 is less than 1 and is an invalid Matplotlib date value. This often happens if you pass a non-datetime value to an axis that has datetime units

ValueError: view limit minimum -36682.0 is less than 1 and is an invalid Matplotlib date value. This often happens if you pass a non-datetime value to an axis that has datetime units

<Figure size 1008x360 with 1 Axes>