In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from sklearn.model_selection import train_test_split
from pandas.plotting import register_matplotlib_converters
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error


%matplotlib inline
%config InlineBackend.figure_format='retina'

register_matplotlib_converters()
sns.set(style='whitegrid', palette='muted', font_scale=1.5)

rcParams['figure.figsize'] = 22, 10

RANDOM_SEED = 42

np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

In [3]:
df = pd.read_csv("AAPL.csv", parse_dates=['Date'], index_col="Date")
print('Number of rows and columns:', df.shape)
df.head(5)

Number of rows and columns: (1447, 6)


Unnamed: 0_level_0,Open,High,Low,Close,Adj_Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-02-01,27.8475,27.860001,26.8375,27.3325,24.941502,212818400
2015-05-01,27.0725,27.1625,26.352501,26.5625,24.238857,257142000
2015-06-01,26.635,26.8575,26.157499,26.565001,24.241146,263188400
2015-07-01,26.799999,27.049999,26.674999,26.9375,24.58106,160423600
2015-08-01,27.307501,28.0375,27.174999,27.9725,25.525522,237458000


## Preprocessing
We’ll use the last 10% of the data for testing:

In [4]:
train_size = int(len(df) * 0.8)
test_size = len(df) - train_size
train, test = df.iloc[0:train_size], df.iloc[train_size:len(df)]
test[:5]

Unnamed: 0_level_0,Open,High,Low,Close,Adj_Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2019-08-08,50.049999,50.8825,49.8475,50.857498,50.167248,108038000
2019-09-08,50.325001,50.689999,49.822498,50.247501,49.753849,98478800
2019-12-08,49.904999,50.512501,49.787498,50.119999,49.627602,89927600
2019-08-13,50.255001,53.035,50.119999,52.2425,51.729252,188874000
2019-08-14,50.790001,51.610001,50.647499,50.6875,50.189526,146189600


And now we’ll scale some of the features we’re using for our modeling:

In [5]:
# scaler = MinMaxScaler()
scaler = MinMaxScaler()
train = scaler.fit_transform(train)
test = scaler.transform(test)
print(train.shape)

(1157, 6)


In [6]:
train

array([[0.1519392 , 0.13942175, 0.12743211, 0.13398718, 0.10571957,
        0.27738933],
       [0.12991902, 0.11974612, 0.11360562, 0.11225572, 0.08585242,
        0.35084852],
       [0.11748828, 0.11114245, 0.10804644, 0.1123263 , 0.08591714,
        0.36086945],
       ...,
       [0.76708342, 0.75444282, 0.73487278, 0.72673387, 0.74861731,
        0.27200895],
       [0.75514985, 0.75035267, 0.74527826, 0.75255764, 0.77413758,
        0.16217252],
       [0.74875695, 0.76086034, 0.74371041, 0.76695115, 0.78836204,
        0.14586166]])

In [5]:
def create_dataset(X, time_steps=1):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        v = X[i:(i + time_steps)]
        Xs.append(v)        
        ys.append(X[i, 0])
    return np.array(Xs), np.array(ys)

In [6]:
time_steps = 50

# reshape to [samples, time_steps, n_features]

X_train, y_train = create_dataset(train, time_steps)

X_test, y_test = create_dataset(test, time_steps)

print(X_train.shape, y_train.shape)

(1107, 50, 6) (1107,)


In [7]:
print(y_train.shape)

(1107,)


In [8]:
print(X_test.shape)

(240, 50, 6)


In [9]:
model = keras.Sequential()

#Adding the first LSTM layer and some Dropout regularisation
model.add(keras.layers.LSTM(units=128, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(keras.layers.Dropout(rate=0.2))
# Adding the output layer
model.add(keras.layers.Dense(units=6))

# Compiling the RNN
model.compile(loss='mean_squared_error', optimizer='adam')

In [10]:
# Fitting the RNN to the Training set
if(not os.path.exists('stockPrediction.h5')):
    model.fit(X_train, y_train, epochs = 100, batch_size = 6956)
    model.save('stockPrediction.h5')

In [11]:
# model = load_model('stockPrediction.h5')
print(X_test.shape)
predictions = model.predict(X_test)


(240, 50, 6)


In [15]:
predictions = scaler.inverse_transform(predictions)
predictions[0]

array([ 3.5254322e+01,  2.3870678e+01,  2.0091444e+01,  2.1256184e+01,
        2.8323803e+01, -8.6710950e+06], dtype=float32)

In [16]:
predictions[189][0]

44.166473

In [17]:
predictions

array([[ 3.5254322e+01,  2.3870678e+01,  2.0091444e+01,  2.1256184e+01,
         2.8323803e+01, -8.6710950e+06],
       [ 3.5312969e+01,  2.3828344e+01,  2.0064779e+01,  2.1222664e+01,
         2.8363373e+01, -8.5740010e+06],
       [ 3.5424683e+01,  2.3799185e+01,  2.0038120e+01,  2.1167679e+01,
         2.8431965e+01, -8.3233490e+06],
       ...,
       [ 4.5750835e+01,  1.9523027e+01,  1.2111596e+01,  1.6226913e+01,
         3.3876671e+01, -1.8517786e+07],
       [ 4.5880173e+01,  1.9407221e+01,  1.1937804e+01,  1.6132452e+01,
         3.3990116e+01, -1.5131732e+07],
       [ 4.6004444e+01,  1.9494944e+01,  1.1952395e+01,  1.6086285e+01,
         3.4116093e+01, -1.4585190e+07]], dtype=float32)