In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline  

In [2]:
import pandas as pd
import numpy as np
from numpy import array
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt


Using TensorFlow backend.


### Control Variables

In [3]:
np.random.seed(42) 
n_steps = 50

### Support Functions

In [4]:
# split a multivariate sequence into samples
# source: Jason Brownlee - How to Develop LSTM Models for Time Series Forecasting
def split_sequences(sequences, n_steps):
    X, y = list(), list()
    for i in range(len(sequences)):
        # find the end of this pattern
        end_ix = i + n_steps
        # check if we are beyond the dataset
        if end_ix > len(sequences):
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = sequences[i:end_ix, :-1], sequences[end_ix-1, -1]
        X.append(seq_x)
        y.append(seq_y)
    return array(X), array(y)


### Absolute Price LSTM

Create a Bidrectional Stacked LSTM for predicting the next day closing price.

In [5]:
df = pd.read_pickle("sp500.pickle")

# choose the correct Y
df = df.drop('Target+', axis=1)
df = df.drop('Target_', axis=1)
df = df.astype('float64')

# report initial results
print("Shape of pandas train      set: {}".format(df.loc["2010-01-01":"2017-01-01"].shape))
print("Shape of pandas validation set: {}".format(df.loc["2017-01-01":"2018-01-01"].shape))
print("Shape of pandas test       set: {}\n".format(df.loc["2018-01-01":"2019-01-01"].shape))

# find split coordinates
train_size = df.loc["2010-01-01":"2017-01-01"].shape[0]
validation_size = df.loc["2017-01-01":"2018-01-01"].shape[0]
test_size = df.loc["2018-01-01":"2019-01-01"].shape[0]

print("Size of train              set: {}".format(train_size))
print("Size of validation         set: {}".format(validation_size))
print("Size of test               set: {}\n".format(test_size))

# scale entire dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(df)

# define sets
train = dataset[0:train_size]
validation = dataset[train_size : train_size + validation_size]
test  = dataset[train_size + validation_size: ]

print("Shape of numpy train       set: {}".format(train.shape))
print("Shape of numpy validation  set: {}".format(validation.shape))
print("Shape of numpy test        set: {}".format(test.shape))

# print(scaler.inverse_transform(test)[0] - 
# df.loc["2018-01-01":"2019-01-01"].values[0])

Shape of pandas train      set: (1777, 22)
Shape of pandas validation set: (253, 22)
Shape of pandas test       set: (254, 22)

Size of train              set: 1777
Size of validation         set: 253
Size of test               set: 254

Shape of numpy train       set: (1777, 22)
Shape of numpy validation  set: (253, 22)
Shape of numpy test        set: (254, 22)


In [6]:
# How many time steps back? - CRITICAL
print("Number of Steps back:", n_steps)

# convert into X and y
X_train, y_train = split_sequences(train, n_steps)
X_validation, y_validation = split_sequences(validation, n_steps)
X_test,  y_test = split_sequences(test, n_steps)

validation_data=(X_validation, y_validation)

# define number of features into the model - CRITICAL
n_features = X_train.shape[2]
print("Number of Features:", n_features)

# verify shapes of model inputs
print("Train shape       :", X_train.shape, y_train.shape)
print("Validation shape  :", X_validation.shape, y_validation.shape)
print("Test shape        :", X_test.shape, y_test.shape)


Number of Steps back: 50
Number of Features: 21
Train shape       : (1728, 50, 21) (1728,)
Validation shape  : (204, 50, 21) (204,)
Test shape        : (205, 50, 21) (205,)


In [None]:
model = Sequential()
model.add(LSTM(200, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse', metrics=['accuracy'])
# fit model
%time history=model.fit(X_train, y_train, \
                        validation_data=validation_data, \
                        epochs=150, batch_size=10, verbose=2)


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Train on 1728 samples, validate on 204 samples
Epoch 1/150
 - 6s - loss: 0.0017 - acc: 5.7870e-04 - val_loss: 2.0373e-05 - val_acc: 0.0000e+00
Epoch 2/150
 - 5s - loss: 2.6461e-05 - acc: 5.7870e-04 - val_loss: 8.1877e-05 - val_acc: 0.0000e+00
Epoch 3/150
 - 5s - loss: 2.1789e-05 - acc: 5.7870e-04 - val_loss: 1.2667e-04 - val_acc: 0.0000e+00
Epoch 4/150
 - 5s - loss: 1.7474e-05 - acc: 5.7870e-04 - val_loss: 2.5373e-05 - val_acc: 0.0000e+00
Epoch 5/150
 - 5s - loss: 1.6303e-05 - acc: 5.7870e-04 - val_loss: 1.2120e-04 - val_acc: 0.0000e+00
Epoch 6/150
 - 5s - loss: 1.9863e-05 - acc: 5.7870e-04 - val_loss: 8.7599e-05 - val_acc: 0.0000e+00
Epoch 7/150
 - 5s - loss: 1.1944e-05 - acc: 5.7870e-04 - val_loss: 2.9335e-05 - val_acc: 0.0000e+00
Epoch 8/150
 - 5s - loss: 1.1916e-05 - acc: 5.7870e-04 - val_loss: 1.5601e-05 - val_acc: 0.0000e+00
Epoch 9/150
 - 5s - loss: 1.6821e-05 

Epoch 76/150
 - 5s - loss: 2.3819e-06 - acc: 5.7870e-04 - val_loss: 3.2196e-05 - val_acc: 0.0000e+00
Epoch 77/150
 - 5s - loss: 1.4894e-06 - acc: 5.7870e-04 - val_loss: 5.0657e-06 - val_acc: 0.0000e+00
Epoch 78/150
 - 5s - loss: 2.1042e-06 - acc: 5.7870e-04 - val_loss: 1.6680e-05 - val_acc: 0.0000e+00
Epoch 79/150
 - 5s - loss: 3.1158e-06 - acc: 5.7870e-04 - val_loss: 1.8383e-05 - val_acc: 0.0000e+00
Epoch 80/150
 - 5s - loss: 2.5641e-06 - acc: 5.7870e-04 - val_loss: 6.6885e-06 - val_acc: 0.0000e+00
Epoch 81/150
 - 5s - loss: 1.9169e-06 - acc: 5.7870e-04 - val_loss: 2.2317e-05 - val_acc: 0.0000e+00
Epoch 82/150
 - 5s - loss: 2.6649e-06 - acc: 5.7870e-04 - val_loss: 2.6427e-05 - val_acc: 0.0000e+00
Epoch 83/150
 - 5s - loss: 4.2216e-06 - acc: 5.7870e-04 - val_loss: 8.4411e-06 - val_acc: 0.0000e+00
Epoch 84/150
 - 5s - loss: 3.8814e-06 - acc: 5.7870e-04 - val_loss: 1.5253e-05 - val_acc: 0.0000e+00
Epoch 85/150
 - 5s - loss: 2.4415e-06 - acc: 5.7870e-04 - val_loss: 1.1243e-05 - val_acc: 0

In [None]:
yhat = model.predict(X_test)
a = np.zeros((205,21))
print("yhat shape is {}".format(yhat.shape))
pred = np.concatenate((a, yhat),  axis=1)
pred = scaler.inverse_transform(pred)
pred = pred[:,-1]
print("pred shape is {}".format(pred.shape))
pred[0]

In [None]:
print("yhat shape is {}".format(y_test.shape))
new_y = y_test.reshape(y_test.shape[0],1)
print("new_y shape is {}".format(new_y.shape))
y_pred = np.concatenate((a, new_y),  axis=1)
y_pred = scaler.inverse_transform(y_pred)
y_pred = y_pred[:,-1]
y_pred[0]

In [None]:
testScore = mean_squared_error(pred, y_pred)
print('Test Score: %.2f RMSE' % (testScore))

In [None]:
# make predictions
# trainPredict = model.predict(X_train)
# testPredict = model.predict(X_test)
# print(trainPredict.shape)

# invert predictions
# trainPredict = scaler.inverse_transform(trainPredict)
# trainY = scaler.inverse_transform([y_train])
# testPredict = scaler.inverse_transform(testPredict)
# testY = scaler.inverse_transform([y_test])

# calculate root mean squared error
# trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:,0]))
# print('Train Score: %.2f RMSE' % (trainScore))
# testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:,0]))
# print('Test Score: %.2f RMSE' % (testScore))

In [None]:
# summarize history for loss
loss = history.history['loss']
val_loss = history.history['val_loss']

# loss = [min(x, 1000) for x in loss]
# val_loss = [min(x, 1000) for x in val_loss]

plt.plot(loss[1:])
plt.plot(val_loss[1:])
plt.title('model loss')
plt.ylabel('loss')200
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

In [None]:
acc = history.history['acc']
val_acc = history.history['val_acc']

# acc = [min(x, 1000) for x in acc]
# val_acc = [min(x, 1000) for x in val_acc]

plt.plot(y_pred[1:])
plt.plot(pred)
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()


In [None]:
# 