In [None]:
# Render matplotlib figures inline in the notebook output area.
%matplotlib inline
# mpld3 is imported here (not in the main import cell) because it is only needed
# for the notebook display hook enabled on the next line.
import mpld3
mpld3.enable_notebook()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

In [None]:
# Read column index 4 of each CSV as a float32 column vector of shape (n_rows, 1).
# NOTE(review): column 4 is presumably the closing price - confirm against the CSV header.
_csv_paths = (
    '../input/stocks-timeseries-data/DJI_20150919-20180918.csv',
    '../input/stocks-timeseries-data/AAPL_20150919-20180918.csv',
    '../input/stocks-timeseries-data/AMZN_20150919-20180918.csv',
)
dataset_DJI, dataset_AAPL, dataset_AMAZN = [
    read_csv(csv_path, usecols=[4]).values.astype('float32')
    for csv_path in _csv_paths
]

In [None]:
def delta_time_series(data):
    """
    Return the first differences of ``data`` along its first axis.

    Element ``i`` of the result is ``data[i + 1] - data[i]``, so the result
    has one entry fewer than the input.
    """
    following = data[1:]
    preceding = data[:-1]
    return following - preceding

In [None]:
# Step-to-step change of each loaded series (one row shorter than its source).
dataset_delta_DJI, dataset_delta_AAPL, dataset_delta_AMAZN = map(
    delta_time_series,
    (dataset_DJI, dataset_AAPL, dataset_AMAZN),
)

In [None]:
def plot_delta(data):
    """
    Plot the first differences of ``data``.

    The differencing happens inside this function (via ``delta_time_series``),
    so callers should pass the undifferenced series.
    """
    deltas = delta_time_series(data)
    plt.plot(deltas)
    plt.ylabel('close')
    plt.show()

In [None]:
# plot_delta() differences its argument itself, so pass the raw series; passing
# the already-differenced dataset_delta_AMAZN would plot second differences.
plot_delta(dataset_AMAZN)

In [None]:
def get_y_from_generator(gen):
    '''
    Get all targets y from a TimeseriesGenerator instance.

    Parameters
    ----------
    gen : sequence of batches
        Any object supporting ``len(gen)`` and ``gen[i]``, where ``gen[i][1]``
        is the target array of batch ``i``.

    Returns
    -------
    numpy.ndarray
        All targets in batch order as a single column of shape (n_targets, 1).
        An empty generator yields an empty array of shape (0, 1).
    '''
    # Flatten each batch so batches of different sizes (e.g. a short final
    # batch) can be joined with one concatenate instead of repeated np.append.
    batches = [np.ravel(gen[i][1]) for i in range(len(gen))]
    if batches:
        y = np.concatenate(batches).reshape((-1, 1))
    else:
        # No batches at all: return an empty column instead of failing on None.
        y = np.empty((0, 1))
    print(y.shape)
    return y

In [None]:
def binary_accuracy(a, b):
    '''
    Helper function to compute the match score of two
    binary arrays.

    Parameters
    ----------
    a, b : array-like
        Inputs with the same number of elements. Lists and numpy arrays are
        accepted; both are flattened before comparison so that a column
        vector of shape (n, 1) and a flat vector of shape (n,) are compared
        element by element rather than broadcast into an (n, n) matrix.

    Returns
    -------
    numpy.float64
        Fraction of positions at which ``a`` and ``b`` are equal.

    Raises
    ------
    AssertionError
        If the inputs do not contain the same number of elements.
    '''
    a = np.asarray(a).ravel()
    b = np.asarray(b).ravel()
    assert a.size == b.size
    return np.mean(a == b)

## Single time series as input
### Normalize datasets

In [None]:
# Model the step-to-step change of the DJI series.
dataset = dataset_delta_DJI
# Split sizes for a chronological split: first 67% for training, rest for testing.
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
# Fit the scaler on the training portion only, so the min/max of the test
# period do not leak into training; then apply that same scaling to the
# whole series. Test values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[:train_size])
dataset = scaler.transform(dataset)
# split into train and test sets
train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]

In [None]:
# Display the number of rows in the train and test splits.
train_size, test_size

In [None]:
# Number of consecutive past steps in each input window; used as the
# TimeseriesGenerator `length` and as the LSTM input timestep count below.
look_back = 7


In [None]:
# Windows of `look_back` consecutive training rows, each paired with the row
# that immediately follows the window; delivered in batches of 3.
train_data_gen = TimeseriesGenerator(
    data=train,
    targets=train,
    length=look_back,
    sampling_rate=1,
    batch_size=3,
)

In [None]:
# Same windowing over the test split, one window per batch.
test_data_gen = TimeseriesGenerator(
    data=test,
    targets=test,
    length=look_back,
    sampling_rate=1,
    stride=1,
    batch_size=1,
)

In [None]:
# A 4-unit LSTM reading windows of shape (look_back, 1), followed by a single
# linear output unit; trained with Adam on mean squared error.
model = Sequential([
    LSTM(4, input_shape=(look_back, 1)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Write a diagram of the model (including layer shapes) to model.png.
# NOTE(review): this import sits outside the notebook's main import cell.
from keras.utils import plot_model
plot_model(model, to_file='model.png', show_shapes=True)
# Disabled: explicit display of the saved image in the notebook.
# from IPython.display import Image
# Image(filename='model.png')

In [None]:
# Train for 100 epochs on the training generator; keep only the `.history`
# attribute of the returned object.
# NOTE(review): `fit_generator` is deprecated in recent Keras/TensorFlow
# releases in favour of `fit` - confirm against the installed version.
# NOTE(review): no random seed is set anywhere visible, so results will vary
# between runs.
history = model.fit_generator(train_data_gen, epochs=100).history

In [None]:
# Loss on the test generator. The model is compiled with mean squared error
# and the generator yields scaled values, so this is MSE in scaled units.
model.evaluate_generator(test_data_gen)

In [None]:
# Model outputs for every training window (still in scaled units at this point).
train_predict = model.predict_generator(train_data_gen)
train_predict.shape

In [None]:
# Model outputs for every test window (still in scaled units at this point).
test_predict = model.predict_generator(test_data_gen)
test_predict.shape

In [None]:
# invert predictions, scale values back to real index/price range.
train_predict = scaler.inverse_transform(train_predict)
test_predict = scaler.inverse_transform(test_predict)

In [None]:
# Collect the true targets for every window and convert them back to the
# original (unscaled) units, so they are directly comparable with
# train_predict/test_predict, which have already been inverse-transformed.
# get_y_from_generator concatenates batches of any size, so a short final
# batch does not break the reshape to a single column.
train_actual_y = scaler.inverse_transform(get_y_from_generator(train_data_gen))
test_actual_y = scaler.inverse_transform(get_y_from_generator(test_data_gen))

In [None]:
# Shapes of the true-target arrays; they should match the prediction shapes
# shown in the next cell.
train_actual_y.shape, test_actual_y.shape 

In [None]:
# Shapes of the prediction arrays, for comparison with the target shapes above.
train_predict.shape, test_predict.shape

In [None]:
# Absolute percentage error per sample: |prediction - actual| / |actual| * 100.
# The denominator uses the magnitude of the actual value so that negative
# actuals cannot produce negative "absolute" errors. Samples whose actual value
# is exactly zero have no defined percentage error; they are masked out so
# that np.mean over the result averages only the defined entries.
with np.errstate(divide='ignore', invalid='ignore'):
    train_ape = np.ma.masked_invalid(
        np.abs(train_predict - train_actual_y) * 100 / np.abs(train_actual_y)
    )
    test_ape = np.ma.masked_invalid(
        np.abs(test_predict - test_actual_y) * 100 / np.abs(test_actual_y)
    )

In [None]:
# Mean absolute percentage error on the train and test windows.
np.mean(train_ape), np.mean(test_ape)


In [None]:
# calculate root mean squared error
trainScore = math.sqrt(mean_squared_error(train_actual_y[:,0], train_predict[:,0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(test_actual_y[:, 0], test_predict[:,0]))
print('Test Score: %.2f RMSE' % (testScore))

In [None]:
# Convert the full (scaled) series back to its original units for plotting.
# WARNING: this overwrites `dataset` in place, so running the cell twice applies
# the inverse transform twice; re-run the normalisation cell first if needed.
dataset = scaler.inverse_transform(dataset)
dataset.shape

In [None]:
# NaN-filled array the same shape as the series, with the training predictions
# placed at the positions of the steps they predict (the first `look_back`
# steps have no prediction because they only serve as input).
trainPredictPlot = np.empty_like(dataset)
trainPredictPlot[:, :] = np.nan
trainPredictPlot[look_back:len(train_predict)+look_back, :] = train_predict
# Delta + previous close. The model was trained on DJI deltas, where
# delta[i] = close[i + 1] - close[i], so the previous close for delta[i] is
# dataset_DJI[i], i.e. dataset_DJI[:-1].
trainPredictPlot = trainPredictPlot + dataset_DJI[:-1]

In [None]:
# NaN-filled array the same shape as the series, with the test predictions
# placed at the positions of the steps they predict. The test split starts at
# row `train_size`, and its first `look_back` rows only serve as input, so the
# first predicted step is row train_size + look_back.
testPredictPlot = np.empty_like(dataset)
testPredictPlot[:, :] = np.nan
test_start = train_size + look_back
testPredictPlot[test_start:test_start + len(test_predict), :] = test_predict

# Delta + previous close: delta[i] = close[i + 1] - close[i], so the previous
# close for delta[i] is dataset_DJI[i], i.e. dataset_DJI[:-1].
testPredictPlot = testPredictPlot + dataset_DJI[:-1]

In [None]:
# Plot actual deltas against predicted deltas. Each prediction is drawn at the
# index of the step it predicts: training predictions start after the first
# `look_back` rows, test predictions after the first `look_back` rows of the
# test split. Without explicit x positions both would be drawn from x = 0.
train_x = np.arange(look_back, look_back + len(train_predict))
test_x = np.arange(train_size + look_back,
                   train_size + look_back + len(test_predict))
plt.plot(dataset, label='Actual')
plt.plot(train_x, train_predict, label='Train Preds')
plt.plot(test_x, test_predict, label='Test Preds')
plt.legend()
plt.show()

In [None]:
# plot baseline and predictions
# Baseline: rebuild the actual series from its deltas by adding each delta to
# the previous value of the same (DJI) series.
plt.plot(dataset + dataset_DJI[:-1], label='Actual')
plt.plot(trainPredictPlot, label='Train')
# plt.plot(testPredictPlot, label='Test')
plt.legend()
plt.show()

In [None]:

# Overlay the reconstructed actual series with the train and test predictions.
baseline = dataset + dataset_DJI[:-1]
for series in (baseline, trainPredictPlot, testPredictPlot):
    plt.plot(series)
plt.show()