In [1]:
import numpy as np
import pandas as pd
import scipy
from datetime import datetime as dt
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from keras import backend as K
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, LSTM

Using TensorFlow backend.


In [2]:
# Plot styling shared by every figure in the notebook: white grid background
# and the first 8 colours of seaborn's "deep" palette as the default cycle.
sns.set_style(style="whitegrid")
colors = sns.color_palette(palette="deep", n_colors=8)
sns.set_palette(palette=colors)

In [3]:
# Raw frame exactly as stored on disk; kept untouched next to the working copy.
df_orig = pd.read_json("../datasets/df.json", orient="split")

# Working copy:
#   1. skip as many leading rows as eth_close has missing values
#      (presumably the gaps are all at the start of the series -- TODO confirm),
#   2. replace every remaining NaN with 0,
#   3. move the "date" column into the index.
n_missing_close = df_orig.eth_close.isnull().sum()
df = (
    df_orig.copy()[n_missing_close:]
    .fillna(0)
    .set_index("date", drop=True)
)
print(df.shape)
df.head(2)

(1134, 15)


Unnamed: 0_level_0,eth_open,eth_high,eth_low,eth_close,eth_volumefrom,eth_volumeto,btc,xrp,eos,ltc,xlm,xmr,vixcls,twexb,effr
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2015-08-06,0.6747,3.0,0.6747,3.0,123.93,371.79,277.89,0.008013,0.0,4.1,0.0,0.75,13.77,118.1833,0.14
2015-08-07,3.0,3.0,0.15,1.2,2119.43,1438.16,258.6,0.009,0.0,3.77,0.0,0.75,13.39,118.1833,0.14


In [4]:
# Persistence baseline: the forecast for each of the last 30 closes is simply
# the most recently observed close (walk-forward, one step at a time).
X = df.eth_close
train, test = X[:-30], X[-30:]
history = list(train)
predictions = list()
for i in range(len(test)):
    # Forecast = last value seen so far.
    predictions.append(history[-1])
    # `test` is indexed by date, so an integer key must go through .iloc;
    # plain test[i] relies on pandas' deprecated positional fallback.
    history.append(test.iloc[i])

# RMSE of the naive forecast; the reference score the LSTM has to beat.
rmse_baseline = np.sqrt(mean_squared_error(test, predictions))
print(rmse_baseline)

16.01032625110015


In [6]:
def to_supervised(data, lag=1):
    """Frame a sequence as a supervised-learning table.

    Parameters
    ----------
    data : anything accepted by ``pd.DataFrame``
        The series (or columns) to frame.
    lag : int, default 1
        Number of lagged copies to prepend.

    Returns
    -------
    pd.DataFrame
        Columns are ``data`` shifted by 1, 2, ..., ``lag`` rows, followed by
        the unshifted ``data``. The NaNs introduced by shifting are replaced
        with 0.
    """
    frame = pd.DataFrame(data)
    pieces = []
    for step in range(1, lag + 1):
        pieces.append(frame.shift(step))
    # The unshifted values go last so the final column is the target.
    pieces.append(frame)
    return pd.concat(pieces, axis=1).fillna(0)

def difference(data, interval=1):
    """Return the ``interval``-step differences of ``data`` as a pandas Series.

    Element ``k`` of the result is ``data[k + interval] - data[k]``, so the
    result is ``interval`` items shorter than the input.
    """
    deltas = [
        data[pos] - data[pos - interval]
        for pos in range(interval, len(data))
    ]
    return pd.Series(deltas)

def inverse_diff(history, yhat, interval=1):
    """Undo differencing: add ``yhat`` to the value ``interval`` places from the end of ``history``."""
    anchor = history[-interval]
    return yhat + anchor

def scale(train, test):
    """Rescale train and test with a MinMaxScaler fitted on train only.

    The scaler uses ``feature_range=(-1, 1)``. Both inputs are reshaped to
    ``(shape[0], shape[1])`` before being transformed.

    Returns
    -------
    tuple
        ``(scaler, train_scaled, test_scaled)``.
    """
    # Fit on the training rows only, so the test rows do not influence the ranges.
    scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train)

    train_rows, train_cols = train.shape[0], train.shape[1]
    train_scaled = scaler.transform(train.reshape(train_rows, train_cols))

    test_rows, test_cols = test.shape[0], test.shape[1]
    test_scaled = scaler.transform(test.reshape(test_rows, test_cols))

    return scaler, train_scaled, test_scaled

def invert_scale(scaler, X, value):
    """Map a single scaled value back to its original scale.

    ``scaler.inverse_transform`` works on whole rows, so the row is rebuilt as
    the items of ``X`` followed by ``value``, inverted as a 1 x n array, and
    only the last entry (the one corresponding to ``value``) is returned.
    """
    row = np.array([*X, value])
    row = row.reshape(1, len(row))
    restored = scaler.inverse_transform(row)
    return restored[0, -1]

def fit_lstm(train, batch_size, nb_epoch, neurons):
    """Build and train a stateful single-layer LSTM regressor.

    Parameters
    ----------
    train : 2-D array
        Every column except the last is used as input; the last column is
        the target.
    batch_size : int
        Batch size, used both for the fixed input shape and for fitting.
    nb_epoch : int
        Number of passes over the data.
    neurons : int
        Number of LSTM units.

    Returns
    -------
    The fitted Keras ``Sequential`` model (LSTM followed by ``Dense(1)``).
    """
    features, targets = train[:, 0:-1], train[:, -1]
    n_samples, n_features = features.shape[0], features.shape[1]
    # One timestep per sample: (samples, 1, features).
    features = features.reshape(n_samples, 1, n_features)

    recurrent = LSTM(
        neurons,
        batch_input_shape=(batch_size, 1, n_features),
        stateful=True,
    )
    model = Sequential()
    model.add(recurrent)
    model.add(Dense(1))
    model.compile(loss="mean_squared_error", optimizer="adam")

    # Epochs are run one at a time so the LSTM state can be cleared between
    # passes; shuffle=False keeps the rows in their original order.
    for _ in range(nb_epoch):
        model.fit(
            features,
            targets,
            epochs=1,
            batch_size=batch_size,
            verbose=0,
            shuffle=False,
        )
        model.reset_states()
    return model

def forecast_lstm(model, batch_size, X):
    """Make a one-step forecast from a single feature row.

    ``X`` is reshaped to ``(1, 1, len(X))`` before being passed to
    ``model.predict``; the scalar at position ``[0, 0]`` of the prediction is
    returned.
    """
    sample = X.reshape(1, 1, len(X))
    prediction = model.predict(sample, batch_size=batch_size)
    return prediction[0, 0]

## TS: univariate LSTM on differenced eth_close

In [8]:
# set ts
ts = df.eth_close

# difference for stationarity
diff = difference(ts.values, 1)

# transform to supervised learning format (column 0: previous difference,
# column 1: current difference)
supervised = to_supervised(diff, 1)
supervised_values = supervised.values

# set train/test: the last 30 rows are held out
train, test = supervised_values[:-30], supervised_values[-30:]

# scale to (-1, 1) with a scaler fitted on train only.
# This line must run: the fitting and prediction cells below read
# `scaler`, `train_scaled` and `test_scaled`, and fail with NameError on a
# fresh kernel if it is commented out.
scaler, train_scaled, test_scaled = scale(train, test)

In [10]:
# Held-out rows (last 30) of the supervised table, unscaled:
# column 0 is the previous difference, column 1 the current difference.
test

array([[ -5.72,   2.93],
       [  2.93,   5.56],
       [  5.56,  30.77],
       [ 30.77, -22.72],
       [-22.72,   4.77],
       [  4.77, -28.81],
       [-28.81,  10.32],
       [ 10.32, -10.76],
       [-10.76,   5.46],
       [  5.46,   5.54],
       [  5.54,  -3.81],
       [ -3.81,  -3.26],
       [ -3.26,  13.72],
       [ 13.72,   7.53],
       [  7.53,  -6.88],
       [ -6.88,  -4.52],
       [ -4.52,  -2.49],
       [ -2.49,  13.7 ],
       [ 13.7 ,  -0.34],
       [ -0.34,  -6.05],
       [ -6.05,  -3.74],
       [ -3.74, -56.96],
       [-56.96,   1.25],
       [  1.25, -14.38],
       [-14.38, -18.37],
       [-18.37,  -0.78],
       [ -0.78,   1.15],
       [  1.15, -11.99],
       [-11.99,  -2.12],
       [ -2.12,  23.  ]])

In [9]:
# Training rows of the supervised table (everything before the last 30), unscaled:
# column 0 is the previous difference, column 1 the current difference.
train

array([[  0.  ,  -1.8 ],
       [ -1.8 ,   0.  ],
       [  0.  ,   0.  ],
       ...,
       [-22.56,   0.  ],
       [  0.  , -24.98],
       [-24.98,  -5.72]])

In [7]:
import time

# Train the LSTM on the scaled training rows and report the wall-clock cost.
start = time.time()
lstm_model = fit_lstm(train_scaled, batch_size=1, nb_epoch=1000, neurons=4)
elapsed = time.time() - start
print("runtime: {} seconds".format(elapsed))

runtime: 1449.486298084259 seconds


In [8]:
# get predictions: walk forward over the held-out rows, one step at a time
preds = list()
n_test = len(test_scaled)
for i in range(n_test):
    # Every column except the last is an input feature -- the same split
    # fit_lstm uses for training, so this stays correct for any lag.
    X = test_scaled[i, 0:-1]
    # One-step forecast in scaled, differenced space (batch size 1).
    yhat = forecast_lstm(lstm_model, 1, X)
    # Undo the (-1, 1) scaling.
    yhat = invert_scale(scaler, X, yhat)
    # Undo the differencing: add the observed close that immediately
    # precedes the day being forecast.
    yhat = inverse_diff(ts.values, yhat, n_test + 1 - i)
    preds.append(yhat)

In [9]:
# Root-mean-squared error of the LSTM forecasts against the last 30 observed closes.
mse_lstm = mean_squared_error(ts[-30:], preds)
rmse_lstm = np.sqrt(mse_lstm)
print(rmse_lstm)

15.892882837317583


## Exog