In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from math import sqrt
from pytz import timezone
import statsmodels.api as sm

In [2]:
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler, Normalizer, StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Activation
from keras.layers import TimeDistributed
from keras.utils.np_utils import to_categorical
from keras import optimizers

Using TensorFlow backend.


In [3]:
np.random.seed(7)

In [4]:
def get_X_data():
    import dovahkiin as dk
    dp = dk.DataParser()
    X = dp.get_data("cu")
    return X

In [5]:
X = get_X_data()

In [6]:
X = pd.DataFrame((X["open"] + X["high"] + X["low"] + X["close"])/4, columns=["AvgPrice"])

In [7]:
X_prime = X.asfreq("10T").dropna()
# X_prime = X.asfreq("5D").dropna()

In [8]:
prediction_length = 8

In [9]:
# res = sm.tsa.seasonal_decompose(X_prime, model="multiplicative", freq=30, two_sided=True)
res = sm.tsa.seasonal_decompose(X_prime, model="multiplicative", freq=(prediction_length - 1)*2, two_sided=True)

In [10]:
# res_one_side = sm.tsa.seasonal_decompose(X_prime, model="multiplicative", freq=30, two_sided=False)

In [11]:
# res = sm.tsa.seasonal_decompose(X_prime, model="additive", freq=30)

In [12]:
trend = res.trend.dropna()
noise = res.resid.dropna()
seasonal = res.seasonal.dropna()

In [13]:
# res.plot()

In [14]:
trend_std_scaler = StandardScaler()

In [15]:
scaled_trend = trend_std_scaler.fit_transform(trend)
scaled_trend = pd.DataFrame(scaled_trend, index = trend.index)

In [16]:
# plt.cla(); plt.clf()
# res.trend.plot()
# scaled_trend.plot()
# plt.show()

In [17]:
len(trend)

97512

In [18]:
batch_size = 5
time_series_step = 32
features = 1

In [22]:
model = Sequential()

# first layer of Market2Vec
model.add(Dense(32, 
                activation="softmax",
                batch_input_shape=(batch_size, time_series_step, 1)
               )
         )

# MultiLayer of LSTM to learn abstraction if any
model.add(
    LSTM(128,
         # batch_input_shape=(batch_size, time_series_step, features), 
         stateful=True,
         return_sequences=True,
         dropout=0.2
        )
)


model.add(
    LSTM(128,
         # batch_input_shape=(batch_size, time_series_step, features), 
         stateful=True, 
         return_sequences=True,
         dropout=0.2
        ))

model.add(
    LSTM(128, 
         stateful=True, 
         return_sequences=True,
         dropout=0.2
        ))

model.add(
    LSTM(128, 
         stateful=True, 
         return_sequences=True,
         dropout=0.2
        ))

model.add(
    LSTM(128, 
         stateful=True, 
         return_sequences=True,
         dropout=0.2
        ))

model.add(
    TimeDistributed(Dense(8, activation='softmax'))
)

model.add(
    TimeDistributed(Dense(1, activation='softmax'))
)
    
# # FullyConnected Network to final output
# model.add(Dense(32, activation="softmax"))
# model.add(Dense(32, activation="softmax"))
# model.add(Dense(8, activation="linear"))

In [23]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (5, 32, 32)               64        
_________________________________________________________________
lstm_7 (LSTM)                (5, 32, 128)              82432     
_________________________________________________________________
lstm_8 (LSTM)                (5, 32, 128)              131584    
_________________________________________________________________
lstm_9 (LSTM)                (5, 32, 128)              131584    
_________________________________________________________________
lstm_10 (LSTM)               (5, 32, 128)              131584    
_________________________________________________________________
lstm_11 (LSTM)               (5, 32, 128)              131584    
_________________________________________________________________
time_distributed_1 (TimeDist (5, 32, 8)                1032      
__________

In [24]:
sgd = optimizers.SGD(lr=1e-8, decay=1e-9, momentum=0.9, nesterov=True, clipnorm=0.5)
adam = optimizers.Adam(lr=1e-4, clipnorm=1)
rmsprop = optimizers.rmsprop(lr=1e-4, decay=1e-8, clipnorm=1)

In [25]:
model.compile(loss="mse",
              optimizer=rmsprop, 
              # metrics=["accuracy"]
             )

In [26]:
def timeseries_to_supervised(raw_time_series, lag):
    p = {}
    for i in range(1, lag+1):
        p["{}".format(i)] = raw_time_series.shift(i).fillna(0) - raw_time_series
    
    supervised_data = pd.Panel(p)
    return supervised_data

def non_shuffling_train_test_split(X, y, test_size=0.2):
    i = int((1 - test_size) * X.shape[0]) + 1
    X_train, X_test = np.split(X, [i])
    y_train, y_test = np.split(y, [i])
    return X_train, X_test, y_train, y_test

def create_supervised_X(raw_time_series, lag):
    supervised_X = timeseries_to_supervised(raw_time_series, lag)
    swaped_supervised_X = supervised_X.swapaxes(0, 1)
    return swaped_supervised_X

In [27]:
supervised_X = create_supervised_X(scaled_trend, lag=time_series_step)
supervised_X = supervised_X.fillna(0)
supervised_X_values = supervised_X.values

In [28]:
supervised_X.shape

(97512, 32, 1)

In [29]:
def create_future_y(time_series, future_steps):
    p = pd.DataFrame(index=time_series.index)
    for i in range(1, future_steps+1):
        p["{}".format(i)] = time_series.shift(-i).fillna(0) - time_series
    #p["0"] = raw_time_series
    # supervised_data = pd.DataFrame(p, index=time_series.index)
    return p

In [30]:
y = create_future_y(scaled_trend, time_series_step)

In [31]:
y = np.expand_dims(y.values, axis=2)

In [32]:
y.shape

(97512, 32, 1)

In [33]:
supervised_X = supervised_X.values[512:]

In [34]:
y = y[512:]

In [35]:
supervised_X.shape

(97000, 32, 1)

In [36]:
# Normalization
(y == np.inf).any().any() or (y == -np.inf).any().any()

False

In [37]:
(supervised_X == np.inf).any().any().any() or (supervised_X == -np.inf).any().any().any()

False

In [38]:
model.fit(
        supervised_X, 
        y,
        epochs=1,
        batch_size=batch_size,
        verbose=1,
        shuffle=False,
        validation_split=0.2
)

Train on 77600 samples, validate on 19400 samples
Epoch 1/1
 6765/77600 [=>............................] - ETA: 3169s - loss: 0.9966

KeyboardInterrupt: 

In [43]:
model.layers[1].get_weights()

[array([[ 0.03182383,  0.0423538 ,  0.03777497, ...,  0.1353517 ,
          0.04213285, -0.08594719],
        [-0.17222689, -0.01056621,  0.00684876, ...,  0.0246493 ,
          0.12771715,  0.10741999],
        [ 0.1673896 , -0.01082403,  0.09938719, ...,  0.07971887,
          0.00076845, -0.12089114],
        ..., 
        [-0.0391319 , -0.01348677, -0.01615433, ..., -0.04331077,
          0.18793973, -0.1669798 ],
        [-0.13813792,  0.00665532, -0.17250216, ...,  0.07282177,
         -0.02989632,  0.16240856],
        [-0.12657964, -0.057271  ,  0.01415862, ...,  0.16965061,
          0.176622  ,  0.1293215 ]], dtype=float32),
 array([[ 0.02992599, -0.03977711, -0.03217505, ...,  0.05426534,
         -0.0665641 , -0.19620439],
        [-0.13557677, -0.05605917, -0.05813349, ..., -0.06484164,
         -0.06552805, -0.00121082],
        [-0.02360117,  0.08252946, -0.05244586, ..., -0.13341483,
         -0.08708744,  0.14862762],
        ..., 
        [-0.19779205, -0.02528335,  0

In [36]:
model.save("10T_LSTM_30TimeSteps.h5")

ImportError: `save_model` requires h5py.