In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from matplotlib import pyplot
from math import sqrt
from pytz import timezone

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler, Normalizer, RobustScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Activation
from keras.utils.np_utils import to_categorical
from keras import optimizers
import talib

Using TensorFlow backend.


In [2]:
# Fix the random seed to reproducibility
np.random.seed(7)

In [16]:
def get_CU():
    import dovahkiin as dk
    dp = dk.DataParser()
    X = dp.get_data("cu")
    return X

In [17]:
def get_SP500():
    import pandas_datareader as pdr
    SP500 = pdr.get_data_yahoo('^GSPC')
    return SP500

In [3]:
def get_X_data():
    import dovahkiin as dk
    dp = dk.DataParser()
    X = dp.get_data("cu")
    return X

In [166]:
X = get_CU()

## Add Features

In [178]:
high = X.high.values
low = X.low.values
close = X.close.values
volume = X.volume.astype(np.float64).values

In [182]:
# Please note, some volumes can be 0
# X.replace(0.0, np.NaN).replace(0, np.NaN).fillna(method="ffill").fillna(method="backfill");
# X[(X["volume"] == 0.0)]

In [183]:
X["%K"], X["%D"] = talib.STOCHF(X.high.values, X.low.values, X.close.values)
_, X["Slow %D"] = talib.STOCH(X.high.values, X.low.values, X.close.values)
X["ROC"] = talib.ROC(X.close.values)
X["Momentum"] = talib.MOM(X.close.values)
X["WilliamsR"] = talib.WILLR(X.high.values, X.low.values, X.close.values)

X["CCI"] = talib.CCI(high, low, close)
X["RSI"] = talib.RSI(close)
X["APO"] = talib.APO(close)
X["AroonDown"], X["AroonUp"] = talib.AROON(high, low)
X["AroonOSC"] = talib.AROONOSC(high, low)
X["MAMA"], _ = talib.MAMA(close)
X["ADX"] = talib.ADX(high, low, close)
X["ADXR"] = talib.ADXR(high, low, close)
X["ATR"] = talib.ATR(high, low, close)
X["BOP"] = talib.BOP(X.open.values, high, low, close)
X["BBTop"], X["BBMid"], X["BBBot"] = talib.BBANDS(close)
X["BBWidth"] = X["BBTop"] - X["BBBot"]
X["%B"] = (X.close - X["BBBot"])/(X["BBTop"] - X["BBBot"])
X["CMO"] = talib.CMO(close)
X["-DI"] = talib.MINUS_DI(high, low, close)
X["-DM"] = talib.MINUS_DM(high, low)
X["+DI"] = talib.PLUS_DI(high, low, close)
X["+DM"] = talib.PLUS_DM(high, low)
X["DEMA"] = talib.DEMA(close)
X["EMA"] = talib.EMA(close)
X["kAMA"] = talib.KAMA(close)
X["MA"] = talib.MA(close)
X["MACD"], X["MACDSig"], X["MACDHist"] = talib.MACD(close)
X["HHV"] = talib.MAX(close)
X["LLV"] = talib.MIN(close)

X["PPO"] = talib.PPO(close)
X["PDI"] = talib.PLUS_DI(high, low, close)
X["PDM"] = talib.PLUS_DM(high, low)
X["PVI"] = (close - X.open.values) / (high - low)
X["ParabolicSAR"] = talib.SAR(high, low)
X["TEMA"] = talib.TEMA(close)
X["TRIMA"] = talib.TRIMA(close)
X["TRIX"] = talib.TRIX(close)
X["UltimateOscillator"] = talib.ULTOSC(high, low, close)
X["WMA"] = talib.WMA(close)

X["ADOscillator"] = talib.ADOSC(high, low, close, volume)
X["MFI"] = talib.MFI(high, low, close, volume)

In [69]:
X_train = X["2012":"2015"]

In [70]:
X_test = X["2016":]

In [71]:
len(X_test)

166845

In [72]:
len(X_train)

333465

In [74]:
X.shape

(958635, 54)

In [164]:
(X == 0.0).any()

open                  False
high                  False
low                   False
close                 False
volume                 True
openint               False
%K                     True
%D                     True
Slow %D               False
ROC                    True
Momentum               True
WilliamsR              True
ADOscillator          False
CCI                    True
RSI                   False
APO                    True
AroonDown              True
AroonUp                True
AroonOSC               True
MAMA                  False
ADX                   False
ADXR                  False
ATR                   False
BOP                    True
BBTop                 False
BBMid                 False
BBBot                 False
BBWidth                True
%B                    False
CMO                   False
-DI                   False
-DM                   False
+DI                   False
+DM                   False
DEMA                  False
EMA                 

## Normalization of Data

In [162]:
inds = pd.isnull(pd.DataFrame(X["volume"].pct_change()["2012":])).any(1).nonzero()[0]

In [163]:
inds

array([ 90226,  90227,  90228,  90229,  90230,  90231,  90232,  90233,
        90234,  90235,  90236,  90237,  90238,  90239,  90240,  90241,
        90242,  90243,  90244,  90245,  90246,  90247,  90248,  90249,
        90250,  90251,  90252,  90253,  90254,  90255,  90256,  90257,
        90258,  90259,  90260,  90261,  90262,  90263,  90264,  90265,
        90266,  90267,  90268,  90269,  90270,  90271,  90272,  90273,
        90274,  90275,  90276,  90277,  90278,  90279,  90280,  90281,
        90282,  90283,  90284,  90285,  90286,  90287,  90288,  90289,
        90290,  90291,  90292,  90293,  90294,  90295,  90296,  90297,
        90298,  90299,  90300,  90301,  90302,  90303,  90304,  90305,
        90306,  90307,  90308,  90309,  90310,  90311,  90312,  90313,
        90314,  90315,  90316,  90317,  90318,  90319,  90320,  90321,
        90322,  90323,  90324,  90325,  90326,  90327,  90328,  90329,
        90330,  90331,  90332,  90333,  90334,  90335,  90336,  90337,
      

In [149]:
X["2012":].ix[90226]

open                   52029.791000
high                   52029.791000
low                    52029.791000
close                  52029.791000
volume                     0.000000
openint               590382.000000
%K                        66.673021
%D                        66.907163
Slow %D                   63.008070
ROC                        0.152119
Momentum                  79.026800
WilliamsR                -31.495520
ADOscillator             558.854380
CCI                      166.743287
RSI                       69.935734
APO                       15.274384
AroonDown                 42.857143
AroonUp                   85.714286
AroonOSC                  42.857143
MAMA                   51955.044477
ADX                       21.335942
ADXR                      19.019230
ATR                       25.652380
BOP                        0.000000
BBTop                  52083.046868
BBMid                  51993.244900
BBBot                  51903.442932
BBWidth                  179

In [147]:
X["volume"]["2012":].ix[90226]

0

In [134]:
X["volume"].pct_change()["2012":].isnull()

date_time
2012-01-04 09:00:00+08:00    False
2012-01-04 09:01:00+08:00    False
2012-01-04 09:02:00+08:00    False
2012-01-04 09:03:00+08:00    False
2012-01-04 09:04:00+08:00    False
2012-01-04 09:05:00+08:00    False
2012-01-04 09:06:00+08:00    False
2012-01-04 09:07:00+08:00    False
2012-01-04 09:08:00+08:00    False
2012-01-04 09:09:00+08:00    False
2012-01-04 09:10:00+08:00    False
2012-01-04 09:11:00+08:00    False
2012-01-04 09:12:00+08:00    False
2012-01-04 09:13:00+08:00    False
2012-01-04 09:14:00+08:00    False
2012-01-04 09:15:00+08:00    False
2012-01-04 09:16:00+08:00    False
2012-01-04 09:17:00+08:00    False
2012-01-04 09:18:00+08:00    False
2012-01-04 09:19:00+08:00    False
2012-01-04 09:20:00+08:00    False
2012-01-04 09:21:00+08:00    False
2012-01-04 09:22:00+08:00    False
2012-01-04 09:23:00+08:00    False
2012-01-04 09:24:00+08:00    False
2012-01-04 09:25:00+08:00    False
2012-01-04 09:26:00+08:00    False
2012-01-04 09:27:00+08:00    False
2012-01-04

In [130]:
X.pct_change()["2012":].isnull().any()

open                  False
high                  False
low                   False
close                 False
volume                 True
openint               False
%K                     True
%D                    False
Slow %D               False
ROC                    True
Momentum               True
WilliamsR              True
ADOscillator          False
CCI                    True
RSI                   False
APO                   False
AroonDown              True
AroonUp                True
AroonOSC               True
MAMA                  False
ADX                   False
ADXR                  False
ATR                   False
BOP                    True
BBTop                 False
BBMid                 False
BBBot                 False
BBWidth                True
%B                     True
CMO                   False
-DI                   False
-DM                   False
+DI                   False
+DM                   False
DEMA                  False
EMA                 

## AutoEncoder

In [107]:
from keras.layers import Input, Dense
from keras.models import Model

In [108]:
encoding_dim = 16 # 54 dim -> 16 dim
input_layer = Input(shape=(X.shape[1],))

In [109]:
# "encoded" is the encoded representation of the input
encoded = Dense(encoding_dim, activation='relu')(input_layer)
# "decoded" is the lossy reconstruction of the input
decoded = Dense(X.shape[1], activation='sigmoid')(encoded)
# this model maps an input to its reconstruction
autoencoder = Model(input_layer, decoded)

In [110]:
encoded_input = Input(shape=(encoding_dim,))
encoder = Model(input_layer, encoded)

In [111]:
decoder_layer = autoencoder.layers[-1]
decoder = Model(encoded_input, decoder_layer(encoded_input))

In [112]:
autoencoder.compile(optimizer='adadelta', loss='mse')

In [114]:
history = autoencoder.fit(X_train.values, X_train.values)

Epoch 1/1
