In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from matplotlib import pyplot
from math import sqrt
from pytz import timezone

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler, Normalizer, RobustScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Activation
from keras.utils.np_utils import to_categorical
from keras import optimizers
import talib

Using TensorFlow backend.


In [2]:
# Fix NumPy's global random seed for reproducibility.
# NOTE(review): only NumPy is seeded here; the Keras/TensorFlow backend may keep
# its own random state — confirm whether it needs seeding as well.
np.random.seed(7)

In [3]:
def get_CU():
    """Return the "cu" dataset as loaded by dovahkiin's ``DataParser``.

    dovahkiin is imported lazily so the notebook can be opened without it
    until this loader is actually called.
    """
    import dovahkiin as dk
    return dk.DataParser().get_data("cu")

In [4]:
def get_SP500():
    """Return what pandas_datareader's Yahoo reader yields for ticker '^GSPC'.

    The call uses the reader's default date range and requires network access.
    """
    from pandas_datareader import get_data_yahoo
    return get_data_yahoo('^GSPC')

In [5]:
def get_X_data():
    """Return the feature source data used throughout the notebook.

    Delegates to ``get_CU`` so the "cu" loading logic is defined only once;
    swap the delegate here to run the notebook on a different instrument.
    """
    return get_CU()

In [6]:
# Load the raw data; every later cell reads from, and adds columns to, this X.
X = get_X_data()

## Add Features

In [7]:
# Pull the raw price columns out as NumPy arrays for the talib calls below.
high, low, close = (X[column].values for column in ("high", "low", "close"))
# Volume is cast to float64 before being handed to talib.
# NOTE(review): presumably the stored volume is integer-typed — confirm.
volume = X["volume"].astype(np.float64).values

In [8]:
# Min Max Scaler
X["%K"], X["%D"] = talib.STOCHF(X.high.values, X.low.values, X.close.values)
_, X["Slow %D"] = talib.STOCH(X.high.values, X.low.values, X.close.values)
X["RSI"] = talib.RSI(close)
X["WilliamsR"] = talib.WILLR(X.high.values, X.low.values, X.close.values)
X["-DI"] = talib.MINUS_DI(high, low, close)
X["+DI"] = talib.PLUS_DI(high, low, close)
X["CMO"] = talib.CMO(close)
X["AroonOSC"] = talib.AROONOSC(high, low)
X["ADX"] = talib.ADX(high, low, close)


# Scale the same as raw prices
X["DEMA"] = talib.DEMA(close)
X["EMA"] = talib.EMA(close)
X["kAMA"] = talib.KAMA(close)
X["TEMA"] = talib.TEMA(close)
X["TRIMA"] = talib.TRIMA(close)
X["WMA"] = talib.WMA(close)
X["MA"] = talib.MA(close)
X["MAMA"], _ = talib.MAMA(close)

# No Scaling Needed
X["BOP"] = talib.BOP(X.open.values, high, low, close)

# Scale as percentage
X["ROC"] = talib.ROC(X.close.values)

# No Sure what to do
X["Momentum"] = talib.MOM(X.close.values)
X["CCI"] = talib.CCI(high, low, close)
X["APO"] = talib.APO(close)
X["-DM"] = talib.MINUS_DM(high, low)
X["+DM"] = talib.PLUS_DM(high, low)



X["AroonDown"], X["AroonUp"] = talib.AROON(high, low)

X["ADXR"] = talib.ADXR(high, low, close)
X["ATR"] = talib.ATR(high, low, close)

X["BBTop"], X["BBMid"], X["BBBot"] = talib.BBANDS(close)
X["BBWidth"] = X["BBTop"] - X["BBBot"]
X["%B"] = (X.close - X["BBBot"])/(X["BBTop"] - X["BBBot"])



X["MACD"], X["MACDSig"], X["MACDHist"] = talib.MACD(close)
X["HHV"] = talib.MAX(close)
X["LLV"] = talib.MIN(close)

X["PPO"] = talib.PPO(close)
X["PDI"] = talib.PLUS_DI(high, low, close)
X["PDM"] = talib.PLUS_DM(high, low)
X["PVI"] = (close - X.open.values) / (high - low)
X["ParabolicSAR"] = talib.SAR(high, low)

X["TRIX"] = talib.TRIX(close)
X["UltimateOscillator"] = talib.ULTOSC(high, low, close)


X["ADOscillator"] = talib.ADOSC(high, low, close, volume)
X["MFI"] = talib.MFI(high, low, close, volume)

In [12]:
# Row count of the test split. X_test is only assigned in the train/test split
# cell further down, so on a fresh kernel that cell has to run before this one.
len(X_test)

166845

In [13]:
# Row count of the training split. X_train is only assigned in the train/test
# split cell further down, so on a fresh kernel that cell has to run first.
len(X_train)

333465

In [14]:
# (rows, columns) of the full feature table after the indicator columns were added.
X.shape

(958635, 54)

## Normalization of Data

In [115]:
# Column whose value range is inspected in the next cells; change the name to
# examine a different indicator.
indicator = "ADX"

In [116]:
# Smallest value of the selected indicator column.
X[indicator].min()

2.0192749376481625

In [117]:
# Largest value of the selected indicator column.
X[indicator].max()

99.930548759302638

In [118]:
# Mean of the selected indicator column.
X[indicator].mean()

28.965332663845128

In [119]:
# Standard deviation of the selected indicator column.
X[indicator].std()

13.653568412139093

In [51]:
# Sanity check on the modelling window (2012 onward): no feature may be
# infinite. Both +inf and -inf are checked, and the failure message names the
# offending columns so the source of the bad values can be tracked down.
inf_by_column = X["2012":].isin([np.inf, -np.inf]).any()
assert not inf_by_column.any(), "Infinite values found in columns: {}".format(
    inf_by_column[inf_by_column].index.tolist())

AssertionError: 

## Divide Data into Train and Test

In [37]:
# Chronological hold-out: rows labelled 2012 through 2015 are used for fitting,
# rows from 2016 onward for evaluation (label-based slicing on X's index).
X_train, X_test = X["2012":"2015"], X["2016":]

# Report each split's share of all rows from 2012 onward.
rows_since_2012 = len(X["2012":])
test_share = 100 * len(X_test) / rows_since_2012
train_share = 100 * len(X_train) / rows_since_2012
print("Test: {:.2f}%".format(test_share))
print("Train: {:.2f}%".format(train_share))

Test: 33.35%
Train: 66.65%


## AutoEncoder

In [107]:
from keras.layers import Input, Dense
from keras.models import Model

In [108]:
# Size of the compressed representation the autoencoder learns.
encoding_dim = 16
# Network input: one vector per row, with one entry for every column of X.
input_layer = Input(shape=X.shape[1:])

In [109]:
# "encoded" is the encoded representation of the input
encoded = Dense(encoding_dim, activation='relu')(input_layer)
# "decoded" is the lossy reconstruction of the input
decoded = Dense(X.shape[1], activation='sigmoid')(encoded)
# this model maps an input to its reconstruction
autoencoder = Model(input_layer, decoded)

In [110]:
# Stand-alone input placeholder of size encoding_dim; it feeds the separate
# decoder model built in the next cell.
encoded_input = Input(shape=(encoding_dim,))
# Encoder-only model: reuses the autoencoder's input and bottleneck tensors to
# map an input row to its encoded representation.
encoder = Model(input_layer, encoded)

In [111]:
# Take the autoencoder's last layer (its output Dense layer) so the decoder
# uses the very same layer object, and therefore the same weights.
decoder_layer = autoencoder.layers[-1]
# Decoder-only model: maps an encoded vector back to a reconstruction.
decoder = Model(encoded_input, decoder_layer(encoded_input))

In [112]:
# Configure training: Adadelta optimizer, mean-squared error between the input
# and its reconstruction as the loss.
autoencoder.compile(optimizer='adadelta', loss='mse')

In [114]:
# The decoder's sigmoid output can only produce values in (0, 1), while the raw
# features are far larger (ADX alone ranges up to ~100), so the network cannot
# reconstruct unscaled inputs. Non-finite values would also poison the loss.
# Therefore: drop rows with NaN/inf, then min-max scale each feature to [0, 1].
train_features = X_train.replace([np.inf, -np.inf], np.nan).dropna()

# Fit the scaler on training rows only; reuse `feature_scaler.transform(...)`
# on any data later passed to the autoencoder/encoder (e.g. X_test).
feature_scaler = MinMaxScaler()
train_scaled = feature_scaler.fit_transform(train_features.values)

# Autoencoder target equals its input; Keras defaults are kept for epochs/batch size.
history = autoencoder.fit(train_scaled, train_scaled)

Epoch 1/1
