In [194]:
from IPython.core.debugger import set_trace

from keras.utils.vis_utils import model_to_dot
from IPython.display import SVG
import pydot_ng as pydot


import sys
sys.path.append('../financial_utils/')

import keras
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.utils import shuffle

%matplotlib notebook

import matplotlib
import matplotlib.pyplot as plt

import performance as per

# DATASET

### Load and Compute Returns

In [195]:
price_table = pd.read_csv('history_files/SPY.csv')
vol_table = pd.read_csv('history_files/VIX.csv')

# Build Tick Table
tick_table = price_table[['Date','Close','Open','High','Low']]
tick_table.columns = ['Date', 'Tick','Open','High','Low']
# Get Return Table
returns, other_returns = per.tick2ret_pivoted(tick_table)
data_dict = {'Date':tick_table.Date[1:], 
              'ReturnClose': returns[:,0], 'ReturnOpen': other_returns[:,0],
              'ReturnHigh': other_returns[:,1], 'ReturnLow': other_returns[:,2]}
returns_table = pd.DataFrame(data=data_dict,columns=data_dict.keys())
returns_table = returns_table.set_index('Date')

# Drop columns of Vol Table
vol_table = vol_table[['Date', 'Close','Open','High','Low']]
vol_table.columns = ['Date', 'VolClose','VolOpen','VolHigh','VolLow']
vol_table.VolClose = vol_table.VolClose/100
vol_table.VolOpen = vol_table.VolOpen/100
vol_table.VolHigh = vol_table.VolHigh/100
vol_table.VolLow = vol_table.VolLow/100
vol_table = vol_table.set_index('Date')

# InnerJoin
retvol_table = pd.concat([returns_table,vol_table], join='inner', axis=1)


In [196]:
T_x = 10
n_fields = retvol_table.shape[1]
T_y = 2
T_stride = 1

### Generate Dataset

In [197]:
def gen_dataset(T_x, T_y, n_fields, T_stride, retvol_table):
    """
    
    
    return
        x E (m, T_x)
        y E (m, T_y)
    """
    
    m = int(np.floor((returns.shape[0] - T_x) / T_stride))
    
    x = np.nan * np.ones((m-T_y*T_stride, T_x, n_fields))
    y = np.nan * np.ones((m-T_y*T_stride, T_y, n_fields))
    
    for i in range(m-T_y*T_stride):
        for j in range(retvol_table.shape[1]):
            x[i,:,j] = retvol_table.iloc[i*T_stride:i*T_stride+T_x,j].transpose()
            y[i,:,j] = retvol_table.iloc[i*T_stride+T_x:i*T_stride+T_x+T_y,j].transpose()
            
    return x, y, m

In [198]:
def normalize_data(x, params=None):
    """
    
    return 
        x_norm
        x_norm_param = (mean, std)
    """
    if params == None:
        mean = np.mean(x, axis=(0,1)) 
        std = np.std(x, axis=(0,1))
    else:
        mean = params[0]
        std = params[1]
    x_norm_param = (mean, std)
    x_norm = (x - mean)/std
    
    return x_norm, x_norm_param

In [199]:
def denormalize_data(x_norm, x_norm_param):
    """"""
    
    mean = x_norm_param[0]
    std = x_norm_param[1]
    x = x_norm * std + mean
    
    return x

In [200]:
x, y, m = gen_dataset(T_x,T_y, n_fields, T_stride,retvol_table)
x, x_norm_param = normalize_data(x)
y, _ = normalize_data(y, x_norm_param)
print('Training Example: '+str(m))
print('X Shape: '+str(x.shape))
print('Y Shape: '+str(y.shape))

Training Example: 6085
X Shape: (6083, 10, 8)
Y Shape: (6083, 2, 8)


### Train / Val / Test

In [201]:
x,y = shuffle(x,y)

train_end = int(m * 0.7)

val_end = train_end + int(0.15 * m)

x_train = x[0:train_end]
x_val = x[train_end:val_end]
x_test = x[val_end:]

y_train = y[0:train_end]
y_val = y[train_end:val_end]
y_test = y[val_end:]

# Keras Model

### Encoder- Decoder

In [202]:
keras.backend.clear_session()

n_a = 100

encoder_LSTM = keras.layers.LSTM(units = n_a, return_state=True)
decoder_LSTM = keras.layers.LSTM(units = n_a, return_state=True, return_sequences=True)

flatter = keras.layers.Flatten()
dense = keras.layers.Dense(units = 100, activation='tanh')
relu_out = keras.layers.Dense(units = n_fields, activation='tanh')
concatenator = keras.layers.Lambda(lambda x: keras.backend.stack(x, axis=1))

In [203]:
def define_model(T_x, T_y, n_fields, n_a):
    
    x_input = keras.layers.Input(shape=(T_x, n_fields))
    output_seq = list()
    
    _, encoder_h, encoder_c = encoder_LSTM(x_input)  
    
    decoder_input = keras.layers.Input(shape=(1,n_a))
    deco_input = decoder_input
    decoder_h = encoder_h
    decoder_c = encoder_c
    
    for _ in range(T_y):
        decoder_outputs, decoder_h, decoder_c = decoder_LSTM(deco_input, initial_state=[decoder_h, decoder_c])
        decoder_outputs_flat = flatter(decoder_outputs)
        out = dense(decoder_outputs_flat)
        out = relu_out(out)
        
        output_seq.append(out)
        deco_input = decoder_outputs
        
    if T_y != 1:    
        output_seq = concatenator(output_seq)
    model = keras.models.Model(inputs=[x_input, decoder_input], outputs=output_seq)
    return model

In [204]:
model = define_model(T_x, T_y, n_fields, n_a)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 10, 8)        0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 1, 100)       0                                            
__________________________________________________________________________________________________
lstm_1 (LSTM)                   [(None, 100), (None, 43600       input_1[0][0]                    
__________________________________________________________________________________________________
lstm_2 (LSTM)                   [(None, 1, 100), (No 80400       input_2[0][0]                    
                                                                 lstm_1[0][1]                     
          

In [205]:
optim = keras.optimizers.Adam(lr=0.001)
model.compile(optimizer=optim, loss='mean_squared_error', metrics=['accuracy'])

In [206]:
epochs = 300

decoder_in = np.zeros((x_train.shape[0],1,n_a))
decoder_val = np.zeros((x_val.shape[0],1,n_a))
if T_y!=1:
    history = model.fit(x=[x_train,decoder_in], y=y_train,
              validation_data = ([x_val, decoder_val], y_val),
              shuffle=True, epochs = epochs)
else:
    history = model.fit(x=[x_train,decoder_in], y=y_train[:,0,:],
              validation_data = ([x_val, decoder_val], y_val[:,0,:]),
              shuffle=True, epochs = epochs)


Train on 4259 samples, validate on 912 samples
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300


Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 79/300
Epoch 80/300
Epoch 81/300
Epoch 82/300
Epoch 83/300
Epoch 84/300
Epoch 85/300
Epoch 86/300
Epoch 87/300
Epoch 88/300
Epoch 89/300
Epoch 90/300
Epoch 91/300
Epoch 92/300
Epoch 93/300
Epoch 94/300
Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300
Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300
Epoch 106/300
Epoch 107/300
Epoch 108/300
Epoch 109/300
Epoch 110/300
Epoch 111/300
Epoch 112/300
Epoch 113/300
Epoch 114/300
Epoch 115/300
Epoch 116/300
Epoch 117/300
Epoch 118/300
Epoch 119/300


Epoch 120/300
Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300
Epoch 161/300
Epoch 162/300
Epoch 163/300
Epoch 164/300
Epoch 165/300
Epoch 166/300
Epoch 167/300
Epoch 168/300
Epoch 169/300
Epoch 170/300
Epoch 171/300
Epoch 172/300
Epoch 173/300
Epoch 174/300
Epoch 175/300
Epoch 176/300
Epoch 177/300
Epoch 178/300


Epoch 179/300
Epoch 180/300
Epoch 181/300
Epoch 182/300
Epoch 183/300
Epoch 184/300
Epoch 185/300
Epoch 186/300
Epoch 187/300
Epoch 188/300
Epoch 189/300
Epoch 190/300
Epoch 191/300
Epoch 192/300
Epoch 193/300
Epoch 194/300
Epoch 195/300
Epoch 196/300
Epoch 197/300
Epoch 198/300
Epoch 199/300
Epoch 200/300
Epoch 201/300
Epoch 202/300
Epoch 203/300
Epoch 204/300
Epoch 205/300
Epoch 206/300
Epoch 207/300
Epoch 208/300
Epoch 209/300
Epoch 210/300
Epoch 211/300
Epoch 212/300
Epoch 213/300
Epoch 214/300
Epoch 215/300
Epoch 216/300
Epoch 217/300
Epoch 218/300
Epoch 219/300
Epoch 220/300
Epoch 221/300
Epoch 222/300
Epoch 223/300
Epoch 224/300
Epoch 225/300
Epoch 226/300
Epoch 227/300
Epoch 228/300
Epoch 229/300
Epoch 230/300
Epoch 231/300
Epoch 232/300
Epoch 233/300
Epoch 234/300
Epoch 235/300
Epoch 236/300
Epoch 237/300


Epoch 238/300
Epoch 239/300
Epoch 240/300
Epoch 241/300
Epoch 242/300
Epoch 243/300
Epoch 244/300
Epoch 245/300
Epoch 246/300
Epoch 247/300
Epoch 248/300
Epoch 249/300
Epoch 250/300
Epoch 251/300
Epoch 252/300
Epoch 253/300
Epoch 254/300
Epoch 255/300
Epoch 256/300
Epoch 257/300
Epoch 258/300
Epoch 259/300
Epoch 260/300
Epoch 261/300
Epoch 262/300
Epoch 263/300
Epoch 264/300
Epoch 265/300
Epoch 266/300
Epoch 267/300
Epoch 268/300
Epoch 269/300
Epoch 270/300
Epoch 271/300
Epoch 272/300
Epoch 273/300
Epoch 274/300
Epoch 275/300
Epoch 276/300
Epoch 277/300
Epoch 278/300
Epoch 279/300
Epoch 280/300
Epoch 281/300
Epoch 282/300
Epoch 283/300
Epoch 284/300
Epoch 285/300
Epoch 286/300
Epoch 287/300
Epoch 288/300
Epoch 289/300
Epoch 290/300
Epoch 291/300
Epoch 292/300
Epoch 293/300
Epoch 294/300
Epoch 295/300


Epoch 296/300
Epoch 297/300
Epoch 298/300
Epoch 299/300
Epoch 300/300


In [207]:
plt.figure()
plt.plot(history.epoch, history.history["loss"], label='Train Loss')
plt.plot(history.epoch, history.history["val_loss"], label='Vali Loss')
plt.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1600f46d898>

In [208]:
if T_y!=1:
    decoder_in = np.zeros((x_train.shape[0],1,n_a))
    evalu_test = model.evaluate(x=[x_train,decoder_in], y=y_train)
    decoder_in = np.zeros((x_val.shape[0],1,n_a))
    evalu_val = model.evaluate(x=[x_val,decoder_in], y=y_val)
else:
    decoder_in = np.zeros((x_train.shape[0],1,n_a))
    evalu_test = model.evaluate(x=[x_train,decoder_in], y=y_train[:,0,:])
    decoder_in = np.zeros((x_val.shape[0],1,n_a))
    evalu_val = model.evaluate(x=[x_val,decoder_in], y=y_val[:,0,:])
    
print('Test Error: '+ str(evalu_test))
print('Val  Error: '+ str(evalu_val))

Test Error: [0.3023905173559848, 0.771894810988495]
Val  Error: [0.7163905564107393, 0.36019736842105265]


### Visualization

In [209]:
data_set = (x_val, y_val)
indexes = np.random.randint(0,data_set[0].shape[0], size=1)
x_in = data_set[0][indexes,:,:]
decoder_in = np.zeros((x_in.shape[0],1,n_a))
y_true = data_set[1][indexes,:,:]
y_pred = model.predict(x=[x_in,decoder_in])
y_pred_denorm = denormalize_data(y_pred, x_norm_param)
print('X:')
print(denormalize_data(x_in,x_norm_param))
print('Y:')
print(denormalize_data(y_true,x_norm_param))
print('LSTM Out:')
print(y_pred)
print('Y pred:')
print(y_pred_denorm)

X:
[[[ 9.25009818e-03 -2.36178896e-03  9.88976063e-03 -4.13309870e-03
    1.38200000e-01  1.57400000e-01  1.58900000e-01  1.37900000e-01]
  [ 4.38771949e-03  5.75276925e-03  8.53159147e-03  2.29134658e-03
    1.35600000e-01  1.36900000e-01  1.38900000e-01  1.30600000e-01]
  [-2.42694879e-03 -5.33928734e-04  1.89302006e-03 -3.34919904e-03
    1.39500000e-01  1.37300000e-01  1.42800000e-01  1.34900000e-01]
  [ 6.81195977e-03 -5.69286676e-03  7.88242005e-03 -7.49322679e-03
    1.31000000e-01  1.52300000e-01  1.52800000e-01  1.30000000e-01]
  [-3.23795673e-03 -4.34931379e-04  7.24961345e-04 -4.97776440e-03
    1.41200000e-01  1.38800000e-01  1.42400000e-01  1.36600000e-01]
  [-9.98786909e-03 -7.66061576e-03  4.84606061e-05 -1.14424291e-02
    1.54200000e-01  1.53900000e-01  1.57200000e-01  1.49300000e-01]
  [ 1.09211811e-02  0.00000000e+00  1.12640334e-02 -1.02848327e-03
    1.40900000e-01  1.56100000e-01  1.59800000e-01  1.40000000e-01]
  [-1.19658997e-02 -6.20094474e-03 -4.16626300e-03 -

In [210]:
x_in_returns = denormalize_data(x_in,x_norm_param)[:,:,0].transpose()
x_in_vol = denormalize_data(x_in,x_norm_param)[:,:,1].transpose()
y_true_returns = denormalize_data(y_true,x_norm_param)[:,:,0].transpose()
y_true_vol = denormalize_data(y_true,x_norm_param)[:,:,1].transpose()
y_pred_returns = denormalize_data(y_pred,x_norm_param)[:,:,0].transpose()
y_pred_vol = denormalize_data(y_pred,x_norm_param)[:,:,1].transpose()
plt.figure()
plt.plot(range(0,T_x), x_in_returns, label='Input')
plt.plot(range(T_x, T_x+T_y), y_true_returns, label='Truth')
plt.plot(range(T_x, T_x+T_y), y_pred_returns, label='LSTM')
plt.title('Returns')
plt.legend()

plt.figure()
plt.plot(range(0,T_x), x_in_vol, label='Input')
plt.plot(range(T_x, T_x+T_y), y_true_vol, label='Truth')
plt.plot(range(T_x, T_x+T_y), y_pred_vol, label='LSTM')
plt.title('Vol')
plt.legend()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x1600f5e2c50>

## SNIPPETS

### Norm - Denorm Check

In [None]:
a, b , m = gen_dataset(T_x,T_y,T_stride,retvol_table)
x, x_norm_param = normalize_data(a)
y, _ = normalize_data(b, x_norm_param)
x = denormalize_data(x, x_norm_param)
y = denormalize_data(y, x_norm_param)

plt.figure()
plt.hist((b-y).flatten(), 100)

In [None]:
class PlotHistory(keras.callbacks.History):
    def on_train_begin(self, logs=None):
        super().on_train_begin(logs)
        self.fig = plt.figure()
        self.axes = plt.subplot(111)
        self.trainloss_line, = self.axes.plot([],[], label='Train Loss')
        self.fig.subplots_adjust(bottom=0.25, top=0.9, left=0.1,right=0.85, wspace=0, hspace=0) 
        self.fig.canvas.draw()
        
    def on_epoch_end(self, epoch, logs=None):
        super().on_epoch_end(epoch, logs)
        self.trainloss_line.set_data(self.epoch, self.history["loss"])
        self.axes.relim()
        self.fig.canvas.draw()
# Update of plot does not properly work