In [1]:
import os
import math
import time
import numpy as np
import pandas as pd
import tensorflow as tf
import statsmodels as sm
import statsmodels.api as smapi
import matplotlib.pyplot as plt

from numpy.linalg import eig

from matplotlib.animation import FuncAnimation

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler

In [2]:
btc = np.array(pd.read_csv('csvdata/bitfinex_BTCUSD5m.csv', header=0).Close)
dsh = np.array(pd.read_csv('csvdata/bitfinex_DSHUSD5m.csv', header=0).Close)
eth = np.array(pd.read_csv('csvdata/bitfinex_ETHUSD5m.csv', header=0).Close)

prices = pd.DataFrame(np.column_stack((btc, dsh, eth)))
returns = prices.pct_change().dropna()

In [3]:
Y = np.copy(returns.values)
(row, col) = np.shape(Y)

In [4]:
for j in range(0,col):
    mean = np.mean(Y[:,j])
    std = np.std(Y[:,j])
    
    for i in range(0, row):
        Y[i,j] = (Y[i,j] - mean) / std

In [5]:
corr = np.dot(Y.T, Y) / (row - 1)

In [6]:
eigval, eigv = np.linalg.eig(corr)
sortedindex = eigval.argsort()[::-1]
eigv = eigv[:, sortedindex]

In [7]:
cumeigval = np.cumsum(eigval)
plt.plot(cumeigval)

[<matplotlib.lines.Line2D at 0x7f416e099750>]

In [8]:
eigv = eigv[:, :1]
eigvec = np.zeros(np.shape(eigv))

for i in range(0, np.shape(eigv)[0]):
    for j in range(0, np.shape(eigv)[1]):
        eigvec[i,j] = eigv[i,j].real / np.std(returns.values[:,j])
        
for i in range(0,np.shape(eigvec)[1]):
    eigvec[:,i] = eigvec[:,i] / np.sum(np.abs(eigvec[:,j]))

eigret = np.dot(returns.values, eigvec)

print eigvec


[[ 0.34921267]
 [ 0.31101515]
 [ 0.33977218]]


In [9]:
def fit_OU(ts):
    X = smapi.add_constant(ts[:-1])
    model = smapi.OLS(ts[1:], X)
    results = model.fit()
    b = results.params[1]
    a = results.params[0]
    e = np.std(results.resid)
    k = -math.log(b) * 252
    var = e**2 / (2*k) * (1 - math.exp(-2 * k))
    num = -a * np.sqrt(1-b**2)
    den = (1-b) * np.sqrt(var)
    return num / den, 0. 

In [10]:
%matplotlib

S = []
for s in returns:
    # regress stock returns on eigen portfolio returns
    results = smapi.OLS(returns[s].values, smapi.add_constant(eigret)).fit()
    b = results.params
    cumresid = np.cumsum(results.resid)
    score, hf = fit_OU(cumresid) # fit OU    
    S.append(cumresid)
    
    #if abs(score) > 2 and sm.tsa.stattools.adfuller(cumresid)[1] < 0.01:
    #    plt.plot(np.cumsum(results.resid))
    #    break

Using matplotlib backend: TkAgg


In [11]:
from util.zig_zag import zig_zag

from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.layers import Bidirectional
from keras.models import Sequential

from scipy.linalg import hankel
from sklearn.metrics import mean_squared_error

Using TensorFlow backend.


In [12]:
#plt.plot(np.cumsum(results.resid))

In [13]:
#window_size = 1
#data = zig_zag(S[1].T, 0.008)

#T = data['time']
#Z = data['zigzag']

#trainsize = int(len(Z) * 0.88)
#testsize  = int(len(Z) - trainsize)

#testing_set  = Z.T[-testsize - 1:]
#training_set = Z.T[:trainsize + 1]

#X_train = training_set[0:trainsize]
#Y_train = training_set[1:trainsize + 1]

#X_test = testing_set[0:testsize]
#Y_test = testing_set[1:testsize + 1]

#X_train = np.reshape(X_train, (trainsize, 1, 1))


In [14]:
## Initialising the RNN
#regressor = Sequential()

## Adding the input layer and the LSTM layer
#regressor.add(LSTM(units = 6, activation = 'sigmoid', input_shape = (None, 1)))

## Adding the output layer
#regressor.add(Dense(units = 1))

## Compiling the RNN
#regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')

## Fitting the RNN to the Training set
#regressor.fit(x = X_train, y = Y_train, batch_size = 1, epochs = 40)

In [15]:
#inputs = X_test
#inputs = np.reshape(inputs, (testsize, 1, 1))

#outputs = regressor.predict(inputs, batch_size=32, verbose=1)

In [16]:
#%matplotlib

## Visualising the results
##plt.plot(X_test,  color = 'red',  label = 'X_test')
#plt.plot(Y_test,  color = 'blue', label = 'Y_test')

#plt.plot(outputs, color = 'green', label = 'Predicted')

#plt.title('Filtered ZIGZAG prediction')
#plt.xlabel('Time')
#plt.ylabel('Filtered ZIGZAG')
#plt.legend()
#plt.show()

In [26]:
#########################################
window_size = 3
dropout_value = 0.2
activation_function = 'linear' 
loss_function = 'mse' 
optimizer = 'adam'
#########################################

data = zig_zag(S[1].T, 0.008)

T = data['time']
Z = data['zigzag']

# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range=(0,1))
training_set = sc.fit_transform(Z)

#R = []
#for i in range(len(T)-1):
#    R.append(T[i+1] - T[i])
    
#plt.plot(R)

trainsize = int(len(Z) * 0.88)
testsize  = int(len(Z) - trainsize)

testing_set  = Z.T[-testsize - 1:]
training_set = Z.T[:trainsize + 1]

X_train = training_set[0:trainsize]
Y_train = training_set[window_size:trainsize + 1]

X_test = testing_set[0:testsize]
Y_test = testing_set[window_size:testsize + 1]

X_train = hankel(X_train[0:window_size], X_train[window_size-1:]).T

P = X_train
P = P.T

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

X_test = hankel(X_test[0:window_size], X_test[window_size-1:]).T
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))



#X_train = np.reshape(X_train, (52, window_size, 1))
#X_test = np.reshape(X_test, (testsize/window_size, window_size, 1))

In [28]:
%matplotlib
plt.plot(Z)

#from mpl_toolkits.mplot3d import Axes3D

#fig = plt.figure()
#ax = fig.add_subplot(111, projection='3d')
#ax.scatter(P[0],P[1],P[2])

#ax.set_xlabel('X')
#ax.set_ylabel('Y')
#ax.set_zlabel('Z')


Using matplotlib backend: TkAgg


[<matplotlib.lines.Line2D at 0x7f4145eb7a50>]

In [22]:
model = Sequential()

model.add(Bidirectional(LSTM(window_size, return_sequences=True), input_shape=(window_size, X_train.shape[-1])))
model.add(Dropout(dropout_value))

model.add(Bidirectional(LSTM(window_size, return_sequences=True)))
model.add(Dropout(dropout_value))

model.add(Bidirectional(LSTM(window_size, return_sequences=False)))

model.add(Dense(units=1))

model.add(Activation(activation_function))
model.compile(loss=loss_function, optimizer=optimizer)

model.fit(X_train, Y_train, batch_size = 16, epochs = 40)


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7f4152187710>

In [23]:
inputs = X_test
outputs = model.predict(inputs, batch_size=1, verbose=1)



In [24]:
%matplotlib

# Visualising the results
#plt.plot(X_test,  color = 'red',  label = 'X_test')
plt.plot(Y_test,  color = 'blue', label = 'Y_test')

plt.plot(outputs, color = 'green', label = 'Predicted')

plt.title('Filtered ZIGZAG prediction')
plt.xlabel('Time')
plt.ylabel('Filtered ZIGZAG')
plt.legend()
plt.show()

Using matplotlib backend: TkAgg
