In [1]:
import os
import math
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.cluster import MeanShift, estimate_bandwidth, get_bin_seeds
from scipy.linalg    import hankel

from tensorflow.contrib import rnn

from util.ZigZag import ZigZag, ZigZag0
from util.CalcReturns import CalcReturns

from util.BuildData import TrainingSet_NN_Prices, TrainingSet_NN_Logret
from util.BuildData import TrainingSet_ML_Prices, TrainingSet_ML_Logret

In [2]:
class Grid:
    """Regular 2-D lattice of cell centres, symmetric about the origin.

    The lattice spans [-|bound|, |bound|] on both axes (overshooting by at
    most one step), with spacing ``dy`` along the first coordinate and ``dx``
    along the second.  Centres are stored row-major in ``self.grid`` as
    ``[y, x]`` pairs, so ``self.grid[row * nx + col]`` is the centre of the
    cell in row ``row`` and column ``col``.
    """

    def __init__(self, dx = 8.448750494999999e-06, dy = 8.44596043e-06, bound = 0.002):
        """Store the spacings and bounds, then build the lattice.

        dx    -- spacing along the second coordinate of a point
        dy    -- spacing along the first coordinate of a point
        bound -- half-width of the lattice; its sign is ignored
        """
        self.dx = dx
        self.dy = dy

        self.xmin = -math.fabs(bound)
        self.xmax =  math.fabs(bound)

        self.ymin = -math.fabs(bound)
        self.ymax =  math.fabs(bound)

        self.__recalculate()

    def fit(self, X):
        """Snap every point of X to its lattice cell.

        Returns a pair ``(labels, clusters)``: ``labels`` has shape (len(X), 1)
        and holds the flat index of each point's cell, ``clusters`` has shape
        (len(X), 2) and holds the matching cell centres.
        """
        labels   = []
        clusters = []

        for point in X:
            _, _, index, cell = self.cell(point)

            labels.append(index)
            clusters.append(cell)

        labels   = np.reshape(labels,   (len(labels),  1))
        clusters = np.reshape(clusters, (len(clusters),2))

        return labels, clusters

    def fit_cells(self, X):
        """Return, as a (len(X), 2) array, the signed cell offsets of each point.

        Offsets are counted from the central cell (the one centred on 0, 0).
        """
        cells = []

        # Distinct names for the offsets: the original rebound the loop index.
        for point in X:
            row_offset, col_offset, _, _ = self.cell(point)
            cells.append([row_offset, col_offset])

        cells = np.reshape(cells, (len(cells),2))
        return cells

    def cell(self, v):
        """Locate the lattice cell nearest to the point ``v = (v0, v1)``.

        Returns ``(i0, j0, index, cell)`` where ``i0``/``j0`` are the signed
        offsets from the central cell along the first/second coordinate,
        ``index`` is the flat position in ``self.grid`` and ``cell`` is the
        centre stored there.

        NOTE: points outside the lattice are not detected here; they yield a
        wrong cell (negative indices wrap around) or an IndexError.
        """
        dy   = self.dy
        dy_2 = dy/2

        dx   = self.dx
        dx_2 = dx/2

        v0 = v[0]
        v1 = v[1]

        # Sign of each coordinate (0 for an exact zero) so that the half-step
        # shift below rounds to the nearest centre on both sides of the origin.
        sv0 = 0
        if v0 != 0.0:
            sv0 = int(v0 / math.fabs(v0))

        sv1 = 0
        if v1 != 0.0:
            sv1 = int(v1 / math.fabs(v1))

        i0 = int((v0 + sv0 * dy_2) / dy)
        j0 = int((v1 + sv1 * dx_2) / dx)

        # Use this instance's own dimensions (not a module-level `grid`), and
        # treat rows and columns separately so non-square lattices index
        # correctly.  Both counts are odd, so // 2 is the central cell.
        i = i0 + self.ny // 2
        j = j0 + self.nx // 2

        index = i * self.nx + j
        cell  = self.grid[index]

        return i0, j0, index, cell

    def __recalculate(self):
        """(Re)build ``self.grid`` and its dimensions ``self.nx``/``self.ny``."""
        x = 0
        grid_x = [x]

        # Grow outwards from zero, mirroring each step, so 0 is always a centre.
        while x < self.xmax:
            x = x + self.dx
            grid_x.append(x)
            grid_x.append(-x)

        y = 0
        grid_y = [y]

        while y < self.ymax:
            y = y + self.dy
            grid_y.append(y)
            grid_y.append(-y)

        grid_x.sort()
        grid_y.sort()

        self.nx = len(grid_x)
        self.ny = len(grid_y)

        # Row-major: the first coordinate (y) varies slowest.
        self.grid = np.array([[gy, gx] for gy in grid_y for gx in grid_x])

    def plot(self, show = False):
        """Scatter all cell centres in red; call ``plt.show()`` when ``show`` is true."""
        G = self.grid.T
        plt.scatter(G[0], G[1], color='red')

        if show:
            plt.show()

    def shape(self):
        """Return the shape of the centre array, i.e. (number of cells, 2)."""
        return self.grid.shape

def metric(x):
    """Return the larger absolute value of the first two components of x."""
    first  = math.fabs(x[0])
    second = math.fabs(x[1])
    return second if second > first else first

def build_set(S, lag = 2):
    """Delay-embed the series S: row k of the result is S[k : k + lag]."""
    first_column = S[:lag]
    last_row     = S[lag-1:]
    windows_by_column = hankel(first_column, last_row)
    return windows_by_column.T

def plot_phase_space(S, Lag, n, bandwidth = 0.000004):
    """Plot the delay embedding of S together with its mean-shift cluster centres.

    S         -- 1-D series to embed
    Lag       -- embedding dimension passed to build_set; must be at least 2
                 because the first two coordinates are plotted
    n         -- number of embedded points to draw
    bandwidth -- MeanShift kernel bandwidth (previously hard-coded)

    The first n/2 - 16 points and the points from n/2 to n are drawn as
    scatter plots; the 16 points just before n/2 are joined with a dashed
    line.  The cluster centres and the per-point labels are printed after
    the figure is shown.
    """
    m = n // 2   # explicit floor division: slice bounds must be integers
    c = 16

    X = build_set(S, Lag)   # honour the caller's Lag instead of the default

    clf = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    clf.fit(X)

    cluster_centers = clf.cluster_centers_.T

    X = X.T

    x0 = X[0][0:m-c]
    y0 = X[1][0:m-c]

    x1 = X[0][m-c:m]
    y1 = X[1][m-c:m]

    x2 = X[0][m:n]
    y2 = X[1][m:n]

    plt.scatter(x0,y0)
    plt.plot(x1,y1, 'b*--')
    plt.scatter(x2,y2)
    plt.plot(cluster_centers[0], cluster_centers[1], 'o', markersize=6, markerfacecolor='red')

    plt.show()

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(clf.cluster_centers_)
    print(clf.labels_)
    
def plot_phase_space0(S, n, Lag = 2):
    """Draw the first two coordinates of the delay embedding of S.

    S   -- 1-D series to embed
    n   -- number of embedded points to draw
    Lag -- embedding dimension passed to build_set

    The first n/2 - 16 points and the points from n/2 to n are drawn as
    scatter plots; the 16 points just before n/2 are joined with a dashed
    line.  The figure is not shown here; the caller decides when to call
    plt.show().
    """
    m = n // 2   # explicit floor division: slice bounds must be integers
    c = 16

    X = build_set(S, Lag)
    X = X.T

    x0 = X[0][0:m-c]
    y0 = X[1][0:m-c]

    x1 = X[0][m-c:m]
    y1 = X[1][m-c:m]

    x2 = X[0][m:n]
    y2 = X[1][m:n]

    plt.scatter(x0,y0)
    plt.plot(x1,y1, 'b*--')
    plt.scatter(x2,y2)
    

In [3]:
def recurrent_neural_network_model(input):
    """Build a single-layer LSTM followed by a linear read-out named 'nn'.

    `input` is unstacked along axis 1 into `timesteps` tensors.  The layer
    sizes come from the module-level `num_hidden` and `n_classes`.  Returns
    the read-out of the last time step.
    """
    # Read-out parameters are created first, as before, so that the
    # automatically generated variable names do not change.
    out_weights = tf.Variable(tf.random_normal([num_hidden, n_classes]))
    out_bias    = tf.Variable(tf.random_normal([n_classes]))

    step_inputs = tf.unstack(input, timesteps, 1)

    cell = rnn.LSTMCell(num_hidden)
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    projected = tf.matmul(step_outputs[-1], out_weights)
    return tf.add(projected, out_bias, name='nn')

def train_and_save_nn(x_data,y_data, filename):
    """Train the LSTM regressor on (x_data, y_data) and save it to `filename`.

    x_data   -- array of inputs; each batch is reshaped to
                (batch_size, timesteps, num_input)
    y_data   -- array of targets, fed as (batch, n_classes)
    filename -- checkpoint prefix handed to tf.train.Saver.save

    Reads the module-level settings timesteps, num_input, n_classes,
    hm_epochs and batch_size.  Samples beyond the last full batch are not
    used.  Returns the output tensor of the network (named 'nn').
    """
    # Build in a private graph so that every call produces the same tensor
    # names ('x', 'y', 'nn', 'cost'); in a shared default graph a second call
    # would create 'x_1', 'nn_1', ... and save both models in one checkpoint.
    with tf.Graph().as_default():
        x_train = tf.placeholder('float', [None, timesteps, num_input], name='x')
        y_train = tf.placeholder('float', [None, n_classes], name='y')

        nn = recurrent_neural_network_model(x_train)
        cost = tf.norm(tf.subtract(nn, y_train), name='cost')
        optimizer = tf.train.AdamOptimizer(learning_rate = 0.001).minimize(cost)

        # Replaces the deprecated tf.initialize_all_variables().
        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

        # Count batches from the data actually passed in, not from a global.
        n_batches = len(x_data) // batch_size

        with tf.Session() as session:

            session.run(init)

            epoch_loss = 0
            for epoch in range(hm_epochs):
                epoch_loss = 0
                for i in range(n_batches):

                    batch_x = x_data[i*batch_size : (i+1)*batch_size]
                    batch_y = y_data[i*batch_size : (i+1)*batch_size]

                    batch_x = batch_x.reshape((batch_size, timesteps, num_input))

                    _, c = session.run([optimizer, cost], feed_dict = {x_train: batch_x, y_train: batch_y})
                    epoch_loss += c

                if epoch % 100 == 0:
                    print('Epoch', epoch, 'completed out of', hm_epochs, 'loss', epoch_loss)

            saver.save(session, filename)
            print('Last epoch loss: ', epoch_loss)

    return nn

def test_nn(x_test, y_test, filename):
    """Restore a saved model, predict on x_test and plot against y_test.

    x_test   -- inputs; reshaped to (-1, timesteps, num_input)
    y_test   -- actual values, printed and plotted next to the predictions
    filename -- path of the '.meta' graph file written by the saver

    The weights are restored from the checkpoint that belongs to `filename`
    (the same path without the '.meta' suffix).  Only when `filename` has no
    such suffix does it fall back to the most recent checkpoint in 'data/'.
    """
    meta_suffix = '.meta'
    if filename.endswith(meta_suffix):
        # Pair the graph with its own weights; "latest checkpoint" could
        # belong to a different model saved in the same directory.
        checkpoint = filename[:-len(meta_suffix)]
    else:
        checkpoint = tf.train.latest_checkpoint('data/')

    # Import into a fresh graph so the tensor names looked up below refer to
    # the restored model and not to anything already built in this kernel.
    with tf.Graph().as_default() as graph:
        with tf.Session(graph=graph) as session:
            saver = tf.train.import_meta_graph(filename)
            saver.restore(session, checkpoint)

            x  = graph.get_tensor_by_name('x:0')
            nn = graph.get_tensor_by_name('nn:0')

            # -1 lets the sample count follow x_test instead of a global.
            inputs = np.reshape(x_test, (-1, timesteps, num_input))

            predicted = session.run(nn, feed_dict = {x: inputs})

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(y_test)
    print(predicted)

    # Visualising the results
    plt.plot(y_test, 'b*',    label = 'Actual')
    plt.plot(predicted, 'r+', label = 'Predicted')

    plt.title('Prediction')
    plt.legend()
    plt.show()

In [4]:
# IPython magic: enable matplotlib's interactive GUI backend for this kernel.
%matplotlib

# Size of the price window; the slice below keeps N+1 rows.
# NOTE(review): 8*1440 looks like 8 days of 1-minute bars - confirm against the CSV.
N = 8*1440 + 1

source = pd.read_csv('EURUSD1.csv', header=0) # price table; the first line of the file is the header
prices = np.array(source.Close)[0:N+1] # first N+1 values of the Close column
r = CalcReturns(prices)  # return series derived from the prices (presumably log returns - see util.CalcReturns)


Using matplotlib backend: TkAgg


In [5]:
# Delay-embed the return series with the default lag of 2:
# each row of X is a pair of consecutive returns.
X = build_set(r)

# Lattice used to discretise the embedded points (half-width 0.004 instead of the default 0.002).
grid = Grid(bound = 0.004)
#grid.plot()

#plot_phase_space0(r, N-1)
#plt.show()

# Signed cell offsets from the central lattice cell, one row per row of X.
cells = grid.fit_cells(X)

#plt.plot(labels)
#plt.show()


In [6]:
# Settings read as globals by the model, training and evaluation functions.
timesteps  = 1   # sequence length of one sample (axis 1 of the 'x' placeholder)
num_input  = 2   # features per time step (axis 2 of the 'x' placeholder)
n_classes  = 1   # width of the network output and of the 'y' placeholder
step       = num_input * timesteps   # not referenced anywhere else in the visible cells

lag        = 1   # offset, in rows, between an input row and its target row
testsize   = 16  # number of rows held out for evaluation
trainsize  = 2*1440 + 16 #N - testsize

hm_epochs  = 1000  # number of passes over the training batches
num_hidden = 128   # size passed to the LSTM cell
batch_size = 128   # rows per training step

In [7]:
# Feature Scaling
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler()  # only needed by the scaling step that is disabled below

# Window of grid cells used for this experiment
data_set = cells[:trainsize + lag + testsize + lag + 1]
#data_set = sc.fit_transform(data_set)

# Head of the window trains the model, tail is held out for testing
training_set = data_set[:trainsize+lag]
testing_set  = data_set[-testsize-lag:]

# Targets are the rows `lag` steps after the corresponding input rows
train_targets = training_set[lag:trainsize+lag]
test_targets  = testing_set[lag:testsize+lag]

# One target array per cell coordinate, each shaped (rows, n_classes)
Y_train0 = train_targets[:, 0].reshape((len(train_targets), n_classes))
Y_train1 = train_targets[:, 1].reshape((len(train_targets), n_classes))

Y_test0 = test_targets[:, 0].reshape((len(test_targets), n_classes))
Y_test1 = test_targets[:, 1].reshape((len(test_targets), n_classes))

# Inputs: training rows are reshaped for the network, test rows are left 2-D
X_train = training_set[0:trainsize].reshape((trainsize, timesteps, num_input))
X_test  = testing_set[0:testsize]

In [8]:
# Train and save the model for the second cell coordinate (Y_train1).
# The run for the first coordinate (Y_train0) is disabled:
#train_and_save_nn(X_train, Y_train0, 'data/fxrn_0.ckpt')
train_and_save_nn(X_train, Y_train1, 'data/fxrn_1.ckpt')

Instructions for updating:
Use `tf.global_variables_initializer` instead.
('Epoch', 0, 'completed out of', 1000, 'loss', 2739.5259857177734)
('Epoch', 100, 'completed out of', 1000, 'loss', 2693.2753067016602)
('Epoch', 200, 'completed out of', 1000, 'loss', 2688.2223091125488)
('Epoch', 300, 'completed out of', 1000, 'loss', 2683.9165191650391)
('Epoch', 400, 'completed out of', 1000, 'loss', 2680.1788902282715)
('Epoch', 500, 'completed out of', 1000, 'loss', 2677.1105079650879)
('Epoch', 600, 'completed out of', 1000, 'loss', 2674.2837562561035)
('Epoch', 700, 'completed out of', 1000, 'loss', 2671.5860290527344)
('Epoch', 800, 'completed out of', 1000, 'loss', 2669.0427742004395)
('Epoch', 900, 'completed out of', 1000, 'loss', 2666.5200004577637)
('Last epoch loss: ', 2663.9752807617188)


<tf.Tensor 'nn:0' shape=(?, 1) dtype=float32>

In [9]:
#test_nn(X_test, Y_test0, 'data/fxrn_0.ckpt.meta')

In [9]:
# Evaluate the saved second-coordinate model on the held-out rows; prints and plots actual vs. predicted.
test_nn(X_test, Y_test1, 'data/fxrn_1.ckpt.meta')

INFO:tensorflow:Restoring parameters from data/fxrn_1.ckpt
[[ 11]
 [ -2]
 [  1]
 [-14]
 [  0]
 [  6]
 [ -5]
 [  5]
 [  2]
 [ 10]
 [-12]
 [  4]
 [  2]
 [ 11]
 [-10]
 [ 10]]
[[ 0.99427408]
 [-0.65748388]
 [-1.60269427]
 [ 0.18227345]
 [ 1.16076708]
 [ 0.4105987 ]
 [ 0.03912878]
 [-0.2397632 ]
 [-0.6598503 ]
 [ 1.43531418]
 [ 2.44757462]
 [ 0.87571448]
 [ 0.77016491]
 [ 1.76721239]
 [ 2.82277179]
 [ 0.76998299]]


In [15]:
# Snap every embedded point to the lattice: L holds flat cell indices, R the cell centres.
L, R = grid.fit(X)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(X)


[[  0.00000000e+00  -2.80689818e-04]
 [ -2.80689818e-04   3.91242967e-04]
 [  3.91242967e-04   1.02038213e-04]
 ..., 
 [ -1.68185946e-05   4.20459562e-05]
 [  4.20459562e-05  -5.88648337e-05]
 [ -5.88648337e-05  -1.00919206e-04]]


In [12]:
# Collapse the overlapping pairs of cell centres in R into a single series:
# the ends are taken as they are, every interior value is the mean of the
# two centres that cover it.
last = len(R) - 1

r0 = [R[0][0]]
r0.extend((R[k][1] + R[k+1][0]) / 2 for k in range(last))
r0.append(R[last][1])

# Original series as red stars, lattice-quantised series as a line
plt.plot(r, 'r*')
plt.plot(r0)
plt.show()

In [13]:
# Rebuild a price path by compounding the quantised series r0,
# starting from the first observed price.
prices0 = []
price = prices[0]

for quantised_return in r0:
    prices0.append(price)
    price *= math.exp(quantised_return)

# Observed prices as red stars, reconstructed path as a line
plt.plot(prices, 'r*')
plt.plot(prices0)
plt.show()