In [1]:
import os
import math
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.cluster import MeanShift, estimate_bandwidth, get_bin_seeds
from scipy.linalg    import hankel

from tensorflow.contrib import rnn

from util.ZigZag import ZigZag, ZigZag0
from util.CalcReturns import CalcReturns

from util.BuildData import TrainingSet_NN_Prices, TrainingSet_NN_Logret
from util.BuildData import TrainingSet_ML_Prices, TrainingSet_ML_Logret

from mpl_toolkits.mplot3d import Axes3D

from sklearn.cluster import MeanShift


In [2]:
%matplotlib

N = 8*1440 + 1

source = pd.read_csv('EURUSD1.csv', header=0) # source CSV
prices = np.array(source.Close)[0:N+1] # close prices

r = CalcReturns(prices)


Using matplotlib backend: TkAgg


In [3]:
def build_set(S, lag = 2):
    return hankel(S[0 : lag], S[lag-1 :]).T

def metric3(x):
    return max(math.fabs(x[0]), math.fabs(x[1]), math.fabs(x[2]))

In [4]:
class GridN:
    
    def __init__(self, N = 3, delta = 8.45145029e-06, bound = 0.1):        
        self.N     = N
        self.delta = delta
        
        self.min = -math.fabs(bound)
        self.max =  math.fabs(bound)
        self.dim = 2 * math.fabs(bound) / self.delta
        
        pass
    
    def fit(self, X):
        cells    = []
        labels   = []
        clusters = []
        
        for i in range(len(X)):            
            cell, label, cluster = self.fit_single(X[i])
            
            cells.append(cell)
            labels.append(label)
            clusters.append(cluster)    
     
        cells    = np.array(cells)
        labels   = np.array(labels)
        clusters = np.array(clusters)        
        
        return cells, labels, clusters
    
    def fit_single(self, v):
        assert len(v) == self.N
        
        sv = []
        for i in range(self.N):
            
            s = 0            
            if v[i] != 0.0:
                s = int(v[i] / math.fabs(v[i]))                
            sv.append(s)
            
        sv = np.array(sv)        
        
        # cell        
        cell = []
        for i in range(self.N):
            cell.append(int((v[i] + sv[i] * self.delta / 2) / self.delta))
        cell = np.array(cell)
        
        # label
        label = ''.join(str(e) for e in cell)
        label = hash(label)
            
        # cluster
        cluster = cell * self.delta
        
        return cell, label, cluster
    
    def restore_from_clusters(self, r, plot = False):
        X = build_set(r, self.N)
        
        _, L, C = self.fit(X)
        
        Last = self.N - 1
        
        r0 = []
        for i in range(Last):            
            r0.append(C[0][i])

        for i in range(len(C)):
            r0.append(C[i][Last])
            
        if plot and (self.N == 3):
            XT = X.T
            CT = C.T

            fig = plt.figure()
            
            ax = fig.add_subplot(111, projection='3d')
            
            ax.plot(XT[0], XT[1], XT[2], 'x')
            ax.plot(CT[0], CT[1], CT[2], 'r-')

            plt.show()
            
        ################################################
        u, c, ratio = self.request_unique_ratio(r)        
        print u, c, ratio        
        ################################################
            
        return np.array(r0)
    
    def request_unique_ratio(self, r):
        X = build_set(r, self.N)        
        _, L, _ = self.fit(X)
        
        c = len(L)
        u = len(np.unique(L))
        
        ratio = float(u) / c
        return u, c, ratio


In [5]:
#r = r[0:31]

Dims = 5

grid = GridN(N = Dims)
r0 = grid.restore_from_clusters(r)

X = build_set(r, Dims)
_, L, C = grid.fit(X)

Cp = []
Flag = False

for i in range(len(C)):
    if L[i] in Cp:
        if Flag == False:
            print '============='
        Flag = True
    else:
        Flag = False        
        Cp.append(L[i])
   
    if Flag:
        print i, L[i]


        
plt.plot(r, 'r*')
plt.plot(r0)
plt.show()

    
    




11485 11517 0.997221498654
25 3567998172912873461
35 3567998172912873463
1836 -8376796214511035457
1907 1362059110856543902
4125 -355701165176818326
4342 -4991407356310764812
5669 2182016312245958940
5679 -391439041335723318
5740 3567999172916873462
5829 6701585905652871936
5909 6898098585106440822
6086 1301879475553296387
6170 5355803772556603735
6172 -8366639410748196240
6191 -1547176312077340958
6192 2744683343302950461
6194 -6698255525061704427
6196 8097897435837369338
7183 3567998172912873461
7184 3567998172912873461
7192 3567998172912873461
7519 7979696710308886850
7649 -7913624436667215985
8476 -1302176299552024734
8505 3568726720268565175
9030 4461920972870738753
9080 3992105010906219015
9081 -6665238781082928492
10050 3567998172912873461
10051 -473381263834093291
10399 7976561360162642650
10550 -6698255525062704429


In [14]:
v = X[10399]

plt.plot(v)
plt.show()
print v


[  8.35013799e-06  -8.35013799e-06  -4.17517358e-05   8.35017285e-05
   0.00000000e+00]


In [7]:
X2 = build_set(r, 2)


X2s = X2.T

#plt.plot(X2s[0], X2s[1], '.')

#grid = Grid()
#grid.plot()

ms = MeanShift(bandwidth = 0.0001, bin_seeding=True)
ms.fit(X2)

labels = ms.labels_
cluster_centers = ms.cluster_centers_

X2s = X2.T
CCs = ms.cluster_centers_.T

plt.plot(X2s[0], X2s[1], '.')
plt.plot(CCs[0], CCs[1], 'r.')