In [1]:
import math
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn import svm, preprocessing

from sklearn.tree import DecisionTreeClassifier

from sklearn.neighbors import KNeighborsClassifier

from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier


In [2]:
df = pd.read_csv('EURUSD1.csv', names=['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume'])
train = np.array(df.Close)

In [3]:
def CalcReturns(data):
    R = []    
    for i in range(len(data)-1):
        v = math.log(math.fabs(data[i+1]/data[i]))
        R.append(v)
        
    return R

def ZigZag(data, minsize):
    
    N = 0
    Z = {'zigzag': [], 'time': [], 'label' : []}
    
    T = N
    
    Count = 0;
    
    Max  = data[0]
    Min  = data[0]
    
    Flag = False
    
    PriceLow = 0
    PriceHigh = 0    
        
    while N < len(data):
        
        PriceLow = data[N]
        PriceHigh = data[N]        
        
        if Flag:
            
            if PriceHigh > Max:
                
                T = N
                Max = PriceHigh                
                
            elif (Max - PriceLow >= minsize):
                
                Z['time'].append(T)
                Z['label'].append(-1)
                Z['zigzag'].append(Max)
                
                Flag = False
                Count = Count + 1                                
                
                T = N
                Min = PriceLow                
                
        else:
               
            if PriceLow < Min:
                
                T = N
                Min = PriceLow                
                    
            elif (PriceHigh - Min >= minsize):
                    
                Z['time'].append(T)
                Z['label'].append(1)
                Z['zigzag'].append(Min)
                
                Flag = True
                Count = Count + 1                
                
                T = N
                Max = PriceHigh                 
    
        N = N + 1    

    return Z

def BuildData0(zigzag, data, lag):
    
    D = []
    L = []
    
    D0 = []
    D1 = []
    
    L0 = []
    L1 = []
    
    N = len(data)
    Count = len(Z['time'])

    for i in range(0,N-lag):        
        try:
            index = zigzag["time"].index(i+lag)
            
            D0.append(data[i:i+lag])
            L0.append(zigzag["label"][index])
            
        except:
            
            D1.append(data[i:i+lag])
            L1.append(0)            

    Count0 = len(D0)
    Count1 = len(D1)
    
    #print(Count0, Count1)
    
    N = Count0 / 2
    if Count1 > N:        
        D1 = np.random.permutation(D1)
        
        D = D1[:N]
        L = L1[:N]
        
    else:        
        D = D1
        L = L1
    
    D_temp = np.concatenate((D, D0), axis=0)
    L_temp = np.concatenate((L, L0), axis=0)    
    
    I = range(len(D_temp))
    I = np.random.permutation(I)
    
    D = []
    L = []
    
    for i in I:
        D.append(D_temp[i])
        L.append(L_temp[i])
    
    D = np.array(D)
    L = np.array(L)   
    
    D = preprocessing.scale(D)
    
    return D, L
    
def BuildData1(zigzag, returns, lag):
    
    D = []
    L = []
    
    D0 = []
    D1 = []
    
    L0 = []
    L1 = []
    
    N = len(returns) + 1
    Count = len(Z['time'])

    for i in range(0,N-lag):        
        try:
            index = zigzag["time"].index(i+lag-1)
            
            D0.append(returns[i:i+lag-1])
            L0.append(zigzag["label"][index])
            
        except:
            
            D1.append(returns[i:i+lag-1])
            L1.append(0)            

    Count0 = len(D0)
    Count1 = len(D1)
    
    #print(Count0, Count1)
    
    N = Count0 / 2
    #N = Count0
    if Count1 > N:        
        D1 = np.random.permutation(D1)
        
        D = D1[:N]
        L = L1[:N]
        
    else:        
        D = D1
        L = L1
    
    D_temp = np.concatenate((D, D0), axis=0)
    L_temp = np.concatenate((L, L0), axis=0)
    
    I = range(len(D_temp))
    I = np.random.permutation(I)
    
    D = []
    L = []
    
    for i in I:
        D.append(D_temp[i])
        L.append(L_temp[i])
    
    D = np.array(D)
    L = np.array(L)   
    
    D = preprocessing.scale(D)
    
    return D, L

def BuildData2(zigzag, returns, lag):
    
    D = []
    L = []
    
    D0 = []
    D1 = []
    
    L0 = []
    L1 = []
    
    N = len(returns) + 1
    Count = len(Z['time'])

    for i in range(0,N-lag):        
        try:
            index = zigzag["time"].index(i+lag-1)
            
            D0.append(returns[i:i+lag-1])
            L0.append(zigzag["label"][index])
            
        except:
            
            D1.append(returns[i:i+lag-1])
            L1.append(0)            

    Count0 = len(D0)
    Count1 = len(D1)
    
    #print(Count0, Count1)
    
    N = Count0
    if Count1 > N:        
        D1 = np.random.permutation(D1)
        
        D = D1[:N]
        L = L1[:N]
        
    else:        
        D = D1
        L = L1
    
    D_temp = np.concatenate((D, D0), axis=0)
    L_temp = np.concatenate((L, L0), axis=0)
    
    I = range(len(D_temp))
    I = np.random.permutation(I)
    
    D = []
    L = []
    
    for i in I:
        D.append(D_temp[i])
        L.append(L_temp[i])
    
    D = np.array(D)
    L = np.array(L)   
    
    D = preprocessing.scale(D)
    
    return D, L

In [5]:
Z = ZigZag(train, 0.0004)
returns = CalcReturns(train)

D, L = BuildData2(Z, returns, 61)

Dim = D.shape[1]
t_count = 1024

#D, L = BuildData1(Z, returns, 61)
#D, L = BuildData0(Z, train, 61)

print(D.shape)
print(len(D))


(36576, 60)
36576


In [5]:
#clf = svm.SVC(kernel="poly", C=0.0001, degree=6, coef0=0.0001)
clf = RandomForestClassifier(n_estimators = 100, criterion='entropy') # 79%
#clf = ExtraTreesClassifier(n_estimators = 10, oob_score = True, bootstrap = True)
#clf = ExtraTreesClassifier(n_estimators = 100)
#clf = DecisionTreeClassifier()
#clf = AdaBoostClassifier(learning_rate = 0.6, n_estimators = 100) # 71%
#clf = GradientBoostingClassifier(n_estimators = 100, learning_rate=1.1) # 81%
#clf = BaggingClassifier() #79 %
#clf = KNeighborsClassifier(n_neighbors=100, algorithm='brute') # 37%
#clf = BaggingClassifier(base_estimator = KNeighborsClassifier(n_neighbors=100)) # 36%
#clf = BaggingClassifier(base_estimator = GradientBoostingClassifier(n_estimators = 100, learning_rate=0.1)) # 81%

#clf0 = GradientBoostingClassifier(n_estimators = 100, learning_rate=0.1) # 81%
#clf1 = KNeighborsClassifier(n_neighbors=100, algorithm='brute') # 37%

#clf = VotingClassifier(estimators=[('gbc', clf0),('knc', clf1)], voting='soft')
clf.fit(D[:-t_count], L[:-t_count])

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=100, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)

In [14]:
correct_count  = 0.0

test_buy  = 1.0
test_sell = 1.0
test_hold = 1.0

correct_buy  = 0.0
correct_sell = 0.0
correct_hold = 0.0

incorrect_buy  = 0.0
incorrect_sell = 0.0
incorrect_hold = 0.0

p = clf.predict(D[len(D)-t_count:])
c = L[len(D)-t_count:]

c0 = {'point': [], 'correct': [], 'wrong': []}

for i in range(0, t_count):
    
    if c[i] == 1:
        test_buy += 1.0;
    elif c[i] == 0:
        test_hold += 1.0;
    elif c[i] == -1:
        test_sell += 1.0;
    
    if p[i] == c[i]:
        correct_count += 1.0
        
        if c[i] == 1:
            correct_buy += 1.0;
        elif c[i] == 0:
            correct_hold += 1.0;
        elif c[i] == -1:
            correct_sell += 1.0;

    else:        
        if c[i] == 1:
            incorrect_buy += 1.0;
        elif c[i] == 0:
            incorrect_hold += 1.0;
        elif c[i] == -1:
            incorrect_sell += 1.0;
            
        c0['point'].append(D[i])
        c0['correct'].append(c[i])
        c0['wrong'].append(p[i])
         
print( "Accuracy:  ", (correct_count/t_count)*100.00 )

print( "Test buy:  ", test_buy  )
print( "Test sell: ", test_sell )
print( "Test hold: ", test_hold )

print( "Correct buy:  ", correct_buy,  "Incorrect buy:  ", incorrect_buy,  "Accuracy: ", (correct_buy/test_buy)*100 )
print( "Correct sell: ", correct_sell, "Incorrect sell: ", incorrect_sell, "Accuracy: ", (correct_sell/test_sell)*100 )
print( "Correct hold: ", correct_hold, "Incorrect hold: ", incorrect_hold, "Accuracy: ", (correct_hold/test_hold)*100 )


('Accuracy:  ', 74.21875)
('Test buy:  ', 278.0)
('Test sell: ', 256.0)
('Test hold: ', 493.0)
('Correct buy:  ', 197.0, 'Incorrect buy:  ', 80.0, 'Accuracy: ', 70.86330935251799)
('Correct sell: ', 194.0, 'Incorrect sell: ', 61.0, 'Accuracy: ', 75.78125)
('Correct hold: ', 369.0, 'Incorrect hold: ', 123.0, 'Accuracy: ', 74.84787018255578)


In [None]:
%matplotlib qt

plt.plot(Z["time"], Z["zigzag"], range(0, len(train)), train)
#plt.plot(D[-101])



In [10]:
plt.plot(c0['point'][2])

for i in range(0, len(c0['point'])):
    print(c0['correct'][i], c0['wrong'][i])

(0, 1)
(1, 0)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(1, 0)
(0, 1)
(0, 1)
(0, 1)
(1, 0)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(1, 0)
(0, 1)
(0, 1)
(1, 0)
(0, 1)
(1, 0)
(0, 1)
(1, 0)
(1, 0)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(1, 0)
(0, 1)
(1, 0)
(0, 1)
(1, 0)
(1, 0)
(1, 0)
(0, 1)
(1, 0)
(1, 0)
(1, 0)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(1, 0)
(1, 0)
(1, 0)
(0, 1)
(0, 1)
(1, 0)
(1, 0)
(1, 0)
(1, 0)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(1, 0)
(0, 1)
(1, 0)
(1, 0)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(1, 0)
(1, 0)
(1, 0)
(0, 1)
(0, 1)
(0, 1)
(1, 0)
(0, 1)
(0, 1)
(0, 1)
(1, 0)
(1, 0)
(0, 1)
(1, 0)
(0, 1)
(0, 1)
(1, 0)
(1, 0)
(1, 0)
(1, 0)
(1, 0)
(1, 0)
(1, 0)
(0, 1)
(1, 0)
(0, 1)
(1, 0)
(1, 0)
(0, 1)
(0, 1)
(1, 0)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(1, 0)
(0, 1)
(1, 0)
(0, 1)
(1, 0)
(1, 0)
(1, 0)
(0, 1)
(0, 1)
(1, 0)
(0, 1)
(0, 1)
(1, 0)
(0, 1)
(1, 0)
(1, 0)
(0, 1)
(0, 1)
(0, 1)
(0, 1)
(1, 0)
(0, 1)
(1, 0)
(0, 1)
(0, 1)
(0, 1)
(1, 0)
(0, 1)
(0, 1)
(1, 0)
(1, 0)
(1, 0)
(1, 0)
(0, 1)