In [1]:
import pandas as pd
import numpy as np
import bokeh.plotting as bp
import matplotlib.pyplot as plt
%matplotlib inline

#### Para empezar se probará con los pares EUR/USD y GBP/USD para predecir la señal EUR/USD

In [7]:
#Se lee la señal correspondiente a GBP vs USD para el mes de enero de 2018
data_GBPvsUSD = pd.read_csv("../data/TrueFX/GBP-USD/datos_procesados_5T_GBPUSD-2018-01.csv", index_col=0
                            ,infer_datetime_format=True, parse_dates=True )

#Se lee la señal correspondiente a EUR vs USD para el mes de enero de 2018
data_EURvsUSD = pd.read_csv("../data/TrueFX/EUR-USD/datos_procesados_5T_EURUSD-2018-01.csv", index_col=0
                            ,infer_datetime_format=True, parse_dates=True )

In [7]:
%run ../code/EUtilities.py
%run ../code/PNLEstimatorWrapper.py
EU = EUtilities()

In [12]:
print('tamaño de la señal EUR/USD', data_EURvsUSD.shape)
print('tamaño de la señal GBP/USD', data_GBPvsUSD.shape)

tamaño de la señal EUR/USD (6361, 2)
tamaño de la señal GBP/USD (6360, 2)


para poder utilizar la función "multi_signal" se deben cumplir ciertas caracteristicas, entre ellas, que los dataframe a utilizar tenga su columna llamada "bid"...

In [13]:
#se construye el dataset EUR/USD con las dos señales en el, para su posterior uso
data_EURvsUSD = EU.multi_signal(data_EURvsUSD,data_GBPvsUSD)

In [14]:
data_EURvsUSD[:10]

Unnamed: 0,bid,ask,bidnew
2018-01-02 00:00:00,1.20015,1.2011,1.20015
2018-01-02 00:05:00,1.20047,1.20084,1.3494
2018-01-02 00:10:00,1.20082,1.20126,1.34895
2018-01-02 00:15:00,1.20035,1.20091,1.34945
2018-01-02 00:20:00,1.20049,1.20085,1.34953
2018-01-02 00:25:00,1.20047,1.20085,1.34953
2018-01-02 00:30:00,1.20079,1.20113,1.34963
2018-01-02 00:35:00,1.20135,1.20155,1.35042
2018-01-02 00:40:00,1.20143,1.20158,1.34983
2018-01-02 00:45:00,1.20149,1.20163,1.3495


procedemos a construir ambos dataset por separados para luego unirlos

In [20]:
#los datos del par EUR/USD 
X_EURUSD, y_EURUSD, bt_EURUSD = EU.build_dataset(data_EURvsUSD,bid_col='bid',window=7,binary_target=True,PNL=True
                                             ,delete_constant_values=True)
#los datos del par GBP/USD para luego unirlos y formar los datos X para entrenar
X_GBPUSD,_ = EU.build_dataset(data_EURvsUSD,bid_col='bidnew',window=7,binary_target=False
                              ,delete_constant_values=True)

In [21]:
X_EURUSD.shape, X_GBPUSD.shape

((6232, 9), (6237, 7))

debido a que son diferentes señales, al habilitar la opción de <i>"delete_constant_values"</i> en la función <i>"build_dataset"</i> esto hace que las señales resultantes no sean iguales en tamaño, por ende para entrenar se cogeran los valores de los indices de la señal a medir, si este no está en la otra señal, se coloca lo de la señal EUR/USD

In [22]:
X_GBPUSD = X_GBPUSD.loc[X_EURUSD.index]
X_EURUSD.shape, X_GBPUSD.shape

((6232, 9), (6232, 7))

debido a que si el valor del indice del par EUR/USD no se encuentra en el par GBP/USD entonces este genera valores nan, estos valores se cambian por el valor que hay en el par EUR/USD

In [23]:
#para cambiar los valores NaN por los que hay en la señal EUR/USD
X_GBPUSD = X_GBPUSD.fillna(X_EURUSD,axis=0)

se unen para entrenar la multiseñal

In [43]:
window=7
for i in X_GBPUSD.columns:
    X_EURUSD.insert(loc=window+int(i),column=window+int(i),value=X_GBPUSD.iloc[:,i])

In [44]:
X_EURUSD.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,PNL_0,PNL_1
2018-01-02 00:30:00,1.20015,1.20047,1.20082,1.20035,1.20049,1.20047,1.20079,1.20015,1.3494,1.34895,1.34945,1.34953,1.34953,1.34963,-0.00076,0.00022
2018-01-02 00:35:00,1.20047,1.20082,1.20035,1.20049,1.20047,1.20079,1.20135,1.3494,1.34895,1.34945,1.34953,1.34953,1.34963,1.35042,-0.00023,-0.00012
2018-01-02 00:40:00,1.20082,1.20035,1.20049,1.20047,1.20079,1.20135,1.20143,1.34895,1.34945,1.34953,1.34953,1.34963,1.35042,1.34983,-0.0002,-9e-05
2018-01-02 00:45:00,1.20035,1.20049,1.20047,1.20079,1.20135,1.20143,1.20149,1.34945,1.34953,1.34953,1.34963,1.35042,1.34983,1.3495,-7e-05,-0.0003
2018-01-02 00:50:00,1.20049,1.20047,1.20079,1.20135,1.20143,1.20149,1.20133,1.34953,1.34953,1.34963,1.35042,1.34983,1.3495,1.34968,-0.00028,-0.00017


#### probando la multiseñal con el periodo de 5T

In [50]:
from sklearn.naive_bayes import GaussianNB
    
y = bt_EURUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
#para indexar el dataframe por las fechas
dates = np.unique(X_EURUSD.index.date)[n_bdtrain:]
est_GNB = PNLEstimatorWrapper(GaussianNB(), PNL_column=['PNL_0','PNL_1'])
result_GNB = EU.step_validation(est_GNB, X_EURUSD, y, EU.v_split(X_EURUSD,n_bdtrain,n_bdtest,mday))
result_GNB, predict_GNB = EU.redim(result_GNB)
print('--------- GaussianNB 5m ----------------')
print('average PNL : ', np.mean(result_GNB[:,0]))
print('average buy: ', np.mean(result_GNB[:,1]))
print('average sell: ', np.mean(result_GNB[:,2]))
print('longest_buy: ', np.max(result_GNB[:,3]))
print('longest_sell: ', np.max(result_GNB[:,4]))
print('%correct_1: ', np.mean(result_GNB[:,5]))
print('%correct_0: ', np.mean(result_GNB[:,6]))
print('%correct_3: ', np.mean(result_GNB[:,7]))
print('%correct_2: ', np.mean(result_GNB[:,8]))
print('% correct total: ', np.mean(result_GNB[:,5]+result_GNB[:,6]+result_GNB[:,7]+result_GNB[:,8]))
print('-------------------------------------')
df_GNB = pd.DataFrame(result_GNB, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
print(df_GNB)
df_GNB.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/GaussianNB/pnl_5T_EUR-USD_GBP-USD_GNB_NC4_wsize7_2018-01.csv')

--------- GaussianNB 5m ----------------
average PNL :  -0.0038047826087
average buy:  109.695652174
average sell:  112.47826087
longest_buy:  267.0
longest_sell:  288.0
%correct_1:  0.127552755757
%correct_0:  0.0946245566433
%correct_3:  0.0204626330719
%correct_2:  0.0556370631486
% correct total:  0.298277008621
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00057    1.0    0.0           1.0            0.0    0.000000   
2018-01-08 -0.01514  279.0    0.0          87.0           39.0    0.301075   
2018-01-09 -0.01132  283.0    0.0          77.0            1.0    0.416961   
2018-01-10 -0.00928  277.0    4.0         160.0            1.0    0.380783   
2018-01-11  0.00072  182.0  100.0          59.0          102.0    0.273050   
2018-01-12  0.00000    9.0  271.0           0.0          280.0    0.000000   
2018-01-13 -0.00138    1.0    0.0           1.0            0.0    0.000000   
2018-01-15 -0.00548 

In [54]:
from sklearn.neighbors import KNeighborsClassifier
    
y = bt_EURUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
#para indexar el dataframe por las fechas
dates = np.unique(X_EURUSD.index.date)[n_bdtrain:]
est_KNC = PNLEstimatorWrapper(KNeighborsClassifier(n_neighbors=7,n_jobs=2,leaf_size=30
                                                   ,algorithm='kd_tree'), PNL_column=['PNL_0','PNL_1'])
result_KNC= EU.step_validation(est_KNC, X_EURUSD, y, EU.v_split(X_EURUSD,n_bdtrain,n_bdtest,mday))
result_KNC, predict_KNC = EU.redim(result_KNC)
print('--------- KNeighbors 5m ----------------')
print('average PNL : ', np.mean(result_KNC[:,0]))
print('average buy: ', np.mean(result_KNC[:,1]))
print('average sell: ', np.mean(result_KNC[:,2]))
print('longest_buy: ', np.max(result_KNC[:,3]))
print('longest_sell: ', np.max(result_KNC[:,4]))
print('%correct_1: ', np.mean(result_KNC[:,5]))
print('%correct_0: ', np.mean(result_KNC[:,6]))
print('%correct_3: ', np.mean(result_KNC[:,7]))
print('%correct_2: ', np.mean(result_KNC[:,8]))
print('% correct total: ', np.mean(result_KNC[:,5]+result_KNC[:,6]+result_KNC[:,7]+result_KNC[:,8]))
print('-------------------------------------')
df_KNC = pd.DataFrame(result_KNC, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
print(df_KNC)
df_KNC.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/KNeighborsClassifier/pnl_5T_EUR-USD_GBP-USD_KNC_NC4_wsize7_2018-01.csv')

--------- KNeighbors 5m ----------------
average PNL :  -0.00971826086957
average buy:  95.2608695652
average sell:  126.913043478
longest_buy:  250.0
longest_sell:  241.0
%correct_1:  0.19210920098
%correct_0:  0.198820495597
%correct_3:  0.000154178230034
%correct_2:  0.0012350836707
% correct total:  0.392318958478
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00121    0.0    1.0           0.0            1.0    0.000000   
2018-01-08 -0.00815  154.0  125.0          30.0           36.0    0.240143   
2018-01-09 -0.01294  203.0   80.0          77.0           21.0    0.314488   
2018-01-10 -0.01273  100.0  181.0          46.0           30.0    0.160142   
2018-01-11 -0.00937  161.0  121.0          59.0           41.0    0.251773   
2018-01-12 -0.02342   12.0  268.0           4.0          160.0    0.025000   
2018-01-13 -0.00138    1.0    0.0           1.0            0.0    0.000000   
2018-01-15 -0.0049

In [55]:
from sklearn.tree import DecisionTreeClassifier
    
y = bt_EURUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
#para indexar el dataframe por las fechas
dates = np.unique(X_EURUSD.index.date)[n_bdtrain:]
est_DTC = PNLEstimatorWrapper(DecisionTreeClassifier(), PNL_column=['PNL_0','PNL_1'])
result_DTC= EU.step_validation(est_DTC, X_EURUSD, y, EU.v_split(X_EURUSD,n_bdtrain,n_bdtest,mday))
result_DTC, predict_DTC = EU.redim(result_DTC)
print('--------- DecisionTree 5m ----------------')
print('average PNL : ', np.mean(result_DTC[:,0]))
print('average buy: ', np.mean(result_DTC[:,1]))
print('average sell: ', np.mean(result_DTC[:,2]))
print('longest_buy: ', np.max(result_DTC[:,3]))
print('longest_sell: ', np.max(result_DTC[:,4]))
print('%correct_1: ', np.mean(result_DTC[:,5]))
print('%correct_0: ', np.mean(result_DTC[:,6]))
print('%correct_3: ', np.mean(result_DTC[:,7]))
print('%correct_2: ', np.mean(result_DTC[:,8]))
print('% correct total: ', np.mean(result_DTC[:,5]+result_DTC[:,6]+result_DTC[:,7]+result_DTC[:,8]))
print('-------------------------------------')
df_DTC = pd.DataFrame(result_DTC, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
print(df_DTC)
df_DTC.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/DecisionTreeClassifier/pnl_5T_EUR-USD_GBP-USD_DTC_NC4_wsize7_2018-01.csv')

--------- DecisionTree 5m ----------------
average PNL :  -0.00579260869565
average buy:  136.608695652
average sell:  85.5652173913
longest_buy:  218.0
longest_sell:  110.0
%correct_1:  0.233971295479
%correct_0:  0.112606987125
%correct_3:  0.00477245074905
%correct_2:  0.00480548560391
% correct total:  0.356156218957
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00057    1.0    0.0           1.0            0.0    0.000000   
2018-01-08 -0.00717  167.0  112.0          18.0           23.0    0.236559   
2018-01-09 -0.01317  240.0   43.0         170.0            9.0    0.356890   
2018-01-10 -0.01250  182.0   99.0          37.0           16.0    0.241993   
2018-01-11 -0.00843  168.0  114.0          25.0           14.0    0.234043   
2018-01-12 -0.00426   89.0  191.0          24.0           70.0    0.135714   
2018-01-13  0.00000    0.0    1.0           0.0            1.0    0.000000   
2018-01-15 -0.0

In [56]:
from sklearn.ensemble import RandomForestClassifier
    
y = bt_EURUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
#para indexar el dataframe por las fechas
dates = np.unique(X_EURUSD.index.date)[n_bdtrain:]
est_RFC = PNLEstimatorWrapper(RandomForestClassifier(n_estimators=15, n_jobs=3), PNL_column=['PNL_0','PNL_1'])
result_RFC= EU.step_validation(est_RFC, X_EURUSD, y, EU.v_split(X_EURUSD,n_bdtrain,n_bdtest,mday))
result_RFC, predict_RFC = EU.redim(result_RFC)
print('--------- RandomForest 5m ----------------')
print('average PNL : ', np.mean(result_RFC[:,0]))
print('average buy: ', np.mean(result_RFC[:,1]))
print('average sell: ', np.mean(result_RFC[:,2]))
print('longest_buy: ', np.max(result_RFC[:,3]))
print('longest_sell: ', np.max(result_RFC[:,4]))
print('%correct_1: ', np.mean(result_RFC[:,5]))
print('%correct_0: ', np.mean(result_RFC[:,6]))
print('%correct_3: ', np.mean(result_RFC[:,7]))
print('%correct_2: ', np.mean(result_RFC[:,8]))
print('% correct total: ', np.mean(result_RFC[:,5]+result_RFC[:,6]+result_RFC[:,7]+result_RFC[:,8]))
print('-------------------------------------')
df_RFC = pd.DataFrame(result_RFC, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
print(df_RFC)
df_RFC.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/RandomForestClassifier/pnl_5T_EUR-USD_GBP-USD_RFC_NC4_wsize7_2018-01.csv')

--------- RandomForest 5m ----------------
average PNL :  -0.00680782608696
average buy:  94.2608695652
average sell:  127.913043478
longest_buy:  168.0
longest_sell:  244.0
%correct_1:  0.191332615757
%correct_0:  0.193928186525
%correct_3:  0.00123727833646
%correct_2:  0.00402911697165
% correct total:  0.390527197591
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00057    1.0    0.0           1.0            0.0    0.000000   
2018-01-08 -0.00642  129.0  150.0          29.0           33.0    0.211470   
2018-01-09 -0.01098  270.0   13.0         168.0            4.0    0.413428   
2018-01-10 -0.00249   70.0  211.0           8.0           31.0    0.110320   
2018-01-11 -0.00740  218.0   64.0          59.0           18.0    0.361702   
2018-01-12 -0.02611   28.0  252.0           5.0           75.0    0.035714   
2018-01-13  0.00000    0.0    1.0           0.0            1.0    0.000000   
2018-01-15 -0.0

#### Ahora probaremos con los pares GBP/USD y EUR/USD para predecir GBP/USD

In [38]:
#Se lee la señal correspondiente a GBP vs USD para el mes de enero de 2018
data_GBPvsUSD = pd.read_csv("../data/TrueFX/GBP-USD/datos_procesados_5T_GBPUSD-2018-01.csv", index_col=0
                            ,infer_datetime_format=True, parse_dates=True )

#Se lee la señal correspondiente a EUR vs USD para el mes de enero de 2018
data_EURvsUSD = pd.read_csv("../data/TrueFX/EUR-USD/datos_procesados_5T_EURUSD-2018-01.csv", index_col=0
                            ,infer_datetime_format=True, parse_dates=True )

In [39]:
#se construye el dataset EUR/USD con las dos señales en el, para su posterior uso
data_GBPvsUSD = EU.multi_signal(data_GBPvsUSD,data_EURvsUSD)

In [37]:
data_GBPvsUSD.head()

Unnamed: 0_level_0,bid,ask,bidnew
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-02 00:05:00,1.3494,1.35247,1.20047
2018-01-02 00:10:00,1.34895,1.35287,1.20082
2018-01-02 00:15:00,1.34945,1.35216,1.20035
2018-01-02 00:20:00,1.34953,1.35298,1.20049
2018-01-02 00:25:00,1.34953,1.35289,1.20047


In [40]:
#los datos del par EUR/USD 
X_EURUSD,_= EU.build_dataset(data_GBPvsUSD,bid_col='bidnew',window=7,binary_target=False
                             ,delete_constant_values=True)
#los datos del par GBP/USD para luego unirlos y formar los datos X para entrenar
X_GBPUSD, y_GBPUSD, bt_GBPUSD  = EU.build_dataset(data_GBPvsUSD,bid_col='bid',window=7,binary_target=True
                              ,delete_constant_values=True,PNL=True)

In [41]:
X_EURUSD = X_EURUSD.loc[X_GBPUSD.index]
X_EURUSD.shape, X_GBPUSD.shape

((6236, 7), (6236, 9))

In [42]:
#para cambiar los valores NaN por los que hay en la señal EUR/USD
X_EURUSD = X_EURUSD.fillna(X_GBPUSD,axis=0)

In [43]:
window=7
for i in X_EURUSD.columns:
    X_GBPUSD.insert(loc=window+int(i),column=window+int(i),value=X_EURUSD.iloc[:,i])

In [44]:
X_GBPUSD.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,PNL_0,PNL_1
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2018-01-02 00:35:00,1.3494,1.34895,1.34945,1.34953,1.34953,1.34963,1.35042,1.20047,1.20082,1.20035,1.20049,1.20047,1.20079,1.20135,-0.00072,-0.00316
2018-01-02 00:40:00,1.34895,1.34945,1.34953,1.34953,1.34963,1.35042,1.34983,1.20082,1.20035,1.20049,1.20047,1.20079,1.20135,1.20143,-0.00316,-0.00164
2018-01-02 00:45:00,1.34945,1.34953,1.34953,1.34963,1.35042,1.34983,1.3495,1.20035,1.20049,1.20047,1.20079,1.20135,1.20143,1.20149,-0.003,-0.00331
2018-01-02 00:50:00,1.34953,1.34953,1.34963,1.35042,1.34983,1.3495,1.34968,1.20049,1.20047,1.20079,1.20135,1.20143,1.20149,1.20133,-0.00277,-0.00234
2018-01-02 00:55:00,1.34953,1.34963,1.35042,1.34983,1.3495,1.34968,1.35016,1.20047,1.20079,1.20135,1.20143,1.20149,1.20133,1.20139,-0.00195,-0.00105


In [45]:
from sklearn.naive_bayes import GaussianNB
    
y = bt_GBPUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
#para indexar el dataframe por las fechas
dates = np.unique(X_GBPUSD.index.date)[n_bdtrain:]
est_GNB = PNLEstimatorWrapper(GaussianNB(), PNL_column=['PNL_0','PNL_1'])
result_GNB = EU.step_validation(est_GNB, X_GBPUSD, y, EU.v_split(X_GBPUSD,n_bdtrain,n_bdtest,mday))
result_GNB, predict_GNB = EU.redim(result_GNB)
print('--------- GaussianNB 5m ----------------')
print('average PNL : ', np.mean(result_GNB[:,0]))
print('average buy: ', np.mean(result_GNB[:,1]))
print('average sell: ', np.mean(result_GNB[:,2]))
print('longest_buy: ', np.max(result_GNB[:,3]))
print('longest_sell: ', np.max(result_GNB[:,4]))
print('%correct_1: ', np.mean(result_GNB[:,5]))
print('%correct_0: ', np.mean(result_GNB[:,6]))
print('%correct_3: ', np.mean(result_GNB[:,7]))
print('%correct_2: ', np.mean(result_GNB[:,8]))
print('% correct total: ', np.mean(result_GNB[:,5]+result_GNB[:,6]+result_GNB[:,7]+result_GNB[:,8]))
print('-------------------------------------')
df_GNB = pd.DataFrame(result_GNB, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
print(df_GNB)
df_GNB.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/GaussianNB/pnl_5T_GBP-USD_EUR-USD_GNB_NC4_wsize7_2018-01.csv')

--------- GaussianNB 5m ----------------
average PNL :  -0.0192852173913
average buy:  49.8695652174
average sell:  173.0
longest_buy:  283.0
longest_sell:  287.0
%correct_1:  0.094555638556
%correct_0:  0.25553482568
%correct_3:  0.00460986750892
%correct_2:  0.00784781697079
% correct total:  0.362548148716
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00171    0.0    1.0           0.0            1.0    0.000000   
2018-01-08 -0.02257    8.0  269.0           0.0          277.0    0.000000   
2018-01-09 -0.02009  165.0  114.0          96.0          116.0    0.215054   
2018-01-10 -0.01759    0.0  284.0           0.0          284.0    0.000000   
2018-01-11 -0.01882   95.0  186.0          52.0           97.0    0.131673   
2018-01-12 -0.02080    8.0  271.0           0.0          279.0    0.000000   
2018-01-13 -0.00428    1.0    0.0           1.0            0.0    0.000000   
2018-01-15 -0.02275  283.0 

In [47]:
from sklearn.neighbors import KNeighborsClassifier
    
y = bt_GBPUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
#para indexar el dataframe por las fechas
dates = np.unique(X_GBPUSD.index.date)[n_bdtrain:]
est_KNC = PNLEstimatorWrapper(KNeighborsClassifier(n_neighbors=7,n_jobs=2,leaf_size=30
                                                   ,algorithm='kd_tree'), PNL_column=['PNL_0','PNL_1'])
result_KNC= EU.step_validation(est_KNC, X_GBPUSD, y, EU.v_split(X_GBPUSD,n_bdtrain,n_bdtest,mday))
result_KNC, predict_KNC = EU.redim(result_KNC)
print('--------- KNeighbors 5m ----------------')
print('average PNL : ', np.mean(result_KNC[:,0]))
print('average buy: ', np.mean(result_KNC[:,1]))
print('average sell: ', np.mean(result_KNC[:,2]))
print('longest_buy: ', np.max(result_KNC[:,3]))
print('longest_sell: ', np.max(result_KNC[:,4]))
print('%correct_1: ', np.mean(result_KNC[:,5]))
print('%correct_0: ', np.mean(result_KNC[:,6]))
print('%correct_3: ', np.mean(result_KNC[:,7]))
print('%correct_2: ', np.mean(result_KNC[:,8]))
print('% correct total: ', np.mean(result_KNC[:,5]+result_KNC[:,6]+result_KNC[:,7]+result_KNC[:,8]))
print('-------------------------------------')
df_KNC = pd.DataFrame(result_KNC, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
print(df_KNC)
df_KNC.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/KNeighborsClassifier/pnl_5T_GBP-USD_EUR-USD_KNC_NC4_wsize7_2018-01.csv')

--------- KNeighbors 5m ----------------
average PNL :  -0.0149926086957
average buy:  75.2173913043
average sell:  147.652173913
longest_buy:  96.0
longest_sell:  182.0
%correct_1:  0.145483338687
%correct_0:  0.221847481481
%correct_3:  0.0035594433986
%correct_2:  0.0159277267139
% correct total:  0.38681799028
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00171    0.0    1.0           0.0            1.0    0.000000   
2018-01-08 -0.02308   43.0  234.0          11.0          100.0    0.032491   
2018-01-09 -0.01432  186.0   93.0          96.0           42.0    0.236559   
2018-01-10 -0.01717  148.0  136.0          40.0           22.0    0.183099   
2018-01-11 -0.01764  200.0   81.0          25.0           29.0    0.153025   
2018-01-12 -0.02385   50.0  229.0           5.0           95.0    0.075269   
2018-01-13  0.00000    0.0    1.0           0.0            1.0    0.000000   
2018-01-15 -0.00084   

In [48]:
from sklearn.tree import DecisionTreeClassifier
    
y = bt_GBPUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
#para indexar el dataframe por las fechas
dates = np.unique(X_GBPUSD.index.date)[n_bdtrain:]
est_DTC = PNLEstimatorWrapper(DecisionTreeClassifier(), PNL_column=['PNL_0','PNL_1'])
result_DTC= EU.step_validation(est_DTC, X_GBPUSD, y, EU.v_split(X_GBPUSD,n_bdtrain,n_bdtest,mday))
result_DTC, predict_DTC = EU.redim(result_DTC)
print('--------- DecisionTree 5m ----------------')
print('average PNL : ', np.mean(result_DTC[:,0]))
print('average buy: ', np.mean(result_DTC[:,1]))
print('average sell: ', np.mean(result_DTC[:,2]))
print('longest_buy: ', np.max(result_DTC[:,3]))
print('longest_sell: ', np.max(result_DTC[:,4]))
print('%correct_1: ', np.mean(result_DTC[:,5]))
print('%correct_0: ', np.mean(result_DTC[:,6]))
print('%correct_3: ', np.mean(result_DTC[:,7]))
print('%correct_2: ', np.mean(result_DTC[:,8]))
print('% correct total: ', np.mean(result_DTC[:,5]+result_DTC[:,6]+result_DTC[:,7]+result_DTC[:,8]))
print('-------------------------------------')
df_DTC = pd.DataFrame(result_DTC, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
print(df_DTC)
df_DTC.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/DecisionTreeClassifier/pnl_5T_GBP-USD_EUR-USD_DTC_NC4_wsize7_2018-01.csv')

--------- DecisionTree 5m ----------------
average PNL :  -0.0124952173913
average buy:  121.086956522
average sell:  101.782608696
longest_buy:  239.0
longest_sell:  251.0
%correct_1:  0.144044902008
%correct_0:  0.159351699077
%correct_3:  0.00926054552748
%correct_2:  0.0122967124274
% correct total:  0.324953859039
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00409    1.0    0.0           1.0            0.0    0.000000   
2018-01-08 -0.01118  201.0   76.0           4.0           97.0    0.018051   
2018-01-09 -0.01668  206.0   73.0          96.0           26.0    0.268817   
2018-01-10 -0.01492  171.0  113.0          67.0           16.0    0.186620   
2018-01-11 -0.01931  159.0  122.0          23.0           28.0    0.135231   
2018-01-12 -0.03950   59.0  220.0          23.0           78.0    0.053763   
2018-01-13 -0.00402    0.0    1.0           0.0            1.0    0.000000   
2018-01-15 -0.002

In [49]:
from sklearn.ensemble import RandomForestClassifier
    
y = bt_GBPUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
#para indexar el dataframe por las fechas
dates = np.unique(X_GBPUSD.index.date)[n_bdtrain:]
est_RFC = PNLEstimatorWrapper(RandomForestClassifier(n_estimators=15, n_jobs=3), PNL_column=['PNL_0','PNL_1'])
result_RFC= EU.step_validation(est_RFC, X_GBPUSD, y, EU.v_split(X_GBPUSD,n_bdtrain,n_bdtest,mday))
result_RFC, predict_RFC = EU.redim(result_RFC)
print('--------- RandomForest 5m ----------------')
print('average PNL : ', np.mean(result_RFC[:,0]))
print('average buy: ', np.mean(result_RFC[:,1]))
print('average sell: ', np.mean(result_RFC[:,2]))
print('longest_buy: ', np.max(result_RFC[:,3]))
print('longest_sell: ', np.max(result_RFC[:,4]))
print('%correct_1: ', np.mean(result_RFC[:,5]))
print('%correct_0: ', np.mean(result_RFC[:,6]))
print('%correct_3: ', np.mean(result_RFC[:,7]))
print('%correct_2: ', np.mean(result_RFC[:,8]))
print('% correct total: ', np.mean(result_RFC[:,5]+result_RFC[:,6]+result_RFC[:,7]+result_RFC[:,8]))
print('-------------------------------------')
df_RFC = pd.DataFrame(result_RFC, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
print(df_RFC)
df_RFC.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/RandomForestClassifier/pnl_5T_GBP-USD_EUR-USD_RFC_NC4_wsize7_2018-01.csv')

--------- RandomForest 5m ----------------
average PNL :  -0.0128434782609
average buy:  110.130434783
average sell:  112.739130435
longest_buy:  242.0
longest_sell:  254.0
%correct_1:  0.157915051258
%correct_0:  0.140428511224
%correct_3:  0.00278043523319
%correct_2:  0.00994679572074
% correct total:  0.311070793436
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00171    0.0    1.0           0.0            1.0    0.000000   
2018-01-08 -0.01357  188.0   89.0          54.0           37.0    0.263538   
2018-01-09 -0.01981  170.0  109.0          45.0           57.0    0.222222   
2018-01-10 -0.02288  152.0  132.0          34.0           21.0    0.154930   
2018-01-11 -0.01737  190.0   91.0          35.0           21.0    0.199288   
2018-01-12 -0.00944  237.0   42.0         149.0            5.0    0.365591   
2018-01-13  0.00000    0.0    1.0           0.0            1.0    0.000000   
2018-01-15 -0.00

## Ahora probaré con una multiseñal de 3 señales
Para esto utilizaré los pares EUR/USD, GBP/USD y USD/CAD para predecir el par EUR/USD

In [68]:
#Se lee la señal correspondiente a GBP vs USD para el mes de enero de 2018
data_GBPvsUSD = pd.read_csv("../data/TrueFX/GBP-USD/datos_procesados_5T_GBPUSD-2018-01.csv", index_col=0
                            ,infer_datetime_format=True, parse_dates=True )

#Se lee la señal correspondiente a EUR vs USD para el mes de enero de 2018
data_EURvsUSD = pd.read_csv("../data/TrueFX/EUR-USD/datos_procesados_5T_EURUSD-2018-01.csv", index_col=0
                            ,infer_datetime_format=True, parse_dates=True )

#Se lee la señal correspondiente a EUR vs USD para el mes de enero de 2018
data_USDvsCAD = pd.read_csv("../data/TrueFX/USD-CAD/datos_procesados_5T_USDCAD-2018-01.csv", index_col=0
                            ,infer_datetime_format=True, parse_dates=True )

In [69]:
print('tamaño de la señal EUR/USD', data_EURvsUSD.shape)
print('tamaño de la señal GBP/USD', data_GBPvsUSD.shape)
print('tamaño de la señal USD/CAD', data_USDvsCAD.shape)

tamaño de la señal EUR/USD (6361, 2)
tamaño de la señal GBP/USD (6360, 2)
tamaño de la señal USD/CAD (6361, 2)


In [70]:
# %load ../code/multi_signal.py
def multi_signal(s_A, s_B, new_col, new_loc):
    """
    dada dos señales s_A y s_B, se obtiene una multiseñal donde la
    señal que predomina para la contrucción es s_A
    new_col, par darle nombre a la nueva columna
    """
    import numpy as np
    import pandas as pd
    #saco los valores que si están en el mismo instante de tiempo
    #se filtra la señal s_B por los indices que tiene s_A
    values = s_B.filter(items=s_A.index, axis=0)
    #inserto una columna llena de ceros para posteriormente colocar ahi los valores a utilizar
    s_A.insert(loc=new_loc, column=new_col, value=np.zeros((s_A.shape[0],1)))
    #llenamos con los datos
    s_A.loc[values.index,new_col] = values.loc[values.index,values.columns[0]]
    #llenamos los ceros
    s_A.loc[s_A.loc[s_A.index,new_col]==0, new_col] = s_A.loc[s_A.loc[s_A.index,new_col]==0,s_A.columns[0]]
    
    return s_A

In [71]:
data_EURvsUSD = multi_signal(data_EURvsUSD,data_GBPvsUSD, new_col='bidGBP/USD', new_loc=2)
data_EURvsUSD = multi_signal(data_EURvsUSD,data_USDvsCAD, new_col='bidUSD/CAD', new_loc=3)

In [72]:
#los datos del par EUR/USD 
X_EURUSD, y_EURUSD, bt_EURUSD = EU.build_dataset(data_EURvsUSD,bid_col='bid',window=7,binary_target=True,PNL=True
                                             ,delete_constant_values=True)
#los datos del par GBP/USD para luego unirlos y formar los datos X para entrenar
X_GBPUSD,_ = EU.build_dataset(data_EURvsUSD,bid_col='bidGBP/USD',window=7,binary_target=False
                              ,delete_constant_values=True)
#los datos del par USD/CAD para luego unirlos y formar los datos X para entrenar
X_USDCAD,_ = EU.build_dataset(data_EURvsUSD,bid_col='bidUSD/CAD',window=7,binary_target=False
                              ,delete_constant_values=True)

In [73]:
X_GBPUSD = X_GBPUSD.loc[X_EURUSD.index]
X_USDCAD = X_USDCAD.loc[X_EURUSD.index]

X_EURUSD.shape, X_GBPUSD.shape, X_USDCAD.shape

((6232, 9), (6232, 7), (6232, 7))

##### para cambiar lo nan

In [74]:
#para cambiar los valores NaN por los que hay en la señal EUR/USD
X_GBPUSD = X_GBPUSD.fillna(X_EURUSD,axis=0)
X_USDCADUSD = X_USDCAD.fillna(X_EURUSD,axis=0)

### pegar los dataset...

In [75]:
window=7
for i in X_GBPUSD.columns:
    X_EURUSD.insert(loc=window+int(i),column=window+int(i),value=X_GBPUSD.iloc[:,i])

In [76]:
window=len(X_EURUSD.columns)
for i in X_USDCAD.columns:
    X_EURUSD.insert(loc=window+int(i),column=window+int(i),value=X_GBPUSD.iloc[:,i])

In [78]:
X_EURUSD.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,PNL_0,PNL_1,16,17,18,19,20,21,22
2018-01-02 00:30:00,1.20015,1.20047,1.20082,1.20035,1.20049,1.20047,1.20079,1.20015,1.3494,1.34895,...,1.34963,-0.00076,0.00022,1.20015,1.3494,1.34895,1.34945,1.34953,1.34953,1.34963
2018-01-02 00:35:00,1.20047,1.20082,1.20035,1.20049,1.20047,1.20079,1.20135,1.3494,1.34895,1.34945,...,1.35042,-0.00023,-0.00012,1.3494,1.34895,1.34945,1.34953,1.34953,1.34963,1.35042
2018-01-02 00:40:00,1.20082,1.20035,1.20049,1.20047,1.20079,1.20135,1.20143,1.34895,1.34945,1.34953,...,1.34983,-0.0002,-9e-05,1.34895,1.34945,1.34953,1.34953,1.34963,1.35042,1.34983
2018-01-02 00:45:00,1.20035,1.20049,1.20047,1.20079,1.20135,1.20143,1.20149,1.34945,1.34953,1.34953,...,1.3495,-7e-05,-0.0003,1.34945,1.34953,1.34953,1.34963,1.35042,1.34983,1.3495
2018-01-02 00:50:00,1.20049,1.20047,1.20079,1.20135,1.20143,1.20149,1.20133,1.34953,1.34953,1.34963,...,1.34968,-0.00028,-0.00017,1.34953,1.34953,1.34963,1.35042,1.34983,1.3495,1.34968


#### Ahora probamos

In [79]:
from sklearn.naive_bayes import GaussianNB
    
y = bt_EURUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
#para indexar el dataframe por las fechas
dates = np.unique(X_EURUSD.index.date)[n_bdtrain:]
est_GNB = PNLEstimatorWrapper(GaussianNB(), PNL_column=['PNL_0','PNL_1'])
result_GNB = EU.step_validation(est_GNB, X_EURUSD, y, EU.v_split(X_EURUSD,n_bdtrain,n_bdtest,mday))
result_GNB, predict_GNB = EU.redim(result_GNB)
print('--------- GaussianNB 5m ----------------')
print('average PNL : ', np.mean(result_GNB[:,0]))
print('average buy: ', np.mean(result_GNB[:,1]))
print('average sell: ', np.mean(result_GNB[:,2]))
print('longest_buy: ', np.max(result_GNB[:,3]))
print('longest_sell: ', np.max(result_GNB[:,4]))
print('%correct_1: ', np.mean(result_GNB[:,5]))
print('%correct_0: ', np.mean(result_GNB[:,6]))
print('%correct_3: ', np.mean(result_GNB[:,7]))
print('%correct_2: ', np.mean(result_GNB[:,8]))
print('% correct total: ', np.mean(result_GNB[:,5]+result_GNB[:,6]+result_GNB[:,7]+result_GNB[:,8]))
print('-------------------------------------')
df_GNB = pd.DataFrame(result_GNB, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
print(df_GNB)
df_GNB.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/GaussianNB/pnl_5T_EUR-USD_GBP-USD_CAD-USD_GNB_NC4_wsize7_2018-01.csv')

--------- GaussianNB 5m ----------------
average PNL :  -0.00344130434783
average buy:  108.086956522
average sell:  114.086956522
longest_buy:  267.0
longest_sell:  288.0
%correct_1:  0.120323098843
%correct_0:  0.0857169103661
%correct_3:  0.0206010936832
%correct_2:  0.0565545774336
% correct total:  0.283195680326
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00057    1.0    0.0           1.0            0.0    0.000000   
2018-01-08 -0.01511  279.0    0.0          87.0            2.0    0.379928   
2018-01-09 -0.01132  283.0    0.0          77.0            1.0    0.416961   
2018-01-10 -0.00928  277.0    4.0         160.0            1.0    0.380783   
2018-01-11  0.00129  183.0   99.0          59.0          101.0    0.276596   
2018-01-12  0.00000    9.0  271.0           0.0          280.0    0.000000   
2018-01-13 -0.00138    1.0    0.0           1.0            0.0    0.000000   
2018-01-15 -0.0054

In [85]:
from sklearn.neighbors import KNeighborsClassifier
    
y = bt_EURUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
#para indexar el dataframe por las fechas
dates = np.unique(X_EURUSD.index.date)[n_bdtrain:]
est_KNC = PNLEstimatorWrapper(KNeighborsClassifier(n_neighbors=7,n_jobs=2,leaf_size=30
                                                   ,algorithm='kd_tree'), PNL_column=['PNL_0','PNL_1'])
result_KNC= EU.step_validation(est_KNC, X_EURUSD, y, EU.v_split(X_EURUSD,n_bdtrain,n_bdtest,mday))
result_KNC, predict_KNC = EU.redim(result_KNC)
print('--------- KNeighbors 5m ----------------')
print('average PNL : ', np.mean(result_KNC[:,0]))
print('average buy: ', np.mean(result_KNC[:,1]))
print('average sell: ', np.mean(result_KNC[:,2]))
print('longest_buy: ', np.max(result_KNC[:,3]))
print('longest_sell: ', np.max(result_KNC[:,4]))
print('%correct_1: ', np.mean(result_KNC[:,5]))
print('%correct_0: ', np.mean(result_KNC[:,6]))
print('%correct_3: ', np.mean(result_KNC[:,7]))
print('%correct_2: ', np.mean(result_KNC[:,8]))
print('% correct total: ', np.mean(result_KNC[:,5]+result_KNC[:,6]+result_KNC[:,7]+result_KNC[:,8]))
print('-------------------------------------')
df_KNC = pd.DataFrame(result_KNC, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
print(df_KNC)
df_KNC.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/KNeighborsClassifier/pnl_5T_GBP-USD_EUR-USD_CAD-USD_KNC_NC4_wsize7_2018-01.csv')

--------- KNeighbors 5m ----------------
average PNL :  -0.00851260869565
average buy:  97.0434782609
average sell:  125.130434783
longest_buy:  250.0
longest_sell:  241.0
%correct_1:  0.195839107192
%correct_0:  0.197412654679
%correct_3:  0.00030945773314
%correct_2:  0.00107924761024
% correct total:  0.394640467215
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00121    0.0    1.0           0.0            1.0    0.000000   
2018-01-08 -0.00286  114.0  165.0          29.0           38.0    0.186380   
2018-01-09 -0.01591  204.0   79.0          49.0           26.0    0.300353   
2018-01-10 -0.01238  106.0  175.0          47.0           30.0    0.153025   
2018-01-11 -0.00598  146.0  136.0          59.0           55.0    0.234043   
2018-01-12 -0.02598   17.0  263.0           3.0          160.0    0.028571   
2018-01-13  0.00000    0.0    1.0           0.0            1.0    0.000000   
2018-01-15 -0.004

In [86]:
from sklearn.tree import DecisionTreeClassifier
    
y = bt_EURUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
#para indexar el dataframe por las fechas
dates = np.unique(X_EURUSD.index.date)[n_bdtrain:]
est_DTC = PNLEstimatorWrapper(DecisionTreeClassifier(), PNL_column=['PNL_0','PNL_1'])
result_DTC= EU.step_validation(est_DTC, X_EURUSD, y, EU.v_split(X_EURUSD,n_bdtrain,n_bdtest,mday))
result_DTC, predict_DTC = EU.redim(result_DTC)
print('--------- DecisionTree 5m ----------------')
print('average PNL : ', np.mean(result_DTC[:,0]))
print('average buy: ', np.mean(result_DTC[:,1]))
print('average sell: ', np.mean(result_DTC[:,2]))
print('longest_buy: ', np.max(result_DTC[:,3]))
print('longest_sell: ', np.max(result_DTC[:,4]))
print('%correct_1: ', np.mean(result_DTC[:,5]))
print('%correct_0: ', np.mean(result_DTC[:,6]))
print('%correct_3: ', np.mean(result_DTC[:,7]))
print('%correct_2: ', np.mean(result_DTC[:,8]))
print('% correct total: ', np.mean(result_DTC[:,5]+result_DTC[:,6]+result_DTC[:,7]+result_DTC[:,8]))
print('-------------------------------------')
df_DTC = pd.DataFrame(result_DTC, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
print(df_DTC)
df_DTC.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/DecisionTreeClassifier/pnl_5T_EUR-USD_GBP-USD_CAD-USD_DTC_NC4_wsize7_2018-01.csv')

--------- DecisionTree 5m ----------------
average PNL :  -0.00550304347826
average buy:  134.913043478
average sell:  87.2608695652
longest_buy:  199.0
longest_sell:  79.0
%correct_1:  0.185835825846
%correct_0:  0.122386576235
%correct_3:  0.00697448587366
%correct_2:  0.00372072766705
% correct total:  0.318917615622
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00057    1.0    0.0           1.0            0.0    0.000000   
2018-01-08 -0.00332  145.0  134.0          16.0           19.0    0.222222   
2018-01-09 -0.01266  241.0   42.0         169.0            9.0    0.356890   
2018-01-10 -0.01052  186.0   95.0          37.0           13.0    0.249110   
2018-01-11 -0.02046  108.0  174.0          12.0           30.0    0.131206   
2018-01-12 -0.00374  232.0   48.0          24.0           70.0    0.135714   
2018-01-13  0.00000    0.0    1.0           0.0            1.0    0.000000   
2018-01-15 -0.00

In [89]:
from sklearn.ensemble import RandomForestClassifier
    
y = bt_EURUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
#para indexar el dataframe por las fechas
dates = np.unique(X_EURUSD.index.date)[n_bdtrain:]
est_RFC = PNLEstimatorWrapper(RandomForestClassifier(n_estimators=15, n_jobs=3), PNL_column=['PNL_0','PNL_1'])
result_RFC= EU.step_validation(est_RFC, X_EURUSD, y, EU.v_split(X_EURUSD,n_bdtrain,n_bdtest,mday))
result_RFC, predict_RFC = EU.redim(result_RFC)
print('--------- RandomForest 5m ----------------')
print('average PNL : ', np.mean(result_RFC[:,0]))
print('average buy: ', np.mean(result_RFC[:,1]))
print('average sell: ', np.mean(result_RFC[:,2]))
print('longest_buy: ', np.max(result_RFC[:,3]))
print('longest_sell: ', np.max(result_RFC[:,4]))
print('%correct_1: ', np.mean(result_RFC[:,5]))
print('%correct_0: ', np.mean(result_RFC[:,6]))
print('%correct_3: ', np.mean(result_RFC[:,7]))
print('%correct_2: ', np.mean(result_RFC[:,8]))
print('% correct total: ', np.mean(result_RFC[:,5]+result_RFC[:,6]+result_RFC[:,7]+result_RFC[:,8]))
print('-------------------------------------')
df_RFC = pd.DataFrame(result_RFC, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
print(df_RFC)
df_RFC.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/RandomForestClassifier/pnl_5T_EUR-USD_GBP-USD_CAD-USD_RFC_NC4_wsize7_2018-01.csv')

--------- RandomForest 5m ----------------
average PNL :  -0.00754782608696
average buy:  109.086956522
average sell:  113.086956522
longest_buy:  199.0
longest_sell:  255.0
%correct_1:  0.215668418842
%correct_0:  0.166513138151
%correct_3:  0.00139255392045
%correct_2:  0.00341239230615
% correct total:  0.386986503219
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00121    0.0    1.0           0.0            1.0    0.000000   
2018-01-08 -0.00701   74.0  205.0           9.0           50.0    0.107527   
2018-01-09 -0.01052  245.0   38.0         172.0            5.0    0.381625   
2018-01-10 -0.00674   75.0  206.0           6.0           34.0    0.110320   
2018-01-11 -0.01393  132.0  150.0          17.0           22.0    0.216312   
2018-01-12 -0.01197  195.0   85.0         147.0           12.0    0.339286   
2018-01-13  0.00000    0.0    1.0           0.0            1.0    0.000000   
2018-01-15 -0.0

### Ahora se prueba una multiseñal con pesos para equilibrar las clases

Para esto utilizaré los pares EUR/USD, GBP/USD y USD/CAD para predecir el par EUR/USD

In [11]:
#Se lee la señal correspondiente a GBP vs USD para el mes de enero de 2018
data_GBPvsUSD = pd.read_csv("../data/TrueFX/GBP-USD/datos_procesados_5T_GBPUSD-2018-01.csv", index_col=0
                            ,infer_datetime_format=True, parse_dates=True )

#Se lee la señal correspondiente a EUR vs USD para el mes de enero de 2018
data_EURvsUSD = pd.read_csv("../data/TrueFX/EUR-USD/datos_procesados_5T_EURUSD-2018-01.csv", index_col=0
                            ,infer_datetime_format=True, parse_dates=True )

#Se lee la señal correspondiente a EUR vs USD para el mes de enero de 2018
data_USDvsCAD = pd.read_csv("../data/TrueFX/USD-CAD/datos_procesados_5T_USDCAD-2018-01.csv", index_col=0
                            ,infer_datetime_format=True, parse_dates=True )

In [12]:
data_EURvsUSD = EU.multi_signal(data_EURvsUSD,data_GBPvsUSD, new_col='bidGBP/USD', new_loc=2)
data_EURvsUSD = EU.multi_signal(data_EURvsUSD,data_USDvsCAD, new_col='bidUSD/CAD', new_loc=3)

In [13]:
data_EURvsUSD.head()

Unnamed: 0,bid,ask,bidGBP/USD,bidUSD/CAD
2018-01-02 00:00:00,1.20015,1.2011,1.20015,1.25283
2018-01-02 00:05:00,1.20047,1.20084,1.3494,1.25374
2018-01-02 00:10:00,1.20082,1.20126,1.34895,1.25415
2018-01-02 00:15:00,1.20035,1.20091,1.34945,1.2541
2018-01-02 00:20:00,1.20049,1.20085,1.34953,1.25412


In [14]:
#los datos del par EUR/USD #los dat 
X_EURUSD, y_EURUSD, bt_EURUSD = EU.build_dataset(data_EURvsUSD,bid_col='bid',window=7,binary_target=True,PNL=True
                                             ,delete_constant_values=True)
#los datos del par GBP/USD para luego unirlos y formar los datos X para entrenar
X_GBPUSD,_ = EU.build_dataset(data_EURvsUSD,bid_col='bidGBP/USD',window=7,binary_target=False
                              ,delete_constant_values=True)
#los datos del par USD/CAD para luego unirlos y formar los datos X para entrenar
X_USDCAD,_ = EU.build_dataset(data_EURvsUSD,bid_col='bidUSD/CAD',window=7,binary_target=False
                              ,delete_constant_values=True)

In [15]:
X_GBPUSD = X_GBPUSD.loc[X_EURUSD.index]
X_USDCAD = X_USDCAD.loc[X_EURUSD.index]

X_EURUSD.shape, X_GBPUSD.shape, X_USDCAD.shape

((6232, 9), (6232, 7), (6232, 7))

In [16]:
#para cambiar los valores NaN por los que hay en la señal EUR/USD
X_GBPUSD = X_GBPUSD.fillna(X_EURUSD,axis=0)
X_USDCAD = X_USDCAD.fillna(X_EURUSD,axis=0)

#### pegar los dataset...

In [17]:
window=7
for i in X_GBPUSD.columns:
    X_EURUSD.insert(loc=window+int(i),column=window+int(i),value=X_GBPUSD.iloc[:,i])

In [18]:
window=len(X_EURUSD.columns)
for i in X_USDCAD.columns:
    X_EURUSD.insert(loc=window+int(i),column=window+int(i),value=X_GBPUSD.iloc[:,i])

In [19]:
X_EURUSD.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,PNL_0,PNL_1,16,17,18,19,20,21,22
2018-01-02 00:30:00,1.20015,1.20047,1.20082,1.20035,1.20049,1.20047,1.20079,1.20015,1.3494,1.34895,...,1.34963,-0.00076,0.00022,1.20015,1.3494,1.34895,1.34945,1.34953,1.34953,1.34963
2018-01-02 00:35:00,1.20047,1.20082,1.20035,1.20049,1.20047,1.20079,1.20135,1.3494,1.34895,1.34945,...,1.35042,-0.00023,-0.00012,1.3494,1.34895,1.34945,1.34953,1.34953,1.34963,1.35042
2018-01-02 00:40:00,1.20082,1.20035,1.20049,1.20047,1.20079,1.20135,1.20143,1.34895,1.34945,1.34953,...,1.34983,-0.0002,-9e-05,1.34895,1.34945,1.34953,1.34953,1.34963,1.35042,1.34983
2018-01-02 00:45:00,1.20035,1.20049,1.20047,1.20079,1.20135,1.20143,1.20149,1.34945,1.34953,1.34953,...,1.3495,-7e-05,-0.0003,1.34945,1.34953,1.34953,1.34963,1.35042,1.34983,1.3495
2018-01-02 00:50:00,1.20049,1.20047,1.20079,1.20135,1.20143,1.20149,1.20133,1.34953,1.34953,1.34963,...,1.34968,-0.00028,-0.00017,1.34953,1.34953,1.34963,1.35042,1.34983,1.3495,1.34968


#### Ahora probamos con los diferentes pesos

In [31]:
from sklearn.naive_bayes import GaussianNB
    
y = bt_EURUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
wc0 = 0.06 #peso de la clase 0
wc1 = 0.06 #peso de la clase 1
wc2 = 0.44 #peso de la clase 2
wc3 = 0.44 #peso de la clase 3
#para indexar el dataframe por las fechas
dates = np.unique(X_EURUSD.index.date)[n_bdtrain:]
est_GNB = PNLEstimatorWrapper(GaussianNB(priors=(wc0,wc1,wc2,wc3)), PNL_column=['PNL_0','PNL_1'])
result_GNB = EU.step_validation(est_GNB, X_EURUSD, y, EU.v_split(X_EURUSD,n_bdtrain,n_bdtest,mday))
result_GNB, predict_GNB = EU.redim(result_GNB)
print('--------- GaussianNB 5m ----------------')
print('average PNL : ', np.mean(result_GNB[:,0]))
print('average buy: ', np.mean(result_GNB[:,1]))
print('average sell: ', np.mean(result_GNB[:,2]))
print('longest_buy: ', np.max(result_GNB[:,3]))
print('longest_sell: ', np.max(result_GNB[:,4]))
print('%correct_1: ', np.mean(result_GNB[:,5]))
print('%correct_0: ', np.mean(result_GNB[:,6]))
print('%correct_3: ', np.mean(result_GNB[:,7]))
print('%correct_2: ', np.mean(result_GNB[:,8]))
print('% correct total: ', np.mean(result_GNB[:,5]+result_GNB[:,6]+result_GNB[:,7]+result_GNB[:,8]))
print('-------------------------------------')
df_GNB = pd.DataFrame(result_GNB, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
wc0 = np.ones(len(result_GNB))*wc0
wc1 = np.ones(len(result_GNB))*wc1
wc2 = np.ones(len(result_GNB))*wc2
wc3 = np.ones(len(result_GNB))*wc3

df_GNB.insert(loc=9,column='wc0',value=wc0)
df_GNB.insert(loc=10,column='wc1',value=wc1)
df_GNB.insert(loc=11,column='wc2',value=wc2)
df_GNB.insert(loc=12,column='wc3',value=wc3)

print(df_GNB)
df_GNB.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/GaussianNB/pnl_5T_weights_EUR-USD_GBP-USD_CAD-USD_GNB_NC4_wsize7_2018-01.csv')

--------- GaussianNB 5m ----------------
average PNL :  -0.00299347826087
average buy:  110.913043478
average sell:  111.260869565
longest_buy:  244.0
longest_sell:  288.0
%correct_1:  0.105516386883
%correct_0:  0.0578436564638
%correct_3:  0.0229138284058
%correct_2:  0.0590154709732
% correct total:  0.245289342726
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00057    1.0    0.0           1.0            0.0    0.000000   
2018-01-08 -0.01511  279.0    0.0          87.0            2.0    0.379928   
2018-01-09 -0.01132  283.0    0.0          77.0            1.0    0.416961   
2018-01-10 -0.00928  277.0    4.0         160.0            1.0    0.380783   
2018-01-11 -0.00020  144.0  138.0          59.0          107.0    0.212766   
2018-01-12  0.00000    9.0  271.0           0.0          280.0    0.000000   
2018-01-13 -0.00138    1.0    0.0           1.0            0.0    0.000000   
2018-01-15 -0.0054

In [34]:
from sklearn.neighbors import KNeighborsClassifier
    
y = bt_EURUSD
n_bdtrain = 4
n_bdtest = 1
mday = 1
#para indexar el dataframe por las fechas
dates = np.unique(X_EURUSD.index.date)[n_bdtrain:]
est_KNC = PNLEstimatorWrapper(KNeighborsClassifier(n_neighbors=7,n_jobs=2,leaf_size=30
                                                   ,algorithm='kd_tree',weights='distance')
                              , PNL_column=['PNL_0','PNL_1'])
result_KNC= EU.step_validation(est_KNC, X_EURUSD, y, EU.v_split(X_EURUSD,n_bdtrain,n_bdtest,mday))
result_KNC, predict_KNC = EU.redim(result_KNC)
print('--------- KNeighbors 5m ----------------')
print('average PNL : ', np.mean(result_KNC[:,0]))
print('average buy: ', np.mean(result_KNC[:,1]))
print('average sell: ', np.mean(result_KNC[:,2]))
print('longest_buy: ', np.max(result_KNC[:,3]))
print('longest_sell: ', np.max(result_KNC[:,4]))
print('%correct_1: ', np.mean(result_KNC[:,5]))
print('%correct_0: ', np.mean(result_KNC[:,6]))
print('%correct_3: ', np.mean(result_KNC[:,7]))
print('%correct_2: ', np.mean(result_KNC[:,8]))
print('% correct total: ', np.mean(result_KNC[:,5]+result_KNC[:,6]+result_KNC[:,7]+result_KNC[:,8]))
print('-------------------------------------')
df_KNC = pd.DataFrame(result_KNC, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
wc0 = 'distance'
wc1 = 'distance'
wc2 = 'distance'
wc3 = 'distance'

df_KNC.insert(loc=9,column='wc0',value=wc0)
df_KNC.insert(loc=10,column='wc1',value=wc1)
df_KNC.insert(loc=11,column='wc2',value=wc2)
df_KNC.insert(loc=12,column='wc3',value=wc3)
print(df_KNC)
df_KNC.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/KNeighborsClassifier/pnl_5T_weights_EUR-USD_GBP-USD_KNC_NC4_wsize7_2018-01.csv')

--------- KNeighbors 5m ----------------
average PNL :  -0.00794130434783
average buy:  106.739130435
average sell:  115.434782609
longest_buy:  154.0
longest_sell:  241.0
%correct_1:  0.209674411437
%correct_0:  0.178313233556
%correct_3:  0.000928921876394
%correct_2:  0.00247732365681
% correct total:  0.391393890527
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00057    1.0    0.0           1.0            0.0    0.000000   
2018-01-08 -0.00216  145.0  134.0          53.0           33.0    0.236559   
2018-01-09 -0.01406  240.0   43.0          65.0           22.0    0.356890   
2018-01-10 -0.01392  120.0  161.0          48.0           29.0    0.170819   
2018-01-11 -0.00643  159.0  123.0          59.0           55.0    0.241135   
2018-01-12 -0.02474   23.0  257.0           4.0          159.0    0.035714   
2018-01-13  0.00000    0.0    1.0           0.0            1.0    0.000000   
2018-01-15 -0.00

In [66]:
from sklearn.tree import DecisionTreeClassifier


wc0 = 0.06 #peso de la clase 0
wc1 = 0.06 #peso de la clase 1
wc2 = 0.44 #peso de la clase 2
wc3 = 0.44 #peso de la clase 3

est_DTC = PNLEstimatorWrapper(DecisionTreeClassifier(class_weight={0:wc0,1:wc1,2:wc2,3:wc3})
                              , PNL_column=['PNL_0','PNL_1'])
result_DTC = EU.step_validation(est_DTC, X_EURUSD, y, EU.v_split(X_EURUSD,n_bdtrain,n_bdtest,mday))
result_DTC, predict_DTC = EU.redim(result_DTC)
print('--------- DecisionTree 5T ----------------')
print('average PNL : ', np.mean(result_DTC[:,0]))
print('average buy: ', np.mean(result_DTC[:,1]))
print('average sell: ', np.mean(result_DTC[:,2]))
print('longest_buy: ', np.max(result_DTC[:,3]))
print('longest_sell: ', np.max(result_DTC[:,4]))
print('%correct_1: ', np.mean(result_DTC[:,5]))
print('%correct_0: ', np.mean(result_DTC[:,6]))
print('%correct_3: ', np.mean(result_DTC[:,7]))
print('%correct_2: ', np.mean(result_DTC[:,8]))
print('% correct total: ', np.mean(result_DTC[:,5]+result_DTC[:,6]+result_DTC[:,7]+result_DTC[:,8]))
print('-------------------------------------')
df_DTC = pd.DataFrame(result_DTC, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
wc0 = np.ones(len(result_DTC))*wc0
wc1 = np.ones(len(result_DTC))*wc1
wc2 = np.ones(len(result_DTC))*wc2
wc3 = np.ones(len(result_DTC))*wc3

df_DTC.insert(loc=9,column='wc0',value=wc0)
df_DTC.insert(loc=10,column='wc1',value=wc1)
df_DTC.insert(loc=11,column='wc2',value=wc2)
df_DTC.insert(loc=12,column='wc3',value=wc3)

print(df_DTC)
df_DTC.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/DecisionTreeClassifier/pnl_5T_weights_EUR-USD_GBP-USD_DTC_NC4_wsize7_2018-01.csv')

--------- DecisionTree 5T ----------------
average PNL :  -0.00723434782609
average buy:  111.173913043
average sell:  111.0
longest_buy:  206.0
longest_sell:  241.0
%correct_1:  0.189726146563
%correct_0:  0.147756540509
%correct_3:  0.0137167331264
%correct_2:  0.0476446117523
% correct total:  0.398844031951
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00121    0.0    1.0           0.0            1.0    0.000000   
2018-01-08 -0.00746   82.0  197.0          18.0           99.0    0.086022   
2018-01-09 -0.00341  264.0   19.0          32.0          162.0    0.063604   
2018-01-10 -0.01387  107.0  174.0          12.0           33.0    0.124555   
2018-01-11 -0.01727  131.0  151.0          20.0           31.0    0.127660   
2018-01-12 -0.02104   81.0  199.0          14.0           74.0    0.125000   
2018-01-13  0.00000    0.0    1.0           0.0            1.0    0.000000   
2018-01-15 -0.00377   50.

In [77]:
from sklearn.ensemble import RandomForestClassifier

wc0 = 1 #peso de la clase 0
wc1 = 1 #peso de la clase 1
wc2 = 3 #peso de la clase 2
wc3 = 3 #peso de la clase 3

est_RFC = PNLEstimatorWrapper(RandomForestClassifier(n_estimators=15, n_jobs=3,
                                                     class_weight={0:wc0,1:wc1,2:wc2,3:wc3})
                              ,PNL_column=['PNL_1','PNL_0'])
result_RFC = EU.step_validation(est_RFC, X_EURUSD, y, EU.v_split(X_EURUSD,4,1,1))
result_RFC, predict_RFC = EU.redim(result_RFC)
print('--------- RandomForest 5T ----------------')
print('average PNL : ', np.mean(result_RFC[:,0]))
print('average buy: ', np.mean(result_RFC[:,1]))
print('average sell: ', np.mean(result_RFC[:,2]))
print('longest_buy: ', np.max(result_RFC[:,3]))
print('longest_sell: ', np.max(result_RFC[:,4]))
print('%correct_1: ', np.mean(result_RFC[:,5]))
print('%correct_0: ', np.mean(result_RFC[:,6]))
print('%correct_3: ', np.mean(result_RFC[:,7]))
print('%correct_2: ', np.mean(result_RFC[:,8]))
print('% correct total: ', np.mean(result_RFC[:,5]+result_RFC[:,6]+result_RFC[:,7]+result_RFC[:,8]))
print('-------------------------------------')
df_RFC = pd.DataFrame(result_RFC, columns=list(['PNL','buys','sells','longest_buys','longest_sells'
                                                ,'%correct_1','%correct_0','%correct_3','%correct_2']), index=dates)
wc0 = np.ones(len(result_RFC))*wc0
wc1 = np.ones(len(result_RFC))*wc1
wc2 = np.ones(len(result_RFC))*wc2
wc3 = np.ones(len(result_RFC))*wc3

df_RFC.insert(loc=9,column='wc0',value=wc0)
df_RFC.insert(loc=10,column='wc1',value=wc1)
df_RFC.insert(loc=11,column='wc2',value=wc2)
df_RFC.insert(loc=12,column='wc3',value=wc3)

print(df_RFC)
df_RFC.to_csv('../resultados/TrueFX/EUR-USD/MultiSeñal/RandomForestClassifier/pnl_5T_weights_EUR-USD_GBP-USD_RFC_NC4_wsize7_2018-01.csv')

--------- RandomForest 5T ----------------
average PNL :  -0.00724347826087
average buy:  108.826086957
average sell:  113.347826087
longest_buy:  151.0
longest_sell:  263.0
%correct_1:  0.162041149845
%correct_0:  0.178596951881
%correct_3:  0.00405333634651
%correct_2:  0.000927824480898
% correct total:  0.345619262554
-------------------------------------
                PNL   buys  sells  longest_buys  longest_sells  %correct_1  \
2018-01-06 -0.00121    0.0    1.0           0.0            1.0    0.000000   
2018-01-08 -0.00919  141.0  138.0          13.0           32.0    0.225806   
2018-01-09 -0.00877  248.0   35.0          80.0           17.0    0.349823   
2018-01-10 -0.01778   77.0  204.0          11.0           30.0    0.099644   
2018-01-11 -0.00263  162.0  120.0          59.0           24.0    0.241135   
2018-01-12 -0.01347   67.0  213.0          17.0           70.0    0.114286   
2018-01-13  0.00000    0.0    1.0           0.0            1.0    0.000000   
2018-01-15 -0.