## Importar librerías

In [1]:
import os
import pandas as pd
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import KernelPCA
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Integer, Categorical, Continuous
from sklearn_genetic.callbacks import ProgressBar, ConsecutiveStopping
import xgboost as xgb
from sklearn.pipeline import Pipeline
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices raised by the imported libraries.
warnings.filterwarnings('ignore')

## Cargar y preparar datos

In [2]:
# Load the buy and sell training sets from the terminal's shared files folder
tester_files_folder = r"C:\Users\Administrador\AppData\Roaming\MetaQuotes\Terminal\Common\Files"
buy_csv_path = os.path.join(tester_files_folder, "buy_training_dataset.csv")
sell_csv_path = os.path.join(tester_files_folder, "sell_training_dataset.csv")
df_buy = pd.read_csv(buy_csv_path)
df_sell = pd.read_csv(sell_csv_path)
# Rows are reported as trades; the feature count is the column count minus one
print(f"Buy -> Trades: {df_buy.shape[0]} Features: {df_buy.shape[1]-1}")
print(f"Sell -> Trades: {df_sell.shape[0]} Features: {df_sell.shape[1]-1}")

Buy -> Trades: 3272 Features: 296
Sell -> Trades: 2806 Features: 296


In [3]:
# Shuffle the buy samples. A fixed seed makes the resulting row order -- and
# therefore any order-dependent step that follows -- repeatable after a
# kernel restart.
df_buy = df_buy.sample(frac=1, random_state=42).reset_index(drop=True)
df_buy

Unnamed: 0,data_0,data_1,data_2,data_3,data_4,data_5,data_6,data_7,data_8,data_9,...,data_287,data_288,data_289,data_290,data_291,data_292,data_293,data_294,data_295,target
0,0.273227,-1.316457,24.766404,0.004006,0.000869,213.0,0.004072,0.000590,192.0,0.003760,...,156.0,0.003990,0.000424,127.0,0.004006,0.000433,132.0,0.375000,0.166667,1.0
1,-0.022138,-1.070116,11.980921,-0.001080,0.001343,405.0,-0.002234,0.001267,126.0,-0.002011,...,335.0,0.002015,0.000538,215.0,0.002425,0.000435,83.0,0.375000,0.833333,0.0
2,-0.707143,0.101740,3.235492,-0.000400,0.000270,70.0,-0.000617,0.000257,25.0,-0.000654,...,25.0,-0.000436,0.000262,10.0,-0.000436,0.000278,29.0,0.291667,0.833333,0.0
3,-1.273240,1.455473,11.611854,-0.001865,0.001021,345.0,-0.001865,0.000918,340.0,-0.002507,...,364.0,-0.000450,0.001153,314.0,-0.000643,0.001222,371.0,0.500000,0.833333,1.0
4,-0.195336,-0.434613,1.785165,-0.000182,0.000467,89.0,-0.000152,0.000464,120.0,-0.000881,...,76.0,-0.001973,0.000394,24.0,-0.001913,0.000435,65.0,0.208333,0.166667,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3267,-0.689332,-0.374979,7.287653,-0.000722,0.001091,27.0,-0.001484,0.001056,37.0,-0.001324,...,428.0,-0.004958,0.000861,285.0,-0.004086,0.000717,322.0,0.166667,0.166667,1.0
3268,-0.073940,-0.703389,7.382039,-0.000600,0.000697,29.0,-0.001199,0.000718,33.0,-0.001199,...,37.0,0.000450,0.001145,56.0,0.000750,0.001161,44.0,0.208333,0.333333,0.0
3269,0.044638,0.718551,9.239440,-0.000988,0.001027,836.0,-0.001849,0.000979,829.0,-0.001699,...,296.0,0.000435,0.001042,272.0,0.000237,0.001107,214.0,0.666667,0.333333,1.0
3270,-0.925441,0.544969,2.885403,-0.000465,0.001000,116.0,-0.000697,0.000960,113.0,-0.000883,...,205.0,-0.000511,0.000650,182.0,-0.000511,0.000656,260.0,0.375000,0.333333,1.0


In [4]:
# Shuffle the sell samples. A fixed seed makes the resulting row order -- and
# therefore any order-dependent step that follows -- repeatable after a
# kernel restart.
df_sell = df_sell.sample(frac=1, random_state=42).reset_index(drop=True)
df_sell

Unnamed: 0,data_0,data_1,data_2,data_3,data_4,data_5,data_6,data_7,data_8,data_9,...,data_287,data_288,data_289,data_290,data_291,data_292,data_293,data_294,data_295,target
0,-0.371578,-1.014345,5.566210,-0.000666,0.000422,120.0,-0.000698,0.000383,72.0,-0.000508,...,29.0,-0.001459,0.000429,46.0,-0.001396,0.000420,59.0,0.166667,0.333333,0.0
1,0.307395,-0.727948,3.714310,0.000324,0.000536,18.0,0.000454,0.000542,12.0,0.000446,...,57.0,-0.000308,0.001390,60.0,-0.000187,0.001358,62.0,0.208333,0.166667,0.0
2,0.664475,0.476066,11.243792,0.001193,0.001438,347.0,0.002023,0.001386,463.0,0.001696,...,397.0,0.000785,0.001447,432.0,0.001570,0.001456,387.0,0.750000,0.833333,0.0
3,0.302238,-0.367719,24.154434,0.002921,0.004727,381.0,0.003610,0.004685,379.0,0.003325,...,561.0,-0.000418,0.005511,543.0,0.001878,0.005490,685.0,0.500000,0.166667,1.0
4,-0.125546,0.387626,6.157725,0.000330,0.000603,67.0,0.000923,0.000579,86.0,0.001583,...,27.0,0.000066,0.000537,48.0,-0.000329,0.000541,45.0,0.916667,0.333333,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2801,0.356681,0.462939,2.665376,0.000375,0.000502,27.0,0.000391,0.000491,29.0,0.000273,...,23.0,0.000625,0.000671,44.0,0.000664,0.000687,25.0,0.250000,0.500000,1.0
2802,0.870890,-0.212881,5.583832,0.001033,0.000871,304.0,0.000917,0.000853,243.0,0.000868,...,248.0,0.000372,0.000834,368.0,0.000413,0.000715,224.0,0.583333,0.500000,1.0
2803,-0.721812,1.191819,12.638646,0.001854,0.002253,110.0,0.001900,0.002232,349.0,0.004166,...,878.0,0.004931,0.001644,841.0,0.003704,0.001561,933.0,0.125000,0.833333,0.0
2804,-1.072758,1.032759,7.446513,0.000519,0.000811,616.0,0.001234,0.000887,688.0,0.001884,...,47.0,0.001039,0.000644,83.0,0.001689,0.000642,99.0,0.416667,0.500000,0.0


In [5]:
# Check and clean: rows with missing values are dropped, but only for a frame
# that actually contains at least one NaN; otherwise the frame is kept as is.
df_buy = df_buy.dropna() if df_buy.isna().values.any() else df_buy
df_sell = df_sell.dropna() if df_sell.isna().values.any() else df_sell

In [6]:
# Buy side: labels come from the 'target' column, features are every other column
y_buy = df_buy['target'].values
X_buy = df_buy.drop(columns='target').values
# The training arrays are the full arrays (no hold-out split is made here)
X_buy_train, y_buy_train = X_buy, y_buy

# Sell side: same layout as the buy side
y_sell = df_sell['target'].values
X_sell = df_sell.drop(columns='target').values
X_sell_train, y_sell_train = X_sell, y_sell

# Number of features, read from the buy training matrix
n_features = X_buy_train.shape[1]

## Definir y entrenar modelos

In [7]:
# Hyper-parameter search space. Keys follow the "<pipeline step>__<parameter>"
# convention, so they address the 'dim_reducer' and 'xgb' pipeline steps.

# Dimensionality-reduction step: component count and kernel coefficient
# (gamma is sampled on a log-uniform scale).
reducer_space = {
    'dim_reducer__n_components': Integer(1, n_features),
    'dim_reducer__gamma': Continuous(1e-4, 1e-1, distribution='log-uniform'),
}

# XGBoost step: ensemble size, tree shape, sampling and regularisation terms.
booster_space = {
    'xgb__n_estimators': Integer(50, 500),
    'xgb__max_depth': Integer(3, 10),
    'xgb__learning_rate': Continuous(0.01, 0.3),
    'xgb__subsample': Continuous(0.6, 1.0),
    'xgb__colsample_bytree': Continuous(0.6, 1.0),
    'xgb__gamma': Continuous(0, 0.5),
    'xgb__min_child_weight': Integer(1, 10),
    'xgb__reg_alpha': Continuous(0, 1.0),
    'xgb__reg_lambda': Continuous(0, 1.0),
}

# Merge in the original key order: reducer parameters first, then booster ones
param_grid = {**reducer_space, **booster_space}

In [8]:
# Buy-side pipeline: robust scaling -> RBF kernel PCA -> XGBoost classifier.
# The step names are the prefixes used by the keys of param_grid.
buy_steps = [
    ('scaler', RobustScaler()),
    ('dim_reducer', KernelPCA(kernel='rbf')),
    ('xgb', xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')),
]
pipe_buy = Pipeline(buy_steps)

# Genetic-algorithm hyper-parameter search, scored by accuracy with cv=5
evolutionary_search_buy = GASearchCV(
    estimator=pipe_buy,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    population_size=50,
    generations=2,
    crossover_probability=0.9,
    mutation_probability=0.1,
    keep_top_k=25,
    n_jobs=-1,
    verbose=True,
)

# Callbacks passed to fit: a progress bar, plus a stopping rule that watches
# the 'fitness' metric with a window of one generation
buy_callbacks = [ProgressBar(), ConsecutiveStopping(generations=1, metric='fitness')]

# Run the search on the buy training data
evolutionary_search_buy.fit(X_buy_train, y_buy_train, callbacks=buy_callbacks)

# Keep the best pipeline found by the search
model_buy = evolutionary_search_buy.best_estimator_

# Report the winning hyper-parameters and their score
print(evolutionary_search_buy.best_params_)
print(evolutionary_search_buy.best_score_)

  0%|          | 0/3 [00:00<?, ?it/s]

gen	nevals	fitness	fitness_std	fitness_max	fitness_min
0  	10    	0.57897	0.0130913  	0.598097   	0.554092   
1  	20    	0.587129	0.00641575 	0.598099   	0.577612   
2  	20    	0.590185	0.0047645  	0.598099   	0.58007    
{'dim_reducer__n_components': 178, 'dim_reducer__gamma': 0.00018731253955333403, 'xgb__n_estimators': 298, 'xgb__max_depth': 9, 'xgb__learning_rate': 0.03486996972781047, 'xgb__subsample': 0.916325424848935, 'xgb__colsample_bytree': 0.9370493798523257, 'xgb__gamma': 0.24460297238044432, 'xgb__min_child_weight': 10, 'xgb__reg_alpha': 0.9413543090004716, 'xgb__reg_lambda': 0.2971824107642125}
0.5980993066741369


In [9]:
# Sell-side pipeline: robust scaling -> RBF kernel PCA -> XGBoost classifier.
# kernel='rbf' matches the buy-side pipeline and is required for the searched
# 'dim_reducer__gamma' to matter: KernelPCA's default linear kernel ignores gamma.
pipe_sell = Pipeline([
    ('scaler', RobustScaler()),
    ('dim_reducer', KernelPCA(kernel='rbf')),
    ('xgb', xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'))
])
# Genetic-algorithm hyper-parameter search, scored by accuracy with cv=5.
# The estimator is the sell pipeline defined above (it previously pointed at
# pipe_buy, which left pipe_sell unused).
# NOTE(review): the buy-side search in this notebook uses generations=2;
# confirm that 20 here is intentional.
evolutionary_search_sell = GASearchCV(
    estimator=pipe_sell,
    cv=5,
    scoring='accuracy',
    param_grid=param_grid,
    population_size=50,
    generations=20,
    mutation_probability=0.1,
    crossover_probability=0.9,
    n_jobs=-1,
    verbose=True,
    keep_top_k=25
)
# Run the search on the sell training data, with a progress bar and a stopping
# rule that watches the 'fitness' metric with a window of one generation
evolutionary_search_sell.fit(X_sell_train, y_sell_train, callbacks=[ProgressBar(), ConsecutiveStopping(generations=1, metric='fitness')])
# Keep the best pipeline found by the search
model_sell = evolutionary_search_sell.best_estimator_
# Report the winning hyper-parameters and their score
print(evolutionary_search_sell.best_params_)
print(evolutionary_search_sell.best_score_)

  0%|          | 0/3 [00:00<?, ?it/s]

gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	10    	0.552132	0.0197013  	0.57768    	0.500716   
1  	20    	0.56261 	0.0121099  	0.57768    	0.547758   
2  	20    	0.569626	0.00677073 	0.576611   	0.553461   
{'dim_reducer__n_components': 232, 'dim_reducer__gamma': 0.0004569576262671673, 'xgb__n_estimators': 420, 'xgb__max_depth': 8, 'xgb__learning_rate': 0.03629450750251921, 'xgb__subsample': 0.8227816185974162, 'xgb__colsample_bytree': 0.9763599573118829, 'xgb__gamma': 0.30048387929014253, 'xgb__min_child_weight': 7, 'xgb__reg_alpha': 0.14151792141076736, 'xgb__reg_lambda': 0.7056346850069357}
0.5776802989070102


## Exportar modelos a formato ONNX

In [10]:
# Converter options accepted for XGBClassifier: 'nocl' may only be False,
# 'zipmap' may be True, False or 'columns'.
xgb_converter_options = {'nocl': [False], 'zipmap': [True, False, 'columns']}

# Register XGBClassifier with skl2onnx under the alias "XGBClassifier", using
# the linear-classifier shape calculator and the onnxmltools XGBoost converter,
# so that a pipeline containing it can be passed to convert_sklearn.
update_registered_converter(
    xgb.XGBClassifier,
    "XGBClassifier",
    calculate_linear_classifier_output_shapes,
    convert_xgboost,
    options=xgb_converter_options,
)

In [11]:
# Convert each fitted pipeline to ONNX. The input is declared as a float tensor
# named 'input' with a free batch dimension and one column per training
# feature; each model takes its width from its OWN training matrix (the sell
# model previously borrowed the buy matrix's width).
model_buy_onnx = convert_sklearn(
    model_buy,
    'pipeline_buy_xgboost',
    [('input', FloatTensorType([None, X_buy_train.shape[1]]))],
    target_opset={'': 12, 'ai.onnx.ml': 2}
)
model_sell_onnx = convert_sklearn(
    model_sell,
    'pipeline_sell_xgboost',
    [('input', FloatTensorType([None, X_sell_train.shape[1]]))],
    target_opset={'': 12, 'ai.onnx.ml': 2}
)
# Write the serialized models into the terminal's MQL5/Files folder
mql5_files_folder = "C:/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/6C3C6A11D1C3791DD4DBF45421BF8028/MQL5/Files"
with open(os.path.join(mql5_files_folder, "model_buy.onnx"), 'wb') as f:
    f.write(model_buy_onnx.SerializeToString())
with open(os.path.join(mql5_files_folder, "model_sell.onnx"), 'wb') as f:
    f.write(model_sell_onnx.SerializeToString())