## Importar librerías

In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import KernelPCA
from sklearn_genetic import GASearchCV
from sklearn_genetic.plots import plot_fitness_evolution
from sklearn_genetic.space import Integer, Categorical, Continuous
from sklearn_genetic.callbacks import ProgressBar, ConsecutiveStopping
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.pipeline import Pipeline
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
from onnxmltools.convert import convert_xgboost
import warnings
warnings.filterwarnings('ignore')

## Cargar y preparar datos

In [2]:
# Load the buy and sell training datasets from the MetaQuotes terminal common files folder
tester_files_folder = r"C:\Users\Administrador\AppData\Roaming\MetaQuotes\Terminal\Common\Files"
buy_csv_path = os.path.join(tester_files_folder, "buy_training_dataset.csv")
sell_csv_path = os.path.join(tester_files_folder, "sell_training_dataset.csv")
df_buy = pd.read_csv(buy_csv_path)
df_sell = pd.read_csv(sell_csv_path)
# Report the number of rows and the number of columns minus one
# (the 'target' column is not counted as a feature)
print(f"Buy -> Trades: {df_buy.shape[0]} Features: {df_buy.shape[1]-1}")
print(f"Sell -> Trades: {df_sell.shape[0]} Features: {df_sell.shape[1]-1}")

Buy -> Trades: 3364 Features: 296
Sell -> Trades: 3064 Features: 296


In [3]:
# Shuffle the samples (buy trades).
# A fixed random_state makes the shuffle reproducible on a fresh kernel, so results
# that depend on row order can be compared between runs.
# NOTE: re-running this cell shuffles the already-shuffled frame again.
df_buy = df_buy.sample(frac=1, random_state=42).reset_index(drop=True)
df_buy

Unnamed: 0,data_0,data_1,data_2,data_3,data_4,data_5,data_6,data_7,data_8,data_9,...,data_287,data_288,data_289,data_290,data_291,data_292,data_293,data_294,data_295,target
0,-0.082995,1.372703,4.217311,-0.000636,0.001210,313.0,-0.000495,0.001240,322.0,-0.000495,...,453.0,-0.001887,0.001380,433.0,-0.001181,0.001425,415.0,0.750000,0.666667,0.0
1,0.240428,0.252779,7.163154,-0.000350,0.001420,435.0,-0.001018,0.001424,516.0,-0.000928,...,445.0,-0.001164,0.001367,587.0,-0.002676,0.001456,542.0,0.458333,0.333333,0.0
2,0.410962,-0.661037,8.377429,-0.000654,0.001470,501.0,-0.001390,0.001516,495.0,-0.003023,...,609.0,-0.000573,0.001450,425.0,0.000327,0.001434,601.0,0.583333,0.500000,1.0
3,-0.624602,-0.846291,30.839952,-0.004501,0.002197,1081.0,-0.003655,0.002079,620.0,-0.005900,...,502.0,-0.000501,0.003027,625.0,-0.001238,0.003752,665.0,0.791667,0.500000,0.0
4,-1.241263,1.169950,9.492406,-0.001751,0.001097,51.0,-0.001680,0.001026,64.0,-0.001537,...,66.0,0.000250,0.001141,57.0,0.001396,0.001150,47.0,0.208333,0.500000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3359,-0.113376,-0.789315,6.923939,-0.000611,0.000684,61.0,-0.001303,0.000643,32.0,-0.001384,...,32.0,-0.001873,0.000657,46.0,-0.001466,0.000657,27.0,0.333333,0.333333,1.0
3360,0.205988,-0.798858,8.848182,-0.000321,0.000589,533.0,-0.001026,0.000552,392.0,-0.001590,...,87.0,-0.001103,0.000486,57.0,-0.001219,0.000480,79.0,0.666667,0.166667,1.0
3361,-0.996242,0.793455,6.484978,-0.001722,0.001016,561.0,-0.001356,0.000927,164.0,-0.001522,...,161.0,0.000391,0.000829,185.0,0.002643,0.000814,195.0,0.416667,0.500000,0.0
3362,0.297186,0.492685,4.987897,-0.000441,0.000734,149.0,-0.000630,0.000743,147.0,-0.000945,...,266.0,0.000252,0.001273,217.0,0.001198,0.001286,200.0,0.541667,0.833333,0.0


In [4]:
# Shuffle the samples (sell trades).
# A fixed random_state makes the shuffle reproducible on a fresh kernel, so results
# that depend on row order can be compared between runs.
# NOTE: re-running this cell shuffles the already-shuffled frame again.
df_sell = df_sell.sample(frac=1, random_state=42).reset_index(drop=True)
df_sell

Unnamed: 0,data_0,data_1,data_2,data_3,data_4,data_5,data_6,data_7,data_8,data_9,...,data_287,data_288,data_289,data_290,data_291,data_292,data_293,data_294,data_295,target
0,1.043091,1.672516,5.112068,0.001029,0.000985,251.0,0.000940,0.000868,138.0,0.000985,...,150.0,0.000537,0.001011,132.0,-0.000268,0.001082,141.0,0.583333,0.666667,1.0
1,0.535174,-0.204805,8.435681,0.001584,0.001229,398.0,0.001403,0.001102,266.0,0.001448,...,159.0,0.001910,0.001024,88.0,0.001774,0.001041,64.0,0.416667,0.833333,0.0
2,-0.633445,0.647443,11.163581,0.001257,0.000987,42.0,0.002328,0.000978,72.0,0.001934,...,29.0,0.001450,0.001134,24.0,0.001257,0.001259,55.0,0.250000,0.333333,0.0
3,0.004522,-0.091472,6.774256,0.000720,0.000677,206.0,0.001205,0.000654,247.0,0.001972,...,169.0,-0.000727,0.000621,230.0,-0.000651,0.000593,208.0,0.750000,0.666667,1.0
4,0.586430,-0.640000,5.594606,0.000948,0.000675,39.0,0.001088,0.000628,47.0,0.001142,...,10.0,0.001881,0.000895,17.0,0.001671,0.000973,26.0,0.208333,0.166667,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3059,-0.081082,-1.110919,4.623756,0.000376,0.000473,43.0,0.000659,0.000464,32.0,0.000941,...,68.0,0.000094,0.000219,48.0,-0.000094,0.000208,34.0,0.375000,0.666667,0.0
3060,0.183148,-0.614935,3.042736,0.000458,0.000745,217.0,0.000458,0.000731,208.0,0.000523,...,259.0,0.000033,0.001002,241.0,0.000359,0.001022,196.0,0.791667,0.833333,0.0
3061,-0.587054,0.119077,2.586949,0.000329,0.000740,33.0,0.000493,0.000764,32.0,0.000575,...,79.0,-0.000410,0.000699,61.0,-0.000287,0.000706,39.0,0.208333,0.333333,1.0
3062,0.267435,-0.610224,12.547416,0.001226,0.001414,1460.0,0.001292,0.001217,697.0,0.001226,...,622.0,-0.001893,0.001922,756.0,-0.001975,0.001918,818.0,0.833333,0.166667,1.0


In [5]:
# Check each dataset for missing values and, if any are present, drop the incomplete rows
if df_buy.isna().any(axis=None):
    df_buy = df_buy.dropna()
if df_sell.isna().any(axis=None):
    df_sell = df_sell.dropna()

In [6]:
# Buy data: separate the label vector ('target') from the feature matrix (all other columns)
y_buy = df_buy['target'].values
X_buy = df_buy.drop(columns='target').values
# The whole dataset is used for training; no hold-out split is made here
X_buy_train, y_buy_train = X_buy, y_buy
# Sell data: same split into labels and features
y_sell = df_sell['target'].values
X_sell = df_sell.drop(columns='target').values
X_sell_train, y_sell_train = X_sell, y_sell
# Number of features, taken from the buy training matrix
n_features = X_buy_train.shape[1]

## Definir y entrenar modelos

In [7]:
# Hyperparameter search space; keys use the "<pipeline step>__<parameter>" form
# Search space of the dimensionality reducer (step 'dim_reducer')
reducer_space = {
    'dim_reducer__n_components': Integer(1, n_features),
    'dim_reducer__kernel': Categorical(['linear', 'poly', 'rbf', 'sigmoid', 'cosine']),
}
# Search space of the XGBoost classifier (step 'xgb')
xgb_space = {
    'xgb__n_estimators': Integer(50, 500),
    'xgb__max_depth': Integer(3, 10),
    'xgb__learning_rate': Continuous(0.01, 0.3),
    'xgb__subsample': Continuous(0.6, 1.0),
    'xgb__colsample_bytree': Continuous(0.6, 1.0),
    'xgb__gamma': Continuous(0, 0.5),
    'xgb__min_child_weight': Integer(1, 10),
    'xgb__reg_alpha': Continuous(0, 1.0),
    'xgb__reg_lambda': Continuous(0, 1.0),
}
# Merge both spaces, keeping the reducer entries first
param_grid = {**reducer_space, **xgb_space}

In [None]:
# Buy pipeline: robust scaling -> kernel PCA -> XGBoost classifier.
# The reducer and classifier are placeholders; their hyperparameters come from param_grid.
# NOTE(review): 'mlogloss' is XGBoost's multiclass log-loss, while the displayed
# targets look binary (0.0 / 1.0) -- confirm this metric is intended.
buy_steps = [
    ('scaler', RobustScaler()),
    ('dim_reducer', KernelPCA()),
    ('xgb', xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')),
]
pipe_buy = Pipeline(buy_steps)
# Genetic-algorithm hyperparameter search over param_grid, scored by 5-fold CV accuracy
evolutionary_search_buy = GASearchCV(
    estimator=pipe_buy,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    population_size=100,
    generations=20,
    crossover_probability=0.8,
    mutation_probability=0.2,
    keep_top_k=10,
    n_jobs=-1,
    verbose=True,
)
# Show a progress bar and allow early stopping based on the 'fitness_max' metric
buy_callbacks = [ProgressBar(), ConsecutiveStopping(generations=1, metric='fitness_max')]
# Run the search on the buy training data
evolutionary_search_buy.fit(X_buy_train, y_buy_train, callbacks=buy_callbacks)
# Keep the best pipeline found by the search
model_buy = evolutionary_search_buy.best_estimator_

  0%|          | 0/21 [00:00<?, ?it/s]

In [None]:
# Show the best hyperparameters found for the buy model, then the corresponding best score
for result_attr in ("best_params_", "best_score_"):
    print(getattr(evolutionary_search_buy, result_attr))

In [None]:
# Sell pipeline: robust scaling -> kernel PCA -> XGBoost classifier.
# The reducer and classifier are placeholders; their hyperparameters come from param_grid.
# NOTE(review): 'mlogloss' is XGBoost's multiclass log-loss, while the displayed
# targets look binary (0.0 / 1.0) -- confirm this metric is intended.
pipe_sell = Pipeline([
    ('scaler', RobustScaler()),
    ('dim_reducer', KernelPCA()),
    ('xgb', xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'))
])
# Genetic-algorithm hyperparameter search over param_grid, scored by 5-fold CV accuracy.
# Fix: the search must wrap pipe_sell. The original passed pipe_buy (copy-paste slip),
# which left pipe_sell unused.
evolutionary_search_sell = GASearchCV(
    estimator=pipe_sell,
    cv=5,
    scoring='accuracy',
    param_grid=param_grid,
    population_size=100,
    generations=20,
    mutation_probability=0.2,
    crossover_probability=0.8,
    n_jobs=-1,
    verbose=True,
    keep_top_k=10
)
# Run the search on the sell training data, with a progress bar and
# early stopping based on the 'fitness_max' metric
evolutionary_search_sell.fit(X_sell_train, y_sell_train, callbacks=[ProgressBar(), ConsecutiveStopping(generations=1, metric='fitness_max')])
# Keep the best pipeline found by the search
model_sell = evolutionary_search_sell.best_estimator_

In [None]:
# Show the best hyperparameters found for the sell model, then the corresponding best score
for result_attr in ("best_params_", "best_score_"):
    print(getattr(evolutionary_search_sell, result_attr))

## Exportar modelos a formato ONNX

In [10]:
# Register XGBClassifier with skl2onnx so that pipelines containing it can be converted.
# The fourth argument must be the operator-level converter. The import cell binds the name
# `convert_xgboost` twice, and the last binding (`onnxmltools.convert.convert_xgboost`) is the
# model-level conversion function, so the operator converter is imported here under an
# unambiguous alias instead of relying on whichever import ran last.
from onnxmltools.convert.xgboost.operator_converters.XGBoost import (
    convert_xgboost as convert_xgboost_operator,
)
update_registered_converter(
    xgb.XGBClassifier,
    "XGBClassifier",
    calculate_linear_classifier_output_shapes,
    convert_xgboost_operator,
    options={'nocl': [False], 'zipmap': [True, False, 'columns']}
)

In [11]:
# Convert the fitted buy pipeline to ONNX. The input is declared as a float tensor whose
# first dimension is left unspecified (None) and whose second is the buy feature count.
model_buy_onnx = convert_sklearn(
    model_buy,
    'pipeline_buy_xgboost',
    [('input', FloatTensorType([None, X_buy_train.shape[1]]))],
    target_opset={'': 12, 'ai.onnx.ml': 2}
)
# Convert the fitted sell pipeline to ONNX.
# Fix: the input width comes from the sell training matrix. The original used
# X_buy_train.shape[1], which is only correct while both datasets have the same width.
model_sell_onnx = convert_sklearn(
    model_sell,
    'pipeline_sell_xgboost',
    [('input', FloatTensorType([None, X_sell_train.shape[1]]))],
    target_opset={'': 12, 'ai.onnx.ml': 2}
)
# Write both serialized ONNX models into the terminal's MQL5/Files folder
mql5_files_folder = "C:/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/6C3C6A11D1C3791DD4DBF45421BF8028/MQL5/Files"
for onnx_filename, onnx_model in (
    ("model_buy.onnx", model_buy_onnx),
    ("model_sell.onnx", model_sell_onnx),
):
    # Binary mode: SerializeToString() returns the model as bytes
    with open(os.path.join(mql5_files_folder, onnx_filename), 'wb') as f:
        f.write(onnx_model.SerializeToString())