## Importar librerías

In [1]:
import os
import pandas as pd
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import KernelPCA
from sklearn.model_selection import StratifiedKFold
from sklearn_genetic import GASearchCV
from sklearn_genetic import ExponentialAdapter
from sklearn_genetic.space import Integer, Categorical, Continuous
from sklearn_genetic.callbacks import ProgressBar, ConsecutiveStopping
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from concurrent.futures import ThreadPoolExecutor, wait
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
import warnings
warnings.filterwarnings('ignore')

## Cargar y preparar datos

In [2]:
# Load the buy / sell training datasets exported to the MetaTrader common files folder
tester_files_folder = r"C:\Users\Administrador\AppData\Roaming\MetaQuotes\Terminal\Common\Files"
buy_csv_path = os.path.join(tester_files_folder, "buy_training_dataset.csv")
sell_csv_path = os.path.join(tester_files_folder, "sell_training_dataset.csv")
df_buy = pd.read_csv(buy_csv_path)
df_sell = pd.read_csv(sell_csv_path)
# One summary line per side; the last column is the target, hence the "- 1"
for side_label, side_frame in (("Buy", df_buy), ("Sell", df_sell)):
    print(f"{side_label} -> Trades: {side_frame.shape[0]} Features: {side_frame.shape[1]-1}")

Buy -> Trades: 3256 Features: 296
Sell -> Trades: 2806 Features: 296


In [3]:
# Shuffle the buy samples.
# A fixed random_state makes the row order reproducible across
# "Restart Kernel -> Run All" executions; without it every run trains on a
# differently ordered frame.
df_buy = df_buy.sample(frac=1, random_state=42).reset_index(drop=True)
df_buy

Unnamed: 0,data_0,data_1,data_2,data_3,data_4,data_5,data_6,data_7,data_8,data_9,...,data_287,data_288,data_289,data_290,data_291,data_292,data_293,data_294,data_295,target
0,-0.295788,-0.912332,3.806443,-0.000503,0.000458,198.0,-0.000503,0.000454,73.0,-0.000440,...,61.0,-0.001037,0.000364,70.0,-0.000723,0.000346,93.0,0.916667,0.333333,0.0
1,0.077675,-0.562644,2.450891,-0.000262,0.001201,168.0,-0.000525,0.001253,189.0,-0.001136,...,218.0,0.001190,0.001246,226.0,0.001050,0.001264,164.0,0.666667,0.500000,1.0
2,0.065275,-0.198058,10.081263,-0.000866,0.001177,315.0,-0.001546,0.001198,386.0,-0.001892,...,116.0,-0.002355,0.000759,113.0,-0.002293,0.000731,121.0,0.416667,0.500000,0.0
3,0.027383,-0.153231,5.229061,-0.000279,0.001181,382.0,-0.000486,0.001204,295.0,-0.000810,...,326.0,-0.003021,0.001065,410.0,-0.003344,0.001047,480.0,0.708333,0.833333,0.0
4,-0.239367,-0.604876,11.507831,-0.001140,0.000823,291.0,-0.001973,0.000774,164.0,-0.002448,...,174.0,-0.003488,0.000696,144.0,-0.003098,0.000719,131.0,0.583333,0.333333,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3251,0.380829,-0.328188,3.804018,-0.000365,0.000453,246.0,-0.000722,0.000459,268.0,-0.000649,...,283.0,-0.000292,0.000416,223.0,-0.000438,0.000411,192.0,0.916667,0.333333,1.0
3252,-1.017597,1.347065,7.942043,-0.000818,0.001147,317.0,-0.001486,0.001106,411.0,-0.002723,...,333.0,-0.002662,0.000663,382.0,-0.002600,0.000624,386.0,0.708333,0.333333,1.0
3253,0.300929,-0.197564,6.640287,-0.000589,0.000571,148.0,-0.001309,0.000538,99.0,-0.000917,...,104.0,-0.002094,0.000605,73.0,-0.002159,0.000640,70.0,0.625000,0.333333,0.0
3254,-1.226575,1.166120,9.195962,-0.001382,0.001737,1019.0,-0.001479,0.001587,736.0,-0.001439,...,987.0,-0.002528,0.001836,1403.0,-0.002811,0.001783,311.0,0.458333,0.333333,1.0


In [4]:
# Shuffle the sell samples.
# A fixed random_state makes the row order reproducible across
# "Restart Kernel -> Run All" executions; without it every run trains on a
# differently ordered frame.
df_sell = df_sell.sample(frac=1, random_state=42).reset_index(drop=True)
df_sell

Unnamed: 0,data_0,data_1,data_2,data_3,data_4,data_5,data_6,data_7,data_8,data_9,...,data_287,data_288,data_289,data_290,data_291,data_292,data_293,data_294,data_295,target
0,0.168690,-0.339132,4.717235,0.000970,0.000750,147.0,0.001033,0.000671,80.0,0.000733,...,28.0,0.000102,0.000586,57.0,0.000378,0.000558,42.0,0.250000,0.833333,1.0
1,0.440934,-0.742326,11.272881,0.002703,0.002690,688.0,0.002654,0.002594,785.0,0.003508,...,1103.0,0.000813,0.003089,956.0,-0.003149,0.003130,836.0,0.791667,0.666667,0.0
2,0.314110,1.326681,17.633790,0.001724,0.002300,890.0,0.002520,0.002326,945.0,0.003583,...,893.0,0.000119,0.002452,1016.0,0.001657,0.002615,732.0,0.916667,0.166667,0.0
3,0.110115,-0.743381,5.846479,0.000572,0.000701,119.0,0.000763,0.000701,83.0,0.000700,...,97.0,-0.000744,0.000904,131.0,-0.000299,0.000954,101.0,0.666667,0.500000,1.0
4,0.908687,1.217198,6.798513,0.000763,0.001217,80.0,0.001261,0.001216,92.0,0.001095,...,84.0,-0.003641,0.001145,136.0,-0.002715,0.001123,118.0,0.250000,0.666667,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2801,0.005358,-0.862190,8.615320,0.000737,0.001763,387.0,0.001546,0.001765,532.0,0.001490,...,772.0,0.001887,0.001658,949.0,0.000792,0.001610,811.0,0.791667,0.333333,1.0
2802,0.331086,-0.625930,12.366591,0.002353,0.002116,80.0,0.002968,0.002023,47.0,0.003442,...,76.0,0.001464,0.002819,104.0,0.001322,0.002837,92.0,0.291667,0.500000,1.0
2803,0.039652,0.094297,4.904724,0.000650,0.000722,215.0,0.001260,0.000703,207.0,0.001138,...,177.0,0.001731,0.000794,231.0,0.001056,0.000780,176.0,0.750000,0.500000,1.0
2804,0.026980,-0.074340,4.214216,0.000405,0.000813,276.0,0.000243,0.000929,319.0,0.001622,...,144.0,0.002028,0.000835,83.0,0.002028,0.000803,80.0,0.416667,0.166667,1.0


In [5]:
# Check for missing values; rows containing any NaN are dropped, otherwise the
# frame is left untouched
df_buy = df_buy.dropna() if df_buy.isna().values.any() else df_buy
df_sell = df_sell.dropna() if df_sell.isna().values.any() else df_sell

In [6]:
# Buy side: target vector and feature matrix (every column except 'target')
y_buy_train = df_buy['target'].values
X_buy_train = df_buy.drop(columns='target').values
# Sell side: target vector and feature matrix (every column except 'target')
y_sell_train = df_sell['target'].values
X_sell_train = df_sell.drop(columns='target').values
# Number of features, read from the width of the buy feature matrix
n_features = X_buy_train.shape[-1]

## Entrenar modelos

In [7]:
def train_model_buy(X_buy_train, y_buy_train, param_grid, cv, crossover_adapter, mutation_adapter):
    """Tune and fit the buy-side pipeline with a genetic hyper-parameter search.

    The pipeline is RobustScaler -> KernelPCA (rbf kernel) -> XGBClassifier and
    candidates are scored by cross-validated accuracy through ``GASearchCV``.

    Parameters
    ----------
    X_buy_train : feature matrix passed to ``GASearchCV.fit``.
    y_buy_train : target vector passed to ``GASearchCV.fit``.
    param_grid : dict mapping pipeline parameter names to search spaces.
    cv : cross-validation splitter forwarded to ``GASearchCV``.
    crossover_adapter, mutation_adapter : crossover / mutation probabilities
        (adapter objects or plain numbers). They are copied before use, so the
        objects supplied by the caller are never modified.

    Returns
    -------
    The ``best_estimator_`` of the finished search.

    Raises
    ------
    Exception
        Any error raised while building or fitting the search is printed and
        then re-raised unchanged.
    """
    from copy import deepcopy

    try:
        # Work on private copies of the adapters: scheduler adapters are stateful
        # objects that the search advances as generations go by, and the caller
        # hands the same instances to the buy and sell searches, which run in
        # parallel threads. Sharing them would let one search disturb the
        # crossover / mutation schedule of the other.
        crossover_probability = deepcopy(crossover_adapter)
        mutation_probability = deepcopy(mutation_adapter)
        # Pipeline with placeholder steps whose parameters the search will set (buy side)
        # NOTE(review): 'mlogloss' is the multiclass log-loss metric; the target
        # looks binary (0/1) - confirm that 'logloss' is not what was intended.
        pipe_buy = Pipeline([
            ('scaler', RobustScaler()),
            ('dim_reducer', KernelPCA(kernel='rbf')),
            ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'))
        ])
        # Genetic search configuration
        evolutionary_search_buy = GASearchCV(
            estimator=pipe_buy,
            cv=cv,
            scoring='accuracy',
            population_size=100,
            generations=50,
            tournament_size=5,
            elitism=True,
            crossover_probability=crossover_probability,
            mutation_probability=mutation_probability,
            param_grid=param_grid,
            criteria='max',
            algorithm='eaMuPlusLambda',
            n_jobs=-1,
            verbose=True,
            keep_top_k=25
        )
        # Run the search with a progress bar and an early-stopping callback that
        # watches the 'fitness' metric over a 1-generation window
        evolutionary_search_buy.fit(
            X_buy_train,
            y_buy_train,
            callbacks=[ProgressBar(), ConsecutiveStopping(generations=1, metric='fitness')]
        )
        # Best estimator found by the search
        model_buy = evolutionary_search_buy.best_estimator_
        # Report the winning hyper-parameters and their score
        print(evolutionary_search_buy.best_params_)
        print(evolutionary_search_buy.best_score_)
    except Exception as e:
        print(f"Error en train_model_buy: {e}")
        raise
    return model_buy


In [8]:
def train_model_sell(X_sell_train, y_sell_train, param_grid, cv, crossover_adapter, mutation_adapter):
    """Tune and fit the sell-side pipeline with a genetic hyper-parameter search.

    The pipeline is RobustScaler -> KernelPCA (rbf kernel) -> XGBClassifier and
    candidates are scored by cross-validated accuracy through ``GASearchCV``.

    Parameters
    ----------
    X_sell_train : feature matrix passed to ``GASearchCV.fit``.
    y_sell_train : target vector passed to ``GASearchCV.fit``.
    param_grid : dict mapping pipeline parameter names to search spaces.
    cv : cross-validation splitter forwarded to ``GASearchCV``.
    crossover_adapter, mutation_adapter : crossover / mutation probabilities
        (adapter objects or plain numbers). They are copied before use, so the
        objects supplied by the caller are never modified.

    Returns
    -------
    The ``best_estimator_`` of the finished search.

    Raises
    ------
    Exception
        Any error raised while building or fitting the search is printed and
        then re-raised unchanged.
    """
    from copy import deepcopy

    try:
        # Work on private copies of the adapters: scheduler adapters are stateful
        # objects that the search advances as generations go by, and the caller
        # hands the same instances to the buy and sell searches, which run in
        # parallel threads. Sharing them would let one search disturb the
        # crossover / mutation schedule of the other.
        crossover_probability = deepcopy(crossover_adapter)
        mutation_probability = deepcopy(mutation_adapter)
        # Pipeline with placeholder steps whose parameters the search will set (sell side)
        # NOTE(review): 'mlogloss' is the multiclass log-loss metric; the target
        # looks binary (0/1) - confirm that 'logloss' is not what was intended.
        pipe_sell = Pipeline([
            ('scaler', RobustScaler()),
            ('dim_reducer', KernelPCA(kernel='rbf')),
            ('xgb', XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'))
        ])
        # Genetic search configuration
        evolutionary_search_sell = GASearchCV(
            estimator=pipe_sell,
            cv=cv,
            scoring='accuracy',
            population_size=100,
            generations=50,
            tournament_size=5,
            elitism=True,
            crossover_probability=crossover_probability,
            mutation_probability=mutation_probability,
            param_grid=param_grid,
            criteria='max',
            algorithm='eaMuPlusLambda',
            n_jobs=-1,
            verbose=True,
            keep_top_k=25
        )
        # Run the search with a progress bar and an early-stopping callback that
        # watches the 'fitness' metric over a 1-generation window
        evolutionary_search_sell.fit(
            X_sell_train,
            y_sell_train,
            callbacks=[ProgressBar(), ConsecutiveStopping(generations=1, metric='fitness')]
        )
        # Best estimator found by the search
        model_sell = evolutionary_search_sell.best_estimator_
        # Report the winning hyper-parameters and their score
        print(evolutionary_search_sell.best_params_)
        print(evolutionary_search_sell.best_score_)
    except Exception as e:
        # The message now names this function (it previously said train_model_buy)
        print(f"Error en train_model_sell: {e}")
        raise
    return model_sell

In [9]:
# NOTE(review): this cell repeats the data preparation already performed in
# cell In [6]; it rebuilds the same arrays and is a candidate for removal.
# Buy side: feature matrix (all columns but 'target') and target vector
buy_features = df_buy.drop(columns='target')
X_buy_train, y_buy_train = buy_features.values, df_buy['target'].values
# Sell side: feature matrix (all columns but 'target') and target vector
sell_features = df_sell.drop(columns='target')
X_sell_train, y_sell_train = sell_features.values, df_sell['target'].values
# Number of features, read from the width of the buy feature matrix
n_features = X_buy_train.shape[1]

In [10]:
# Genetic-algorithm schedules: the crossover probability moves from 0.1 to 0.9
# while the mutation probability moves from 0.9 to 0.1
crossover_adapter = ExponentialAdapter(initial_value=0.1, end_value=0.9, adaptive_rate=0.1)
mutation_adapter = ExponentialAdapter(initial_value=0.9, end_value=0.1, adaptive_rate=0.1)
# Seed the shuffling splitter: with random_state=None every call to split()
# yields different folds, so candidates evaluated in separate cross-validation
# calls would be scored on different partitions and their fitness values would
# not be directly comparable (nor reproducible between runs).
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# Hyper-parameter search space (keys follow the pipeline's "<step>__<param>" naming)
param_grid = {
    'scaler': Categorical([RobustScaler(), 'passthrough']),
    'dim_reducer__n_components': Integer(1, n_features),
    'dim_reducer__gamma': Continuous(1e-4, 1e-1, distribution='log-uniform'),
    'xgb__n_estimators': Integer(50, 500),
    'xgb__max_depth': Integer(3, 10),
    'xgb__learning_rate': Continuous(0.01, 0.3),
    'xgb__subsample': Continuous(0.6, 1.0),
    'xgb__colsample_bytree': Continuous(0.6, 1.0),
    'xgb__gamma': Continuous(0, 0.5),
    'xgb__min_child_weight': Integer(1, 10),
    'xgb__reg_alpha': Continuous(0, 1.0),
    'xgb__reg_lambda': Continuous(0, 1.0)
}

In [None]:

# Train the buy and sell models at the same time, one worker thread per model
shared_search_args = (param_grid, cv, crossover_adapter, mutation_adapter)
with ThreadPoolExecutor(max_workers=2) as executor:
    # Submit both training jobs; they differ only in the data they receive
    future_buy = executor.submit(train_model_buy, X_buy_train, y_buy_train, *shared_search_args)
    future_sell = executor.submit(train_model_sell, X_sell_train, y_sell_train, *shared_search_args)
    futures = [future_buy, future_sell]
    # Block until both jobs are done (wait() itself does not raise on task failure)
    print("Esperando que las tareas finalicen...")
    wait(futures)
    print("¡Todas las tareas han terminado!")
    # Collect the results; result() re-raises any exception raised inside a job
    model_buy = future_buy.result()
    model_sell = future_sell.result()

Esperando que las tareas finalicen...


  0%|          | 0/51 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	100   	0.558029	0.0173255  	0.593367   	0.500327   
gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	100   	0.574764	0.0167636  	0.608421   	0.51843    


## Exportar modelos a formato ONNX

In [None]:
# Register the XGBClassifier converter so that convert_sklearn can translate
# the final step of the fitted pipelines
update_registered_converter(
    XGBClassifier,
    "XGBClassifier",
    calculate_linear_classifier_output_shapes,
    convert_xgboost,
    options={'nocl': [False], 'zipmap': [True, False, 'columns']}
)
# Buy model: a float input tensor with one column per buy feature
model_buy_onnx = convert_sklearn(
    model_buy,
    'pipeline_buy_xgboost',
    [('input', FloatTensorType([None, X_buy_train.shape[1]]))],
    target_opset={'': 12, 'ai.onnx.ml': 2}
)
# Sell model: the input width is taken from the sell feature matrix (it was
# previously read from X_buy_train, which is only right while both datasets
# happen to have the same number of columns)
model_sell_onnx = convert_sklearn(
    model_sell,
    'pipeline_sell_xgboost',
    [('input', FloatTensorType([None, X_sell_train.shape[1]]))],
    target_opset={'': 12, 'ai.onnx.ml': 2}
)

In [21]:
mql5_files_folder = "C:/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/6C3C6A11D1C3791DD4DBF45421BF8028/MQL5/Files"


def _write_onnx(onnx_model, filename):
    """Serialize ``onnx_model`` and write the bytes to ``filename`` inside ``mql5_files_folder``."""
    with open(os.path.join(mql5_files_folder, filename), 'wb') as f:
        f.write(onnx_model.SerializeToString())


# Save both converted models next to each other in the MQL5 files folder
_write_onnx(model_buy_onnx, "model_buy.onnx")
_write_onnx(model_sell_onnx, "model_sell.onnx")