## Importar librerías

In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.decomposition import PCA, KernelPCA, TruncatedSVD, FactorAnalysis, FastICA
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Integer, Categorical, Continuous
from sklearn_genetic.callbacks import ProgressBar
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.pipeline import Pipeline
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
from onnxmltools.convert import convert_xgboost
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the buy and sell training sets exported as CSV files.
tester_files_folder = r"C:\Users\Administrador\AppData\Roaming\MetaQuotes\Terminal\Common\Files"
buy_csv_path = os.path.join(tester_files_folder, "buy_training_dataset.csv")
sell_csv_path = os.path.join(tester_files_folder, "sell_training_dataset.csv")
df_buy = pd.read_csv(buy_csv_path)
df_sell = pd.read_csv(sell_csv_path)
# One column is subtracted from the width when reporting the feature count.
buy_rows, buy_cols = df_buy.shape
sell_rows, sell_cols = df_sell.shape
print(f"Buy -> Trades: {buy_rows} Features: {buy_cols - 1}")
print(f"Sell -> Trades: {sell_rows} Features: {sell_cols - 1}")

Buy -> Trades: 3364 Features: 296
Sell -> Trades: 3064 Features: 296


In [3]:
# Preview the buy-side training frame (notebook display of the last expression).
df_buy

Unnamed: 0,data_0,data_1,data_2,data_3,data_4,data_5,data_6,data_7,data_8,data_9,...,data_287,data_288,data_289,data_290,data_291,data_292,data_293,data_294,data_295,target
0,0.347505,-0.533746,21.058615,-0.001188,0.002035,694.0,-0.004281,0.001974,683.0,-0.005447,...,363.0,-0.005977,0.001294,220.0,-0.006385,0.001303,302.0,0.416667,0.333333,1.0
1,-0.208776,-1.247894,29.340124,-0.004139,0.001792,100.0,-0.004929,0.001541,191.0,-0.005073,...,211.0,-0.004466,0.001418,160.0,-0.004959,0.001388,148.0,0.125000,0.500000,1.0
2,-0.238217,-0.142496,5.823409,-0.000424,0.000622,112.0,-0.000777,0.000582,95.0,-0.000706,...,122.0,-0.000989,0.000741,98.0,-0.000847,0.000731,111.0,0.583333,0.500000,0.0
3,-0.485131,-0.036161,4.045076,-0.000401,0.000324,38.0,-0.000709,0.000302,34.0,-0.000648,...,31.0,-0.000062,0.000187,33.0,-0.000031,0.000196,16.0,0.291667,0.666667,1.0
4,0.216359,-0.522387,3.050455,-0.000175,0.001020,577.0,-0.000413,0.001060,477.0,-0.000969,...,727.0,-0.000175,0.000772,594.0,0.000859,0.000744,749.0,0.458333,0.833333,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3359,-0.488338,-0.187534,2.066212,-0.000122,0.000433,68.0,-0.000122,0.000457,84.0,-0.000122,...,55.0,0.000591,0.000472,70.0,0.000591,0.000463,135.0,0.541667,0.166667,0.0
3360,0.419527,0.764363,10.641483,-0.001493,0.001389,203.0,-0.001948,0.001348,331.0,-0.002447,...,227.0,-0.003393,0.002237,278.0,-0.002835,0.002324,363.0,0.458333,0.333333,0.0
3361,-1.241794,0.764122,5.037224,-0.000883,0.000943,165.0,-0.001059,0.000892,175.0,-0.000839,...,132.0,0.000203,0.000893,215.0,-0.001183,0.000911,362.0,0.583333,0.166667,1.0
3362,0.658837,0.393340,3.999073,-0.000514,0.000940,173.0,-0.000606,0.000985,212.0,-0.000881,...,526.0,-0.000009,0.001380,378.0,0.000083,0.001415,275.0,0.791667,0.833333,1.0


In [4]:
# Preview the sell-side training frame (notebook display of the last expression).
df_sell

Unnamed: 0,data_0,data_1,data_2,data_3,data_4,data_5,data_6,data_7,data_8,data_9,...,data_287,data_288,data_289,data_290,data_291,data_292,data_293,data_294,data_295,target
0,-0.163193,0.441939,4.051536,0.000186,0.000941,209.0,0.000619,0.000984,155.0,0.001115,...,124.0,0.001859,0.000775,167.0,0.003058,0.000801,120.0,0.666667,0.166667,1.0
1,0.316295,-1.353240,15.835863,0.002352,0.000730,32.0,0.002122,0.000553,98.0,0.002161,...,91.0,0.001893,0.000536,127.0,0.002275,0.000528,200.0,0.125000,0.833333,1.0
2,0.132708,-0.699077,5.825885,0.000250,0.000644,20.0,0.000688,0.000644,39.0,0.000813,...,194.0,-0.001250,0.000556,327.0,-0.001375,0.000547,237.0,0.166667,0.666667,0.0
3,0.115300,-0.959047,8.439783,0.000850,0.001378,424.0,0.001403,0.001449,389.0,0.001325,...,545.0,-0.000973,0.002048,579.0,-0.000740,0.001969,563.0,0.458333,0.666667,1.0
4,-0.405372,-0.051972,1.489893,0.000125,0.000420,62.0,0.000188,0.000416,45.0,0.000188,...,28.0,0.000251,0.000309,34.0,0.000251,0.000313,27.0,0.875000,0.166667,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3059,0.320020,-0.592438,4.422967,0.000686,0.000940,765.0,0.001078,0.000937,257.0,0.000860,...,245.0,0.002339,0.001163,256.0,0.002034,0.001237,347.0,0.666667,0.666667,0.0
3060,-0.229954,0.179408,22.327198,0.002933,0.004717,386.0,0.005321,0.005349,480.0,0.006166,...,343.0,-0.008692,0.006156,379.0,-0.005544,0.005789,495.0,0.750000,0.166667,0.0
3061,-0.243118,-0.152216,8.430143,0.000501,0.001627,79.0,0.001543,0.001527,157.0,0.001813,...,132.0,0.002006,0.001970,115.0,0.001968,0.002015,113.0,0.166667,0.166667,1.0
3062,0.018828,-0.793696,27.115443,0.002525,0.000924,157.0,0.004846,0.000765,163.0,0.004283,...,27.0,0.004142,0.000639,34.0,0.003087,0.000639,66.0,0.291667,0.500000,0.0


In [5]:
# Check for missing values and drop the affected rows, one frame at a time.
buy_has_missing = df_buy.isna().values.any()
if buy_has_missing:
    df_buy = df_buy.dropna()

sell_has_missing = df_sell.isna().values.any()
if sell_has_missing:
    df_sell = df_sell.dropna()

In [6]:
# Split each frame into a feature matrix and a target vector.
# The full dataset is used for training: the *_train names alias the full arrays.

# Buy side
y_buy = df_buy['target'].values
X_buy = df_buy.drop(columns='target').values
X_buy_train, y_buy_train = X_buy, y_buy

# Sell side
y_sell = df_sell['target'].values
X_sell = df_sell.drop(columns='target').values
X_sell_train, y_sell_train = X_sell, y_sell

In [7]:
class DimensionalityReducer(BaseEstimator, TransformerMixin):
    """Pipeline step that delegates to one of several decomposition estimators.

    The concrete estimator is chosen at ``fit`` time from ``method``, which
    lets a hyper-parameter search treat the reduction technique itself as a
    tunable parameter.

    Parameters
    ----------
    method : {'pca', 'kernel_pca', 'truncated_svd', 'factor_analysis',
              'fast_ica'} or None
        Which decomposition to use. ``None`` turns the step into a
        pass-through that returns its input unchanged.
    n_components : int, float or None
        Requested output dimensionality. For ``'pca'`` the value is handed to
        ``PCA`` untouched. For every other method a float is multiplied by the
        number of input features and truncated to an int; the result is
        clamped to ``[1, n_features]``. ``None`` is forwarded as-is.
    kernel : str or None
        Kernel name used only by ``'kernel_pca'``; falls back to ``'linear'``
        when falsy.
    n_neighbors, min_dist : any
        Accepted and stored but not used by any code in this class.

    Attributes
    ----------
    reducer : estimator or None
        The fitted underlying decomposition, or ``None`` when the step acts as
        a pass-through.
    """

    def __init__(self, method=None, n_components=None, kernel=None,
                 n_neighbors=None, min_dist=None):
        self.method = method
        self.n_components = n_components
        self.kernel = kernel
        self.n_neighbors = n_neighbors
        self.min_dist = min_dist
        self.reducer = None

    def fit(self, X, y=None):
        """Build the decomposition selected by ``method`` and fit it on ``X``.

        Raises
        ------
        ValueError
            If ``method`` is neither ``None`` nor one of the supported names.
        """
        # Always discard the previous reducer first: otherwise refitting the
        # same instance after set_params(method=None) would keep transforming
        # with the estimator left over from the earlier fit.
        self.reducer = None
        if self.method is None:
            return self

        n_components = self._get_n_components(X)
        if self.method == 'pca':
            self.reducer = PCA(n_components=n_components)
        elif self.method == 'kernel_pca':
            kernel = self.kernel if self.kernel else 'linear'
            self.reducer = KernelPCA(n_components=n_components, kernel=kernel)
        elif self.method == 'truncated_svd':
            # Seeded like FastICA below so repeated fits give the same result.
            self.reducer = TruncatedSVD(n_components=n_components, random_state=42)
        elif self.method == 'factor_analysis':
            self.reducer = FactorAnalysis(n_components=n_components)
        elif self.method == 'fast_ica':
            self.reducer = FastICA(n_components=n_components, random_state=42)
        else:
            # A misspelled method used to disable the reduction silently.
            raise ValueError(
                f"Unknown dimensionality reduction method: {self.method!r}"
            )

        self.reducer.fit(X, y)
        return self

    def transform(self, X):
        """Project ``X`` with the fitted reducer, or return it unchanged."""
        if self.reducer is None:
            return X
        return self.reducer.transform(X)

    def _get_n_components(self, X):
        """Translate ``n_components`` into the value given to the estimator."""
        if self.n_components is None:
            return None
        if self.method == 'pca':
            # Forwarded untouched so PCA applies its own int/float handling.
            return self.n_components

        n_features = X.shape[1]
        if isinstance(self.n_components, float):
            # A float is treated as a fraction of the input feature count.
            n_components = int(self.n_components * n_features)
        else:
            n_components = self.n_components
        # Keep the request inside the valid range [1, n_features].
        return max(1, min(n_components, n_features))

In [8]:
# Search space for the preprocessing steps of the pipeline.
_preprocessing_space = {
    # Scaler
    'scaler': Categorical([StandardScaler(), RobustScaler()]),
    # Dimensionality reduction method
    'dim_reducer__method': Categorical(['pca', 'kernel_pca', 'truncated_svd', 'factor_analysis', 'fast_ica']),
    # Shared hyper-parameter
    'dim_reducer__n_components': Continuous(0.6, 0.99),
    # Method-specific hyper-parameter
    'dim_reducer__kernel': Categorical(['linear', 'poly', 'rbf', 'sigmoid', 'cosine']),
}

# Search space for the XGBoost classifier.
_xgb_space = {
    'xgb__n_estimators': Integer(50, 500),
    'xgb__max_depth': Integer(3, 10),
    'xgb__learning_rate': Continuous(0.01, 0.3),
    'xgb__subsample': Continuous(0.6, 1.0),
    'xgb__colsample_bytree': Continuous(0.6, 1.0),
    'xgb__gamma': Continuous(0, 0.5),
    'xgb__min_child_weight': Integer(1, 10),
    'xgb__reg_alpha': Continuous(0, 1.0),
    'xgb__reg_lambda': Continuous(0, 1.0),
}

# Merge in the same key order as before: preprocessing first, then XGBoost.
param_grid = {**_preprocessing_space, **_xgb_space}

In [None]:
# Buy-side pipeline; the step instances below are placeholders.
buy_steps = [
    ('scaler', RobustScaler()),
    ('dim_reducer', DimensionalityReducer()),
    ('xgb', xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)),
]
pipe_buy = Pipeline(buy_steps)

# Settings of the genetic hyper-parameter search.
buy_search_settings = dict(
    cv=5,
    scoring='accuracy',
    population_size=50,
    generations=20,
    mutation_probability=0.1,
    crossover_probability=0.9,
    n_jobs=-1,
    verbose=True,
    keep_top_k=10,
)
evolutionary_search_buy = GASearchCV(
    estimator=pipe_buy,
    param_grid=param_grid,
    **buy_search_settings,
)

# Run the search, showing a progress bar while it evolves.
evolutionary_search_buy.fit(X_buy_train, y_buy_train, callbacks=[ProgressBar()])

# Keep the best pipeline found by the search.
model_buy = evolutionary_search_buy.best_estimator_

  0%|          | 0/21 [00:00<?, ?it/s]

gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	50    	0.511502	0.0094059  	0.52913    	0.491679   


In [None]:
# Create a figure with two stacked subplots (2 rows, 1 column).
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
# NOTE(review): confirm that GASearchCV exposes plot_fitness_evolution() and
# plot_parameters_importance() as methods in the installed sklearn-genetic-opt
# version; its plotting helpers may live in sklearn_genetic.plots instead.
# First subplot: fitness evolution across generations.
plt.sca(ax1)
evolutionary_search_buy.plot_fitness_evolution()
ax1.set_title('Evolución del Fitness - Modelo de Compras')
# Second subplot: hyper-parameter importance.
plt.sca(ax2)
evolutionary_search_buy.plot_parameters_importance()
ax2.set_title('Importancia de los Hiperparámetros - Modelo de Compras')
# Adjust the spacing between subplots.
plt.tight_layout()
# Render the figure.
plt.show()
# Best cross-validated score of the buy search (the label used to say "sell").
print(f'Model buy score: {evolutionary_search_buy.best_score_}')

In [None]:
# Sell-side pipeline; the step instances below are placeholders.
# The scaler placeholder is RobustScaler(), matching the buy pipeline, so the
# two pipelines stay equivalent even if 'scaler' is ever removed from
# param_grid (a 'passthrough' placeholder would then feed unscaled data).
pipe_sell = Pipeline([
    ('scaler', RobustScaler()),
    ('dim_reducer', DimensionalityReducer()),
    ('xgb', xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42))
])
# Configure the genetic search.
evolutionary_search_sell = GASearchCV(
    estimator=pipe_sell,
    cv=5,
    scoring='accuracy',
    param_grid=param_grid,
    population_size=50,
    generations=20,
    mutation_probability=0.1,
    crossover_probability=0.9,
    n_jobs=-1,
    verbose=True,
    keep_top_k=10
)
# Run the search, showing a progress bar while it evolves.
evolutionary_search_sell.fit(X_sell_train, y_sell_train, callbacks=[ProgressBar()])
# Keep the best pipeline found by the search.
model_sell = evolutionary_search_sell.best_estimator_

In [None]:
# Two stacked panels (2 rows, 1 column); tweak figsize to taste.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))

# Each panel: target axes, plotting method of the search object, panel title.
sell_panels = (
    (ax1, 'plot_fitness_evolution', 'Evolución del Fitness - Modelo de Ventas'),
    (ax2, 'plot_parameters_importance', 'Importancia de los Hiperparámetros - Modelo de Ventas'),
)
for panel_ax, plot_method, panel_title in sell_panels:
    # Make the panel current before calling the plot method, then title it.
    plt.sca(panel_ax)
    getattr(evolutionary_search_sell, plot_method)()
    panel_ax.set_title(panel_title)

# Tidy the spacing and render the figure.
plt.tight_layout()
plt.show()

# Best cross-validated score of the sell search.
print(f'Model sell score: {evolutionary_search_sell.best_score_}')

In [10]:
# The import cell binds the name convert_xgboost twice; the later import
# (from onnxmltools.convert) wins and is the model-level conversion entry
# point, not the per-operator converter that update_registered_converter
# expects. Import the operator converter under an unambiguous alias here so
# the correct callable is registered regardless of import order.
from onnxmltools.convert.xgboost.operator_converters.XGBoost import (
    convert_xgboost as convert_xgboost_operator,
)

# Teach skl2onnx how to convert XGBClassifier steps inside sklearn pipelines.
update_registered_converter(
    xgb.XGBClassifier,
    "XGBClassifier",
    calculate_linear_classifier_output_shapes,
    convert_xgboost_operator,
    options={'nocl': [False], 'zipmap': [True, False, 'columns']}
)

In [11]:
# Convert both fitted pipelines to ONNX and write them to the MQL5 Files folder.
# NOTE(review): convert_sklearn needs a registered converter for every pipeline
# step; none for the custom DimensionalityReducer is registered in the visible
# cells — confirm one is registered before this cell runs.

# Each model declares its input width from its own training matrix (the sell
# model previously borrowed the width of the buy matrix).
n_buy_features = X_buy_train.shape[1]
n_sell_features = X_sell_train.shape[1]

model_buy_onnx = convert_sklearn(
    model_buy,
    'pipeline_buy_xgboost',
    [('input', FloatTensorType([None, n_buy_features]))],
    target_opset={'': 12, 'ai.onnx.ml': 2}
)
model_sell_onnx = convert_sklearn(
    model_sell,
    'pipeline_sell_xgboost',
    [('input', FloatTensorType([None, n_sell_features]))],
    target_opset={'': 12, 'ai.onnx.ml': 2}
)

# Serialize each ONNX graph to disk in binary mode.
mql5_files_folder = "C:/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/6C3C6A11D1C3791DD4DBF45421BF8028/MQL5/Files"
with open(os.path.join(mql5_files_folder, "model_buy.onnx"), 'wb') as f:
    f.write(model_buy_onnx.SerializeToString())
with open(os.path.join(mql5_files_folder, "model_sell.onnx"), 'wb') as f:
    f.write(model_sell_onnx.SerializeToString())