## Importar librerías

In [10]:
import glob
import os
import warnings
from concurrent.futures import ThreadPoolExecutor, wait

import numpy as np
import pandas as pd
import xgboost as xgb
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from sklearn.base import clone
from sklearn.decomposition import KernelPCA
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler

from genetic_algorithm import GeneticAlgorithmCV
warnings.filterwarnings('ignore')

## Cargar y preparar datos

In [11]:
# Locate the training dataset CSV inside the folder below
file_folder = r"/mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/Common/Files/"
file_pattern = os.path.join(file_folder, 'training_dataset_*.csv')
# glob returns matches in arbitrary order; sort so the selected file is deterministic
df_file_path = sorted(glob.glob(file_pattern))
if not df_file_path:
    # Fail with an actionable message instead of an opaque IndexError
    raise FileNotFoundError(f"No training dataset found matching: {file_pattern}")
# Load the first match (lexicographically smallest path)
df = pd.read_csv(df_file_path[0])

In [12]:
def create_training_dataset(df, trade_type):
    """Build the feature matrix and binary target for one trade type.

    Duplicate rows are removed, then only rows whose 'type' equals
    ``trade_type`` and whose 'profit' is known and non-zero are kept.
    Rows with a missing profit are discarded: their outcome is unknown, so
    they must not be labelled as losing trades.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw dataset. Must contain 'type' and 'profit' columns; every column
        except the last two is used as a feature.
    trade_type : int
        Value of the 'type' column to keep. 1 is reported as 'Buy' in the
        debug output, any other value as 'Sell'.

    Returns
    -------
    X_train : numpy.ndarray
        Feature values of the selected rows (rows with missing features dropped).
    y_train : numpy.ndarray
        1 where profit > 0, otherwise 0, aligned with ``X_train``.
    """
    df = df.drop_duplicates()
    # Features are every column except the last two
    feature_columns = df.columns[:-2].tolist()
    profit = df['profit']
    # NaN != 0 evaluates to True, so missing profits must be excluded explicitly
    selected = (df['type'] == trade_type) & (profit != 0) & profit.notna()
    df_trade = df.loc[selected, feature_columns].copy()
    # Vectorized target: 1 for winning trades, 0 for losing trades
    df_trade['target'] = (profit[selected] > 0).astype('int64')
    # Remove rows with missing feature values (no-op when there are none)
    df_trade = df_trade.dropna()
    # Prepare training arrays
    X_train = df_trade.drop(columns='target').values
    y_train = df_trade['target'].values
    # Debug info
    n_winning = int(y_train.sum())
    n_losing = len(y_train) - n_winning
    print(f"Tipo de operación: {'Buy' if trade_type == 1 else 'Sell'}")
    print(f"Número de características: {len(feature_columns)}")
    print(f"Total Ganadoras: {n_winning}")
    print(f"Total Perdedoras: {n_losing}")
    print("___________________________________")
    return X_train, y_train

# Build one training set per trade type (1 -> buy variables, -1 -> sell variables)
(X_buy, y_buy), (X_sell, y_sell) = (
    create_training_dataset(df, direction) for direction in (1, -1)
)

Tipo de operación: Buy
Número de características: 541
Total Ganadoras: 392
Total Perdedoras: 417
___________________________________
Tipo de operación: Sell
Número de características: 541
Total Ganadoras: 348
Total Perdedoras: 291
___________________________________


## Entrenamiento de los modelos

In [13]:
# Cross-validation splitter: 5 stratified folds, shuffled with a fixed seed
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# Dimensionality reducer: kernel PCA with an RBF kernel
reducer = KernelPCA(kernel='rbf')
# Classifier: histogram tree method running on the CUDA device.
# `device` replaces the legacy `predictor='gpu_predictor'` argument, so the
# latter is no longer passed (it was only hidden by verbosity=0).
classifier = xgb.XGBClassifier(
    tree_method="hist",
    device="cuda",
    verbosity=0
)

In [14]:
# Scaler factory
def create_scaler(X_train, y_train):
    """Fit a StandardScaler on ``X_train`` and return it with the scaled data.

    Parameters
    ----------
    X_train : array-like
        Feature matrix used to fit the scaler.
    y_train : array-like
        Target values; forwarded to the scaler's fit call.

    Returns
    -------
    tuple
        ``(scaler, X_scaled)``: the fitted scaler and the transformed ``X_train``.
    """
    fitted_scaler = StandardScaler().fit(X_train, y_train)
    return fitted_scaler, fitted_scaler.transform(X_train)

# Scale both datasets concurrently, one worker thread per dataset
with ThreadPoolExecutor(max_workers=2) as executor:
    # Submit one scaling task per dataset (buy first, then sell)
    future_buy, future_sell = (
        executor.submit(create_scaler, features, labels)
        for features, labels in ((X_buy, y_buy), (X_sell, y_sell))
    )
    futures = [future_buy, future_sell]
    # Block until both tasks have finished
    wait(futures)
    # Unpack the fitted scaler and the scaled matrix for each dataset
    scaler_buy, X_buy_scaled = future_buy.result()
    scaler_sell, X_sell_scaled = future_sell.result()

In [15]:
# Hyperparameter search space for a scaled dataset
def create_param_grid(X_scaled):
    """Return the integer search space for the KernelPCA + XGBoost pipeline.

    Parameters
    ----------
    X_scaled : array-like with a ``shape`` attribute
        Scaled feature matrix; its number of columns is the upper bound for
        the number of KernelPCA components.

    Returns
    -------
    dict
        Maps each pipeline parameter name to a
        ``{'type': 'int', 'low': ..., 'high': ...}`` specification.
    """
    def int_range(low, high):
        # Integer range specification for a single hyperparameter
        return {'type': 'int', 'low': low, 'high': high}

    return {
        'kernelpca__n_components': int_range(1, X_scaled.shape[1]),
        'xgbclassifier__n_estimators': int_range(50, 500),
        'xgbclassifier__max_depth': int_range(3, 50),
    }

# Build the search space for each dataset (buy first, then sell)
param_grid_buy, param_grid_sell = map(
    create_param_grid, (X_buy_scaled, X_sell_scaled)
)

In [16]:
# Model training helper
def train_classifier(X_train, y_train, model_type, estimator, cv, param_grid):
    """Run a genetic-algorithm hyperparameter search and return the best estimator.

    The search runs on a clone of ``estimator``. This notebook passes the same
    pipeline object to two concurrent calls, so cloning guarantees the searches
    never share mutable estimator state, whether or not GeneticAlgorithmCV
    clones internally (its implementation is not visible here).

    Parameters
    ----------
    X_train : array-like
        Training features.
    y_train : array-like
        Training targets.
    model_type : str
        Label forwarded to GeneticAlgorithmCV and used in the printed messages.
    estimator : estimator object
        Unfitted estimator to tune; it is cloned, never modified.
    cv : cross-validation splitter
        Forwarded to GeneticAlgorithmCV.
    param_grid : dict
        Search space forwarded to GeneticAlgorithmCV.

    Returns
    -------
    estimator object
        ``best_estimator_`` of the finished search.

    Raises
    ------
    Exception
        Any error raised while building or fitting the search is printed and
        re-raised unchanged.
    """
    try:
        # Train using the genetic algorithm on a private copy of the estimator
        ga_search = GeneticAlgorithmCV(
            model_type=model_type,
            estimator=clone(estimator),
            cv=cv,
            param_grid=param_grid,
            verbose=True
        )
        ga_search.fit(X_train, y_train)
    except Exception as e:
        print(f"Error en traing model {model_type}: {e}")
        raise
    # Report the best score and parameters found
    print("####################################################################")
    print(f"Mejor puntuación de validación para {model_type}: {ga_search.best_score_}")
    print(f"Mejores parámetros encontrados para {model_type}: {ga_search.best_params_}")
    print("####################################################################")
    # Return the best estimator
    return ga_search.best_estimator_

# Pipeline explored by the genetic search: reducer followed by classifier
estimator = make_pipeline(reducer, classifier)
# Train the buy and sell models concurrently
with ThreadPoolExecutor(max_workers=2) as executor:
    print("Esperando que las tareas finalicen...")
    # Submit one training task per dataset; both receive the same pipeline object
    future_buy = executor.submit(
        train_classifier, X_buy_scaled, y_buy, "buy", estimator, cv, param_grid_buy
    )
    future_sell = executor.submit(
        train_classifier, X_sell_scaled, y_sell, "sell", estimator, cv, param_grid_sell
    )
    futures = [future_buy, future_sell]
    # Block until both tasks have finished
    wait(futures)
    # Collect the best estimators (buy first, then sell)
    estimator_buy, estimator_sell = (task.result() for task in futures)
    print("¡Todas las tareas han terminado!")

Esperando que las tareas finalicen...


Generaciones sell:   0%|          | 0/5 [00:00<?, ?gen/s]

Generaciones buy:   0%|          | 0/5 [00:00<?, ?gen/s]

[1, buy] Fitness: 0.5672656838571289 | Best Fitness: 0.5672656838571289
[1, buy] Fitness Improvement: 0.0000 | Diversity: 0.6422
[1, buy] Normalized Fitness Improvement: 0.0000 | Normalized Diversity: 0.0000
[1, buy] Crossover Rate: 0.5000 | Mutation Rate: 0.5000
[1, sell] Fitness: 0.563877177577621 | Best Fitness: 0.563877177577621
[1, sell] Fitness Improvement: 0.0000 | Diversity: 0.7042
[1, sell] Normalized Fitness Improvement: 0.0000 | Normalized Diversity: 0.0000
[1, sell] Crossover Rate: 0.5000 | Mutation Rate: 0.5000
[2, buy] Fitness: 0.5673949473727836 | Best Fitness: 0.5673949473727836
[2, buy] Fitness Improvement: 0.0001 | Diversity: 0.5411
[2, buy] Normalized Fitness Improvement: 1.0000 | Normalized Diversity: 0.0000
[2, buy] Crossover Rate: 0.3152 | Mutation Rate: 0.6848
[2, sell] Fitness: 0.5611395318281134 | Best Fitness: 0.563877177577621
[2, sell] Fitness Improvement: -0.0027 | Diversity: 0.5620
[2, sell] Normalized Fitness Improvement: 0.0000 | Normalized Diversity: 0.

In [17]:
def create_pipeline(scaler, pipeline):
    """Return a new Pipeline with ``scaler`` placed before the steps of ``pipeline``.

    Parameters
    ----------
    scaler : transformer
        Added as the first step under the name 'standardscaler'.
    pipeline : Pipeline
        Existing pipeline whose ``steps`` are appended, in order, after the scaler.

    Returns
    -------
    Pipeline
        Pipeline made of the scaler step followed by the original steps.
    """
    combined_steps = [('standardscaler', scaler)]
    combined_steps.extend((name, step) for name, step in pipeline.steps)
    return Pipeline(steps=combined_steps)

# Prepend each scaler to its tuned estimator (buy first, then sell)
model_buy, model_sell = (
    create_pipeline(scaler, tuned)
    for scaler, tuned in ((scaler_buy, estimator_buy), (scaler_sell, estimator_sell))
)

## Exportar modelos a formato ONNX

In [18]:
def save_onnx_models(mql5_files_folder, X_buy, X_sell):
    """Convert the buy and sell pipelines to ONNX and write them to disk.

    Reads the module-level ``model_buy`` and ``model_sell`` pipelines, converts
    both, and writes them as 'model_buy.onnx' and 'model_sell.onnx' inside
    ``mql5_files_folder``.

    Parameters
    ----------
    mql5_files_folder : str
        Directory where the ONNX files are written.
    X_buy : array-like with a ``shape`` attribute
        Buy feature matrix; only its column count is used, to declare the input width.
    X_sell : array-like with a ``shape`` attribute
        Sell feature matrix; only its column count is used, to declare the input width.

    Raises
    ------
    Exception
        Any error during registration, conversion or writing is printed and
        re-raised unchanged.
    """
    try:
        # Register the XGBoost converter so skl2onnx can handle XGBClassifier steps
        update_registered_converter(
            xgb.XGBClassifier,
            "XGBClassifier",
            calculate_linear_classifier_output_shapes,
            convert_xgboost,
            options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}
        )
        export_jobs = [
            (model_buy, 'pipeline_buy_xgboost', X_buy, "model_buy.onnx"),
            (model_sell, 'pipeline_sell_xgboost', X_sell, "model_sell.onnx"),
        ]
        # Convert every model first; files are only written once both conversions succeed
        converted = [
            (
                file_name,
                convert_sklearn(
                    model,
                    graph_name,
                    [('input', FloatTensorType([None, features.shape[1]]))],
                    target_opset={'': 12, 'ai.onnx.ml': 2}
                ),
            )
            for model, graph_name, features, file_name in export_jobs
        ]
        for file_name, onnx_model in converted:
            with open(os.path.join(mql5_files_folder, file_name), 'wb') as f:
                f.write(onnx_model.SerializeToString())
    except Exception as e:
        print(f"Error en exportar los modelos: {e}")
        raise
    print("Modelos ONNX exportados correctamente")

# Destination directory for the exported ONNX files
mql5_files_folder = r'/mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/6C3C6A11D1C3791DD4DBF45421BF8028/MQL5/Files'
save_onnx_models(
    mql5_files_folder=mql5_files_folder,
    X_buy=X_buy,
    X_sell=X_sell,
)

Modelos ONNX exportados correctamente
