## Importar librerías

In [27]:
import os
import glob
import pandas as pd
from genetic_algorithm import GeneticAlgorithmCV
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.decomposition import KernelPCA, TruncatedSVD
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
import xgboost as xgb
from concurrent.futures import ThreadPoolExecutor, wait
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
import warnings
warnings.filterwarnings('ignore')

## Cargar y preparar datos

In [46]:
def create_training_dataset(df, trade_type):
    df = df.drop_duplicates()
    # Filtrar las operaciones con profit != 0
    df_trade = df[df['profit'] != 0].copy()
    # Añadir la columna 'Target' basada en el profit
    df_trade['target'] = df_trade['profit'].apply(lambda x: 1 if x > 0 else 0)
    # Eliminar posibles missings
    if(df_trade.isna().values.any()):
        df_trade = df_trade.dropna()
    # Separar en ganadoras y perdedoras
    df_winning = df_trade[df_trade['profit'] > 0]
    df_losing = df_trade[df_trade['profit'] < 0]
    # Debug info
    n_winning = len(df_trade[df_trade['target']==1])
    n_losing = len(df_trade[df_trade['target']==0])
    print(f"Tipo de operación: {'Buy' if trade_type == 1 else 'Sell'}")
    print(f"Número de características: {df.iloc[:, :-1].shape[1]}")
    print(f"Total Ganadoras: {n_winning}")
    print(f"Total Perdedoras: {n_losing}")
    # Verificar que hay suficientes datos
    if n_winning == 0 or n_losing == 0:
        print(f"No hay suficientes datos para {'compras' if trade_type == 1 else 'ventas'} para entrenar el modelo.")
        return False
    # Equilibrar las clases
    if n_winning <= n_losing:
        n_samples_per_class = n_winning
        # Seleccionar todas las ganadoras
        selected_winning = df_winning.copy()
        # Ordenar las perdedoras por pérdida de mayor a menor (menor profit a mayor)
        df_losing_sorted = df_losing.sort_values(by='profit', ascending=True)
        # Seleccionar las perdedoras con mayor pérdida
        selected_losing = df_losing_sorted.head(n_samples_per_class)
    else:
        n_samples_per_class = n_losing
        # Seleccionar todas las perdedoras
        selected_losing = df_losing.copy()
        # Ordenar las ganadoras por profit de mayor a menor
        df_winning_sorted = df_winning.sort_values(by='profit', ascending=False)
        # Seleccionar las ganadoras con mayor profit
        selected_winning = df_winning_sorted.head(n_samples_per_class)
    print(f"Se seleccionarán {n_samples_per_class} muestras por clase.")
    print("___________________________________")
    # Combinar las muestras seleccionadas
    df_training = pd.concat([selected_winning, selected_losing], ignore_index=True)
    # Seleccionar las columnas necesarias (todas menos las dos últimas para el conjunto principal)
    feature_columns = df_training.columns[:-2]
    df_training = df_training[feature_columns.tolist() + ['target']]
    # Preparación de los datos de entrenamiento
    X_train = df_training.drop(columns='target').values
    y_train = df_training['target'].values
    # return dataframe
    return X_train, y_train, df_training

# Ruta CSVs
file_folder = r"/mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/Common/Files/"
# Compras
buy_file_pattern = os.path.join(file_folder, 'training_dataset_buy_*.csv')
df_buy_file_path = glob.glob(buy_file_pattern)
df_buy = pd.read_csv(df_buy_file_path[0])
X_buy, y_buy, df_buy_ = create_training_dataset(df_buy, 1)
# Ventas
sell_file_pattern = os.path.join(file_folder, 'training_dataset_sell_*.csv')
df_sell_file_path = glob.glob(sell_file_pattern)
df_sell = pd.read_csv(df_sell_file_path[0])
X_sell, y_sell, df_sell_ = create_training_dataset(df_sell, -1)

Tipo de operación: Buy
Número de características: 12
Total Ganadoras: 320
Total Perdedoras: 323
Se seleccionarán 320 muestras por clase.
___________________________________
Tipo de operación: Sell
Número de características: 12
Total Ganadoras: 4213
Total Perdedoras: 4794
Se seleccionarán 4213 muestras por clase.
___________________________________


## Entrenamiento de los modelos

In [31]:
# Train model functions
def train_classifier(X_train, y_train, model_type, n_features):
    # Definir algoritmo de validación cruzada
    cv = StratifiedKFold(n_splits=3)
    # Definir Pipeline
    pipeline = Pipeline([
            ('scaler', 'none'),
            ('reducer', 'none'),
            ('xgbclassifier', xgb.XGBClassifier(
                device = "cuda",
                verbosity=0)
            )
        ])
    # Definir mapa de estimadores
    estimator_map = {
        'scaler': {
            'standard': StandardScaler(),
            'robust': RobustScaler(),
            'none': 'passthrough'
        },
        'reducer': {
            'kernel_pca_rbf': KernelPCA(kernel='rbf'),
            'kernel_pca_linear': KernelPCA(kernel='linear'),
            'kernel_pca_poly': KernelPCA(kernel='poly'),
            'kernel_pca_sigmoid': KernelPCA(kernel='sigmoid'),
            'kernel_pca_cosine': KernelPCA(kernel='cosine'),
            'truncated_svd': TruncatedSVD(),
            'none': 'passthrough'
        }
    }
    # Definir espacio de hiperparámetros para compras
    param_grid = {
        'scaler': {'type': 'categorical', 'values': ['standard', 'robust']},
        'reducer': {'type': 'categorical', 'values': [
            'kernel_pca_rbf', 'kernel_pca_linear', 'kernel_pca_poly',
            'kernel_pca_sigmoid', 'kernel_pca_cosine','truncated_svd',]},
        'reducer__n_components': {'type': 'int', 'low': 2, 'high': n_features-1},
        'xgbclassifier__n_estimators': {'type': 'int', 'low': 50, 'high': 500},
        'xgbclassifier__max_depth': {'type': 'int', 'low': 3, 'high': 10},
        'xgbclassifier__eta':{'type': 'float', 'low': 0.1, 'high': 0.5},
        'xgbclassifier__gamma':{'type': 'float', 'low': 0.0, 'high': 0.5},
        'xgbclassifier__subsample': {'type': 'float', 'low': 0.5, 'high': 1.0},
        'xgbclassifier__colsample_bytree': {'type': 'float', 'low': 0.5, 'high': 1.0},
        'xgbclassifier__colsample_bylevel': {'type': 'float', 'low': 0.5, 'high': 1.0},
        'xgbclassifier__colsample_bynode': {'type': 'float', 'low': 0.5, 'high': 1.0},
        'xgbclassifier__alpha': {'type': 'float', 'low': 0.0, 'high': 1.0},
        'xgbclassifier__lambda': {'type': 'float', 'low': 0.0, 'high': 1.0},
        'xgbclassifier__min_child_weight': {'type': 'int', 'low': 1, 'high': 10},
        'xgbclassifier__scale_pos_weight':  {'type': 'int', 'low': 1, 'high': 10}
    }
    try:
        # Entrenar el modelo utilizando el algoritmo genético
        ga_search = GeneticAlgorithmCV(
            model_type=model_type,
            pipeline=pipeline,
            param_grid=param_grid,
            estimator_map=estimator_map,
            cv=cv,
            pop_size=20,
            generations=5,
            early_stopping_rounds=1,
            crossover_initial=0.9,
            crossover_end=0.1,
            mutation_initial=0.1,
            mutation_end=0.9,
            elitism=True,
            elite_size=3,
            tournament_size=3,
            n_random=10,
            n_jobs=-1,
            verbose=True,
        )
        ga_search.fit(X_train, y_train)
    except Exception as e:
        print(f"Error en traing model {model_type}: {e}")
        raise
    # Obtener los mejores parámetros y el mejor estimador
    print("####################################################################")
    print(f"Mejor puntuación de validación para {model_type}: {ga_search.best_score_}")
    print(f"Mejores parámetros encontrados para {model_type}: {ga_search.best_params_full_}")
    print("####################################################################")
    # Retornar mejor estimador
    return ga_search.best_estimator_

# Número de características totales
n_features = df.iloc[:, :-2].shape[1]
# Entrenar modelos en paralelo
with ThreadPoolExecutor(max_workers=2) as executor:
    print("Esperando que las tareas finalicen...")
    # enviar tareas de entrenamiento
    future_buy = executor.submit(train_classifier, X_buy, y_buy, "buy", n_features)
    future_sell = executor.submit(train_classifier, X_sell, y_sell, "sell", n_features)
    # esperar a que todas las tareas terminen
    futures = [future_buy, future_sell]
    wait(futures)
    # Obtener resultados una vez que ambas tareas han terminado
    model_buy = future_buy.result()
    model_sell = future_sell.result()
    print("¡Todas las tareas han terminado!")

Esperando que las tareas finalicen...


Generaciones buy:   0%|          | 0/5 [00:00<?, ?gen/s]

Generaciones sell:   0%|          | 0/5 [00:00<?, ?gen/s]

[1, buy] Fitness: 0.4892355169438229 | Best Fitness: 0.4892355169438229
[1, buy] Fitness Improvement: 0.0000 | Diversity: 1.5623
[1, buy] Normalized Fitness Improvement: 0.0000 | Normalized Diversity: 0.0000
[1, buy] Crossover Rate: 0.9000 | Mutation Rate: 0.1000
[2, buy] Fitness: 0.48749506384098984 | Best Fitness: 0.4892355169438229
[2, buy] Fitness Improvement: -0.0017 | Diversity: 1.5946
[2, buy] Normalized Fitness Improvement: 0.0000 | Normalized Diversity: 1.0000
[2, buy] Crossover Rate: 0.3152 | Mutation Rate: 0.6848
[2, buy] Early stopping due to no improvement.
Best fitness for buy: 0.4892355169438229
[1, sell] Fitness: 0.4948958746425223 | Best Fitness: 0.4948958746425223
[1, sell] Fitness Improvement: 0.0000 | Diversity: 1.5581
[1, sell] Normalized Fitness Improvement: 0.0000 | Normalized Diversity: 0.0000
[1, sell] Crossover Rate: 0.9000 | Mutation Rate: 0.1000
####################################################################
Mejor puntuación de validación para buy: 0.48

## Exportar modelos a formato ONNX

In [32]:
def save_onnx_model(mql5_files_folder, model, X, model_type):
    try:
        update_registered_converter(
            xgb.XGBClassifier,
            "XGBClassifier",
            calculate_linear_classifier_output_shapes,
            convert_xgboost,
            options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}
        )
        model_onnx = convert_sklearn(
            model,
            f"pipeline_{model_type}_xgboost",
            [('input', FloatTensorType([None, X.shape[1]]))],
            target_opset={'': 12, 'ai.onnx.ml': 2}
        )
        with open(os.path.join(mql5_files_folder, f"model_{model_type}.onnx"), 'wb') as f:
            f.write(model_onnx.SerializeToString())
    except Exception as e:
        print(f"Error en exportar el modelo {model_type}: {e}")
        raise
    print(f"Modelo {model_type} ONNX exportado correctamente")

mql5_files_folder = r'/mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/6C3C6A11D1C3791DD4DBF45421BF8028/MQL5/Files'
save_onnx_model(mql5_files_folder, model_buy, X_buy, "buy")
save_onnx_model(mql5_files_folder, model_sell, X_sell, "sell")

Modelo buy ONNX exportado correctamente
Modelo sell ONNX exportado correctamente


## Resultados

In [33]:
model_buy.steps

[('scaler', RobustScaler()),
 ('reducer', KernelPCA(n_components=2)),
 ('xgbclassifier',
  XGBClassifier(alpha=0.32671316974359443, base_score=None, booster=None,
                callbacks=None, colsample_bylevel=0.780626831389928,
                colsample_bynode=0.9417118138475853,
                colsample_bytree=0.5798590222801234, device='cuda',
                early_stopping_rounds=None, enable_categorical=False,
                eta=0.29659457807475487, eval_metric=None, feature_types=None,
                gamma=0.23301663738229578, grow_policy=None, importance_type=None,
                interaction_constraints=None, lambda=0.6987355369515166,
                learning_rate=None, max_bin=None, max_cat_threshold=None,
                max_cat_to_onehot=None, max_delta_step=None, max_depth=4,
                max_leaves=None, min_child_weight=10, missing=nan,
                monotone_constraints=None, multi_strategy=None, n_estimators=451, ...))]

In [34]:
model_sell.steps

[('scaler', StandardScaler()),
 ('reducer', KernelPCA(kernel='sigmoid', n_components=2)),
 ('xgbclassifier',
  XGBClassifier(alpha=0.15620046603684773, base_score=None, booster=None,
                callbacks=None, colsample_bylevel=0.9697321048698884,
                colsample_bynode=0.7925340575727776,
                colsample_bytree=0.8440571408543636, device='cuda',
                early_stopping_rounds=None, enable_categorical=False,
                eta=0.4357500281376786, eval_metric=None, feature_types=None,
                gamma=0.3971284142335976, grow_policy=None, importance_type=None,
                interaction_constraints=None, lambda=0.17991726993640597,
                learning_rate=None, max_bin=None, max_cat_threshold=None,
                max_cat_to_onehot=None, max_delta_step=None, max_depth=7,
                max_leaves=None, min_child_weight=2, missing=nan,
                monotone_constraints=None, multi_strategy=None, n_estimators=125, ...))]

In [38]:
pd.DataFrame([model_buy.predict_proba(X_buy)[:10, 1], df_buy['profit'].head(10)])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.937254,0.855803,0.965094,0.965032,0.968551,0.937609,0.955981,0.944236,0.931411,0.90059
1,551.86,1731.6,-988.53,-983.28,-977.16,-982.8,-917.49,-917.29,-917.9,-919.02


In [39]:
pd.DataFrame([model_sell.predict_proba(X_sell)[:10, 1], df_sell['profit'].head(10)])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.790591,0.872201,0.899802,0.843521,0.852664,0.873787,0.905419,0.874304,0.754041,0.90397
1,18.24,-603.8,-345.8,-256.8,-251.8,-243.8,-183.8,-241.8,-221.8,-97.28
