## Importar librerías

In [None]:
import os
import glob
import pandas as pd
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import KernelPCA
from sklearn.model_selection import StratifiedKFold
from sklearn_genetic import GASearchCV
from sklearn_genetic import ExponentialAdapter
from sklearn_genetic.space import Integer, Categorical, Continuous
from sklearn_genetic.callbacks import ProgressBar, ConsecutiveStopping
import xgboost as xgb
from sklearn.pipeline import Pipeline
from concurrent.futures import ThreadPoolExecutor, wait
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
import warnings
warnings.filterwarnings('ignore')

## Cargar y preparar datos

In [2]:
def create_training_dataset(df, trade_type):
    """Build a class-balanced, shuffled training frame for one trade direction.

    Duplicate rows are dropped, then only rows whose ``type`` equals
    ``trade_type`` and whose ``profit`` is non-zero are kept. The smaller of
    the two classes (winning / losing) is kept whole; from the larger class
    only the most extreme rows by ``profit`` are kept so both classes end up
    the same size.

    Args:
        df: DataFrame with ``type`` and ``profit`` columns. Every column
            except the last two is treated as a feature (the original code
            assumes the last two are ``type`` and ``profit``).
        trade_type: Value of ``type`` to select. ``1`` is reported as "Buy",
            anything else as "Sell".

    Returns:
        A DataFrame with the feature columns plus a binary ``target`` column
        (1 when ``profit`` > 0, otherwise 0), rows shuffled and rows with
        missing values removed; or ``False`` when there are no winning or no
        losing trades for this direction.
    """
    unique_rows = df.drop_duplicates()
    # Keep only trades of the requested direction that closed with profit != 0
    matches_side = unique_rows['type'] == trade_type
    nonzero_profit = unique_rows['profit'] != 0
    trades = unique_rows[matches_side & nonzero_profit].copy()
    # Split into winners and losers
    winners = trades[trades['profit'] > 0]
    losers = trades[trades['profit'] < 0]
    n_winning, n_losing = len(winners), len(losers)
    print(f"Tipo de operación: {'Buy' if trade_type == 1 else 'Sell'}")
    print(f"Total Ganadoras: {n_winning}")
    print(f"Total Perdedoras: {n_losing}")
    # Both classes are required to train a classifier
    if n_winning == 0 or n_losing == 0:
        print(f"No hay suficientes datos para {'compras' if trade_type == 1 else 'ventas'} para entrenar el modelo.")
        return False
    # Balance the classes down to the size of the smaller one
    n_samples_per_class = min(n_winning, n_losing)
    if n_winning <= n_losing:
        # All winners, plus the losers with the largest losses
        selected_winning = winners.copy()
        selected_losing = losers.sort_values(by='profit', ascending=True).head(n_samples_per_class)
    else:
        # All losers, plus the winners with the largest profits
        selected_losing = losers.copy()
        selected_winning = winners.sort_values(by='profit', ascending=False).head(n_samples_per_class)
    print(f"Se seleccionarán {n_samples_per_class} muestras por clase.")
    # Stack winners first, then losers
    balanced = pd.concat([selected_winning, selected_losing], ignore_index=True)
    # Binary label derived from the sign of the profit
    balanced['target'] = (balanced['profit'] > 0).astype('int64')
    # Features are every column except the last two, followed by the label
    columns_to_keep = unique_rows.columns[:-2].tolist() + ['target']
    balanced = balanced[columns_to_keep]
    # Shuffle the rows
    balanced = balanced.sample(frac=1).reset_index(drop=True)
    # Drop rows containing missing values, if any
    if balanced.isna().any().any():
        balanced = balanced.dropna()
    return balanced

In [3]:
# Load, clean and prepare the datasets
def load_dataset(df):
    """Split the raw trades into feature/label arrays for buys and sells.

    Args:
        df: Raw trades DataFrame accepted by ``create_training_dataset``.

    Returns:
        Tuple ``(X_buy_train, y_buy_train, X_sell_train, y_sell_train)`` of
        NumPy arrays: feature matrix and ``target`` vector for trade type 1
        (buy), then for trade type -1 (sell).

    Raises:
        ValueError: If ``create_training_dataset`` reports (by returning
            ``False``) that one direction has no winning or no losing
            trades. Previously this surfaced as an ``AttributeError`` on the
            ``False`` return value.
    """
    def _features_and_target(trade_type, label):
        # Build the balanced frame and split it into X / y arrays.
        dataset = create_training_dataset(df, trade_type=trade_type)
        if dataset is False:
            raise ValueError(
                f"No se pudo construir el dataset de {label}: "
                "faltan operaciones ganadoras o perdedoras."
            )
        return dataset.drop(columns='target').values, dataset['target'].values

    # Buy side first, then sell side (same order as the printed summaries)
    X_buy_train, y_buy_train = _features_and_target(1, 'compras')
    X_sell_train, y_sell_train = _features_and_target(-1, 'ventas')
    return X_buy_train, y_buy_train, X_sell_train, y_sell_train

In [None]:
file_folder = r"/mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/Common/Files/"
# Find the training CSV files in that folder
file_pattern = os.path.join(file_folder, 'training_dataset_*.csv')
df_file_path = glob.glob(file_pattern)
# Fail with an explicit message instead of an IndexError when nothing matches
if not df_file_path:
    raise FileNotFoundError(f"No se encontró ningún archivo que coincida con: {file_pattern}")
# Only the first match returned by glob is used
df = pd.read_csv(df_file_path[0])
X_buy_train, y_buy_train, X_sell_train, y_sell_train = load_dataset(df)
print(f"Buy  -> Trades: {X_buy_train.shape[0]} Features: {X_buy_train.shape[1]}")
print(f"Sell -> Trades: {X_sell_train.shape[0]} Features: {X_sell_train.shape[1]}")

## Entrenar modelos

In [5]:
def train_model_buy(X_buy_train, y_buy_train, param_grid, cv, crossover_adapter, mutation_adapter):
    """Tune and fit the buy-side pipeline with a genetic hyperparameter search.

    The pipeline is RobustScaler -> KernelPCA (rbf) -> XGBClassifier, tuned by
    ``GASearchCV`` on accuracy.

    Args:
        X_buy_train: Feature matrix for buy trades.
        y_buy_train: Binary target vector for buy trades.
        param_grid: Search space passed to ``GASearchCV``.
        cv: Cross-validation splitter passed to ``GASearchCV``.
        crossover_adapter: Crossover probability (adapter or value).
        mutation_adapter: Mutation probability (adapter or value).

    Returns:
        The best pipeline found (``best_estimator_`` of the search).

    Raises:
        Exception: Any error raised while building or fitting the search is
            printed and re-raised unchanged.
    """
    import copy

    try:
        # Pipeline whose step parameters are filled in by the search (buys)
        # NOTE(review): 'mlogloss' is XGBoost's multiclass log-loss metric while
        # the target built in this file is binary — confirm 'logloss' was not intended.
        # NOTE(review): 'gpu_hist' / 'gpu_predictor' are reported as deprecated in
        # recent XGBoost releases — confirm against the installed version.
        pipe_buy = Pipeline([
            ('scaler', RobustScaler()),
            ('dim_reducer', KernelPCA(kernel='rbf')),
            ('xgb', xgb.XGBClassifier(eval_metric='mlogloss', tree_method='gpu_hist', predictor='gpu_predictor', verbosity=0))
        ])
        # Genetic search. The adapters are deep-copied so this search owns its
        # probability schedules: this file passes the same adapter objects to
        # the buy and sell trainings, which run in parallel threads.
        evolutionary_search_buy = GASearchCV(
            estimator=pipe_buy,
            cv=cv,
            scoring='accuracy',
            population_size=50,
            generations=15,
            tournament_size=5,
            elitism=True,
            crossover_probability=copy.deepcopy(crossover_adapter),
            mutation_probability=copy.deepcopy(mutation_adapter),
            param_grid=param_grid,
            criteria='max',
            algorithm='eaMuPlusLambda',
            n_jobs=1,
            verbose=True,
            keep_top_k=3,
            error_score='raise'
        )
        # Fit, with a progress bar and early stopping on the 'fitness' metric
        evolutionary_search_buy.fit(X_buy_train, y_buy_train, callbacks=[ProgressBar(), ConsecutiveStopping(generations=1, metric='fitness')])
        # Best pipeline found by the search
        model_buy = evolutionary_search_buy.best_estimator_
        # Report the winning hyperparameters and their score
        print(evolutionary_search_buy.best_params_)
        print(evolutionary_search_buy.best_score_)
    except Exception as e:
        print(f"Error en train_model_buy: {e}")
        raise
    return model_buy

In [6]:
def train_model_sell(X_sell_train, y_sell_train, param_grid, cv, crossover_adapter, mutation_adapter):
    """Tune and fit the sell-side pipeline with a genetic hyperparameter search.

    The pipeline is RobustScaler -> KernelPCA (rbf) -> XGBClassifier, tuned by
    ``GASearchCV`` on accuracy.

    Args:
        X_sell_train: Feature matrix for sell trades.
        y_sell_train: Binary target vector for sell trades.
        param_grid: Search space passed to ``GASearchCV``.
        cv: Cross-validation splitter passed to ``GASearchCV``.
        crossover_adapter: Crossover probability (adapter or value).
        mutation_adapter: Mutation probability (adapter or value).

    Returns:
        The best pipeline found (``best_estimator_`` of the search).

    Raises:
        Exception: Any error raised while building or fitting the search is
            printed and re-raised unchanged.
    """
    import copy

    try:
        # Pipeline whose step parameters are filled in by the search (sells)
        # NOTE(review): 'mlogloss' is XGBoost's multiclass log-loss metric while
        # the target built in this file is binary — confirm 'logloss' was not intended.
        # NOTE(review): 'gpu_hist' / 'gpu_predictor' are reported as deprecated in
        # recent XGBoost releases — confirm against the installed version.
        pipe_sell = Pipeline([
            ('scaler', RobustScaler()),
            ('dim_reducer', KernelPCA(kernel='rbf')),
            ('xgb', xgb.XGBClassifier(eval_metric='mlogloss', tree_method='gpu_hist', predictor='gpu_predictor', verbosity=0))
        ])
        # Genetic search. The adapters are deep-copied so this search owns its
        # probability schedules: this file passes the same adapter objects to
        # the buy and sell trainings, which run in parallel threads.
        evolutionary_search_sell = GASearchCV(
            estimator=pipe_sell,
            cv=cv,
            scoring='accuracy',
            population_size=50,
            generations=15,
            tournament_size=5,
            elitism=True,
            crossover_probability=copy.deepcopy(crossover_adapter),
            mutation_probability=copy.deepcopy(mutation_adapter),
            param_grid=param_grid,
            criteria='max',
            algorithm='eaMuPlusLambda',
            n_jobs=1,
            verbose=True,
            keep_top_k=3,
            error_score='raise'
        )
        # Fit, with a progress bar and early stopping on the 'fitness' metric
        evolutionary_search_sell.fit(X_sell_train, y_sell_train, callbacks=[ProgressBar(), ConsecutiveStopping(generations=1, metric='fitness')])
        # Best pipeline found by the search
        model_sell = evolutionary_search_sell.best_estimator_
        # Report the winning hyperparameters and their score
        print(evolutionary_search_sell.best_params_)
        print(evolutionary_search_sell.best_score_)
    except Exception as e:
        # Message now names this function (it previously said train_model_buy)
        print(f"Error en train_model_sell: {e}")
        raise
    return model_sell

In [7]:
# Number of input features; caps the KernelPCA component count below
n_features = X_buy_train.shape[1]
# Cross-validation scheme used to score each candidate
cv = StratifiedKFold(n_splits=5, shuffle=True)
# Genetic operator probabilities: crossover goes 0.1 -> 0.9, mutation 0.9 -> 0.1
crossover_adapter = ExponentialAdapter(
    initial_value=0.1,
    end_value=0.9,
    adaptive_rate=0.1,
)
mutation_adapter = ExponentialAdapter(
    initial_value=0.9,
    end_value=0.1,
    adaptive_rate=0.1,
)
# Hyperparameter search space, keyed as <pipeline step>__<parameter>
param_grid = dict(
    dim_reducer__n_components=Integer(1, n_features),
    dim_reducer__gamma=Continuous(1e-4, 1e-1, distribution='log-uniform'),
    xgb__n_estimators=Integer(50, 500),
    xgb__max_depth=Integer(3, 10),
    xgb__learning_rate=Continuous(0.01, 0.3),
    xgb__subsample=Continuous(0.6, 1.0),
    xgb__colsample_bytree=Continuous(0.6, 1.0),
    xgb__gamma=Continuous(0, 0.5),
    xgb__min_child_weight=Integer(1, 10),
    xgb__reg_alpha=Continuous(0, 1.0),
    xgb__reg_lambda=Continuous(0, 1.0),
)

In [None]:
# Train the buy and sell models at the same time, one worker thread each
with ThreadPoolExecutor(max_workers=2) as executor:
    # Queue both training jobs
    future_buy = executor.submit(
        train_model_buy,
        X_buy_train, y_buy_train,
        param_grid, cv, crossover_adapter, mutation_adapter,
    )
    future_sell = executor.submit(
        train_model_sell,
        X_sell_train, y_sell_train,
        param_grid, cv, crossover_adapter, mutation_adapter,
    )
    # Block until both jobs have finished
    futures = [future_buy, future_sell]
    print("Esperando que las tareas finalicen...")
    wait(futures)
    print("¡Todas las tareas han terminado!")
    # Collect the fitted models; result() re-raises any training error
    model_buy = future_buy.result()
    model_sell = future_sell.result()

## Exportar modelos a formato ONNX

In [None]:
def save_onnx_models(mql5_files_folder):
    """Convert the trained buy and sell pipelines to ONNX and write them to disk.

    Reads the module-level ``model_buy``, ``model_sell``, ``X_buy_train`` and
    ``X_sell_train``. Writes ``model_buy.onnx`` and ``model_sell.onnx`` into
    ``mql5_files_folder``.

    Args:
        mql5_files_folder: Directory in which the two ONNX files are created.

    Raises:
        Exception: Any conversion or file error is printed and re-raised
            unchanged.
    """
    try:
        # Register the XGBoost converter so skl2onnx can handle the 'xgb' step
        update_registered_converter(
            xgb.XGBClassifier,
            "XGBClassifier",
            calculate_linear_classifier_output_shapes,
            convert_xgboost,
            options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}
        )
        model_buy_onnx = convert_sklearn(
            model_buy,
            'pipeline_buy_xgboost',
            [('input', FloatTensorType([None, X_buy_train.shape[1]]))],
            target_opset={'': 12, 'ai.onnx.ml': 2}
        )
        # The sell model's input width comes from the sell feature matrix
        # (it previously used X_buy_train here).
        model_sell_onnx = convert_sklearn(
            model_sell,
            'pipeline_sell_xgboost',
            [('input', FloatTensorType([None, X_sell_train.shape[1]]))],
            target_opset={'': 12, 'ai.onnx.ml': 2}
        )
        with open(os.path.join(mql5_files_folder, "model_buy.onnx"), 'wb') as f:
            f.write(model_buy_onnx.SerializeToString())
        with open(os.path.join(mql5_files_folder, "model_sell.onnx"), 'wb') as f:
            f.write(model_sell_onnx.SerializeToString())
    except Exception as e:
        print(f"Error en exportar los modelos: {e}")
        raise
    print("Modelos ONNX exportados correctamente")

In [21]:
# Export both trained models as ONNX files into the folder below.
# NOTE(review): this is a Windows-style path, while the training CSV is read
# from a /mnt/c/... path earlier in the file — confirm which OS runs this cell.
save_onnx_models(r'C:\Users\Administrador\AppData\Roaming\MetaQuotes\Terminal\6C3C6A11D1C3791DD4DBF45421BF8028\MQL5\Files')