## Importar librerías

In [None]:
import os
import cupy as cp
#import cudf
from cuml.metrics import accuracy_score
from cuml.model_selection import StratifiedKFold
import xgboost as xgb
from concurrent.futures import ThreadPoolExecutor, wait
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
import warnings
warnings.filterwarnings('ignore')

## Cargar y preparar datos

In [7]:
def _load_shuffled_split(csv_path):
    """Read one training CSV and return its shuffled ``(features, target)`` pair."""
    # Imported locally: the module-level ``import cudf`` is commented out, so
    # without this the function fails with NameError on a fresh kernel.
    import cudf

    frame = cudf.read_csv(csv_path)
    # dropna() leaves complete data untouched, so no separate NaN check is needed.
    frame = frame.dropna()
    # sample(frac=1) returns every row in random order; the index is rebuilt
    # so it is contiguous again after the shuffle.
    frame = frame.sample(frac=1).reset_index(drop=True)
    return frame.drop(columns='target'), frame['target']


def load_dataset(tester_files_folder):
    """Load the buy and sell training sets from ``tester_files_folder``.

    Reads ``buy_training_dataset.csv`` and ``sell_training_dataset.csv`` from
    the given folder, drops rows containing missing values, shuffles the rows
    and separates the ``target`` column from the remaining columns.

    Args:
        tester_files_folder: Directory that contains both CSV files.

    Returns:
        Tuple ``(X_buy_train, y_buy_train, X_sell_train, y_sell_train)`` where
        each ``X_*`` holds every column except ``target`` and each ``y_*`` is
        the ``target`` column.
    """
    X_buy_train, y_buy_train = _load_shuffled_split(
        os.path.join(tester_files_folder, "buy_training_dataset.csv")
    )
    X_sell_train, y_sell_train = _load_shuffled_split(
        os.path.join(tester_files_folder, "sell_training_dataset.csv")
    )
    return X_buy_train, y_buy_train, X_sell_train, y_sell_train

In [None]:
# Load both training sets from the terminal's common files folder; the four
# results stay at module level because later cells read them directly.
X_buy_train, y_buy_train, X_sell_train, y_sell_train = load_dataset(r"/mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/Common/Files/")
# shape is (rows, columns): one row per trade, one column per feature.
print(f"Buy -> Trades: {X_buy_train.shape[0]} Features: {X_buy_train.shape[1]}")
print(f"Sell -> Trades: {X_sell_train.shape[0]} Features: {X_sell_train.shape[1]}")

## Algoritmo genético para encontrar los mejores hiperparámetros

In [None]:
def decode_chromosome(chromosome, param_grid):
    """Translate a chromosome into concrete hyperparameter values.

    Gene ``i`` is paired with the ``i``-th key of ``param_grid`` (insertion
    order) and scaled linearly as ``gene * (high - low) + low``. Genes are not
    clamped here, so a gene outside [0, 1] lands outside ``[low, high]``.

    Args:
        chromosome: Indexable sequence of numeric genes, one per entry of
            ``param_grid``.
        param_grid: Mapping of parameter name to a dict with the keys
            ``'type'`` (``'int'`` or ``'float'``), ``'low'`` and ``'high'``.

    Returns:
        Dict mapping each parameter name to a native Python ``int`` or
        ``float``.

    Raises:
        ValueError: If an entry declares a ``'type'`` other than ``'int'`` or
            ``'float'``.
    """
    param_values = {}
    for i, (key, param_info) in enumerate(param_grid.items()):
        low = param_info['low']
        high = param_info['high']
        # float() turns a 0-d array gene into a plain scalar so the decoded
        # values are ordinary Python numbers rather than array objects.
        scaled = float(chromosome[i]) * (high - low) + low
        kind = param_info['type']
        if kind == 'int':
            value = int(round(scaled))
        elif kind == 'float':
            value = scaled
        else:
            # Previously an unknown type left ``value`` unbound, or silently
            # reused the value decoded for the preceding parameter.
            raise ValueError(
                f"Unsupported parameter type {kind!r} for {key!r}; expected 'int' or 'float'"
            )
        param_values[key] = value
    return param_values

def initialize_population(pop_size, chromosome_length):
    """Create the starting population of random chromosomes.

    Args:
        pop_size: Number of chromosomes (rows).
        chromosome_length: Number of genes per chromosome (columns).

    Returns:
        Array of shape ``(pop_size, chromosome_length)`` with every gene drawn
        uniformly between 0.0 and 1.0.
    """
    shape = (pop_size, chromosome_length)
    return cp.random.uniform(0.0, 1.0, shape)

def evaluate_population(population, X_train, y_train, cv, param_grid):
    """Compute the fitness of every chromosome in ``population``.

    Each chromosome is decoded into XGBoost parameters; a classifier is then
    trained and scored on every split produced by ``cv``. The fitness of a
    chromosome is the mean accuracy over its splits.

    Args:
        population: Iterable of chromosomes.
        X_train: Feature table; rows are selected with ``.iloc``.
        y_train: Target column; rows are selected with ``.iloc``.
        cv: Splitter whose ``split(X, y)`` yields ``(train_idx, val_idx)``.
        param_grid: Search-space description passed to ``decode_chromosome``.

    Returns:
        Array with one fitness value per chromosome, in population order.
    """
    def fold_accuracy(params, train_idx, val_idx):
        # Train on the fold's training rows and score on its validation rows.
        classifier = xgb.XGBClassifier(
            tree_method='gpu_hist',
            predictor='gpu_predictor',
            use_label_encoder=False,
            verbosity=0,
            **params
        )
        classifier.fit(X_train.iloc[train_idx], y_train.iloc[train_idx])
        predictions = classifier.predict(X_train.iloc[val_idx])
        return accuracy_score(y_train.iloc[val_idx], predictions)

    fitnesses = []
    for chromosome in population:
        params = decode_chromosome(chromosome, param_grid)
        fold_scores = [
            fold_accuracy(params, train_idx, val_idx)
            for train_idx, val_idx in cv.split(X_train, y_train)
        ]
        fitnesses.append(cp.mean(cp.array(fold_scores)))
    return cp.array(fitnesses)

def select_parents(population, fitnesses, tournament_size=2):
    """Pick parents by tournament selection.

    One tournament is run per individual in ``population``: ``tournament_size``
    indices are drawn at random (repeats allowed) and the contender with the
    highest fitness wins.

    Args:
        population: 2-D array, one chromosome per row.
        fitnesses: 1-D array of fitness values aligned with ``population``.
        tournament_size: Number of contenders drawn for each tournament.

    Returns:
        2-D array of winners with as many rows as ``population``.
    """
    n_individuals = len(population)

    def run_tournament():
        contenders = cp.random.randint(0, n_individuals, size=tournament_size)
        winner = contenders[cp.argmax(fitnesses[contenders])]
        return population[winner]

    return cp.vstack([run_tournament() for _ in range(n_individuals)])

def crossover(parents, crossover_rate):
    """Recombine consecutive parent pairs with single-point crossover.

    Parents are paired as (0, 1), (2, 3), ... With probability
    ``crossover_rate`` a pair swaps tails at a random cut point; otherwise both
    parents are carried over unchanged. ``parents`` itself is never modified.

    Args:
        parents: 2-D array, one chromosome per row.
        crossover_rate: Probability that a pair is recombined.

    Returns:
        New 2-D array with exactly as many rows as ``parents``.
    """
    n_parents = len(parents)
    offspring = []
    for i in range(0, n_parents, 2):
        parent1 = parents[i]
        # With an odd parent count the last individual is paired with the
        # first one (wrap-around).
        parent2 = parents[(i + 1) % n_parents]
        n_genes = len(parent1)
        # A cut point needs at least one gene on each side, so single-gene
        # chromosomes are passed through instead of failing in randint(1, 1).
        if n_genes > 1 and cp.random.rand() < crossover_rate:
            point = int(cp.random.randint(1, n_genes))
            offspring.append(cp.concatenate((parent1[:point], parent2[point:])))
            offspring.append(cp.concatenate((parent2[:point], parent1[point:])))
        else:
            offspring.append(parent1)
            offspring.append(parent2)
    # The wrap-around pair yields one surplus child when the parent count is
    # odd; trim it so the population size does not grow between generations.
    return cp.vstack(offspring)[:n_parents]

def mutate(offspring, mutation_rate, mutation_scale=0.1):
    """Apply random single-gene mutations to ``offspring`` in place.

    Each chromosome is mutated with probability ``mutation_rate``. A mutation
    adds normally distributed noise (mean 0, standard deviation
    ``mutation_scale``) to one randomly chosen gene and clips the result to
    the range [0.0, 1.0].

    Args:
        offspring: 2-D array, one chromosome per row; modified in place.
        mutation_rate: Probability that a given chromosome is mutated.
        mutation_scale: Standard deviation of the added noise.

    Returns:
        The same ``offspring`` array that was passed in.
    """
    for genes in offspring:
        if not cp.random.rand() < mutation_rate:
            continue
        position = cp.random.randint(0, len(genes))
        noise = cp.random.normal(0, mutation_scale)
        genes[position] = cp.clip(genes[position] + noise, 0.0, 1.0)
    return offspring

def genetic_algorithm(
    X_train, y_train, param_grid, pop_size=20, generations=10, early_stopping_rounds=1,
    crossover_initial=0.1, crossover_end=0.9,
    mutation_initial=0.9, mutation_end=0.1,
    elitism=True, elite_size=3,
    tournament_size=5
):
    """Search ``param_grid`` with a genetic algorithm and return the best parameters.

    Every generation scores the population with 5-fold stratified
    cross-validation, records the best chromosome seen so far, and breeds the
    next population through tournament selection, crossover and mutation. The
    search stops after ``generations`` iterations, or earlier once the best
    fitness has failed to improve for ``early_stopping_rounds`` generations in
    a row.

    Args:
        X_train: Training features.
        y_train: Training targets.
        param_grid: Search space; one gene is used per entry.
        pop_size: Number of chromosomes in the population.
        generations: Maximum number of generations.
        early_stopping_rounds: Consecutive non-improving generations tolerated
            before stopping.
        crossover_initial: Crossover probability at the first generation.
        crossover_end: Crossover probability the schedule moves towards.
        mutation_initial: Mutation probability at the first generation.
        mutation_end: Mutation probability the schedule moves towards.
        elitism: Whether the fittest chromosomes are copied unchanged into the
            next generation.
        elite_size: Number of chromosomes preserved when ``elitism`` is on.
        tournament_size: Contenders per selection tournament.

    Returns:
        The hyperparameter dict decoded from the best chromosome found.
    """
    cv = StratifiedKFold(n_splits=5, shuffle=True)
    population = initialize_population(pop_size, len(param_grid))
    best_overall_fitness = -cp.inf
    best_overall_chromosome = None
    stalled_generations = 0

    for generation in range(generations):
        print(f"Generación [{generation+1}]")
        # Both rates are interpolated geometrically between their initial and
        # end values, driven by the fraction of the generation budget used.
        progress = generation / generations
        crossover_rate = crossover_initial * ((crossover_end / crossover_initial) ** progress)
        mutation_rate = mutation_initial * ((mutation_end / mutation_initial) ** progress)
        print(f"Probabilidad de cruce en generación [{generation+1}]: {crossover_rate:.4f}, Probabilidad de mutación: {mutation_rate:.4f}")

        fitnesses = evaluate_population(population, X_train, y_train, cv, param_grid)
        current_best_fitness = cp.max(fitnesses)
        print(f"Mejor fitness en generación [{generation+1}]: {current_best_fitness}")

        # Track the best chromosome seen so far and count stalled generations.
        if current_best_fitness > best_overall_fitness:
            best_overall_fitness = current_best_fitness
            best_overall_chromosome = population[cp.argmax(fitnesses)]
            stalled_generations = 0
        else:
            stalled_generations += 1
        if stalled_generations >= early_stopping_rounds:
            print(f"No hubo mejora en el fitness por {early_stopping_rounds} generaciones consecutivas. Deteniendo el algoritmo.")
            break

        # Elites are taken from the current population before it is replaced.
        elites = population[cp.argsort(fitnesses)[::-1][:elite_size]] if elitism else None
        parents = select_parents(population, fitnesses, tournament_size=tournament_size)
        population = mutate(
            crossover(parents, crossover_rate=crossover_rate),
            mutation_rate=mutation_rate,
        )
        if elitism:
            # Elites go first, so trimming to pop_size drops trailing offspring.
            population = cp.vstack((elites, population))[:pop_size]

    return decode_chromosome(best_overall_chromosome, param_grid)

In [11]:
# Hyperparameter search space: one row per parameter, as
# (name, type, low, high). Row order fixes which gene maps to which parameter.
param_grid = {
    name: {'type': kind, 'low': low, 'high': high}
    for name, kind, low, high in (
        ('n_estimators', 'int', 50, 500),
        ('max_depth', 'int', 3, 10),
        ('learning_rate', 'float', 0.01, 0.3),
        ('subsample', 'float', 0.6, 1.0),
        ('colsample_bytree', 'float', 0.6, 1.0),
        ('gamma', 'float', 0.0, 0.5),
        ('min_child_weight', 'int', 1, 10),
        ('reg_alpha', 'float', 0.0, 1.0),
        ('reg_lambda', 'float', 0.0, 1.0),
    )
}

In [None]:
# Run the buy and sell hyperparameter searches at the same time, one worker
# thread each. Both searches use the same genetic-algorithm settings.
ga_settings = dict(
    pop_size=50,
    generations=100,
    early_stopping_rounds=5,
    crossover_initial=0.1,
    crossover_end=0.9,
    mutation_initial=0.9,
    mutation_end=0.1,
    elitism=True,
    elite_size=2,
    tournament_size=5,
)
with ThreadPoolExecutor(max_workers=2) as executor:
    # Submit both searches; each call returns a future immediately.
    best_buy_params = executor.submit(
        genetic_algorithm, X_buy_train, y_buy_train, param_grid, **ga_settings
    )
    best_sell_params = executor.submit(
        genetic_algorithm, X_sell_train, y_sell_train, param_grid, **ga_settings
    )
    # Block until both searches have finished.
    print("Esperando que las tareas finalicen...")
    futures = [best_buy_params, best_sell_params]
    wait(futures)
    print("¡Todas las tareas han terminado!")
    # result() returns the best parameters, or re-raises an exception raised
    # inside the worker.
    model_buy_params = best_buy_params.result()
    model_sell_params = best_sell_params.result()

## Entrenar los modelos

In [None]:
# Constructor settings shared by both final classifiers; the tuned
# hyperparameters found by the search are merged in per model.
final_model_settings = {
    'tree_method': 'gpu_hist',
    'predictor': 'gpu_predictor',
    'use_label_encoder': False,
    'verbosity': 0,
}

# Fit the buy model on the full buy training set.
model_buy = xgb.XGBClassifier(**final_model_settings, **model_buy_params)
model_buy.fit(X_buy_train, y_buy_train)

# Fit the sell model on the full sell training set.
model_sell = xgb.XGBClassifier(**final_model_settings, **model_sell_params)
model_sell.fit(X_sell_train, y_sell_train)

## Exportar modelos a formato ONNX

In [None]:
def save_onnx_models(mql5_files_folder):
    """Convert the trained buy and sell classifiers to ONNX and save them.

    Reads the module-level ``model_buy``, ``model_sell``, ``X_buy_train`` and
    ``X_sell_train``. Writes ``model_buy.onnx`` and ``model_sell.onnx`` into
    ``mql5_files_folder``. Both models are converted before either file is
    written, so a failed conversion leaves no file behind.

    Args:
        mql5_files_folder: Directory that receives the two ``.onnx`` files.

    Raises:
        Exception: Any error raised while registering the converter,
            converting or writing is printed and then re-raised unchanged.
    """
    try:
        # Register the XGBoost converter so convert_sklearn accepts XGBClassifier.
        update_registered_converter(
            xgb.XGBClassifier,
            "XGBClassifier",
            calculate_linear_classifier_output_shapes,
            convert_xgboost,
            options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}
        )
        # Each model declares an input as wide as ITS OWN training table; the
        # sell model previously reused the buy table's column count.
        exports = (
            ("model_buy.onnx", model_buy, 'pipeline_buy_xgboost', X_buy_train.shape[1]),
            ("model_sell.onnx", model_sell, 'pipeline_sell_xgboost', X_sell_train.shape[1]),
        )
        converted = [
            (
                file_name,
                convert_sklearn(
                    model,
                    pipeline_name,
                    [('input', FloatTensorType([None, n_features]))],
                    target_opset={'': 12, 'ai.onnx.ml': 2}
                ),
            )
            for file_name, model, pipeline_name, n_features in exports
        ]
        for file_name, onnx_model in converted:
            with open(os.path.join(mql5_files_folder, file_name), 'wb') as f:
                f.write(onnx_model.SerializeToString())
    except Exception as e:
        print(f"Error en exportar los modelos: {e}")
        raise
    print("Modelos ONNX exportados correctamente")

In [21]:
# Export both trained models as ONNX files into the terminal's MQL5\Files folder.
# NOTE(review): this is a Windows-style path, while the training data is loaded
# from a /mnt/c/... path — confirm both cells run in the same environment.
save_onnx_models(r'C:\Users\Administrador\AppData\Roaming\MetaQuotes\Terminal\6C3C6A11D1C3791DD4DBF45421BF8028\MQL5\Files')