## Importar librerías

In [1]:
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE
from sklearn.base import clone
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.feature_selection import GenericUnivariateSelect
from sklearn.feature_selection import f_classif
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline
import xgboost as xgb
import multiprocessing
from tqdm.notebook import tqdm
from joblib import Parallel, delayed
from concurrent.futures import ThreadPoolExecutor, wait
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost
import warnings
warnings.filterwarnings('ignore')

## Cargar y preparar datos

In [2]:
def create_training_dataset(df, trade_type, random_state=None):
    """Build a class-balanced, shuffled training frame for one trade direction.

    Steps: drop duplicate rows, keep only rows whose ``type`` column equals
    ``trade_type``, label each row 1 when ``profit`` > 0 (else 0), drop rows
    with missing feature values, oversample with SMOTE and shuffle.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw trades. Must contain ``type`` and ``profit`` columns. Every column
        except the last two is used as a feature.
    trade_type : scalar
        Value of the ``type`` column to keep.
    random_state : int or None, optional
        Seed forwarded to SMOTE and to the final shuffle. ``None`` (default)
        keeps the original non-deterministic behaviour.

    Returns
    -------
    pandas.DataFrame
        Feature columns plus a ``target`` column, balanced and shuffled.

    Raises
    ------
    ValueError
        If no usable rows remain for ``trade_type``.
    """
    df = df.drop_duplicates()
    # Keep only the trades of the requested type.
    df_trade = df[df['type'] == trade_type].copy()
    # Vectorized labelling; NaN profits compare False and therefore map to 0,
    # exactly like the previous row-wise lambda.
    df_trade['target'] = (df_trade['profit'] > 0).astype(int)
    # NOTE(review): assumes the two trailing columns are the non-feature
    # columns (presumably 'type' and 'profit') - confirm against the CSV layout.
    feature_columns = df.columns[:-2]
    # Drop rows with missing features and keep the labels aligned by index.
    X = df_trade[feature_columns].dropna()
    y = df_trade.loc[X.index, 'target']
    if X.empty:
        raise ValueError(f"No hay operaciones válidas para trade_type={trade_type}")
    # Balance the classes by oversampling.
    smote = SMOTE(random_state=random_state)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    # Assemble the final frame.
    df_training = pd.DataFrame(X_resampled, columns=feature_columns)
    df_training['target'] = y_resampled
    # Shuffle the rows so resampled rows are not grouped together.
    df_training = df_training.sample(frac=1, random_state=random_state).reset_index(drop=True)
    return df_training

In [3]:
# Load, clean and prepare the buy and sell datasets
def load_dataset(df):
    """Split the raw trades into buy/sell training arrays.

    Calls ``create_training_dataset`` once for buys (``trade_type=1``) and
    once for sells (``trade_type=-1``).

    Returns
    -------
    tuple
        ``(X_buy, y_buy, X_sell, y_sell)`` as NumPy arrays.
    """
    arrays = []
    # Buys first, then sells, so the output order matches the callers' unpacking.
    for direction in (1, -1):
        dataset = create_training_dataset(df, trade_type=direction)
        arrays.append(dataset.drop(columns='target').values)
        arrays.append(dataset['target'].values)
    return tuple(arrays)

In [None]:
# Folder that holds the exported training CSV files (machine-specific path).
file_folder = r"/mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/Common/Files/"
# Locate the CSV files.
file_pattern = os.path.join(file_folder, 'training_dataset_*.csv')
# glob returns matches in arbitrary order; sorting makes the selected file
# deterministic across runs.
df_file_path = sorted(glob.glob(file_pattern))
if not df_file_path:
    # Fail with an explicit message instead of an IndexError on the lookup below.
    raise FileNotFoundError(f"No se encontró ningún archivo que coincida con: {file_pattern}")
df = pd.read_csv(df_file_path[0])
X_buy_train, y_buy_train, X_sell_train, y_sell_train = load_dataset(df)
print(f"Buy  -> Trades: {X_buy_train.shape[0]} | Features: {X_buy_train.shape[1]}")
print(f"Sell -> Trades: {X_sell_train.shape[0]} | Features: {X_sell_train.shape[1]}")

## Algoritmo genético de optimización de hiperparámetros

In [5]:
class GeneticAlgorithmCV:
    """Hyperparameter search driven by a genetic algorithm and cross-validation.

    Each individual is a chromosome of genes in ``[0, 1]``, one gene per entry
    of ``param_grid``. A gene is mapped linearly onto the ``[low, high]`` range
    of its parameter (rounded for ``'int'`` parameters). Fitness is the mean of
    ``scoring(y_true, y_pred)`` over the folds produced by ``cv``.

    Crossover and mutation rates are adapted every generation from the last
    fitness improvement and the population diversity through a sigmoid.

    Parameters
    ----------
    model_type : object
        Label used only in progress and log messages.
    estimator : estimator
        Object compatible with ``clone``, ``set_params``, ``fit``, ``predict``.
    param_grid : dict
        ``{name: {'type': 'int' | 'float', 'low': ..., 'high': ...}}``.
    cv : splitter
        Object exposing ``split(X, y)``. Required by ``fit``.
    scoring : callable
        ``scoring(y_true, y_pred) -> float``; higher is better. Required by ``fit``.
    pop_size, generations, early_stopping_rounds : int
        Population size, maximum number of generations, and number of
        generations without improvement before stopping.
    crossover_initial, crossover_end, mutation_initial, mutation_end : float
        End points between which the adaptive rates are interpolated.
    elitism : bool
        Whether the best ``elite_size`` individuals are carried over unchanged.
    elite_size, tournament_size, n_random : int
        Number of elites, tournament size, and random individuals injected
        per generation.
    n_jobs : int
        Parallel workers; ``-1`` uses ``multiprocessing.cpu_count()``.
    verbose : bool
        Print per-generation diagnostics.
    alpha, beta : float
        Sigmoid weights for fitness improvement and diversity respectively.

    Attributes
    ----------
    best_params_, best_score_, best_estimator_
        Populated by ``fit``.
    """

    def __init__(
        self,
        model_type,
        estimator,
        param_grid,
        cv=None,
        scoring=None,
        pop_size=100,
        generations=25,
        early_stopping_rounds=5,
        crossover_initial=0.1,
        crossover_end=0.9,
        mutation_initial=0.9,
        mutation_end=0.1,
        elitism=True,
        elite_size=10,
        tournament_size=5,
        n_random=5,
        n_jobs=-1,
        verbose=False,
        alpha=1.5,
        beta=1.0,
    ):
        self.model_type = model_type
        self.estimator = estimator
        self.param_grid = param_grid
        self.cv = cv
        self.scoring = scoring
        self.pop_size = pop_size
        self.generations = generations
        self.early_stopping_rounds = early_stopping_rounds
        self.crossover_initial = crossover_initial
        self.crossover_end = crossover_end
        self.mutation_initial = mutation_initial
        self.mutation_end = mutation_end
        self.elitism = elitism
        self.elite_size = elite_size
        self.tournament_size = tournament_size
        self.n_random = n_random
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.best_params_ = None
        self.best_score_ = None
        self.best_estimator_ = None
        self.alpha = alpha
        self.beta = beta

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    def decode_chromosome(self, chromosome):
        """Map a chromosome of genes in [0, 1] to a ``{param: value}`` dict.

        Raises
        ------
        ValueError
            If a ``param_grid`` entry has a type other than 'int' or 'float'.
        """
        param_values = {}
        for i, (key, param_info) in enumerate(self.param_grid.items()):
            gene = chromosome[i]
            low = param_info['low']
            high = param_info['high']
            if param_info['type'] == 'int':
                value = int(np.round(gene * (high - low) + low))
            elif param_info['type'] == 'float':
                value = gene * (high - low) + low
            else:
                # Previously an unknown type silently reused the value of the
                # preceding parameter (or raised UnboundLocalError).
                raise ValueError(f"Tipo de parámetro no soportado: {param_info['type']}")
            param_values[key] = value
        return param_values

    def initialize_population(self):
        """Return ``pop_size`` chromosomes sampled uniformly from [0, 1)."""
        chromosome_length = len(self.param_grid)
        return np.random.uniform(low=0.0, high=1.0, size=(self.pop_size, chromosome_length))

    def evaluate_population(self, population, X_train, y_train):
        """Return the mean cross-validated score of every chromosome.

        The folds are drawn once per call and shared by all individuals, so
        fitness values within a generation are compared on identical splits
        even when ``cv`` shuffles without a fixed seed.
        """
        splits = list(self.cv.split(X_train, y_train))

        def evaluate_individual(chromosome):
            params = self.decode_chromosome(chromosome)
            scores = []
            for train_idx, val_idx in splits:
                X_tr, X_val = X_train[train_idx], X_train[val_idx]
                y_tr, y_val = y_train[train_idx], y_train[val_idx]
                model = clone(self.estimator)
                model.set_params(**params)
                model.fit(X_tr, y_tr)
                y_pred = model.predict(X_val)
                scores.append(self.scoring(y_val, y_pred))
            return np.mean(np.array(scores))

        n_jobs = multiprocessing.cpu_count() if self.n_jobs == -1 else self.n_jobs
        fitnesses = Parallel(n_jobs=n_jobs)(
            delayed(evaluate_individual)(chromosome) for chromosome in population
        )
        return np.array(fitnesses)

    def select_parents(self, population, fitnesses):
        """Tournament selection: pick ``len(population)`` winners with replacement."""
        selected = []
        for _ in range(len(population)):
            indices = np.random.randint(0, len(population), size=self.tournament_size)
            best_idx = indices[np.argmax(fitnesses[indices])]
            selected.append(population[best_idx])
        return np.vstack(selected)

    def crossover(self, parents, crossover_rate):
        """Single-point crossover over consecutive parent pairs.

        With an odd number of parents the last one is paired with the first,
        so the result can hold one more row than ``parents``. Chromosomes with
        a single gene have no valid cut point and are copied unchanged.
        """
        offspring = []
        for i in range(0, len(parents), 2):
            parent1 = parents[i].copy()
            parent2 = parents[(i + 1) % len(parents)].copy()
            if len(parent1) > 1 and np.random.rand() < crossover_rate:
                point = np.random.randint(1, len(parent1))
                offspring.append(np.concatenate((parent1[:point], parent2[point:])))
                offspring.append(np.concatenate((parent2[:point], parent1[point:])))
            else:
                offspring.append(parent1)
                offspring.append(parent2)
        return np.vstack(offspring)

    def mutate(self, offspring, mutation_rate, mutation_scale=0.1):
        """Perturb one random gene per chromosome with Gaussian noise.

        Each chromosome mutates with probability ``mutation_rate``. The array
        is modified in place and also returned; genes stay within [0, 1].
        """
        for chromosome in offspring:
            if np.random.rand() < mutation_rate:
                gene_idx = np.random.randint(0, len(chromosome))
                mutation = np.random.normal(0, mutation_scale)
                chromosome[gene_idx] = np.clip(chromosome[gene_idx] + mutation, 0.0, 1.0)
        return offspring

    def generate_random_individuals(self, n_random):
        """Return ``n_random`` fresh chromosomes sampled in parameter space.

        Values are drawn in each parameter's own range and normalized back to
        [0, 1]. A degenerate range (``low == high``) maps to gene 0.

        Raises
        ------
        ValueError
            If a ``param_grid`` entry has a type other than 'int' or 'float'.
        """
        chromosome_length = len(self.param_grid)
        random_chromosomes = np.empty((n_random, chromosome_length), dtype=np.float32)
        for i, grid in enumerate(self.param_grid.values()):
            low = grid['low']
            high = grid['high']
            if grid['type'] == 'int':
                sampled = np.random.randint(low, high + 1, size=n_random)
            elif grid['type'] == 'float':
                sampled = np.random.uniform(low, high, size=n_random)
            else:
                raise ValueError(f"Tipo de parámetro no soportado: {grid['type']}")
            span = high - low
            if span == 0:
                # Avoid 0/0 -> NaN genes when the range has a single value.
                normalized = np.zeros(n_random)
            else:
                normalized = (sampled - low) / span
            random_chromosomes[:, i] = normalized.astype(np.float32)
        return random_chromosomes

    def calculate_fitness_improvement(self, best_fitness_history):
        """Return the change in best fitness between the last two generations."""
        if len(best_fitness_history) < 2:
            return 0.0
        return best_fitness_history[-1] - best_fitness_history[-2]

    def calculate_diversity(self, population):
        """Return the mean pairwise Euclidean distance between chromosomes."""
        n = len(population)
        if n <= 1:
            return 0.0
        population = np.asarray(population)
        # All unordered pairs (i < j), computed in one vectorized pass.
        first, second = np.triu_indices(n, k=1)
        distances = np.linalg.norm(population[first] - population[second], axis=1)
        return float(distances.mean())

    def _truncate_to_pop_size(self, population, fitnesses):
        """Keep the ``pop_size`` fittest rows, returning aligned arrays.

        Population and fitnesses are reordered together so that index ``k`` of
        one always refers to index ``k`` of the other.
        """
        if len(population) <= self.pop_size:
            return population, fitnesses
        keep = np.argsort(fitnesses)[::-1][:self.pop_size]
        return population[keep], fitnesses[keep]

    def fit(self, X_train, y_train):
        """Run the genetic search and refit the best configuration on all data.

        Parameters
        ----------
        X_train, y_train : numpy.ndarray
            Training features and labels; they are indexed with the integer
            index arrays returned by ``cv.split``.

        Returns
        -------
        GeneticAlgorithmCV
            ``self``, with ``best_params_``, ``best_score_`` and
            ``best_estimator_`` set.

        Raises
        ------
        ValueError
            If ``cv`` or ``scoring`` is missing, or ``generations`` < 1.
        """
        if self.cv is None or self.scoring is None:
            raise ValueError("Se requieren 'cv' y 'scoring' para ejecutar la búsqueda.")
        if self.generations < 1:
            raise ValueError("'generations' debe ser al menos 1.")

        population = self.initialize_population()
        best_overall_fitness = -np.inf
        worst_overall_fitness = np.inf
        best_overall_chromosome = None
        no_improvement_generations = 0
        fitness_history = []

        for generation in tqdm(range(self.generations), desc=f"Generaciones {self.model_type}", unit="gen"):
            # Metrics that drive the adaptive rates.
            diversity = self.calculate_diversity(population)
            fitness_improvement = self.calculate_fitness_improvement(fitness_history)
            # Combine both metrics with the alpha / beta weights.
            sigmoid_value = self.sigmoid((self.alpha * (fitness_improvement + 1.0)) + (self.beta * diversity))
            # Interpolate the rates between their initial and end values.
            crossover_rate = self.crossover_initial + (self.crossover_end - self.crossover_initial) * sigmoid_value
            mutation_rate = self.mutation_initial + (self.mutation_end - self.mutation_initial) * sigmoid_value
            # Keep crossover in [0, crossover_end] and mutation in [mutation_end, 1].
            crossover_rate = np.clip(crossover_rate, 0.0, self.crossover_end)
            mutation_rate = np.clip(mutation_rate, self.mutation_end, 1.0)

            if self.verbose:
                print(f"[{generation+1}, {self.model_type}] Crossover Rate: {crossover_rate:.4f} | Mutation Rate: {mutation_rate:.4f}")
                print(f"[{generation+1}, {self.model_type}] Mejora Fitness: {fitness_improvement:.4f} | Diversidad: {diversity:.4f}")

            # Evaluate everyone, then shrink back to pop_size. The worst score
            # is read before truncation so it reflects every evaluated individual.
            fitnesses = self.evaluate_population(population, X_train, y_train)
            current_worst_fitness = np.min(fitnesses)
            # Truncate population AND fitnesses together; truncating only the
            # population left the two arrays misaligned for every later lookup.
            population, fitnesses = self._truncate_to_pop_size(population, fitnesses)
            current_best_fitness = np.max(fitnesses)
            fitness_history.append(current_best_fitness)
            # Track the worst fitness seen so far.
            if current_worst_fitness < worst_overall_fitness:
                worst_overall_fitness = current_worst_fitness
            # Track the best fitness and its chromosome.
            if current_best_fitness > best_overall_fitness:
                best_overall_fitness = current_best_fitness
                best_overall_chromosome = population[np.argmax(fitnesses)].copy()
                no_improvement_generations = 0
            else:
                no_improvement_generations += 1

            if self.verbose:
                print(f"[{generation+1}, {self.model_type}] Fitness: {current_best_fitness} | Mejor Fitness: {best_overall_fitness} | Peor fitness: {worst_overall_fitness}")

            # Early stopping when the best fitness has stalled.
            if no_improvement_generations >= self.early_stopping_rounds:
                if self.verbose:
                    print(f"[{generation+1}, {self.model_type}] Deteniendo el algoritmo por falta de mejora.")
                    print(f"El mejor fitness para {self.model_type}: {best_overall_fitness}")
                break

            # Elites are copied (fancy indexing) before any mutation happens.
            if self.elitism:
                elite_order = np.argsort(fitnesses)[::-1]
                elites = population[elite_order[:self.elite_size]]
            else:
                elites = None
            # Parent selection, crossover and mutation.
            parents = self.select_parents(population, fitnesses)
            offspring = self.crossover(parents, crossover_rate=crossover_rate)
            offspring = self.mutate(offspring, mutation_rate=mutation_rate)
            # Inject random individuals to keep exploring.
            if self.n_random > 0:
                random_individuals = self.generate_random_individuals(self.n_random)
                offspring = np.vstack((offspring, random_individuals))
            # The next population may exceed pop_size; it is trimmed after
            # evaluation at the start of the next generation.
            if self.elitism and elites is not None:
                population = np.vstack((elites, offspring))
            else:
                population = offspring

        self.best_params_ = self.decode_chromosome(best_overall_chromosome)
        self.best_score_ = best_overall_fitness
        # Refit the best configuration on the full training data.
        self.best_estimator_ = clone(self.estimator)
        self.best_estimator_.set_params(**self.best_params_)
        self.best_estimator_.fit(X_train, y_train)
        return self

## Entrenamiento de los modelos

In [6]:
def train_model_buy(X_train, y_train, param_grid):
    """Tune and fit the buy-side pipeline with ``GeneticAlgorithmCV``.

    The pipeline is RobustScaler -> univariate percentile selector ->
    XGBClassifier; its hyperparameters are searched over ``param_grid`` with
    5-fold stratified cross-validation scored by accuracy.

    Parameters
    ----------
    X_train, y_train : numpy.ndarray
        Buy-side training features and binary labels.
    param_grid : dict
        Search space in the format expected by ``GeneticAlgorithmCV``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Best pipeline, refitted on the full training data.

    Raises
    ------
    Exception
        Any error raised during the search is printed and re-raised.
    """
    try:
        # Pipeline with placeholder hyperparameters (buys).
        # 'logloss' is the binary metric; 'mlogloss' is for multiclass targets.
        # NOTE(review): 'gpu_hist' / 'gpu_predictor' are deprecated in
        # XGBoost >= 2.0 in favour of device='cuda' - confirm installed version.
        estimator = Pipeline([
            ('scaler', RobustScaler()),
            ('selector', GenericUnivariateSelect(score_func=f_classif, mode='percentile')),
            ('classifier', xgb.XGBClassifier(eval_metric='logloss', tree_method='gpu_hist', predictor='gpu_predictor', verbosity=0))
        ])
        # Build the genetic search. model_type is a plain string label
        # (it used to be a one-element set, which printed as "{'buy'}").
        ga_search = GeneticAlgorithmCV(
            model_type='buy',
            estimator=estimator,
            cv=StratifiedKFold(n_splits=5, shuffle=True),
            param_grid=param_grid,
            scoring=accuracy_score,
            verbose=True,
            n_jobs=-1
        )
        # Run the genetic search.
        ga_search.fit(X_train, y_train)
    except Exception as e:
        print(f"Error en train_model_buy: {e}")
        raise
    # Report the best parameters and score.
    print("####################################################################")
    print("Mejores parámetros encontrados para compras:", ga_search.best_params_)
    print("Mejor puntuación de validación para compras:", ga_search.best_score_)
    print("####################################################################")
    # Return the refitted best estimator.
    return ga_search.best_estimator_

In [7]:
def train_model_sell(X_train, y_train, param_grid):
    """Tune and fit the sell-side pipeline with ``GeneticAlgorithmCV``.

    The pipeline is RobustScaler -> univariate percentile selector ->
    XGBClassifier; its hyperparameters are searched over ``param_grid`` with
    5-fold stratified cross-validation scored by accuracy.

    Parameters
    ----------
    X_train, y_train : numpy.ndarray
        Sell-side training features and binary labels.
    param_grid : dict
        Search space in the format expected by ``GeneticAlgorithmCV``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Best pipeline, refitted on the full training data.

    Raises
    ------
    Exception
        Any error raised during the search is printed and re-raised.
    """
    try:
        # Pipeline with placeholder hyperparameters (sells).
        # 'logloss' is the binary metric; 'mlogloss' is for multiclass targets.
        # NOTE(review): 'gpu_hist' / 'gpu_predictor' are deprecated in
        # XGBoost >= 2.0 in favour of device='cuda' - confirm installed version.
        estimator = Pipeline([
            ('scaler', RobustScaler()),
            ('selector', GenericUnivariateSelect(score_func=f_classif, mode='percentile')),
            ('classifier', xgb.XGBClassifier(eval_metric='logloss', tree_method='gpu_hist', predictor='gpu_predictor', verbosity=0))
        ])
        # Build the genetic search. model_type is a plain string label
        # (it used to be a one-element set, which printed as "{'sell'}").
        ga_search = GeneticAlgorithmCV(
            model_type='sell',
            estimator=estimator,
            cv=StratifiedKFold(n_splits=5, shuffle=True),
            param_grid=param_grid,
            scoring=accuracy_score,
            verbose=True,
            n_jobs=-1
        )
        # Run the genetic search.
        ga_search.fit(X_train, y_train)
    except Exception as e:
        # The message previously named train_model_buy (copy-paste slip).
        print(f"Error en train_model_sell: {e}")
        raise
    # Report the best parameters and score.
    print("####################################################################")
    print("Mejores parámetros encontrados para ventas:", ga_search.best_params_)
    print("Mejor puntuación de validación para ventas:", ga_search.best_score_)
    print("####################################################################")
    # Return the refitted best estimator.
    return ga_search.best_estimator_

In [8]:
# Hyperparameter search space: one entry per pipeline parameter, each with its
# value type and inclusive [low, high] bounds. Key order defines gene order.
param_grid = {
    'selector__param': dict(type='int', low=1, high=100),
    'classifier__n_estimators': dict(type='int', low=50, high=500),
    'classifier__max_depth': dict(type='int', low=3, high=10),
    'classifier__learning_rate': dict(type='float', low=0.01, high=0.3),
    'classifier__subsample': dict(type='float', low=0.6, high=1.0),
    'classifier__colsample_bytree': dict(type='float', low=0.6, high=1.0),
    'classifier__gamma': dict(type='float', low=0.0, high=0.5),
    'classifier__min_child_weight': dict(type='int', low=1, high=10),
    'classifier__reg_alpha': dict(type='float', low=0.0, high=1.0),
    'classifier__reg_lambda': dict(type='float', low=0.0, high=1.0),
}

In [None]:
# Train the buy and sell models concurrently, one worker thread each
with ThreadPoolExecutor(max_workers=2) as executor:
    print("Esperando que las tareas finalicen...")
    # Queue both training jobs.
    future_buy = executor.submit(train_model_buy, X_buy_train, y_buy_train, param_grid)
    future_sell = executor.submit(train_model_sell, X_sell_train, y_sell_train, param_grid)
    futures = [future_buy, future_sell]
    # Block until both jobs are done, then collect the fitted models
    # (result() re-raises any exception from the worker).
    wait(futures)
    model_buy, model_sell = (job.result() for job in futures)
    print("¡Todas las tareas han terminado!")

## Exportar modelos a formato ONNX

In [12]:
def save_onnx_models(mql5_files_folder):
    """Convert the trained buy/sell pipelines to ONNX and write them to disk.

    Reads the notebook-level globals ``model_buy``, ``model_sell``,
    ``X_buy_train`` and ``X_sell_train``. Writes ``model_buy.onnx`` and
    ``model_sell.onnx`` into ``mql5_files_folder``.

    Parameters
    ----------
    mql5_files_folder : str
        Destination directory for the two ``.onnx`` files.

    Raises
    ------
    Exception
        Any conversion or write error is printed and re-raised.
    """
    try:
        # Register the XGBoost converter so skl2onnx can translate the
        # classifier step of the pipeline.
        update_registered_converter(
            xgb.XGBClassifier,
            "XGBClassifier",
            calculate_linear_classifier_output_shapes,
            convert_xgboost,
            options={'nocl': [True, False], 'zipmap': [True, False, 'columns']}
        )
        model_buy_onnx = convert_sklearn(
            model_buy,
            'pipeline_buy_xgboost',
            [('input', FloatTensorType([None, X_buy_train.shape[1]]))],
            target_opset={'': 12, 'ai.onnx.ml': 2}
        )
        # The sell model's input width comes from the sell training data
        # (it was previously read from X_buy_train).
        model_sell_onnx = convert_sklearn(
            model_sell,
            'pipeline_sell_xgboost',
            [('input', FloatTensorType([None, X_sell_train.shape[1]]))],
            target_opset={'': 12, 'ai.onnx.ml': 2}
        )
        with open(os.path.join(mql5_files_folder, "model_buy.onnx"), 'wb') as f:
            f.write(model_buy_onnx.SerializeToString())
        with open(os.path.join(mql5_files_folder, "model_sell.onnx"), 'wb') as f:
            f.write(model_sell_onnx.SerializeToString())
    except Exception as e:
        print(f"Error en exportar los modelos: {e}")
        raise
    print("Modelos ONNX exportados correctamente")
In [None]:
# Export both models to a hard-coded, machine-specific folder.
# NOTE(review): presumably the MQL5/Files directory of one MetaTrader terminal instance - confirm before running elsewhere.
save_onnx_models(r'/mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/6C3C6A11D1C3791DD4DBF45421BF8028/MQL5/Files')