## Importaciones

In [10]:
import os
import math
import numpy as np
from numba import njit
import pandas as pd
from datetime import datetime
from typing import Dict, Any, Tuple, List
import optuna
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from modules.labeling_lib import get_labels_one_direction
from modules.labeling_lib import sliding_window_clustering
from modules.tester_lib import tester_one_direction
from modules.export_lib import export_model_to_ONNX
import warnings
warnings.filterwarnings("ignore")

In [11]:
# Obtener precios
def get_prices(hyper_params) -> pd.DataFrame:
    """Load the close-price history for the configured symbol and timeframe.

    Reads the whitespace-separated file ``<symbol>_<timeframe>.csv`` located in
    ``hyper_params["history_path"]``, combines its ``<DATE>`` and ``<TIME>``
    columns into a datetime index named ``time`` and keeps ``<CLOSE>`` as the
    single column ``close``.

    Returns:
        DataFrame indexed by ``time`` with one ``close`` column; rows whose
        close is missing are dropped.
    """
    folder = hyper_params["history_path"]
    csv_name = f"{hyper_params['symbol']}_{hyper_params['timeframe']}.csv"
    raw = pd.read_csv(os.path.join(folder, csv_name), sep=r"\s+")

    # Date and time arrive as two text columns; join them before parsing.
    stamps = pd.to_datetime(raw['<DATE>'] + ' ' + raw['<TIME>'], format='mixed')
    prices = pd.DataFrame({'time': stamps, 'close': raw['<CLOSE>']})
    return prices.set_index('time').dropna()
# Ingeniería de características
@njit
def compute_features(close, periods, periods_meta, stats):
    """Build a matrix of rolling-window statistics over ``close``.

    Args:
        close: price series; ``get_features`` passes ``data['close'].values``
            (assumed to be a 1-D float array -- TODO confirm).
        periods: window lengths for the main features. Each one fills three
            consecutive columns: std, skew, kurtosis.
        periods_meta: window lengths for the meta features. Each one fills a
            single std column.
        stats: only its length is read here, to size the output and to advance
            the column cursor. NOTE(review): the writes below are hard-coded
            to ``col``, ``col+1`` and ``col+2``, so the layout is only
            consistent when ``len(stats) == 3``.

    Returns:
        Array of shape ``(len(close), len(periods)*len(stats) + len(periods_meta))``.
        For a window of length ``win`` the rows ``0 .. win-1`` stay NaN.
    """
    n = len(close)
    total_features = (len(periods)*len(stats)) + len(periods_meta)
    # Start from all-NaN so rows without a full window remain NaN.
    features = np.full((n, total_features), np.nan)
    col = 0

    # Standard deviation with an (n - 1) denominator.
    def std_manual(x):
        m = np.mean(x)
        return np.sqrt(np.sum((x - m) ** 2) / (x.size - 1))

    # Mean of the cubed standardised deviations (using std_manual);
    # 0.0 when the window has zero spread.
    def skew_manual(x):
        m = np.mean(x)
        s = std_manual(x)
        return np.mean(((x - m) / s) ** 3) if s != 0 else 0.0

    # Mean of the fourth power of the standardised deviations, minus 3;
    # 0.0 when the window has zero spread.
    def kurt_manual(x):
        m = np.mean(x)
        s = std_manual(x)
        return np.mean(((x - m) / s) ** 4) - 3 if s != 0 else 0.0

    # Main periods: three statistics per window length.
    for win in periods:
        for i in range(win, n):
            # The window holds the `win` values before index i; close[i] itself is excluded.
            window = close[i - win:i]
            features[i, col]     = std_manual(window)
            features[i, col+1]   = skew_manual(window)
            features[i, col+2]   = kurt_manual(window)
        # Move the cursor past this period's group of columns.
        col += len(stats)

    # Meta periods: a single std column per window length.
    for win in periods_meta:
        for i in range(win, n):
            window = close[i - win:i]
            features[i, col]     = std_manual(window)
        col += 1

    return features

def get_features(data: pd.DataFrame, hp):
    """Turn a price frame into a labelled feature frame.

    Computes the rolling statistics for ``hp["periods"]`` (std, skew, kurt per
    period) and ``hp["periods_meta"]`` (std only), names the columns
    ``<period>_<stat>_feature`` / ``<period>_std_meta_feature``, appends the
    original ``close`` column and drops every row containing a NaN.
    """
    price_values = data['close'].values
    row_index = data.index
    main_windows = hp["periods"]
    meta_windows = hp["periods_meta"]
    stat_names = ["std", "skew", "kurt"]

    matrix = compute_features(
        price_values,
        np.array(main_windows),
        np.array(meta_windows),
        stat_names,
    )

    # Column labels follow the same order in which compute_features fills columns.
    labels = [f"{w}_{s}_feature" for w in main_windows for s in stat_names]
    labels += [f"{w}_{stat_names[0]}_meta_feature" for w in meta_windows]

    frame = pd.DataFrame(matrix, columns=labels, index=row_index)
    frame["close"] = data["close"]
    return frame.dropna()
    
def test_model_one_direction(
        dataset: pd.DataFrame,
        result:  list,
        forward: datetime,
        backward: datetime,
        markup:  float,
        direction: str,
        plt: bool = False):
    """Score a (main model, meta model) pair with ``tester_one_direction``.

    ``result[0]`` predicts from the non-meta feature columns and ``result[1]``
    from the columns whose name contains ``meta_feature``.  The positive-class
    probabilities are thresholded at 0.5 into 0.0/1.0 signals stored in the
    ``labels`` and ``meta_labels`` columns of a copy of ``dataset``, which is
    then handed to the tester together with the remaining arguments.
    """
    frame = dataset.copy()

    # Separate the two feature families by column name.
    inputs = frame.drop(columns=['close'])
    is_meta = inputs.columns.str.contains('meta_feature')
    meta_inputs = inputs.loc[:, is_meta]
    main_inputs = inputs.loc[:, ~is_meta]

    frame['labels'] = result[0].predict_proba(main_inputs)[:, 1]
    frame['meta_labels'] = result[1].predict_proba(meta_inputs)[:, 1]

    # Convert probabilities into hard 0.0 / 1.0 signals.
    signal_cols = ['labels', 'meta_labels']
    frame[signal_cols] = (frame[signal_cols] > 0.5).astype(float)

    return tester_one_direction(frame, forward, backward, markup, direction, plt)

## Main

In [None]:
def _catboost_params(hp: Dict[str, Any],
                     prefix: str,
                     defaults: Dict[str, Any],
                     metric: str) -> Dict[str, Any]:
    """Build CatBoost constructor kwargs; ``hp[prefix + name]`` overrides each default."""
    params = {name: hp.get(f"{prefix}{name}", value) for name, value in defaults.items()}
    params.update(
        custom_loss=[metric],
        eval_metric=metric,
        use_best_model=True,
        verbose=False,
        thread_count=-1,
        task_type='CPU',
    )
    # Only forward a seed when the caller opted in, so the default behaviour is unchanged.
    seed = hp.get('random_state')
    if seed is not None:
        params['random_seed'] = seed
    return params


def fit_final_models(clustered: pd.DataFrame,
                     meta: pd.DataFrame,
                     oos_data: pd.DataFrame,
                     hp: Dict[str, Any]) -> Tuple[float, Any, Any]:
    """Train the main model and the meta-model, then score them out of sample.

    Args:
        clustered: rows of one cluster with a ``labels`` column; every column
            that is neither ``labels`` nor a ``*_meta_feature`` column of
            ``meta`` is used as a main-model feature.
        meta: frame with the ``*_meta_feature`` columns and a ``clusters``
            column used as the meta-model target.
        oos_data: feature frame passed to ``test_model_one_direction``.
        hp: hyper-parameters. Reads ``cat_*`` / ``cat_meta_*`` overrides,
            ``'full forward'``, ``'forward'``, ``'markup'`` and ``'direction'``.
            The optional ``'random_state'`` key seeds both 70/30 splits and
            both CatBoost models; when it is absent the run is unseeded.

    Returns:
        ``(R2, model, meta_model)`` where ``R2`` is the tester score, replaced
        by ``-1.0`` when the tester returns NaN.
    """
    seed = hp.get('random_state')
    is_meta_col = meta.columns.str.contains('_meta_feature')

    # 1) Main model: everything except the target and the meta features.
    X_main = clustered.drop(columns=['labels', *meta.columns[is_meta_col]])
    y_main = clustered['labels'].astype('int16')

    # 2) Meta-model: meta features -> cluster membership flag.
    X_meta = meta.loc[:, is_meta_col]
    y_meta = meta['clusters'].astype('int16')

    # 3) Shuffled 70/30 split; reproducible only when a seed was supplied.
    train_X, test_X, train_y, test_y = train_test_split(
        X_main, y_main, train_size=0.7, shuffle=True, random_state=seed)
    train_X_m, test_X_m, train_y_m, test_y_m = train_test_split(
        X_meta, y_meta, train_size=0.7, shuffle=True, random_state=seed)

    # 4) CatBoost hyper-parameters (defaults + overrides from hp).
    model = CatBoostClassifier(**_catboost_params(
        hp, 'cat_',
        dict(iterations=500, depth=6, learning_rate=0.1, l2_leaf_reg=3.0),
        'Accuracy'))
    model.fit(train_X, train_y, eval_set=(test_X, test_y), early_stopping_rounds=25)

    meta_model = CatBoostClassifier(**_catboost_params(
        hp, 'cat_meta_',
        dict(iterations=300, depth=5, learning_rate=0.15, l2_leaf_reg=3.0),
        'F1'))
    meta_model.fit(train_X_m, train_y_m, eval_set=(test_X_m, test_y_m), early_stopping_rounds=15)

    # 5) Out-of-sample evaluation.
    R2 = test_model_one_direction(
        oos_data,
        [model, meta_model],
        hp['full forward'],
        hp['forward'],
        hp['markup'],
        hp['direction'],
        plt=False,
    )
    if math.isnan(R2):
        R2 = -1.0
    return R2, model, meta_model

# ----------------------------------------------------------------------------
#      ─── FUNCIÓN OBJETIVO PARA OPTUNA ───
# ----------------------------------------------------------------------------

def objective(trial: optuna.trial.Trial, base_hp: Dict[str, Any], study=None) -> float:
    """Optuna objective: best out-of-sample score over the clusters of one trial.

    Samples clustering, labelling and main-model CatBoost parameters, rebuilds
    the feature set, clusters the training range and fits one model pair per
    cluster with at least 500 rows.  Scores that are not below 1.0 are ignored.
    When ``study`` is given, the best pair seen across all trials is kept in
    ``study.user_attrs`` under ``best_model``, ``best_meta_model`` and
    ``best_r2``.

    Returns:
        The highest accepted score of this trial, or ``-inf`` if none was accepted.
    """
    hp = base_hp.copy()

    # Search space: clustering and labelling.
    hp['n_clusters'] = trial.suggest_int('n_clusters', 5, 60, step=5)
    hp['window_size'] = trial.suggest_int('window_size', 100, 500, step=10)
    hp['label_min'] = trial.suggest_int('label_min', 1, 5)
    hp['label_max'] = trial.suggest_int('label_max', hp['label_min']+5, 30)

    # Search space: main CatBoost model.
    hp['cat_iterations'] = trial.suggest_int('cat_iterations', 100, 800, step=100)
    hp['cat_depth'] = trial.suggest_int('cat_depth', 4, 10)
    hp['cat_learning_rate'] = trial.suggest_float('cat_learning_rate', 0.03, 0.3, log=True)
    hp['cat_l2_leaf_reg'] = trial.suggest_float('cat_l2_leaf_reg', 1.0, 7.0)

    # Full feature set, split by date into training and out-of-sample ranges.
    features = get_features(get_prices(hp), hp)
    in_sample = features[(features.index > hp['backward']) & (features.index < hp['forward'])]
    out_of_sample = features[(features.index >= hp['forward']) & (features.index < hp['full forward'])]

    # Sliding-window clustering of the training range.
    clustered_all = sliding_window_clustering(
        in_sample,
        n_clusters=hp['n_clusters'],
        window_size=hp['window_size']
    )

    top_score = -math.inf
    for cluster_id in np.sort(clustered_all['clusters'].unique()):
        members = clustered_all[clustered_all['clusters'] == cluster_id].copy()
        if len(members) < 500:
            continue

        labelled = get_labels_one_direction(
            members,
            markup=hp['markup'],
            min=hp['label_min'],
            max=hp['label_max'],
            direction=hp['direction'])
        labelled = labelled.drop(columns=['close', 'clusters'])

        # Meta target: 1 for rows of the current cluster, 0 otherwise.
        meta_frame = clustered_all.copy()
        meta_frame['clusters'] = (meta_frame['clusters'] == cluster_id).astype(int)

        score, main_model, meta_model = fit_final_models(
            labelled,
            meta_frame.drop(columns=['close']),
            out_of_sample,
            hp
        )

        # Accept only scores below 1.0 that beat this trial's best so far.
        if not (top_score < score < 1.0):
            continue
        top_score = score

        if study is None:
            continue

        # Persist the pair only if it beats everything stored by earlier trials.
        if top_score > study.user_attrs.get("best_r2", -np.inf):
            study.set_user_attr("best_model", main_model)
            study.set_user_attr("best_meta_model", meta_model)
            study.set_user_attr("best_r2", top_score)

    return top_score

# ----------------------------------------------------------------------------
#                 ─── PIPELINE DE OPTIMIZACIÓN + EXPORT ───
# ----------------------------------------------------------------------------

def optimize_and_export(symbol, timeframe, model_number, n_trials,
                        history_path=None,
                        models_export_path=None,
                        include_export_path=None):
    """Run an Optuna study, then export the best model pair to ONNX.

    Args:
        symbol: instrument name, used to locate the history CSV.
        timeframe: timeframe tag, used to locate the history CSV.
        model_number: identifier forwarded to the exporter.
        n_trials: number of Optuna trials to run.
        history_path: folder holding the history CSV. Defaults to the
            MetaTrader common-files folder hard-coded below.
        models_export_path: forwarded to the exporter as ``models_export_path``.
            Defaults to the terminal's ``MQL5/Files`` folder.
        include_export_path: forwarded to the exporter as
            ``include_export_path``. Defaults to the hard-coded include folder.

    If no trial stored a model in ``study.user_attrs``, a message is printed
    and the export is skipped instead of raising ``KeyError``.
    """
    # Fall back to the original machine-specific locations when no override is given.
    if history_path is None:
        history_path = r"/mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/Common/Files/"
    if models_export_path is None:
        models_export_path = r'/mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/6C3C6A11D1C3791DD4DBF45421BF8028/MQL5/Files/'
    if include_export_path is None:
        include_export_path = r'/mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/6C3C6A11D1C3791DD4DBF45421BF8028/MQL5/Include/ajmtrz/include/Dmitrievsky'

    base_hp: Dict[str, Any] = {
        'symbol': symbol,
        'timeframe': timeframe,
        'models_export_path': models_export_path,
        'include_export_path': include_export_path,
        'history_path': history_path,
        'best_models': [],
        'model_number': model_number,
        'markup': 0.20,
        'label_min'  : 1,
        'label_max'  : 15,
        'direction': 'buy',
        'n_clusters': 30,
        'window_size': 350,
        'periods': [i for i in range(5, 300, 30)],
        'periods_meta': [5],
        'backward': datetime(2020, 3, 26),
        'forward': datetime(2024, 1, 1),
        'full forward': datetime(2026, 1, 1),
    }

    study = optuna.create_study(direction='maximize')
    study.optimize(lambda t: objective(t, base_hp, study), n_trials=n_trials, show_progress_bar=True)

    print("\n┌───────────────────────────────────────────────┐")
    print("│      MEJOR RESULTADO = {:.4f}                 │".format(study.best_value))
    print("└───────────────────────────────────────────────┘\n")
    print("Parámetros óptimos:\n", study.best_params)

    # The objective only stores a model pair when some cluster produced an
    # accepted score; without one there is nothing to export.
    model = study.user_attrs.get("best_model")
    meta_model = study.user_attrs.get("best_meta_model")
    if model is None or meta_model is None:
        print("No se encontró ningún modelo válido; se omite la exportación ONNX.")
        return
    best_r2 = study.user_attrs["best_r2"]

    # Export with the winning parameters; 'best_models' is passed explicitly instead.
    base_hp.update(study.best_params)
    base_hp.pop('best_models', None)
    print("Exportando modelos ONNX… R2 = {:.4f}".format(best_r2))
    export_model_to_ONNX(best_models=[model, meta_model], **base_hp)

# Entry point: run one optimisation + ONNX export per model number 3..9
# for XAUUSD on the H1 timeframe, with 50 Optuna trials each.
if __name__ == "__main__":
    for i in range(3, 10):
        # `i` is passed as the model_number argument.
        optimize_and_export('XAUUSD', 'H1', i, n_trials=50)

[I 2025-04-20 12:13:17,267] A new study created in memory with name: no-name-a3bd9167-efd8-429c-820d-5140d2096076


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2025-04-20 12:13:25,220] Trial 0 finished with value: 0.9383581924560003 and parameters: {'n_clusters': 50, 'window_size': 180, 'label_min': 2, 'label_max': 27, 'cat_iterations': 600, 'cat_depth': 9, 'cat_learning_rate': 0.03561938681199903, 'cat_l2_leaf_reg': 3.3715812243343497}. Best is trial 0 with value: 0.9383581924560003.
[I 2025-04-20 12:13:31,470] Trial 1 finished with value: 0.9296481036348058 and parameters: {'n_clusters': 55, 'window_size': 200, 'label_min': 5, 'label_max': 17, 'cat_iterations': 600, 'cat_depth': 8, 'cat_learning_rate': 0.22518814226233957, 'cat_l2_leaf_reg': 3.8577411586966925}. Best is trial 0 with value: 0.9383581924560003.
[I 2025-04-20 12:13:39,110] Trial 2 finished with value: 0.9383889534020754 and parameters: {'n_clusters': 55, 'window_size': 350, 'label_min': 5, 'label_max': 29, 'cat_iterations': 100, 'cat_depth': 9, 'cat_learning_rate': 0.17428457713804413, 'cat_l2_leaf_reg': 5.090851379680299}. Best is trial 2 with value: 0.9383889534020754.
[I

[I 2025-04-20 12:16:56,429] A new study created in memory with name: no-name-b6ac4207-085c-4398-9366-610e0477f068


[I 2025-04-20 12:16:56,392] Trial 49 finished with value: 0.9663836145929552 and parameters: {'n_clusters': 60, 'window_size': 190, 'label_min': 4, 'label_max': 15, 'cat_iterations': 500, 'cat_depth': 7, 'cat_learning_rate': 0.0822379653925937, 'cat_l2_leaf_reg': 5.295954894431533}. Best is trial 41 with value: 0.9782063536539899.

┌───────────────────────────────────────────────┐
│      MEJOR RESULTADO = 0.9782                 │
└───────────────────────────────────────────────┘

Parámetros óptimos:
 {'n_clusters': 40, 'window_size': 420, 'label_min': 4, 'label_max': 14, 'cat_iterations': 300, 'cat_depth': 4, 'cat_learning_rate': 0.06615582750248386, 'cat_l2_leaf_reg': 5.96680306465874}
Exportando modelos ONNX… R2 = 0.9782
Modelo /mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/6C3C6A11D1C3791DD4DBF45421BF8028/MQL5/Files/dmitrievsky_model_XAUUSD_H1_3.onnx ONNX exportado correctamente
Modelo /mnt/c/Users/Administrador/AppData/Roaming/MetaQuotes/Terminal/6C3C6A11D1C3791DD4D

  0%|          | 0/50 [00:00<?, ?it/s]

[I 2025-04-20 12:17:00,360] Trial 0 finished with value: 0.9238193503239651 and parameters: {'n_clusters': 40, 'window_size': 370, 'label_min': 5, 'label_max': 19, 'cat_iterations': 600, 'cat_depth': 5, 'cat_learning_rate': 0.08240032522114504, 'cat_l2_leaf_reg': 2.927854373399766}. Best is trial 0 with value: 0.9238193503239651.
[I 2025-04-20 12:17:04,042] Trial 1 finished with value: 0.9727308479514705 and parameters: {'n_clusters': 15, 'window_size': 210, 'label_min': 3, 'label_max': 25, 'cat_iterations': 400, 'cat_depth': 8, 'cat_learning_rate': 0.11157529523419091, 'cat_l2_leaf_reg': 4.780120680040877}. Best is trial 1 with value: 0.9727308479514705.
[I 2025-04-20 12:17:06,603] Trial 2 finished with value: 0.9118579998638869 and parameters: {'n_clusters': 60, 'window_size': 160, 'label_min': 5, 'label_max': 30, 'cat_iterations': 100, 'cat_depth': 8, 'cat_learning_rate': 0.13461406518766672, 'cat_l2_leaf_reg': 3.156737488678679}. Best is trial 1 with value: 0.9727308479514705.
[I 2