In [6]:
import MetaTrader5 as mt5
import pandas as pd
import numpy as np
import pytz
from datetime import datetime
import pandas_ta as ta
import xgboost as xgb
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import accuracy_score, roc_auc_score, log_loss, brier_score_loss

# ONNX Imports
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost

# Register the onnxmltools XGBoost converter with skl2onnx for
# xgb.XGBClassifier, so that convert_sklearn() (used at export time below)
# can handle a model that wraps an XGBClassifier.
update_registered_converter(
    xgb.XGBClassifier, "XGBoostXGBClassifier",
    calculate_linear_classifier_output_shapes, convert_xgboost,
    # Option names and the values accepted for each of them.
    options={"nocl": [True, False], "zipmap": [True, False, "renamed"]}
)

# --- USER CONFIG ---
# Instruments to build a model for; append further symbol names as needed.
SYMBOLS = [
    "EURUSD",
    "GBPUSD",
    "USDJPY",
]
DXY_SYMBOL = "DXY"          # symbol looked up for the intermarket DXY features
EMA_PERIOD = 50             # EMA span shared by the M15, H4 and DXY series
TIMEOUT_BARS = 48           # max bars scanned after each FVG when labeling
INVALIDATION_PIPS = 15.0    # adverse excursion (pips) that stops the scan early

# --- chronological split sizes ---
TEST_FRAC = 0.20            # newest 20% of samples -> test
CAL_FRAC = 0.10             # the 10% just before that -> calibration
GAP_BARS = 0                # optional embargo inserted between the splits

# Model input columns, in the exact order fed to the classifier.
FEATURES_13 = [
    "tick_volume",
    "hour_of_day",
    "rsi_14",
    "atr_14",
    "dist_to_ema",
    "bull_fvg_size",
    "bear_fvg_size",
    "bull_fvg_atr_ratio",
    "bear_fvg_atr_ratio",
    "h4_rsi_14",
    "h4_dist_to_ema",
    "dxy_rsi",
    "dxy_ema_dist",
]

def resolve_symbol(query: str) -> str | None:
    """
    Map a plain symbol name onto the name used by the connected MT5 terminal.

    Lookup order:
      1. ``mt5.symbols_get(query)`` - the first hit is taken as-is.
      2. ``mt5.symbols_get("*query*")`` - among the hits, the one with the
         shortest name wins (the earliest such hit on ties).

    The winning symbol is passed to ``mt5.symbol_select(name, True)`` before
    its name is returned. Returns ``None`` when neither lookup yields anything.
    """
    matches = mt5.symbols_get(query)
    if matches:
        chosen = matches[0]
    else:
        # Nothing under the literal name: retry with wildcards on both sides.
        matches = mt5.symbols_get(f"*{query}*")
        if not matches:
            return None
        # Shortest name first; this tends to drop broker suffix noise.
        chosen = min(matches, key=lambda candidate: len(candidate.name))

    resolved = chosen.name
    mt5.symbol_select(resolved, True)
    return resolved

def fetch_data(symbol, timeframe, utc_from, utc_to):
    """
    Pull bars for ``symbol`` via ``mt5.copy_rates_range`` into a DataFrame.

    Returns ``None`` when MT5 hands back ``None`` or zero rows. Otherwise the
    result is a DataFrame built from the returned records whose ``time``
    column has been converted from epoch seconds to UTC-aware timestamps.
    """
    raw = mt5.copy_rates_range(symbol, timeframe, utc_from, utc_to)
    has_rows = raw is not None and len(raw) != 0
    if not has_rows:
        return None

    frame = pd.DataFrame(raw)
    # Epoch seconds -> timezone-aware (UTC) timestamps.
    frame["time"] = pd.to_datetime(frame["time"], unit="s", utc=True)
    return frame

def make_splits(n: int, test_frac: float, cal_frac: float, gap: int):
    """
    Chronological three-way split of ``n`` ordered samples:

      [ train .... ][ gap ][ calibrate ][ gap ][ test ]

    The test and calibration sizes are ``round(n * frac)``. Train is whatever
    precedes them; each gap is carved out of the start of the calibration and
    test segments respectively. All boundaries are clamped to ``[0, n]`` and
    kept in non-decreasing order, so a segment may come back empty.

    Returns a tuple ``(train_idx, cal_idx, test_idx)`` of integer index arrays.
    """
    def _clamp(value, lower, upper):
        return min(max(value, lower), upper)

    n_test = int(round(n * test_frac))
    n_cal = int(round(n * cal_frac))

    train_stop = n - (n_cal + n_test)
    cal_stop_raw = n - n_test

    cal_lo = _clamp(train_stop + gap, 0, n)
    cal_hi = _clamp(cal_stop_raw, cal_lo, n)
    # The test start is derived from the *unclamped* calibration end.
    test_lo = _clamp(cal_stop_raw + gap, cal_hi, n)

    return (
        np.arange(0, max(train_stop, 0)),
        np.arange(cal_lo, cal_hi),
        np.arange(test_lo, n),
    )

def _load_dxy_context(utc_from, utc_to):
    """
    Load M15 bars for DXY_SYMBOL and derive the two intermarket features.

    Returns a DataFrame with columns ``time``, ``dxy_rsi`` and
    ``dxy_ema_dist`` (rows with NaNs removed), or ``None`` when the symbol
    cannot be resolved, has no data, or yields no usable rows. ``None`` tells
    the caller to fall back to neutral default values.
    """
    print("Loading Intermarket DXY Context...")
    dxy_name = resolve_symbol(DXY_SYMBOL)
    if not dxy_name:
        print("DXY symbol not found. Using neutral defaults.")
        return None

    dxy_raw = fetch_data(dxy_name, mt5.TIMEFRAME_M15, utc_from, utc_to)
    if dxy_raw is None:
        print(f"DXY symbol found ({dxy_name}) but no data. Using neutral defaults.")
        return None

    dxy_raw = dxy_raw.sort_values("time").reset_index(drop=True)
    dxy_raw["dxy_ema_50"] = dxy_raw["close"].ewm(span=EMA_PERIOD, adjust=False).mean()
    dxy_raw["dxy_ema_dist"] = dxy_raw["close"] - dxy_raw["dxy_ema_50"]
    dxy_raw["dxy_rsi"] = ta.rsi(dxy_raw["close"], length=14)
    dxy_df = dxy_raw[["time", "dxy_rsi", "dxy_ema_dist"]].dropna().copy()

    if dxy_df.empty:
        # An empty frame would turn every merged row into NaN and the later
        # dropna() would discard the whole M15 history, so treat it as missing.
        print(f"DXY symbol found ({dxy_name}) but no data. Using neutral defaults.")
        return None

    print(f"DXY loaded: {dxy_name} rows={len(dxy_df)}")
    return dxy_df


def _label_regimes(highs, lows, bar_indices, is_bull_flags, pip_val,
                   min_mfe_pips=15.0, max_mae_pips=5.0):
    """
    Assign a 0/1 regime label to each FVG bar from its forward MFE/MAE.

    For every position ``i`` in ``bar_indices`` the next ``TIMEOUT_BARS`` bars
    are scanned. Entry is the bar's low for a bullish FVG and its high for a
    bearish one. The scan stops early at the first bar whose adverse excursion
    reaches ``INVALIDATION_PIPS``; that bar still counts towards MFE/MAE.
    The label is 1 when MFE >= ``min_mfe_pips`` and MAE <= ``max_mae_pips``.
    An FVG on the very last bar has no forward data and is labeled 0.

    ``highs``, ``lows`` and ``is_bull_flags`` are positional arrays over all
    bars. Returns a list of ints aligned with ``bar_indices``.
    """
    n_bars = len(highs)
    n_fvg = len(bar_indices)
    invalidation = INVALIDATION_PIPS * pip_val  # loop-invariant, in price units
    labels = []

    for k, i in enumerate(bar_indices, start=1):
        start = i + 1
        if start >= n_bars:
            labels.append(0)
            continue

        end = min(start + TIMEOUT_BARS, n_bars)
        mfe = 0.0
        mae = 0.0

        if is_bull_flags[i]:
            entry = float(lows[i])
            for h, l in zip(highs[start:end], lows[start:end]):
                favorable = h - entry
                adverse = entry - l
                if favorable > mfe:
                    mfe = favorable
                if adverse > mae:
                    mae = adverse
                if adverse >= invalidation:
                    break
        else:
            entry = float(highs[i])
            for h, l in zip(highs[start:end], lows[start:end]):
                favorable = entry - l
                adverse = h - entry
                if favorable > mfe:
                    mfe = favorable
                if adverse > mae:
                    mae = adverse
                if adverse >= invalidation:
                    break

        mfe_pips = mfe / pip_val
        mae_pips = mae / pip_val
        labels.append(1 if (mfe_pips >= min_mfe_pips and mae_pips <= max_mae_pips) else 0)

        if k % 500 == 0:
            print(f"  labeled {k}/{n_fvg}")

    return labels


def _build_symbol_model(sym, dxy_df, utc_from, utc_to):
    """
    Build features, label, train, evaluate and export the model for one symbol.

    ``dxy_df`` is the output of ``_load_dxy_context`` (``None`` means neutral
    DXY defaults are used). Every failure mode prints a message and returns
    without raising, so the caller can move on to the next symbol. On success
    the calibrated model is written to ``fvg_manager_{sym}_v6.onnx``.
    """
    sym_name = resolve_symbol(sym)
    if not sym_name:
        print(f"Symbol not found: {sym}")
        print("Candidates:", [s.name for s in (mt5.symbols_get(f'*{sym}*') or [])][:30])
        return

    df_m15 = fetch_data(sym_name, mt5.TIMEFRAME_M15, utc_from, utc_to)
    df_h4  = fetch_data(sym_name, mt5.TIMEFRAME_H4,  utc_from, utc_to)
    if df_m15 is None or df_h4 is None:
        print(f"Missing data for {sym_name}. Ensure symbol is selected + history downloaded.")
        return

    info = mt5.symbol_info(sym_name)
    if info is None:
        # symbol_info() can return None; without this guard the attribute
        # access below would raise and abort the remaining symbols.
        print(f"No symbol info for {sym_name}. Skipping.")
        return
    # One pip is 10 points on 3/5-digit quotes, 1 point otherwise.
    pip_val = info.point * (10.0 if info.digits in [3, 5] else 1.0)

    df_m15 = df_m15.sort_values("time").reset_index(drop=True)
    df_h4  = df_h4.sort_values("time").reset_index(drop=True)

    # --- H4 context (NO +4 hours hack)
    # NOTE(review): MT5 bar timestamps are presumably bar-OPEN times. If so,
    # the backward as-of merge below attaches the H4 bar that is still forming
    # at each M15 bar, including its final close, which would be look-ahead
    # relative to live data. Confirm against the live feature code.
    df_h4["h4_ema_50"] = df_h4["close"].ewm(span=EMA_PERIOD, adjust=False).mean()
    df_h4["h4_dist_to_ema"] = (df_h4["close"] - df_h4["h4_ema_50"]) / pip_val
    df_h4["h4_rsi_14"] = ta.rsi(df_h4["close"], length=14)
    h4_context = df_h4[["time", "h4_dist_to_ema", "h4_rsi_14"]].dropna().copy()

    # --- M15 indicators
    df_m15["ema_50"] = df_m15["close"].ewm(span=EMA_PERIOD, adjust=False).mean()
    df_m15["dist_to_ema"] = (df_m15["close"] - df_m15["ema_50"]) / pip_val
    df_m15["hour_of_day"] = df_m15["time"].dt.hour
    df_m15["atr_14"] = ta.atr(df_m15["high"], df_m15["low"], df_m15["close"], length=14)
    df_m15["rsi_14"] = ta.rsi(df_m15["close"], length=14)

    # Merge H4 -> M15
    df_m15 = pd.merge_asof(
        df_m15.sort_values("time"),
        h4_context.sort_values("time"),
        on="time",
        direction="backward"
    )

    # Merge DXY -> M15
    if dxy_df is not None:
        df_m15 = pd.merge_asof(
            df_m15.sort_values("time"),
            dxy_df.sort_values("time"),
            on="time",
            direction="backward"
        )
    else:
        df_m15["dxy_rsi"] = 50.0
        df_m15["dxy_ema_dist"] = 0.0

    # After this reset the index labels equal bar positions, which the
    # labeling step relies on.
    df_m15 = df_m15.dropna().reset_index(drop=True)

    # --- FVG detection
    df_m15["bull_gap"] = df_m15["low"] - df_m15["high"].shift(2)
    df_m15["is_bull_fvg"] = (df_m15["bull_gap"] > 0) & (df_m15["close"].shift(1) > df_m15["close"].shift(2))
    df_m15["bear_gap"] = df_m15["low"].shift(2) - df_m15["high"]
    df_m15["is_bear_fvg"] = (df_m15["bear_gap"] > 0) & (df_m15["close"].shift(1) < df_m15["close"].shift(2))

    df_m15["bull_fvg_size"] = np.where(df_m15["is_bull_fvg"], df_m15["bull_gap"] / pip_val, 0.0)
    df_m15["bear_fvg_size"] = np.where(df_m15["is_bear_fvg"], df_m15["bear_gap"] / pip_val, 0.0)
    df_m15["bull_fvg_atr_ratio"] = np.where(df_m15["is_bull_fvg"], df_m15["bull_gap"] / df_m15["atr_14"], 0.0)
    df_m15["bear_fvg_atr_ratio"] = np.where(df_m15["is_bear_fvg"], df_m15["bear_gap"] / df_m15["atr_14"], 0.0)

    fvg_df = df_m15[(df_m15["is_bull_fvg"]) | (df_m15["is_bear_fvg"])].copy()
    if fvg_df.empty:
        print(f"No FVGs found for {sym}.")
        return

    print(f"Bars={len(df_m15)} | FVG setups={len(fvg_df)}")

    # --- Regime labeling (MAE/MFE over the bars following each FVG)
    print("Calculating MAE/MFE Regimes... (fast)")

    # Keep original bar index for each FVG row
    fvg_df = fvg_df.reset_index(drop=False).rename(columns={"index": "bar_index"})

    fvg_df["target_regime"] = _label_regimes(
        df_m15["high"].to_numpy(),
        df_m15["low"].to_numpy(),
        fvg_df["bar_index"].to_numpy(),
        df_m15["is_bull_fvg"].to_numpy(),
        pip_val,
    )

    # --- Training data
    X = fvg_df[FEATURES_13].to_numpy(dtype=np.float32)
    y = fvg_df["target_regime"].to_numpy(dtype=np.int32)

    if y.sum() < 50:
        print(f"Not enough 'Clean Trend' setups found for {sym}. Skipping.")
        return

    # --- time-safe train/cal/test split (no leakage)
    # NOTE(review): labels look up to TIMEOUT_BARS bars ahead, so with
    # GAP_BARS = 0 the last train/cal labels may depend on bars inside the
    # next segment. Consider a non-zero embargo.
    train_idx, cal_idx, test_idx = make_splits(len(fvg_df), TEST_FRAC, CAL_FRAC, GAP_BARS)
    if len(train_idx) < 200 or len(cal_idx) < 50 or len(test_idx) < 50:
        print(f"Split too small for {sym}: train={len(train_idx)}, cal={len(cal_idx)}, test={len(test_idx)}")
        return

    X_train, y_train = X[train_idx], y[train_idx]
    X_cal, y_cal     = X[cal_idx], y[cal_idx]
    X_test, y_test   = X[test_idx], y[test_idx]

    # A binary classifier / sigmoid calibrator needs both classes to fit.
    if len(np.unique(y_train)) < 2 or len(np.unique(y_cal)) < 2:
        print(f"Train/cal slice contains a single class for {sym}. Skipping.")
        return

    pos = int((y_train == 1).sum())
    neg = int((y_train == 0).sum())
    imbalance = neg / pos  # pos > 0 is guaranteed by the class check above

    print("Training Regime Classifier (time-safe)...")
    base_model = xgb.XGBClassifier(
        n_estimators=150,
        max_depth=3,
        learning_rate=0.05,
        subsample=0.9,
        colsample_bytree=0.9,
        scale_pos_weight=imbalance,
        random_state=42,
        eval_metric="logloss",
        n_jobs=-1
    )

    # Fit base on TRAIN only
    base_model.fit(X_train, y_train)

    # Calibrate on later CAL slice (cv="prefit" avoids shuffle/leak)
    model = CalibratedClassifierCV(base_model, method="sigmoid", cv="prefit")
    model.fit(X_cal, y_cal)

    # Evaluate on TEST (latest)
    preds = model.predict(X_test)
    proba = model.predict_proba(X_test)[:, 1]

    acc = accuracy_score(y_test, preds)
    auc = roc_auc_score(y_test, proba) if len(np.unique(y_test)) > 1 else float("nan")
    ll  = log_loss(y_test, proba, labels=[0, 1])
    brier = brier_score_loss(y_test, proba)

    print(f"OOS Accuracy: {acc:.2%} | AUC: {auc:.3f} | LogLoss: {ll:.4f} | Brier: {brier:.4f}")

    # Export Brain #3 ONNX; the input width follows the feature list.
    initial_type = [("float_input", FloatTensorType([None, len(FEATURES_13)]))]
    onx = convert_sklearn(
        model,
        initial_types=initial_type,
        target_opset={"": 12, "ai.onnx.ml": 3},
        options={type(model): {"zipmap": False}}
    )

    filename = f"fvg_manager_{sym}_v6.onnx"
    with open(filename, "wb") as f:
        f.write(onx.SerializeToString())
    print(f"SUCCESS: Brain #3 Saved -> {filename}")


def generate_and_train_regime_model():
    """
    Train and export one calibrated regime classifier per entry in SYMBOLS.

    Connects to the MT5 terminal, loads the shared DXY context once, then for
    each symbol builds M15/H4/DXY features at FVG bars, labels them, trains an
    XGBoost model on the oldest slice, calibrates it on the next slice,
    reports metrics on the newest slice and writes an ONNX file.

    Returns ``None``. If ``mt5.initialize()`` fails a message is printed and
    nothing else happens. The MT5 connection is shut down even if a symbol
    raises part-way through.
    """
    if not mt5.initialize():
        print("MT5 initialize failed.")
        return

    try:
        utc_from = datetime(2023, 5, 1, tzinfo=pytz.UTC)
        utc_to   = datetime(2026, 2, 21, tzinfo=pytz.UTC)

        # 1) Fetch DXY Context
        dxy_df = _load_dxy_context(utc_from, utc_to)

        # 2) One model per symbol
        for sym in SYMBOLS:
            print("\n" + "="*50)
            print(f" BUILDING BRAIN #3 (REGIME MANAGER) FOR {sym} ")
            print("="*50)
            _build_symbol_model(sym, dxy_df, utc_from, utc_to)
    finally:
        # Always release the terminal connection, including on exceptions.
        mt5.shutdown()

# Run the full fetch/label/train/export pipeline when executed as a script
# (or as a notebook cell, where __name__ is also "__main__").
if __name__ == "__main__":
    generate_and_train_regime_model()

Loading Intermarket DXY Context...
DXY loaded: DXYN rows=4023

 BUILDING BRAIN #3 (REGIME MANAGER) FOR EURUSD 
Bars=69586 | FVG setups=14584
Calculating MAE/MFE Regimes... (fast)
  labeled 500/14584
  labeled 1000/14584
  labeled 1500/14584
  labeled 2000/14584
  labeled 2500/14584
  labeled 3000/14584
  labeled 3500/14584
  labeled 4000/14584
  labeled 4500/14584
  labeled 5000/14584
  labeled 5500/14584
  labeled 6000/14584
  labeled 6500/14584
  labeled 7000/14584
  labeled 7500/14584
  labeled 8000/14584
  labeled 8500/14584
  labeled 9000/14584
  labeled 9500/14584
  labeled 10000/14584
  labeled 10500/14584
  labeled 11000/14584
  labeled 11500/14584
  labeled 12000/14584
  labeled 12500/14584
  labeled 13000/14584
  labeled 13500/14584
  labeled 14000/14584
  labeled 14500/14584
Training Regime Classifier (time-safe)...
OOS Accuracy: 77.07% | AUC: 0.609 | LogLoss: 0.5298 | Brier: 0.1737
SUCCESS: Brain #3 Saved -> fvg_manager_EURUSD_v6.onnx

 BUILDING BRAIN #3 (REGIME MANAGER) FO