In [None]:
import MetaTrader5 as mt5
import pandas as pd
import numpy as np
import pytz
from datetime import datetime, timedelta
import pandas_ta as ta
import xgboost as xgb
import os

# ONNX Imports
from skl2onnx import convert_sklearn, update_registered_converter
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx.common.shape_calculator import calculate_linear_classifier_output_shapes, calculate_linear_regressor_output_shapes
from onnxmltools.convert.xgboost.operator_converters.XGBoost import convert_xgboost

# --- REGISTER XGBOOST CONVERTERS ---
# Teach skl2onnx how to convert the two XGBoost estimator classes used below:
# each registration pairs the class with an alias, a shape calculator and the
# onnxmltools XGBoost converter function.

# Classifier: also declares the 'nocl' and 'zipmap' conversion options so
# convert_sklearn() accepts them for this model type.
update_registered_converter(
    xgb.XGBClassifier,
    "XGBoostXGBClassifier",
    shape_fct=calculate_linear_classifier_output_shapes,
    convert_fct=convert_xgboost,
    options={
        "nocl": [True, False],
        "zipmap": [True, False, "renamed"],
    },
)

# Regressor: no extra conversion options are declared.
update_registered_converter(
    xgb.XGBRegressor,
    "XGBoostXGBRegressor",
    shape_fct=calculate_linear_regressor_output_shapes,
    convert_fct=convert_xgboost,
)

# --- CONFIGURATION ---
# Base symbol names to train on. Each one is resolved against the terminal with
# a "*NAME*" wildcard (first match wins) and is also embedded in the output
# model filenames.
SYMBOLS = ["EURUSD", "GBPUSD", "USDJPY"] # Add more as needed
# Base name of the dollar-index symbol, resolved with the same "*NAME*" wildcard.
# When it cannot be found or has no data, neutral placeholder features are used.
DXY_SYMBOL = "DXY"
# Span of the exponential moving averages computed on M15, H4 and DXY closes.
EMA_PERIOD = 50
# Stop-loss distance from the entry price, in pips.
SL_PIPS = 15.0
# Take-profit distance expressed as a multiple of the stop-loss distance.
RR_RATIO = 2.0
# Maximum number of subsequent M15 bars examined per setup; a setup that hits
# neither stop nor target within this many bars is labelled a loss.
TIMEOUT_BARS = 48
# Adverse move (in pips) after which favourable/adverse excursion tracking stops.
INVALIDATION_PIPS = 15.0
# Length, in days, of the history window ending at the current time.
WINDOW_DAYS = 90 # Rolling window for Concept Drift

def fetch_data(symbol, timeframe, utc_from, utc_to):
    """Fetch bars for *symbol* from MetaTrader 5 as a DataFrame.

    Args:
        symbol: Symbol name passed to ``mt5.copy_rates_range``.
        timeframe: MT5 timeframe constant (e.g. ``mt5.TIMEFRAME_M15``).
        utc_from: Start of the requested range.
        utc_to: End of the requested range.

    Returns:
        A DataFrame of the returned rates whose ``time`` column has been
        converted from epoch seconds to timezone-aware UTC timestamps, or
        ``None`` when the terminal returns nothing or an empty result.
    """
    bars = mt5.copy_rates_range(symbol, timeframe, utc_from, utc_to)
    has_bars = bars is not None and len(bars) > 0
    if not has_bars:
        return None

    # 'time' arrives as seconds since the epoch; make it a UTC timestamp column.
    return pd.DataFrame(bars).assign(
        time=lambda frame: pd.to_datetime(frame['time'], unit='s', utc=True)
    )

def _strip_tz_to_ns(series):
    """Return a datetime *series* as timezone-naive ``datetime64[ns]``."""
    # merge_asof needs both key columns to have exactly the same dtype, so every
    # time column goes through this one normalisation.
    return series.dt.tz_localize(None).astype('datetime64[ns]')


def _label_setup(is_bull, entry, f_highs, f_lows, sl_points, tp_points, invalidation_points):
    """Walk the bars after a setup and return ``(outcome, max_excursion, max_adverse)``.

    ``outcome`` is 1 when the take-profit is reached before the stop-loss and 0
    otherwise (stop hit first, or neither hit within the supplied bars). When a
    single bar touches both levels the stop is checked first, so it counts as a
    loss. ``max_excursion`` / ``max_adverse`` are the largest favourable and
    adverse price distances from *entry*; they keep being updated, independently
    of ``outcome``, until a bar reaches the invalidation price (that bar itself
    is still included).
    """
    outcome, max_excursion, max_adverse, mfe_resolved = -1, 0.0, 0.0, False

    if is_bull:
        sl, tp, ruin_price = entry - sl_points, entry + tp_points, entry - invalidation_points
        for h, l in zip(f_highs, f_lows):
            if not mfe_resolved:
                if h - entry > max_excursion: max_excursion = h - entry
                if entry - l > max_adverse: max_adverse = entry - l
                if l <= ruin_price: mfe_resolved = True
            if outcome == -1:
                if l <= sl: outcome = 0
                elif h >= tp: outcome = 1
            # Nothing left to update once both trackers are settled.
            if mfe_resolved and outcome != -1: break
    else:
        sl, tp, ruin_price = entry + sl_points, entry - tp_points, entry + invalidation_points
        for h, l in zip(f_highs, f_lows):
            if not mfe_resolved:
                if entry - l > max_excursion: max_excursion = entry - l
                if h - entry > max_adverse: max_adverse = h - entry
                if h >= ruin_price: mfe_resolved = True
            if outcome == -1:
                if h >= sl: outcome = 0
                elif l <= tp: outcome = 1
            if mfe_resolved and outcome != -1: break

    if outcome == -1: outcome = 0 # Timeout is a loss
    return outcome, max_excursion, max_adverse


def build_features_and_labels(symbol_name, dxy_df, utc_from, utc_to):
    """Build the FVG feature matrix and the three training targets for one symbol.

    Args:
        symbol_name: Base symbol name; resolved against the terminal with a
            ``*name*`` wildcard and the first match is used.
        dxy_df: Optional DataFrame with ``time``, ``dxy_rsi`` and
            ``dxy_ema_dist`` columns. It is not modified. When ``None``, the
            two DXY features are filled with the constants 50.0 and 0.0.
        utc_from: Start of the history window.
        utc_to: End of the history window.

    Returns:
        A DataFrame with one row per detected fair-value-gap bar: 13 feature
        columns followed by ``target_win_loss``, ``target_mfe_pips`` and
        ``target_regime``. ``None`` when the symbol cannot be resolved, its
        symbol info is unavailable, or M15/H4 data is missing.
    """
    print(f"\n>>> Extracting Data & Labeling Targets for {symbol_name}...")

    symbols_found = mt5.symbols_get(f"*{symbol_name}*")
    if not symbols_found: return None
    actual_symbol = symbols_found[0].name

    df_m15 = fetch_data(actual_symbol, mt5.TIMEFRAME_M15, utc_from, utc_to)
    df_h4 = fetch_data(actual_symbol, mt5.TIMEFRAME_H4, utc_from, utc_to)
    if df_m15 is None or df_h4 is None: return None

    info = mt5.symbol_info(actual_symbol)
    # symbol_info() returns None on failure; bail out like the other lookups
    # instead of raising AttributeError on .point below.
    if info is None: return None
    # One pip is ten points on 3- and 5-digit quotes, one point otherwise.
    pip_val = info.point * (10.0 if info.digits in [3, 5] else 1.0)

    # 1. H4 Context
    df_h4['h4_ema_50'] = df_h4['close'].ewm(span=EMA_PERIOD, adjust=False).mean()
    df_h4['h4_dist_to_ema'] = (df_h4['close'] - df_h4['h4_ema_50']) / pip_val
    df_h4['h4_rsi_14'] = ta.rsi(df_h4['close'], length=14)
    df_h4 = df_h4.dropna(subset=['h4_rsi_14', 'h4_dist_to_ema']).copy()
    # Key each H4 bar by its time + 4h, so the backward as-of merge below only
    # pairs an M15 bar with H4 bars stamped at least four hours earlier
    # (presumably to avoid look-ahead into an unfinished H4 bar).
    df_h4['time_aligned'] = df_h4['time'] + pd.Timedelta(hours=4)
    h4_context = df_h4[['time_aligned', 'h4_dist_to_ema', 'h4_rsi_14']].rename(columns={'time_aligned': 'time'})

    # 2. M15 Features
    df_m15['ema_50'] = df_m15['close'].ewm(span=EMA_PERIOD, adjust=False).mean()
    df_m15['dist_to_ema'] = (df_m15['close'] - df_m15['ema_50']) / pip_val
    df_m15['hour_of_day'] = df_m15['time'].dt.hour
    df_m15['atr_14'] = ta.atr(df_m15['high'], df_m15['low'], df_m15['close'], length=14)
    df_m15['rsi_14'] = ta.rsi(df_m15['close'], length=14)

    # 3. Stitch Tensors (timezone stripped AND resolution matched on every key)
    df_m15['time'] = _strip_tz_to_ns(df_m15['time'])
    h4_context['time'] = _strip_tz_to_ns(h4_context['time'])

    df_m15 = pd.merge_asof(df_m15.sort_values('time'), h4_context.sort_values('time'), on='time', direction='backward')

    if dxy_df is not None:
        # Normalise the key on a fresh frame: dxy_df is shared by every symbol
        # processed by the caller and must not be modified here.
        dxy_subset = dxy_df[['time', 'dxy_rsi', 'dxy_ema_dist']].assign(
            time=lambda frame: _strip_tz_to_ns(frame['time'])
        )
        df_m15 = pd.merge_asof(df_m15, dxy_subset.sort_values('time'), on='time', direction='backward')
    else:
        df_m15['dxy_rsi'] = 50.0
        df_m15['dxy_ema_dist'] = 0.0

    # Drops any row with a missing value (indicator warm-up, bars with no
    # earlier H4/DXY match).
    df_m15.dropna(inplace=True)

    # 4. FVG Detection
    # Compare the current bar with the bar two rows back: a bullish gap exists
    # when the current low is above that bar's high (mirror image for bearish).
    df_m15['prev_high'] = df_m15['high'].shift(2)
    df_m15['prev_low'] = df_m15['low'].shift(2)

    df_m15['bull_gap'] = df_m15['low'] - df_m15['prev_high']
    df_m15['is_bull_fvg'] = (df_m15['bull_gap'] > 0) & (df_m15['close'].shift(1) > df_m15['close'].shift(2))
    df_m15['bull_fvg_size'] = np.where(df_m15['is_bull_fvg'], df_m15['bull_gap'] / pip_val, 0.0)

    df_m15['bear_gap'] = df_m15['prev_low'] - df_m15['high']
    df_m15['is_bear_fvg'] = (df_m15['bear_gap'] > 0) & (df_m15['close'].shift(1) < df_m15['close'].shift(2))
    df_m15['bear_fvg_size'] = np.where(df_m15['is_bear_fvg'], df_m15['bear_gap'] / pip_val, 0.0)

    df_m15['bull_fvg_atr_ratio'] = np.where(df_m15['is_bull_fvg'], df_m15['bull_gap'] / df_m15['atr_14'], 0.0)
    df_m15['bear_fvg_atr_ratio'] = np.where(df_m15['is_bear_fvg'], df_m15['bear_gap'] / df_m15['atr_14'], 0.0)

    fvg_df = df_m15[(df_m15['is_bull_fvg']) | (df_m15['is_bear_fvg'])].copy()

    # 5. Unified Target Labeling (Brains 1, 2, and 3)
    win_loss_labels, mfe_labels, regime_labels = [], [], []
    sl_points = SL_PIPS * pip_val
    tp_points = sl_points * RR_RATIO
    invalidation_points = INVALIDATION_PIPS * pip_val

    times, highs, lows = df_m15['time'].values, df_m15['high'].values, df_m15['low'].values

    # df_m15 is time-sorted (merge_asof output), so the first bar strictly after
    # each setup is found by binary search rather than a full scan per setup.
    first_future = np.searchsorted(times, fvg_df['time'].values, side='right')

    setups = zip(first_future, fvg_df['is_bull_fvg'].values, fvg_df['high'].values, fvg_df['low'].values)
    for start, is_bull, bar_high, bar_low in setups:
        # Slicing clips at the end of the data; a setup on the last bar gets an
        # empty window and is therefore labelled (0, 0.0, 0).
        stop = start + TIMEOUT_BARS
        # Bullish setups enter at the gap bar's low, bearish ones at its high.
        entry = bar_low if is_bull else bar_high

        outcome, max_excursion, max_adverse = _label_setup(
            is_bull, entry, highs[start:stop], lows[start:stop],
            sl_points, tp_points, invalidation_points,
        )

        win_loss_labels.append(outcome)
        mfe_labels.append(max_excursion / pip_val)

        # Regime: 1 (Clean Trend) if Winner AND Drawdown never exceeded 50% of SL. Else 0 (Chop).
        if outcome == 1 and max_adverse <= (sl_points * 0.5): regime_labels.append(1)
        else: regime_labels.append(0)

    fvg_df['target_win_loss'] = win_loss_labels
    fvg_df['target_mfe_pips'] = mfe_labels
    fvg_df['target_regime'] = regime_labels

    # Column order matters: the caller slices features positionally.
    return fvg_df[['tick_volume', 'hour_of_day', 'rsi_14', 'atr_14', 'dist_to_ema',
                   'bull_fvg_size', 'bear_fvg_size', 'bull_fvg_atr_ratio', 'bear_fvg_atr_ratio',
                   'h4_rsi_14', 'h4_dist_to_ema', 'dxy_rsi', 'dxy_ema_dist',
                   'target_win_loss', 'target_mfe_pips', 'target_regime']]

def save_onnx_model(model, symbol, model_type, dims):
    """Convert *model* to ONNX and write it under a timestamped and a standard name.

    Args:
        model: Fitted estimator handed to ``convert_sklearn``.
        symbol: Symbol name embedded in both output filenames.
        model_type: Model tag. ``"mfe_regressor_v5"`` is converted without
            extra options; every other tag is converted with ``zipmap``
            disabled. The tag also selects the standard filename.
        dims: Number of input features (second dimension of the float input).

    The timestamped copy is always written first. A ``PermissionError`` while
    writing the standard file is reported and swallowed.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M")

    convert_kwargs = {
        "initial_types": [("float_input", FloatTensorType([None, dims]))],
        "target_opset": {"": 12, "ai.onnx.ml": 3},
    }
    if model_type != "mfe_regressor_v5":
        convert_kwargs["options"] = {type(model): {"zipmap": False}}
    onnx_model = convert_sklearn(model, **convert_kwargs)

    # Standard name (MT5 reads this by default). The three known tags keep the
    # naming used by the previous EA variables; anything else gets the generic form.
    legacy_names = {
        "model_v4": f"fvg_model_{symbol}_v4.onnx",
        "mfe_regressor_v5": f"fvg_mfe_regressor_{symbol}_v5.onnx",
        "manager_v6": f"fvg_manager_{symbol}_v6.onnx",
    }
    standard_path = legacy_names.get(model_type, f"fvg_{model_type}_{symbol}.onnx")
    # Backup name (Avoids MT5 lock crashes)
    backup_path = f"fvg_{model_type}_{symbol}_{stamp}.onnx"

    payload = onnx_model.SerializeToString()

    with open(backup_path, "wb") as backup_file:
        backup_file.write(payload)

    try:
        with open(standard_path, "wb") as standard_file:
            standard_file.write(payload)
    except PermissionError:
        print(f"  [!] MT5 locked {standard_path}. Saved backup as {backup_path}.")

def auto_train_pipeline():
    """Retrain and export the three per-symbol models over the rolling window.

    Connects to the MetaTrader 5 terminal, builds DXY context features once,
    then for every entry in ``SYMBOLS`` builds the feature/label table and fits:

    1. a win/loss classifier on all 13 features,
    2. an MFE regressor on the first 9 features (only when more than 20 setups
       moved more than 2.0 pips in their favour),
    3. a regime classifier on all 13 features.

    Each fitted model is exported through ``save_onnx_model``. Symbols with no
    data or fewer than 50 setups are skipped. The terminal connection is shut
    down on exit even if a step raises.
    """
    if not mt5.initialize():
        print("MT5 Init Failed")
        return

    # Everything after a successful initialize() runs under try/finally so an
    # exception mid-pipeline cannot leave the terminal connection open.
    try:
        utc_to = datetime.now(pytz.UTC)
        utc_from = utc_to - timedelta(days=WINDOW_DAYS)
        print(f"Rolling Window: {utc_from.strftime('%Y-%m-%d')} to {utc_to.strftime('%Y-%m-%d')}")

        # Process DXY Once
        dxy_df = None
        dxy_found = mt5.symbols_get(f"*{DXY_SYMBOL}*")
        if dxy_found:
            dxy_raw = fetch_data(dxy_found[0].name, mt5.TIMEFRAME_M15, utc_from, utc_to)
            if dxy_raw is not None:
                dxy_raw['dxy_ema_50'] = dxy_raw['close'].ewm(span=EMA_PERIOD, adjust=False).mean()
                dxy_raw['dxy_ema_dist'] = dxy_raw['close'] - dxy_raw['dxy_ema_50']
                dxy_raw['dxy_rsi'] = ta.rsi(dxy_raw['close'], length=14)
                # Strip timezone AND force nanosecond resolution for DXY stitching
                dxy_raw['time'] = dxy_raw['time'].dt.tz_localize(None).astype('datetime64[ns]')
                dxy_df = dxy_raw[['time', 'dxy_rsi', 'dxy_ema_dist']].dropna()

        for sym in SYMBOLS:
            df = build_features_and_labels(sym, dxy_df, utc_from, utc_to)
            if df is None or len(df) < 50:
                print(f"Skipping {sym}, not enough data.")
                continue

            # Sample Weighting: Linearly weight recent data heavier to fight Concept Drift
            weights = np.linspace(0.1, 1.0, len(df))

            # Features are taken positionally: the first 13 columns feed the
            # classifiers, the first 9 feed the regressor.
            X_13D = df.iloc[:, :13].values
            X_9D = df.iloc[:, :9].values

            # --- TRAIN BRAIN 1: WIN/LOSS CLASSIFIER ---
            model_1 = xgb.XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.05, random_state=42)
            model_1.fit(X_13D, df['target_win_loss'].values, sample_weight=weights)
            save_onnx_model(model_1, sym, "model_v4", 13)

            # --- TRAIN BRAIN 2: MFE REGRESSOR ---
            # Only train Regressor on setups that actually moved (> 2.0 pips).
            # The mask is a plain NumPy array so it selects rows by position in
            # the NumPy feature/weight arrays as well as in df.
            mfe_mask = (df['target_mfe_pips'] > 2.0).to_numpy()
            if mfe_mask.sum() > 20:
                model_2 = xgb.XGBRegressor(n_estimators=100, max_depth=4, learning_rate=0.05, random_state=42)
                model_2.fit(X_9D[mfe_mask], df.loc[mfe_mask, 'target_mfe_pips'].values, sample_weight=weights[mfe_mask])
                save_onnx_model(model_2, sym, "mfe_regressor_v5", 9)

            # --- TRAIN BRAIN 3: REGIME MANAGER ---
            model_3 = xgb.XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.05, random_state=42)
            model_3.fit(X_13D, df['target_regime'].values, sample_weight=weights)
            save_onnx_model(model_3, sym, "manager_v6", 13)

            print(f"SUCCESS: Triple-Brain Models updated for {sym}.")
    finally:
        mt5.shutdown()

# Run the retraining pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    auto_train_pipeline()

Rolling Window: 2025-11-24 to 2026-02-22

>>> Extracting Data & Labeling Targets for EURUSD...


MergeError: incompatible merge keys [0] dtype('<M8[s]') and dtype('<M8[us]'), must be the same type