# Gold Meta-Model Training - Attempt 3

**Architecture**: 2-stage ensemble with confidence-based re-ranking

**Key improvements from Attempt 2**:
- 5-model ensemble with different random seeds [42, 137, 256, 389, 512]
- Confidence-based re-ranking for HCDA (magnitude_rank + agreement_fraction)
- Optuna objective reweighting: Sharpe 35%, DA 25%, MAE 15%, HCDA 25%
- Relaxed regularization ranges

**Data**: Reuse meta_model_attempt_2 datasets from bigbigzabuton/gold-prediction-complete

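**Targets** (checked on the held-out test set): direction accuracy (DA) > 56%, high-confidence direction accuracy (HCDA) > 60%, Sharpe ratio > 0.8, MAE < 0.75%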

In [None]:
# ============================================================
# 1. IMPORTS
# ============================================================
import pandas as pd
import numpy as np
import xgboost as xgb
import optuna
import json
from datetime import datetime
from sklearn.metrics import mean_absolute_error
import warnings
# Silence every warning raised for the remainder of the run.
warnings.filterwarnings('ignore')

# Seed NumPy's global RNG; the XGBoost models receive their own seeds
# through `random_state` when they are constructed.
np.random.seed(42)

# Log when the run started and which library versions are in use.
started_at = datetime.now().isoformat()
print(f"Training started: {started_at}")
for library_label, library in (("XGBoost", xgb), ("Optuna", optuna)):
    print(f"{library_label} version: {library.__version__}")

In [None]:
# ============================================================
# 2. DATA LOADING
# ============================================================
# Reuse Attempt 2 datasets from Kaggle dataset
def _read_split(split_name):
    """Read one Attempt 2 split CSV, using its first column as a date-parsed index."""
    return pd.read_csv(
        f'../input/gold-prediction-complete/meta_model_attempt_2_{split_name}.csv',
        index_col=0,
        parse_dates=True,
    )


train_df = _read_split('train')
val_df = _read_split('val')
test_df = _read_split('test')

print("Data loaded successfully")
print(f"Train: {train_df.shape}, Val: {val_df.shape}, Test: {test_df.shape}")
print("Date ranges:")
for split_label, frame in (("Train", train_df), ("Val", val_df), ("Test", test_df)):
    print(f"  {split_label}: {frame.index.min()} to {frame.index.max()}")

# Every column except the target is a feature. The feature list is taken from
# the training frame and applied to all three splits so the column order matches.
target_col = 'gold_return_next'
feature_cols = [name for name in train_df.columns if name != target_col]

X_train, y_train = train_df[feature_cols], train_df[target_col]
X_val, y_val = val_df[feature_cols], val_df[target_col]
X_test, y_test = test_df[feature_cols], test_df[target_col]

print(f"\nFeatures ({len(feature_cols)}): {feature_cols[:5]}...")
print(f"Target: {target_col}")
print("\nTarget statistics:")
for split_label, target in (("Train", y_train), ("Val", y_val), ("Test", y_test)):
    print(f"  {split_label}: mean={target.mean():.4f}, std={target.std():.4f}")

In [None]:
# ============================================================
# 3. EVALUATION METRICS
# ============================================================
def direction_accuracy(y_true, y_pred):
    """Return the fraction of samples whose predicted sign matches the true sign.

    Samples whose true value is exactly zero are excluded. Inputs are
    converted to NumPy arrays and compared by position, so lists, arrays
    and pandas Series (regardless of their index labels) are all accepted.

    Args:
        y_true: 1-D array-like of realised values.
        y_pred: 1-D array-like of predictions, same length as ``y_true``.

    Returns:
        Accuracy in [0, 1], or 0.5 when no sample has a non-zero true value
        (including empty input).
    """
    # Work on plain arrays: predictions are positional, so index labels on a
    # Series must not influence which samples are compared.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    nonzero = y_true != 0
    if not nonzero.any():
        return 0.5
    return (np.sign(y_true[nonzero]) == np.sign(y_pred[nonzero])).mean()

def sharpe_ratio(y_true, y_pred, transaction_cost_bps=5.0):
    """Annualised Sharpe ratio of a sign-following strategy, net of costs.

    The position on each sample is ``sign(y_pred)`` and earns
    ``position * y_true``. A flat cost is deducted on every sample where the
    position differs from the previous one; the position before the first
    sample is taken to be 0, so entering the first position is charged too.
    The same cost applies whatever the size of the change (a flip from +1 to
    -1 is charged once).

    Args:
        y_true: Realised returns.
        y_pred: Predicted returns; only their sign is used.
        transaction_cost_bps: Cost per position change, in basis points.

    Returns:
        ``mean / std * sqrt(252)`` of the net returns, or 0.0 when the
        standard deviation is exactly zero.
    """
    # NOTE(review): the cost is deducted as a fraction (5 bps -> 0.0005). That
    # nets correctly only if y_true is a fractional return; other parts of this
    # notebook treat MAE thresholds as percent values (e.g. 0.75) -- confirm
    # the units of the target column.
    signal = np.sign(y_pred)
    cost_per_change = transaction_cost_bps / 10000.0

    # True wherever the position differs from the preceding one.
    position_changed = np.abs(np.diff(signal, prepend=0)) > 0

    net = signal * y_true - position_changed * cost_per_change

    volatility = net.std()
    if volatility == 0:
        return 0.0
    return net.mean() / volatility * np.sqrt(252)

def high_confidence_direction_accuracy(y_true, y_pred, top_pct=0.2):
    """Direction accuracy restricted to the largest-magnitude predictions.

    A sample is "high confidence" when ``|y_pred|`` is at or above the
    ``(1 - top_pct)`` percentile of all absolute predictions. Samples whose
    true value is exactly zero are excluded. Inputs are converted to NumPy
    arrays and compared by position, so lists, arrays and pandas Series are
    all accepted.

    Args:
        y_true: 1-D array-like of realised values.
        y_pred: 1-D array-like of predictions, same length as ``y_true``.
        top_pct: Fraction of samples (by absolute prediction) to keep.

    Returns:
        Accuracy in [0, 1] over the selected samples, or 0.5 when nothing is
        selected (including empty input).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # A percentile of an empty array is undefined; report the neutral value.
    if y_pred.size == 0:
        return 0.5

    abs_pred = np.abs(y_pred)
    threshold = np.percentile(abs_pred, (1 - top_pct) * 100)
    mask = (abs_pred >= threshold) & (y_true != 0)

    if not mask.any():
        return 0.5
    return (np.sign(y_true[mask]) == np.sign(y_pred[mask])).mean()

def reranked_hcda(y_true, y_pred_ensemble, alpha=0.5, top_pct=0.2):
    """
    Direction accuracy on the samples the ensemble is most confident about.

    Confidence blends two signals computed per sample:
      * magnitude: |ensemble mean| divided by the largest |ensemble mean|
        (a max-scaled magnitude, not an ordinal rank);
      * agreement: share of ensemble members whose sign equals the sign of
        the ensemble mean.

    Samples at or above the (1 - top_pct) percentile of the blended score are
    kept, samples with a true value of exactly zero are dropped, and the sign
    of the ensemble mean is compared with the sign of the truth.

    Args:
        y_true: Ground truth
        y_pred_ensemble: (n_samples, n_models) predictions from ensemble
        alpha: Weight of the magnitude term (agreement gets 1 - alpha)
        top_pct: Fraction of samples to keep by confidence

    Returns:
        Accuracy over the selected samples, or 0.5 when none are selected.
    """
    ensemble_mean = y_pred_ensemble.mean(axis=1)

    # Magnitude term; the small epsilon avoids dividing by zero when every
    # mean prediction is 0.
    mean_magnitude = np.abs(ensemble_mean)
    scaled_magnitude = mean_magnitude / (mean_magnitude.max() + 1e-10)

    # Agreement term: compare each member's sign with the mean's sign.
    votes_with_mean = np.sign(y_pred_ensemble) == np.sign(ensemble_mean)[:, None]
    agreement_fraction = votes_with_mean.mean(axis=1)

    confidence = alpha * scaled_magnitude + (1 - alpha) * agreement_fraction

    cutoff = np.percentile(confidence, (1 - top_pct) * 100)
    selected = (confidence >= cutoff) & (y_true != 0)
    if not selected.any():
        return 0.5

    hits = np.sign(y_true[selected]) == np.sign(ensemble_mean[selected])
    return hits.mean()

def compute_all_metrics(y_true, y_pred, y_pred_ensemble=None, alpha_confidence=0.5):
    """Collect MAE, DA, Sharpe and HCDA for one set of predictions.

    Args:
        y_true: Realised values.
        y_pred: Point predictions scored by every metric.
        y_pred_ensemble: Optional (n_samples, n_models) member predictions;
            when given, the re-ranked HCDA is added under 'hcda_reranked'.
        alpha_confidence: Magnitude weight forwarded to ``reranked_hcda``.

    Returns:
        Dict with keys 'mae', 'da', 'sharpe', 'hcda' and, when an ensemble
        is supplied, 'hcda_reranked'.
    """
    metrics = {}
    metrics['mae'] = mean_absolute_error(y_true, y_pred)
    metrics['da'] = direction_accuracy(y_true, y_pred)
    metrics['sharpe'] = sharpe_ratio(y_true, y_pred)
    metrics['hcda'] = high_confidence_direction_accuracy(y_true, y_pred)

    # Without per-member predictions there is nothing to re-rank.
    if y_pred_ensemble is None:
        return metrics

    metrics['hcda_reranked'] = reranked_hcda(
        y_true, y_pred_ensemble, alpha=alpha_confidence
    )
    return metrics

# Summarise the metrics defined in this cell.
print("Evaluation metrics defined")
for metric_description in (
    "MAE (Mean Absolute Error)",
    "DA (Direction Accuracy, excluding zeros)",
    "Sharpe Ratio (with 5bps transaction costs)",
    "HCDA (High-Confidence DA, top 20% by magnitude)",
    "HCDA Reranked (top 20% by confidence score)",
):
    print(f"  - {metric_description}")

In [None]:
# ============================================================
# 4. OPTUNA OBJECTIVE
# ============================================================
# Seeds used for every ensemble member, both during search and final training.
ENSEMBLE_SEEDS = [42, 137, 256, 389, 512]

def objective(trial):
    """
    Optuna objective: train one XGBoost model per seed with shared hyperparameters.

    Objective = 0.35*Sharpe + 0.25*DA + 0.15*(1-MAE/2) + 0.25*HCDA_reranked - overfitting_penalty

    All four terms are validation metrics of the ensemble mean prediction.
    The penalty compares train and validation metrics. Per-trial metrics are
    stored as user attributes on the trial for later inspection.

    Args:
        trial: Optuna trial used to sample hyperparameters.

    Returns:
        The penalised objective value (higher is better).
    """
    # Hyperparameters (relaxed regularization ranges)
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300, step=50),
        'max_depth': trial.suggest_int('max_depth', 2, 5),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.15, log=True),
        'min_child_weight': trial.suggest_int('min_child_weight', 3, 15),
        'subsample': trial.suggest_float('subsample', 0.6, 0.95),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.95),
        'reg_lambda': trial.suggest_float('reg_lambda', 2.0, 20.0, log=True),
        'reg_alpha': trial.suggest_float('reg_alpha', 0.5, 10.0, log=True),
        'gamma': trial.suggest_float('gamma', 0.0, 0.5),
        'objective': 'reg:squarederror',
        'tree_method': 'hist',
        'eval_metric': 'mae',
    }

    # Alpha for confidence score
    alpha_confidence = trial.suggest_float('alpha_confidence', 0.2, 0.8)

    # Train one model per seed. Only the predictions are kept; the fitted
    # models themselves are not needed after the trial.
    train_preds_list = []
    val_preds_list = []

    for seed in ENSEMBLE_SEEDS:
        # Pass the seed at construction instead of mutating the shared dict.
        model = xgb.XGBRegressor(**params, random_state=seed)
        # eval_set only records the validation MAE per round; no early
        # stopping is configured, so all n_estimators rounds are trained.
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            verbose=False
        )
        train_preds_list.append(model.predict(X_train))
        val_preds_list.append(model.predict(X_val))

    # Ensemble predictions (n_samples, n_models)
    train_preds_ensemble = np.column_stack(train_preds_list)
    val_preds_ensemble = np.column_stack(val_preds_list)

    # Mean predictions
    train_pred_mean = train_preds_ensemble.mean(axis=1)
    val_pred_mean = val_preds_ensemble.mean(axis=1)

    train_metrics = compute_all_metrics(y_train, train_pred_mean, train_preds_ensemble, alpha_confidence)
    val_metrics = compute_all_metrics(y_val, val_pred_mean, val_preds_ensemble, alpha_confidence)

    # Weighted objective with reranked HCDA
    mae_norm = 1 - val_metrics['mae'] / 2.0  # Normalize MAE (assume max ~2%)

    objective_value = (
        0.35 * val_metrics['sharpe'] +
        0.25 * val_metrics['da'] +
        0.15 * mae_norm +
        0.25 * val_metrics['hcda_reranked']
    )

    # Overfitting penalty (same as Attempt 2): fixed deductions whenever the
    # train/validation gap exceeds a threshold.
    overfit_penalty = 0
    if train_metrics['da'] - val_metrics['da'] > 0.10:
        overfit_penalty += 0.1
    if train_metrics['sharpe'] - val_metrics['sharpe'] > 0.3:
        overfit_penalty += 0.1
    if val_metrics['mae'] > train_metrics['mae'] * 1.3:
        overfit_penalty += 0.05

    final_objective = objective_value - overfit_penalty

    # Store metrics for logging
    trial.set_user_attr('train_mae', train_metrics['mae'])
    trial.set_user_attr('val_mae', val_metrics['mae'])
    trial.set_user_attr('train_da', train_metrics['da'])
    trial.set_user_attr('val_da', val_metrics['da'])
    trial.set_user_attr('train_sharpe', train_metrics['sharpe'])
    trial.set_user_attr('val_sharpe', val_metrics['sharpe'])
    trial.set_user_attr('val_hcda', val_metrics['hcda'])
    trial.set_user_attr('val_hcda_reranked', val_metrics['hcda_reranked'])
    trial.set_user_attr('overfit_penalty', overfit_penalty)

    return final_objective

# Summarise how the objective defined in this cell is assembled.
print("Optuna objective defined")
for objective_note in (
    "5-model ensemble with seeds: [42, 137, 256, 389, 512]",
    "Objective: 0.35*Sharpe + 0.25*DA + 0.15*(1-MAE/2) + 0.25*HCDA_reranked - penalty",
    "Overfitting penalties: DA gap >0.10, Sharpe gap >0.3, MAE ratio >1.3",
):
    print(f"  - {objective_note}")

In [None]:
# ============================================================
# 5. HYPERPARAMETER OPTIMIZATION
# ============================================================
# Seed for Optuna's sampler so the sequence of suggested hyperparameters is
# repeatable from run to run (the model seeds alone do not fix the search).
OPTUNA_SEED = 42

print("Starting Optuna hyperparameter optimization...")
print(f"  - n_trials: 80")
print(f"  - timeout: 3600 seconds (1 hour)")
print(f"  - Sampler: TPESampler(seed={OPTUNA_SEED})")
print(f"  - Pruning: MedianPruner with 5 warmup steps\n")

# NOTE: the pruner is kept for parity with the logged configuration, but it
# can only act on intermediate values; `objective` never calls
# `trial.report`, so no trial is actually pruned.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=OPTUNA_SEED),
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=5)
)

# Stops at whichever comes first: 80 trials or the 1-hour timeout.
study.optimize(objective, n_trials=80, timeout=3600, show_progress_bar=True)

print(f"\nOptimization complete!")
print(f"  - Total trials: {len(study.trials)}")
print(f"  - Best objective value: {study.best_value:.4f}")
print(f"\nBest hyperparameters:")
for key, value in study.best_params.items():
    print(f"  {key}: {value}")

# Best trial metrics (stored by `objective` as user attributes)
best_trial = study.best_trial
print(f"\nBest trial validation metrics:")
print(f"  MAE: {best_trial.user_attrs['val_mae']:.4f}")
print(f"  DA: {best_trial.user_attrs['val_da']:.4f}")
print(f"  Sharpe: {best_trial.user_attrs['val_sharpe']:.4f}")
print(f"  HCDA (standard): {best_trial.user_attrs['val_hcda']:.4f}")
print(f"  HCDA (reranked): {best_trial.user_attrs['val_hcda_reranked']:.4f}")
print(f"  Overfit penalty: {best_trial.user_attrs['overfit_penalty']:.4f}")

# Split the tuned values: alpha_confidence belongs to the re-ranking metric,
# everything else is an XGBoost hyperparameter.
best_params = study.best_params.copy()
alpha_confidence_final = best_params.pop('alpha_confidence')

In [None]:
# ============================================================
# 6. TRAIN FINAL ENSEMBLE
# ============================================================
print("\nTraining final 5-model ensemble with best hyperparameters...")

final_models = []
# Tuned hyperparameters plus the fixed settings used during the search.
final_params = {
    **best_params,
    'objective': 'reg:squarederror',
    'tree_method': 'hist',
    'eval_metric': 'mae',
}

n_models = len(ENSEMBLE_SEEDS)
for i, seed in enumerate(ENSEMBLE_SEEDS, 1):
    print(f"  Training model {i}/{n_models} (seed={seed})...")
    final_params['random_state'] = seed
    model = xgb.XGBRegressor(**final_params)
    # eval_set only records validation MAE; no early stopping is configured.
    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        verbose=False
    )
    final_models.append(model)

    # `best_iteration` is only defined when early stopping is used; newer
    # XGBoost releases raise AttributeError otherwise, which would abort the
    # run after the model has already been trained. Fall back gracefully.
    best_iteration = getattr(model, 'best_iteration', None)
    if best_iteration is None:
        print(f"    Best iteration: n/a (no early stopping; n_estimators={model.n_estimators})")
    else:
        print(f"    Best iteration: {best_iteration}")

print(f"\nFinal ensemble trained successfully!")

In [None]:
# ============================================================
# 7. GENERATE ENSEMBLE PREDICTIONS
# ============================================================
print("Generating ensemble predictions...")

def get_ensemble_predictions(models, X):
    """Stack each model's ``predict(X)`` output as one column per model.

    Args:
        models: Iterable of fitted models exposing ``predict``.
        X: Feature matrix passed unchanged to every model.

    Returns:
        Array with one column per model, in the order of ``models``.
    """
    return np.column_stack([member.predict(X) for member in models])

# Per-model predictions for every split, one column per ensemble member.
train_preds_ensemble, val_preds_ensemble, test_preds_ensemble = (
    get_ensemble_predictions(final_models, features)
    for features in (X_train, X_val, X_test)
)

# The ensemble prediction is the row-wise mean across members.
train_pred, val_pred, test_pred = (
    member_preds.mean(axis=1)
    for member_preds in (train_preds_ensemble, val_preds_ensemble, test_preds_ensemble)
)

for split_label, member_preds in (
    ("Train", train_preds_ensemble),
    ("Val", val_preds_ensemble),
    ("Test", test_preds_ensemble),
):
    print(f"  {split_label} predictions: shape {member_preds.shape}")

# Write actual vs. predicted values for each split to its own CSV.
for file_stem, actual, predicted in (
    ("train", y_train, train_pred),
    ("val", y_val, val_pred),
    ("test", y_test, test_pred),
):
    predictions_frame = pd.DataFrame({
        'actual': actual,
        'predicted': predicted,
    })
    predictions_frame.to_csv(f'{file_stem}_predictions.csv')

print("\nPrediction CSVs saved")

In [None]:
# ============================================================
# 8. EVALUATE ON ALL SPLITS
# ============================================================
banner = "=" * 60

print(f"\n{banner}")
print("FINAL EVALUATION RESULTS")
print(banner)

# Score the final ensemble on every split with the tuned confidence weight.
train_metrics_final = compute_all_metrics(y_train, train_pred, train_preds_ensemble, alpha_confidence_final)
val_metrics_final = compute_all_metrics(y_val, val_pred, val_preds_ensemble, alpha_confidence_final)
test_metrics_final = compute_all_metrics(y_test, test_pred, test_preds_ensemble, alpha_confidence_final)

for heading, split_metrics in (
    ("TRAIN SET", train_metrics_final),
    ("VALIDATION SET", val_metrics_final),
    ("TEST SET (HELD-OUT)", test_metrics_final),
):
    print(f"\n{heading}:")
    print(f"  MAE: {split_metrics['mae']:.4f}")
    print(f"  Direction Accuracy: {split_metrics['da']:.4f}")
    print(f"  Sharpe Ratio: {split_metrics['sharpe']:.4f}")
    print(f"  HCDA (standard): {split_metrics['hcda']:.4f}")
    print(f"  HCDA (reranked, alpha={alpha_confidence_final:.2f}): {split_metrics['hcda_reranked']:.4f}")

print(f"\n{banner}")
print("TARGET COMPARISON (Test Set):")
print(banner)


def _pass_mark(passed):
    """Return the tick/cross symbol for a met/unmet target."""
    return '✓' if passed else '✗'


# Each entry: (test value, target threshold, pass/fail mark). MAE must fall
# below its threshold; every other metric must exceed its threshold.
test_da = test_metrics_final['da']
test_hcda_reranked = test_metrics_final['hcda_reranked']
test_mae = test_metrics_final['mae']
test_sharpe = test_metrics_final['sharpe']
targets = {
    'Direction Accuracy': (test_da, 0.56, _pass_mark(test_da > 0.56)),
    'HCDA (reranked)': (test_hcda_reranked, 0.60, _pass_mark(test_hcda_reranked > 0.60)),
    'MAE': (test_mae, 0.75, _pass_mark(test_mae < 0.75)),
    'Sharpe Ratio': (test_sharpe, 0.80, _pass_mark(test_sharpe > 0.80)),
}

for metric_name, (value, target, status) in targets.items():
    comparator = '<' if metric_name == 'MAE' else '>'
    print(f"{status} {metric_name}: {value:.4f} (target: {comparator}{target})")

# The run passes only when every individual target is met.
all_passed = all(entry[2] == '✓' for entry in targets.values())
overall_verdict = 'PASS' if all_passed else 'FAIL'
print(f"\nOVERALL: {overall_verdict}")

In [None]:
# ============================================================
# 9. FEATURE IMPORTANCE (from first model)
# ============================================================
print("\nFeature Importance (from model with seed=42):")

# Importances come from the first ensemble member only, paired with the
# feature names in training column order and sorted from most to least
# important.
first_model_importances = final_models[0].feature_importances_
unsorted_importance = pd.DataFrame(
    {'feature': feature_cols, 'importance': first_model_importances}
)
feature_importance = unsorted_importance.sort_values(by='importance', ascending=False)

print(feature_importance.to_string(index=False))

feature_importance.to_csv('feature_importance.csv', index=False)
print("\nFeature importance saved to feature_importance.csv")

In [None]:
# ============================================================
# 10. SAVE RESULTS
# ============================================================
print("\nSaving final results...")

# Save every ensemble member, named after the seed it was trained with.
for seed, model in zip(ENSEMBLE_SEEDS, final_models):
    model.save_model(f'model_seed_{seed}.json')
    print(f"  Saved model_seed_{seed}.json")


def _metrics_for_json(split_metrics):
    """Map one split's metric dict to JSON field names with native floats."""
    return {
        "mae": float(split_metrics['mae']),
        "direction_accuracy": float(split_metrics['da']),
        "sharpe_ratio": float(split_metrics['sharpe']),
        "hcda_standard": float(split_metrics['hcda']),
        "hcda_reranked": float(split_metrics['hcda_reranked']),
    }


# Training result summary
result = {
    "feature": "meta_model",
    "attempt": 3,
    "timestamp": datetime.now().isoformat(),
    "architecture": "5-model XGBoost ensemble with confidence-based re-ranking",
    "ensemble_seeds": ENSEMBLE_SEEDS,
    "alpha_confidence": alpha_confidence_final,
    "best_params": best_params,
    "optuna": {
        "n_trials_completed": len(study.trials),
        "best_objective_value": study.best_value,
        # Sum of per-trial durations; trials without a duration are skipped.
        "optimization_time": sum(t.duration.total_seconds() for t in study.trials if t.duration),
    },
    "metrics": {
        "train": _metrics_for_json(train_metrics_final),
        "val": _metrics_for_json(val_metrics_final),
        "test": _metrics_for_json(test_metrics_final),
    },
    # Comparing NumPy scalars yields numpy.bool_, which the json module cannot
    # encode; with default=str it would be written as the *string* "True" or
    # "False" (and "False" is truthy when read back). Cast to native bool so
    # the file contains real JSON booleans.
    "targets_met": {
        "direction_accuracy": bool(test_metrics_final['da'] > 0.56),
        "hcda_reranked": bool(test_metrics_final['hcda_reranked'] > 0.60),
        "mae": bool(test_metrics_final['mae'] < 0.75),
        "sharpe_ratio": bool(test_metrics_final['sharpe'] > 0.80),
        "all_targets_met": bool(all_passed),
    },
    "data_info": {
        "train_samples": len(X_train),
        "val_samples": len(X_val),
        "test_samples": len(X_test),
        "n_features": len(feature_cols),
        "feature_cols": feature_cols,
    },
}

# default=str stays as a last-resort fallback for any remaining non-native
# value (e.g. inside best_params) so saving never aborts the run.
with open('training_result.json', 'w') as f:
    json.dump(result, f, indent=2, default=str)

print("\nResults saved:")
print("  - training_result.json (complete metrics and metadata)")
print("  - model_seed_{42,137,256,389,512}.json (5 trained models)")
print("  - train_predictions.csv, val_predictions.csv, test_predictions.csv")
print("  - feature_importance.csv")

print(f"\n{'='*60}")
print("TRAINING COMPLETE!")
print(f"{'='*60}")
print(f"Finished: {datetime.now().isoformat()}")