In [2]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr
from scipy.optimize import minimize

# --- 1. Configuration ---
class Config:
    """Input file locations used by the ensembling step.

    All values are absolute paths under ``/kaggle/input``. The directory names
    suggest each OOF array comes from a separate upstream training notebook
    (presumably one per model -- verify against those notebooks).
    """

    # Out-of-fold prediction arrays (NumPy ``.npy`` files), one per model.
    INPUT_OOF_FTT_PATH = "/kaggle/input/04-train-ftt-ipynb/oof_ftt.npy"
    INPUT_OOF_MLP_PATH = "/kaggle/input/03-train-mlp-ipynb/oof_mlp.npy"
    INPUT_OOF_LGBM_PATH = "/kaggle/input/02-train-lgbm/oof_lgbm.npy"

    # Parquet table holding the target; the main script reads its 'label' column.
    INPUT_Y_PATH = "/kaggle/input/feature-engineering/y_engineered.parquet"

# --- 2. Ensembling helper ---
def optimize_blend_weights(y_true, oof_preds, initial_weights=None):
    """Find convex blend weights that maximise Pearson correlation with the target.

    The last model's weight is defined as ``1 - sum(other weights)``, so the
    optimiser searches over ``len(oof_preds) - 1`` free weights. Non-negativity
    of that last weight is expressed as a real SLSQP inequality constraint
    instead of returning a large penalty from the objective: a penalty makes
    the objective discontinuous, which corrupts SLSQP's finite-difference
    gradients right at the boundary where one weight is zero.

    Args:
        y_true: 1-D array-like of target values.
        oof_preds: Sequence of 1-D array-likes (one per model), each with the
            same shape as ``y_true``.
        initial_weights: Optional starting values for the first
            ``len(oof_preds) - 1`` weights. Defaults to equal weighting.

    Returns:
        Tuple ``(weights, score, result)``:
        ``weights`` is an ndarray with one non-negative weight per model that
        sums to 1; ``score`` is the Pearson correlation of the blend built from
        exactly those weights; ``result`` is the ``scipy.optimize`` result
        object (``None`` when only one model is supplied and no optimisation
        is needed).

    Raises:
        ValueError: If ``oof_preds`` is empty, ``y_true`` is not 1-D, any
            prediction array has a different shape from ``y_true``, or
            ``initial_weights`` has the wrong length.
    """
    import warnings  # local import: only needed on the non-convergence path

    target = np.asarray(y_true, dtype=float)
    preds = [np.asarray(p, dtype=float) for p in oof_preds]
    if not preds:
        raise ValueError("oof_preds must contain at least one prediction array")
    if target.ndim != 1:
        raise ValueError(f"y_true must be 1-D, got shape {target.shape}")
    for idx, pred in enumerate(preds):
        # Guard against silent broadcasting, e.g. (n,) combined with (n, 1).
        if pred.shape != target.shape:
            raise ValueError(
                f"oof_preds[{idx}] has shape {pred.shape}, expected {target.shape}"
            )

    stacked = np.column_stack(preds)  # one column per model
    n_models = stacked.shape[1]

    def blend_score(weights):
        """Pearson correlation between the target and the weighted blend."""
        return pearsonr(target, stacked @ weights)[0]

    if n_models == 1:
        only = np.ones(1)
        return only, blend_score(only), None

    n_free = n_models - 1
    if initial_weights is None:
        x0 = np.full(n_free, 1.0 / n_models)
    else:
        x0 = np.asarray(initial_weights, dtype=float)
        if x0.shape != (n_free,):
            raise ValueError(
                f"initial_weights must have length {n_free}, got shape {x0.shape}"
            )

    def to_full(free):
        """Append the implied last weight so the vector sums to 1."""
        return np.append(free, 1.0 - np.sum(free))

    result = minimize(
        lambda free: -blend_score(to_full(free)),
        x0,
        method='SLSQP',
        bounds=[(0.0, 1.0)] * n_free,
        # Keeps the implied last weight >= 0 without a discontinuous penalty.
        constraints=[{'type': 'ineq', 'fun': lambda free: 1.0 - np.sum(free)}],
    )
    if not result.success:
        warnings.warn(
            f"Blend weight optimisation did not converge: {result.message}",
            RuntimeWarning,
        )

    # SLSQP may return values a hair outside the feasible set; clean them up so
    # the reported weights are a valid convex combination.
    weights = np.clip(to_full(result.x), 0.0, 1.0)
    weights /= weights.sum()
    # Score is recomputed from the final weights rather than taken from
    # result.fun, so it always matches the weights that are reported.
    return weights, blend_score(weights), result


# --- Main Execution ---
if __name__ == '__main__':
    print("Loading target data and all OOF predictions...")
    y = pd.read_parquet(Config.INPUT_Y_PATH)['label']
    oof_lgbm = np.load(Config.INPUT_OOF_LGBM_PATH)
    oof_mlp = np.load(Config.INPUT_OOF_MLP_PATH)
    oof_ftt = np.load(Config.INPUT_OOF_FTT_PATH)

    # --- Ensembling ---
    print("\n--- Starting Phase 4: Ensembling ---")

    # Starting point for the LGBM and MLP weights; FTT gets the remainder.
    initial_weights = [0.33, 0.33]
    best_weights, final_ensemble_score, result = optimize_blend_weights(
        y, [oof_lgbm, oof_mlp, oof_ftt], initial_weights=initial_weights
    )

    # --- Display Results ---
    lgbm_score = pearsonr(y, oof_lgbm)[0]
    mlp_score = pearsonr(y, oof_mlp)[0]
    ftt_score = pearsonr(y, oof_ftt)[0]

    print("\nBest OOF Scores:")
    print(f"LGBM: {lgbm_score:.5f}")
    print(f"MLP:  {mlp_score:.5f}")
    print(f"FTT:  {ftt_score:.5f}")

    print("\nOptimal Ensemble Weights:")
    print(f"LGBM Weight: {best_weights[0]:.4f}")
    print(f"MLP Weight:  {best_weights[1]:.4f}")
    print(f"FTT Weight:  {best_weights[2]:.4f}")

    print(f"\nFinal Ensemble OOF Pearson Correlation: {final_ensemble_score:.5f}")

Loading target data and all OOF predictions...

--- Starting Phase 4: Ensembling ---

Best OOF Scores:
LGBM: 0.67383
MLP:  0.62708
FTT:  0.02688

Optimal Ensemble Weights:
LGBM Weight: 0.6625
MLP Weight:  0.3375
FTT Weight:  0.0000

Final Ensemble OOF Pearson Correlation: 0.68095


In [None]:
df = 