# Meta-Ensemble: MTLR Ensemble + XGBoost AFT Ensemble

This notebook combines two survival models:
- an **MTLR ensemble**
- an **XGBoost AFT ensemble**

Both ensembles are assumed to produce risk scores on the **same validation or test set**.

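We build a **meta-ensemble** by averaging the ranks of their risk scores (rank-based ensembling). Ranks are used instead of raw scores so that both models contribute equally even when their scores live on different scales; this requires both scores to be oriented the same way (higher value = higher risk).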

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sksurv.util import Surv
from sksurv.metrics import concordance_index_ipcw

# Use matplotlib's built-in 'default' style sheet for any figures.
plt.style.use('default')

# --- Configuration ---
# CSV files holding each base ensemble's predictions. The loading cell checks
# that both files contain 'ID' and 'risk_score' columns.
MTLR_PRED_PATH = "../../submissions/submission_mtlr_ensemble_nestedcv.csv"  # adapt if needed
XGB_PRED_PATH = "../../submissions/submission_xgb_aft_ensemble_nestedcv.csv"  # adapt if needed

# NOTE(review): the four constants below are not referenced by any cell visible
# in this part of the notebook; they look like inputs for an evaluation step.
# Confirm they are still needed.
VAL_DATA_PATH = "../../data/eval_enhanced.csv"  # adapt or set to None if not available
TIME_COL = "OS_YEARS"  # presumably the survival-time column of the validation data - TODO confirm
EVENT_COL = "OS_STATUS"  # presumably the event-indicator column of the validation data - TODO confirm
TAU_CINDEX = 7.0  # presumably the truncation time (tau) passed to concordance_index_ipcw - TODO confirm


In [5]:
# === LOAD PREDICTIONS ===
# Read the per-model prediction files configured at the top of the notebook.
mtlr_pred = pd.read_csv(MTLR_PRED_PATH)
xgb_pred = pd.read_csv(XGB_PRED_PATH)

# Show the first rows of each file as a quick visual check.
print("MTLR predictions:", mtlr_pred.head(), sep="\n")
print("\nXGB AFT predictions:", xgb_pred.head(), sep="\n")

# Sanity check on columns: both files must expose the join key and the score.
required_columns = {"ID", "risk_score"}
column_checks = (
    (mtlr_pred, "MTLR file must contain 'ID' and 'risk_score' columns."),
    (xgb_pred, "XGB file must contain 'ID' and 'risk_score' columns."),
)
for frame, failure_message in column_checks:
    assert required_columns.issubset(frame.columns), failure_message


MTLR predictions:
     ID  risk_score
0  KYW1    0.891348
1  KYW2    0.757857
2  KYW3    0.496452
3  KYW4    0.728625
4  KYW5    0.781176

XGB AFT predictions:
     ID  risk_score
0  KYW1    0.559738
1  KYW2    0.785246
2  KYW3    0.490842
3  KYW4    0.529659
4  KYW5    0.683751


In [6]:
# === MERGE PREDICTIONS ON ID ===
# Align the two prediction files row by row on the shared "ID" key, renaming
# each model's score column so both survive the join.
# validate="one_to_one" makes pandas raise a MergeError if an ID is duplicated
# in either file; without it, duplicates would silently multiply rows and
# distort the ranks computed from the merged frame.
merged = (
    mtlr_pred.rename(columns={"risk_score": "risk_mtlr"})
    .merge(
        xgb_pred.rename(columns={"risk_score": "risk_xgb"}),
        on="ID",
        how="inner",
        validate="one_to_one",
    )
)

# An inner join silently drops IDs that appear in only one file, which would
# leave the resulting predictions incomplete. Because the join is one-to-one,
# the row-count difference is exactly the number of unmatched IDs per file.
n_only_mtlr = len(mtlr_pred) - len(merged)
n_only_xgb = len(xgb_pred) - len(merged)
if n_only_mtlr or n_only_xgb:
    print(
        f"WARNING: inner join dropped {n_only_mtlr} MTLR-only and "
        f"{n_only_xgb} XGB-only IDs."
    )

print(f"Merged shape: {merged.shape}")
print(merged.head())


Merged shape: (1193, 3)
     ID  risk_mtlr  risk_xgb
0  KYW1   0.891348  0.559738
1  KYW2   0.757857  0.785246
2  KYW3   0.496452  0.490842
3  KYW4   0.728625  0.529659
4  KYW5   0.781176  0.683751


In [7]:
# Columns holding each base model's risk score in the merged frame.
risk_cols = ["risk_mtlr", "risk_xgb"]

# Convert each model's scores to ranks (1 = lowest risk, N = highest risk);
# tied scores share the average of the ranks they span.
rank_df = merged[risk_cols].rank(method="average")

# Meta-ensemble score: row-wise mean of the per-model ranks.
merged["risk_ensemble_rank"] = rank_df.mean(axis=1)

# Min-max rescale the averaged rank to the 0-1 range. The 1e-8 in the
# denominator avoids a division by zero when every averaged rank is identical.
rank_lo = merged["risk_ensemble_rank"].min()
rank_hi = merged["risk_ensemble_rank"].max()
merged["risk_ensemble_rank_norm"] = (
    merged["risk_ensemble_rank"].sub(rank_lo).div(rank_hi - rank_lo + 1e-8)
)

print(merged.head())


     ID  risk_mtlr  risk_xgb  risk_ensemble_rank  risk_ensemble_rank_norm
0  KYW1   0.891348  0.559738              877.75                 0.737209
1  KYW2   0.757857  0.785246              934.50                 0.785201
2  KYW3   0.496452  0.490842              587.25                 0.491543
3  KYW4   0.728625  0.529659              760.50                 0.638055
4  KYW5   0.781176  0.683751              894.25                 0.751163


In [9]:
# === SAVE FINAL META-ENSEMBLE PREDICTIONS ===

# Keep only the ID and the normalised ensemble score, and expose the score
# under the same "risk_score" column name used by the input prediction files.
submission_columns = {"risk_ensemble_rank_norm": "risk_score"}
final_submission = merged.loc[:, ["ID", *submission_columns]].rename(
    columns=submission_columns
)

# Write the submission without the DataFrame index, then echo the destination
# and the first rows for a quick visual check.
output_path = "../../submissions/submission_meta_ensemble_rank_avg.csv"
final_submission.to_csv(output_path, index=False)
print(
    f"Meta-ensemble predictions saved to: {output_path}",
    final_submission.head(),
    sep="\n",
)


Meta-ensemble predictions saved to: ../../submissions/submission_meta_ensemble_rank_avg.csv
     ID  risk_score
0  KYW1    0.737209
1  KYW2    0.785201
2  KYW3    0.491543
3  KYW4    0.638055
4  KYW5    0.751163
