# IPCRidge Survival Analysis

This notebook implements survival analysis using IPCRidge (Inverse Probability of Censoring Ridge Regression) from scikit-survival.

In [2]:
import pandas as pd
from sksurv.util import Surv
from sksurv.linear_model import IPCRidge
import numpy as np
from sksurv.metrics import concordance_index_ipcw

pd.options.plotting.backend = "plotly"

df = pd.read_csv('../../data/train_enhanced.csv', sep=',')
eval = pd.read_csv('../../data/eval_enhanced.csv', sep=',')

In [17]:
df["OS_YEARS"] = df["OS_YEARS"] + 1

In [18]:
target = ["OS_STATUS", "OS_YEARS"]
X = df.drop(columns=target + ["ID"])


y = Surv.from_dataframe(*target, df[target])

In [19]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [20]:
ipc_ridge = IPCRidge()
ipc_ridge.fit(X_train, y_train)

ipc_cindex_train = concordance_index_ipcw(y_train, y_train, ipc_ridge.predict(X_train), tau=7)[0]
ipc_cindex_test = concordance_index_ipcw(y_train, y_test, ipc_ridge.predict(X_test), tau=7)[0]
print(f"IPCRidge Model Concordance Index IPCW on train: {ipc_cindex_train:.4f}")
print(f"IPCRidge Model Concordance Index IPCW on test: {ipc_cindex_test:.4f}")

IPCRidge Model Concordance Index IPCW on train: 0.3007
IPCRidge Model Concordance Index IPCW on test: 0.3054


In [21]:
from sklearn.model_selection import KFold

kf = KFold(n_splits=5, shuffle=True, random_state=42)

cv_scores_train = []
cv_scores_test = []

for fold, (train_idx, test_idx) in enumerate(kf.split(X), 1):
    X_train_fold = X.iloc[train_idx]
    X_test_fold = X.iloc[test_idx]
    y_train_fold = y[train_idx]
    y_test_fold = y[test_idx]

    X_train_fold = pd.DataFrame(
        X_train_fold,
        columns=X_train_fold.columns,
        index=X_train_fold.index
    )
    X_test_fold = pd.DataFrame(
        X_test_fold,
        columns=X_test_fold.columns,
        index=X_test_fold.index
    )

    ipc_fold = IPCRidge()
    ipc_fold.fit(X_train_fold, y_train_fold)

    train_score = concordance_index_ipcw(y, y_train_fold, ipc_fold.predict(X_train_fold), tau=7)[0]
    test_score = concordance_index_ipcw(y, y_test_fold, ipc_fold.predict(X_test_fold), tau=7)[0]

    cv_scores_train.append(train_score)
    cv_scores_test.append(test_score)

    print(f"Fold {fold} - Train C-Index IPCW: {train_score:.4f}, Test C-Index IPCW: {test_score:.4f}")

print(f"\nAverage Train C-Index IPCW: {np.mean(cv_scores_train):.4f} (+/- {np.std(cv_scores_train):.4f})")
print(f"Average Test C-Index IPCW: {np.mean(cv_scores_test):.4f} (+/- {np.std(cv_scores_test):.4f})")

Fold 1 - Train C-Index IPCW: 0.2999, Test C-Index IPCW: 0.3104
Fold 2 - Train C-Index IPCW: 0.2897, Test C-Index IPCW: 0.2969
Fold 3 - Train C-Index IPCW: 0.3034, Test C-Index IPCW: 0.2906
Fold 4 - Train C-Index IPCW: 0.3078, Test C-Index IPCW: 0.3115
Fold 5 - Train C-Index IPCW: 0.3011, Test C-Index IPCW: 0.3129

Average Train C-Index IPCW: 0.3004 (+/- 0.0060)
Average Test C-Index IPCW: 0.3045 (+/- 0.0090)


In [22]:
import optuna
from optuna.visualization import plot_optimization_history, plot_param_importances

In [23]:
def objective(trial):
    params = {
        'alpha': trial.suggest_float('alpha', 1e-6, 100.0, log=True),
        'max_iter': trial.suggest_int('max_iter', 100, 10000),
        'tol': trial.suggest_float('tol', 1e-8, 1e-3, log=True)
    }

    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores = []

    for train_idx, test_idx in kf.split(X):
        X_train_fold = X.iloc[train_idx]
        X_test_fold = X.iloc[test_idx]
        y_train_fold = y[train_idx]
        y_test_fold = y[test_idx]

        ipc_trial = IPCRidge(**params)
        ipc_trial.fit(X_train_fold, y_train_fold)

        test_score = concordance_index_ipcw(y, y_test_fold, ipc_trial.predict(X_test_fold), tau=7)[0]
        cv_scores.append(test_score)

    return np.mean(cv_scores)

In [24]:
study = optuna.create_study(direction='maximize', study_name='IPCRidge_tuning')
study.optimize(objective, n_trials=250, show_progress_bar=True)

[I 2025-10-01 18:11:07,315] A new study created in memory with name: IPCRidge_tuning


  0%|          | 0/250 [00:00<?, ?it/s]

[I 2025-10-01 18:11:07,402] Trial 0 finished with value: 0.304571475450807 and parameters: {'alpha': 0.6249935620792026, 'max_iter': 9235, 'tol': 0.0008597000697303609}. Best is trial 0 with value: 0.304571475450807.
[I 2025-10-01 18:11:07,475] Trial 1 finished with value: 0.3046028572749693 and parameters: {'alpha': 0.3116190881891032, 'max_iter': 5868, 'tol': 0.0005771158384501648}. Best is trial 1 with value: 0.3046028572749693.
[I 2025-10-01 18:11:07,558] Trial 2 finished with value: 0.3046722563125015 and parameters: {'alpha': 0.0006420107925208614, 'max_iter': 9535, 'tol': 1.1940022587085257e-07}. Best is trial 2 with value: 0.3046722563125015.
[I 2025-10-01 18:11:07,639] Trial 3 finished with value: 0.30464225479419776 and parameters: {'alpha': 0.20665853850255075, 'max_iter': 930, 'tol': 1.820662744579564e-08}. Best is trial 2 with value: 0.3046722563125015.
[I 2025-10-01 18:11:07,713] Trial 4 finished with value: 0.3046723382785529 and parameters: {'alpha': 2.025774009714881e-

In [25]:
print(f"Best trial value (C-Index IPCW): {study.best_trial.value:.4f}")
print("\nBest hyperparameters:")
for key, value in study.best_params.items():
    print(f"  {key}: {value}")

Best trial value (C-Index IPCW): 0.3047

Best hyperparameters:
  alpha: 0.004774520567354753
  max_iter: 8685
  tol: 5.39736902216079e-06


In [26]:
fig = plot_optimization_history(study)
fig.show()

In [27]:
fig = plot_param_importances(study)
fig.show()

## Rerun Cross-Validation with Best Hyperparameters

In [28]:
kf_best = KFold(n_splits=5, shuffle=True, random_state=42)

cv_scores_train_best = []
cv_scores_test_best = []

print(f"Running CV with best parameters: {study.best_params}\n")

for fold, (train_idx, test_idx) in enumerate(kf_best.split(X), 1):
    X_train_fold = X.iloc[train_idx]
    X_test_fold = X.iloc[test_idx]
    y_train_fold = y[train_idx]
    y_test_fold = y[test_idx]

    X_train_fold = pd.DataFrame(
        X_train_fold,
        columns=X_train_fold.columns,
        index=X_train_fold.index
    )
    X_test_fold = pd.DataFrame(
        X_test_fold,
        columns=X_test_fold.columns,
        index=X_test_fold.index
    )

    ipc_best = IPCRidge(**study.best_params)
    ipc_best.fit(X_train_fold, y_train_fold)

    train_score = concordance_index_ipcw(y, y_train_fold, ipc_best.predict(X_train_fold), tau=7)[0]
    test_score = concordance_index_ipcw(y, y_test_fold, ipc_best.predict(X_test_fold), tau=7)[0]

    cv_scores_train_best.append(train_score)
    cv_scores_test_best.append(test_score)

    print(f"Fold {fold} - Train C-Index IPCW: {train_score:.4f}, Test C-Index IPCW: {test_score:.4f}")

print(f"\nAverage Train C-Index IPCW: {np.mean(cv_scores_train_best):.4f} (+/- {np.std(cv_scores_train_best):.4f})")
print(f"Average Test C-Index IPCW: {np.mean(cv_scores_test_best):.4f} (+/- {np.std(cv_scores_test_best):.4f})")

Running CV with best parameters: {'alpha': 0.004774520567354753, 'max_iter': 8685, 'tol': 5.39736902216079e-06}

Fold 1 - Train C-Index IPCW: 0.3000, Test C-Index IPCW: 0.3106
Fold 2 - Train C-Index IPCW: 0.2898, Test C-Index IPCW: 0.2973
Fold 3 - Train C-Index IPCW: 0.3036, Test C-Index IPCW: 0.2908
Fold 4 - Train C-Index IPCW: 0.3079, Test C-Index IPCW: 0.3117
Fold 5 - Train C-Index IPCW: 0.3013, Test C-Index IPCW: 0.3130

Average Train C-Index IPCW: 0.3005 (+/- 0.0060)
Average Test C-Index IPCW: 0.3047 (+/- 0.0089)


In [None]:
cv_results_comparison = pd.DataFrame({
    'Baseline Train': cv_scores_train,
    'Baseline Test': cv_scores_test,
    'Tuned Train': cv_scores_train_best,
    'Tuned Test': cv_scores_test_best
}, index=[f'Fold {i + 1}' for i in range(5)])

cv_results_comparison.loc['Mean'] = cv_results_comparison.mean()
cv_results_comparison.loc['Std'] = cv_results_comparison.std()

cv_results_comparison

In [None]:
ipc_final = IPCRidge(**study.best_params)
ipc_final.fit(X, y)
prediction = ipc_final.predict(eval.drop(columns=["ID"]))
submission = pd.Series(prediction, index=eval['ID'], name='risk_score')
submission.to_csv('../../submissions/ipcridge_enhanced_tuned.csv')

In [None]:
print(f"Tuned model predictions saved to: ipcridge_enhanced_tuned.csv")


In [None]:
print(f"Tuned model predictions saved to: coxph_enhanced_tuned.csv")
