# FPBoost: Fully Parametric Gradient Boosting for Survival Analysis

Source code of the paper "FPBoost: Fully Parametric Gradient Boosting for Survival Analysis" for AAAI 2025.

## Imports

In [1]:
from typing import Optional
from abc import ABC, abstractmethod

import os
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

from sklearn.model_selection import StratifiedKFold
from sklearn.compose import make_column_selector
from sklearndf.pipeline import PipelineDF
from sklearndf.transformation import (
    ColumnTransformerDF,
    OneHotEncoderDF,
    SimpleImputerDF,
    StandardScalerDF,
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

from sksurv.metrics import integrated_brier_score
from sksurv.linear_model import CoxPHSurvivalAnalysis
from sksurv.ensemble import RandomSurvivalForest as RSF, GradientBoostingSurvivalAnalysis

try:
    from auton_survival.models.dsm import DeepSurvivalMachines as DSM
except ImportError:
    os.system("git clone https://github.com/autonlab/auton-survival.git")
    os.system("mv auton-survival/auton_survival .")
    os.system("rm -r auton-survival")
    from auton_survival.models.dsm import DeepSurvivalMachines as DSM

import numba
import torch
from torch import Tensor
from torch.autograd import Variable
import torch.nn.functional as F

import torchtuples as tt
from pycox.models import DeepHitSingle, CoxPH

import ray
from ray import tune
from ray.tune.search.optuna import OptunaSearch
import optuna

from utils.data_loader import load_dataframe


SEED = 42

np.random.seed(SEED)
torch.manual_seed(SEED);

Cloning into 'auton-survival'...


## Data Loading

This section defines the data loading and preprocessing functions alongsiide the cross-validation code.

In [2]:
FOLDS = 10  # Number of folds for cross-validation

In [3]:
def get_preprocess_transformer():
    """Returns the preprocessing sklearn transformer."""
    sel_fac = make_column_selector(pattern="^fac\\_")
    enc_fac = PipelineDF(
        steps=[("ohe", OneHotEncoderDF(sparse_output=False, handle_unknown="ignore"))]
    )
    sel_num = make_column_selector(pattern="^num\\_")
    enc_num = PipelineDF(
        steps=[
            ("impute", SimpleImputerDF(strategy="median")),
            ("scale", StandardScalerDF()),
        ]
    )
    tr = ColumnTransformerDF(transformers=[("ohe", enc_fac, sel_fac), ("s", enc_num, sel_num)])
    return tr


def get_k_fold_splits(df):
    """Returns a generator of k-fold splits."""
    events = df["event"].values.astype(bool)
    times = df["time"].values
    times = times / times.max()

    skf = StratifiedKFold(n_splits=FOLDS, shuffle=True, random_state=SEED)
    splits = list(skf.split(df, events))
    splits = [s[1] for s in splits]

    for i in range(FOLDS):
        val_idx = splits[i]
        test_idx = splits[(i + 1) % FOLDS]
        train_idx = [j for j in range(len(df)) if j not in val_idx and j not in test_idx]

        df_train = df.iloc[train_idx]
        df_val = df.iloc[val_idx]
        df_test = df.iloc[test_idx]

        tr = get_preprocess_transformer()
        X_train = tr.fit_transform(df_train).to_numpy().astype(np.float32)
        X_val = tr.transform(df_val).to_numpy().astype(np.float32)
        X_test = tr.transform(df_test).to_numpy().astype(np.float32)

        e_train = events[train_idx]
        e_val = events[val_idx]
        e_test = events[test_idx]

        t_train = times[train_idx]
        t_val = times[val_idx]
        t_test = times[test_idx]

        max_time = t_train.max()
        min_time = t_train.min()
        X_val = X_val[(min_time < t_val) & (t_val < max_time)]
        e_val = e_val[(min_time < t_val) & (t_val < max_time)]
        t_val = t_val[(min_time < t_val) & (t_val < max_time)]
        X_test = X_test[(min_time < t_test) & (t_test < max_time)]
        e_test = e_test[(min_time < t_test) & (t_test < max_time)]
        t_test = t_test[(min_time < t_test) & (t_test < max_time)]

        sksurv_type = [("event", bool), ("time", float)]
        y_train = np.array([(e, t) for e, t in zip(e_train, t_train)], dtype=sksurv_type)
        y_val = np.array([(e, t) for e, t in zip(e_val, t_val)], dtype=sksurv_type)
        y_test = np.array([(e, t) for e, t in zip(e_test, t_test)], dtype=sksurv_type)

        yield (X_train, y_train), (X_val, y_val), (X_test, y_test)

## Models

This section defines the FPBoost model and the base learners, all implementing the `SurvModel` abstract class.

In [4]:
@numba.njit
def concordance_index_td(
    events: np.ndarray, times: np.ndarray, risks: np.ndarray, percentile: float = 1.0
) -> float:
    """Computes the concordance index for time-dependent data."""
    threshold_time = np.percentile(times, percentile * 100)
    concordant_pairs, comparable_pairs = 0, 0
    for i, ti in enumerate(times):
        for j, tj in enumerate(times):
            if events[i] == 1 and ti < tj and ti < threshold_time:
                comparable_pairs += 1
                if risks[i] > risks[j]:
                    concordant_pairs += 1
    return concordant_pairs / comparable_pairs if comparable_pairs > 0 else 0.0

In [5]:
class SurvModel(ABC):
    """Base class for survival models."""

    @abstractmethod
    def fit(self, X_train, y_train):
        """Fits the model to the training data.

        Args:
            X_train: Training data of shape (n_samples, n_features).
            y_train: Training labels of shape (n_samples,) with dtype=[("event", bool), ("time", float)].
        """
        pass

    @abstractmethod
    def predict(self, X_test, times) -> np.array:
        """Predicts the survival function for the given times.

        Args:
            X_test: Test data of shape (n_samples, n_features).
            times: Times at which to predict the survival function of shape (n_times,).

        Returns:
            Survival function of shape (n_samples, n_times).
        """
        pass

    def evaluate(self, X_test, y_test, y_train) -> dict[str, float]:
        """Evaluates the model on the test data.

        Args:
            X_test: Test data of shape (n_samples, n_features).
            y_test: Test labels of shape (n_samples,) with dtype=[("event", bool), ("time", float)].
            y_train: Training labels of shape (n_samples,) with dtype=[("event", bool), ("time", float)].

        Returns:
            Dictionary of survival metrics.
        """
        min_time, max_time = y_test["time"].min(), y_test["time"].max()
        tolerance = 0.1 * (max_time - min_time)
        times = np.linspace(min_time + tolerance, max_time - tolerance, 100)
        survs = self.predict(X_test, times)
        mean_times = survs.sum(axis=1)
        c25 = concordance_index_td(y_test["event"], y_test["time"], -mean_times, 0.25)
        c50 = concordance_index_td(y_test["event"], y_test["time"], -mean_times, 0.50)
        c75 = concordance_index_td(y_test["event"], y_test["time"], -mean_times, 0.75)
        cid = concordance_index_td(y_test["event"], y_test["time"], -mean_times)
        try:
            ibs = integrated_brier_score(y_train, y_test, survs, times)
        except ValueError as e:
            ibs = 0.25
        return {
            "cid": cid,
            "ibs": ibs,
            "c25": c25,
            "c50": c50,
            "c75": c75,
        }

In [6]:
class Cox(SurvModel):
    """Cox proportional hazards model (Cox, 1972)."""

    def __init__(self) -> None:
        self.model = CoxPHSurvivalAnalysis(alpha=0.01)
        self.failed_opt = False

    def fit(self, X_train, y_train):
        try:
            self.model.fit(X_train, y_train)
        except ValueError as e:
            self.failed_opt = True

    def predict(self, X_test, times) -> np.array:
        if self.failed_opt:
            return np.ones((X_test.shape[0], len(times))) * 0.5
        return np.array([S(times) for S in self.model.predict_survival_function(X_test)])

In [7]:
class RandomSurvivalForest(SurvModel):
    """Random survival forest model (Ishwaran et al., 2008)."""

    def __init__(self) -> None:
        self.model = RSF(n_jobs=-1)

    def fit(self, X_train, y_train):
        self.model.fit(X_train, y_train)

    def predict(self, X_test, times) -> np.array:
        return np.array([S(times) for S in self.model.predict_survival_function(X_test)])

In [8]:
class DeepSurv(SurvModel):
    """DeepSurv model (Katzman et al., 2018)."""

    def __init__(self) -> None:
        self.net = None
        self.model = None

    def fit(self, X_train, y_train):
        net = tt.practical.MLPVanilla(
            X_train.shape[1],
            [i * X_train.shape[1] for i in [3, 5, 3]],  # hidden layers as in Katzman et al., 2018
            1,  # outputs
            False,  # batch norm
            0.6,  # dropout, as in Katzman et al., 2018
        )
        X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train, test_size=0.15, stratify=y_train["event"]
        )
        _y_val = (y_val["time"].copy(), y_val["event"].copy())
        val_data = (X_val, _y_val)
        callbacks = [tt.callbacks.EarlyStopping()]
        self.model = CoxPH(net, tt.optim.Adam)
        _y_train = (y_train["time"].copy(), y_train["event"].copy())
        self.model.fit(
            X_train, _y_train, 256, 256, val_data=val_data, callbacks=callbacks, verbose=False
        )
        self.model.compute_baseline_hazards()

    def predict(self, X_test, times) -> np.array:
        preds = self.model.predict_surv_df(X_test)
        unique_times = preds.index.to_numpy()
        survs = np.array([preds.iloc[:, i].values for i in range(preds.shape[1])])
        surv = np.zeros((X_test.shape[0], len(times)))
        for i, t in enumerate(times):
            idx = np.abs(unique_times - t).argmin()
            surv[:, i] = survs[:, idx]
        return surv

In [9]:
class DeepHit(SurvModel):
    """DeepHit model (Lee et al., 2018)."""

    def __init__(self, num_durations: int = 5) -> None:
        self.net = None
        self.model = None
        self.labtrans = None
        self.num_durations = num_durations

    def fit(self, X_train, y_train):
        X_train, X_val, y_train, y_val = train_test_split(
            X_train, y_train, test_size=0.15, stratify=y_train["event"]
        )
        self.labtrans = DeepHitSingle.label_transform(self.num_durations)
        _y_train = self.labtrans.fit_transform(y_train["time"], y_train["event"])
        _y_val = self.labtrans.transform(y_val["time"], y_val["event"])
        val_data = (X_val, _y_val)
        net = tt.practical.MLPVanilla(
            X_train.shape[1],
            [i * X_train.shape[1] for i in [3, 5, 3]],
            self.num_durations,  # outputs
            False,  # batch norm
            0.6,  # dropout
        )
        callbacks = [tt.callbacks.EarlyStopping()]
        self.model = DeepHitSingle(net, tt.optim.Adam, alpha=0.5, duration_index=self.labtrans.cuts)
        self.model.fit(
            X_train, _y_train, 256, 256, val_data=val_data, callbacks=callbacks, verbose=False
        )

    def predict(self, X_test, times) -> np.array:
        preds = self.model.predict_surv_df(X_test)
        unique_times = preds.index.to_numpy()
        survs = np.array([preds.iloc[:, i].values for i in range(preds.shape[1])])
        surv = np.zeros((X_test.shape[0], len(times)))
        for i, t in enumerate(times):
            idx = np.abs(unique_times - t).argmin()
            surv[:, i] = survs[:, idx]
        return surv

In [10]:
class DeepSurvivalMachines(SurvModel):
    """Deep survival machines model (Nagpal et al., 2021)."""

    def __init__(self) -> None:
        self.model = None

    def fit(self, X_train, y_train):
        self.model = DSM(layers=[i * X_train.shape[1] for i in [3, 5, 3]])
        self.model.fit(X_train, y_train["time"], y_train["event"])

    def predict(self, X_test, times) -> np.array:
        r = np.concatenate(
            [
                self.model.predict_risk(X_test.astype(np.float64), t.astype(np.float64))
                for t in times
            ],
            axis=1,
        )
        survs = np.exp(-r)
        return survs

In [11]:
class CoxBoost(SurvModel):
    """CoxBoost model (Ridgeway, 1999)."""

    def __init__(self) -> None:
        self.model = GradientBoostingSurvivalAnalysis()

    def fit(self, X_train, y_train):
        self.model.fit(X_train, y_train)

    def predict(self, X_test, times) -> np.array:
        return np.array([S(times) for S in self.model.predict_survival_function(X_test)])

In [12]:
class FPBoost(SurvModel):
    """FPBoost model for AAAI submission.

    Args:
        weibull_heads: Number of Weibull heads.
        loglogistic_heads: Number of log-logistic heads.
        n_estimators: Number of base learners per estimated parameter.
        max_depth: Maximum depth of the base learners.
        learning_rate: Learning rate for the boosting algorithm.
        alpha: ElasticNet regularization strength.
        l1_ratio: Ratio between L1 and L2 regularization.
        uniform_heads: Whether to use uniform weights for the heads.
        heads_activation: Activation function for the heads. Can be "relu" or "softmax".
        patience: Patience for early stopping.
        verbose: Whether to print progress.
    """

    def __init__(
        self,
        weibull_heads: int,
        loglogistic_heads: int,
        n_estimators: int,
        max_depth: int,
        learning_rate: float,
        alpha: float,
        l1_ratio: float,
        uniform_heads: bool,
        heads_activation: str,
        patience: Optional[int],
        verbose: bool = False,
    ):
        self.weibull_heads = weibull_heads
        self.loglogistic_heads = loglogistic_heads
        self.heads = weibull_heads + loglogistic_heads
        if self.heads == 0:
            self.weibull_heads = 1
            self.heads = 1
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.learning_rate = learning_rate
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.uniform_heads = uniform_heads
        self.heads_activation = heads_activation
        self.patience = patience
        self.verbose = verbose

        # Random initialization of the parameters
        self.init_eta = np.random.rand(self.heads) + 0.5
        self.eta_heads = [[] for _ in range(self.heads)]
        self.init_k = np.random.rand(self.heads) * 2
        self.k_heads = [[] for _ in range(self.heads)]
        self.init_w = np.random.rand(self.heads)
        self.w_heads = [[] for _ in range(self.heads)]

        heads_activation_fns = {
            "relu": lambda w: F.relu(w),
            "softmax": lambda w: F.softmax(w, dim=1),
        }
        if heads_activation not in heads_activation_fns:
            raise ValueError(f"Heads activation function not in {heads_activation_fns.keys()}")
        self.heads_activation_fn = heads_activation_fns[heads_activation]

    def _predict_etas(self, X: np.array) -> np.array:
        output = np.zeros((len(X), self.heads)) + self.init_eta.reshape((1, -1))
        for i, regs in enumerate(self.eta_heads):
            if len(regs) == 0:
                continue
            preds = np.concatenate([reg.predict(X).reshape((-1, 1)) for reg in regs], axis=1)
            output[:, i] += self.learning_rate * np.sum(preds, axis=1)
        return output

    def _predict_ks(self, X: np.array) -> np.array:
        output = np.ones((len(X), self.heads)) * self.init_k.reshape((1, -1))
        for i, regs in enumerate(self.k_heads):
            if len(regs) == 0:
                continue
            preds = np.concatenate([reg.predict(X).reshape((-1, 1)) for reg in regs], axis=1)
            output[:, i] += self.learning_rate * np.sum(preds, axis=1)
        return output

    def _predict_ws(self, X: np.array) -> np.array:
        if self.uniform_heads:
            return np.ones((len(X), self.heads)) / self.heads
        output = np.ones((len(X), self.heads)) * self.init_w.reshape((1, -1))
        for i, regs in enumerate(self.w_heads):
            if len(regs) == 0:
                continue
            preds = np.concatenate([reg.predict(X).reshape((-1, 1)) for reg in regs], axis=1)
            output[:, i] += self.learning_rate * np.sum(preds, axis=1)
        return output

    def _predict_params(self, X: np.array) -> np.array:
        etas = self._predict_etas(X).reshape((-1, self.heads, 1))
        ks = self._predict_ks(X).reshape((-1, self.heads, 1))
        ws = self._predict_ws(X).reshape((-1, self.heads, 1))
        return np.concatenate([etas, ks, ws], -1)

    def _weibull_hazard(self, eta, k, times):
        return k * eta * times ** (k - 1)

    def _weibull_cum_hazard(self, eta, k, times):
        return eta * times**k

    def _loglogistic_hazard(self, eta, k, times):
        return eta * k * times ** (k - 1) / (1 + eta * times**k)

    def _loglogistic_cum_hazard(self, eta, k, times):
        if torch.is_tensor(times):
            return torch.log1p(eta * times**k)
        return np.log1p(eta * times**k)

    def _get_neg_grads(self, params: np.array, events: Tensor, times: Tensor) -> np.array:
        params_torch = Variable(torch.tensor(params).float(), requires_grad=True)

        etas = F.relu(params_torch[:, :, 0])
        ks = F.relu(params_torch[:, :, 1])
        ws = self.heads_activation_fn(params_torch[:, :, 2])

        hazard = torch.zeros(len(times))
        cum_hazard = torch.zeros(len(times))

        if self.weibull_heads > 0:
            weibull_hazard = self._weibull_hazard(
                etas[:, : self.weibull_heads], ks[:, : self.weibull_heads], times
            )
            weibull_cum_hazard = self._weibull_cum_hazard(
                etas[:, : self.weibull_heads], ks[:, : self.weibull_heads], times
            )
            hazard += (weibull_hazard * ws[:, : self.weibull_heads]).sum(dim=1)
            cum_hazard += (weibull_cum_hazard * ws[:, : self.weibull_heads]).sum(dim=1)

        if self.loglogistic_heads > 0:
            loglogistic_hazard = self._loglogistic_hazard(
                etas[:, self.weibull_heads :], ks[:, self.weibull_heads :], times
            )
            loglogistic_cum_hazard = self._loglogistic_cum_hazard(
                etas[:, self.weibull_heads :], ks[:, self.weibull_heads :], times
            )
            hazard += (loglogistic_hazard * ws[:, self.weibull_heads :]).sum(dim=1)
            cum_hazard += (loglogistic_cum_hazard * ws[:, self.weibull_heads :]).sum(dim=1)

        log_likelihood = (events * torch.log(hazard) - cum_hazard).mean()
        l1_reg = torch.abs(params_torch).mean()
        l2_reg = (params_torch**2).mean()
        elastic_net_reg = self.l1_ratio * l1_reg + (1 - self.l1_ratio) * l2_reg
        loss = -log_likelihood + self.alpha * elastic_net_reg

        loss.backward()
        grad = params_torch.grad.numpy()
        grad[np.isnan(grad)] = 0.0
        return -(grad / np.abs(grad).max())

    def _fit_base_learner(self, X: np.array, y: np.array):
        reg = DecisionTreeRegressor(max_depth=self.max_depth)
        reg.fit(X, y)
        return reg

    def fit(self, X_train: np.array, y_train: np.array) -> None:
        if self.verbose:
            print(f"Fitting a Survival Boosting model with {self.heads} heads...")

        patience_counter, best_num_base_learners, best_cid = 0, 0, 0.0
        if self.patience is not None:
            X_train, X_val, y_train, y_val = train_test_split(
                X_train,
                y_train,
                test_size=0.2,
                stratify=y_train["event"],
            )

        events = torch.tensor(y_train["event"].copy()).float().reshape((-1,))
        times = torch.tensor(y_train["time"].copy()).float().reshape((-1, 1))
        timeline = np.linspace(np.min(y_train["time"]), np.max(y_train["time"]), 100)

        for j in range(self.n_estimators):
            params = self._predict_params(X_train)

            neg_grads = self._get_neg_grads(params, events, times)
            eta_grads = neg_grads[:, :, 0]
            k_grads = neg_grads[:, :, 1]
            w_grads = neg_grads[:, :, 2]

            for i in range(self.heads):
                self.eta_heads[i].append(self._fit_base_learner(X_train, eta_grads[:, i]))
                self.k_heads[i].append(self._fit_base_learner(X_train, k_grads[:, i]))
                if not self.uniform_heads:
                    self.w_heads[i].append(self._fit_base_learner(X_train, w_grads[:, i]))

            if self.patience is not None:
                survs = self.predict(X_val, timeline)
                mean_times = survs.sum(axis=1)
                cid = concordance_index_td(y_val["event"], y_val["time"], -mean_times)
                if self.verbose:
                    print(f"[Iteration {j:04}] Concordance: {cid:.4f}")
                if cid > best_cid:
                    best_cid = cid
                    best_num_base_learners = len(self.eta_heads[0])
                    patience_counter = 0
                else:
                    patience_counter += 1
                    if patience_counter >= self.patience:
                        break

        if self.patience is not None:
            self.eta_heads = [heads[:best_num_base_learners] for heads in self.eta_heads]
            self.k_heads = [heads[:best_num_base_learners] for heads in self.k_heads]
            self.w_heads = [heads[:best_num_base_learners] for heads in self.w_heads]

    def predict(self, X_test, times) -> np.array:
        times = times.reshape((1, 1, -1))
        params = torch.tensor(self._predict_params(X_test)).float()

        etas = F.relu(params[:, :, 0]).numpy().reshape((-1, self.heads, 1))
        ks = F.relu(params[:, :, 1]).numpy().reshape((-1, self.heads, 1))
        ws = self.heads_activation_fn(params[:, :, 2]).numpy().reshape((-1, self.heads, 1))

        cum_hazard = np.zeros((len(X_test), len(times[0][0])))

        if self.weibull_heads > 0:
            weibull_cum_hazard = self._weibull_cum_hazard(
                etas[:, : self.weibull_heads], ks[:, : self.weibull_heads], times
            )
            cum_hazard += (weibull_cum_hazard * ws[:, : self.weibull_heads]).sum(axis=1)

        if self.loglogistic_heads > 0:
            loglogistic_cum_hazard = self._loglogistic_cum_hazard(
                etas[:, self.weibull_heads :], ks[:, self.weibull_heads :], times
            )
            cum_hazard += (loglogistic_cum_hazard * ws[:, self.weibull_heads :]).sum(axis=1)
        surv = np.exp(-cum_hazard)
        return surv

In [13]:
def init_model(model: str, params: dict) -> SurvModel:
    """Initializes a survival model with the given parameters.

    Args:
        model: String with the model name.
        params: Dictionary with the model parameters.

    Returns:
        Initialized survival model.
    """

    models = {
        "cox": Cox,
        "rsf": RandomSurvivalForest,
        "deepsurv": DeepSurv,
        "deephit": DeepHit,
        "dsm": DeepSurvivalMachines,
        "coxboost": CoxBoost,
        "fpboost": FPBoost,
    }
    return models[model](**params)

## Training

In [14]:
def atomic_training(dataset: str, model: str, params: dict) -> dict[str, float]:
    """Trains and evaluates a model on a dataset."""

    np.random.seed(SEED)
    torch.manual_seed(SEED)

    df = load_dataframe(dataset)

    metrics = {}
    for train, val, test in get_k_fold_splits(df):
        X_train, y_train = train
        X_val, y_val = val
        X_test, y_test = test

        m = init_model(model, params)
        m.fit(X_train, y_train)
        val_results = m.evaluate(X_val, y_val, y_train)
        test_results = m.evaluate(X_test, y_test, y_train)

        for k, v in val_results.items():
            k = f"{k}_val"
            if k not in metrics:
                metrics[k] = []
            metrics[k].append(v)
        for k, v in test_results.items():
            k = f"{k}_test"
            if k not in metrics:
                metrics[k] = []
            metrics[k].append(v)

    ret = {}
    for k, v in metrics.items():
        ret[f"{k}_mean"] = np.mean(v).item()
        ret[f"{k}_std"] = np.std(v).item()

    return ret

## Results

Train and evaluate the baseline models and FPBoost on the datasets.

In [15]:
NUM_CPUS = 8  # Number of CPUs for parallel training
OBJ_MEMORY_GB = 2  # Memory for each ray object in GB

RESULTS_PATH = "results"  # Path to save the results

# Datasets on which to train the models
DATASETS = [
    "aids",
    "breast_cancer",
    "gbsg",
    "metabric",
    "support",
    "veterans",
    "whas",
]

# Baseline models to train
BASELINE_MODELS = ["rsf", "cox", "coxboost", "deepsurv", "dsm", "deephit"]

# Search space for the hyperparameter optimization of the FPBoost model
SEARCH_SPACE = {
    "weibull_heads": tune.randint(0, 8),
    "loglogistic_heads": tune.randint(0, 8),
    "n_estimators": tune.randint(1, 256),
    "max_depth": tune.randint(1, 8),
    "learning_rate": tune.uniform(1e-2, 1),
    "alpha": tune.uniform(0.0, 1.0),
    "l1_ratio": tune.uniform(0, 1),
    "uniform_heads": tune.choice([True, False]),
    "heads_activation": tune.choice(["relu", "softmax"]),
    "patience": tune.choice([None, 4, 16]),
}

ITERATIONS = 8  # Number of iterations for the hyperparameter optimization

In [16]:
os.makedirs(RESULTS_PATH, exist_ok=True)
ray.init(
    num_cpus=NUM_CPUS,
    object_store_memory=OBJ_MEMORY_GB * 1024 * 1024 * 1024,
    ignore_reinit_error=True,
)

2024-08-18 11:12:33,950	INFO worker.py:1740 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m


0,1
Python version:,3.10.13
Ray version:,2.20.0
Dashboard:,http://127.0.0.1:8265


### Baselines

In [17]:
tempdir = os.path.join(RESULTS_PATH, "temp_baselines")
os.makedirs(tempdir, exist_ok=True)


@ray.remote
def remote_baseline_training(dataset: str, model: str) -> dict[str, float]:
    if os.path.exists(os.path.join(tempdir, f"{dataset}_{model}.json")):
        return None
    ret = atomic_training(dataset, model, {})
    ret["dataset"] = dataset
    ret["model"] = model
    ret["params"] = {}
    with open(os.path.join(tempdir, f"{dataset}_{model}.json"), "w") as f:
        json.dump(ret, f)
    return ret


BASELINE_RESULTS_FILE = os.path.join(RESULTS_PATH, "baseline_results.csv")

baseline_results = []
for dataset in DATASETS:
    for model in BASELINE_MODELS:
        baseline_results.append(remote_baseline_training.remote(dataset, model))
baseline_results = ray.get(baseline_results)

results = {}
for f in os.listdir(tempdir):
    with open(os.path.join(tempdir, f), "r") as file:
        r = json.load(file)
        for k, v in r.items():
            if k not in results:
                results[k] = []
            results[k].append(v)

baseline_results_df = pd.DataFrame(results)
baseline_results_df.to_csv(BASELINE_RESULTS_FILE, index=False)

  0%|          | 0/10000 [00:00<?, ?it/s][0m 
  0%|          | 1/10000 [00:00<34:41,  4.80it/s]
  1%|▏         | 138/10000 [00:00<00:17, 552.85it/s]
[36m(remote_baseline_training pid=51961)[0m   self.net.load_state_dict(torch.load(path, **kwargs))
  3%|▎         | 282/10000 [00:00<00:11, 874.40it/s]
  4%|▍         | 426/10000 [00:00<00:08, 1066.79it/s]
  5%|▌         | 548/10000 [00:00<00:08, 1100.03it/s]
  7%|▋         | 686/10000 [00:00<00:07, 1187.81it/s]
  8%|▊         | 822/10000 [00:00<00:07, 1239.66it/s]
 10%|▉         | 962/10000 [00:00<00:07, 1288.41it/s]
 11%|█         | 1095/10000 [00:01<00:06, 1277.37it/s]
 13%|█▎        | 1310/10000 [00:01<00:07, 1094.45it/s]
  0%|          | 0/1 [00:00<?, ?it/s]956)[0m 
100%|██████████| 1/1 [00:00<00:00, 50.91it/s] 
[36m(remote_baseline_training pid=51960)[0m   funcs[i] = StepFunction(x=self.baseline_survival_.x, y=np.power(self.baseline_survival_.y, risk_score[i]))
[36m(remote_baseline_training pid=51960)[0m   funcs[i] = StepFunc

### FPBoost

In [18]:
def objective(config):
    return atomic_training(config["dataset"], "fpboost", config["params"])


os.makedirs(RESULTS_PATH, exist_ok=True)

for dataset in DATASETS:
    print(f"Training FPBoost on {dataset}...")

    search_alg = OptunaSearch(
        metric="cid_val_mean", mode="max", sampler=optuna.samplers.TPESampler()
    )

    analysis = tune.run(
        objective,
        config={"dataset": dataset, "params": SEARCH_SPACE},
        num_samples=ITERATIONS,
        search_alg=search_alg,
        name=f"{dataset}_{model}_tune_optuna_experiment",
        storage_path=f"file://{os.path.abspath(RESULTS_PATH)}",
    )

    df = analysis.results_df
    csv_filename = f"{dataset}_{model}_tune_optuna_experiment.csv"
    df.to_csv(os.path.join(RESULTS_PATH, csv_filename), index=False)

2024-08-18 11:18:48,227	INFO tune.py:614 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
[I 2024-08-18 11:18:48,249] A new study created in memory with name: optuna
2024-08-18 11:18:48,250	INFO tensorboardx.py:193 -- pip install "ray[tune]" to see TensorBoard files.


Training FPBoost on aids...


0,1
Current time:,2024-08-18 11:21:02
Running for:,00:02:13.83
Memory:,7.6/23.5 GiB

Trial name,status,loc,params/alpha,params/heads_activat ion,params/l1_ratio,params/learning_rate,params/loglogistic_h eads,params/max_depth,params/n_estimators,params/patience,params/uniform_heads,params/weibull_heads,iter,total time (s),cid_val_mean,cid_val_std,ibs_val_mean
objective_36f397fd,TERMINATED,172.22.6.17:58391,0.542493,softmax,0.081641,0.0735958,7,1,24,16.0,False,7,1,22.9052,0.593237,0.100355,0.158633
objective_27909715,TERMINATED,172.22.6.17:58454,0.588082,softmax,0.813656,0.403007,6,2,171,4.0,False,2,1,19.9091,0.706933,0.0529778,0.0938427
objective_79031809,TERMINATED,172.22.6.17:58515,0.468769,relu,0.866082,0.37659,5,3,58,4.0,True,6,1,15.9447,0.587728,0.152687,0.149304
objective_4f1ab806,TERMINATED,172.22.6.17:58579,0.196412,softmax,0.909318,0.958652,4,2,37,16.0,False,5,1,24.7289,0.72782,0.0474309,0.0827952
objective_030f5ec2,TERMINATED,172.22.6.17:58642,0.36104,relu,0.844384,0.408767,1,3,33,,True,4,1,17.3046,0.708344,0.0689261,0.0843827
objective_97acf6a5,TERMINATED,172.22.6.17:58707,0.784921,relu,0.0995798,0.630802,2,6,73,,True,4,1,41.0589,0.611194,0.108014,0.0646789
objective_9b628a74,TERMINATED,172.22.6.17:58769,0.997972,relu,0.657074,0.494414,6,7,123,,False,0,1,116.703,0.625232,0.0863893,0.0700942
objective_e65d6e14,TERMINATED,172.22.6.17:58837,0.284871,softmax,0.182186,0.362287,5,7,166,16.0,True,2,1,24.846,0.629214,0.109451,0.11687


Trial name,c25_test_mean,c25_test_std,c25_val_mean,c25_val_std,c50_test_mean,c50_test_std,c50_val_mean,c50_val_std,c75_test_mean,c75_test_std,c75_val_mean,c75_val_std,cid_test_mean,cid_test_std,cid_val_mean,cid_val_std,ibs_test_mean,ibs_test_std,ibs_val_mean,ibs_val_std
objective_030f5ec2,0.711244,0.063081,0.722467,0.0865636,0.718439,0.0580107,0.7156,0.0774847,0.712893,0.0542642,0.707655,0.0681003,0.7137,0.0548716,0.708344,0.0689261,0.0847562,0.0165996,0.0843827,0.0184769
objective_27909715,0.710526,0.0764107,0.71414,0.0698132,0.711986,0.0719895,0.715287,0.0610828,0.7058,0.06483,0.706824,0.0528903,0.706091,0.0651657,0.706933,0.0529778,0.094289,0.031096,0.0938427,0.0294747
objective_36f397fd,0.589392,0.131793,0.603995,0.126347,0.590961,0.118198,0.599584,0.111533,0.581547,0.105931,0.593025,0.0999987,0.581141,0.105351,0.593237,0.100355,0.158908,0.0193719,0.158633,0.0154968
objective_4f1ab806,0.713805,0.0694376,0.740193,0.0603781,0.718512,0.0672486,0.731991,0.0514019,0.712714,0.0613158,0.727547,0.0470914,0.713262,0.0619958,0.72782,0.0474309,0.0833519,0.0246785,0.0827952,0.0238931
objective_79031809,0.613298,0.151641,0.5859,0.176386,0.620296,0.152842,0.582793,0.164807,0.615538,0.147199,0.588024,0.152058,0.616242,0.148008,0.587728,0.152687,0.149652,0.0259409,0.149304,0.0232495
objective_97acf6a5,0.629724,0.143979,0.604001,0.127362,0.638236,0.106833,0.617009,0.106645,0.635843,0.11136,0.609975,0.107809,0.637179,0.110837,0.611194,0.108014,0.0651886,0.00627284,0.0646789,0.00564344
objective_9b628a74,0.643196,0.0897293,0.62431,0.100402,0.645936,0.0768703,0.623606,0.0871395,0.641937,0.0810677,0.624289,0.0868508,0.641817,0.0809981,0.625232,0.0863893,0.0725347,0.00992678,0.0700942,0.00896586
objective_e65d6e14,0.64687,0.0976466,0.62745,0.117118,0.635102,0.103915,0.633728,0.108529,0.636569,0.100749,0.629152,0.10935,0.637357,0.10137,0.629214,0.109451,0.118403,0.0455868,0.11687,0.0355414


You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
2024-08-18 11:21:02,086	INFO tune.py:1007 -- Wrote the latest version of all result files and experiment state to '/home/aarchetto/fpboost/results/aids_deephit_tune_optuna_experiment' in 0.0044s.
2024-08-18 11:21:02,090	INFO tune.py:1039 -- Total run time: 133.86 seconds (133.82 seconds for the tuning loop).
2024-08-18 11:21:02,098	INFO tune.py:614 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
[I 2024-08-18 11:21:02,115] A new study created in memory with name: optuna


Training FPBoost on breast_cancer...


0,1
Current time:,2024-08-18 11:22:09
Running for:,00:01:07.57
Memory:,7.7/23.5 GiB

Trial name,status,loc,params/alpha,params/heads_activat ion,params/l1_ratio,params/learning_rate,params/loglogistic_h eads,params/max_depth,params/n_estimators,params/patience,params/uniform_heads,params/weibull_heads,iter,total time (s),cid_val_mean,cid_val_std,ibs_val_mean
objective_efe0dd9c,TERMINATED,172.22.6.17:60137,0.865034,relu,0.223091,0.234806,1,4,48,4.0,False,4,1,10.9712,0.627265,0.122288,0.270655
objective_9e508450,TERMINATED,172.22.6.17:60192,0.663473,softmax,0.443152,0.0686812,0,5,164,,False,2,1,63.242,0.633917,0.154665,0.169298
objective_da31bc7e,TERMINATED,172.22.6.17:60250,0.488337,relu,0.357611,0.323369,2,4,42,,False,1,1,21.7708,0.689454,0.146944,0.17862
objective_f078d3cb,TERMINATED,172.22.6.17:60314,0.946604,softmax,0.926914,0.965657,1,4,171,4.0,False,3,1,11.3707,0.617507,0.134055,0.18086
objective_60b4a055,TERMINATED,172.22.6.17:60377,0.56701,relu,0.698512,0.159864,3,7,255,16.0,True,4,1,28.2245,0.573797,0.153821,0.199464
objective_51590b6e,TERMINATED,172.22.6.17:60439,0.970727,relu,0.16972,0.498935,2,7,164,4.0,True,0,1,10.7787,0.634289,0.109681,0.168448
objective_17ec519a,TERMINATED,172.22.6.17:60503,0.784626,relu,0.812124,0.617771,5,1,223,16.0,False,1,1,17.9568,0.624656,0.0983503,0.184472
objective_e44c5992,TERMINATED,172.22.6.17:60567,0.178816,softmax,0.378328,0.780271,1,5,88,,True,1,1,24.8848,0.640874,0.106227,0.176318


Trial name,c25_test_mean,c25_test_std,c25_val_mean,c25_val_std,c50_test_mean,c50_test_std,c50_val_mean,c50_val_std,c75_test_mean,c75_test_std,c75_val_mean,c75_val_std,cid_test_mean,cid_test_std,cid_val_mean,cid_val_std,ibs_test_mean,ibs_test_std,ibs_val_mean,ibs_val_std
objective_17ec519a,0.641814,0.176139,0.677976,0.148108,0.60185,0.129164,0.627116,0.101051,0.60361,0.129989,0.625574,0.0979538,0.604082,0.13034,0.624656,0.0983503,0.200202,0.0807148,0.184472,0.0500954
objective_51590b6e,0.656764,0.15595,0.67434,0.148433,0.61418,0.129956,0.649091,0.12141,0.605125,0.128228,0.635429,0.110609,0.604059,0.127622,0.634289,0.109681,0.176943,0.0345804,0.168448,0.0274709
objective_60b4a055,0.623156,0.191295,0.601352,0.176312,0.607116,0.155429,0.570327,0.154032,0.604022,0.150916,0.574665,0.153691,0.602931,0.150267,0.573797,0.153821,0.19734,0.0651285,0.199464,0.0540556
objective_9e508450,0.642167,0.153945,0.67963,0.142574,0.610439,0.126108,0.636423,0.163407,0.610616,0.110955,0.634835,0.154358,0.609599,0.110628,0.633917,0.154665,0.164748,0.0248177,0.169298,0.0294001
objective_da31bc7e,0.746361,0.123005,0.737967,0.132523,0.683963,0.109599,0.688207,0.161681,0.683111,0.112025,0.690273,0.145993,0.683756,0.111489,0.689454,0.146944,0.171111,0.0422821,0.17862,0.0534971
objective_e44c5992,0.674352,0.131237,0.703269,0.154521,0.634689,0.109692,0.642093,0.106572,0.63587,0.106696,0.642015,0.107115,0.636267,0.107112,0.640874,0.106227,0.172291,0.0345821,0.176318,0.0361842
objective_efe0dd9c,0.714246,0.171326,0.648695,0.16451,0.678347,0.132698,0.626996,0.121072,0.666653,0.118656,0.626719,0.12217,0.665562,0.118409,0.627265,0.122288,0.286967,0.166319,0.270655,0.116637
objective_f078d3cb,0.600997,0.145898,0.645908,0.153665,0.603052,0.112931,0.621351,0.137672,0.607073,0.109739,0.618102,0.132986,0.605957,0.108695,0.617507,0.134055,0.184009,0.0439204,0.18086,0.0405179


2024-08-18 11:22:09,690	INFO tune.py:1007 -- Wrote the latest version of all result files and experiment state to '/home/aarchetto/fpboost/results/breast_cancer_deephit_tune_optuna_experiment' in 0.0036s.
2024-08-18 11:22:09,694	INFO tune.py:1039 -- Total run time: 67.60 seconds (67.57 seconds for the tuning loop).
2024-08-18 11:22:09,702	INFO tune.py:614 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
[I 2024-08-18 11:22:09,717] A new study created in memory with name: optuna


Training FPBoost on gbsg...


0,1
Current time:,2024-08-18 11:23:21
Running for:,00:01:11.45
Memory:,7.7/23.5 GiB

Trial name,status,loc,params/alpha,params/heads_activat ion,params/l1_ratio,params/learning_rate,params/loglogistic_h eads,params/max_depth,params/n_estimators,params/patience,params/uniform_heads,params/weibull_heads,iter,total time (s),cid_val_mean,cid_val_std,ibs_val_mean
objective_6e53c33d,TERMINATED,172.22.6.17:61119,0.175574,softmax,0.26154,0.250875,2,7,6,4.0,False,7,1,10.4578,0.634434,0.070114,0.210854
objective_9b4f681d,TERMINATED,172.22.6.17:61176,0.445675,relu,0.604641,0.497602,6,2,21,4.0,False,7,1,13.4989,0.671308,0.0643397,0.19138
objective_51e59df6,TERMINATED,172.22.6.17:61234,0.198408,softmax,0.932576,0.813803,4,3,163,4.0,True,1,1,9.9282,0.654147,0.049115,0.21001
objective_043d252b,TERMINATED,172.22.6.17:61293,0.71222,relu,0.54198,0.411601,6,6,11,4.0,True,5,1,12.5491,0.610783,0.0688766,0.216391
objective_e71e5e39,TERMINATED,172.22.6.17:61353,0.648154,softmax,0.491043,0.182783,4,3,229,4.0,False,1,1,13.3497,0.673391,0.0595523,0.21882
objective_bc215f15,TERMINATED,172.22.6.17:61415,0.0462902,softmax,0.90776,0.757113,2,5,100,,True,7,1,58.0478,0.645202,0.0719769,0.207611
objective_648b63fe,TERMINATED,172.22.6.17:61482,0.628679,relu,0.085273,0.588163,6,3,212,4.0,False,2,1,11.5166,0.672039,0.0666041,0.208269
objective_0b844fd7,TERMINATED,172.22.6.17:61543,0.375845,softmax,0.697965,0.23676,0,7,116,4.0,True,5,1,9.9611,0.608558,0.0463435,0.209689


Trial name,c25_test_mean,c25_test_std,c25_val_mean,c25_val_std,c50_test_mean,c50_test_std,c50_val_mean,c50_val_std,c75_test_mean,c75_test_std,c75_val_mean,c75_val_std,cid_test_mean,cid_test_std,cid_val_mean,cid_val_std,ibs_test_mean,ibs_test_std,ibs_val_mean,ibs_val_std
objective_043d252b,0.678879,0.0552704,0.639984,0.0860326,0.656208,0.0522784,0.6153,0.0869365,0.646029,0.0467196,0.610123,0.0703765,0.64605,0.0439572,0.610783,0.0688766,0.215932,0.018959,0.216391,0.0181193
objective_0b844fd7,0.673258,0.078925,0.650717,0.0479487,0.636422,0.0736257,0.614947,0.0579674,0.622181,0.0589558,0.60785,0.047282,0.623211,0.0581361,0.608558,0.0463435,0.207631,0.0148313,0.209689,0.0178603
objective_51e59df6,0.713203,0.0540236,0.711649,0.0425979,0.682297,0.0514615,0.667469,0.0623138,0.672235,0.0461359,0.655041,0.0520775,0.670426,0.0436147,0.654147,0.049115,0.210722,0.0143235,0.21001,0.019789
objective_648b63fe,0.709195,0.0782581,0.714055,0.0786455,0.673179,0.0773063,0.68571,0.0789255,0.663684,0.0689185,0.67292,0.0688561,0.663237,0.0659941,0.672039,0.0666041,0.20313,0.0346302,0.208269,0.0286133
objective_6e53c33d,0.643375,0.0771019,0.671185,0.0967134,0.619726,0.0661429,0.650055,0.0848113,0.615582,0.0542137,0.635209,0.0717269,0.615771,0.0535665,0.634434,0.070114,0.211383,0.0194087,0.210854,0.0218649
objective_9b4f681d,0.731007,0.0745397,0.715075,0.0742314,0.688449,0.0707718,0.68333,0.0764123,0.678759,0.0646737,0.671313,0.0671108,0.677563,0.0616702,0.671308,0.0643397,0.193524,0.0224093,0.19138,0.0294823
objective_bc215f15,0.680888,0.0962485,0.67646,0.0931866,0.651346,0.0833305,0.656113,0.0833083,0.644984,0.0733179,0.644681,0.0728493,0.644674,0.0721893,0.645202,0.0719769,0.205295,0.0257345,0.207611,0.028248
objective_e71e5e39,0.727829,0.0691663,0.730945,0.0700315,0.681128,0.0676812,0.684932,0.0772153,0.670899,0.0576771,0.675345,0.061727,0.669506,0.0547609,0.673391,0.0595523,0.217432,0.0147534,0.21882,0.0214439


You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
2024-08-18 11:23:21,174	INFO tune.py:1007 -- Wrote the latest version of all result files and experiment state to '/home/aarchetto/fpboost/results/gbsg_deephit_tune_optuna_experiment' in 0.0040s.
2024-08-18 11:23:21,177	INFO tune.py:1039 -- Total run time: 71.48 seconds (71.45 seconds for the tuning loop).
2024-08-18 11:23:21,185	INFO tune.py:614 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
[I 2024-08-18 11:23:21,202] A new study created in memory with name: optuna


Training FPBoost on metabric...


0,1
Current time:,2024-08-18 11:28:33
Running for:,00:05:12.71
Memory:,8.0/23.5 GiB

Trial name,status,loc,params/alpha,params/heads_activat ion,params/l1_ratio,params/learning_rate,params/loglogistic_h eads,params/max_depth,params/n_estimators,params/patience,params/uniform_heads,params/weibull_heads,iter,total time (s),cid_val_mean,cid_val_std,ibs_val_mean
objective_31842179,TERMINATED,172.22.6.17:62137,0.0458101,relu,0.630732,0.238422,4,2,214,,False,5,1,310.734,0.631327,0.0329873,0.199754
objective_efc9ead9,TERMINATED,172.22.6.17:62193,0.951201,softmax,0.253615,0.26297,3,3,7,16.0,True,1,1,10.0626,0.608296,0.0371917,0.251185
objective_1f8becaf,TERMINATED,172.22.6.17:62251,0.546376,relu,0.0396917,0.0764276,4,6,48,,False,4,1,63.7634,0.629052,0.0347388,0.201287
objective_fbc8f1da,TERMINATED,172.22.6.17:62311,0.0443219,relu,0.69355,0.709177,1,6,50,16.0,True,5,1,25.9453,0.623218,0.0373597,0.241567
objective_68b70ad5,TERMINATED,172.22.6.17:62371,0.977423,softmax,0.041544,0.350754,0,6,211,16.0,False,0,1,13.4075,0.618345,0.0345503,0.241847
objective_217d0d13,TERMINATED,172.22.6.17:62432,0.798079,softmax,0.203463,0.931856,1,1,42,4.0,False,1,1,14.2244,0.623058,0.0186897,0.244992
objective_19dfd7ee,TERMINATED,172.22.6.17:62499,0.333074,relu,0.809955,0.0211925,5,1,20,4.0,True,5,1,13.3042,0.485404,0.0765142,0.241512
objective_df40aa6c,TERMINATED,172.22.6.17:62567,0.112348,softmax,0.972271,0.283713,4,2,249,4.0,True,6,1,29.5147,0.636571,0.031821,0.238431


Trial name,c25_test_mean,c25_test_std,c25_val_mean,c25_val_std,c50_test_mean,c50_test_std,c50_val_mean,c50_val_std,c75_test_mean,c75_test_std,c75_val_mean,c75_val_std,cid_test_mean,cid_test_std,cid_val_mean,cid_val_std,ibs_test_mean,ibs_test_std,ibs_val_mean,ibs_val_std
objective_19dfd7ee,0.508772,0.0841522,0.522725,0.0645456,0.499066,0.0877278,0.501203,0.075352,0.483134,0.0902705,0.488544,0.076862,0.481029,0.0906522,0.485404,0.0765142,0.243979,0.0110807,0.241512,0.0101031
objective_1f8becaf,0.623502,0.0463171,0.63163,0.0581617,0.62927,0.0308403,0.630948,0.040555,0.625306,0.0278433,0.629193,0.0375802,0.625846,0.0255097,0.629052,0.0347388,0.202201,0.014605,0.201287,0.0130774
objective_217d0d13,0.622607,0.0395075,0.619803,0.0309491,0.628004,0.0266832,0.621749,0.021731,0.626731,0.0303529,0.622259,0.0210081,0.627337,0.0285753,0.623058,0.0186897,0.245438,0.00988443,0.244992,0.0117903
objective_31842179,0.634635,0.0402302,0.625289,0.0509616,0.636353,0.0240382,0.631577,0.0385074,0.634629,0.0242592,0.631285,0.0357929,0.634785,0.021774,0.631327,0.0329873,0.198452,0.0142046,0.199754,0.0140963
objective_68b70ad5,0.623646,0.0412251,0.620621,0.0456523,0.625613,0.0302852,0.620577,0.0428458,0.618174,0.0300598,0.618479,0.0367014,0.617938,0.0284775,0.618345,0.0345503,0.239593,0.0186582,0.241847,0.0211434
objective_df40aa6c,0.655727,0.0413869,0.654698,0.0403765,0.645749,0.0326101,0.644451,0.0333538,0.638566,0.0319953,0.638212,0.0342866,0.637939,0.0297165,0.636571,0.031821,0.239558,0.0183845,0.238431,0.012414
objective_efc9ead9,0.639965,0.0636878,0.6367,0.0477572,0.622619,0.0579687,0.620852,0.0374395,0.610777,0.0625244,0.609813,0.0385288,0.60842,0.0616212,0.608296,0.0371917,0.252194,0.0162744,0.251185,0.0100709
objective_fbc8f1da,0.637774,0.051533,0.645487,0.0548411,0.636694,0.0359289,0.634542,0.0434411,0.628044,0.0334211,0.624819,0.03969,0.6268,0.0314944,0.623218,0.0373597,0.241248,0.0174882,0.241567,0.0134313


You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
2024-08-18 11:28:33,919	INFO tune.py:1007 -- Wrote the latest version of all result files and experiment state to '/home/aarchetto/fpboost/results/metabric_deephit_tune_optuna_experiment' in 0.0053s.
2024-08-18 11:28:33,923	INFO tune.py:1039 -- Total run time: 312.74 seconds (312.71 seconds for the tuning loop).
2024-08-18 11:28:33,934	INFO tune.py:614 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
[I 2024-08-18 11:28:33,959] A new study created in memory with name: optuna


Training FPBoost on support...


0,1
Current time:,2024-08-18 11:40:10
Running for:,00:11:36.69
Memory:,5.9/23.5 GiB

Trial name,status,loc,params/alpha,params/heads_activat ion,params/l1_ratio,params/learning_rate,params/loglogistic_h eads,params/max_depth,params/n_estimators,params/patience,params/uniform_heads,params/weibull_heads,iter,total time (s),cid_val_mean,cid_val_std,ibs_val_mean
objective_958c54f3,TERMINATED,172.22.6.17:65597,0.127039,softmax,0.17181,0.47967,7,2,106,,False,4,1,694.418,0.839964,0.0119523,0.166195
objective_c2bd553a,TERMINATED,172.22.6.17:65655,0.377708,relu,0.285955,0.0816155,5,3,91,16.0,True,5,1,389.113,0.843458,0.00964357,0.270708
objective_cd5350ff,TERMINATED,172.22.6.17:65713,0.584783,softmax,0.410242,0.601511,3,4,79,16.0,True,5,1,175.885,0.846114,0.006065,0.251525
objective_5f470a32,TERMINATED,172.22.6.17:65773,0.899798,softmax,0.745635,0.290829,2,6,10,,False,6,1,106.82,0.847586,0.00968725,0.257206
objective_3da463ca,TERMINATED,172.22.6.17:65839,0.840587,relu,0.955063,0.638311,1,1,219,4.0,True,5,1,27.6958,0.759729,0.0532192,0.245685
objective_bfde0790,TERMINATED,172.22.6.17:65903,0.849536,relu,0.623546,0.638254,0,5,216,16.0,False,2,1,59.6745,0.843718,0.0107137,0.185917
objective_c86dde8b,TERMINATED,172.22.6.17:65974,0.136646,softmax,0.963288,0.385488,1,2,94,16.0,False,6,1,290.472,0.845682,0.0087192,0.171899
objective_eee850f5,TERMINATED,172.22.6.17:66065,0.974193,relu,0.133335,0.322412,6,2,58,4.0,True,2,1,62.5257,0.835033,0.0157041,0.297334


Trial name,c25_test_mean,c25_test_std,c25_val_mean,c25_val_std,c50_test_mean,c50_test_std,c50_val_mean,c50_val_std,c75_test_mean,c75_test_std,c75_val_mean,c75_val_std,cid_test_mean,cid_test_std,cid_val_mean,cid_val_std,ibs_test_mean,ibs_test_std,ibs_val_mean,ibs_val_std
objective_3da463ca,0.874453,0.0300932,0.873562,0.0322854,0.802639,0.050017,0.803124,0.0444327,0.765945,0.0591485,0.766936,0.0519785,0.759294,0.0599194,0.759729,0.0532192,0.246271,0.0212069,0.245685,0.0252457
objective_5f470a32,0.915965,0.011361,0.916426,0.00775309,0.873447,0.00747525,0.874675,0.00874375,0.850907,0.00867306,0.851573,0.00988525,0.847044,0.00868922,0.847586,0.00968725,0.257207,0.0256047,0.257206,0.0256402
objective_958c54f3,0.903008,0.0172209,0.903144,0.017722,0.865575,0.00776556,0.866305,0.0121591,0.843601,0.00876245,0.844002,0.012105,0.839561,0.0086351,0.839964,0.0119523,0.16674,0.0168997,0.166195,0.021016
objective_bfde0790,0.914187,0.0116073,0.915194,0.00762788,0.870899,0.00932925,0.870242,0.0106589,0.848472,0.0100816,0.847473,0.0108474,0.844865,0.00982189,0.843718,0.0107137,0.184668,0.0260376,0.185917,0.0210569
objective_c2bd553a,0.921021,0.00867619,0.920407,0.00681397,0.87367,0.00893814,0.873263,0.00818325,0.848501,0.0100676,0.84813,0.00955592,0.844001,0.00998641,0.843458,0.00964357,0.271068,0.0199592,0.270708,0.0201682
objective_c86dde8b,0.917346,0.00965352,0.917778,0.00619368,0.872445,0.00865,0.873554,0.00819672,0.848994,0.0101995,0.850005,0.00891943,0.844865,0.0100746,0.845682,0.0087192,0.172099,0.027493,0.171899,0.0281564
objective_cd5350ff,0.914285,0.0078147,0.916077,0.00887557,0.87132,0.00797601,0.872726,0.00577466,0.84899,0.00899948,0.849919,0.00610201,0.845244,0.00863139,0.846114,0.006065,0.250997,0.0285286,0.251525,0.0258246
objective_eee850f5,0.912837,0.0115795,0.913157,0.012142,0.86348,0.0147449,0.864774,0.0149324,0.839307,0.0164981,0.839562,0.0153753,0.835024,0.0165476,0.835033,0.0157041,0.297048,0.0255513,0.297334,0.0252081


You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
2024-08-18 11:40:10,652	INFO tune.py:1007 -- Wrote the latest version of all result files and experiment state to '/home/aarchetto/fpboost/results/support_deephit_tune_optuna_experiment' in 0.0038s.
2024-08-18 11:40:10,656	INFO tune.py:1039 -- Total run time: 696.72 seconds (696.68 seconds for the tuning loop).
2024-08-18 11:40:10,664	INFO tune.py:614 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
[I 2024-08-18 11:40:10,681] A new study created in memory with name: optuna


Training FPBoost on veterans...


0,1
Current time:,2024-08-18 11:42:22
Running for:,00:02:11.41
Memory:,5.9/23.5 GiB

Trial name,status,loc,params/alpha,params/heads_activat ion,params/l1_ratio,params/learning_rate,params/loglogistic_h eads,params/max_depth,params/n_estimators,params/patience,params/uniform_heads,params/weibull_heads,iter,total time (s),cid_val_mean,cid_val_std,ibs_val_mean
objective_14ec9601,TERMINATED,172.22.6.17:72676,0.873433,relu,0.630345,0.201991,3,2,68,16.0,True,7,1,21.257,0.702496,0.0803405,0.391701
objective_4bafa406,TERMINATED,172.22.6.17:72725,0.490523,relu,0.59718,0.918018,5,4,49,16.0,True,5,1,14.5371,0.723047,0.0852726,0.430748
objective_e33d38ac,TERMINATED,172.22.6.17:72779,0.776682,relu,0.627779,0.18937,2,3,168,,False,7,1,125.193,0.700454,0.10725,0.14774
objective_15fe14d0,TERMINATED,172.22.6.17:72841,0.0376136,relu,0.00392843,0.264729,6,5,49,,True,1,1,18.801,0.698163,0.0972734,0.403233
objective_7f60b581,TERMINATED,172.22.6.17:72895,0.354271,relu,0.118558,0.180038,7,6,186,4.0,True,6,1,14.1726,0.673486,0.0624012,0.444317
objective_fafda5be,TERMINATED,172.22.6.17:72958,0.60712,softmax,0.245342,0.516367,2,3,21,,False,6,1,13.259,0.680995,0.101268,0.320644
objective_ee189bcc,TERMINATED,172.22.6.17:73013,0.832563,relu,0.559945,0.589234,3,3,151,4.0,True,4,1,10.5845,0.703226,0.0494312,0.420469
objective_febe0179,TERMINATED,172.22.6.17:73075,0.508599,relu,0.849634,0.145892,1,7,78,16.0,True,7,1,17.3063,0.668091,0.100265,0.426871


Trial name,c25_test_mean,c25_test_std,c25_val_mean,c25_val_std,c50_test_mean,c50_test_std,c50_val_mean,c50_val_std,c75_test_mean,c75_test_std,c75_val_mean,c75_val_std,cid_test_mean,cid_test_std,cid_val_mean,cid_val_std,ibs_test_mean,ibs_test_std,ibs_val_mean,ibs_val_std
objective_14ec9601,0.746388,0.118224,0.795202,0.116133,0.725189,0.110574,0.738041,0.108233,0.706321,0.0860732,0.71079,0.0890071,0.695464,0.084887,0.702496,0.0803405,0.39488,0.0733317,0.391701,0.0406926
objective_15fe14d0,0.708406,0.137772,0.774875,0.178418,0.6889,0.111285,0.720502,0.131193,0.69114,0.0632125,0.705149,0.101313,0.685193,0.0587538,0.698163,0.0972734,0.399377,0.0572676,0.403233,0.0515495
objective_4bafa406,0.720239,0.130665,0.812771,0.12263,0.700235,0.109995,0.746744,0.0980281,0.684101,0.0872474,0.730584,0.0925268,0.676782,0.0832881,0.723047,0.0852726,0.429905,0.0559241,0.430748,0.05481
objective_7f60b581,0.744225,0.0825206,0.739278,0.123468,0.703175,0.0957607,0.687586,0.0864743,0.693376,0.0796169,0.688251,0.065684,0.674889,0.0784422,0.673486,0.0624012,0.440311,0.0711464,0.444317,0.0638064
objective_e33d38ac,0.635402,0.140536,0.748004,0.162188,0.624439,0.124182,0.714421,0.137183,0.6334,0.0892019,0.708153,0.112854,0.629659,0.0849074,0.700454,0.10725,0.159197,0.0357619,0.14774,0.044903
objective_ee189bcc,0.787361,0.0992758,0.768542,0.0703193,0.753172,0.112549,0.725959,0.0756275,0.724978,0.0793919,0.71345,0.0547462,0.716991,0.08023,0.703226,0.0494312,0.412951,0.0707479,0.420469,0.0660487
objective_fafda5be,0.741785,0.181577,0.754513,0.190382,0.71143,0.185929,0.70637,0.139632,0.69412,0.136698,0.694413,0.109615,0.686704,0.129683,0.680995,0.101268,0.31681,0.0539642,0.320644,0.04248
objective_febe0179,0.722692,0.141838,0.742361,0.130633,0.677414,0.125394,0.698242,0.123985,0.656226,0.101543,0.680334,0.103767,0.644514,0.100669,0.668091,0.100265,0.421278,0.0644195,0.426871,0.0621192


You may want to consider increasing the `CheckpointConfig(num_to_keep)` or decreasing the frequency of saving checkpoints.
You can suppress this error by setting the environment variable TUNE_WARN_EXCESSIVE_EXPERIMENT_CHECKPOINT_SYNC_THRESHOLD_S to a smaller value than the current threshold (5.0).
2024-08-18 11:42:22,097	INFO tune.py:1007 -- Wrote the latest version of all result files and experiment state to '/home/aarchetto/fpboost/results/veterans_deephit_tune_optuna_experiment' in 0.0044s.
2024-08-18 11:42:22,101	INFO tune.py:1039 -- Total run time: 131.44 seconds (131.41 seconds for the tuning loop).
2024-08-18 11:42:22,110	INFO tune.py:614 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
[I 2024-08-18 11:42:22,125] A new study created in memory with name: optuna


Training FPBoost on whas...


0,1
Current time:,2024-08-18 11:44:07
Running for:,00:01:45.81
Memory:,5.9/23.5 GiB

Trial name,status,loc,params/alpha,params/heads_activat ion,params/l1_ratio,params/learning_rate,params/loglogistic_h eads,params/max_depth,params/n_estimators,params/patience,params/uniform_heads,params/weibull_heads,iter,total time (s),cid_val_mean,cid_val_std,ibs_val_mean
objective_4f7d97e5,TERMINATED,172.22.6.17:74181,0.147158,softmax,0.723373,0.085551,0,3,56,16.0,False,4,1,19.8245,0.713407,0.0539287,0.20281
objective_ccadceff,TERMINATED,172.22.6.17:74232,0.75311,relu,0.796535,0.301171,6,2,92,,False,2,1,54.6644,0.752923,0.0523992,0.166172
objective_90010fbb,TERMINATED,172.22.6.17:74291,0.180616,relu,0.88173,0.208617,0,3,219,,True,5,1,99.5415,0.745418,0.0583954,0.174763
objective_cac9b08e,TERMINATED,172.22.6.17:74347,0.625238,relu,0.511721,0.936665,1,6,83,16.0,True,1,1,12.6486,0.732963,0.065417,0.201646
objective_6efb437d,TERMINATED,172.22.6.17:74400,0.261712,relu,0.275931,0.58853,4,6,199,,True,0,1,85.3753,0.713093,0.0564387,0.202483
objective_3828efa5,TERMINATED,172.22.6.17:74462,0.962965,softmax,0.653462,0.626868,5,4,243,16.0,False,7,1,25.8601,0.740928,0.0519408,0.200477
objective_69fdd71b,TERMINATED,172.22.6.17:74517,0.415532,relu,0.39497,0.802705,1,1,6,,False,7,1,10.8171,0.750053,0.0622439,0.167675
objective_f6ed7fef,TERMINATED,172.22.6.17:74579,0.177643,relu,0.585021,0.349091,3,6,159,4.0,False,2,1,11.3337,0.71847,0.0624159,0.213011


Trial name,c25_test_mean,c25_test_std,c25_val_mean,c25_val_std,c50_test_mean,c50_test_std,c50_val_mean,c50_val_std,c75_test_mean,c75_test_std,c75_val_mean,c75_val_std,cid_test_mean,cid_test_std,cid_val_mean,cid_val_std,ibs_test_mean,ibs_test_std,ibs_val_mean,ibs_val_std
objective_3828efa5,0.73013,0.0442217,0.738555,0.0542862,0.736346,0.0314687,0.744332,0.0450872,0.734004,0.0371632,0.743419,0.0511094,0.731018,0.0401101,0.740928,0.0519408,0.202609,0.0143982,0.200477,0.0111068
objective_4f7d97e5,0.722613,0.0672724,0.703388,0.0413983,0.733538,0.0554441,0.712102,0.0474338,0.732282,0.056179,0.714839,0.0527751,0.726611,0.058049,0.713407,0.0539287,0.201157,0.024388,0.20281,0.0164189
objective_69fdd71b,0.738535,0.0625118,0.742742,0.0593157,0.751623,0.0541111,0.752664,0.0565154,0.749046,0.0602356,0.751521,0.0613117,0.745658,0.0642253,0.750053,0.0622439,0.169476,0.0202114,0.167675,0.0182922
objective_6efb437d,0.715046,0.0308518,0.707489,0.0620437,0.722469,0.0385187,0.715308,0.0553727,0.715426,0.0418098,0.715467,0.0571659,0.712777,0.0436146,0.713093,0.0564387,0.203247,0.0143096,0.202483,0.0178346
objective_90010fbb,0.748932,0.0661283,0.73279,0.0705882,0.757758,0.0536357,0.743474,0.0586248,0.757565,0.0553417,0.746338,0.0601548,0.755293,0.0549029,0.745418,0.0583954,0.171945,0.0169052,0.174763,0.0192942
objective_cac9b08e,0.710925,0.0740448,0.724653,0.0667755,0.72209,0.0601907,0.734161,0.0594376,0.718764,0.0693276,0.734843,0.063161,0.718396,0.0660389,0.732963,0.065417,0.202178,0.0126567,0.201646,0.0131505
objective_ccadceff,0.733755,0.0632353,0.73466,0.0508425,0.742663,0.0528474,0.752559,0.0497361,0.740004,0.0609799,0.754195,0.052528,0.737683,0.0586882,0.752923,0.0523992,0.173077,0.0298953,0.166172,0.0274646
objective_f6ed7fef,0.735445,0.0544197,0.712427,0.0661281,0.737037,0.0446968,0.723318,0.054378,0.731417,0.0487122,0.718403,0.0610318,0.729244,0.051358,0.71847,0.0624159,0.208924,0.0325475,0.213011,0.0547398


2024-08-18 11:44:07,944	INFO tune.py:1007 -- Wrote the latest version of all result files and experiment state to '/home/aarchetto/fpboost/results/whas_deephit_tune_optuna_experiment' in 0.0035s.
2024-08-18 11:44:07,947	INFO tune.py:1039 -- Total run time: 105.84 seconds (105.81 seconds for the tuning loop).


In [19]:
fpboost_results = []
for file in os.listdir(RESULTS_PATH):
    if file.endswith("_tune_optuna_experiment.csv"):
        df = pd.read_csv(os.path.join(RESULTS_PATH, file))
        fpboost_results.append(df)

df = pd.concat(fpboost_results)
df["model"] = "fpboost"
df = df.dropna(axis=1, how="any")
df.columns = df.columns.str.replace("config/", "")
df.reset_index(drop=True, inplace=True)

# Select the best hyperparameters for each dataset accrding to the C-Index - IBS difference on the validation set
df["sel_col"] = df["cid_val_mean"] - df["ibs_val_mean"]
idx = df.groupby(["dataset", "model"])["sel_col"].idxmax()
df = df.loc[idx]
assert df.groupby(["dataset", "model"]).size().eq(1).all()

df = df.drop(
    columns=[
        "sel_col",
        "timestamp",
        "time_since_restore",
        "pid",
        "time_total_s",
        "date",
        "training_iteration",
        "time_this_iter_s",
        "done",
        "hostname",
        "node_ip",
        "iterations_since_restore",
        "experiment_tag",
    ]
)

FPBOOST_RESULTS_FILE = os.path.join(RESULTS_PATH, "fpboost_results.csv")

df.to_csv(FPBOOST_RESULTS_FILE, index=False)

### Results Collection

Load the results returned by the previous cells.

In [20]:
baseline_results_df = pd.read_csv(BASELINE_RESULTS_FILE)
fpboost_results_df = pd.read_csv(FPBOOST_RESULTS_FILE)

results_df = pd.concat([baseline_results_df, fpboost_results_df])
results_df.sort_values(["dataset", "model"], inplace=True)
results_df.to_csv(os.path.join(RESULTS_PATH, "results.csv"), index=False)