# Introduction to the xgbsurv package

This notebook demonstrates how to use `xgbsurv` using cross validation from scikit-learn. It structured by the following steps:

- Load data
- Load model
- Fit model
- Predict and evaluate model

The syntax conveniently follows that of sklearn.

In [1]:
from xgbsurv.datasets import load_metabric
from xgbsurv.models.breslow_final import breslow_likelihood
from xgbsurv import XGBSurv
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import make_scorer
#from sksurv.metrics import concordance_index_censored
from xgbsurv.evaluation import cindex_censored, ibs
import numpy as np
import pandas as pd
from scipy.stats import uniform as scuniform
from scipy.stats import randint as scrandint
from scipy.stats import loguniform as scloguniform 
%load_ext autoreload
%autoreload 2


## Load Data

In [2]:
data, target = load_metabric(path="/Users/JUSC/Documents/xgbsurv/xgbsurv/datasets/data/", as_frame=False)
# stratification on event type
target_sign = np.sign(target)
X_train, X_test, y_train, y_test = train_test_split(data, target, stratify=target_sign)

In [3]:
# Set Hyperparameter Space

param_grid = {
'alpha': scloguniform(1e-10,1),#[1e-10,1], # from hyp augmentation, L1 regularization
'reg_lambda': scloguniform(1e-10,1), #[1e-10,1], #alias l2_regularization, lambda in augmentation
'learning_rate': scloguniform(0.001,1), #[0.001,1], # assumed alias eta from augmentation,
'n_estimators':  scrandint(1,100), # corresponds to num_rounds
'gamma': scuniform(0.1,1-0.1),#[0.1,1], # minimum loss reduction required to make a further partition on a leaf node of the tree.
'colsample_bylevel': scuniform(0.1, 1-0.1), #[0.1,1], # from hyp augmentation
'colsample_bynode': scuniform(0.1, 1-0.1), #[0.1,1], # from hyp augmentation, uniform(0.1,1),
'colsample_bytree': scuniform(0.5, 1-0.5),#[0.5,1], # from hyp augmentation, seems to exceed the bound, uniform(0.5,1)
'max_depth': scrandint(1,20),#[1,20], # from hyp augmentation
'max_delta_step': scrandint(0,10),#[0,10], # from hyp augmentation
'min_child_weight' : scloguniform(0.1,20-0.1),#[0.1,20], # from hyp augmentation
'subsample': scuniform(0.01,1-0.01),#[0.01,1], # from hyp augmentation
}

## Load Model - Cross Validation without Early Stopping

In [4]:
# xgbsurv_pipe = Pipeline(steps=[
#                      ('xgbsurv',XGBSurv(objective="breslow_objective",
#                                              eval_metric="breslow_loss",
#                                              random_state=8, disable_default_eval_metric=True ))    
#                                              ])
model = XGBSurv(objective="breslow_objective",eval_metric="breslow_loss",
                                             random_state=8, disable_default_eval_metric=True )
#scoring function form of score_func(y, y_pred)
# -1 means using all processors.
grid_search = RandomizedSearchCV(model, param_grid, scoring = make_scorer(breslow_likelihood, greater_is_better=False), n_jobs=-1, cv=10, n_iter=40)



## Fit Model

In [5]:
grid_result = grid_search.fit(X_train, y_train)

In [6]:
pd.DataFrame(grid_result.cv_results_).describe()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,split5_test_score,split6_test_score,split7_test_score,split8_test_score,split9_test_score,mean_test_score,std_test_score,rank_test_score
count,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0,40.0
mean,0.084516,0.019231,0.014207,0.005427,-4.639719,-4.8638,-4.618429,-4.805938,-4.623692,-4.872388,-4.969491,-4.843168,-4.785771,-4.886405,-4.79088,0.262858,20.5
std,0.177165,0.068524,0.071371,0.029056,2.562232,2.553611,1.667517,2.275549,2.109226,2.485533,2.831186,3.054724,2.441867,2.67129,2.414811,0.568778,11.690452
min,0.00734,0.001709,0.00047,1.4e-05,-18.705285,-16.372307,-11.849984,-14.023804,-14.203815,-16.171601,-19.172601,-21.774283,-16.199625,-18.771677,-16.724498,0.048481,1.0
25%,0.028706,0.003431,0.000664,5.7e-05,-3.907049,-4.07906,-4.10481,-4.043592,-3.932945,-4.143065,-4.083507,-4.028489,-4.011417,-4.067268,-4.032347,0.061554,10.75
50%,0.051478,0.007024,0.000761,0.000266,-3.897675,-3.98076,-4.029819,-4.017801,-3.902086,-4.048994,-4.064031,-3.9189,-3.938437,-4.055156,-3.984901,0.063663,20.5
75%,0.081724,0.010723,0.00093,0.000467,-3.880365,-3.973612,-4.019735,-3.999365,-3.89377,-4.04414,-4.043168,-3.915477,-3.933196,-4.035829,-3.973354,0.089442,30.25
max,1.141923,0.439931,0.445161,0.183568,-3.846123,-3.947119,-3.989037,-3.972305,-3.856155,-4.035679,-4.010262,-3.904973,-3.890445,-3.941707,-3.960424,2.787861,40.0


## Train Model with Best Parameters

In [7]:
best_params = grid_result.best_estimator_
p = best_params.get_params()
bmodel = XGBSurv(**p)
bmodel.fit(X_train, y_train)

In [8]:
bmodel.save_model("best_cv_model.json")



## Predict

In [9]:
preds_train = bmodel.predict(X_train, output_margin=True)
preds_test = bmodel.predict(X_test, output_margin=True)

## Evaluation

In [10]:
# train
cindex_censored(y_train, preds_train)

0.6649808870518152

In [11]:
# test
cindex_censored(y_test, preds_test)

0.6278086707093073

## Cross Validation with Early Stopping


In [12]:
# Set Hyperparameter Space

param_grid = {
'early_stopping_rounds': scrandint(1,20),
'alpha': scloguniform(1e-10,1),#[1e-10,1], # from hyp augmentation, L1 regularization
'reg_lambda': scloguniform(1e-10,1), #[1e-10,1], #alias l2_regularization, lambda in augmentation
'learning_rate': scloguniform(0.001,1), #[0.001,1], # assumed alias eta from augmentation,
'n_estimators':  scrandint(1,100), # corresponds to num_rounds
'gamma': scuniform(0.1,1-0.1),#[0.1,1], # minimum loss reduction required to make a further partition on a leaf node of the tree.
'colsample_bylevel': scuniform(0.1, 1-0.1), #[0.1,1], # from hyp augmentation
'colsample_bynode': scuniform(0.1, 1-0.1), #[0.1,1], # from hyp augmentation, uniform(0.1,1),
'colsample_bytree': scuniform(0.5, 1-0.5),#[0.5,1], # from hyp augmentation, seems to exceed the bound, uniform(0.5,1)
'max_depth': scrandint(1,20),#[1,20], # from hyp augmentation
'max_delta_step': scrandint(0,10),#[0,10], # from hyp augmentation
'min_child_weight' : scloguniform(0.1,20-0.1),#[0.1,20], # from hyp augmentation
'subsample': scuniform(0.01,1-0.01),#[0.01,1], # from hyp augmentation
}

In [14]:
model = XGBSurv(objective="breslow_objective",eval_metric="breslow_loss",
                                             random_state=8, disable_default_eval_metric=True, early_stopping_rounds=10)
#scoring function form of score_func(y, y_pred)
# -1 means using all processors.
grid_search = RandomizedSearchCV(model, param_grid, scoring = make_scorer(breslow_likelihood, greater_is_better=False), n_jobs=-1, cv=10, n_iter=40)

grid_result = grid_search.fit(X_train, y_train, eval_test_size=0.1)

[0]	validation_0-breslow_likelihood:6.12623	validation_1-breslow_likelihood:3.73119
[1]	validation_0-breslow_likelihood:6.12572	validation_1-breslow_likelihood:3.73102
[2]	validation_0-breslow_likelihood:6.12510	validation_1-breslow_likelihood:3.73077
[0]	validation_0-breslow_likelihood:6.08129	validation_1-breslow_likelihood:3.92599
[3]	validation_0-breslow_likelihood:6.12467	validation_1-breslow_likelihood:3.73081
[1]	validation_0-breslow_likelihood:6.08068	validation_1-breslow_likelihood:3.92578
[2]	validation_0-breslow_likelihood:6.08065	validation_1-breslow_likelihood:3.92577
[3]	validation_0-breslow_likelihood:6.08025	validation_1-breslow_likelihood:3.92555
[4]	validation_0-breslow_likelihood:6.12409	validation_1-breslow_likelihood:3.73070
[4]	validation_0-breslow_likelihood:6.07966	validation_1-breslow_likelihood:3.92531
[5]	validation_0-breslow_likelihood:6.07944	validation_1-breslow_likelihood:3.92509
[6]	validation_0-breslow_likelihood:6.07901	validation_1-breslow_likelihood:

## Fit Best Model

In [15]:
grid_result.best_estimator_.fit(X_train, y_train, eval_test_size=0.1)

[0]	validation_0-breslow_likelihood:6.17761	validation_1-breslow_likelihood:3.99178
[1]	validation_0-breslow_likelihood:6.17730	validation_1-breslow_likelihood:3.99209
[2]	validation_0-breslow_likelihood:6.17628	validation_1-breslow_likelihood:3.99127
[3]	validation_0-breslow_likelihood:6.17568	validation_1-breslow_likelihood:3.99107
[4]	validation_0-breslow_likelihood:6.17492	validation_1-breslow_likelihood:3.99105
[5]	validation_0-breslow_likelihood:6.17408	validation_1-breslow_likelihood:3.99080
[6]	validation_0-breslow_likelihood:6.17308	validation_1-breslow_likelihood:3.98983
[7]	validation_0-breslow_likelihood:6.17253	validation_1-breslow_likelihood:3.98884
[8]	validation_0-breslow_likelihood:6.17185	validation_1-breslow_likelihood:3.98821
[9]	validation_0-breslow_likelihood:6.17041	validation_1-breslow_likelihood:3.98663
[10]	validation_0-breslow_likelihood:6.17007	validation_1-breslow_likelihood:3.98686
[11]	validation_0-breslow_likelihood:6.16935	validation_1-breslow_likelihoo

In [16]:
preds_train = bmodel.predict(X_train, output_margin=True)
preds_test = bmodel.predict(X_test, output_margin=True)

In [17]:
# train
cindex_censored(y_train, preds_train)

0.6649808870518152

In [18]:
# test
cindex_censored(y_test, preds_test)

0.6278086707093073