# `auton-survival` Cross Validation Survival Regression

`auton-survival` offers a simple to use API to train Survival Regression Models that performs cross validation model selection by minimizing integrated brier score. In this notebook we demonstrate the use of `auton-survival` to train survival models on the *SUPPORT* dataset in cross validation fashion.

In [1]:
#import sys

#sys.path.append('../')
from auton_survival import datasets
outcomes, features = datasets.load_support()

In [2]:
from auton_survival.preprocessing import Preprocessor

cat_feats = ['sex', 'dzgroup', 'dzclass', 'income', 'race', 'ca']
num_feats = ['age', 'num.co', 'meanbp', 'wblc', 'hrt', 'resp', 
             'temp', 'pafi', 'alb', 'bili', 'crea', 'sod', 'ph', 
             'glucose', 'bun', 'urine', 'adlp', 'adls']

# Data should be processed in a fold-independent manner when performing cross-validation. 
# For simplicity in this demo, we process the dataset in a non-independent manner.
preprocessor = Preprocessor(cat_feat_strat='ignore', num_feat_strat= 'mean') 
x = preprocessor.fit_transform(features, cat_feats=cat_feats, num_feats=num_feats,
                                one_hot=True, fill_value=-1)

In [3]:
x


Unnamed: 0,age,num.co,meanbp,wblc,hrt,resp,temp,pafi,alb,bili,...,dzclass_Coma,income_$25-$50k,income_>$50k,income_under $11k,race_black,race_hispanic,race_other,race_white,ca_no,ca_yes
0,0.012772,-1.390013,0.449837,-6.931821e-01,-0.892283,-0.138967,-0.881504,1.569019e+00,-1.655686e+00,-5.238337e-01,...,False,False,False,False,False,False,True,False,False,False
1,-0.148262,0.097711,-1.500702,5.187096e-01,0.470382,1.114591,-2.005013,-1.495658e+00,-6.389701e-16,9.880260e-17,...,False,False,False,False,False,False,False,True,True,False
2,-0.635153,0.097711,-0.525432,-4.201759e-01,-0.290175,0.487812,0.235766,-8.319880e-02,-6.389701e-16,-7.892740e-02,...,False,False,False,True,False,False,False,True,True,False
3,-1.299688,0.097711,-0.344827,-3.546971e-01,-0.290175,0.905665,-1.680444,-3.003564e-16,-6.389701e-16,9.880260e-17,...,False,False,False,True,False,False,False,True,False,False
4,1.105258,-0.646151,-0.922764,1.258366e-01,0.470382,-0.347893,0.635237,-6.997670e-01,-6.389701e-16,9.880260e-17,...,False,False,False,False,False,False,False,True,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9100,0.219471,-0.646151,0.883291,-5.403626e-01,0.216863,-0.138967,-1.124933,4.276908e-01,1.077955e+00,-4.793410e-01,...,False,False,False,False,False,False,False,True,True,False
9101,-0.480786,-0.646151,-1.500702,1.939826e-16,-3.078884,-1.601451,1.190748,-2.222321e-01,-6.389701e-16,9.880260e-17,...,True,False,False,False,False,False,False,True,True,False
9102,0.495813,-0.646151,0.955533,-4.312668e-01,-0.448624,0.069960,-0.325992,-6.290945e-01,-6.389701e-16,-4.793410e-01,...,False,False,False,False,False,False,False,True,True,False
9103,-1.002436,-0.646151,0.522080,-5.185008e-01,0.407002,0.069960,-0.563174,1.997678e+00,-2.518775e+00,-4.793410e-01,...,False,False,False,False,False,False,False,True,False,True


In [4]:
import numpy as np
times = np.quantile(outcomes.time[outcomes.event==1], [0.25, 0.5, 0.75]).tolist()

In [5]:
from auton_survival.experiments import SurvivalRegressionCV

param_grid = {'k' : [3],
              'distribution' : ['Weibull'],
              'learning_rate' : [1e-4, 1e-3],
              'layers' : [[100]]}

experiment = SurvivalRegressionCV(model='dsm', num_folds=3, hyperparam_grid=param_grid, random_seed=0)
model = experiment.fit(x, outcomes, times, metric='brs')

At hyper-param {'distribution': 'Weibull', 'k': 3, 'layers': [100], 'learning_rate': 0.0001}
At fold: 0


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.

In [7]:
print(experiment.folds)
model

[2 2 0 ... 0 0 0]


NameError: name 'model' is not defined

In [6]:
out_risk = model.predict_risk(x, times)
out_survival = model.predict_survival(x, times)

NameError: name 'model' is not defined

In [None]:
from auton_survival.metrics import survival_regression_metric

for fold in set(experiment.folds):
    print(survival_regression_metric('brs', outcomes[experiment.folds==fold], 
                                     out_survival[experiment.folds==fold], 
                                     times=times))

In [None]:
from auton_survival.metrics import survival_regression_metric

for fold in set(experiment.folds):
    print(survival_regression_metric('ctd', outcomes[experiment.folds==fold], 
                                     out_survival[experiment.folds==fold], 
                                     times=times))

In [None]:
for fold in set(experiment.folds):
    for time in times:
        print(time)