In [5]:
import pandas as pd
import numpy as np
from numpy import savetxt
from xgbsurv.datasets import (load_metabric, load_flchain, load_rgbsg, load_support, load_tcga)
from xgbsurv.models.utils import sort_X_y_pandas, transform_back, transform
from xgbsurv.models.efron_final import efron_likelihood, get_cumulative_hazard_function_efron
import torch
from torch import nn
from sklearn.metrics import make_scorer
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold, train_test_split
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder, LabelBinarizer, OneHotEncoder
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.decomposition import PCA
from loss_functions_pytorch import EfronLoss, efron_likelihood_torch
from skorch import NeuralNet
from skorch.callbacks import EarlyStopping, Callback, LRScheduler
import skorch.callbacks
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import check_cv
from numbers import Number
import torch.utils.data

from skorch.utils import flatten
from skorch.utils import is_pandas_ndframe
from skorch.utils import check_indexing
from skorch.utils import multi_indexing
from skorch.utils import to_numpy
from skorch.dataset import get_len
from skorch.dataset import ValidSplit
from pycox.evaluation import EvalSurv
from scipy.stats import uniform as scuniform
from scipy.stats import randint as scrandint
from scipy.stats import loguniform as scloguniform
import random
import os
#torch.set_default_dtype(torch.float64)
#torch.set_default_tensor_type(torch.DoubleTensor)

## Set Parameters

In [6]:
# set parameters, put into function
n_outer_splits = 5
n_inner_splits = 5
rand_state = 42
n_iter = 50
early_stopping_rounds=10
base_score = 0.0

# set seed for scipy
np.random.seed(rand_state)

param_grid = {
    'estimator__module__n_layers': [1, 2, 4],
    'estimator__module__num_nodes': [64, 128, 256, 512],
    'estimator__module__dropout': scuniform(0.0,0.7),
    'estimator__optimizer__weight_decay': [0.4, 0.2, 0.1, 0.05, 0.02, 0.01, 0],
    'estimator__batch_size': [64, 128, 256, 512, 1024],
    #lr not in paper because of learning rate finder
    # note: setting learning rate higher would make exp(partial_hazard) explode
    #'estimator__lr': scloguniform(0.001,0.01), # scheduler unten einbauen
    # use callback instead
    'estimator__lr':[0.01],
    'estimator__max_epochs':  scrandint(150,250) # corresponds to num_rounds
}

## Set Seed

In [7]:
def seed_torch(seed=rand_state):
    """Sets all seeds within torch and adjacent libraries.

    Args:
        seed: Random seed to be used by the seeding functions.

    Returns:
        None
    """
    random.seed(seed)
    #os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    #torch.backends.cudnn.benchmark = False
    #torch.backends.cudnn.deterministic = True
    return None


class FixSeed(Callback):
    def __init__(self, seed):
        self.seed = seed

    def initialize(self):
        seed_torch(self.seed)
        return super().initialize()

## Set Loss Function

In [8]:
# Define Scorer
def custom_scoring_function(y_true, y_pred):

        #y_true = torch.from_numpy(y_true)
        if isinstance(y_pred, np.ndarray):
            y_pred = torch.from_numpy(y_pred)
        if isinstance(y_true, np.ndarray):
            y_true = torch.from_numpy(y_true)
        if isinstance(y_pred, pd.Series):
            y_pred = torch.tensor(y_pred.values)
        if isinstance(y_true, pd.Series):
            y_true = torch.tensor(y_true.values)
        score = efron_likelihood_torch(y_true, y_pred) #.to(torch.float32)
        return score.numpy()

scoring_function = make_scorer(custom_scoring_function, greater_is_better=False)

## Set Torch Model

In [9]:

class SurvivalModel(nn.Module):
    def __init__(self, n_layers, input_units, num_nodes, dropout, out_features):
        super(SurvivalModel, self).__init__()
        self.n_layers = n_layers
        self.in_features = input_units
        self.num_nodes = num_nodes
        self.dropout = dropout
        self.out_features = out_features
        model = []
        # first layer
        model.append(torch.nn.Linear(input_units, num_nodes))
        model.append(torch.nn.ReLU())
        model.append(torch.nn.Dropout(dropout))
        model.append(torch.nn.BatchNorm1d(num_nodes))

        for i in range(n_layers-1):
            model.append(torch.nn.Linear(num_nodes, num_nodes))
            #init.kaiming_normal_(model[-1].weight, nonlinearity='relu')
            model.append(torch.nn.ReLU())
            model.append(torch.nn.Dropout(dropout))
            model.append(torch.nn.BatchNorm1d(num_nodes))

        # output layer
        model.append(torch.nn.Linear(num_nodes, out_features))
    
        self.layers = nn.Sequential(*model)

        # for layer in self.layers:
        #     if isinstance(layer, nn.Linear):
        #         #nn.init.uniform_(layer.weight, a=-0.5, b=0.5)
        #         nn.init.kaiming_normal_(layer.weight)


    def forward(self, X):
        X = X.to(torch.float32)
        res = self.layers(X)
        #print(res)
        return res


## Set up Scaler

In [10]:
class CustomStandardScaler(StandardScaler):
    
    def __init__(self, copy=True, with_mean=True, with_std=True):
        super().__init__(copy=copy, with_mean=with_mean, with_std=with_std)
        
    def fit(self, X, y=None):
        return super().fit(X, y)
    
    def transform(self, X, y=None):
        X_transformed = super().transform(X, y)
        return X_transformed.astype(np.float32)
    
    def fit_transform(self, X, y=None):
        X_transformed = super().fit_transform(X, y)
        return X_transformed.astype(np.float32)

## Custom Split

In [11]:


# Define stratified inner k-fold cross-validation
class CustomSplit(StratifiedKFold):
    def __init__(self, n_splits=2, shuffle=True, random_state=rand_state):
        super().__init__(n_splits=n_splits, shuffle=shuffle, random_state=random_state)

    def split(self, X, y, groups=None):
        print('split', X.dtypes)
        try:
            if y.shape[1]>1:
                y = y[:,0]
        except:
            pass
        bins = np.sign(y)
        return super().split(X, bins, groups=groups)

    def get_n_splits(self, X=None, y=None, groups=None):
        return self.n_splits

outer_custom_cv = CustomSplit(n_splits=n_outer_splits, shuffle=True, random_state=rand_state)
inner_custom_cv = CustomSplit(n_splits=n_outer_splits, shuffle=True, random_state=rand_state)



## Custom Valid Split

In [12]:



class CustomStandardScaler(StandardScaler):
    
    def __init__(self, copy=True, with_mean=True, with_std=True):
        super().__init__(copy=copy, with_mean=with_mean, with_std=with_std)
        
    def fit(self, X, y=None):
        return super().fit(X, y)
    
    def transform(self, X, y=None):
        X_transformed = super().transform(X, y)
        return X_transformed.astype(np.float32)
    
    def fit_transform(self, X, y=None):
        X_transformed = super().fit_transform(X, y)
        return X_transformed.astype(np.float32)
    
class CustomValidSplit():

    def __init__(
            self,
            cv=5,
            stratified=False,
            random_state=None,
    ):
        self.stratified = stratified
        self.random_state = random_state

        if isinstance(cv, Number) and (cv <= 0):
            raise ValueError("Numbers less than 0 are not allowed for cv "
                             "but ValidSplit got {}".format(cv))

        if not self._is_float(cv) and random_state is not None:
            raise ValueError(
                "Setting a random_state has no effect since cv is not a float. "
                "You should leave random_state to its default (None), or set cv "
                "to a float value.",
            )

        self.cv = cv

    def _is_stratified(self, cv):
        return isinstance(cv, (StratifiedKFold, StratifiedShuffleSplit))

    def _is_float(self, x):
        if not isinstance(x, Number):
            return False
        return not float(x).is_integer()

    def _check_cv_float(self):
        cv_cls = StratifiedShuffleSplit if self.stratified else ShuffleSplit
        return cv_cls(test_size=self.cv, random_state=self.random_state)

    def _check_cv_non_float(self, y):
        return check_cv(
            self.cv,
            y=y,
            classifier=self.stratified,
        )

    def check_cv(self, y):
        """Resolve which cross validation strategy is used."""
        y_arr = None
        if self.stratified:
            # Try to convert y to numpy for sklearn's check_cv; if conversion
            # doesn't work, still try.
            try:
                y_arr = to_numpy(y)
            except (AttributeError, TypeError):
                y_arr = y

        if self._is_float(self.cv):
            return self._check_cv_float()
        return self._check_cv_non_float(y_arr)

    def _is_regular(self, x):
        return (x is None) or isinstance(x, np.ndarray) or is_pandas_ndframe(x)

    def __call__(self, dataset, y=None, groups=None):
        # key change here
        y = np.sign(y)
        bad_y_error = ValueError(
            "Stratified CV requires explicitly passing a suitable y.")
        if (y is None) and self.stratified:
            raise bad_y_error

        cv = self.check_cv(y)
        if self.stratified and not self._is_stratified(cv):
            raise bad_y_error

        # pylint: disable=invalid-name
        len_dataset = get_len(dataset)
        if y is not None:
            len_y = get_len(y)
            if len_dataset != len_y:
                raise ValueError("Cannot perform a CV split if dataset and y "
                                 "have different lengths.")

        args = (np.arange(len_dataset),)
        if self._is_stratified(cv):
            args = args + (to_numpy(y),)

        idx_train, idx_valid = next(iter(cv.split(*args, groups=groups)))
        dataset_train = torch.utils.data.Subset(dataset, idx_train)
        dataset_valid = torch.utils.data.Subset(dataset, idx_valid)
        return dataset_train, dataset_valid


## Input Shape Setter

In [13]:
class InputShapeSetter(skorch.callbacks.Callback):
    def on_train_begin(self, net, X, y):
        net.set_params(module__input_units=X.shape[-1])

## Setting Training Procedure

In [10]:

def train_eval(X, y, net, n_iter, filename):
        model = 'skorch_efron_'
        dataset_name = filename.split('_')[0]
        # add IBS later
        outer_scores = {'cindex_train_'+dataset_name:[], 'cindex_test_'+dataset_name:[],
                        'ibs_train_'+dataset_name:[], 'ibs_test_'+dataset_name:[]}
        best_params = {'best_params_'+dataset_name:[]}
        best_model = {'best_model_'+dataset_name:[]}
        ct = make_column_transformer(
                (StandardScaler(), make_column_selector(dtype_include=['float32'])),
                #(OneHotEncoder(sparse_output=False,handle_unknown='infrequent_if_exist'), make_column_selector(dtype_include=['category', 'object'])),
                remainder='passthrough')

        pipe = Pipeline([('scaler',ct),
                        ('estimator', net)])
        rs = RandomizedSearchCV(pipe, param_grid, scoring = scoring_function, n_jobs=-1, 
                                    n_iter=n_iter, refit=True, random_state=rand_state)
        for i, (train_index, test_index) in enumerate(outer_custom_cv.split(X, y)):
                # Split data into training and testing sets for outer fold
                X_train, X_test = X.iloc[train_index], X.iloc[test_index]
                y_train, y_test = y.iloc[train_index], y.iloc[test_index]
                X_train, y_train = sort_X_y_pandas(X_train, y_train)
                X_test, y_test = sort_X_y_pandas(X_test, y_test)

                #print(X_train.shape, type(X_train))
                #print(y_train.shape, type(y_train))
                #print(X_test.shape, type(X_test))
                #print(y_test.shape, type(y_test))
                
                # save splits and data
                savetxt('splits/'+model+'train_index_'+str(i)+'_'+filename, train_index, delimiter=',')
                savetxt('splits/'+model+'test_index_'+str(i)+'_'+filename, test_index, delimiter=',')
                
                #savetxt('splits/X_train_'+str(i)+'_'+filename, X_train, delimiter=',')
                #savetxt('splits/X_test_'+str(i)+'_'+filename, X_test, delimiter=',')

                #savetxt('splits/y_train_'+str(i)+'_'+filename, y_train, delimiter=',')
                #savetxt('splits/y_test_'+str(i)+'_'+filename, y_test, delimiter=',')


                rs.fit(X_train, y_train)
                best_preds_train = rs.best_estimator_.predict(X_train)
                best_preds_test = rs.best_estimator_.predict(X_test)

                savetxt('predictions/'+model+'best_preds_train_'+str(i)+'_'+filename, best_preds_train, delimiter=',')
                savetxt('predictions/'+model+'best_preds_test_'+str(i)+'_'+filename, best_preds_test, delimiter=',')

                # save hyperparameter settings
                params = rs.best_estimator_.get_params()
                best_params['best_params_'+dataset_name] += [rs.best_params_]
                best_model['best_model_'+dataset_name] += [params]
                try:
                    cum_hazard_train = get_cumulative_hazard_function_efron(
                            X_train.values, X_train.values, y_train.values, y_train.values,
                            best_preds_train.reshape(-1), best_preds_train.reshape(-1)
                            )

                    df_survival_train = np.exp(-cum_hazard_train)
                    durations_train, events_train = transform_back(y_train.values)
                    time_grid_train = np.linspace(durations_train.min(), durations_train.max(), 100)
                    ev = EvalSurv(df_survival_train, durations_train, events_train, censor_surv='km')
                    print('Concordance Index',ev.concordance_td('antolini'))
                    print('Integrated Brier Score:',ev.integrated_brier_score(time_grid_train))
                    cindex_score_train = ev.concordance_td('antolini')
                    ibs_score_train = ev.integrated_brier_score(time_grid_train)

                    outer_scores['cindex_train_'+dataset_name] += [cindex_score_train]
                    outer_scores['ibs_train_'+dataset_name] += [ibs_score_train]

                except:
                    outer_scores['cindex_train_'+dataset_name] += [np.nan]
                    outer_scores['ibs_train_'+dataset_name] += [np.nan]
                    
                try:
                    cum_hazard_test = get_cumulative_hazard_function_efron(
                            X_train.values, X_test.values, y_train.values, y_test.values,
                            best_preds_train.reshape(-1), best_preds_test.reshape(-1)
                            )
                    df_survival_test = np.exp(-cum_hazard_test)
                    durations_test, events_test = transform_back(y_test.values)
                    print('durations',durations_test.min(), durations_test.max())
                    time_grid_test = np.linspace(durations_test.min(), durations_test.max(), 100)
                    ev = EvalSurv(df_survival_test, durations_test, events_test, censor_surv='km')
                    print('Concordance Index',ev.concordance_td('antolini'))
                    print('Integrated Brier Score:',ev.integrated_brier_score(time_grid_test))
                    cindex_score_test = ev.concordance_td('antolini')
                    ibs_score_test = ev.integrated_brier_score(time_grid_test)

                    outer_scores['cindex_test_'+dataset_name] += [cindex_score_test]
                    outer_scores['ibs_test_'+dataset_name] += [ibs_score_test]
                except: 
                    outer_scores['cindex_test_'+dataset_name] += [np.nan]
                    outer_scores['ibs_test_'+dataset_name] += [np.nan]
            
        df_best_params = pd.DataFrame(best_params)
        df_best_model = pd.DataFrame(best_model)
        df_outer_scores = pd.DataFrame(outer_scores)
        df_metrics = pd.concat([df_best_params,df_best_model,df_outer_scores], axis=1)
        df_metrics.to_csv('metrics/'+model+'metric_summary_'+'_'+filename, index=False)
        
        # cindex
        df_agg_metrics_cindex = pd.DataFrame({'dataset':[dataset_name],
                                              'cindex_train_mean':df_outer_scores['cindex_train_'+dataset_name].mean(),
                                              'cindex_train_std':df_outer_scores['cindex_train_'+dataset_name].std(),
                                              'cindex_test_mean':df_outer_scores['cindex_test_'+dataset_name].mean(),
                                              'cindex_test_std':df_outer_scores['cindex_test_'+dataset_name].std() })
        # IBS
        df_agg_metrics_ibs = pd.DataFrame({'dataset':[dataset_name],
                                              'ibs_train_mean':df_outer_scores['ibs_train_'+dataset_name].mean(),
                                              'ibs_train_std':df_outer_scores['ibs_train_'+dataset_name].std(),
                                              'ibs_test_mean':df_outer_scores['ibs_test_'+dataset_name].mean(),
                                              'ibs_test_std':df_outer_scores['ibs_test_'+dataset_name].std() })

        return df_agg_metrics_cindex, df_agg_metrics_ibs, best_model, best_params, outer_scores, best_preds_train, best_preds_test #, X_train, X_test, y_train, y_test

                

In [11]:
data_set_fns = [load_metabric,  load_flchain, load_rgbsg, load_support] #, load_flchain, load_rgbsg, load_support, load_tcga]
data_set_fns_str = ['load_metabric', 'load_flchain', 'load_rgbsg', 'load_support'] 
one_hot_dict = {'load_flchain': ['mgus'],  'load_rgbsg':['grade'], 'load_support':['cancer', 'race'],}
agg_metrics_cindex = []
agg_metrics_ibs = []

for idx, dataset in enumerate(data_set_fns):
    # get name of current dataset
    data = dataset(path="/Users/JUSC/Documents/xgbsurv/xgbsurv/datasets/data/", as_frame=True)
    X  = data.data #.astype(np.float32)
    y = data.target #.values #.to_numpy()

    #print(data_set_fns_str[idx])
    if data_set_fns_str[idx] in one_hot_dict.keys():
        X = pd.get_dummies(X, columns=one_hot_dict[data_set_fns_str[idx]])
    X, y = sort_X_y_pandas(X, y)
    
    net = NeuralNet(
        SurvivalModel, 
        #module__n_layers = 1,
        module__input_units = X.shape[1],
        #module__num_nodes = 32,
        #module__dropout = 0.1, # these could also be removed
        module__out_features = 1,
        # for split sizes when result size = 1
        iterator_train__drop_last=True,
        #iterator_valid__drop_last=True,
        criterion=EfronLoss,
        optimizer=torch.optim.AdamW,
        optimizer__weight_decay = 0.4,
        batch_size=32, # separate train and valid->iterator_train__batch_size=128 and iterator_valid__batch_size=128 ?
        callbacks=[
            (
                "sched",
                LRScheduler(
                    torch.optim.lr_scheduler.ReduceLROnPlateau,
                    monitor="valid_loss",
                    patience=5,
                ),
            ),
            (
                "es",
                EarlyStopping(
                    monitor="valid_loss",
                    patience=early_stopping_rounds,
                    load_best=True,
                ),
            ),
            ("seed", FixSeed(seed=42)),
            #("Input Shape Setter",InputShapeSetter())
        ],
        train_split = CustomValidSplit(0.2, stratified=True, random_state=rand_state), 
        verbose=0
    )
    df_agg_metrics_cindex, df_agg_metrics_ibs, best_model,params, outer_scores, best_preds_train, best_preds_test = train_eval(X, y, net, n_iter, data.filename)
    agg_metrics_cindex.append(df_agg_metrics_cindex)
    agg_metrics_ibs.append(df_agg_metrics_ibs)


split MKI67                float32
EGFR                 float32
PGR                  float32
ERBB2                float32
hormone_treatment      uint8
radiotherapy           uint8
chemotherapy           uint8
ER_positive            uint8
age                  float32
dtype: object
Concordance Index 0.6193755200043974
Integrated Brier Score: 0.1593675155626451
durations 0.76666665 337.03333
Concordance Index 0.5712868536962818
Integrated Brier Score: 0.1768424988998544
Concordance Index 0.6245436306039408
Integrated Brier Score: 0.16440115398089575
durations 0.1 330.36667
Concordance Index 0.596334244527916
Integrated Brier Score: 0.17352940341419393
Concordance Index 0.6401055230916116
Integrated Brier Score: 0.1595401554297718
durations 1.4333333 301.23334
Concordance Index 0.6260729932795432
Integrated Brier Score: 0.17511993067926607
Concordance Index 0.6193762249806972
Integrated Brier Score: 0.167672603341387
durations 1.2333333 355.2
Concordance Index 0.6162093066408263
Integrated

Traceback (most recent call last):
  File "/Users/JUSC/miniconda3/envs/xgbsurv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/JUSC/miniconda3/envs/xgbsurv/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
           ^^^^^^^^^^^^
  File "/Users/JUSC/miniconda3/envs/xgbsurv/lib/python3.11/site-packages/sklearn/metrics/_scorer.py", line 282, in _score
    return self._sign * self._score_func(y_true, y_pred, **self._kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/var/folders/jr/dh6mkdzs31lc5pkqymtdbh180000gp/T/ipykernel_14409/1822012356.py", line 14, in custom_scoring_function
AttributeError: 'int' object has no attribute 'numpy'

Traceback (most recent call last):
  File "/Users/JUSC/miniconda3/envs/xgbsurv/lib/python3.11/site-packages/sklearn/m

Concordance Index 0.6579201417350656
Integrated Brier Score: 0.18873146495631968
durations 1.87269 84.0
Concordance Index 0.6519774611949819
Integrated Brier Score: 0.19665510599105304
Concordance Index 0.639884390645848
Integrated Brier Score: 0.18892060451744142
durations 0.49281314 85.81519
Concordance Index 0.6630215635222645
Integrated Brier Score: 0.18840753005720642
Concordance Index 0.6780304073767064
Integrated Brier Score: 0.1805890024518303
durations 1.4784395 84.0
Concordance Index 0.6373253654203344
Integrated Brier Score: 0.18471555722633215
Concordance Index 0.6718815614904193
Integrated Brier Score: 0.1801101332478502
durations 0.26283368 84.20534
Concordance Index 0.6875050070766684
Integrated Brier Score: 0.1801718009767508
split age                 float32
sex                   uint8
n_comorbidities     float32
diabetes              uint8
dementia              uint8
blood_pressure      float32
heart_rate          float32
respiration_rate    float32
temperature       

In [12]:
df_final_efron_1_cindex = pd.concat([df for df in agg_metrics_cindex]).round(4)
df_final_efron_1_cindex.to_csv('metrics/final_deep_learning_efron_1_cindex.csv', index=False)
df_final_efron_1_cindex.to_csv('/Users/JUSC/Documents/644928e0fb7e147893e8ec15/05_thesis/tables/final_deep_learning_efron_1_cindex.csv', index=False)  #
df_final_efron_1_cindex

Unnamed: 0,dataset,cindex_train_mean,cindex_train_std,cindex_test_mean,cindex_test_std
0,METABRIC,0.6252,0.0086,0.6068,0.0231
0,FLCHAIN,0.7967,0.0031,0.7938,0.0125
0,RGBSG,0.6619,0.0147,0.6571,0.0194
0,SUPPORT,0.5975,0.0071,0.5869,0.0082


In [13]:
df_final_efron_1_ibs = pd.concat([df for df in agg_metrics_ibs]).round(4)
df_final_efron_1_ibs.to_csv('metrics/final_deep_learning_efron_1_ibs.csv', index=False)
df_final_efron_1_ibs.to_csv('/Users/JUSC/Documents/644928e0fb7e147893e8ec15/05_thesis/tables/final_deep_learning_efron_1_ibs.csv', index=False) 
df_final_efron_1_ibs

Unnamed: 0,dataset,ibs_train_mean,ibs_train_std,ibs_test_mean,ibs_test_std
0,METABRIC,0.1628,0.0035,0.1726,0.0064
0,FLCHAIN,0.1023,0.0014,0.1026,0.0029
0,RGBSG,0.1851,0.0044,0.1877,0.0061
0,SUPPORT,0.2015,0.0035,0.2036,0.0026


## TCGA

In [14]:
param_grid_tcga = {
    'estimator__module__n_layers': [1, 2, 4],
    'estimator__module__num_nodes': [64, 128, 256, 512],
    'estimator__module__dropout': scuniform(0.0,0.7),
    'estimator__optimizer__weight_decay': [0.4, 0.2, 0.1, 0.05, 0.02, 0.01, 0],
    'estimator__batch_size': [64, 128, 256, 512, 1024],
    #lr not in paper because of learning rate finder
    # note: setting learning rate higher would make exp(partial_hazard) explode
    #'estimator__lr': scloguniform(0.001,0.01), # scheduler unten einbauen
    # use callback instead
    'estimator__lr':[0.01],
    'estimator__max_epochs':  scrandint(150,250), # corresponds to num_rounds
    #'pca__n_components': [8, 16, 32, 64]
}

In [15]:

def train_eval(X, y, net, n_iter, filename):
        model = 'skorch_efron_tcga'
        dataset_name = filename.split('_')[0]

        # add IBS later
        outer_scores = {'cindex_train_'+dataset_name:[], 'cindex_test_'+dataset_name:[],
                        'ibs_train_'+dataset_name:[], 'ibs_test_'+dataset_name:[]}
        best_params = {'best_params_'+dataset_name:[]}
        best_model = {'best_model_'+dataset_name:[]}
        ct = make_column_transformer(
                (StandardScaler(), make_column_selector(dtype_include=['float32'])),
                #(OneHotEncoder(sparse_output=False, handle_unknown='infrequent_if_exist'), make_column_selector(dtype_include=['category', 'object'])),
                remainder='drop')
        pipe = Pipeline([('scaler',ct),
                         #('pca', PCA()),
                        ('estimator', net)])
        rs = RandomizedSearchCV(pipe, param_grid_tcga, scoring = scoring_function, n_jobs=-1, 
                                    n_iter=n_iter, refit=True, random_state=rand_state)
        for i, (train_index, test_index) in enumerate(outer_custom_cv.split(X, y)):
                # Split data into training and testing sets for outer fold
                X_train, X_test = X.iloc[train_index], X.iloc[test_index]
                y_train, y_test = y.iloc[train_index], y.iloc[test_index]
                X_train, y_train = sort_X_y_pandas(X_train, y_train)
                X_test, y_test = sort_X_y_pandas(X_test, y_test)

                # print(X_train.shape, type(X_train))
                # print(y_train.shape, type(y_train))
                # print(X_test.shape, type(X_test))
                # print(y_test.shape, type(y_test))
                # save splits and data
                savetxt('splits/train_index_'+str(i)+'_'+filename, train_index, delimiter=',')
                savetxt('splits/test_index_'+str(i)+'_'+filename, test_index, delimiter=',')
                
                #savetxt('splits/X_train_'+str(i)+'_'+filename, X_train, delimiter=',')
                #savetxt('splits/X_test_'+str(i)+'_'+filename, X_test, delimiter=',')

                #savetxt('splits/y_train_'+str(i)+'_'+filename, y_train, delimiter=',')
                #savetxt('splits/y_test_'+str(i)+'_'+filename, y_test, delimiter=',')




                rs.fit(X_train, y_train)
                best_preds_train = rs.best_estimator_.predict(X_train)
                best_preds_test = rs.best_estimator_.predict(X_test)
                savetxt('predictions/'+model+'best_preds_train_'+str(i)+'_'+filename, best_preds_train, delimiter=',')
                savetxt('predictions/'+model+'best_preds_test_'+str(i)+'_'+filename, best_preds_test, delimiter=',')


                # save hyperparameter settings
                params = rs.best_estimator_.get_params
                best_params['best_params_'+dataset_name] += [rs.best_params_]
                best_model['best_model_'+dataset_name] += [params]
                try:
                    cum_hazard_train = get_cumulative_hazard_function_efron(
                            X_train.values, X_train.values, y_train.values, y_train.values,
                            best_preds_train.reshape(-1), best_preds_train.reshape(-1)
                            )

                    df_survival_train = np.exp(-cum_hazard_train)
                    durations_train, events_train = transform_back(y_train.values)
                    time_grid_train = np.linspace(durations_train.min(), durations_train.max(), 100)
                    ev = EvalSurv(df_survival_train, durations_train, events_train, censor_surv='km')
                    print('Concordance Index',ev.concordance_td('antolini'))
                    print('Integrated Brier Score:',ev.integrated_brier_score(time_grid_train))
                    cindex_score_train = ev.concordance_td('antolini')
                    ibs_score_train = ev.integrated_brier_score(time_grid_train)

                    outer_scores['cindex_train_'+dataset_name] += [cindex_score_train]
                    outer_scores['ibs_train_'+dataset_name] += [ibs_score_train]

                except:
                    outer_scores['cindex_train_'+dataset_name] += [np.nan]
                    outer_scores['ibs_train_'+dataset_name] += [np.nan]
                    
                try:
                    cum_hazard_test = get_cumulative_hazard_function_efron(
                            X_train.values, X_test.values, y_train.values, y_test.values,
                            best_preds_train.reshape(-1), best_preds_test.reshape(-1)
                            )
                    df_survival_test = np.exp(-cum_hazard_test)
                    durations_test, events_test = transform_back(y_test.values)
                    print('durations',durations_test.min(), durations_test.max())
                    time_grid_test = np.linspace(durations_test.min(), durations_test.max(), 100)
                    ev = EvalSurv(df_survival_test, durations_test, events_test, censor_surv='km')
                    print('Concordance Index',ev.concordance_td('antolini'))
                    print('Integrated Brier Score:',ev.integrated_brier_score(time_grid_test))
                    cindex_score_test = ev.concordance_td('antolini')
                    ibs_score_test = ev.integrated_brier_score(time_grid_test)

                    outer_scores['cindex_test_'+dataset_name] += [cindex_score_test]
                    outer_scores['ibs_test_'+dataset_name] += [ibs_score_test]
                except: 
                    outer_scores['cindex_test_'+dataset_name] += [np.nan]
                    outer_scores['ibs_test_'+dataset_name] += [np.nan]
            
        df_best_params = pd.DataFrame(best_params)
        df_best_model = pd.DataFrame(best_model)
        df_outer_scores = pd.DataFrame(outer_scores)
        df_metrics = pd.concat([df_best_params,df_best_model,df_outer_scores], axis=1)
        df_metrics.to_csv('metrics/'+model+'metric_summary_'+'_'+filename, index=False)
        # cindex
        df_agg_metrics_cindex = pd.DataFrame({'dataset':[dataset_name],
                                              'cindex_train_mean':df_outer_scores['cindex_train_'+dataset_name].mean(),
                                              'cindex_train_std':df_outer_scores['cindex_train_'+dataset_name].std(),
                                              'cindex_test_mean':df_outer_scores['cindex_test_'+dataset_name].mean(),
                                              'cindex_test_std':df_outer_scores['cindex_test_'+dataset_name].std() })
        # IBS
        df_agg_metrics_ibs = pd.DataFrame({'dataset':[dataset_name],
                                              'ibs_train_mean':df_outer_scores['ibs_train_'+dataset_name].mean(),
                                              'ibs_train_std':df_outer_scores['ibs_train_'+dataset_name].std(),
                                              'ibs_test_mean':df_outer_scores['ibs_test_'+dataset_name].mean(),
                                              'ibs_test_std':df_outer_scores['ibs_test_'+dataset_name].std() })
        return df_agg_metrics_cindex, df_agg_metrics_ibs,best_model, best_params, outer_scores, best_preds_train, best_preds_test

In [17]:
cancer_types = ['BLCA',
    'BRCA',
    'HNSC',
    'KIRC',
    'LGG',
    'LIHC',
    'LUAD',
    'LUSC',
    'OV',
    'STAD']


class InputShapeSetter(skorch.callbacks.Callback):
    def on_train_begin(self, net, X, y):
        net.set_params(module__input_units=X.shape[-1])

agg_metrics_cindex = []
agg_metrics_ibs = []

for idx, cancer_type in enumerate(cancer_types):
    # get name of current dataset
    data = load_tcga(path="/Users/JUSC/Documents/xgbsurv/xgbsurv/datasets/data/", cancer_type=cancer_type, as_frame=True)
    X  = data.data #.astype(np.float32)
    y = data.target #.values #.to_numpy()

    X, y = sort_X_y_pandas(X, y)

    net = NeuralNet(
        SurvivalModel, 
        module__n_layers = 1,
        module__input_units = X.shape[1],
        #module__num_nodes = 32,
        #module__dropout = 0.1, # these could also be removed
        module__out_features = 1,
        # for split sizes when result size = 1
        iterator_train__drop_last=True,
        #iterator_valid__drop_last=True,
        criterion=EfronLoss,
        optimizer=torch.optim.AdamW,
        optimizer__weight_decay = 0.4,
        batch_size=32, # separate train and valid->iterator_train__batch_size=128 and iterator_valid__batch_size=128 ?
        callbacks=[
            (
                "sched",
                LRScheduler(
                    torch.optim.lr_scheduler.ReduceLROnPlateau,
                    monitor="valid_loss",
                    patience=5,
                ),
            ),
            (
                "es",
                EarlyStopping(
                    monitor="valid_loss",
                    patience=10,
                    load_best=True,
                ),
            ),
            ("seed", FixSeed(seed=42)),
            #("Input Shape Setter",InputShapeSetter())
        ],
        train_split = CustomValidSplit(0.2, stratified=True, random_state=rand_state),  
        verbose=0
    )
    df_agg_metrics_cindex, df_agg_metrics_ibs, best_model,params, outer_scores, best_preds_train, best_preds_test = train_eval(X, y, net, n_iter, data.filename)
    agg_metrics_cindex.append(df_agg_metrics_cindex)
    agg_metrics_ibs.append(df_agg_metrics_ibs)

split gex_?|100130426      float32
gex_?|100133144      float32
gex_?|100134869      float32
gex_?|10357          float32
gex_?|10431          float32
                      ...   
gex_ZYG11A|440590    float32
gex_ZYG11B|79699     float32
gex_ZYX|7791         float32
gex_ZZEF1|23140      float32
gex_ZZZ3|26009       float32
Length: 20531, dtype: object


  new_threshold = score - abs_threshold_change


Concordance Index 0.7158818540506656
Integrated Brier Score: 0.21412643325303699
durations 17.0 4343.0
Concordance Index 0.5955801104972376
Integrated Brier Score: 0.21242619438216373


  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change


Concordance Index 0.7130031534038211
Integrated Brier Score: 0.1814666836970672
durations 15.0 3817.0
Concordance Index 0.5505188421627526
Integrated Brier Score: 0.28284234806477276


  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change


Concordance Index 0.6811468288444831
Integrated Brier Score: 0.20489835102280696
durations 59.0 5050.0
Concordance Index 0.5830346475507766
Integrated Brier Score: 0.2547183863696064
Concordance Index 0.6862221571998098
Integrated Brier Score: 0.21317623852059647
durations 55.0 5041.0
Concordance Index 0.6065292096219931
Integrated Brier Score: 0.303931257244285
Concordance Index 0.7275065172972592
Integrated Brier Score: 0.18789337296082578
durations 13.0 3432.0
Concordance Index 0.5688429217840982
Integrated Brier Score: 0.2514053963317619
split gex_?|100130426      float32
gex_?|100133144      float32
gex_?|100134869      float32
gex_?|10357          float32
gex_?|10431          float32
                      ...   
gex_ZYG11A|440590    float32
gex_ZYG11B|79699     float32
gex_ZYX|7791         float32
gex_ZZEF1|23140      float32
gex_ZZZ3|26009       float32
Length: 20531, dtype: object


  new_threshold = score - abs_threshold_change


Concordance Index 0.6869636769719055
Integrated Brier Score: 0.19685714452751085
durations 5.0 8605.0
Concordance Index 0.4392361111111111
Integrated Brier Score: 0.21829540469309708
Concordance Index 0.5238727734161903
Integrated Brier Score: 0.18803525447204142
durations 5.0 8008.0
Concordance Index 0.5103954341622503
Integrated Brier Score: 0.1885917807669175


  new_threshold = score - abs_threshold_change


Concordance Index 0.5966329476263251
Integrated Brier Score: 0.1764159968289377
durations 5.0 8556.0
Concordance Index 0.44328358208955226
Integrated Brier Score: 0.19878453449437192


  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change


Concordance Index 0.4765514678558157
Integrated Brier Score: 0.19527090568941444
durations 1.0 7106.0
Concordance Index 0.47054722899965146
Integrated Brier Score: 0.17854680599986736
Concordance Index 0.5752099525952307
Integrated Brier Score: 0.1906263674921274
durations 1.0 8391.0
Concordance Index 0.5904650254005471
Integrated Brier Score: 0.1962586157171207
split gex_?|100130426      float32
gex_?|100133144      float32
gex_?|100134869      float32
gex_?|10357          float32
gex_?|10431          float32
                      ...   
gex_ZYG11A|440590    float32
gex_ZYG11B|79699     float32
gex_ZYX|7791         float32
gex_ZZEF1|23140      float32
gex_ZZZ3|26009       float32
Length: 20531, dtype: object
Concordance Index 0.5625891785382646
Integrated Brier Score: 0.18387111856060354
durations 23.0 4760.0
Concordance Index 0.6120599512025096
Integrated Brier Score: 0.1982352074773835


  new_threshold = score - abs_threshold_change


Concordance Index 0.539677217453505
Integrated Brier Score: 0.1841052260042525
durations 14.0 4282.0
Concordance Index 0.517433234421365
Integrated Brier Score: 0.1856272318830407


  new_threshold = score - abs_threshold_change


Concordance Index 0.5639498711225669
Integrated Brier Score: 0.18033342236845598
durations 11.0 6417.0
Concordance Index 0.5789473684210527
Integrated Brier Score: 0.20509491709935604


  new_threshold = score - abs_threshold_change


Concordance Index 0.5611388384754991
Integrated Brier Score: 0.16962219432778766
durations 2.0 5480.0
Concordance Index 0.5764541971438523
Integrated Brier Score: 0.2170799607428195


  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change


Concordance Index 0.5670484344532353
Integrated Brier Score: 0.1835234966635478
durations 14.0 5152.0
Concordance Index 0.5777051561365287
Integrated Brier Score: 0.18657265480340693
split gex_?|100130426      float32
gex_?|100133144      float32
gex_?|100134869      float32
gex_?|10357          float32
gex_?|10431          float32
                      ...   
gex_ZYG11A|440590    float32
gex_ZYG11B|79699     float32
gex_ZYX|7791         float32
gex_ZZEF1|23140      float32
gex_ZZZ3|26009       float32
Length: 20531, dtype: object
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


  new_threshold = score - abs_threshold_change


0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


  new_threshold = score - abs_threshold_change


0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


  new_threshold = score - abs_threshold_change


0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
Concordance Index 0.7365467150874876
Integrated Brier Score: 0.1587273471171815
durations 3.0 3431.0
Concordance Index 0.6706638115631691
Integrated Brier Score: 0.17162059856997275
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
Concordance Index 0.747035788099641
Integrated Brier Score: 0.17121598193205342
durations 3.0 4537.0
Concordance Index 0.6956719817767654
Integrated Brier Score: 0.14162547191780075
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
Concordance Index 0.5894515223493846
Integrated Brier Score: 0.230004871766898
dur

  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change


0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change


0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change


0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


  new_threshold = score - abs_threshold_change


0
0


  new_threshold = score - abs_threshold_change


0
0
0
0
0
0
0
0


  new_threshold = score - abs_threshold_change


0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
Concordance Index 0.7249054328161834
Integrated Brier Score: 0.15498855977531317
durations 13.0 4067.0
Concordance Index 0.7092355280600973
Integrated Brier Score: 0.18615803887597343
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
Concordance Index 0.7114195371743571
Integrated Brier Score: 0.16479572900835784
durations 2.0 3944.0
Concordance Index 0.6434641845406718
Integrated Brier Score: 0.18217058999662805
split gex_?|100130426      float32
gex_?|100133144      float32
gex_?|100134869      float32
gex_?|10357          float32
gex_?|10431          float32
                      ...   
gex_ZYG11A|440590    float32
gex_ZYG11B|79699     float32
gex_ZYX|7791         float32
gex_ZZEF1|23140     

  new_threshold = score - abs_threshold_change


Concordance Index 0.5079461428098444
Integrated Brier Score: 0.21213023005367412
durations 6.0 3308.0
Concordance Index 0.38929695697796435
Integrated Brier Score: 0.2197766197753838
Concordance Index 0.5463565022421525
Integrated Brier Score: 0.21330430661788433
durations 1.0 3437.0
Concordance Index 0.5077071290944123
Integrated Brier Score: 0.2033636835418834
Concordance Index 0.5217190120886113
Integrated Brier Score: 0.20799672366109287
durations 8.0 3675.0
Concordance Index 0.42163543441226575
Integrated Brier Score: 0.23377096957571752


  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change


Concordance Index 0.5531807754713814
Integrated Brier Score: 0.20620738284885726
durations 6.0 3478.0
Concordance Index 0.6574427480916031
Integrated Brier Score: 0.2083669547605282


  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change


Concordance Index 0.5593409826419535
Integrated Brier Score: 0.19954671502759233
durations 6.0 2728.0
Concordance Index 0.4513491414554375
Integrated Brier Score: 0.2307368684436665
split gex_?|100130426      float32
gex_?|100133144      float32
gex_?|100134869      float32
gex_?|10357          float32
gex_?|10431          float32
                      ...   
gex_ZYG11A|440590    float32
gex_ZYG11B|79699     float32
gex_ZYX|7791         float32
gex_ZZEF1|23140      float32
gex_ZZZ3|26009       float32
Length: 20531, dtype: object
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change


0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


  new_threshold = score - abs_threshold_change


0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
Concordance Index 0.5226191616384127
Integrated Brier Score: 0.18450681268643837
durations 4.0 3635.0
Concordance Index 0.4332993890020367
Integrated Brier Score: 0.20523375350064293
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
00

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0

  new_threshold = score - abs_threshold_change


Concordance Index 0.5500802975234553
Integrated Brier Score: 0.1891091357425002
durations 1.0 3838.0
Concordance Index 0.42496793501496366
Integrated Brier Score: 0.2107196876473967
Concordance Index 0.5259777528765476
Integrated Brier Score: 0.19278908883955925
durations 4.0 4026.0
Concordance Index 0.5361179361179361
Integrated Brier Score: 0.19071632831538604
Concordance Index 0.524290238660785
Integrated Brier Score: 0.19128282039015757
durations 12.0 5287.0
Concordance Index 0.5350236355822948
Integrated Brier Score: 0.2077758229522273
Concordance Index 0.5331928633975481
Integrated Brier Score: 0.18525794376951302
durations 3.0 4765.0
Concordance Index 0.5085241110569898
Integrated Brier Score: 0.18972056408218146


  new_threshold = score - abs_threshold_change


Concordance Index 0.5374706265394524
Integrated Brier Score: 0.1772565333489037
durations 2.0 4694.0
Concordance Index 0.5031874203144922
Integrated Brier Score: 0.23038539253775075
split gex_?|100133144      float32
gex_?|100134869      float32
gex_?|10357          float32
gex_?|10431          float32
gex_?|155060         float32
                      ...   
gex_ZYG11A|440590    float32
gex_ZYG11B|79699     float32
gex_ZYX|7791         float32
gex_ZZEF1|23140      float32
gex_ZZZ3|26009       float32
Length: 19076, dtype: object


  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change


Concordance Index 0.5568133406366016
Integrated Brier Score: 0.12547915607148066
durations 8.0 4424.0
Concordance Index 0.4793070259865255
Integrated Brier Score: 0.1368579458168809
Concordance Index 0.5110780226325193
Integrated Brier Score: 0.14115343810948275
durations 9.0 5481.0
Concordance Index 0.45454545454545453
Integrated Brier Score: 0.11970272089756841


  new_threshold = score - abs_threshold_change


Concordance Index 0.5568898592916061
Integrated Brier Score: 0.12560097536482542
durations 53.0 4624.0
Concordance Index 0.5131195335276968
Integrated Brier Score: 0.1487005891607636
Concordance Index 0.536114795610756
Integrated Brier Score: 0.12393575323756159
durations 24.0 3871.0
Concordance Index 0.5099403578528827
Integrated Brier Score: 0.14735930683242254


  new_threshold = score - abs_threshold_change


Concordance Index 0.5336819440172255
Integrated Brier Score: 0.1204424599275313
durations 11.0 3525.0
Concordance Index 0.50187265917603
Integrated Brier Score: 0.16695636649584908
split gex_?|100133144      float32
gex_?|100134869      float32
gex_?|10357          float32
gex_?|10431          float32
gex_?|155060         float32
                      ...   
gex_ZYG11A|440590    float32
gex_ZYG11B|79699     float32
gex_ZYX|7791         float32
gex_ZZEF1|23140      float32
gex_ZZZ3|26009       float32
Length: 19076, dtype: object
Concordance Index 0.592316173352557
Integrated Brier Score: 0.20382264544145196
durations 7.0 3540.0
Concordance Index 0.5735797399041752
Integrated Brier Score: 0.22359732928205342


  new_threshold = score - abs_threshold_change
  new_threshold = score - abs_threshold_change


Concordance Index 0.583249791144528
Integrated Brier Score: 0.2103019851969471
durations 14.0 2267.0
Concordance Index 0.6364306784660767
Integrated Brier Score: 0.18670280741761702


  new_threshold = score - abs_threshold_change


Concordance Index 0.5777292014915777
Integrated Brier Score: 0.21554080857270327
durations 8.0 3720.0
Concordance Index 0.6534653465346535
Integrated Brier Score: 0.2085018780982113
Concordance Index 0.6037399065023374
Integrated Brier Score: 0.2178630302636448
durations 3.0 2197.0
Concordance Index 0.4800275482093664
Integrated Brier Score: 0.19829305210923884
Concordance Index 0.5929328319945308
Integrated Brier Score: 0.20376827014243937
durations 8.0 3519.0
Concordance Index 0.5788079470198676
Integrated Brier Score: 0.23499438551694968


In [18]:
df_final_efron_1_cindex = pd.concat([df for df in agg_metrics_cindex]).round(4)
df_final_efron_1_cindex.to_csv('metrics/final_deep_learning_tcga_efron_1_cindex.csv', index=False)
df_final_efron_1_cindex.to_csv('/Users/JUSC/Documents/644928e0fb7e147893e8ec15/05_thesis/tables/final_deep_learning_tcga_efron_1_cindex.csv', index=False)  #
df_final_efron_1_cindex

Unnamed: 0,dataset,cindex_train_mean,cindex_train_std,cindex_test_mean,cindex_test_std
0,BLCA,0.7048,0.0201,0.5809,0.0221
0,BRCA,0.5718,0.0795,0.4908,0.0625
0,HNSC,0.5589,0.011,0.5725,0.0342
0,KIRC,0.7019,0.0642,0.6397,0.093
0,LGG,0.7955,0.0227,0.7587,0.0514
0,LIHC,0.5377,0.0219,0.4855,0.1055
0,LUAD,0.5143,0.0175,0.5098,0.0527
0,LUSC,0.5342,0.0104,0.5016,0.0454
0,OV,0.5389,0.0191,0.4918,0.0246
0,STAD,0.59,0.01,0.5845,0.068


In [19]:
df_final_efron_1_ibs = pd.concat([df for df in agg_metrics_ibs]).round(4)
df_final_efron_1_ibs.to_csv('metrics/final_deep_learning_tcga_efron_1_ibs.csv', index=False)
df_final_efron_1_ibs.to_csv('/Users/JUSC/Documents/644928e0fb7e147893e8ec15/05_thesis/tables/final_deep_learning_tcga_efron_1_ibs.csv', index=False) 
df_final_efron_1_ibs

Unnamed: 0,dataset,ibs_train_mean,ibs_train_std,ibs_test_mean,ibs_test_std
0,BLCA,0.2003,0.0149,0.2611,0.0347
0,BRCA,0.1894,0.0081,0.1961,0.0147
0,HNSC,0.1803,0.0062,0.1985,0.0132
0,KIRC,0.1759,0.0308,0.1812,0.0297
0,LGG,0.1613,0.0069,0.1884,0.0467
0,LIHC,0.2078,0.0055,0.2192,0.0134
0,LUAD,0.1918,0.012,0.199,0.0137
0,LUSC,0.1871,0.0062,0.2059,0.0167
0,OV,0.1273,0.008,0.1439,0.0173
0,STAD,0.2103,0.0065,0.2104,0.0193
