# Demo Notebook:
## Random Survival Forest

In [1]:
import os
from pathlib import Path
import sys
# Put the node-specific virtual environment's site-packages directory at the
# front of the import search path. The environment directory name is derived
# from the BB_CPU environment variable (it is None when the variable is unset).
node_type = os.environ.get('BB_CPU')
venv_dir = f'/rds/homes/g/gaddcz/Projects/CPRD/virtual-envTorch2.0-{node_type}'
venv_site_pkgs = Path(venv_dir).joinpath(
    'lib',
    'python{}.{}'.format(sys.version_info.major, sys.version_info.minor),
    'site-packages',
)
if not venv_site_pkgs.exists():
    # Nothing is added to sys.path in this case; only a diagnostic is printed.
    print(f"Path '{venv_site_pkgs}' not found. Check that it exists and/or that it exists for node-type '{node_type}'.")
else:
    sys.path.insert(0, str(venv_site_pkgs))
    print(f"Added path '{venv_site_pkgs}' at start of search paths.")

%load_ext autoreload
%autoreload 2

Added path '/rds/homes/g/gaddcz/Projects/CPRD/virtual-envTorch2.0-icelake/lib/python3.10/site-packages' at start of search paths.


In [2]:
import pytorch_lightning
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import sqlite3
from dataclasses import dataclass
import logging
from FastEHR.dataloader import FoundationalDataModule
import pickle
from tqdm import tqdm

from pycox.datasets import support
from pycox.evaluation import EvalSurv
from sklearn.preprocessing import StandardScaler
from sklearn_pandas import DataFrameMapper
from torch.utils.data import TensorDataset, DataLoader

from sklearn import set_config
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder
from sksurv.datasets import load_gbsg2
from sksurv.preprocessing import OneHotEncoder
from sksurv.ensemble import RandomSurvivalForest

# Seed torch's global RNG and enable INFO-level logging for the session.
torch.manual_seed(1337)
logging.basicConfig(level=logging.INFO)

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
# device = "cpu"    # if more informative debugging statements are needed
print(f"Using device: {device}.")

Using device: cuda.


In [3]:
# Configure scikit-learn's global display option so that estimators shown as
# cell output use their text representation.
set_config(display="text")  # displays text representation of estimators

# Load data

In [4]:
def _load_benchmark_pickle(path, description):
    """Unpickle one benchmark file, printing what is being loaded.

    Parameters
    ----------
    path : str
        Location of the pickle file.
    description : str
        Human-readable label inserted into the progress message.

    Returns
    -------
    object
        Whatever object the pickle file contains.
    """
    # SECURITY: pickle.load can execute arbitrary code. Only point this at
    # trusted, project-generated files.
    with open(path, "rb") as handle:
        print(f"Loading {description} from {path}")
        return pickle.load(handle)


def _as_survival_array(events, times):
    """Pack event indicators and times into a structured array.

    The result has the fields ``('cens', bool)`` and ``('time', '<f8')``;
    ``cens`` is True wherever the event label is non-zero. The array is filled
    column-wise instead of via a Python list of per-row tuples.
    """
    times = np.asarray(times)
    packed = np.empty(times.shape[0], dtype=[('cens', 'bool'), ('time', '<f8')])
    packed['cens'] = np.asarray(events).astype(bool)
    packed['time'] = times
    return packed


def get_dataloaders(dataset, competing_risk, sample_size=None, seed=None):
    """Load train/validation/test splits for a single-risk survival benchmark.

    Parameters
    ----------
    dataset : str
        ``"pycox"`` (the SUPPORT data returned by ``support.read_df()``), or
        ``"hypertension"`` / ``"cvd"`` (pre-computed benchmark pickles).
        Matching is case-insensitive, but the value is inserted verbatim into
        the ``FineTune_{dataset}`` directory name.
    competing_risk : bool
        Must be exactly ``False``; any other value is rejected.
    sample_size : int or None
        For the pickle datasets, selects the training file
        ``N={sample_size}_seed{seed}.pickle``; ``None`` selects ``all.pickle``.
        Not used for ``"pycox"``.
    seed : int or None
        For the pickle datasets, part of the training file name. For
        ``"pycox"`` it seeds the random train/validation/test split
        (``None`` keeps the split unseeded).

    Returns
    -------
    tuple
        ``((Xtrain, ytrain), (Xval, yval), (Xtest, ytest))`` where every ``X``
        is a ``pandas.DataFrame`` of float32 features with positional integer
        column labels and every ``y`` is a structured array with the fields
        ``cens`` (True where the event label is non-zero) and ``time``.

    Raises
    ------
    NotImplementedError
        If ``competing_risk`` is not ``False``.
    ValueError
        If ``dataset`` is not one of the supported names.
    """
    # Fail fast, before any expensive I/O: competing risks are not supported.
    if competing_risk is not False:
        raise NotImplementedError("Competing risks are not supported; pass competing_risk=False.")

    dataset_key = dataset.lower()
    # split name -> (features, events, times)
    splits = {}

    if dataset_key == "pycox":
        df_all = support.read_df()
        # 20% test, then 20% of the remainder for validation; seeded when a seed is given.
        df_test = df_all.sample(frac=0.2, random_state=seed)
        df_rest = df_all.drop(df_test.index)
        df_val = df_rest.sample(frac=0.2, random_state=seed)
        df_train = df_rest.drop(df_val.index)

        cols_standardize = ['x0', 'x7', 'x8', 'x9', 'x10', 'x11', 'x12', 'x13']
        cols_leave = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6']
        standardize = [([col], StandardScaler()) for col in cols_standardize]
        leave = [(col, None) for col in cols_leave]
        x_mapper = DataFrameMapper(standardize + leave)

        # The mapper is fitted on the training rows only and re-used for val/test.
        x_train = x_mapper.fit_transform(df_train).astype('float32')
        x_val = x_mapper.transform(df_val).astype('float32')
        x_test = x_mapper.transform(df_test).astype('float32')

        # Durations are rescaled by the largest training duration.
        t_train_max = np.amax(df_train['duration'].values)
        for split_name, frame, features in (
            ("train", df_train, x_train),
            ("val", df_val, x_val),
            ("test", df_test, x_test),
        ):
            splits[split_name] = (
                features,
                frame['event'].values,
                frame['duration'].values / t_train_max,
            )

    elif dataset_key in ("hypertension", "cvd"):
        benchmark_dir = (
            "/rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/"
            f"FineTune_{dataset}/benchmark_data/"
        )
        full_path = benchmark_dir + "all.pickle"
        if sample_size is not None:
            train_path = benchmark_dir + f"N={sample_size}_seed{seed}.pickle"
        else:
            train_path = full_path

        data_train = _load_benchmark_pickle(train_path, "training dataset")
        # Validation and test samples always come from the full benchmark file;
        # do not unpickle it a second time when it is also the training file.
        if train_path == full_path:
            print(f"Reusing validation/test datasets from {full_path}")
            data_val_test = data_train
        else:
            data_val_test = _load_benchmark_pickle(full_path, "validation/test datasets")

        for split_name, source in (
            ("train", data_train),
            ("val", data_val_test),
            ("test", data_val_test),
        ):
            features = source[f"X_{split_name}"].to_numpy(dtype=np.float32)
            targets = source[f"y_{split_name}"]
            # Each target row is indexed as (event label, time); any non-zero
            # label is collapsed to a single event.
            times = np.asarray([row[1] for row in targets])
            events = np.asarray([0 if row[0] == 0 else 1 for row in targets])
            splits[split_name] = (features, events, times)

    else:
        raise ValueError(
            f"Unknown dataset '{dataset}'; expected one of 'pycox', 'hypertension' or 'cvd'."
        )

    # Feature frames keep default positional column labels.
    return tuple(
        (pd.DataFrame(features), _as_survival_array(events, times))
        for features, events, times in (splits[name] for name in ("train", "val", "test"))
    )




# Example dataloader function usage

In [5]:
# Load the single-risk CVD benchmark: the N=2999 training subsample drawn with
# seed 1, plus the validation and test splits.
dataset_train, dataset_val, dataset_test = get_dataloaders(
    "CVD",
    competing_risk=False,
    sample_size=2999,
    seed=1,
)

Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=2999_seed1.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle


In [6]:
# Peek at the test split: first feature rows, then the first three
# (event, time) target records.
features_preview, targets_preview = dataset_test
print(features_preview.head())
print(targets_preview[:3])

   0    1    2    3    4    5    6    7    8    9    ...  269  270  271  272  \
0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  1.0  0.0  0.0  ...  0.0  0.0  0.0  0.0   
1  1.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  0.0  ...  1.0  0.0  1.0  1.0   
2  1.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  0.0  ...  0.0  1.0  1.0  1.0   
3  1.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  0.0  0.0  ...  1.0  0.0  1.0  1.0   
4  1.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  0.0  ...  1.0  0.0  0.0  0.0   

   273  274  275  276  277  278  
0  0.0  0.0  0.0  0.0  0.0  0.0  
1  1.0  1.0  1.0  1.0  1.0  1.0  
2  0.0  1.0  1.0  1.0  0.0  0.0  
3  0.0  1.0  1.0  1.0  0.0  0.0  
4  0.0  1.0  1.0  1.0  0.0  0.0  

[5 rows x 279 columns]
[(False, 2.43013716) (False, 0.34082222) (False, 2.12383461)]


# Train model

In [7]:
# Benchmark configuration read by the training/evaluation loop below.
dataset = "CVD" # "Hypertension"
competing_risk = False
# Ten training-set sizes, log-spaced between 3000 and 500000. Each value is
# truncated with int(), which is why the recorded runs show N=2999 rather than 3000.
sample_sizes = [int(np.exp(_log_n)) for _log_n in np.linspace(np.log(3000), np.log(500000), 10)]      # [3000, 12500, 30000, 60000, 100000]: # 600, 1200, 
# sample_sizes = [None]

# NOTE: this assignment overrides the log-spaced list above. With [None],
# get_dataloaders reads its training data from "all.pickle".
sample_sizes = [None]

# the time grid which we generate over
t_eval = np.linspace(0, 1, 1000) 
# the time grid which we calculate scores over
time_grid = np.linspace(start=0, stop=1 , num=300)


In [None]:
# Fit one Random Survival Forest per (sample size, seed) pair and score it on
# the test split. Results are accumulated in the four parallel lists below.
model_names, all_ctd, all_ibs, all_inbll = [], [], [], []

seeds = [1, 2, 3, 4, 5]
bsz = 512  # number of test samples scored per batch

for sample_size in sample_sizes:

    for seed in seeds:
        # Load dataset
        dataset_train, dataset_val, dataset_test = get_dataloaders(dataset, competing_risk, sample_size=sample_size, seed=seed)

        # Create RSF model; max_samples bounds the bootstrap sample drawn for
        # each tree because of memory constraints.
        print("Fitting Random Survival Forest")
        model_name = f"RandomSurvivalForest-{'CR' if competing_risk else 'SR'}-{dataset}-Ns{sample_size}-seed{seed}"

        rsf = RandomSurvivalForest(
            bootstrap=True,
            max_samples=1000,
            random_state=seed,
            low_memory=False
        )

        # Train model
        rsf.fit(dataset_train[0], dataset_train[1])

        # For every point of t_eval, find the index of the closest time in
        # rsf.unique_times_, so the RSF is evaluated in the same way as the
        # other benchmarks. This depends only on the fitted model, so it is
        # computed once per fit rather than once per batch.
        closest_indices = [np.abs(rsf.unique_times_ - v).argmin() for v in t_eval]

        # Test
        print(f"Evaluating performance by splitting {dataset_test[0].shape} test samples into batches of size {bsz}")

        batch_ctd, batch_ibs, batch_inbll = [], [], []
        for batch_idx in range(0, dataset_test[0].shape[0], bsz):

            batch_dataset_test = (dataset_test[0][batch_idx:batch_idx + bsz], dataset_test[1][batch_idx:batch_idx + bsz])

            # Predict the survival function for the batch and keep only the
            # columns that correspond to t_eval.
            surv = rsf.predict_survival_function(batch_dataset_test[0], return_array=True)
            surv_reduced = surv[:, closest_indices]

            # Format appropriately: one row per evaluation time, one column per sample
            df_surv = pd.DataFrame(np.transpose(surv_reduced), index=t_eval)

            # Event indicators as 0./1. and event/censoring times, read
            # straight from the structured target array.
            lbls_test = batch_dataset_test[1]['cens'].astype(float)
            t_test = batch_dataset_test[1]['time'].astype(float)

            # Same treatment as in SurvivEHR
            ev = EvalSurv(df_surv, t_test, lbls_test, censor_surv='km')
            batch_ctd.append(ev.concordance_td())
            batch_ibs.append(ev.integrated_brier_score(time_grid))
            batch_inbll.append(ev.integrated_nbll(time_grid))

        # NOTE(review): every batch has equal weight in these means, although
        # the final batch may hold fewer than bsz samples.
        ctd = np.mean(batch_ctd)
        ibs = np.mean(batch_ibs)
        inbll = np.mean(batch_inbll)

        print(f"{model_name}:".ljust(20) + f"N={sample_size}.".ljust(15) + f"Ctd: {ctd}. IBS: {ibs}. INBLL: {inbll}")
        model_names.append(model_name)
        all_ctd.append(ctd)
        all_ibs.append(ibs)
        all_inbll.append(inbll)





Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest


# Output across different setups

Cardiovascular disease Single Risk

Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=2999_seed1.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns2999-seed1:N=2999.        Ctd: 0.6050549974162767. IBS: 0.03383077678732194. INBLL: 0.14606885241154202
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=2999_seed2.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns2999-seed2:N=2999.        Ctd: 0.5856461320569792. IBS: 0.03378237574761976. INBLL: 0.14689878007698268
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=2999_seed3.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns2999-seed3:N=2999.        Ctd: 0.5936900908610602. IBS: 0.03383942512083073. INBLL: 0.14740256058901058
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=2999_seed4.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns2999-seed4:N=2999.        Ctd: 0.5923465319233058. IBS: 0.033796528365858024. INBLL: 0.1469527995637962
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=2999_seed5.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns2999-seed5:N=2999.        Ctd: 0.5988433449825469. IBS: 0.03379244025458404. INBLL: 0.14664936458812675
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=5296_seed1.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns5296-seed1:N=5296.        Ctd: 0.5983413921798679. IBS: 0.033815929245841835. INBLL: 0.1470517860336029
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=5296_seed2.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns5296-seed2:N=5296.        Ctd: 0.5898195317696245. IBS: 0.03378236636861716. INBLL: 0.14611380520089556
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=5296_seed3.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns5296-seed3:N=5296.        Ctd: 0.6030335497205598. IBS: 0.03377549170594944. INBLL: 0.14621315482583425
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=5296_seed4.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns5296-seed4:N=5296.        Ctd: 0.5953006850223939. IBS: 0.03376983853660661. INBLL: 0.1461485310831724
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=5296_seed5.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns5296-seed5:N=5296.        Ctd: 0.5956759467174443. IBS: 0.033777151131126476. INBLL: 0.1464706651267304
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=9351_seed1.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns9351-seed1:N=9351.        Ctd: 0.6037574440899701. IBS: 0.03374016754812074. INBLL: 0.14632119526075635
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=9351_seed2.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns9351-seed2:N=9351.        Ctd: 0.6074400398218748. IBS: 0.03368897489612709. INBLL: 0.14512773930996004
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=9351_seed3.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns9351-seed3:N=9351.        Ctd: 0.6102088428271658. IBS: 0.03375174017089292. INBLL: 0.14574036016456002
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=9351_seed4.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns9351-seed4:N=9351.        Ctd: 0.5945422064366496. IBS: 0.03375690168237134. INBLL: 0.1460028175800757
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=9351_seed5.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns9351-seed5:N=9351.        Ctd: 0.6051031703799004. IBS: 0.033747079388685194. INBLL: 0.14616254240934823
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=16509_seed1.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns16509-seed1:N=16509.       Ctd: 0.6049865399441875. IBS: 0.033738979774062455. INBLL: 0.14590164777816397
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=16509_seed2.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns16509-seed2:N=16509.       Ctd: 0.6171773537198268. IBS: 0.03363106050931568. INBLL: 0.14447922939606384
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=16509_seed3.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns16509-seed3:N=16509.       Ctd: 0.612768685814654. IBS: 0.03373435975841272. INBLL: 0.14555285711135127
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=16509_seed4.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns16509-seed4:N=16509.       Ctd: 0.5981712817200965. IBS: 0.03373238118731551. INBLL: 0.14585963812661548
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=16509_seed5.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns16509-seed5:N=16509.       Ctd: 0.6068932939020619. IBS: 0.03373709095502161. INBLL: 0.145904539485273
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=29148_seed1.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns29148-seed1:N=29148.       Ctd: 0.6090793110104706. IBS: 0.033704308803194874. INBLL: 0.1454898441786305
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=29148_seed2.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns29148-seed2:N=29148.       Ctd: 0.6166020259238167. IBS: 0.03368006391189444. INBLL: 0.14495964943093298
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=29148_seed3.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns29148-seed3:N=29148.       Ctd: 0.6083219477923164. IBS: 0.03372055037145741. INBLL: 0.14570410758464833
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=29148_seed4.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns29148-seed4:N=29148.       Ctd: 0.6078736368358186. IBS: 0.033739914987627934. INBLL: 0.14543424048434073
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=29148_seed5.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns29148-seed5:N=29148.       Ctd: 0.612693945085753. IBS: 0.03370229188828538. INBLL: 0.1460348225096571
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=51461_seed1.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns51461-seed1:N=51461.       Ctd: 0.6150953521461171. IBS: 0.03370465481724242. INBLL: 0.14527436925940937
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=51461_seed2.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns51461-seed2:N=51461.       Ctd: 0.6189151200537787. IBS: 0.033677752779884496. INBLL: 0.14479172368969886
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=51461_seed3.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns51461-seed3:N=51461.       Ctd: 0.6120926578638579. IBS: 0.03371722467466767. INBLL: 0.14518973232949106
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=51461_seed4.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns51461-seed4:N=51461.       Ctd: 0.6137722256497855. IBS: 0.033732357764109086. INBLL: 0.14549238706285114
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=51461_seed5.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns51461-seed5:N=51461.       Ctd: 0.6152068444093235. IBS: 0.03371718323582292. INBLL: 0.14548398937787416
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=90856_seed1.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns90856-seed1:N=90856.       Ctd: 0.6115662133927512. IBS: 0.03376328110059976. INBLL: 0.1462502418661736
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=90856_seed2.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns90856-seed2:N=90856.       Ctd: 0.6134182054310305. IBS: 0.03370235133980715. INBLL: 0.14536891108941255
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=90856_seed3.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns90856-seed3:N=90856.       Ctd: 0.6066187339184608. IBS: 0.03375056834121298. INBLL: 0.14594548369635338
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=90856_seed4.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns90856-seed4:N=90856.       Ctd: 0.6137537489091667. IBS: 0.03369198195338203. INBLL: 0.1454274996417398
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=90856_seed5.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns90856-seed5:N=90856.       Ctd: 0.6125065969634841. IBS: 0.03369951228376922. INBLL: 0.14562201667364003
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=160407_seed1.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns160407-seed1:N=160407.      Ctd: 0.6074466129790403. IBS: 0.03370869523623503. INBLL: 0.145395121727693
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=160407_seed2.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns160407-seed2:N=160407.      Ctd: 0.6104350934665655. IBS: 0.033715398226301756. INBLL: 0.14532762602255492
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=160407_seed3.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns160407-seed3:N=160407.      Ctd: 0.6174864312707282. IBS: 0.03372897922605097. INBLL: 0.1451439386820801
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=160407_seed4.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns160407-seed4:N=160407.      Ctd: 0.6161027732336405. IBS: 0.03372194388654121. INBLL: 0.14553281585589306
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=160407_seed5.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns160407-seed5:N=160407.      Ctd: 0.6080864309634134. IBS: 0.03373469073224334. INBLL: 0.1457159713880119
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=283203_seed1.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns283203-seed1:N=283203.      Ctd: 0.6198004309013945. IBS: 0.03368757582304347. INBLL: 0.145226209669375
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=283203_seed2.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns283203-seed2:N=283203.      Ctd: 0.6134348891479078. IBS: 0.03371836186433899. INBLL: 0.14535939802093856
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=283203_seed3.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns283203-seed3:N=283203.      Ctd: 0.6092781628435148. IBS: 0.033730884032591335. INBLL: 0.14549239488786206
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=283203_seed4.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns283203-seed4:N=283203.      Ctd: 0.6177802733435455. IBS: 0.033710801488295875. INBLL: 0.1451376648384558
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=283203_seed5.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns283203-seed5:N=283203.      Ctd: 0.6098186179612868. IBS: 0.03372014485175013. INBLL: 0.1454747021258158
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=500000_seed1.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns500000-seed1:N=500000.      Ctd: 0.6181133559643474. IBS: 0.033678623933015926. INBLL: 0.14484824930176862
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=500000_seed2.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns500000-seed2:N=500000.      Ctd: 0.6120717907289123. IBS: 0.03368571523884484. INBLL: 0.14500663438800465
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=500000_seed3.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns500000-seed3:N=500000.      Ctd: 0.6083339750394585. IBS: 0.03370765207242725. INBLL: 0.14521212801497835
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=500000_seed4.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns500000-seed4:N=500000.      Ctd: 0.612006239300436. IBS: 0.0337064649130458. INBLL: 0.145366314266464
Loading training dataset from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/N=500000_seed5.pickle
Loading validation/test datasets from /rds/projects/g/gokhalkm-optimal/OPTIMAL_MASTER_DATASET/data/FoundationalModel/FineTune_CVD/benchmark_data/all.pickle
Fitting Random Survival Forest
Evaluating performance by splitting (35758, 279) test samples into batches of size 512
RandomSurvivalForest-SR-CVD-Ns500000-seed5:N=500000.      Ctd: 0.609981496408701. IBS: 0.0336829623504523. INBLL: 0.145314769647461


In [None]:
# Sanity check: print the number of entries currently held in `ctd`.
# NOTE(review): `ctd` is defined outside this cell — presumably the collection
# of Ctd scores accumulated by the benchmarking runs; confirm against the cell
# that creates it. This cell fails with NameError on a fresh kernel unless that
# cell has been run first.
print(len(ctd))