# Survival Forest for LTRC (left-truncated, right-censored) data

In [2]:
import imageio
from IPython.display import display, Image

import sys
sys.path.append("../")
import matplotlib.pyplot as plot
import numpy as np
import pandas as pd
import seaborn as sns
from lifelines import datasets
from lifelines.fitters import coxph_fitter, log_logistic_aft_fitter
from lifelines.plotting import plot_lifetimes
from sklearn.model_selection import train_test_split

from benchmark import synthetic
from survival_trees import LTRCTrees, RandomForestLTRCFitter, RandomForestLTRC, LTRCTreesFitter
from survival_trees import plotting
from survival_trees.metric import concordance_index, time_dependent_roc

Proportion of censored event 0.475
Average duration 0.02670637525582452
Average truncated subjects 0.07425000000000004


In [None]:
# Build the (not yet fitted) random-forest LTRC model used by the cells below.
# NOTE(review): the hyper-parameter names look scikit-learn-like (number of trees,
# minimum leaf size, per-tree sampling fraction) — confirm against survival_trees.
ltrc_forest = RandomForestLTRCFitter(
    n_estimators=20, min_samples_leaf=3, max_samples=0.8
)

In [None]:
# Load the raw dataset from a CSV file; the path is resolved relative to the
# current working directory.
# NOTE(review): the evaluation cell reads the "time" and "death" columns from this
# frame and treats every remaining column as a feature — verify against the file.
data = pd.read_csv("./dataset.csv")

In [None]:
# Fit the LTRC forest on a training split and report the mean concordance index
# obtained on the held-out split.

# Fixed seed for the train/test split: without it every run draws a different
# split, so the printed score is not reproducible across "Restart & Run All".
SPLIT_SEED = 42

# Every subject is given an entry time of 0, i.e. no left truncation is applied.
data["entry_date"] = 0

# Targets are (entry, duration, event); every other column is a feature.
y = data[["entry_date", "time", "death"]]
X = data.drop(columns=y.columns.tolist())

x_train, x_test, y_train, y_test = train_test_split(
    X, y, train_size=0.6, random_state=SPLIT_SEED)

# The fitter receives a single frame (features + targets); rows with any missing
# value are dropped before fitting.
ltrc_forest.fit(
    pd.concat((x_train, y_train), axis=1).dropna(),
    entry_col=y_train.columns[0],
    duration_col=y_train.columns[1],
    event_col=y_train.columns[2],
)

# Score passed to the concordance index: 1 - predicted cumulative hazard,
# transposed, with rows containing missing predictions removed.
# NOTE(review): the transpose presumably puts one subject per row, since the
# index is used below to select rows of y_test — confirm.
test = 1 - ltrc_forest.predict_cumulative_hazard(
    x_test).astype(float).T
test = test.dropna()

# Align the observed outcomes with the rows that survived dropna().
y_eval = y_test.loc[test.index]
c_index = concordance_index(
    test,
    death=y_eval.iloc[:, 2],
    censoring_time=y_eval.iloc[:, 1])

# Average the concordance values, ignoring NaN entries.
result = np.nanmean(c_index)

print(result)