In [1]:
import os
import sys

# Location of the local `pysurvival_mine` checkout that is put on the import
# path. Set the PYSURVIVAL_MINE_PATH environment variable to adapt it without
# editing this cell; the fallback is the path the notebook was written against.
PYSURVIVAL_MINE_PATH = os.environ.get(
    "PYSURVIVAL_MINE_PATH",
    "/home/cctrotte/krauthammer/stcs_code/pysurvival_mine/",
)

# Guarded append: re-running this cell must not add duplicate sys.path entries.
if PYSURVIVAL_MINE_PATH not in sys.path:
    sys.path.append(PYSURVIVAL_MINE_PATH)

In [2]:
#### 1 - Importing packages
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from pysurvival_mine.models.simulations import SimulationModel
from pysurvival_mine.models.parametric_mine import GompertzModelMine, LogLogisticModelMine, LogNormalModelMine
from pysurvival_mine.utils.metrics import concordance_index
from pysurvival_mine.utils.display import integrated_brier_score
%pylab inline

#### 2 - Generating the dataset from a Gompertz parametric model
# Seed NumPy's global RNG so that a fresh "Restart & Run All" simulates the
# same dataset every time.
# NOTE(review): assumes SimulationModel draws from NumPy's global RNG -- confirm.
SEED = 42
np.random.seed(SEED)

# Initializing the simulation model
sim = SimulationModel(
    survival_distribution='Gompertz',
    risk_type='linear',
    censored_parameter=10.0,
    alpha=0.01,
    beta=3.0,
)

# Generating N random samples
N = 1000
dataset = sim.generate_data(num_samples=N, num_features=3)

# Names of the time and event columns in `dataset`
time_column = 'time'
event_column = 'event'

# Showing a few data-points (kept as the last expression so the cell displays it)
dataset.head(2)

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib
Number of data-points: 1000 - Number of events: 958.0


Unnamed: 0,x_1,x_2,x_3,time,event
0,0.000576,8.180828,8.0,2.552202,1.0
1,0.000383,4.979415,10.0,2.026853,1.0


In [9]:
#### 3 - Creating the modeling dataset
# Defining the features
features = sim.features

# Building training and testing sets.
# A fixed random_state makes the split reproducible. The indices are drawn from
# range(N), i.e. they are row positions, so rows are selected with .iloc.
index_train, index_test = train_test_split(range(N), test_size=0.2, random_state=42)
data_train = dataset.iloc[index_train].reset_index(drop=True)
data_test = dataset.iloc[index_test].reset_index(drop=True)

# Creating the X, T and E input (column names come from the simulation cell)
X_train, X_test = data_train[features], data_test[features]
T_train, T_test = data_train[time_column].values, data_test[time_column].values
E_train, E_test = data_train[event_column].values, data_test[event_column].values

# NOTE(review): the validation set is the test set itself, so the validation
# metrics passed to fit() are not independent of the final test scores.
X_valid, T_valid, E_valid = X_test, T_test, E_test

# The scaler is fitted on the training split only and then applied to the
# validation/test features.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

# Fail early, with a clear message, if the model inputs already contain NaNs;
# this separates an input problem from NaNs produced later by the model.
assert not np.isnan(X_train).any(), "X_train contains NaN after scaling"
assert not np.isnan(X_test).any(), "X_test contains NaN after scaling"

#### 4 - Creating an instance of the parametric model and fitting the data.
# NOTE(review): the variable keeps the name `gomp_model` (the plotting cell
# reads it), but the estimator is LogNormalModelMine while the data above are
# simulated from a Gompertz distribution -- confirm this mismatch is intended.
gomp_model = LogNormalModelMine(auto_scaler=False)
gomp_model.fit(
    X_train, T_train, E_train,
    X_valid, T_valid, E_valid,
    lr=1e-3,
    init_method='zeros',
    optimizer='adam',
    l2_reg=1e-3,
    num_epochs=2000,
)

#### 5 - Cross Validation / Model Performances
c_index = concordance_index(gomp_model, X_test, T_test, E_test)  # author's note: 0.77
print('C-index: {:.2f}'.format(c_index))

ibs = integrated_brier_score(
    gomp_model, X_test, T_test, E_test,
    t_max=30,
    figure_size=(20, 6.5),
)
print('IBS: {:.2f}'.format(ibs))

% Completion:   0%|                                                           |

ValueError: The #1 argument contains null values

In [7]:
# Read the recorded metrics before creating the figure, so that a model without
# a `metrics` attribute raises here and does not leave an empty figure behind.
fit_metrics = gomp_model.metrics

# Train vs. validation C-index curves on a single, explicitly created axes.
fig, ax = plt.subplots()
ax.plot(fit_metrics["c_index_valid"], label="valid")
ax.plot(fit_metrics["c_index_train"], label="train")
# NOTE(review): the x-axis is the position in the recorded metric lists,
# presumably one entry per epoch -- confirm against the model's fit().
ax.set_xlabel("Recorded step")
ax.set_ylabel("C-index")
ax.set_title("C-index: train vs. valid")
ax.legend();

AttributeError: 'LogNormalModelMine' object has no attribute 'metrics'

<Figure size 640x480 with 0 Axes>