In [5]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sksurv.metrics import concordance_index_censored
from sksurv.svm import FastSurvivalSVM
import matplotlib.pyplot as plt

In [6]:
data = pd.read_csv(os.path.join('..', '..', 'data', 'tidy_Stroke_Vital_Sign.csv'))
data_x = data.drop(['UID', 'Hospital_ID', 'SurvivalWeeks', 'admission_date',
                    'discharge_date', 'death_date', 'Mortality', 'CVDeath'], axis=1)
categorical_ix = [0, 2, 3, 4, 5, 6, 7, 8, 15, 16, 17]
categorical_columns = data_x.columns[categorical_ix].values
data_x_one_hot = pd.get_dummies(data_x, columns=categorical_columns)

data_y = data[['Mortality', 'SurvivalWeeks']]
data_y['Mortality'] = data_y['Mortality'].astype(bool)
data_y = np.array(list(data_y.to_records(index=False)))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_y['Mortality'] = data_y['Mortality'].astype(bool)


In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    data_x_one_hot, data_y, test_size=0.25, random_state=369)

# As a ranking problem

In [9]:
fsSVM = FastSurvivalSVM(max_iter=1000, tol=1e-5, random_state=0)

In [10]:
fsSVM.fit(X_train, y_train)

print(fsSVM.score(X_test, y_test))

0.8107292326449881


# As a regression problem

In [14]:
y_log_t = y_train.copy()
y_log_t["SurvivalWeeks"] = np.log1p(y_train["SurvivalWeeks"])
y_log_t

array([(False, 4.64026151), (False, 5.91079664), (False, 5.312502  ), ...,
       ( True, 1.09861229), (False, 5.03323513), (False, 5.65748919)],
      dtype=(numpy.record, [('Mortality', '?'), ('SurvivalWeeks', '<f8')]))

In [15]:
ref_estimator = FastSurvivalSVM(rank_ratio=0.0, max_iter=1000, tol=1e-5, random_state=0)
ref_estimator.fit(X_train, y_log_t)

cindex = concordance_index_censored(
    y_train['Mortality'],
    y_train['SurvivalWeeks'],
    -ref_estimator.predict(X_train),  # flip sign to obtain risk scores
)
print(cindex[0])

0.566484241714088
