In [66]:
import os
import pickle

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import pairwise_kernels
from sksurv.column import encode_categorical
from sksurv.datasets import load_veterans_lung_cancer
from sksurv.metrics import concordance_index_censored
from sksurv.svm import FastKernelSurvivalSVM

In [50]:
def gramMatrix(X1, X2, K_function):
    """Build the pairwise kernel (Gram) matrix between the rows of two arrays.

    Parameters
    ----------
    X1, X2 : array
        Objects exposing ``.shape`` and iterable along their first axis.
    K_function : callable
        Called as ``K_function(x1, x2)`` for every pair of rows; its result
        is stored as a float.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(X1.shape[0], X2.shape[0])`` whose ``[i, j]``
        entry is ``K_function(X1[i], X2[j])``.
    """
    n_rows, n_cols = X1.shape[0], X2.shape[0]
    # Evaluate in row-major order, then fold the flat list back into a matrix.
    entries = [K_function(row_a, row_b) for row_a in X1 for row_b in X2]
    return np.array(entries, dtype=float).reshape(n_rows, n_cols)

In [51]:
def _power_kernel(x, beta=2):
    """Gram matrix over the rows of x for k(a, b) = -||a - b|| ** beta."""
    def negative_power_distance(a, b):
        return -(np.linalg.norm(a - b) ** beta)

    return gramMatrix(x, x, negative_power_distance)


def _log_kernel(x, beta=2):
    """Gram matrix over the rows of x for k(a, b) = -log(1 + ||a - b|| ** beta)."""
    def negative_log_distance(a, b):
        distance = np.linalg.norm(a - b, axis=0)
        return -np.log(1 + distance ** beta)

    return gramMatrix(x, x, negative_log_distance)


def _mixture_kernel(x, alpha = 0.05, gamma=0.5, degree=2):
    """Convex combination: alpha * RBF kernel + (1 - alpha) * polynomial kernel."""
    rbf_part = pairwise_kernels(x, metric='rbf', gamma=gamma)
    poly_part = pairwise_kernels(x, metric='poly', degree=degree)
    return alpha * rbf_part + (1-alpha)*poly_part


# Registry of the available kernels, keyed by name. Each entry takes the sample
# array (plus optional hyper-parameters) and returns the square kernel matrix.
kernels = {
    'power': _power_kernel,
    'log': _log_kernel,
    'mixture': _mixture_kernel,
}

# Seed handed to the estimator for reproducible fits.
random_state = 0

In [52]:
# get data
# Load the veterans' lung cancer data: data_x holds the covariates, y the outcome records.
data_x, y = load_veterans_lung_cancer()
# Encode the categorical columns and convert the result to a plain NumPy array,
# since gramMatrix relies on `.shape` and iterates over array rows.
x = np.array(encode_categorical(data_x))
x

  columns_to_encode = {nam for nam, s in table.iteritems() if _is_categorical_or_object(s)}
  for name, series in table.iteritems():


array([[69.,  0.,  0., ...,  7.,  0.,  0.],
       [64.,  0.,  0., ...,  5.,  1.,  0.],
       [38.,  0.,  0., ...,  3.,  0.,  0.],
       ...,
       [67.,  1.,  0., ..., 18.,  1.,  1.],
       [65.,  1.,  0., ...,  4.,  0.,  1.],
       [37.,  1.,  0., ...,  3.,  0.,  1.]])

In [53]:
# Outcome records; the 'Status' and 'Survival_in_days' fields are read when scoring.
y 

array([( True,  72.), ( True, 411.), ( True, 228.), ( True, 126.),
       ( True, 118.), ( True,  10.), ( True,  82.), ( True, 110.),
       ( True, 314.), (False, 100.), ( True,  42.), ( True,   8.),
       ( True, 144.), (False,  25.), ( True,  11.), ( True,  30.),
       ( True, 384.), ( True,   4.), ( True,  54.), ( True,  13.),
       (False, 123.), (False,  97.), ( True, 153.), ( True,  59.),
       ( True, 117.), ( True,  16.), ( True, 151.), ( True,  22.),
       ( True,  56.), ( True,  21.), ( True,  18.), ( True, 139.),
       ( True,  20.), ( True,  31.), ( True,  52.), ( True, 287.),
       ( True,  18.), ( True,  51.), ( True, 122.), ( True,  27.),
       ( True,  54.), ( True,   7.), ( True,  63.), ( True, 392.),
       ( True,  10.), ( True,   8.), ( True,  92.), ( True,  35.),
       ( True, 117.), ( True, 132.), ( True,  12.), ( True, 162.),
       ( True,   3.), ( True,  95.), ( True, 177.), ( True, 162.),
       ( True, 216.), ( True, 553.), ( True, 278.), ( True,  1

In [54]:
# nx = np.array(x)
# gram_matrix = np.zeros((nx.shape[0], x.shape[0]))
# for i, x1 in enumerate(nx):
#     for j, x2 in enumerate(nx):
#         print(x1, x2)

[69.  0.  0.  1. 60.  7.  0.  0.] [69.  0.  0.  1. 60.  7.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [64.  0.  0.  1. 70.  5.  1.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [38.  0.  0.  1. 60.  3.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [63.  0.  0.  1. 60.  9.  1.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [65.  0.  0.  1. 70. 11.  1.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [49.  0.  0.  1. 20.  5.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [69.  0.  0.  1. 40. 10.  1.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [68.  0.  0.  1. 80. 29.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [43.  0.  0.  1. 50. 18.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [70.  0.  0.  1. 70.  6.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [81.  0.  0.  1. 60.  4.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [63.  0.  0.  1. 40. 58.  1.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [63.  0.  0.  1. 30.  4.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [52.  0.  0.  1. 80.  9.  1.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [48.  0.  0.  

In [60]:
# evaluate kernel matrix
# `choice` picks an entry of the `kernels` dict; the selected callable is applied to
# the sample array with its default hyper-parameters.
choice = 'log'
kernel = kernels[choice]
kernel_matrix = kernel(x)
kernel_matrix

[69.  0.  0.  1. 60.  7.  0.  0.] [69.  0.  0.  1. 60.  7.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [64.  0.  0.  1. 70.  5.  1.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [38.  0.  0.  1. 60.  3.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [63.  0.  0.  1. 60.  9.  1.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [65.  0.  0.  1. 70. 11.  1.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [49.  0.  0.  1. 20.  5.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [69.  0.  0.  1. 40. 10.  1.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [68.  0.  0.  1. 80. 29.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [43.  0.  0.  1. 50. 18.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [70.  0.  0.  1. 70.  6.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [81.  0.  0.  1. 60.  4.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [63.  0.  0.  1. 40. 58.  1.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [63.  0.  0.  1. 30.  4.  0.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [52.  0.  0.  1. 80.  9.  1.  0.]
[69.  0.  0.  1. 60.  7.  0.  0.] [48.  0.  0.  

array([[-0.        , -4.87519732, -6.88550967, ..., -5.43807931,
        -6.06145692, -7.57250299],
       [-4.87519732, -0.        , -6.66185474, ..., -5.20400669,
        -4.67282883, -7.75705114],
       [-6.88550967, -6.66185474, -0.        , ..., -7.06561336,
        -7.03350648, -6.80793494],
       ...,
       [-5.43807931, -5.20400669, -7.06561336, ..., -0.        ,
        -5.71042702, -7.91095738],
       [-6.06145692, -4.67282883, -7.03350648, ..., -5.71042702,
        -0.        , -8.0974263 ],
       [-7.57250299, -7.75705114, -6.80793494, ..., -7.91095738,
        -8.0974263 , -0.        ]])

In [61]:
# run SVM on kernel matrix
# kernel='precomputed': the estimator is handed the kernel matrix itself rather than raw features.
model = FastKernelSurvivalSVM(kernel='precomputed', random_state=random_state)

In [62]:
# Fit on the kernel matrix computed from x, with y as the survival outcome.
model.fit(kernel_matrix, y)

  model.fit(kernel_matrix, y)


In [63]:
def score_survival_model(model, X, y):
    """Score a fitted survival model by its concordance index.

    Parameters
    ----------
    model : estimator
        Fitted object exposing ``predict(X)``.
    X : array
        Input passed straight to ``model.predict``.
    y : structured array
        Must provide the ``'Status'`` and ``'Survival_in_days'`` fields.

    Returns
    -------
    The first element of the ``concordance_index_censored`` result.
    """
    estimates = model.predict(X)
    # Only the leading element of the returned tuple is needed.
    c_index, *_ = concordance_index_censored(y['Status'], y['Survival_in_days'], estimates)
    return c_index

In [64]:
# Scored on the same kernel_matrix / y the model was fitted on, so this is an
# in-sample figure rather than a held-out estimate.
score = score_survival_model(model, kernel_matrix, y)
score

0.5638346206269877

In [71]:
# save the model to disk
model_dir = 'models'
# open(..., 'wb') does not create missing parent folders, so make sure the
# target directory exists before writing.
os.makedirs(model_dir, exist_ok=True)
filename = os.path.join(model_dir, 'finalized_model.sav')
# Context managers guarantee the file handle is flushed and closed.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# some time later...

# load the model from disk
# NOTE: unpickling can execute arbitrary code; only load files that were
# written by this notebook or come from a trusted source.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)


In [72]:
# score = score_survival_model(loaded_model, kernel_matrix, y)
# score