In [53]:
import pandas as pd
import numpy as np
import time

from sksurv.datasets import load_veterans_lung_cancer
from sksurv.linear_model import CoxPHSurvivalAnalysis
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest
from sklearn.pipeline import Pipeline
from statsmodels.stats.outliers_influence import variance_inflation_factor    


In [49]:
# Load the full data set from a pickle file in the working directory.
# NOTE(review): unpickling can execute arbitrary code -- only load 'ALLDATA.pkl'
# when the file comes from a trusted source.
data = pd.read_pickle('ALLDATA.pkl')

In [50]:
# Drop rows whose wait time holds one of the strings 'Err1'..'Err5' (presumably
# error markers -- confirm against the data dictionary) instead of a number.
# `~` is the boolean NOT for a pandas mask; unary minus on booleans is rejected
# by NumPy and only works on a Series through a pandas special case.
data = data.loc[~data['Wait Time (s)'].isin(['Err1', 'Err2', 'Err3', 'Err4', 'Err5'])]
# Keep only rows where the 'Age_9-12' indicator is 0 or 1.
data = data.loc[data['Age_9-12'].isin([0, 1])]

# Outcome table, re-indexed 0..n-1: 'Survival' is the wait time and 'Status'
# flags whether the event was observed. All pedestrians cross, so there is no
# right-censored data and 'Status' is True for every row.
data_y = pd.DataFrame(
    {'Status': True, 'Survival': data['Wait Time (s)'].values},
    index=range(data.shape[0]),
    columns=['Status', 'Survival'],
)

# Covariates used for modelling.
# TODO: numwalk and VRexpnum are left out because the data contains some false
# inputs for them; add them back once the data is fixed.
data_x = data.loc[:, [
    'Speed Limit', 'Lane Width', 'Minimum Gap', 'Mean Arrival Rate', 'AV',
    'Full Braking Before Impact_-1.0', 'Full Braking Before Impact_1',
    'Full Braking Before Impact_2', 'Full Braking Before Impact_3',
    'Clear', 'Snowy',
    'One way', 'two way', 'Two way with median',
    'Day', 'Night', 'numcars',
    'Age_9-12', 'Age_15-18', 'Age_12-15', 'Age_18 - 24', 'Age_25 - 29',
    'Age_30 - 39', 'Age_40 - 49', 'Age_50 - 59', 'Age_60+',
    'Gender_Female',
    'Occupation_Employed', 'Occupation_Student', 'Occupation_Unemployed',
    'Occupation_kid',
    'Education_Bachelors degree', 'Education_College/University student',
    'Education_Doctorate degree', 'Education_High school diploma',
    'Education_Masters degree', 'Education_Professional degree', 'Education_kid',
    'driving license_Yes',
    'mode_Bike', 'mode_Car', 'mode_Public Transit', 'mode_Walking',
    'workwalk_No', 'workwalk_Sometimes', 'workwalk_Yes',
    'shopwalk_No', 'shopwalk_Sometimes', 'shopwalk_Yes', 'shopwalk_kid',
    'Vrexp_Yes',
    'Heart_Currently', 'Heart_Over the years',
    'vision_Currently', 'vision_Currently;Over the years', 'vision_Over the years',
    'anxiety_Currently', 'anxiety_Over the years',
    'Headaches_Currently', 'Headaches_Over the years',
    'dizziness_Over the years',
]]

In [51]:
# Hold on to the two outcome columns first: the name `data_y` is rebound below.
status = data_y["Status"]
survival = data_y['Survival']

# Rebuild the outcome as a NumPy structured array with one record per row of
# `data`: a boolean 'Status' field followed by a float64 'Survival' field.
# This array is what the estimators below receive as `y`.
data_y = np.zeros(len(data), dtype=[('Status', 'bool'), ('Survival', 'f8')])
data_y['Status'] = status
data_y['Survival'] = survival

In [56]:
def calculate_vif_(X, thresh=5.0):
    """Iteratively drop the column with the largest variance inflation factor.

    On every pass the VIF of each remaining column is computed with
    ``variance_inflation_factor``. If the largest VIF is strictly greater than
    ``thresh`` that column is removed and the VIFs are recomputed. The loop
    ends when no remaining column exceeds the threshold or no columns are left.
    Each drop, and the final set of columns, is printed.

    Parameters
    ----------
    X : pandas.DataFrame
        Candidate covariates, one column per variable (accessed through
        ``.iloc``, ``.columns`` and ``.values``).
    thresh : float, default 5.0
        Columns are dropped while the maximum VIF is greater than this value.

    Returns
    -------
    pandas.DataFrame
        ``X`` restricted to the surviving columns, in their original order.
    """
    variables = list(range(X.shape[1]))
    dropped = True
    # `and variables` covers the empty case (no columns to begin with, or all
    # of them dropped): max() over an empty VIF list would raise ValueError.
    while dropped and variables:
        dropped = False
        current = X.iloc[:, variables]  # slice once per pass instead of per use
        exog = current.values
        vif = [variance_inflation_factor(exog, ix)
               for ix in range(exog.shape[1])]

        worst = max(vif)
        maxloc = vif.index(worst)
        if worst > thresh:
            # maxloc is a position within the *remaining* columns.
            # str.format also works when column labels are not strings.
            print("dropping '{}' at index: {}".format(current.columns[maxloc],
                                                      maxloc))
            del variables[maxloc]
            dropped = True

    print('Remaining variables:')
    print(X.columns[variables])
    return X.iloc[:, variables]

In [58]:
# Coerce every covariate column to a numeric dtype; values that cannot be
# parsed become NaN (errors='coerce').
data_x=data_x.apply(pd.to_numeric, errors='coerce')
# Prune collinear covariates: calculate_vif_ repeatedly drops the column with
# the highest variance inflation factor while that factor exceeds 5.
data_x=calculate_vif_(data_x, thresh=5.0)

  vif = 1. / (1. - r_squared_i)


dropping 'Full Braking Before Impact_-1.0' at index: 5
dropping 'Clear' at index: 8
dropping 'One way' at index: 9
dropping 'Day' at index: 11
dropping 'Age_9-12' at index: 13
dropping 'Age_18 - 24' at index: 15
dropping 'Occupation_Employed' at index: 21
dropping 'Occupation_kid' at index: 23
dropping 'Education_Bachelors degree' at index: 23
dropping 'Education_kid' at index: 28
dropping 'mode_Bike' at index: 29
dropping 'workwalk_No' at index: 32
dropping 'shopwalk_Yes' at index: 36
dropping 'AV' at index: 4
dropping 'Lane Width' at index: 1
dropping 'Speed Limit' at index: 0
dropping 'driving license_Yes' at index: 25
dropping 'Minimum Gap' at index: 0
dropping 'Mean Arrival Rate' at index: 0
dropping 'mode_Car' at index: 23
dropping 'numcars' at index: 7
dropping 'Heart_Over the years' at index: 31
Remaining variables:
Index(['Full Braking Before Impact_1', 'Full Braking Before Impact_2',
       'Full Braking Before Impact_3', 'Snowy', 'two way',
       'Two way with median', 'Nig

In [59]:
# Fit a Cox proportional hazards model (default settings) on all covariates
# currently in data_x, with the structured array data_y as the outcome.
estimator = CoxPHSurvivalAnalysis()
estimator.fit(data_x,data_y)

CoxPHSurvivalAnalysis(alpha=0, n_iter=100, tol=1e-09, verbose=0)

In [60]:
# In-sample score: evaluated on the same data the estimator was fitted on.
estimator.score(data_x, data_y)

0.5940847838760723

In [61]:
def fit_and_score_features(X, y):
    """Score each feature by fitting a one-covariate Cox model on it.

    For every column ``j`` a ``CoxPHSurvivalAnalysis`` model is fitted on that
    column alone and scored on the same data; the score is stored at position
    ``j`` of the result. The signature ``(X, y) -> scores`` lets the function
    be passed as ``score_func`` to ``SelectKBest``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix. A pandas DataFrame is accepted and converted to its
        underlying array; other inputs are used as given and must support
        ``X[:, j:j+1]`` slicing.
    y : structured array
        Outcome passed unchanged to the model's ``fit`` and ``score``.

    Returns
    -------
    numpy.ndarray of shape (n_features,)
        In-sample score of the single-feature model for each column.
    """
    # Positional slicing below does not work on a DataFrame, so unwrap it;
    # every other input type is left untouched.
    if isinstance(X, pd.DataFrame):
        X = X.values
    n_features = X.shape[1]
    scores = np.empty(n_features)
    # One estimator instance is reused; each fit() call refits it from scratch.
    m = CoxPHSurvivalAnalysis(alpha=0.000001,n_iter=100, tol=1e-09, verbose=0)
    for j in range(n_features):
        Xj = X[:, j:j+1]  # keep 2-D shape (n_samples, 1)
        m.fit(Xj, y)
        scores[j] = m.score(Xj, y)
    return scores

# Score every covariate currently in data_x on its own, then list them from
# highest to lowest score.
scores = fit_and_score_features(data_x.values, data_y)
pd.Series(scores, index=data_x.columns).sort_values(ascending=False)

Age_30 - 39                             0.536548
Occupation_Student                      0.522318
Education_High school diploma           0.520144
Two way with median                     0.518122
shopwalk_No                             0.516514
workwalk_Yes                            0.514533
Age_50 - 59                             0.513162
mode_Public Transit                     0.512536
Education_Masters degree                0.511592
Gender_Female                           0.510568
Full Braking Before Impact_1            0.510389
Headaches_Over the years                0.507856
vision_Over the years                   0.507032
Age_40 - 49                             0.506626
Education_Doctorate degree              0.506580
Education_College/University student    0.506384
Vrexp_Yes                               0.505919
Snowy                                   0.505850
Full Braking Before Impact_2            0.505284
Headaches_Currently                     0.505247
anxiety_Currently   

In [22]:

#data_x=data_x.loc[:,pd.Series(scores, index=data_x.columns)[scores>=0.5].index]   #covariates that have no effect are removed



ValueError: Length of passed values is 61, index implies 53

In [62]:
# Two-step pipeline: keep the k best-scoring covariates (ranked by
# fit_and_score_features), then fit a Cox model on them. k=3 is only a starting
# value; the grid search over 'select__k' further down overrides it.
# NOTE(review): verbose=10 on the model makes every fit print its optimizer
# trace, which floods the notebook output during the grid search.
pipe = Pipeline([('select', SelectKBest(fit_and_score_features, k=3)),
                 ('model', CoxPHSurvivalAnalysis(alpha=0.000001,n_iter=100, tol=1e-09, verbose=10))])

In [63]:
# Wall-clock timestamp taken before the grid search starts.
starttime=time.time()

# Try every possible number of selected features: 1 up to all columns of data_x.
param_grid = {'select__k': np.arange(1,data_x.shape[1]+1)}
# No `cv` argument is given, so the number of folds is GridSearchCV's default.
gcv = GridSearchCV(pipe, param_grid, return_train_score=True)

gcv.fit(data_x, data_y)

endtime=time.time()


# Elapsed seconds for the search (stored only; this cell does not display it).
timeelapsed=endtime-starttime



iter      1: update = [0.65296434]
iter      1: loss = 6.3860825337
iter      2: update = [0.21807322]
iter      2: loss = 6.3845721139
iter      3: update = [0.0217759]
iter      3: loss = 6.3845601122
iter      4: update = [0.00019979]
iter      4: loss = 6.3845601112
iter      4: optimization converged
iter      1: update = [-0.32088462]
iter      1: loss = 6.4023695079
iter      2: update = [0.01304118]
iter      2: loss = 6.4023493211
iter      3: update = [1.46143536e-05]
iter      3: loss = 6.4023493211
iter      3: optimization converged
iter      1: update = [-0.39857686]
iter      1: loss = 6.3976940897
iter      2: update = [0.02781648]
iter      2: loss = 6.3976076007
iter      3: update = [0.0001075]
iter      3: loss = 6.3976075995
iter      3: optimization converged
iter      1: update = [ 0.62220575 -0.069986  ]
iter      1: loss = 6.3855819481
iter      2: update = [0.21983592 0.00494314]
iter      2: loss = 6.3840764192
iter      3: update = [ 2.16400435e-02 -6.418715

iter      1: update = [-0.21012904 -0.28342977 -0.10044462  0.5261845  -0.16268653 -0.05654894
  0.17442952]
iter      1: loss = 6.3778558470
iter      2: update = [0.00639962 0.01758489 0.00445487 0.17831296 0.01050296 0.00343006
 0.01645775]
iter      2: loss = 6.3768445703
iter      3: update = [-4.03335954e-05 -1.60930585e-04 -1.01983656e-04  1.42875982e-02
 -1.35681676e-04 -4.66000056e-05 -6.48337120e-05]
iter      3: loss = 6.3768396074
iter      4: update = [ 6.35865285e-08  1.52706427e-07 -6.82757174e-09  8.41478400e-05
 -4.70198627e-07  2.13762107e-07 -9.65889959e-07]
iter      4: loss = 6.3768396073
iter      4: optimization converged
iter      1: update = [-0.48633995  0.28105732  0.17808788 -0.2257041   0.27302858  0.75303937
 -0.37620369]
iter      1: loss = 6.3722481001
iter      2: update = [ 0.03409685  0.10345175 -0.00379671  0.01504192 -0.00807923  0.00867858
  0.02539601]
iter      2: loss = 6.3719214557
iter      3: update = [-2.33567723e-04  3.14319940e-03  1.32140

iter      1: update = [-0.23863867 -0.2698439  -0.46113681  0.24536945 -0.25747685  0.77888184
 -0.13534924  0.04734721 -0.11904467 -0.19882451 -0.42145195]
iter      1: loss = 6.3580610258
iter      2: update = [ 0.00972991  0.00897263  0.06177772  0.03938056  0.00520168  0.2189634
  0.01028499 -0.00550297  0.00406086  0.07523007  0.01909453]
iter      2: loss = 6.3561970463
iter      3: update = [ 3.68520135e-05 -1.05517248e-04  7.90306808e-04 -2.34509380e-04
 -1.79177755e-04  1.83908909e-02  2.38607934e-04 -1.57502415e-04
  9.55459816e-05  2.48818427e-03 -6.77243096e-04]
iter      3: loss = 6.3561884527
iter      4: update = [-4.10797122e-08  2.78078635e-07  4.63102678e-07  2.00489017e-06
 -6.86363354e-09  1.38234778e-04 -7.64210023e-07 -6.37543722e-07
 -2.00817594e-07  6.05517060e-07 -4.38625276e-07]
iter      4: loss = 6.3561884522
iter      4: optimization converged
iter      1: update = [-0.03452508 -0.21088972 -0.00305844 -0.31423919 -0.05916029  0.43428842
 -0.22458671 -0.2322

iter      1: update = [-0.23149615 -0.29353974 -0.46867686  0.2292815   0.00892253 -0.26544392
  0.94907129  0.23184157 -0.01076436 -0.23973186  0.05753068 -0.10375915
 -0.22360552 -0.36610098]
iter      1: loss = 6.3530239362
iter      2: update = [ 0.008738   -0.0052758   0.03374512  0.02198659  0.01547657 -0.01553034
  0.18823272  0.00291869  0.00959013  0.02615358  0.01084121  0.0004906
  0.0571324   0.01212513]
iter      2: loss = 6.3513261988
iter      3: update = [ 4.43161847e-05 -8.55774714e-05  4.99329561e-04 -1.08586729e-04
  5.17183306e-05  6.48946088e-05  1.74092272e-02 -1.70935020e-05
 -5.40985114e-04  2.45092619e-05 -2.23285860e-04  2.42325102e-04
  2.09929522e-03 -4.23259636e-04]
iter      3: loss = 6.3513185508
iter      4: update = [-1.42957416e-09  2.71150199e-07  3.33305535e-07  1.54255514e-06
  7.31959565e-08 -1.39851171e-07  1.24703222e-04  5.83997051e-08
  1.43746193e-07 -7.11912415e-07 -6.83426443e-07 -2.98200476e-07
 -4.23741859e-08 -4.72559101e-07]
iter      4:

iter      1: update = [-0.06993477  0.06542977 -0.1522639  -0.0225398  -0.49597217  0.23610462
  0.22280945 -0.17242466  0.05946301  0.14081883  0.27969987  0.33222173
 -0.15762743  0.4392085  -0.29146456  0.1156212 ]
iter      1: loss = 6.3625866859
iter      2: update = [-0.00027252  0.00751068  0.00039822  0.02653859  0.02155526  0.09786293
  0.02861476  0.01377466  0.00135756  0.03155004  0.12474678  0.01250407
 -0.01452856 -0.03838871 -0.00126169  0.01525985]
iter      2: loss = 6.3619994753
iter      3: update = [-2.48247632e-05  2.33496575e-05  3.56456618e-05 -3.74293900e-04
 -4.30535913e-04  3.09016107e-03  1.45863052e-04 -8.43828569e-05
 -3.49139665e-05  3.62396417e-04  4.00100863e-03  1.87363073e-04
  2.10241411e-05 -1.55755219e-04  1.31915827e-04  8.81838865e-05]
iter      3: loss = 6.3619989282
iter      4: update = [ 4.07293719e-09  3.46363263e-09  7.46226231e-09  7.53799564e-08
  1.28479580e-07  4.70822241e-06  7.04051601e-08 -1.44142623e-08
 -1.73122482e-08 -1.19458139e-

iter      1: update = [-0.06052562  0.03193975  0.0706097  -0.15578113 -0.10030125 -0.58101429
 -0.38232246  0.13498273  0.19491342 -0.24710771  0.05110367  0.13652202
  0.26387011  0.35968681 -0.13651673  0.41435061 -0.22862323  0.07060626]
iter      1: loss = 6.3605548306
iter      2: update = [ 4.19293992e-03  9.16792529e-03  6.76603051e-03  1.08917545e-04
  3.12299769e-02  2.62468342e-02  3.08294863e-02  9.37530585e-02
  3.46254987e-02  1.53048713e-02 -5.92975681e-04  2.71201356e-02
  1.24190428e-01  1.07365711e-02 -1.74703150e-02 -4.83940182e-02
  5.49962600e-03  1.81446454e-02]
iter      2: loss = 6.3599689555
iter      3: update = [-2.08052802e-06  4.47719661e-05  3.45903380e-05  3.88341759e-05
 -3.57918122e-04 -4.46433842e-04 -3.67388708e-04  2.75712907e-03
  1.71337807e-04  7.18391339e-07 -6.00400348e-06  3.84012017e-04
  3.80457126e-03  1.99067506e-04  2.68036341e-05 -1.03149962e-04
  1.98952027e-05  1.54906990e-04]
iter      3: loss = 6.3599684782
iter      4: update = [ 7.8

iter      5: update = [-7.88819553e-09 -5.86861816e-08  7.29848037e-08  7.03433013e-08
 -1.00831576e-08  1.03488013e-08  1.44263692e-08  3.77471202e-04
  2.01139814e-08  2.48423771e-09  1.44091181e-08  1.16666943e-08
 -3.98571321e-09 -1.75056363e-08  9.74657022e-09  1.32249998e-08
  8.09058165e-09  1.42014589e-08 -2.61093890e-08 -1.91567297e-08]
iter      5: loss = 6.3544442814
iter      5: optimization converged
iter      1: update = [-0.05890432  0.03632555  0.06924245 -0.15492127 -0.0911682  -0.57904915
 -0.34021087  0.1520393   0.20861257 -0.23177222  0.04901946  0.13650481
  0.29667765  0.36495185 -0.13774336  0.39792459 -0.2359972  -0.00481138
 -0.08100935  0.08329878]
iter      1: loss = 6.3602611808
iter      2: update = [ 0.00449414  0.00903088  0.00507906  0.00117478  0.03453092  0.02702431
  0.04343002  0.1033503   0.03146947  0.01514753 -0.00018184  0.0238564
  0.14656662  0.01451751 -0.01729953 -0.05572272  0.00712481 -0.02508242
 -0.01298479  0.01632423]
iter      2: loss

iter      1: update = [-0.15032115 -0.11594022 -0.16935209 -0.19918724 -0.00419015 -0.36139615
  0.28900819 -0.03311718 -2.25781753 -0.23098762  0.09463237  0.72228111
 -0.40097785 -0.20249196 -0.29272068  0.05653725 -0.21046328  0.3918203
 -0.1924494  -0.14871352 -0.22043557  0.39333225]
iter      1: loss = 6.3584485385
iter      2: update = [-0.00454579 -0.00217493  0.0013846   0.0007211  -0.00285475  0.00468016
 -0.01074053  0.00745666  0.57041699  0.01892678 -0.00525816  0.2039177
  0.00210409 -0.0104378   0.04774355  0.03864044 -0.01615177 -0.01532656
 -0.02281738 -0.23657596  0.00504178  0.005272  ]
iter      2: loss = 6.3520210242
iter      3: update = [ 2.25945803e-04 -1.61062770e-05  4.46250667e-05 -1.92878928e-05
 -5.29895124e-05 -2.38775025e-04 -3.10227993e-04 -2.51551473e-04
  2.24474353e-01 -4.04308499e-04 -8.45148133e-06  3.33846652e-02
 -1.53279325e-04 -8.38735213e-06 -4.85700162e-04 -3.57051727e-05
  5.11087630e-05  8.62996794e-04  1.15747643e-03 -3.38859878e-02
 -2.123

iter      5: update = [ 1.69357426e-07  6.35941007e-07 -1.53002972e-07  4.30631311e-07
  6.87942868e-08  1.09617302e-07  1.93488854e-07  6.30313797e-08
  7.81601691e-04 -9.33773176e-09  4.51353310e-08 -2.88422314e-08
 -1.87279500e-08  9.00727661e-08 -6.47404105e-08  7.81601933e-04
 -1.09763920e-08  3.39665081e-08 -6.17087914e-08 -3.68773729e-08
 -1.06536483e-07  1.18558287e-08  1.29873585e-07]
iter      5: loss = 6.3537637377
iter      6: update = [ 1.17088970e-10  4.32446467e-10 -1.02371353e-10  2.91119003e-10
  4.87483111e-11  7.78359623e-11  1.38281370e-10  4.45651344e-11
  5.79805694e-07 -5.59021656e-12  3.24738452e-11 -2.10176034e-11
 -1.19471490e-11  6.23434364e-11 -4.53940468e-11  5.79806523e-07
 -9.62798944e-12  2.14517200e-11 -4.29674801e-11 -2.74718038e-11
 -7.44950074e-11  7.62781372e-12  9.28497699e-11]
iter      6: loss = 6.3537637377
iter      6: optimization converged
iter      1: update = [-0.09687608 -0.0526224  -0.2353955  -0.61016093 -0.57058161  0.03438054
 -0.04034

iter      1: update = [-1.48176995e-01 -1.19197030e-01 -1.81311527e-01  5.71582870e-02
 -1.96876311e-01  3.67850035e-04 -3.23653312e-01  3.51951178e-01
 -6.02791095e-02 -2.20772116e+00 -2.46877163e-01  1.52943094e-01
  1.54768676e-01 -3.71303499e-01  6.91494796e-01 -4.63022776e-01
 -2.51309223e-01 -3.05663681e-01  4.07130556e-03 -2.67360460e-01
  3.94977673e-01 -2.37924161e-01 -1.13118259e-01 -2.03979701e-01
  3.99669665e-01]
iter      1: loss = 6.3564332099
iter      2: update = [-1.74179376e-03 -3.76675414e-04  1.47836782e-03  2.22674960e-03
  1.38800143e-03 -5.03091939e-03  1.38027751e-03  1.94252468e-04
  6.97234907e-03  5.71354002e-01  1.71224625e-02  1.15095069e-04
  2.47960817e-03 -9.21763586e-03  2.03930352e-01 -1.67327368e-03
 -1.80048296e-02  3.96710140e-02  3.66806737e-02 -2.11732244e-02
 -1.80127101e-02 -3.07090461e-02 -2.25152088e-01  6.05058097e-03
  1.35457411e-02]
iter      2: loss = 6.3499813686
iter      3: update = [ 2.47931535e-04 -4.13710577e-05  2.60825799e-05 -1.

iter      5: update = [-1.03829441e-08 -5.85893413e-08  7.05704373e-08 -2.17441854e-08
  6.89548117e-08 -1.23486543e-08  3.90890750e-09 -2.82451966e-08
  8.50852770e-09  3.64633758e-04  2.13865116e-08 -1.06790660e-08
 -4.13151349e-08  3.29027645e-08  6.32863653e-09  2.05278782e-08
  1.35954326e-09 -4.06128606e-09  1.83708633e-08  1.86554913e-08
 -3.34822140e-09  8.54799134e-09  2.59488424e-08 -3.18504063e-08
  1.13098586e-08 -3.01936924e-08]
iter      5: loss = 6.3493851953
iter      5: optimization converged
iter      1: update = [-0.08313142  0.03269119  0.07355934 -0.15792917 -0.10167786 -0.65219223
 -0.56202413 -0.07130162 -1.2599038   0.14958199 -0.355002    0.00761717
  0.07773163  0.22957428  0.36171137 -1.2599038  -0.08525622  0.39936092
 -0.14617588  0.10970737 -0.06868044  0.03145464 -0.31029282  0.24990763
  0.24990763 -0.86809204]
iter      1: loss = 6.3585446606
iter      2: update = [ 4.92681658e-03  7.97855012e-03  6.49967605e-03  2.34510872e-04
  3.80425587e-02  2.62792

iter      5: update = [ 1.65507754e-07  6.15189082e-07 -1.50387865e-07  4.19753001e-07
 -2.65816493e-08  6.09199500e-08  1.02765711e-07  1.94046303e-07
  1.18526683e-07  7.54188674e-04 -4.92640797e-09  4.83023285e-08
 -2.09876699e-08 -1.00271798e-08  7.66028576e-08 -7.09707694e-08
  7.54188427e-04 -1.37224457e-08  4.70988266e-08 -7.98426366e-08
 -4.23563440e-08 -7.94561164e-08  6.59329897e-09  1.48494939e-07
 -1.49570106e-07 -1.49570367e-07  1.29628535e-07]
iter      5: loss = 6.3483014662
iter      6: update = [ 1.10645097e-10  4.04023613e-10 -9.71949411e-11  2.74073257e-10
 -1.37514160e-11  4.19825392e-11  7.12021349e-11  1.34239355e-10
  8.17920841e-11  5.39958694e-07 -2.25756668e-12  3.34973447e-11
 -1.49076069e-11 -6.53409144e-12  5.23951007e-11 -4.76515878e-11
  5.39961322e-07 -1.17344857e-11  2.82770224e-11 -5.25510534e-11
 -3.14247736e-11 -5.49506725e-11  3.18841460e-12  1.01862900e-10
 -9.10031521e-11 -9.10354526e-11  8.37841260e-11]
iter      6: loss = 6.3483014662
iter      

iter      1: update = [-0.1184813  -0.06498423  0.07968773 -0.03065653 -0.23453513 -0.03132605
 -0.50763771 -0.73272258  0.23867485 -0.00442357 -0.24720466  0.10494846
 -0.06288124  0.9725519   0.29966997  0.00170186 -0.21915768 -0.29875439
  0.19372774  0.34066822  0.2469944  -0.28610349  1.2338506  -0.13889875
 -0.27666617  0.01888247 -0.33106128 -0.72643921  0.93283478]
iter      1: loss = 6.3290223218
iter      2: update = [ 8.39144091e-04 -1.07655970e-04  6.07918255e-03 -5.45125389e-03
  6.10470010e-03 -1.22087711e-02  3.05573024e-02  1.31087981e-01
  6.43499031e-02  4.81054946e-02  1.02356018e-02  3.68170744e-03
  7.63519988e-03  1.87833782e-01 -1.30386059e-02 -3.46783492e-02
 -1.38366851e-02  2.75627489e-02 -2.49992727e-03 -2.98495865e-02
 -4.96480941e-03 -9.12270154e-03 -5.53616362e-03  5.82105833e-02
  7.43473895e-02 -5.88761757e-02 -7.96485308e-02  5.21464719e-02
 -1.08678846e-01]
iter      2: loss = 6.3256011623
iter      3: update = [-2.31526921e-04 -2.29161289e-04 -1.32391

iter      1: update = [-1.45269952e-01 -1.14443463e-01 -1.74815381e-01  5.72227866e-02
 -2.02978363e-01  1.87182177e-04 -2.44726830e-01  3.60824070e-01
  1.08980749e-01 -2.13440695e+00 -2.02957399e-01  1.71034693e-01
  1.98586056e-01  1.05629342e-01 -2.77135105e-01  6.99446126e-01
 -3.85765646e-01 -2.15590739e-01  2.24294119e-02 -2.53398879e-01
  1.01004600e-01  2.43613647e-01 -1.97045029e-01  1.90083382e-01
  3.92676925e-01 -4.88886202e-02 -2.70557782e-01 -3.04276437e-02
 -2.11702044e-01  4.81875624e-01]
iter      1: loss = 6.3548052198
iter      2: update = [-0.00291526 -0.00313029  0.00079731  0.00168     0.00098403 -0.00379151
 -0.00193785  0.0092432   0.0087536   0.57918449  0.01788693  0.00571978
  0.00191676  0.01159883 -0.00462134  0.20731269  0.00847336 -0.02362733
  0.00346377  0.03452928  0.03942563  0.03030495 -0.01828648 -0.02460837
 -0.01411299 -0.03115547 -0.04734153 -0.20392398  0.0102302   0.0322399 ]
iter      2: loss = 6.3482881945
iter      3: update = [ 3.05383227e

iter      5: update = [-1.03939727e-08 -5.74185977e-08  6.78477246e-08 -2.19341523e-08
  6.69476388e-08 -1.25594767e-08  7.47700961e-09 -2.23070235e-08
  2.78709574e-09  3.49398035e-04  2.09996124e-08 -7.96012335e-09
 -3.73461460e-08 -1.21227400e-08  1.88649817e-08  9.21961950e-09
  1.49505358e-08  8.16567395e-09  1.59210003e-08 -2.01349773e-09
  1.64333680e-08  2.89994927e-09  1.96088307e-08 -2.02181196e-08
 -1.07089459e-09 -2.51866597e-08  3.64701160e-08 -3.06799173e-08
 -5.32707130e-08  1.01411749e-08 -5.79723424e-09]
iter      5: loss = 6.3445756934
iter      5: optimization converged
iter      1: update = [-0.07822215  0.04827595  0.07290266 -0.15965508 -0.23353011  0.45588873
  0.1432999  -0.04604506 -0.58393099 -0.46715424 -0.00159402 -1.19175563
  0.17349079 -0.29555014  0.20373134  0.04942132  0.05741509  0.24392926
  0.38610007 -1.19175563 -0.1195634   0.37215808 -0.13367795  0.09003786
 -0.07544713  0.03737217 -0.26478058  0.04424248  0.24295957  0.24295957
 -0.86311126]
ite

iter      1: update = [-0.07657198  0.05039249  0.07527578 -0.15824858 -0.24017283  0.45086135
  0.12619276 -0.09538746 -0.60749542 -0.49192386 -0.01798928 -1.18334168
  0.15419445 -0.32631306  0.21538261  0.07567829  0.09428572  0.26344695
  0.41473828 -1.18334168 -0.12931617  0.38800044 -0.12335581  0.06479909
 -0.06668357  0.03457709 -0.24980926  0.03532868  0.30975472  0.23514855
  0.23514855 -1.14362034]
iter      1: loss = 6.3547221112
iter      2: update = [ 0.00148218  0.0049988   0.0053242   0.00659088  0.00086023  0.08481923
  0.01680993  0.0478115   0.04248331  0.06471837  0.11490278  0.3859764
  0.02322461  0.02512089  0.05159359 -0.0004328   0.01066645  0.15328553
 -0.0021817   0.3859764  -0.02487479 -0.03784064 -0.01415672 -0.03372855
 -0.02023103 -0.00085862  0.03065958 -0.00442357 -0.04809729  0.05402636
  0.05402636  0.16403076]
iter      2: loss = 6.3463865063
iter      3: update = [ 1.47455434e-04  5.41962980e-04 -1.27536557e-04 -8.30055759e-05
  3.04306616e-04 -1.72

iter      5: update = [ 1.76361492e-07  6.18418380e-07 -1.45082227e-07 -1.02406496e-07
  3.71413325e-07 -5.26118602e-08  1.63421814e-08  1.32780460e-07
  1.64616395e-07  2.51638531e-07  1.55335072e-07  7.40972711e-04
  1.40964403e-08  8.54074247e-08 -3.76532458e-08 -2.43619669e-08
 -6.69466927e-08  6.62181649e-08 -7.06424774e-08  7.40972619e-04
  5.01263763e-09  5.71907907e-08 -8.36106764e-08 -1.26270181e-08
 -8.86954042e-08  8.56109196e-08  3.00020830e-08  1.51810580e-07
  9.51029746e-08 -1.92282063e-07 -1.15291539e-07 -1.15294146e-07
  3.08539838e-07]
iter      5: loss = 6.3454524348
iter      6: update = [ 1.15254152e-10  3.99099714e-10 -9.24548086e-11 -6.67449050e-11
  2.37923402e-10 -3.13398161e-11  1.03734905e-11  8.80607471e-11
  1.10562980e-10  1.69796006e-10  1.05052424e-10  5.21061191e-07
  9.62167884e-12  5.75673688e-11 -1.98789501e-11 -1.67772304e-11
 -4.23287918e-11  4.51260384e-11 -4.67500946e-11  5.21068216e-07
  7.38692127e-13  3.47790150e-11 -5.46656146e-11 -1.17901904

iter      5: update = [-8.40674608e-08 -3.10751799e-07  3.58721495e-07 -1.50233912e-07
 -7.96356069e-08  3.91195692e-07 -8.82698556e-08  1.02973013e-09
  1.47347319e-07  1.77469213e-07  2.65603110e-07  1.79201103e-07
  7.40523885e-04  1.35735415e-08  7.92735684e-08 -2.97391423e-08
 -2.74856075e-08 -7.65373514e-08  8.68208378e-08 -6.81743489e-08
  7.40523866e-04  1.38057570e-08  7.22948909e-08 -9.01036356e-08
 -1.10677782e-08 -9.42190213e-08  9.36000135e-08  3.74680153e-08
  1.30265171e-07  1.06203016e-07 -1.69945855e-07 -1.11470811e-07
 -1.11470376e-07  2.98004050e-07]
iter      5: loss = 6.3454524346
iter      6: update = [-5.14931340e-11 -1.98699679e-10  2.32474820e-10 -9.56776754e-11
 -5.20719016e-11  2.50245919e-10 -5.41216051e-11  5.20011041e-13
  9.72801618e-11  1.18691923e-10  1.78589755e-10  1.20214534e-10
  5.20425533e-07  9.29021252e-12  5.36305850e-11 -1.47364880e-11
 -1.87203130e-11 -4.83917975e-11  5.82893990e-11 -4.50982832e-11
  5.20422949e-07  6.37171073e-12  4.43606941

iter      5: update = [-8.40674411e-08 -3.10751781e-07  3.58721515e-07 -1.50233910e-07
 -7.96356067e-08  3.91195694e-07 -8.82698601e-08  1.02973427e-09
  1.47347339e-07  1.77469239e-07  2.65603144e-07  1.79201125e-07
  7.40523873e-04  1.35735487e-08  7.92735899e-08 -2.97391174e-08
 -2.74855972e-08 -7.65373524e-08  8.68208452e-08 -6.81743434e-08
  7.40523878e-04  1.38057609e-08  7.22948921e-08 -9.01036366e-08
 -1.10677769e-08 -9.42190240e-08  9.36000424e-08  3.74680298e-08
  1.30265181e-07  1.06203036e-07 -1.69945867e-07 -1.11471200e-07
 -1.11469983e-07  2.98004073e-07  0.00000000e+00]
iter      5: loss = 6.3454524346
iter      6: update = [-5.15219757e-11 -1.98727873e-10  2.32446177e-10 -9.56807445e-11
 -5.20735365e-11  2.50243320e-10 -5.41155318e-11  5.15485817e-13
  9.72550638e-11  1.18660923e-10  1.78548058e-10  1.20187253e-10
  5.20424147e-07  9.28181796e-12  5.36052384e-11 -1.47669987e-11
 -1.87343338e-11 -4.83934264e-11  5.82807321e-11 -4.51056835e-11
  5.20424291e-07  6.36636456

iter      5: update = [-8.28792103e-08 -3.09480225e-07  3.58630493e-07 -1.49556520e-07
 -8.09808485e-08  3.89768573e-07 -8.29812497e-08  6.75404713e-11
  1.55303512e-07  1.79321783e-07  2.73515451e-07  1.82542403e-07
  7.39070367e-04  7.49610326e-09  9.11327844e-08 -2.30403263e-08
 -2.90453490e-08 -5.72067003e-08  9.33777841e-08 -7.64525541e-08
  7.39070191e-04  8.62089976e-09  3.84512902e-08 -6.84809336e-08
 -1.83453995e-08 -9.63847172e-08  9.26794457e-08  3.08252871e-08
  1.21370500e-07  8.44819290e-08 -1.78801278e-07 -1.12060830e-07
 -9.75337111e-08 -1.12059613e-07  2.99768812e-07  0.00000000e+00]
iter      5: loss = 6.3449110076
iter      6: update = [-5.07051475e-11 -1.97602363e-10  2.31936872e-10 -9.51090808e-11
 -5.28246594e-11  2.48834773e-10 -5.10203158e-11 -2.08731298e-13
  1.02142067e-10  1.19256767e-10  1.82586664e-10  1.21152478e-10
  5.18398970e-07  5.38122372e-12  6.05316592e-11 -1.14342234e-11
 -2.00230682e-11 -3.62016827e-11  6.19909413e-11 -5.03851131e-11
  5.18405039

iter      5: update = [-8.91022492e-08 -3.17074892e-07  3.48116300e-07 -1.50622128e-07
 -7.99214665e-08  3.88122969e-07 -5.90396551e-08 -8.81908668e-08
  5.43128073e-09  1.62446303e-07  1.83171666e-07  2.85590642e-07
  1.83905318e-07  7.38138822e-04  1.03533349e-08  9.55535753e-08
 -1.74441945e-08 -3.49793890e-08 -6.10099432e-08  8.22123313e-08
 -7.71402152e-08  7.38138987e-04  1.36547351e-08  4.32697662e-08
 -6.88555345e-08 -1.41018169e-08 -9.65026818e-08  9.90436911e-08
  3.49293887e-08  1.27496024e-07  8.85552943e-08 -1.96313398e-07
 -1.05088670e-07 -1.07482143e-07 -1.05088670e-07  3.24313356e-07
  0.00000000e+00]
iter      5: loss = 6.3448785828
iter      6: update = [-5.37906379e-11 -2.01519259e-10  2.25132671e-10 -9.51130436e-11
 -5.23064821e-11  2.46717193e-10 -3.45216873e-11 -5.41913200e-11
  2.88405428e-12  1.05993062e-10  1.21142279e-10  1.88936086e-10
  1.21484029e-10  5.17086509e-07  6.94251419e-12  6.29296041e-11
 -8.16904916e-12 -2.34190854e-11 -3.83153806e-11  5.52917605

iter      4: update = [-4.23223832e-06 -1.68933045e-05  2.20509099e-05 -9.73277692e-06
 -4.44445903e-06  2.41043597e-05 -3.92273705e-06  3.68315636e-07
  1.92466166e-06  1.47467561e-05  1.64441103e-05  2.24271559e-05
  1.24106146e-05  2.73671319e-02  1.78192213e-06  5.61272649e-06
  2.81130855e-06 -5.92410550e-06 -5.79259272e-06  5.45609806e-06
 -5.64594540e-06  2.73671304e-02 -4.43111070e-07 -1.48437901e-06
 -3.93683819e-06 -3.97715477e-06 -8.37620770e-06  4.52049508e-06
  9.87817232e-07 -9.21411115e-06  9.22120993e-06  4.32511629e-06
 -1.97600514e-05  3.87252669e-05 -2.75418315e-06  3.87252779e-05
  1.96474024e-05  0.00000000e+00]
iter      4: loss = 6.3445209171
iter      5: update = [-8.76052046e-08 -3.15332161e-07  3.45106514e-07 -1.49275679e-07
 -7.95528700e-08  3.84734427e-07 -5.73878165e-08 -8.91367653e-08
  5.35350893e-09  1.73028794e-07  1.89589300e-07  2.86369876e-07
  1.90276235e-07  7.33594206e-04  8.58010230e-09  1.04248036e-07
 -8.06697147e-09 -3.35913953e-08 -5.74571225

iter      1: update = [-0.08334616 -0.01233131  0.04822813  0.0817537  -0.16592342 -0.24865089
 -0.02060693  0.36714194  0.09540419 -0.09253471 -0.56809438 -0.4172228
  0.01741844 -1.14972595  0.20021176 -0.39531152  0.26810362  0.13119294
  0.01338649  0.2658855   0.46453718 -1.14972595 -0.09151735  0.58937144
  0.15755395 -0.1838418   0.11000183 -0.12879949  0.13989143  0.09421702
  0.21735964 -0.1844222   0.08750956  0.2417159   0.2762501   0.25243631
  0.2762501  -1.02598974  0.        ]
iter      1: loss = 6.3531664221
iter      2: update = [ 9.83805033e-03  9.46911308e-03  1.18428080e-02  4.27476277e-03
  8.31569991e-03  1.76258949e-03 -8.06370663e-03  8.91712006e-02
  1.37992222e-02  4.69379576e-02  5.17737481e-02  6.96905988e-02
  1.20962648e-01  3.87465204e-01  1.72405299e-02  5.29247575e-02
  4.65787322e-02 -8.45671951e-03  3.09710443e-02  1.45406970e-01
 -1.08488595e-02  3.87465207e-01 -3.24612313e-02 -7.62470473e-02
 -2.55911639e-02 -2.37013334e-03 -3.34825535e-02  3.493108

In [64]:
# Parameter setting selected by the grid search.
gcv.best_params_

{'select__k': 1}

In [46]:
# Apply the parameters chosen by the grid search to the pipeline and refit it
# on the full data set.
# NOTE(review): the execution counts around this cell are out of order
# (In [64] precedes In [46]) and the recorded outputs disagree on the selected
# k (1 vs 10) -- restart the kernel and run all cells before trusting them.
pipe.set_params(**gcv.best_params_)
pipe.fit(data_x, data_y)

iter      1: update = [-0.01574184  0.53942117  0.00081675  0.14268407  0.01968065 -0.282868
 -0.03143889 -0.19215379  0.56981461 -0.09358615]
iter      1: loss = 6.7606396809
iter      2: update = [ 2.08459156e-04 -1.68132389e-03  2.77867594e-05  5.58092324e-03
  3.96115855e-03  1.61643247e-02 -1.71670454e-02 -2.67975659e-02
  1.87454127e-01  5.78788522e-03]
iter      2: loss = 6.7597342095
iter      3: update = [-3.32845509e-06  6.10995158e-05  2.92797413e-08  4.64111823e-05
 -1.17654290e-04 -1.59813173e-04  7.30261146e-05  1.71564881e-04
  1.40056648e-02  2.43563468e-06]
iter      3: loss = 6.7597297647
iter      4: update = [-1.80381049e-08 -2.73325338e-07 -7.93919070e-11  1.94033510e-07
 -1.14303595e-07 -8.07161252e-08 -3.61109686e-07 -5.88288841e-07
  8.43518636e-05  1.55527620e-07]
iter      4: loss = 6.7597297646
iter      4: optimization converged


Pipeline(memory=None,
     steps=[('select', SelectKBest(k=10,
      score_func=<function fit_and_score_features at 0x7f82f85371e0>)), ('model', CoxPHSurvivalAnalysis(alpha=1e-06, n_iter=100, tol=1e-09, verbose=10))])

In [47]:
# In-sample score of the refitted pipeline (same data it was fitted on).
pipe.score(data_x, data_y)

0.5871271758141084

In [48]:
# Per-setting cross-validation results as a table, best mean test score first.
pd.DataFrame(gcv.cv_results_).sort_values(by='mean_test_score', ascending=False)


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_select__k,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score,split0_train_score,split1_train_score,split2_train_score,mean_train_score,std_train_score
9,8.932500,0.095792,0.036739,0.000311,10,{'select__k': 10},0.547882,0.568809,0.533020,0.549904,0.014680,1,0.599506,0.578039,0.610643,0.596063,0.013531
8,8.673455,0.197501,0.034316,0.000426,9,{'select__k': 9},0.549493,0.560509,0.532990,0.547664,0.011309,2,0.598978,0.577984,0.610673,0.595878,0.013524
11,8.810566,0.146123,0.039854,0.001538,12,{'select__k': 12},0.543657,0.569798,0.525528,0.546328,0.018172,3,0.607869,0.582864,0.614041,0.601592,0.013480
10,8.899614,0.204316,0.038404,0.001487,11,{'select__k': 11},0.547839,0.569282,0.518866,0.545329,0.020659,4,0.599588,0.581886,0.611521,0.597665,0.012175
12,8.803736,0.199515,0.040824,0.001167,13,{'select__k': 13},0.544272,0.557451,0.529813,0.543845,0.011287,5,0.613126,0.582587,0.614929,0.603547,0.014839
13,8.877484,0.152291,0.051685,0.013087,14,{'select__k': 14},0.543903,0.558141,0.529073,0.543706,0.011868,6,0.615773,0.583032,0.615115,0.604640,0.015282
14,8.930264,0.117295,0.045663,0.002817,15,{'select__k': 15},0.543500,0.556759,0.529149,0.543136,0.011274,7,0.616395,0.583520,0.615147,0.605021,0.015212
7,8.798854,0.069682,0.033998,0.000848,8,{'select__k': 8},0.549193,0.548240,0.526258,0.541230,0.010594,8,0.597671,0.572652,0.606492,0.592272,0.014333
2,8.481652,0.153073,0.015252,0.000460,3,{'select__k': 3},0.544902,0.529252,0.549035,0.541063,0.008520,9,0.582867,0.561930,0.584673,0.576490,0.010322
1,8.484536,0.222326,0.014485,0.000148,2,{'select__k': 2},0.545105,0.543323,0.532538,0.540322,0.005552,10,0.564752,0.557328,0.569653,0.563911,0.005067


In [None]:
# Recompute the per-feature scores for the current data_x and show them in
# column order (unsorted). Same call as the earlier scoring cell; this cell has
# no execution count (In [None]), i.e. it was not run in the saved notebook.
scores = fit_and_score_features(data_x.values, data_y)
pd.Series(scores, index=data_x.columns)