# Test on FLCHAIN

In [1]:
import numpy

In [2]:
import sklearn

In [3]:
import survwrap

In [4]:
survwrap.list_available_datasets()

('flchain', 'gbsg2', 'metabric', 'support')

In [5]:
mb_df = survwrap.get_data('flchain')
mb_df.dataframe.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6524 entries, 0 to 6523
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   age         6524 non-null   float64
 1   sex         6524 non-null   float64
 2   sample.yr   6524 non-null   int64  
 3   kappa       6524 non-null   float64
 4   lambda      6524 non-null   float64
 5   flc.grp     6524 non-null   int64  
 6   creatinine  6524 non-null   float64
 7   mgus        6524 non-null   float64
 8   time        6524 non-null   float64
 9   event       6524 non-null   float64
dtypes: float64(8), int64(2)
memory usage: 560.7 KB


In [6]:
X, y = mb_df.get_X_y()
X.shape, y.shape

((6524, 8), (6524,))

In [7]:
y[:10]

array([( True,   85.), ( True, 1281.), ( True,   69.), ( True,  115.),
       ( True, 1039.), ( True, 1355.), ( True, 2851.), ( True,  372.),
       ( True, 3309.), ( True, 1326.)],
      dtype=[('event', '?'), ('time', '<f8')])

### Generate a (stratified) train-test split and Scale the features (only) 

First do the stratified splitting THEN do scaling, parameterized on X_train set ONLY 

In [8]:
from sklearn.preprocessing import StandardScaler, RobustScaler

In [9]:
X_train, X_test, y_train, y_test = survwrap.survival_train_test_split(X, y, rng_seed=2309)

In [10]:
scaler = StandardScaler().fit(X_train)
[X_train, X_test] = [ scaler.transform(_) for _ in  [X_train, X_test] ]
X_train.shape, X_test.shape

((4893, 8), (1631, 8))

In [11]:
survwrap.get_indicator(y).sum(), survwrap.get_indicator(y_train).sum(), survwrap.get_indicator(y_test).sum(),


(1962, 1472, 490)

## check possible dimensionality reduction

In [12]:
from sklearn.decomposition import PCA

In [13]:
pca= PCA(n_components=0.995, random_state=2308).fit(X_train)
print('PCA components:',pca.n_components_)

PCA components: 8


Massive (50%) reduction using PCA

In [14]:
## Stratified CV spliter for survival analysis

In [15]:
#from sklearn.model_selection import RepeatedStratifiedKFold, StratifiedKFold

In [16]:
#testkf= RepeatedStratifiedKFold(n_splits=5,n_repeats=2,random_state=2307)
#for trn,tst in testkf.split(X_train, survwrap.get_indicator(y_train)):
#    print(trn,tst) 

# test coxnet

In [17]:
rng_seed=2401

In [18]:
coxnet = survwrap.CoxNet(rng_seed)
coxnet.fit(X_train, y_train)

In [19]:
coxnet.score(X_test, y_test)

0.7927079761717836

In [20]:
#from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [21]:
cox_grid = survwrap.CoxNet().get_parameter_grid()
cox_grid['l1_ratio'] = [0.5]
cox_grid

{'alpha': [0.001,
  0.003,
  0.005,
  0.008,
  0.01,
  0.02,
  0.03,
  0.04,
  0.05,
  0.06,
  0.07,
  0.08,
  0.09,
  0.1,
  0.15,
  0.2,
  0.3],
 'l1_ratio': [0.5]}

In [22]:
opt_coxnet, opt_coxnet_params, opt_coxnet_search = survwrap.optimize(survwrap.CoxNet(rng_seed), X_train, y_train, 
                                                                    user_grid=cox_grid, n_jobs=4)
opt_coxnet.score(X_test, y_test), opt_coxnet_params

(0.793126155601201, {'alpha': 0.02, 'l1_ratio': 0.5})

In [23]:
if opt_coxnet_search.scoring: print('Grullo!') 
else: print('OK')

OK


In [24]:
#opt_coxnet_search.cv_results_
survwrap.get_model_scores_df(opt_coxnet_search)

KeyError: 'rank_test_mean_fit_time'

# Test FastCPH

In [59]:
batch5 = int(X_train.shape[0] /20 ) 

In [60]:
fl_lasso=survwrap.FastCPH(rng_seed)
fl_lasso

In [61]:
#fl_dsm.layer_sizes=[3]
#fl_dsm.learning_rate=0.01
fl_lasso_xplore=fl_lasso.fit(X_train,y_train)
fl_lasso_xplore.score(X_train,y_train)

  return torch.empty(output_size, device=input.device).scatter_reduce(


epoch: 0
loss: 8.593567848205566
epoch: 1
loss: 8.584389686584473
epoch: 2
loss: 8.575233459472656
epoch: 3
loss: 8.5661039352417
epoch: 4
loss: 8.556997299194336
epoch: 5
loss: 8.547910690307617
epoch: 6
loss: 8.538849830627441
epoch: 7
loss: 8.529813766479492
epoch: 8
loss: 8.520800590515137
epoch: 9
loss: 8.51181411743164
epoch: 10
loss: 8.502851486206055
epoch: 11
loss: 8.493910789489746
epoch: 12
loss: 8.484992027282715
epoch: 13
loss: 8.476089477539062
epoch: 14
loss: 8.46721076965332
epoch: 15
loss: 8.458356857299805
epoch: 16
loss: 8.449514389038086
epoch: 17
loss: 8.440692901611328
epoch: 18
loss: 8.431884765625
epoch: 19
loss: 8.42309284210205
epoch: 20
loss: 8.414307594299316
epoch: 21
loss: 8.40553092956543
epoch: 22
loss: 8.396761894226074
epoch: 23
loss: 8.387993812561035
epoch: 24
loss: 8.379231452941895
epoch: 25
loss: 8.370476722717285
epoch: 26
loss: 8.361723899841309
epoch: 27
loss: 8.35296630859375
epoch: 28
loss: 8.344212532043457
epoch: 29
loss: 8.33544921875
epoc

0.7942605159620818

In [63]:
fl_grid =fl_lasso.get_parameter_grid(max_width=X_train.shape[1])
fl_grid

{'layer_sizes': [[8], [8, 8], [8, 8, 8], [8, 8, 8, 8]]}

In [29]:
# Stratified CV
#opt_dsm, opt_dsm_params, opt_dsm_search = optimize(survwrap.DeepSurvivalMachines(rng_seed=2308),  X_train, y_train, n_jobs=8,
                                                  # user_grid=grid,cv=RepeatedStratifiedKFold(n_splits=5, n_repeats=2, random_state=2308).split(X_train,survwrap.get_indicator(y_train)))
#opt_dsm.score(X_test, y_test), opt_dsm_params

In [37]:
# Stratified CV
fl_lasso_cv=survwrap.FastCPH(rng_seed)
fl_grid=fl_lasso_cv.get_parameter_grid(max_width=X_train.shape[1])
fl_grid

{'layer_sizes': [[8], [8, 8], [8, 8, 8], [8, 8, 8, 8]]}

In [39]:
opt_lasso, opt_lasso_params, opt_lasso_search = survwrap.optimize(fl_lasso_cv,  X_train, y_train, 
                                                                  user_grid=fl_grid,
                                                                 cv=survwrap.survival_crossval_splitter(X_train,y_train,
                                                                                                        n_repeats=1,n_splits=3),
                                                                 )

  return torch.empty(output_size, device=input.device).scatter_reduce(


epoch: 0
loss: 7.848711013793945
epoch: 1
loss: 7.841729164123535
epoch: 2
loss: 7.8348798751831055
epoch: 3
loss: 7.828098297119141
epoch: 4
loss: 7.821382999420166
epoch: 5
loss: 7.814732551574707
epoch: 6
loss: 7.8081464767456055
epoch: 7
loss: 7.801629066467285
epoch: 8
loss: 7.795176029205322
epoch: 9
loss: 7.788789749145508
epoch: 10
loss: 7.782468318939209
epoch: 11
loss: 7.776213645935059
epoch: 12
loss: 7.770027160644531
epoch: 13
loss: 7.763906478881836
epoch: 14
loss: 7.758082866668701
epoch: 15
loss: 7.7524027824401855
epoch: 16
loss: 7.7467851638793945
epoch: 17
loss: 7.741224765777588
epoch: 18
loss: 7.735724449157715
epoch: 19
loss: 7.730283260345459
epoch: 20
loss: 7.7249016761779785
epoch: 21
loss: 7.719577312469482
epoch: 22
loss: 7.714314937591553
epoch: 23
loss: 7.70911169052124
epoch: 24
loss: 7.703969478607178
epoch: 25
loss: 7.6988844871521
epoch: 26
loss: 7.693858623504639
epoch: 27
loss: 7.688893795013428
epoch: 28
loss: 7.683988094329834
epoch: 29
loss: 7.6791

In [41]:
opt_lasso.score(X_test, y_test), opt_lasso_params

(0.7922897967423662, {'layer_sizes': [8, 8, 8, 8]})