In [1]:
import os
import pandas as pd 
import numpy as np

from expert_tree import get_expert_tree_results, Expert_Tree
from wrapper import best_first_search_mg

<b>Read in pre-feature-selection data</b>

In [2]:
# Load the pre-feature-selection table and preview its first five rows.
data_pre = pd.read_csv(
    './data/feature_selection_preprocessed_data.csv'
)
data_pre.head()

Unnamed: 0.1,Unnamed: 0,Username,Mobility,ArmSwelling,BreastSwelling,Skin,PAS,FHT,DISCOMFORT,SYM_COUNT,...,Radiation,Age,SLNB_Removed_LN,ALND_Removed_LN,SLNB_ALND_Removed,Number_nodes,Mastectomy,Lumpectomy,Hormonal,BMI
0,0,B001_year,1,0,0,0,0,1,0,2,...,0,40.0,1.0,0.0,0.0,1.0,1,0,0,22.1
1,1,B002_year,0,2,3,0,0,1,1,7,...,1,63.0,0.0,14.0,0.0,14.0,0,1,0,38.7
2,2,B003_year,1,0,0,0,0,0,0,1,...,0,42.0,2.0,0.0,0.0,2.0,1,0,1,32.6
3,3,B004_year,0,0,0,0,0,1,1,3,...,1,47.0,5.0,0.0,0.0,5.0,1,0,1,42.6
4,4,B005_year,2,1,0,0,2,0,2,11,...,1,33.0,0.0,16.0,0.0,16.0,1,0,0,17.4


In [3]:
# report the (rows, columns) shape of the pre-feature-selection table
print(f"shape: {data_pre.shape}")

shape: (906, 25)


<b>Read in the data Simay used</b> 

In [4]:
# Load the META data table and preview its first five rows.
data_post = pd.read_csv(
    './data/META_data_correct_v5.csv'
)
data_post.head()

Unnamed: 0,Username,Mobility,ArmSwelling,BreastSwelling,Skin,PAS,FHT,DISCOMFORT,SYM_COUNT,TIME_LAPSE,LVC,ChestWallSwelling,fluid_total
0,ML001,3,3,3,1,1,3,2,18,2.0,0.169732,0,5
1,ML002,1,0,0,0,0,1,2,8,8.5,-0.067333,1,2
2,ML003,3,2,3,4,3,4,4,22,3.8,0.094652,2,6
3,ML004,1,0,0,0,1,1,1,6,2.3,0.009488,0,2
4,ML005,3,2,2,1,2,2,2,19,9.1,0.035549,2,6


In [5]:
# report the (rows, columns) shape of the META data table
print(f"shape: {data_post.shape}")

shape: (1042, 13)


<b>Check the overlap between the two datasets based on Username</b>

In [6]:
# Count the distinct Username values that occur in both tables.
num_overlapped_username = len(
    set(data_post['Username']).intersection(set(data_pre['Username']))
)
print(f'number of overlapped usernames between the two csv files: {num_overlapped_username}')

number of overlapped usernames between the two csv files: 526


<b> Read data </b>

In [7]:
# define which dataset to use:
# a value > 0 selects './data/feature_selection_preprocessed_data.csv',
# 0 selects './data/META_data_correct_v5.csv' (see the read cell below)
IF_USE_PRE_DATA = 1

In [8]:
# read in dataset
# Resolve the switch once so the file choice and the column list below can never
# disagree (the original tested `> 0` for the path but plain truthiness for the
# columns, which differ for negative values).
use_pre_data = IF_USE_PRE_DATA > 0
DATA_PATH = ('./data/META_data_correct_v5.csv', './data/feature_selection_preprocessed_data.csv')[use_pre_data]
data = pd.read_csv(DATA_PATH)

# drop the identifier columns before the numeric coercion below; otherwise the
# non-numeric 'Username' values would become NaN and dropna() would discard rows
data = data.drop(columns=['Username', 'Unnamed: 0'] if use_pre_data else 'Username')

# the pre-feature-selection data contains cells such as '#DEV/0!' (sic, presumably
# a spreadsheet division error) or "" that make pandas read a column as strings
# instead of floats, which breaks numeric operations;
# convert every such cell to NaN ...
data = data.apply(lambda x: pd.to_numeric(x, errors='coerce'))
# ... and drop the rows containing NaN
data = data.dropna()

# drop records with TIME_LAPSE below 0.5 (original note: "within 6 months";
# presumably TIME_LAPSE is in years - TODO confirm).
# This runs after the coercion so the comparison also works when TIME_LAPSE was
# read as strings; .copy() makes the column assignment below act on an
# independent frame instead of a slice.
data = data[data.TIME_LAPSE >= 0.5].copy()

# add log of time elapsed
data['TIME_LAPSE_LOG'] = np.log(data['TIME_LAPSE'])

# print shape and columns
print(f"data shape: {data.shape}")
print("columns:")
print(data.columns.values)

data shape: (892, 23)
columns:
['Mobility' 'ArmSwelling' 'BreastSwelling' 'Skin' 'PAS' 'FHT' 'DISCOMFORT'
 'SYM_COUNT' 'TIME_LAPSE' 'LVC' 'ChestWallSwelling' 'fluid_total'
 'Chemotherapy' 'Radiation' 'Age' 'SLNB_Removed_LN' 'ALND_Removed_LN'
 'SLNB_ALND_Removed' 'Number_nodes' 'Mastectomy' 'Lumpectomy' 'Hormonal'
 'BMI']


<b>Generate labels</b>

In [9]:
# generate 5 labels
# Run the expert tree on every row and keep each result. The original loop
# overwrote `tag` on every iteration, so only the last row's result survived.
# NOTE(review): presumably run() returns the row's five-class label - confirm
# against expert_tree.Expert_Tree.
labels_5 = []
for i, row in data.iterrows():
    tag = Expert_Tree(row).run()
    labels_5.append(tag)

In [10]:
# Three-class run of the expert tree; the returned triple is unpacked into
# labels_3, labelNames_3 and flags_3.
labels_3, labelNames_3, flags_3 = get_expert_tree_results(
    data,
    three_class=True,
)

In [11]:
# generate 2 labels
# The result is now bound to `labels_2`, matching the `labels_3` naming; the
# original bound it to the misspelled `labelslabels_2`.
# NOTE(review): this call passes three_class=True, exactly like the 3-label
# cell, so these results are presumably three-class as well - confirm which
# argument of get_expert_tree_results yields the two-class labelling.
labels_2, labelNames_2, flags_2 = get_expert_tree_results(data, three_class=True)
# backward-compatible alias for the original, misspelled variable name
labelslabels_2 = labels_2

<b>Drop LVC and TIME_LAPSE</b>

In [12]:
# Remove the LVC and TIME_LAPSE columns from the feature table.
data = data.drop(['LVC', 'TIME_LAPSE'], axis=1)
# print shape and columns
print(f"data shape: {data.shape}")
print("columns:")
print(data.columns.values)

data shape: (892, 22)
columns:
['Mobility' 'ArmSwelling' 'BreastSwelling' 'Skin' 'PAS' 'FHT' 'DISCOMFORT'
 'SYM_COUNT' 'ChestWallSwelling' 'fluid_total' 'Chemotherapy' 'Radiation'
 'Age' 'SLNB_Removed_LN' 'ALND_Removed_LN' 'SLNB_ALND_Removed'
 'Number_nodes' 'Mastectomy' 'Lumpectomy' 'Hormonal' 'BMI'
 'TIME_LAPSE_LOG']


<b>Generate Data for model</b>

In [13]:
# Model inputs: the three-class labels as target, the remaining columns as a
# plain array of features.
y = labels_3
X = data.to_numpy()
print(f"X shape: {X.shape}")
print(f"y length: {len(y)}")

X shape: (892, 22)
y length: 892


<b>Define Evaluator</b>

In [14]:
from sklearn.model_selection import StratifiedKFold, RepeatedStratifiedKFold, cross_validate

def cross_validate_custom(X, y, num_repeated, estimator, n_splits=8, random_state=None):
    """Score `estimator` on (X, y) with stratified k-fold cross-validation.

    Parameters
    ----------
    X, y : feature matrix and target, passed unchanged to `cross_validate`.
    num_repeated : int
        When greater than 1, the stratified k-fold split is repeated this many
        times (RepeatedStratifiedKFold); otherwise a single StratifiedKFold
        pass is used.
    estimator : estimator object handed to `cross_validate`.
    n_splits : int, default 8
        Number of folds per repetition.
    random_state : optional
        Forwarded to RepeatedStratifiedKFold only; the single-pass splitter is
        created without shuffling and takes no seed.

    Returns
    -------
    (mean test accuracy, mean train accuracy) over all folds.
    """
    if num_repeated > 1:
        # The scikit-learn keyword is `n_repeats`; the original passed
        # `num_repeated=`, which raises TypeError as soon as this branch runs.
        skf = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=num_repeated, random_state=random_state)
    else:
        skf = StratifiedKFold(n_splits=n_splits)
    # Only the scores are used here, so the fitted estimators are not requested.
    scores = cross_validate(estimator, X, y, scoring='accuracy', n_jobs=-1, cv=skf, verbose=0, return_train_score=True)
    return np.mean(scores['test_score']), np.mean(scores['train_score'])

<b>Define estimator</b>

In [15]:
# Gradient boosting tree classifier from scikit-learn.
from sklearn.ensemble import GradientBoostingClassifier

# Hyper-parameters; the XGB cell builds its classifier from this same dict.
params = {
    'learning_rate': 0.1,
    'max_depth': 2,
    'n_estimators': 70,
}
gbt = GradientBoostingClassifier(**params)
gbt

GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=2,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=70,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=None, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [16]:
# XGB: XGBoost classifier built from the same `params` dict as the
# gradient boosting tree above
from xgboost import XGBClassifier
xgb = XGBClassifier(**params)
xgb

XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None, gamma=None,
              gpu_id=None, importance_type='gain', interaction_constraints=None,
              learning_rate=0.1, max_delta_step=None, max_depth=2,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=70, n_jobs=None, num_parallel_tree=None,
              objective='binary:logistic', random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None)

<b>Run-Search</b>

In [17]:
# Run the best-first wrapper search with the XGB estimator, scoring candidate
# feature subsets through cross_validate_custom. The first and third returned
# values are kept as the best accuracy and the best feature set.
# NOTE(review): the meaning of `patience` is defined inside wrapper.py - confirm there.
patience = 100
estimator = xgb
evaluator = cross_validate_custom
best_feature_set_accu, _, best_feature_set, record = best_first_search_mg(
    X, y, patience, estimator, evaluator,
    num_repeated=1, verbose=False, mega_step=False,
)

[0]
accuracy: 0.5839768339768339
[1]
accuracy: 0.755680904118404
[2]
accuracy: 0.5034085424710425
[3]
accuracy: 0.5715894465894467
[4]
accuracy: 0.5469956563706564
[5]
accuracy: 0.6546010296010296
[6]
accuracy: 0.5639981499356499
[7]
accuracy: 0.6770732786357786
[8]
accuracy: 0.5089888996138996
[9]
accuracy: 0.721877010939511
[10]
accuracy: 0.442818935006435
[11]
accuracy: 0.442818935006435
[12]
accuracy: 0.4046714124839125
[13]
accuracy: 0.4384049227799228
[14]
accuracy: 0.4427887709137709
[15]
accuracy: 0.4933236808236808
[16]
accuracy: 0.46738256113256116
[17]
accuracy: 0.442818935006435
[18]
accuracy: 0.442818935006435
[19]
accuracy: 0.4617921492921493
[20]
accuracy: 0.4585042631917632
[21]
accuracy: 0.5478603603603603
[1 9]
accuracy: 0.7858550514800515
K: 1
iteration time= 4.042376279830933
local best = 0.7859, features = [1 9]
[0 1 9]
accuracy: 0.8094835907335908
[1 2 9]
accuracy: 0.7634431306306306
[1 3 9]
accuracy: 0.8128217503217503
[1 4 9]
accuracy: 0.7768862612612613
[1 5 9]

[ 1  2  3  5  6  8  9 14 19 21]
accuracy: 0.8643721846846847
[ 1  2  3  5  6  8  9 14 20 21]
accuracy: 0.8564893018018018
[ 1  2  3  5  6  8  9 11 14 18 21]
accuracy: 0.8643721846846847
K: 0
iteration time= 4.413939952850342
[ 0  1  2  3  4  5  6  8  9 14 21]
accuracy: 0.8632762226512227
[ 2  3  4  5  6  8  9 14 21]
accuracy: 0.7533180501930502
[ 1  3  4  5  6  8  9 14 21]
accuracy: 0.8307894948519948
[ 1  2  4  5  6  8  9 14 21]
accuracy: 0.8431366634491634
[ 1  2  3  4  5  8  9 14 21]
accuracy: 0.8307291666666666
[ 1  2  3  4  5  6  7  8  9 14 21]
accuracy: 0.8543577059202059
[ 1  2  3  4  5  6  9 14 21]
accuracy: 0.8621400418275418
[ 1  2  3  4  5  6  8 14 21]
accuracy: 0.8678008365508366
[ 1  2  3  4  5  6  8  9 10 14 21]
accuracy: 0.8610239703989704
[ 1  2  3  4  5  6  8  9 11 14 21]
accuracy: 0.8643721846846847
[ 1  2  3  4  5  6  8  9 12 14 21]
accuracy: 0.8587918275418276
[ 1  2  3  4  5  6  8  9 13 14 21]
accuracy: 0.8621903153153153
[ 1  2  3  4  5  6  8  9 21]
accuracy: 0.85

[ 1  2  3  4  5  6  8 10 11 13 14 17 20 21]
accuracy: 0.8722751769626769
[ 1  2  3  4  5  6  8 10 11 13 14 18 20 21]
accuracy: 0.8711591055341055
[ 1  2  3  4  5  6  8 10 11 13 14 19 20 21]
accuracy: 0.8722751769626769
[ 1  2  3  4  5  6  8 10 11 13 14 21]
accuracy: 0.8666847651222651
[ 1  2  3  4  5  6  8 10 11 13 14 20]
accuracy: 0.8532416344916345
[ 1  2  3  4  5  6  8 10 11 13 14 17 19 20 21]
accuracy: 0.8722751769626769
K: 2
iteration time= 5.445363759994507
[ 0  1  2  3  4  5  6  8 10 14 20 21]
accuracy: 0.866654601029601
[ 2  3  4  5  6  8 10 14 20 21]
accuracy: 0.7049851190476191
[ 1  3  4  5  6  8 10 14 20 21]
accuracy: 0.8499034749034748
[ 1  2  4  5  6  8 10 14 20 21]
accuracy: 0.8420306467181466
[ 1  2  3  5  6  8 10 14 20 21]
accuracy: 0.8677807271557272
[ 1  2  3  4  6  8 10 14 20 21]
accuracy: 0.8476210585585585
[ 1  2  3  4  5  8 10 14 20 21]
accuracy: 0.836369851994852
[ 1  2  3  4  5  6  7  8 10 14 20 21]
accuracy: 0.8565496299871299
[ 1  2  3  4  5  6 10 14 20 21]
ac

[ 1  2  3  4  5  6  8 11 13 14 17 20]
accuracy: 0.8532416344916345
[ 1  2  3  4  5  6 11 13 14 17 19 20 21]
accuracy: 0.8711591055341055
K: 8
iteration time= 4.949003458023071
[ 0  1  2  3  4  5  6  8 11 13 14 19 20 21]
accuracy: 0.8677706724581724
[ 2  3  4  5  6  8 11 13 14 19 20 21]
accuracy: 0.7016167953667953
[ 1  3  4  5  6  8 11 13 14 19 20 21]
accuracy: 0.8442929536679536
[ 1  2  4  5  6  8 11 13 14 19 20 21]
accuracy: 0.8375462516087516
[ 1  2  3  5  6  8 11 13 14 19 20 21]
accuracy: 0.866654601029601
[ 1  2  3  4  6  8 11 13 14 19 20 21]
accuracy: 0.8476210585585585
[ 1  2  3  4  5  8 11 13 14 19 20 21]
accuracy: 0.83527388996139
[ 1  2  3  4  5  6  7  8 11 13 14 19 20 21]
accuracy: 0.8576657014157014
[ 1  2  3  4  5  6 11 13 14 19 20 21]
accuracy: 0.8711591055341055
[ 1  2  3  4  5  6  8  9 11 13 14 19 20 21]
accuracy: 0.8598777348777349
[ 1  2  3  4  5  6  8 11 12 13 14 19 20 21]
accuracy: 0.8700430341055341
[ 1  2  3  4  5  6  8 11 13 19 20 21]
accuracy: 0.8677907818532818

[ 2  3  4  5  6  8 10 14 19 20 21]
accuracy: 0.7094393500643501
[ 1  3  4  5  6  8 10 14 19 20 21]
accuracy: 0.8499034749034748
[ 1  2  4  5  6  8 10 14 19 20 21]
accuracy: 0.8386723777348777
[ 1  2  3  5  6  8 10 14 19 20 21]
accuracy: 0.8677807271557272
[ 1  2  3  4  6  8 10 14 19 20 21]
accuracy: 0.8476210585585585
[ 1  2  3  4  5  8 10 14 19 20 21]
accuracy: 0.8374959781209781
[ 1  2  3  4  5  6  7  8 10 14 19 20 21]
accuracy: 0.8565496299871299
[ 1  2  3  4  5  6 10 14 19 20 21]
accuracy: 0.8711490508365509
[ 1  2  3  4  5  6  8  9 10 14 19 20 21]
accuracy: 0.8576254826254825
[ 1  2  3  4  5  6  8 14 19 20 21]
accuracy: 0.8711591055341055
[ 1  2  3  4  5  6  8 10 12 14 19 20 21]
accuracy: 0.8689269626769627
[ 1  2  3  4  5  6  8 10 19 20 21]
accuracy: 0.8677807271557272
[ 1  2  3  4  5  6  8 10 14 15 19 20 21]
accuracy: 0.8689068532818532
[ 1  2  3  4  5  6  8 10 14 16 19 20 21]
accuracy: 0.8677807271557272
[ 1  2  3  4  5  6  8 10 14 19 21]
accuracy: 0.8689269626769627
[ 1  2  3 

[ 1  2  3  4  6  8 10 14 17 19 20 21]
accuracy: 0.8476210585585585
[ 1  2  3  4  5  8 10 14 17 19 20 21]
accuracy: 0.8386120495495495
[ 1  2  3  4  5  6  7  8 10 14 17 19 20 21]
accuracy: 0.8565496299871299
[ 1  2  3  4  5  6 10 14 17 19 20 21]
accuracy: 0.8711490508365509
[ 1  2  3  4  5  6  8  9 10 14 17 19 20 21]
accuracy: 0.8553933397683398
[ 1  2  3  4  5  6  8 14 17 19 20 21]
accuracy: 0.8711591055341055
[ 1  2  3  4  5  6  8 10 12 14 17 19 20 21]
accuracy: 0.8689269626769627
[ 1  2  3  4  5  6  8 10 17 19 20 21]
accuracy: 0.8677807271557272
[ 1  2  3  4  5  6  8 10 14 15 17 19 20 21]
accuracy: 0.8689068532818532
[ 1  2  3  4  5  6  8 10 14 16 17 19 20 21]
accuracy: 0.8677807271557272
[ 1  2  3  4  5  6  8 10 14 17 19 21]
accuracy: 0.8678008365508365
[ 1  2  3  4  5  6  8 10 14 17 19 20]
accuracy: 0.8532416344916345
[ 1  2  3  4  5  6 14 17 19 20 21]
accuracy: 0.8711490508365509
K: 21
iteration time= 4.668956279754639
[ 0  1  2  3  4  5  6  8 10 13 14 17 19 20 21]
accuracy: 0.865

[ 0  1  2  3  4  5  6  8 10 11 13 14 18 19 20 21]
accuracy: 0.8644224581724582
[ 2  3  4  5  6  8 10 11 13 14 18 19 20 21]
accuracy: 0.7027429214929215
[ 1  3  4  5  6  8 10 11 13 14 18 19 20 21]
accuracy: 0.8510396557271558
[ 1  2  4  5  6  8 10 11 13 14 18 19 20 21]
accuracy: 0.8375462516087516
[ 1  2  3  5  6  8 10 11 13 14 18 19 20 21]
accuracy: 0.8655385296010296
[ 1  2  3  4  6  8 10 11 13 14 18 19 20 21]
accuracy: 0.8464949324324325
[ 1  2  3  4  5  8 10 11 13 14 18 19 20 21]
accuracy: 0.83527388996139
[ 1  2  3  4  5  6  7  8 10 11 13 14 18 19 20 21]
accuracy: 0.8543074324324325
[ 1  2  3  4  5  6  8  9 10 11 13 14 18 19 20 21]
accuracy: 0.8610038610038611
[ 1  2  3  4  5  6  8 10 11 12 13 14 18 19 20 21]
accuracy: 0.8689269626769627
[ 1  2  3  4  5  6  8 10 11 13 18 19 20 21]
accuracy: 0.868916907979408
[ 1  2  3  4  5  6  8 10 11 13 14 15 18 19 20 21]
accuracy: 0.8711389961389961
[ 1  2  3  4  5  6  8 10 11 13 14 16 18 19 20 21]
accuracy: 0.8666646557271558
[ 1  2  3  4  5  6

[ 1  2  3  4  5  6  7  8 13 14 18 19 20 21]
accuracy: 0.8543074324324325
[ 1  2  3  4  5  6 13 14 18 19 20 21]
accuracy: 0.8711591055341055
[ 1  2  3  4  5  6  8  9 13 14 18 19 20 21]
accuracy: 0.8610038610038611
[ 1  2  3  4  5  6  8 13 18 19 20 21]
accuracy: 0.868916907979408
[ 1  2  3  4  5  6  8 13 14 15 18 19 20 21]
accuracy: 0.8711389961389961
[ 1  2  3  4  5  6  8 13 14 16 18 19 20 21]
accuracy: 0.8666646557271558
[ 1  2  3  4  5  6  8 13 14 18 19 21]
accuracy: 0.8655485842985843
[ 1  2  3  4  5  6  8 13 14 18 19 20]
accuracy: 0.8543577059202059
[ 1  2  3  4  5  6 13 14 15 18 19 20 21]
accuracy: 0.8700430341055341
K: 34
iteration time= 4.11209511756897
[ 0  1  2  3  4  5  6  8 11 13 14 17 18 19 20 21]
accuracy: 0.866654601029601
[ 2  3  4  5  6  8 11 13 14 17 18 19 20 21]
accuracy: 0.7016167953667953
[ 1  3  4  5  6  8 11 13 14 17 18 19 20 21]
accuracy: 0.8499135296010296
[ 1  2  4  5  6  8 11 13 14 17 18 19 20 21]
accuracy: 0.8375462516087516
[ 1  2  3  5  6  8 11 13 14 17 18 1

[ 1  2  3  4  5  6  8 10 11 12 14 18 20 21]
accuracy: 0.8678108912483912
[ 1  2  3  4  5  6  8 10 11 18 20 21]
accuracy: 0.8711591055341055
[ 1  2  3  4  5  6  8 10 11 14 15 18 20 21]
accuracy: 0.8711389961389961
[ 1  2  3  4  5  6  8 10 11 14 16 18 20 21]
accuracy: 0.8688967985842986
[ 1  2  3  4  5  6  8 10 11 14 18 21]
accuracy: 0.8700329794079793
[ 1  2  3  4  5  6  8 10 11 14 18 20]
accuracy: 0.8532416344916345
[ 1  2  3  4  5  6 10 11 18 20 21]
accuracy: 0.8677807271557272
K: 41
iteration time= 4.424130201339722
[ 0  1  2  3  4  5  6 11 13 14 20 21]
accuracy: 0.8688967985842986
[ 2  3  4  5  6 11 13 14 20 21]
accuracy: 0.7038791023166022
[ 1  3  4  5  6 11 13 14 20 21]
accuracy: 0.8499235842985844
[ 1  2  4  5  6 11 13 14 20 21]
accuracy: 0.8375462516087516
[ 1  2  3  5  6 11 13 14 20 21]
accuracy: 0.8655485842985843
[ 1  2  3  4  6 11 13 14 20 21]
accuracy: 0.8453989703989704
[ 1  2  3  4  5 11 13 14 20 21]
accuracy: 0.836400016087516
[ 1  2  3  4  5  6  7 11 13 14 20 21]
accura

[ 2  3  4  5  6 10 13 14 15 20 21]
accuracy: 0.6971525096525095
[ 1  3  4  5  6 10 13 14 15 20 21]
accuracy: 0.8499235842985844
[ 1  2  4  5  6 10 13 14 15 20 21]
accuracy: 0.8375462516087516
[ 1  2  3  5  6 10 13 14 15 20 21]
accuracy: 0.8666646557271558
[ 1  2  3  4  6 10 13 14 15 20 21]
accuracy: 0.8487572393822393
[ 1  2  3  4  5 10 13 14 15 20 21]
accuracy: 0.8341276544401544
[ 1  2  3  4  5  6  7 10 13 14 15 20 21]
accuracy: 0.8554235038610039
[ 1  2  3  4  5  6  9 10 13 14 15 20 21]
accuracy: 0.8621400418275418
[ 1  2  3  4  5  6 10 12 13 14 15 20 21]
accuracy: 0.8689169079794079
[ 1  2  3  4  5  6 10 14 15 20 21]
accuracy: 0.8722651222651223
[ 1  2  3  4  5  6 10 13 15 20 21]
accuracy: 0.8700229247104247
[ 1  2  3  4  5  6 10 13 14 15 16 20 21]
accuracy: 0.8700229247104247
[ 1  2  3  4  5  6 10 13 14 15 17 20 21]
accuracy: 0.8722751769626769
[ 1  2  3  4  5  6 10 13 14 15 18 20 21]
accuracy: 0.8700430341055341
[ 1  2  3  4  5  6 10 13 14 15 19 20 21]
accuracy: 0.872275176962676

[ 1  2  3  4  5  6 10 15 17 19 20 21]
accuracy: 0.8711389961389961
[ 1  2  3  4  5  6 10 14 15 16 17 19 20 21]
accuracy: 0.8722550675675675
[ 1  2  3  4  5  6 10 14 15 17 18 19 20 21]
accuracy: 0.8711591055341055
[ 1  2  3  4  5  6 10 14 15 17 20 21]
accuracy: 0.8722651222651223
[ 1  2  3  4  5  6 10 14 15 17 19 21]
accuracy: 0.864432512870013
[ 1  2  3  4  5  6 10 14 15 17 19 20]
accuracy: 0.8610541344916345
[ 1  2  3  4  5  6 14 15 17 20 21]
accuracy: 0.8711490508365508
K: 3
iteration time= 4.6635801792144775
[ 0  1  2  3  4  5  6 10 11 13 14 15 17 19 20 21]
accuracy: 0.8689068532818532
[ 2  3  4  5  6 10 11 13 14 15 17 19 20 21]
accuracy: 0.7072373712998713
[ 1  3  4  5  6 10 11 13 14 15 17 19 20 21]
accuracy: 0.8487974581724582
[ 1  2  4  5  6 10 11 13 14 15 17 19 20 21]
accuracy: 0.8386723777348777
[ 1  2  3  5  6 10 11 13 14 15 17 19 20 21]
accuracy: 0.8666646557271558
[ 1  2  3  4  6 10 11 13 14 15 17 19 20 21]
accuracy: 0.847641167953668
[ 1  2  3  4  5 10 11 13 14 15 17 19 20 

[ 1  2  3  4  5  6 11 14 15 19 20]
accuracy: 0.8610541344916345
[ 1  2  3  4  5  6 11 15 16 19 20 21]
accuracy: 0.8700229247104247
K: 10
iteration time= 3.7674577236175537
[ 0  1  2  3  4  5  6 10 13 14 15 17 20 21]
accuracy: 0.8689068532818532
[ 2  3  4  5  6 10 13 14 15 17 20 21]
accuracy: 0.6971525096525095
[ 1  3  4  5  6 10 13 14 15 17 20 21]
accuracy: 0.8487974581724582
[ 1  2  4  5  6 10 13 14 15 17 20 21]
accuracy: 0.8375462516087516
[ 1  2  3  5  6 10 13 14 15 17 20 21]
accuracy: 0.8666646557271558
[ 1  2  3  4  6 10 13 14 15 17 20 21]
accuracy: 0.8487572393822393
[ 1  2  3  4  5 10 13 14 15 17 20 21]
accuracy: 0.833011583011583
[ 1  2  3  4  5  6  7 10 13 14 15 17 20 21]
accuracy: 0.8554235038610039
[ 1  2  3  4  5  6  9 10 13 14 15 17 20 21]
accuracy: 0.8621400418275418
[ 1  2  3  4  5  6 10 12 13 14 15 17 20 21]
accuracy: 0.8689169079794079
[ 1  2  3  4  5  6 10 13 15 17 20 21]
accuracy: 0.8700229247104247
[ 1  2  3  4  5  6 10 13 14 15 16 17 20 21]
accuracy: 0.870022924710

[ 1  2  3  5  6 10 11 14 15 17 20 21]
accuracy: 0.8689169079794079
[ 1  2  3  4  6 10 11 14 15 17 20 21]
accuracy: 0.8509893822393823
[ 1  2  3  4  5 10 11 14 15 17 20 21]
accuracy: 0.8352437258687259
[ 1  2  3  4  5  6  7 10 11 14 15 17 20 21]
accuracy: 0.8543074324324325
[ 1  2  3  4  5  6  9 10 11 14 15 17 20 21]
accuracy: 0.8565194658944659
[ 1  2  3  4  5  6 10 11 12 14 15 17 20 21]
accuracy: 0.8700229247104247
[ 1  2  3  4  5  6 10 11 15 17 20 21]
accuracy: 0.8711389961389961
[ 1  2  3  4  5  6 10 11 14 15 16 17 20 21]
accuracy: 0.8722550675675675
[ 1  2  3  4  5  6 10 11 14 15 17 21]
accuracy: 0.864432512870013
[ 1  2  3  4  5  6 10 11 14 15 17 20]
accuracy: 0.8610541344916345
[ 1  2  3  4  5  6 10 11 15 16 17 20 21]
accuracy: 0.8711389961389961
K: 18
iteration time= 3.9172523021698
[ 0  1  2  3  4  5  6 10 14 15 17 20 21]
accuracy: 0.8688867438867438
[ 2  3  4  5  6 10 14 15 17 20 21]
accuracy: 0.7083634974259974
[ 1  3  4  5  6 10 14 15 17 20 21]
accuracy: 0.8499336389961389
[

[ 1  2  3  4  5  6  8 10 11 14 15 16 17 18 19 20 21]
accuracy: 0.8689169079794079
[ 1  2  3  4  5  6  8 10 11 16 17 18 19 20 21]
accuracy: 0.8644224581724582
[ 1  2  3  4  5  6  8 10 11 15 17 18 19 20 21]
accuracy: 0.8700430341055341
[ 1  2  3  4  5  6  8 10 11 15 16 18 19 20 21]
accuracy: 0.8733912483912484
[ 1  2  3  4  5  6  8 10 11 15 16 17 18 20 21]
accuracy: 0.8722651222651223
[ 1  2  3  4  5  6  8 10 11 15 16 17 18 19 21]
accuracy: 0.8666646557271558
[ 1  2  3  4  5  6  8 10 11 15 16 17 18 19 20]
accuracy: 0.8532114703989704
[ 1  2  3  4  5  6  8 11 15 16 18 19 20 21]
accuracy: 0.872275176962677
K: 24
iteration time= 5.8280134201049805
[ 0  1  2  3  4  5  6  8 10 15 16 17 18 19 20 21]
accuracy: 0.866654601029601
[ 2  3  4  5  6  8 10 15 16 17 18 19 20 21]
accuracy: 0.7083333333333333
[ 1  3  4  5  6  8 10 15 16 17 18 19 20 21]
accuracy: 0.8476612773487773
[ 1  2  4  5  6  8 10 15 16 17 18 19 20 21]
accuracy: 0.8341678732303732
[ 1  2  3  5  6  8 10 15 16 17 18 19 20 21]
accuracy

[ 1  2  3  4  6  8 11 15 16 17 18 19 20 21]
accuracy: 0.8420105373230373
[ 1  2  3  4  5  8 11 15 16 17 18 19 20 21]
accuracy: 0.8363597972972973
[ 1  2  3  4  5  6  7  8 11 15 16 17 18 19 20 21]
accuracy: 0.8486969111969112
[ 1  2  3  4  5  6 11 15 16 17 18 19 20 21]
accuracy: 0.8700229247104247
[ 1  2  3  4  5  6  8  9 11 15 16 17 18 19 20 21]
accuracy: 0.8621299871299871
[ 1  2  3  4  5  6  8 11 12 15 16 17 18 19 20 21]
accuracy: 0.865558638996139
[ 1  2  3  4  5  6  8 11 13 15 16 17 18 19 20 21]
accuracy: 0.8700229247104247
[ 1  2  3  4  5  6  8 11 14 15 16 17 18 19 20 21]
accuracy: 0.8678008365508365
[ 1  2  3  4  5  6  8 11 16 17 18 19 20 21]
accuracy: 0.866654601029601
[ 1  2  3  4  5  6  8 11 15 17 18 19 20 21]
accuracy: 0.8689269626769627
[ 1  2  3  4  5  6  8 11 15 16 17 18 20 21]
accuracy: 0.8711490508365509
[ 1  2  3  4  5  6  8 11 15 16 17 18 19 21]
accuracy: 0.8677807271557272
[ 1  2  3  4  5  6  8 11 15 16 17 18 19 20]
accuracy: 0.8532114703989704
[ 1  2  3  4  5  6 11 1

[ 1  2  3  4  5  8 10 15 16 17 18 20 21]
accuracy: 0.8386019948519948
[ 1  2  3  4  5  6  7  8 10 15 16 17 18 20 21]
accuracy: 0.8486969111969112
[ 1  2  3  4  5  6 10 15 16 17 18 20 21]
accuracy: 0.8700229247104247
[ 1  2  3  4  5  6  8  9 10 15 16 17 18 20 21]
accuracy: 0.862129987129987
[ 1  2  3  4  5  6  8 10 12 15 16 17 18 20 21]
accuracy: 0.8666747104247104
[ 1  2  3  4  5  6  8 10 13 15 16 17 18 20 21]
accuracy: 0.8700229247104247
[ 1  2  3  4  5  6  8 10 14 15 16 17 18 20 21]
accuracy: 0.8689169079794079
[ 1  2  3  4  5  6  8 10 16 17 18 20 21]
accuracy: 0.8633063867438867
[ 1  2  3  4  5  6  8 10 15 17 18 20 21]
accuracy: 0.8700430341055341
[ 1  2  3  4  5  6  8 10 15 16 17 18 21]
accuracy: 0.867790781853282
[ 1  2  3  4  5  6  8 10 15 16 17 18 20]
accuracy: 0.8532114703989704
[ 1  2  3  4  5  6 10 15 17 18 20 21]
accuracy: 0.8711490508365509
K: 36
iteration time= 4.8235132694244385
[ 0  1  2  3  4  5  6  8 10 11 15 16 18 20 21]
accuracy: 0.8677807271557272
[ 2  3  4  5  6  8

[ 0  1  2  3  4  5  6 10 14 15 16 17 20 21]
accuracy: 0.8688967985842986
[ 2  3  4  5  6 10 14 15 16 17 20 21]
accuracy: 0.7117217664092664
[ 1  3  4  5  6 10 14 15 16 17 20 21]
accuracy: 0.8499135296010296
[ 1  2  4  5  6 10 14 15 16 17 20 21]
accuracy: 0.8319156209781209
[ 1  2  3  5  6 10 14 15 16 17 20 21]
accuracy: 0.8644124034749034
[ 1  2  3  4  6 10 14 15 16 17 20 21]
accuracy: 0.8487270752895753
[ 1  2  3  4  5 10 14 15 16 17 20 21]
accuracy: 0.8352437258687259
[ 1  2  3  4  5  6  7 10 14 15 16 17 20 21]
accuracy: 0.8565395752895753
[ 1  2  3  4  5  6  8 10 14 15 16 17 20 21]
accuracy: 0.8689169079794079
[ 1  2  3  4  5  6  9 10 14 15 16 17 20 21]
accuracy: 0.8576556467181468
[ 1  2  3  4  5  6 14 15 16 17 20 21]
accuracy: 0.8711389961389961
[ 1  2  3  4  5  6 10 12 14 15 16 17 20 21]
accuracy: 0.867790781853282
[ 1  2  3  4  5  6 10 14 16 17 20 21]
accuracy: 0.8688967985842986
[ 1  2  3  4  5  6 10 14 15 16 17 18 20 21]
accuracy: 0.8700229247104247
[ 1  2  3  4  5  6 10 14 15

[ 0  1  2  3  4  5  6  8 10 14 17 18 20 21]
accuracy: 0.866654601029601
[ 2  3  4  5  6  8 10 14 17 18 20 21]
accuracy: 0.7027328667953667
[ 1  3  4  5  6  8 10 14 17 18 20 21]
accuracy: 0.8510396557271558
[ 1  2  4  5  6  8 10 14 17 18 20 21]
accuracy: 0.8397884491634491
[ 1  2  3  5  6  8 10 14 17 18 20 21]
accuracy: 0.8666646557271558
[ 1  2  3  4  6  8 10 14 17 18 20 21]
accuracy: 0.8487471846846847
[ 1  2  3  4  5  8 10 14 17 18 20 21]
accuracy: 0.8374859234234234
[ 1  2  3  4  5  6  7  8 10 14 17 18 20 21]
accuracy: 0.8543074324324325
[ 1  2  3  4  5  6 10 14 17 18 20 21]
accuracy: 0.8711490508365509
[ 1  2  3  4  5  6  8  9 10 14 17 18 20 21]
accuracy: 0.8598877895752897
[ 1  2  3  4  5  6  8 10 12 14 17 18 20 21]
accuracy: 0.8678108912483912
[ 1  2  3  4  5  6  8 10 17 18 20 21]
accuracy: 0.8711591055341055
[ 1  2  3  4  5  6  8 10 14 15 17 18 20 21]
accuracy: 0.8711389961389961
[ 1  2  3  4  5  6  8 10 14 16 17 18 20 21]
accuracy: 0.8688967985842986
[ 1  2  3  4  5  6  8 10 14

[ 1  2  3  4  5  6  9 11 13 14 19 20 21]
accuracy: 0.8587516087516087
[ 1  2  3  4  5  6 11 12 13 14 19 20 21]
accuracy: 0.8689269626769627
[ 1  2  3  4  5  6 11 13 19 20 21]
accuracy: 0.8689068532818534
[ 1  2  3  4  5  6 11 13 14 16 19 20 21]
accuracy: 0.8677807271557272
[ 1  2  3  4  5  6 11 13 14 19 21]
accuracy: 0.8644425675675675
[ 1  2  3  4  5  6 11 13 14 19 20]
accuracy: 0.8543476512226512
[ 1  2  3  4  5  6 11 12 13 19 20 21]
accuracy: 0.8666747104247104
K: 57
iteration time= 3.7911455631256104
[ 0  1  2  3  4  5  6 11 13 14 18 19 20 21]
accuracy: 0.8688967985842986
[ 2  3  4  5  6 11 13 14 18 19 20 21]
accuracy: 0.7049750643500643
[ 1  3  4  5  6 11 13 14 18 19 20 21]
accuracy: 0.8442929536679538
[ 1  2  4  5  6 11 13 14 18 19 20 21]
accuracy: 0.8364301801801802
[ 1  2  3  5  6 11 13 14 18 19 20 21]
accuracy: 0.8655485842985843
[ 1  2  3  4  6 11 13 14 18 19 20 21]
accuracy: 0.8453989703989704
[ 1  2  3  4  5 11 13 14 18 19 20 21]
accuracy: 0.8375160875160874
[ 1  2  3  4  5

[ 2  3  4  5  6 10 11 13 14 17 20 21]
accuracy: 0.7072574806949807
[ 1  3  4  5  6 10 11 13 14 17 20 21]
accuracy: 0.847671332046332
[ 1  2  4  5  6 10 11 13 14 17 20 21]
accuracy: 0.8375462516087516
[ 1  2  3  5  6 10 11 13 14 17 20 21]
accuracy: 0.8655485842985843
[ 1  2  3  4  6 10 11 13 14 17 20 21]
accuracy: 0.8431567728442728
[ 1  2  3  4  5 10 11 13 14 17 20 21]
accuracy: 0.8386321589446589
[ 1  2  3  4  5  6  7 10 11 13 14 17 20 21]
accuracy: 0.8576657014157014
[ 1  2  3  4  5  6  9 10 11 13 14 17 20 21]
accuracy: 0.8587616634491635
[ 1  2  3  4  5  6 10 11 12 13 14 17 20 21]
accuracy: 0.8689269626769627
[ 1  2  3  4  5  6 10 11 13 17 20 21]
accuracy: 0.8689068532818534
[ 1  2  3  4  5  6 10 11 13 14 16 17 20 21]
accuracy: 0.8677807271557272
[ 1  2  3  4  5  6 10 11 13 14 17 21]
accuracy: 0.8644425675675675
[ 1  2  3  4  5  6 10 11 13 14 17 20]
accuracy: 0.8543476512226512
[ 1  2  3  4  5  6 10 11 12 13 17 20 21]
accuracy: 0.8655485842985844
K: 65
iteration time= 3.995490074157

[ 1  2  3  4  5  8 14 19 20 21]
accuracy: 0.8374959781209781
[ 1  2  3  4  5  6  7  8 14 19 20 21]
accuracy: 0.8565496299871299
[ 1  2  3  4  5  6  8  9 14 19 20 21]
accuracy: 0.8564993564993564
[ 1  2  3  4  5  6  8 12 14 19 20 21]
accuracy: 0.8678108912483913
[ 1  2  3  4  5  6  8 19 20 21]
accuracy: 0.8677807271557272
[ 1  2  3  4  5  6  8 14 16 19 20 21]
accuracy: 0.8666646557271558
[ 1  2  3  4  5  6  8 14 19 21]
accuracy: 0.8689269626769627
[ 1  2  3  4  5  6  8 14 19 20]
accuracy: 0.8532416344916345
[ 0  1  2  3  4  5  6  8 14 19 21]
accuracy: 0.8677606177606179
K: 72
iteration time= 3.70131254196167
[ 0  1  2  3  4  5  6  8 14 18 19 20 21]
accuracy: 0.8711289414414414
[ 2  3  4  5  6  8 14 18 19 20 21]
accuracy: 0.7105453667953667
[ 1  3  4  5  6  8 14 18 19 20 21]
accuracy: 0.8510396557271558
[ 1  2  4  5  6  8 14 18 19 20 21]
accuracy: 0.838662323037323
[ 1  2  3  5  6  8 14 18 19 20 21]
accuracy: 0.8677907818532818
[ 1  2  3  4  6  8 14 18 19 20 21]
accuracy: 0.8487471846846

[ 0  1  2  3  4  5  6 10 13 14 17 19 20 21]
accuracy: 0.8677807271557272
[ 2  3  4  5  6 10 13 14 17 19 20 21]
accuracy: 0.7072172619047619
[ 1  3  4  5  6 10 13 14 17 19 20 21]
accuracy: 0.8499235842985844
[ 1  2  4  5  6 10 13 14 17 19 20 21]
accuracy: 0.8364201254826256
[ 1  2  3  5  6 10 13 14 17 19 20 21]
accuracy: 0.8655485842985843
[ 1  2  3  4  6 10 13 14 17 19 20 21]
accuracy: 0.8431567728442728
[ 1  2  3  4  5 10 13 14 17 19 20 21]
accuracy: 0.83527388996139
[ 1  2  3  4  5  6  7 10 13 14 17 19 20 21]
accuracy: 0.8576657014157014
[ 1  2  3  4  5  6  9 10 13 14 17 19 20 21]
accuracy: 0.8587616634491635
[ 1  2  3  4  5  6 10 13 17 19 20 21]
accuracy: 0.8677907818532818
[ 1  2  3  4  5  6 10 13 14 16 17 19 20 21]
accuracy: 0.8677807271557272
[ 1  2  3  4  5  6 10 13 14 17 19 21]
accuracy: 0.8644425675675675
[ 1  2  3  4  5  6 10 13 14 17 19 20]
accuracy: 0.8554637226512226
[ 1  2  3  4  5  6 10 13 16 17 19 20 21]
accuracy: 0.8666747104247104
K: 80
iteration time= 3.6772365570068

[ 1  2  3  4  5  6  8 17 18 20 21]
accuracy: 0.8700430341055341
[ 1  2  3  4  5  6  8 14 15 17 18 20 21]
accuracy: 0.8700229247104247
[ 1  2  3  4  5  6  8 14 16 17 18 20 21]
accuracy: 0.8677807271557272
[ 1  2  3  4  5  6  8 14 17 18 21]
accuracy: 0.8689169079794079
[ 1  2  3  4  5  6  8 14 17 18 20]
accuracy: 0.8532416344916345
[ 1  2  3  4  5  6 17 18 20 21]
accuracy: 0.8677807271557272
K: 87
iteration time= 4.009920120239258
[ 0  1  2  3  4  5  6  8 10 11 18 20 21]
accuracy: 0.8688967985842986
[ 2  3  4  5  6  8 10 11 18 20 21]
accuracy: 0.710635859073359
[ 1  3  4  5  6  8 10 11 18 20 21]
accuracy: 0.8487874034749034
[ 1  2  4  5  6  8 10 11 18 20 21]
accuracy: 0.8386723777348777
[ 1  2  3  5  6  8 10 11 18 20 21]
accuracy: 0.8688967985842986
[ 1  2  3  4  6  8 10 11 18 20 21]
accuracy: 0.8487371299871299
[ 1  2  3  4  5  8 10 11 18 20 21]
accuracy: 0.8363497425997426
[ 1  2  3  4  5  6  7  8 10 11 18 20 21]
accuracy: 0.8543174871299871
[ 1  2  3  4  5  6  8  9 10 11 18 20 21]
acc

[ 1  2  4  5  6  8 15 16 17 19 20 21]
accuracy: 0.8341779279279279
[ 1  2  3  5  6  8 15 16 17 19 20 21]
accuracy: 0.8689068532818532
[ 1  2  3  4  6  8 15 16 17 19 20 21]
accuracy: 0.8420105373230373
[ 1  2  3  4  5  8 15 16 17 19 20 21]
accuracy: 0.8363597972972973
[ 1  2  3  4  5  6  7  8 15 16 17 19 20 21]
accuracy: 0.8498129826254827
[ 1  2  3  4  5  6  8  9 15 16 17 19 20 21]
accuracy: 0.857645592020592
[ 1  2  3  4  5  6  8 12 15 16 17 19 20 21]
accuracy: 0.8666747104247104
[ 1  2  3  4  5  6  8 13 15 16 17 19 20 21]
accuracy: 0.8689068532818534
[ 1  2  3  4  5  6  8 14 15 16 17 19 20 21]
accuracy: 0.8678008365508365
[ 1  2  3  4  5  6  8 16 17 19 20 21]
accuracy: 0.866654601029601
[ 1  2  3  4  5  6  8 15 17 19 20 21]
accuracy: 0.8689169079794079
[ 1  2  3  4  5  6  8 15 16 17 19 21]
accuracy: 0.8666747104247104
[ 1  2  3  4  5  6  8 15 16 17 19 20]
accuracy: 0.853221525096525
[ 1  2  3  4  5  6  8 13 15 17 19 20 21]
accuracy: 0.8666847651222651
K: 95
iteration time= 4.47513246

In [18]:
# Show the best accuracy, the raw selection vector, and the names of the
# columns whose entry in that vector is non-zero.
print(best_feature_set_accu)
print(best_feature_set)
selected_mask = best_feature_set != 0
print("selected feature: {}".format(data.columns.values[selected_mask]))

0.8733912483912484
[0 1 1 1 1 1 1 0 0 0 1 1 0 0 1 1 0 0 0 1 1 1]
selected feature: ['ArmSwelling' 'BreastSwelling' 'Skin' 'PAS' 'FHT' 'DISCOMFORT'
 'Chemotherapy' 'Radiation' 'ALND_Removed_LN' 'SLNB_ALND_Removed'
 'Hormonal' 'BMI' 'TIME_LAPSE_LOG']


<b>Get Result with Wrapper</b>

In [19]:
# Cross-validate the current estimator on only the columns kept by the search.
# NOTE(review): the subset was chosen by cross-validating on this same X / y,
# so this test score is likely optimistic - confirm whether a held-out
# evaluation is needed.
selected_columns = best_feature_set.nonzero()[0]
X_selected = X[:, selected_columns]
n_splits = 8
skf = StratifiedKFold(n_splits=n_splits)
scores = cross_validate(
    estimator, X_selected, y,
    scoring='accuracy', n_jobs=-1, cv=skf, verbose=0,
    return_estimator=True, return_train_score=True,
)
print("with wrapper")
test_scores, train_scores = scores['test_score'], scores['train_score']
print("test  mean %f, test std: %f, train mean %f, train std %f" % (np.mean(test_scores), np.std(test_scores), np.mean(train_scores), np.std(train_scores)) )


with wrapper
test  mean 0.873391, test std: 0.027045, train mean 0.902146, train std 0.006603


In [20]:
# Average the feature importances of the per-fold fitted estimators in `scores`.
fold_importances = np.array([fitted.feature_importances_ for fitted in scores['estimator']])
feature_importance_arrays_norm = fold_importances.sum(axis=0) / len(scores['estimator'])
# names of the columns selected by the search, in column order
selected_columns = best_feature_set.nonzero()[0]
feat_names = data.columns[selected_columns].values
print("sorted, feature name; importance")
assert len(feat_names) == len(feature_importance_arrays_norm), "importance and feature number not equal"
# rank (name, importance) pairs from the largest importance to the smallest
feature_weight_pair = sorted(
    zip(feat_names, feature_importance_arrays_norm),
    key=lambda name_and_weight: name_and_weight[1],
    reverse=True,
)
print(feature_weight_pair)

sorted, feature name; importance
[('ArmSwelling', 0.3346959), ('FHT', 0.16281149), ('DISCOMFORT', 0.108422555), ('Skin', 0.10132748), ('BreastSwelling', 0.08209898), ('PAS', 0.08125391), ('TIME_LAPSE_LOG', 0.051521815), ('SLNB_ALND_Removed', 0.032941613), ('BMI', 0.023978047), ('ALND_Removed_LN', 0.018085772), ('Chemotherapy', 0.0014470029), ('Hormonal', 0.0014153958), ('Radiation', 0.0)]


In [21]:
# display the sorted (feature name, importance) pairs as the cell output
feature_weight_pair

[('ArmSwelling', 0.3346959),
 ('FHT', 0.16281149),
 ('DISCOMFORT', 0.108422555),
 ('Skin', 0.10132748),
 ('BreastSwelling', 0.08209898),
 ('PAS', 0.08125391),
 ('TIME_LAPSE_LOG', 0.051521815),
 ('SLNB_ALND_Removed', 0.032941613),
 ('BMI', 0.023978047),
 ('ALND_Removed_LN', 0.018085772),
 ('Chemotherapy', 0.0014470029),
 ('Hormonal', 0.0014153958),
 ('Radiation', 0.0)]

<b>Get Result without Wrapper</b>

In [23]:
# Baseline: cross-validate on every column of X, without feature selection.
# NOTE(review): this cell scores `gbt`, while the wrapper search and the
# "with wrapper" result used `xgb`, so the two results differ in model as well
# as in feature set - confirm this is intended. `patience` is assigned but not
# used in this cell.
y = labels_3
patience = 25
estimator = gbt #xgb
n_splits = 8
skf = StratifiedKFold(n_splits=n_splits)
scores = cross_validate(
    estimator, X, y,
    scoring='accuracy', n_jobs=-1, cv=skf, verbose=0,
    return_estimator=True, return_train_score=True,
)
print("without wrapper")
test_scores, train_scores = scores['test_score'], scores['train_score']
print("test  mean %f, test std: %f, train mean %f, train std %f" % (np.mean(test_scores), np.std(test_scores), np.mean(train_scores), np.std(train_scores)) )


X shape: (892, 22)
y length: 892
without wrapper
test  mean 0.859898, test std: 0.029632, train mean 0.927291, train std 0.003816


In [24]:
# Average the feature importances of the per-fold fitted estimators in `scores`.
fold_importances = np.array([fitted.feature_importances_ for fitted in scores['estimator']])
feature_importance_arrays_norm = fold_importances.sum(axis=0) / len(scores['estimator'])
# every column of `data` is a feature here
feat_names = data.columns.values
print("sorted, feature name; importance")
assert len(feat_names) == len(feature_importance_arrays_norm), "importance and feature number not equal"
# rank (name, importance) pairs from the largest importance to the smallest
feature_weight_pair = sorted(
    zip(feat_names, feature_importance_arrays_norm),
    key=lambda name_and_weight: name_and_weight[1],
    reverse=True,
)
print(feature_weight_pair)


sorted, feature name; importance
[('ArmSwelling', 0.5042139533764998), ('fluid_total', 0.22354907088854026), ('SYM_COUNT', 0.07642150949770112), ('Skin', 0.04053912972527959), ('BreastSwelling', 0.037249191603719506), ('TIME_LAPSE_LOG', 0.0314832962990357), ('DISCOMFORT', 0.029645109524763905), ('FHT', 0.021715989775506282), ('Mobility', 0.010916041568631641), ('BMI', 0.009125936179318485), ('Age', 0.003254287901373082), ('SLNB_ALND_Removed', 0.0028819714853762163), ('Number_nodes', 0.002074960005831022), ('ALND_Removed_LN', 0.0016635636064119127), ('ChestWallSwelling', 0.001432678590024589), ('PAS', 0.0013800725548271783), ('SLNB_Removed_LN', 0.0010509623826924913), ('Chemotherapy', 0.0004608449298339932), ('Hormonal', 0.00040386593499156035), ('Lumpectomy', 0.000345258780941503), ('Radiation', 0.0001923053887002093), ('Mastectomy', 0.0)]


In [25]:
# display the sorted (feature name, importance) pairs as the cell output
feature_weight_pair

[('ArmSwelling', 0.5042139533764998),
 ('fluid_total', 0.22354907088854026),
 ('SYM_COUNT', 0.07642150949770112),
 ('Skin', 0.04053912972527959),
 ('BreastSwelling', 0.037249191603719506),
 ('TIME_LAPSE_LOG', 0.0314832962990357),
 ('DISCOMFORT', 0.029645109524763905),
 ('FHT', 0.021715989775506282),
 ('Mobility', 0.010916041568631641),
 ('BMI', 0.009125936179318485),
 ('Age', 0.003254287901373082),
 ('SLNB_ALND_Removed', 0.0028819714853762163),
 ('Number_nodes', 0.002074960005831022),
 ('ALND_Removed_LN', 0.0016635636064119127),
 ('ChestWallSwelling', 0.001432678590024589),
 ('PAS', 0.0013800725548271783),
 ('SLNB_Removed_LN', 0.0010509623826924913),
 ('Chemotherapy', 0.0004608449298339932),
 ('Hormonal', 0.00040386593499156035),
 ('Lumpectomy', 0.000345258780941503),
 ('Radiation', 0.0001923053887002093),
 ('Mastectomy', 0.0)]