# Import

In [1]:
import pandas as pd
from pandas import DataFrame
import numpy as np

import xgboost as xgb 
from xgboost import plot_importance , XGBClassifier

import lightgbm as lgbm
from lightgbm import LGBMClassifier

from sklearn.metrics import *
from sklearn.model_selection import train_test_split

from tqdm import notebook

# Read Data

In [2]:
train_original = pd.read_csv('./open data/train.csv')
test_original = pd.read_csv('./open data/test_x.csv')
train = train_original.copy()
test = test_original.copy()

# Data

In [3]:
# fill NA
def fill_married(data):
    pdata = data.copy()
    pdata.loc[ (pdata.married==0)&(pdata.age_group=='10s'),'married' ] = 1
    pdata.loc[ (pdata.married==0)&(pdata.age_group=='20s'),'married' ] = 1
    pdata.loc[pdata.married==0,'married'] = 2
    
    return pdata

def fill_education(data):
    pdata = data.copy()
    pdata.loc[(pdata.education==0)&(pdata.age_group=='10s'),'education'] = 2
    pdata.loc[pdata.education==0,'education'] = 3

    return pdata

def fill_engnat(data):
    pdata = data.copy()
    pdata.loc[pdata.engnat==0,'engnat'] = 1
    
    return pdata

def fill_hand(data):
    pdata = data.copy()
    pdata.loc[pdata.hand==0,'hand'] = 1
    
    return pdata
# feature engineering
def Mach_score(data):
    pdata = data.copy()
    Answers = []
    for i in range(20):
        Answers.append('Q'+chr(97+i)+'A')
    reverse_col = ['QeA','QfA','QkA','QqA','QrA','QaA','QdA','QgA','QiA','QnA']
    for col in reverse_col:
        pdata[col] = -pdata[col]
    pdata['Mach_score'] = pdata[Answers].sum(axis=1)
    
    return pdata

def w_score(data):
    pdata = data.copy()
    wr = []
    wf = []
    for i in range(1,14):
        wr.append(f'wr_{i:02d}')
    for i in range(1,4):
        wf.append(f'wf_{i:02d}')
    
    pdata['wr'] = pdata[wr].sum(axis=1)
    pdata['wf'] = pdata[wf].sum(axis=1)
    
    return pdata

def TIPI(data):
    pdata = data.copy()
    pdata['tp_score_1'] = pdata['tp01'] - pdata['tp06']
    pdata['tp_score_2'] = pdata['tp07'] - pdata['tp02']
    pdata['tp_score_3'] = pdata['tp03'] - pdata['tp08']
    pdata['tp_score_4'] = pdata['tp09'] - pdata['tp04']
    pdata['tp_score_5'] = pdata['tp05'] - pdata['tp10']
    
    return pdata

# drop outlier
def drop_outlier(data, datatype):
    
    assert datatype == 'train' or datatype=='test', 'Wrong data type given'
    
    pdata = data.copy()
    if datatype=='train':
        
        out_arr = []
        out_arr.append( data[data.familysize>=16].index )
        out_arr.append( data[data.wr<=3].index )
        out_arr.append( data[data.wf>=2].index )

        out = []
        for outarr in out_arr:
            out = np.union1d(out, outarr)

        pdata = data.drop(out)
    
    return pdata
# feature banding
def age_band(data):
    pdata = data.copy()
    pdata['age_group'].replace(['10s','20s','30s','40s','50s','60s','+70s'],[1,2,3,4,5,5,5],inplace=True)
    
    return pdata

def E_band(data, num_band):
    pdata = data.copy()
    for i in range(20):
        col = 'Q'+chr(i+97)+'E'
        pdata[col] = pd.qcut(pdata[col],num_band)
        unique = pdata[col].unique()
        pdata[col].replace(unique,range(num_band),inplace=True)
        
    return pdata

def family_band(data):
    pdata = data.copy()
    pdata.loc[pdata.familysize >= 6,'familysize'] = 6
    
    return pdata
# categorical value to numerical value
def cat_gender(data):
    feature = 'gender'
    pdata = data.copy()
    pdata[feature].replace(['Male','Female'],[0,1],inplace=True)
    
    return pdata

def cat_race(data):
    feature = 'race'
    pdata = data.copy()
    unique = ['White', 'Asian', 'Other', 'Black', 'Native American', 'Arab', 'Indigenous Australian']
    pdata[feature].replace(unique,[0,1,2,3,4,5,6],inplace=True)
    
    return pdata

def cat_religion(data):
    feature = 'religion'
    pdata = data.copy()
    unique = ['Other', 'Hindu', 'Agnostic', 'Atheist', 'Christian_Other',
       'Christian_Catholic', 'Muslim', 'Buddhist', 'Christian_Protestant',
       'Jewish', 'Christian_Mormon', 'Sikh']
    pdata[feature].replace(unique,[11,10,0,1,2,3,4,5,6,7,8,9],inplace=True)
    
    return pdata

def cat_num(data):
    pdata = data.copy()
    pdata = cat_gender(pdata)
    pdata = cat_race(pdata)
    pdata = cat_religion(pdata)
    
    return pdata
# drop feature
def drop_feature(data, feature_arr):
    arr = feature_arr + ['index'] 
    
    """for i in range(20):
        arr.append('Q'+chr(i+97)+'A')
    for i in range(20):
        arr.append('Q'+chr(i+97)+'E')
    for i in range(1,14):
        arr.append(f'wr_{i:02d}')
    for i in range(1,4):
        arr.append(f'wf_{i:02d}')
    for i in range(1,11):
        arr.append(f'tp{i:02d}')"""
    

    pdata = data.drop(arr,axis=1)
    
    return pdata


In [4]:
def preprocess(data, datatype, feature_arr):
    
    pdata = data.copy()
    # fill NA
    pdata = fill_married(pdata)
    pdata = fill_education(pdata)
    pdata = fill_engnat(pdata)
    pdata = fill_hand(pdata)
    # feature engineering
    pdata = Mach_score(pdata)
    pdata = w_score(pdata)
    pdata = TIPI(pdata)
    # drop outlier
    pdata = drop_outlier(pdata,datatype)
    # feature banding
    pdata = age_band(pdata)
    pdata = family_band(pdata)
    pdata = E_band(pdata,10)
    # categorical value to numerical value
    pdata = cat_num(pdata)
    # drop feature
    pdata = drop_feature(pdata, feature_arr)
    # unify type of data
    pdata = pdata.astype(np.int)
    
    return pdata

# XGBoost

In [5]:
opt = { 'max_depth' : 8,
        'n_estimators' : 200,
        'learning_rate' : 0.010,
         'min_child_weight' : 6,
         'colsample_bytree' : 0.8,
        'verbosity' : 0,
        'objective' : 'binary:logistic',
        'booster' : 'gbtree',
        'subsample' : 0.8}

In [6]:
opt_arr_1 = ['QbA','QcA','QjA','QhA','QmA','QtA','QlA','QkA','QoA',
 'QsA','QrA', 'QeA',  'QnA','QgA', 'QdA'] + \
['QaE','QbE','QcE','QeE','QgE','QmE','QfE','QiE','QoE',
 'QlE','QrE','QpE','QnE','QtE','QsE','QkE'] + \
['tp01','tp04','tp05','tp09','tp10','tp02', 'tp08'] +\
['wf_01', 'wf_03'] + \
['wr_06','wr_09', 'wr_11','wr_07', 'wr_12','wr_13'] +\
['tp_score_4','hand']

opt_arr_2 = ['QjA','QaE', 'QbA', 'QeE', 'QfE', 'QhA', 'QiE', 'tp09', 'tp_score_4',
      'QbE', 'QtA', 'tp01', 'tp_score_2',
       'QmA', 'QmE', 'tp04',
       'QgE', 'QkA', 'QoE', 'QsA',
       'QlE', 'QoE', 'QrE', 'wf_03',
       'QoA', 'QlA', 'QsE', 'tp10',
       'QpE', 'tp08', 'wf_01',
       'QkE', 'QrA', 'wr_05', 'wr_09', 'wr_10', 'wr_11',
       'QgA', 'QtE', 'hand', 'tp06', 'QeA', 'wr_06', 'wr_12',
       'wr_03', 'wr_07', 'QdA', 'QdE', 'QnE', 'wr_13'
      ]
opt_arr_3 = ['QaE', 'QbA', 'QbE', 'QfE', 'QgE', 'QhA', 'QjA', 'QkA', 'QmA', 'QmE', 
             'QqA', 'QrA', 'QrE', 'QsA', 'QtA', 'tp04', 'tp09', 'wf_03', 'wr_13', 
             'tp_score_4', 'QdA', 'QiE', 'QlA', 'QlE', 'QoA', 'QpE', 'tp01', 
             'wr_06', 'QnE', 'QoE', 'QsE', 'hand', 'tp10', 'wf_01', 'wr_12', 
             'QeA', 'QeE', 'wr_11', 'QcA', 'tp02', 'wr_10']

# XGB Permutation Importance

In [7]:
def permutation_importance(param, th, bad, train_data, val_data):
    num_iter = 10
    
    train_x = preprocess(train_data,'train',bad)
    train_y = train_x['voted']
    train_x = train_x.drop(['voted'],axis=1)
    val_x = preprocess(val_data,'test',bad)
    val_y = val_x['voted']
    val_x = val_x.drop(['voted'],axis=1)
    
    model = XGBClassifier(**param)
    model.fit(train_x,train_y,verbose=False)
    
    results = {}
    
    pred = model.predict_proba(val_x)[:,1]
    results['base_score'] = roc_auc_score(val_y, pred)
    
    print(f'Base auc {results["base_score"]:.6f}')
    
    for col in notebook.tqdm(val_x.columns):
        freezed = val_x[col].copy()
        results[col] = 0.0
        for i in range(num_iter):
            val_x[col] = np.random.permutation(val_x[col])
            pred_per = model.predict_proba(val_x)[:,1]
            results[col] += roc_auc_score(val_y, pred_per)
            val_x[col] = freezed
        results[col] = np.divide(results[col],num_iter)
        
        if (results[col]-results["base_score"]>th):
            print('\33[31m'+f'{col:10s} : {results[col]-results["base_score"]:+.6f}'+'\33[0m')
        else:
            print(f'{col:10s} : {results[col]-results["base_score"]:+.6f}')
        
    bad_features = [ k for k in results if results[k]>results['base_score']+th ]
    
    # bad_features.remove('base_score')
    
    return results['base_score'], bad_features

In [8]:
para = { 'max_depth' : 8,
        'n_estimators' : 200,
        'learning_rate' : 0.010,
         'min_child_weight' : 6,
         'colsample_bytree' : 0.8,
        'verbosity' : 0,
        'objective' : 'binary:logistic',
        'booster' : 'gbtree',
        'subsample' : 0.8,
        'tree_method' : 'gpu_hist'
       }

In [14]:
t_data , v_data = train_test_split(train,test_size=0.14998, shuffle=False)
res1, bad_features1 = permutation_importance(opt, 0.00001, opt_arr_1, t_data,v_data)

Base auc 0.774480


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=36.0), HTML(value='')))

QaA        : -0.000533
QdE        : -0.000316
QfA        : -0.000641
QhE        : -0.000290
QiA        : -0.000142
QjE        : -0.000788
QpA        : -0.000435
QqA        : -0.000563
QqE        : -0.001197
age_group  : -0.067630
education  : -0.066235
engnat     : -0.006271
familysize : -0.000601
gender     : -0.000634
married    : -0.007296
race       : -0.027586
religion   : -0.005594
tp03       : -0.000511
tp06       : -0.000612
tp07       : -0.000402
urban      : -0.000820
wf_02      : -0.000057
wr_01      : -0.000330
wr_02      : -0.000143
wr_03      : -0.000160
wr_04      : -0.000207
wr_05      : -0.000084
wr_08      : -0.000149
wr_10      : -0.000013
Mach_score : -0.000417
wr         : -0.001037
wf         : -0.000185
tp_score_1 : -0.001401
tp_score_2 : -0.000315
tp_score_3 : -0.000728
tp_score_5 : -0.000232



In [16]:
def find_bad(param, seed, arr, th):
    
    train_data, val_data = train_test_split(train, test_size=0.14998, random_state=seed)
    
    th = th
    opt_auc = 0
    cur_auc = 0
    arr = arr
    important = ['Mach_score']
    for i in []:
        important.append(f'tp_score_{i}')
    
    while (opt_auc==cur_auc):
        cur_auc, cur_bad = permutation_importance(param,th,arr, train_data, val_data)
        print(f'{arr} = {cur_auc:.6f}')
        
        if opt_auc<cur_auc:
            opt_auc=cur_auc
        
        # inter = list(set(arr)&set(important))
        cur_bad = [f for f in cur_bad if f not in important]
        
        if not cur_bad:
            th = th*0.25
            print('\33[31m' + f'Threshold decreased : {th}' + '\33[0m')
        
        arr += cur_bad
        print('-'*163)
    
    return opt_auc, arr

In [19]:
res, badfeature = find_bad(para, False, ['tp_score_2'], 0.0001)

Base auc 0.771409


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=83.0), HTML(value='')))

QaA        : -0.000637
QaE        : -0.000064
QbA        : -0.000123
QbE        : -0.000144
QcA        : -0.000249
QcE        : -0.000117
[31mQdA        : +0.000133[0m
QdE        : -0.000019
QeA        : -0.000105
QeE        : -0.000017
QfA        : +0.000041
QfE        : +0.000038
QgA        : +0.000071
[31mQgE        : +0.000289[0m
QhA        : -0.000224
QhE        : +0.000051
QiA        : -0.000206
QiE        : +0.000087
QjA        : -0.000416
QjE        : -0.000264
QkA        : -0.000048
QkE        : +0.000038
[31mQlA        : +0.000124[0m
QlE        : +0.000098
QmA        : +0.000085
QmE        : -0.000043
QnA        : -0.000101
[31mQnE        : +0.000262[0m
QoA        : +0.000036
QoE        : -0.000148
QpA        : +0.000093
QpE        : -0.000020
QqA        : -0.000123
[31mQqE        : +0.000195[0m
QrA        : -0.000246
[31mQrE        : +0.000199[0m
QsA        : +0.000038
QsE        : +0.000045
QtA        : -0.000138
QtE        : -0.000011
age_group  : -0.067536
edu

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=72.0), HTML(value='')))

QaA        : -0.000183
QaE        : -0.000147
QbA        : +0.000041
QbE        : -0.000045
QcA        : -0.000108
QcE        : -0.000039
[31mQdE        : +0.000491[0m
QeA        : -0.000030
[31mQeE        : +0.000163[0m
QfA        : -0.000008
[31mQfE        : +0.000122[0m
[31mQgA        : +0.000121[0m
QhA        : -0.000266
QhE        : +0.000056
QiA        : -0.000254
[31mQiE        : +0.000101[0m
QjA        : -0.000253
QjE        : -0.000121
QkA        : -0.000218
QkE        : +0.000028
[31mQlE        : +0.000101[0m
QmA        : -0.000016
QmE        : -0.000036
QnA        : -0.000230
[31mQoA        : +0.000130[0m
QoE        : -0.000224
QpA        : +0.000006
QpE        : +0.000013
QqA        : -0.000100
QrA        : +0.000042
QsA        : +0.000010
QsE        : +0.000096
QtA        : -0.000078
QtE        : -0.000024
age_group  : -0.064336
education  : -0.074248
engnat     : -0.005908
gender     : -0.000225
hand       : -0.000051
married    : -0.006437
race       : -0.0

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=63.0), HTML(value='')))

QaA        : -0.000465
QaE        : -0.000006
QbA        : -0.000116
QbE        : -0.000005
QcA        : -0.000333
QcE        : -0.000007
QeA        : -0.000132
QfA        : +0.000099
QhA        : -0.000042
QhE        : -0.000004
QiA        : -0.000189
QjA        : -0.000314
QjE        : -0.000214
QkA        : +0.000086
QkE        : -0.000080
QmA        : -0.000041
QmE        : -0.000108
QnA        : -0.000192
QoE        : -0.000159
[31mQpA        : +0.000275[0m
QpE        : +0.000018
QqA        : -0.000208
QrA        : -0.000201
QsA        : -0.000099
QsE        : +0.000059
QtA        : -0.000120
QtE        : +0.000017
age_group  : -0.063473
education  : -0.067194
engnat     : -0.006770
gender     : -0.000386
hand       : -0.000000
married    : -0.006308
race       : -0.024433
religion   : -0.004596
tp02       : -0.000386
tp03       : -0.000531
tp04       : -0.000133
tp05       : -0.000013
tp06       : -0.001112
tp07       : -0.000470
tp08       : +0.000009
tp09       : -0.000191
tp

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=61.0), HTML(value='')))

QaA        : -0.000419
QaE        : -0.000085
QbA        : -0.000153
QbE        : -0.000044
QcA        : -0.000077
QcE        : -0.000007
QeA        : -0.000129
QfA        : +0.000018
QhA        : -0.000207
QhE        : -0.000037
QiA        : -0.000276
QjA        : -0.000329
QjE        : -0.000414
QkA        : -0.000304
QkE        : -0.000002
QmA        : -0.000296
QmE        : -0.000159
QnA        : -0.000300
QoE        : -0.000362
QpE        : -0.000035
QqA        : -0.000043
QrA        : -0.000186
QsA        : -0.000038
QsE        : -0.000121
QtA        : -0.000171
QtE        : -0.000082
age_group  : -0.068387
education  : -0.063361
engnat     : -0.006236
gender     : -0.000408
hand       : -0.000026
married    : -0.006358
race       : -0.026411
religion   : -0.004442
tp02       : -0.000209
tp03       : -0.000722
tp04       : -0.000109
tp05       : +0.000035
tp06       : -0.001214
tp07       : -0.000445
tp08       : -0.000620
tp09       : -0.000095
tp10       : -0.000036
urban      

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=61.0), HTML(value='')))

QaA        : -0.000490
QaE        : -0.000119
QbA        : -0.000098
QbE        : -0.000057
QcA        : -0.000172
QcE        : +0.000021
QeA        : -0.000142
[31mQfA        : +0.000161[0m
QhA        : -0.000269
QhE        : -0.000078
QiA        : -0.000217
QjA        : -0.000239
QjE        : -0.000390
QkA        : -0.000241
QkE        : -0.000065
QmA        : -0.000214
QmE        : -0.000135
QnA        : -0.000312
QoE        : -0.000268
[31mQpE        : +0.000134[0m
QqA        : -0.000141
QrA        : -0.000231
QsA        : -0.000019
QsE        : -0.000291
QtA        : -0.000082
QtE        : -0.000060
age_group  : -0.068416
education  : -0.062662
engnat     : -0.006297
gender     : -0.000442
hand       : -0.000017
married    : -0.006729
race       : -0.026266
religion   : -0.004043
tp02       : -0.000343
tp03       : -0.001122
tp04       : +0.000005
tp05       : -0.000038
tp06       : -0.001529
tp07       : -0.000492
tp08       : -0.000577
tp09       : -0.000215
tp10       : -0.

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=58.0), HTML(value='')))

QaA        : -0.000519
QaE        : -0.000034
[31mQbA        : +0.000090[0m
QbE        : -0.000059
QcA        : -0.000125
[31mQcE        : +0.000070[0m
QeA        : -0.000207
QhA        : -0.000170
[31mQhE        : +0.000130[0m
QiA        : -0.000177
QjA        : -0.000313
QjE        : -0.000307
QkA        : +0.000015
[31mQkE        : +0.000201[0m
QmA        : -0.000054
[31mQmE        : +0.000027[0m
QnA        : -0.000055
QoE        : -0.000264
QqA        : -0.000325
QrA        : -0.000257
QsA        : -0.000043
[31mQsE        : +0.000172[0m
QtA        : -0.000002
[31mQtE        : +0.000158[0m
age_group  : -0.068137
education  : -0.071710
engnat     : -0.006005
gender     : -0.000372
[31mhand       : +0.000055[0m
married    : -0.006723
race       : -0.026649
religion   : -0.005098
tp02       : -0.000265
tp03       : -0.000569
tp04       : -0.000189
tp05       : -0.000157
tp06       : -0.001325
tp07       : -0.000838
tp08       : -0.000257
[31mtp09       : +0.000087[0m

# LGBM Permutation Importance

In [7]:
def permutation_importance_lgbm(param, th, bad, train_data, val_data):
    num_iter = 10
    
    train_x = preprocess(train_data,'train',bad)
    train_y = train_x['voted']
    train_x = train_x.drop(['voted'],axis=1)
    val_x = preprocess(val_data,'test',bad)
    val_y = val_x['voted']
    val_x = val_x.drop(['voted'],axis=1)
    
    model = LGBMClassifier(**param)
    model.fit(train_x,train_y,verbose=False)
    
    results = {}
    
    pred = model.predict_proba(val_x)[:,1]
    results['base_score'] = roc_auc_score(val_y, pred)
    
    print(f'Base auc {results["base_score"]:.6f}')
    
    for col in notebook.tqdm(val_x.columns):
        freezed = val_x[col].copy()
        results[col] = 0.0
        for i in range(num_iter):
            val_x[col] = np.random.permutation(val_x[col])
            pred_per = model.predict_proba(val_x)[:,1]
            results[col] += roc_auc_score(val_y, pred_per)
            val_x[col] = freezed
        results[col] = np.divide(results[col],num_iter)
        
        if (results[col]-results["base_score"]>th):
            print('\33[31m'+f'{col:10s} : {results[col]-results["base_score"]:+.6f}'+'\33[0m')
        else:
            print(f'{col:10s} : {results[col]-results["base_score"]:+.6f}')
        
    bad_features = [ k for k in results if results[k]>results['base_score']+th ]
    
    # bad_features.remove('base_score')
    
    return results['base_score'], bad_features

In [8]:
lgbm_opt= { 'max_depth' : 5, 
            'num_leaves' : 359,
            'n_estimators' : 581,
            'learning_rate' : 0.05,
            'min_child_weight' : 37,
            'colsample_bytree' : 0.67,
            'feature_fraction' : 0.77,
            'bagging_fraction' : 0.91,
            'lambda_l1' : 0.41,
            'lambda_l2' : 0.09,
            'objective' : 'binary',
            'boosting_type' : 'dart'
            }

In [10]:
t_data, v_data = train_test_split(train, test_size=0.14998, random_state=0, shuffle=False)
results, bad_features = permutation_importance_lgbm(lgbm_opt,0.0001,opt_arr_2,t_data,v_data)

Base auc 0.773743


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=35.0), HTML(value='')))

QaA        : -0.000947
QcA        : -0.000383
QcE        : -0.000334
QfA        : -0.000340
QhE        : -0.000087
QiA        : -0.000314
QjE        : -0.000501
QnA        : -0.000033
QpA        : -0.000826
QqA        : -0.000323
QqE        : -0.001146
age_group  : -0.067544
education  : -0.068741
engnat     : -0.006341
familysize : -0.000352
gender     : -0.000360
married    : -0.006721
race       : -0.024145
religion   : -0.006040
tp02       : -0.000635
tp03       : -0.000344
tp05       : -0.000050
tp07       : -0.000196
urban      : -0.000927
[31mwf_02      : +0.000166[0m
wr_01      : -0.000233
wr_02      : -0.000466
wr_04      : -0.000070
wr_08      : -0.000116
Mach_score : -0.000465
wr         : -0.001902
wf         : -0.000140
tp_score_1 : -0.002084
tp_score_3 : -0.000444
tp_score_5 : -0.000623



In [29]:
def find_bad_lgbm(param, seed, th, arr):
    
    train_data, val_data = train_test_split(train, test_size=0.14998, random_state=seed, shuffle=False)
    
    th = th
    opt_auc = 0
    cur_auc = 0
    arr = arr
    important = ['Mach_score', 'tp_score_1']
    
    while (opt_auc==cur_auc):
        cur_auc, cur_bad = permutation_importance_lgbm(param,th,arr, train_data, val_data)
        print(f'{arr} = {cur_auc:.6f}')
        
        if opt_auc<cur_auc:
            opt_auc=cur_auc
        
        inter = list(set(arr)&set(important))
        cur_bad = [f for f in cur_bad if f not in important]
        
        if not cur_bad:
            th = th*0.2
            print('\33[31m' + f'Threshold decreased : {th}' + '\33[0m')
        
        arr += cur_bad
    
    return opt_auc, arr

In [30]:
find_bad_lgbm(lgbm_opt,0, 0.0001, ['tp_score_2'])

Base auc 0.766605


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=83.0), HTML(value='')))

QaA        : -0.000584
[31mQaE        : +0.000200[0m
QbA        : +0.000089
QbE        : +0.000045
QcA        : +0.000083
QcE        : -0.000219
QdA        : -0.000081
QdE        : -0.000079
QeA        : -0.000146
QeE        : -0.000059
QfA        : -0.000019
QfE        : +0.000095
QgA        : +0.000067
[31mQgE        : +0.000138[0m
[31mQhA        : +0.000321[0m
QhE        : -0.000087
QiA        : +0.000026
QiE        : -0.000050
[31mQjA        : +0.000785[0m
QjE        : -0.000219
QkA        : -0.000206
QkE        : -0.000078
QlA        : +0.000027
QlE        : -0.000277
[31mQmA        : +0.000478[0m
QmE        : +0.000064
QnA        : -0.000112
QnE        : -0.000168
[31mQoA        : +0.000252[0m
[31mQoE        : +0.000229[0m
QpA        : -0.000333
[31mQpE        : +0.000259[0m
QqA        : +0.000047
QqE        : -0.000595
QrA        : +0.000071
[31mQrE        : +0.000132[0m
QsA        : +0.000098
QsE        : -0.000108
QtA        : +0.000074
QtE        : -0.000315

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=71.0), HTML(value='')))

QaA        : -0.000598
[31mQbA        : +0.000226[0m
QbE        : -0.000051
QcA        : -0.000094
QcE        : -0.000205
QdA        : -0.000311
QdE        : -0.000121
QeA        : +0.000021
QeE        : -0.000263
QfA        : +0.000068
[31mQfE        : +0.000204[0m
QgA        : -0.000190
QhE        : -0.000204
[31mQiA        : +0.000178[0m
QiE        : -0.000043
QjE        : -0.000289
QkA        : -0.000148
QkE        : +0.000013
QlA        : +0.000009
[31mQlE        : +0.000326[0m
QmE        : +0.000045
QnA        : -0.000122
QnE        : +0.000046
QpA        : -0.000292
QqA        : -0.000051
QqE        : -0.000471
QrA        : +0.000077
QsA        : +0.000070
QsE        : -0.000045
[31mQtA        : +0.000168[0m
QtE        : -0.000475
age_group  : -0.065719
education  : -0.067772
engnat     : -0.004450
familysize : -0.000606
gender     : -0.000185
hand       : +0.000046
married    : -0.006986
race       : -0.022122
religion   : -0.004324
tp01       : -0.000080
tp02       :

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=65.0), HTML(value='')))

QaA        : -0.000808
QbE        : +0.000018
QcA        : +0.000015
QcE        : -0.000006
QdA        : -0.000152
QdE        : -0.000038
QeA        : -0.000273
QeE        : -0.000188
QfA        : -0.000205
QgA        : -0.000095
QhE        : -0.000372
QiE        : -0.000064
QjE        : -0.000338
QkA        : -0.000115
QkE        : -0.000215
QlA        : +0.000070
QmE        : +0.000067
QnA        : -0.000175
QnE        : -0.000023
QpA        : -0.000504
QqA        : -0.000490
QqE        : -0.000665
QrA        : -0.000463
[31mQsA        : +0.000210[0m
QsE        : -0.000004
QtE        : -0.000200
age_group  : -0.067777
education  : -0.065925
engnat     : -0.005440
familysize : -0.000276
gender     : -0.000157
hand       : +0.000015
married    : -0.006166
race       : -0.023537
religion   : -0.005010
[31mtp01       : +0.000100[0m
tp02       : -0.000521
tp03       : -0.000299
tp05       : -0.000321
tp06       : -0.000941
tp07       : -0.000597
tp08       : -0.000262
tp10       : +0.

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=62.0), HTML(value='')))

QaA        : -0.000651
QbE        : -0.000013
QcA        : -0.000049
QcE        : -0.000103
QdA        : -0.000034
QdE        : -0.000206
QeA        : -0.000019
QeE        : -0.000149
QfA        : -0.000189
QgA        : -0.000273
QhE        : -0.000452
QiE        : +0.000019
QjE        : -0.000276
QkA        : -0.000102
QkE        : -0.000085
QlA        : +0.000091
[31mQmE        : +0.000283[0m
QnA        : -0.000197
QnE        : +0.000052
QpA        : -0.000295
QqA        : +0.000051
QqE        : -0.000776
QrA        : -0.000153
[31mQsE        : +0.000195[0m
QtE        : -0.000405
age_group  : -0.067502
education  : -0.066782
engnat     : -0.006255
familysize : -0.000424
gender     : -0.000619
hand       : -0.000039
married    : -0.006013
race       : -0.022874
religion   : -0.004361
tp02       : -0.000627
tp03       : +0.000014
tp05       : -0.000298
tp06       : -0.000538
tp07       : -0.000530
tp08       : +0.000011
tp10       : -0.000151
urban      : -0.000808
wf_01      : +0.

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=59.0), HTML(value='')))

QaA        : -0.000802
[31mQbE        : +0.000175[0m
QcA        : +0.000034
QcE        : -0.000373
QdA        : -0.000130
QdE        : -0.000129
QeA        : -0.000142
QeE        : -0.000157
QfA        : -0.000233
QgA        : -0.000054
QhE        : -0.000275
QiE        : -0.000189
QjE        : -0.000482
QkA        : -0.000242
QkE        : -0.000137
[31mQlA        : +0.000104[0m
QnA        : -0.000291
QnE        : -0.000459
QpA        : -0.000499
QqA        : -0.000696
QqE        : -0.000632
QrA        : -0.000096
QtE        : -0.000364
age_group  : -0.069082
education  : -0.066599
engnat     : -0.005191
familysize : -0.000546
gender     : -0.000275
hand       : -0.000057
married    : -0.006651
race       : -0.024244
religion   : -0.004516
tp02       : -0.000617
tp03       : -0.000199
tp05       : -0.000179
tp06       : -0.000416
tp07       : -0.000543
tp08       : -0.000072
tp10       : -0.000228
urban      : -0.001037
wf_01      : +0.000026
wr_01      : -0.000445
wr_02      : -0.

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=57.0), HTML(value='')))

QaA        : -0.000450
QcA        : -0.000001
QcE        : -0.000133
QdA        : -0.000151
QdE        : -0.000165
QeA        : -0.000161
QeE        : -0.000037
QfA        : -0.000299
QgA        : -0.000161
QhE        : -0.000482
QiE        : -0.000005
QjE        : -0.000417
QkA        : -0.000388
QkE        : -0.000016
QnA        : -0.000254
QnE        : -0.000092
QpA        : -0.000496
QqA        : -0.000444
QqE        : -0.000972
QrA        : -0.000300
QtE        : -0.000366
age_group  : -0.066246
education  : -0.067618
engnat     : -0.006062
familysize : -0.000468
gender     : -0.000288
hand       : -0.000054
married    : -0.006175
race       : -0.022759
religion   : -0.004809
tp02       : -0.000680
tp03       : -0.000005
tp05       : +0.000007
tp06       : -0.000359
tp07       : -0.000525
tp08       : -0.000131
tp10       : -0.000124
urban      : -0.000674
wf_01      : +0.000000
wr_01      : -0.000381
wr_02      : -0.000460
wr_03      : -0.000040
wr_04      : -0.000069
wr_05      

(0.7733180087582812,
 ['tp_score_2',
  'QaE',
  'QgE',
  'QhA',
  'QjA',
  'QmA',
  'QoA',
  'QoE',
  'QpE',
  'QrE',
  'tp09',
  'wf_02',
  'tp_score_4',
  'QbA',
  'QfE',
  'QiA',
  'QlE',
  'QtA',
  'tp04',
  'QsA',
  'tp01',
  'wr_13',
  'QmE',
  'QsE',
  'wf_03',
  'QbE',
  'QlA'])