In [1]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score
from scipy.stats import uniform,randint
from sklearn.model_selection import RandomizedSearchCV

In [2]:
from sklearn.model_selection import train_test_split
import category_encoders

In [41]:
features = pd.read_csv("features_12.csv")

In [20]:
kaggle = pd.read_csv('trocafone_kaggle_test.csv')
labels = pd.read_csv('labels_training_set.csv')

In [21]:
training = labels.set_index('person').join(features.set_index('person'))
train_labels = training['label']
train_data = training.drop('label',axis=1)

In [22]:
predict_data = kaggle.set_index('person').join(features.set_index('person'))

In [23]:
encoder = category_encoders.TargetEncoder(smoothing=True)
train_data_enc = encoder.fit_transform(np.array(train_data),np.array(train_labels))
predict_data_enc = encoder.transform(np.array(predict_data))

In [24]:
xtrain,xtest,ytrain,ytest = train_test_split(train_data_enc,np.array(train_labels),train_size=0.75,stratify=np.array(train_labels))



In [25]:
params={
    'n_estimators':randint(150,500),
    'learning_rate':uniform(0.01,0.3),
    'subsample':uniform(0.3,0.7),
    'min_child_weight':randint(5,20),
    'max_depth':randint(6,20),
    'gamma':randint(0,10),
    'colsample_bytree':uniform(0.4,0.6),
    'subsample':uniform(0,1),
    'max_delta_step':randint(0,5)
}
grid = RandomizedSearchCV(xgb.XGBClassifier(n_jobs=-1),
                          param_distributions=params,
                          scoring='roc_auc',
                          cv=2,
                          verbose=1,
                          n_iter=100)

In [26]:
grid.fit(xtrain,ytrain)

Fitting 2 folds for each of 100 candidates, totalling 200 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed:  9.6min finished


RandomizedSearchCV(cv=2, error_score='raise-deprecating',
          estimator=XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=-1, nthread=None, objective='binary:logistic',
       random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
       seed=None, silent=True, subsample=1),
          fit_params=None, iid='warn', n_iter=100, n_jobs=None,
          param_distributions={'n_estimators': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00000236CFB48B70>, 'learning_rate': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00000236CFB48470>, 'subsample': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00000236CFB71B38>, 'min_...1BE0>, 'max_delta_step': <scipy.stats._distn_infrastructure.rv_frozen object at 0x00000236CFB712E8>},
          pre_dispatch='2*n_jobs', random_state=None,

In [27]:
grid.score(xtest,ytest)

0.8702299405333842

In [28]:
grid.best_params_

{'colsample_bytree': 0.9044393087086191,
 'gamma': 7,
 'learning_rate': 0.04291681392641188,
 'max_delta_step': 4,
 'max_depth': 12,
 'min_child_weight': 17,
 'n_estimators': 199,
 'subsample': 0.9387819055106965}

In [29]:
xg_classifier = grid.best_estimator_

In [33]:
xtr,xte,ytr,yte = train_test_split(train_data_enc,np.array(train_labels),stratify=np.array(train_labels))

In [34]:
xg_classifier.fit(xtr,ytr)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.9044393087086191, gamma=7,
       learning_rate=0.04291681392641188, max_delta_step=4, max_depth=12,
       min_child_weight=17, missing=None, n_estimators=199, n_jobs=-1,
       nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=0.9387819055106965)

In [35]:
roc_auc_score(yte,xg_classifier.predict_proba(xte)[:,1])

0.8712501273019513

In [110]:
train = xgb.DMatrix(data=xtr,label=ytr)
test = xgb.DMatrix(data=xte,label=yte)
data = xgb.DMatrix(data=train_data_enc,label=train_labels)
pred = xgb.DMatrix(data=predict_data_enc)
params = {'colsample_bytree': 0.6155688148280034,
 'gamma': 4,
 'learning_rate': 0.01465667380346192,
 'max_delta_step': 3,
 'max_depth': 18,
 'min_child_weight': 16,
 'n_estimators': 434,
 'subsample': 0.5879465783476493,
 'eval_metric':'auc'}

In [111]:
xgb.train(dtrain=train,params=params,num_boost_round=400,evals=[(train,'train'),(test,'eval')])

[16:40:17] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 164 pruned nodes, max_depth=2
[0]	train-auc:0.792055	eval-auc:0.787746
[16:40:17] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 166 pruned nodes, max_depth=3
[1]	train-auc:0.821955	eval-auc:0.821993
[16:40:17] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 178 pruned nodes, max_depth=3
[2]	train-auc:0.824941	eval-auc:0.827098
[16:40:17] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 174 pruned nodes, max_depth=3
[3]	train-auc:0.8298	eval-auc:0.827864
[16:40:17] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 166 pruned nodes, max_depth=2
[4]	train-auc:0.829047	eval-auc:0.828043
[16:40:17] C:\Users\Administrator\Des

[16:40:19] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 236 pruned nodes, max_depth=2
[43]	train-auc:0.854955	eval-auc:0.856631
[16:40:19] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 202 pruned nodes, max_depth=3
[44]	train-auc:0.854919	eval-auc:0.856665
[16:40:19] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 224 pruned nodes, max_depth=1
[45]	train-auc:0.854881	eval-auc:0.85664
[16:40:20] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 6 extra nodes, 220 pruned nodes, max_depth=2
[46]	train-auc:0.854706	eval-auc:0.856375
[16:40:20] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 4 extra nodes, 216 pruned nodes, max_depth=2
[47]	train-auc:0.854705	eval-auc:0.856612
[16:40:20] C:\Users\Administrat

[16:40:25] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 286 pruned nodes, max_depth=1
[128]	train-auc:0.855003	eval-auc:0.855163
[16:40:25] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 8 extra nodes, 278 pruned nodes, max_depth=4
[129]	train-auc:0.857239	eval-auc:0.854301
[16:40:25] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 290 pruned nodes, max_depth=0
[130]	train-auc:0.857239	eval-auc:0.854301
[16:40:25] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 260 pruned nodes, max_depth=0
[131]	train-auc:0.857239	eval-auc:0.854301
[16:40:25] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 304 pruned nodes, max_depth=0
[132]	train-auc:0.857239	eval-auc:0.854301
[16:40:25] C:\Users\Admin

[16:40:31] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 312 pruned nodes, max_depth=0
[213]	train-auc:0.859062	eval-auc:0.856344
[16:40:31] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 308 pruned nodes, max_depth=0
[214]	train-auc:0.859062	eval-auc:0.856344
[16:40:31] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 2 extra nodes, 336 pruned nodes, max_depth=1
[215]	train-auc:0.858898	eval-auc:0.8564
[16:40:31] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 306 pruned nodes, max_depth=0
[216]	train-auc:0.858898	eval-auc:0.8564
[16:40:31] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 262 pruned nodes, max_depth=0
[217]	train-auc:0.858898	eval-auc:0.8564
[16:40:32] C:\Users\Administrat

[16:40:34] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 308 pruned nodes, max_depth=0
[256]	train-auc:0.858992	eval-auc:0.857051
[16:40:34] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 342 pruned nodes, max_depth=0
[257]	train-auc:0.858992	eval-auc:0.857051
[16:40:34] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 368 pruned nodes, max_depth=0
[258]	train-auc:0.858992	eval-auc:0.857051
[16:40:35] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 312 pruned nodes, max_depth=0
[259]	train-auc:0.858992	eval-auc:0.857051
[16:40:35] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 326 pruned nodes, max_depth=0
[260]	train-auc:0.858992	eval-auc:0.857051
[16:40:35] C:\Users\Admin

[16:40:37] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 350 pruned nodes, max_depth=0
[299]	train-auc:0.859525	eval-auc:0.857064
[16:40:38] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 356 pruned nodes, max_depth=0
[300]	train-auc:0.859525	eval-auc:0.857064
[16:40:38] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 324 pruned nodes, max_depth=0
[301]	train-auc:0.859525	eval-auc:0.857064
[16:40:38] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 310 pruned nodes, max_depth=0
[302]	train-auc:0.859525	eval-auc:0.857064
[16:40:38] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 372 pruned nodes, max_depth=0
[303]	train-auc:0.859525	eval-auc:0.857064
[16:40:38] C:\Users\Admin

[16:40:41] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 544 pruned nodes, max_depth=0
[342]	train-auc:0.859531	eval-auc:0.857388
[16:40:41] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 304 pruned nodes, max_depth=0
[343]	train-auc:0.859531	eval-auc:0.857388
[16:40:41] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 334 pruned nodes, max_depth=0
[344]	train-auc:0.859531	eval-auc:0.857388
[16:40:41] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 336 pruned nodes, max_depth=0
[345]	train-auc:0.859531	eval-auc:0.857388
[16:40:41] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 338 pruned nodes, max_depth=0
[346]	train-auc:0.859531	eval-auc:0.857388
[16:40:41] C:\Users\Admin

[16:40:44] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 310 pruned nodes, max_depth=0
[385]	train-auc:0.85981	eval-auc:0.857821
[16:40:44] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 350 pruned nodes, max_depth=0
[386]	train-auc:0.85981	eval-auc:0.857821
[16:40:44] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 366 pruned nodes, max_depth=0
[387]	train-auc:0.85981	eval-auc:0.857821
[16:40:44] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 314 pruned nodes, max_depth=0
[388]	train-auc:0.85981	eval-auc:0.857821
[16:40:44] C:\Users\Administrator\Desktop\xgboost\src\tree\updater_prune.cc:74: tree pruning end, 1 roots, 0 extra nodes, 336 pruned nodes, max_depth=0
[389]	train-auc:0.85981	eval-auc:0.857821
[16:40:44] C:\Users\Administra

<xgboost.core.Booster at 0x20b5823bf60>

In [36]:
xg_classifier.fit(train_data_enc,train_labels)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.9044393087086191, gamma=7,
       learning_rate=0.04291681392641188, max_delta_step=4, max_depth=12,
       min_child_weight=17, missing=None, n_estimators=199, n_jobs=-1,
       nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=0.9387819055106965)

In [37]:
proba_loco = xg_classifier.predict_proba(predict_data_enc)[:,1]

In [38]:
submit = kaggle.set_index('person')

In [39]:
submit['label'] = proba_loco

In [40]:
submit.to_csv('submit.csv')