In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
import myfunctions as mf

In [2]:
df = pd.read_csv('prepped_data.csv')

train_labels = pd.read_csv('../../data/prepared/train_labels.csv')
val_labels = pd.read_csv('../../data/prepared/val_labels.csv')

In [3]:
all_labels = pd.concat([train_labels, val_labels], axis=0)

In [4]:
df = df.merge(all_labels, on='customer_ID', how='left')

In [5]:
seed = 42

train, val = train_test_split(df, train_size=0.9,
                              random_state=seed)

In [6]:
train_X = train.drop(columns=['customer_ID', 'target'])
train_y = train['target']
val_X = val.drop(columns=['customer_ID', 'target'])
val_y = val['target']

In [None]:
params = {
    'max_depth': 5,
    'learning_rate': 0.1,
    'verbosity': 2,
    'objective': 'precision',
    'colsample_bytree': 0.8,
    'random_state': seed,
}

In [40]:
model = xgb.XGBClassifier(max_depth=5, learning_rate=0.1,
                          verbosity=2, objective='binary:logistic',
                          subsample=0.9, colsample_bytree=0.9,
                          random_state=seed, eval_metric='map',
                          early_stopping_rounds=2)

In [18]:
model.fit(X=train_X, y=train_y, eval_set=[(val_X, val_y)])

[11:59:40] INFO: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-10.9-x86_64-3.7/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 62 extra nodes, 0 pruned nodes, max_depth=5
[0]	validation_0-map:0.83288
[11:59:51] INFO: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-10.9-x86_64-3.7/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 62 extra nodes, 0 pruned nodes, max_depth=5
[1]	validation_0-map:0.84302
[12:00:03] INFO: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-10.9-x86_64-3.7/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 62 extra nodes, 0 pruned nodes, max_depth=5
[2]	validation_0-map:0.85253
[12:00:15] INFO: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-10.9-x86_64-3.7/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 62 extra nodes, 0 pruned nodes, max_depth=5
[3]	validation_0-map:0.85410
[12:00:27] INFO: /Users/runner/work/xgboost/xgboost/python-package/build/tem

[12:06:55] INFO: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-10.9-x86_64-3.7/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 62 extra nodes, 0 pruned nodes, max_depth=5
[36]	validation_0-map:0.88358
[12:07:07] INFO: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-10.9-x86_64-3.7/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 62 extra nodes, 0 pruned nodes, max_depth=5
[37]	validation_0-map:0.88373
[12:07:18] INFO: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-10.9-x86_64-3.7/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 62 extra nodes, 0 pruned nodes, max_depth=5
[38]	validation_0-map:0.88416
[12:07:30] INFO: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-10.9-x86_64-3.7/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 62 extra nodes, 0 pruned nodes, max_depth=5
[39]	validation_0-map:0.88456
[12:07:41] INFO: /Users/runner/work/xgboost/xgboost/python-package/build

[12:14:01] INFO: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-10.9-x86_64-3.7/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 62 extra nodes, 0 pruned nodes, max_depth=5
[72]	validation_0-map:0.89179
[12:14:13] INFO: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-10.9-x86_64-3.7/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 58 extra nodes, 0 pruned nodes, max_depth=5
[73]	validation_0-map:0.89189
[12:14:25] INFO: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-10.9-x86_64-3.7/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 62 extra nodes, 0 pruned nodes, max_depth=5
[74]	validation_0-map:0.89198
[12:14:37] INFO: /Users/runner/work/xgboost/xgboost/python-package/build/temp.macosx-10.9-x86_64-3.7/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 62 extra nodes, 0 pruned nodes, max_depth=5
[75]	validation_0-map:0.89214
[12:14:49] INFO: /Users/runner/work/xgboost/xgboost/python-package/build

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=0.9,
              early_stopping_rounds=2, enable_categorical=False,
              eval_metric='map', gamma=0, gpu_id=-1, grow_policy='depthwise',
              importance_type=None, interaction_constraints='',
              learning_rate=0.1, max_bin=256, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=5, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=42,
              reg_alpha=0, reg_lambda=1, ...)

In [29]:
def model_evaluator(model, data, y_true):
    
    y_hat = model.predict_proba(data)[:,1]
    
    y_true_final = pd.DataFrame(y_true)
    
    y_hat_final = pd.DataFrame(y_hat, columns=['prediction'])
    
    return mf.amex_metric(y_true_final, y_hat_final)

In [30]:
model_evaluator(model, val_X, val_y)

-0.009090736030707971

In [31]:
def model_evaluator2(model, data, y_true):
    
    y_hat = model.predict(data)
    
    y_true_final = pd.DataFrame(y_true)
    
    y_hat_final = pd.DataFrame(y_hat, columns=['prediction'])
    
    return mf.amex_metric(y_true_final, y_hat_final)

In [32]:
model_evaluator2(model, val_X, val_y)

-0.00979498201547981

In [21]:
model.predict_proba(val_X)

array([[0.4377954 , 0.5622046 ],
       [0.99891853, 0.00108149],
       [0.5066159 , 0.49338415],
       ...,
       [0.20120198, 0.798798  ],
       [0.9976615 , 0.00233852],
       [0.99515253, 0.00484747]], dtype=float32)

In [23]:
predictions = model.predict_proba(val_X)
predictions[:,1]

array([0.5622046 , 0.00108149, 0.49338415, ..., 0.798798  , 0.00233852,
       0.00484747], dtype=float32)

In [26]:
val_y

203562    1
206859    0
68697     1
175493    0
5922      0
         ..
142352    0
3564      0
51647     1
39864     0
202799    0
Name: target, Length: 25316, dtype: int64

In [33]:
model.predict(val_X)

array([1, 0, 0, ..., 1, 0, 0])

In [34]:
predictions = pd.DataFrame(val_y)

In [36]:
predictions['preds'] = model.predict(val_X)

In [37]:
predictions

Unnamed: 0,target,preds
203562,1,1
206859,0,0
68697,1,0
175493,0,0
5922,0,0
...,...,...
142352,0,0
3564,0,0
51647,1,1
39864,0,0


In [38]:
predictions['correct'] = predictions['target'] == predictions['preds']
predictions

Unnamed: 0,target,preds,correct
203562,1,1,True
206859,0,0,True
68697,1,0,False
175493,0,0,True
5922,0,0,True
...,...,...,...
142352,0,0,True
3564,0,0,True
51647,1,1,True
39864,0,0,True


In [39]:
predictions['correct'].mean()

0.9014852267340813