In [1]:
import feather
import xgboost as xgb
import numpy as np

In [2]:
# Locations of the pre-built feather files, relative to the notebook's
# working directory.
TRAIN_PATH = 'feather/df_train.feather'
VAL_PATH = 'feather/df_val.feather'

# Load both splits. Note that `df_test` is read from the *validation*
# file (df_val.feather); the name is kept because later cells refer to it.
df_train = feather.read_dataframe(TRAIN_PATH)
df_test = feather.read_dataframe(VAL_PATH)

In [3]:
# Columns that must not be used as model inputs. 'decision' is the label
# column (it is read as the target when the train/val arrays are built).
to_exlude = ['decision']

# Start from every column of the training frame, then drop the excluded
# ones in place. list.remove raises ValueError if a name is missing, which
# surfaces a schema mismatch immediately.
features = df_train.columns.tolist()
for col in to_exlude:
    features.remove(col)

In [4]:
# Booster parameters passed to xgb.train.
#
# First group: values the notebook author grouped as library defaults.
# NOTE(review): 'approx' does not look like XGBoost's default tree_method
# (the documented default is 'auto') -- confirm this grouping is intended.
xgb_pars = {
    'eta': 0.3,
    'gamma': 0,
    'max_depth': 6,
    'min_child_weight': 1,
    'max_delta_step': 0,
    'subsample': 1,
    'colsample_bytree': 1,
    'colsample_bylevel': 1,
    'lambda': 1,
    'alpha': 0,
    'tree_method': 'approx',
}

# Second group: settings deliberately changed from the defaults.
xgb_pars.update({
    'objective': 'binary:logistic',
    'eval_metric': 'auc',
    'nthread': 8,
    'seed': 42,
    # NOTE(review): newer XGBoost releases replaced `silent` with
    # `verbosity` -- confirm against the installed version before changing.
    'silent': 1,
})

# Number of boosting rounds used when the model is trained.
n_estimators = 100

In [5]:
def _split_xy(frame):
    """Return (feature matrix as float32 ndarray, 'decision' label array) for `frame`.

    Uses the module-level `features` list to select the input columns.
    """
    return frame[features].astype('float32').values, frame.decision.values


# Training split.
X_train, y_train = _split_xy(df_train)

# Validation split (`df_test` is loaded from the validation feather file).
X_val, y_val = _split_xy(df_test)

In [6]:
# The numpy copies above are all that is needed from here on; drop the
# DataFrame references so their memory can be reclaimed.
del df_train
del df_test

In [7]:
# Keyword arguments common to both matrices: keep the column names attached
# and treat NaN as the missing-value marker.
dmatrix_kwargs = {'feature_names': features, 'missing': np.nan}

dtrain = xgb.DMatrix(X_train, label=y_train, **dmatrix_kwargs)
dval = xgb.DMatrix(X_val, label=y_val, **dmatrix_kwargs)

# (matrix, display name) pairs whose metrics are reported during training.
watchlist = [(dtrain, 'train'), (dval, 'val')]

In [8]:
# The DMatrix objects are used from here on; drop the intermediate numpy
# array references (labels are re-read from `dval` later via get_label()).
del X_train
del X_val
del y_train
del y_val

In [10]:
from sklearn.metrics import roc_auc_score

In [15]:
# Fit the booster for `n_estimators` rounds, evaluating on every entry of
# `watchlist`; verbose_eval=10 limits the progress log to every 10th round.
model = xgb.train(
    params=xgb_pars,
    dtrain=dtrain,
    num_boost_round=n_estimators,
    evals=watchlist,
    verbose_eval=10,
)

# Recover the validation labels from the DMatrix (the original arrays were
# deleted earlier) and score the model's predictions against them.
y_val = dval.get_label()
y_pred = model.predict(dval)

# Bare last expression so the notebook displays the validation ROC AUC.
roc_auc_score(y_val, y_pred)

0.80925207799570154

Leaderboard (LB) score: 0.7892