In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import f1_score
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from splits import split_users # contains split_users func

pd.set_option("display.max_columns", 101)
pd.set_option("display.max_rows", 100)

In [2]:
# # read in pickled DFs generated by query_dfs.py

# df_orders = pd.read_pickle("./pickle/df_orders.pickle")
# df_train = pd.read_pickle("./pickle/df_train.pickle")
# df_prior = pd.read_pickle("./pickle/df_prior.pickle")
# df_prod_detail = pd.read_pickle("./pickle/df_prod_detail.pickle")

In [3]:
# read in pickled feature DF generated by feature_engineering_1.ipynb
X = pd.read_pickle("./pickle/X_F.pickle")

In [4]:
# check scikit-learn version
import sklearn
print(sklearn.__version__)

0.24.1


In [5]:
# X_train, X_test, y_train, y_test = split_users(X, test_size=0.2)# only using 10%


In [6]:


# rf = RandomForestClassifier(n_estimators=500, max_depth=8, max_features=8, n_jobs=-1)
# rf_fit = rf.fit(X_train, y_train)


In [7]:
# preds = rf_fit.predict(X_test)

In [8]:
# f1_score(y_test, preds)

In [9]:
# with open(f"models/rf_fit.pickle", "wb") as pfile:
#         pickle.dump(rf_fit, pfile)

## Grid Search

In [39]:
X_train, X_test, y_train, y_test = split_users(X, subset=0.02, test_size=0.2)


    X_train sample size: 185180
    X_test sample size: 42450


In [11]:
# estimator = XGBClassifier(objective='binary:logistic',
#                           use_label_encoder=False,
#                           eval_metric='logloss',
#                           random_state=54,
#                           learning_rate=0.01,
# )

# params = {
#     'max_depth': [7, 8, 9],
#     'n_estimators': [400, 500],
#     'colsample_bytree': [0.6, 0.7, 0.8],
#     'min_child_weight': [7, 8, 9]}

In [12]:
# grid_search = GridSearchCV(
#     estimator = estimator,
#     param_grid = params,
#     verbose=10
# )

In [13]:
# %%time
# grid_xgb_fit = grid_search.fit(X_train, y_train)
# print("The best parameters are: \n", grid_search.best_params_)



In [14]:
# with open(f"models/grid_xgb_fit.pickle", "wb") as pfile:
#         pickle.dump(grid_xgb_fit, pfile)

## Grid Search Results

**On a different VM, I used grid search to tune min_child_weight and colsample_bytree parameters. Here was the grid & results:**

```
estimator = XGBClassifier(objective='binary:logistic',
                          use_label_encoder=False,
                          eval_metric='logloss',
                          random_state=54,
                          max_depth=3,
                          learning_rate=0.01,
                          n_estimators=500
)

params = {
    'min_child_weight': range(1, 10, 1),
    'colsample_bytree': [.6, .7, .8, .9, 1.0]
}

# results
Fitting 5 folds for each of 45 candidates, totalling 225 fits
The best parameters are: 
 {'colsample_bytree': 0.7, 'min_child_weight': 8}
CPU times: user 1h 47min 29s, sys: 1.55 s, total: 1h 47min 30s
Wall time: 10min 54s

```

**Below were the parameters for our grid search.**
```
estimator = XGBClassifier(objective='binary:logistic',
                          use_label_encoder=False,
                          eval_metric='logloss',
                          random_state=54,
                          learning_rate=0.01,
)

params = {
    'max_depth': [7, 8, 9],
    'n_estimators': [400, 500],
    'colsample_bytree': [0.6, 0.7, 0.8],
    'min_child_weight': [7, 8, 9]}
```

**And the results:**

```
Best results are:
{'colsample_bytree': 0.8, 'max_depth': 7, 'min_child_weight': 9, 'n_estimators': 400}
CPU times: user 21h 33min 24s, sys: 18.2 s, total: 21h 33min 42s
```

Great. Now we have our parameters. Let's run the model!


In [48]:
X_train, X_test, y_train, y_test = split_users(X, subset=False, test_size=0.2)


    X_train sample size: 6782401
    X_test sample size: 1692260


In [None]:
%%time
xgb = XGBClassifier(colsample_bytree=0.8,
                    min_child_weight=9,
                    n_estimators=400,
                    max_depth=7,
                    learning_rate=0.009,
                    eval_metric='logloss',
                    verbosity=3,
                    use_label_encoder =False)

xgb_fit = xgb.fit(X_train, y_train)

[04:23:45] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:146: Tree method is automatically selected to be 'approx' for faster speed. To use old behavior (exact greedy algorithm on single machine), set tree_method to 'exact'.
[04:23:45] DEBUG: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:154: Using tree method: 1
[04:23:45] DEBUG: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:119: Using updaters: grow_histmaker,prune
[04:23:50] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:23:54] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 248 extra nodes, 0 pruned nodes, max_depth=7
[04:23:57] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/wor

[04:26:29] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 248 extra nodes, 0 pruned nodes, max_depth=7
[04:26:33] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:26:37] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:26:41] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:26:44] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:26:48] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

In [None]:
with open(f"models/xgboost_fit_all.pickle", "wb") as pfile:
        pickle.dump(xgb_fit, pfile)

In [47]:
# with open(f"models/xgboost_fit_all.pickle", "rb") as pfile:
#     xgb_fit = pickle.load(pfile)

EOFError: Ran out of input

In [None]:
preds_all = xgb_fit.predict(X_test)

In [None]:
xgb_all_score = f1_score(y_test, preds_all)
xgb_all_score

`xgb_fit` F-1 score: ``

Now let's try a couple more, each time without one of the following features that may/may not be helpful
* log features
* streak
* streak_abs

In [20]:
# X.columns

In [21]:
# X_2 = X.drop(columns=["prod_dpt_mkt_share_log", "prod_aisle_mkt_share_log"])

In [22]:
# X_train, X_test, y_train, y_test = split_users(X_2, subset=False, test_size=0.2)


    X_train sample size: 6782401
    X_test sample size: 1692260


In [23]:
# xgb_no_log = XGBClassifier(colsample_bytree=0.8,
#                                min_child_weight=9,
#                                n_estimators=400,
#                                max_depth=7,
#                                learning_rate=0.009,
#                                eval_metric='logloss',
#                                verbosity=3,
#                                use_label_encoder =False)




In [24]:
# xgb_fit_no_log = xgb_no_log.fit(X_train, y_train)

[02:52:10] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:146: Tree method is automatically selected to be 'approx' for faster speed. To use old behavior (exact greedy algorithm on single machine), set tree_method to 'exact'.
[02:52:10] DEBUG: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:154: Using tree method: 1
[02:52:10] DEBUG: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:119: Using updaters: grow_histmaker,prune
[02:52:15] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[02:52:19] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[02:52:24] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/wor

[02:55:06] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[02:55:11] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[02:55:15] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[02:55:19] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[02:55:23] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[02:55:27] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[02:58:16] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[02:58:20] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[02:58:24] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[02:58:28] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[02:58:32] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[02:58:36] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:01:25] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:01:29] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:01:34] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:01:41] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:01:47] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:01:53] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:04:51] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:04:55] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:05:00] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[03:05:04] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[03:05:08] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:05:12] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:08:06] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:08:10] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[03:08:14] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:08:18] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:08:22] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:08:26] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:11:17] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:11:21] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:11:25] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:11:29] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:11:33] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:11:38] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:14:30] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:14:34] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:14:38] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:14:42] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:14:46] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:14:51] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:17:42] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:17:47] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:17:50] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:17:55] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:17:59] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:18:03] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

In [25]:
# xgb_fit_no_log

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.8, eval_metric='logloss',
              gamma=0, gpu_id=-1, importance_type='gain',
              interaction_constraints='', learning_rate=0.009, max_delta_step=0,
              max_depth=7, min_child_weight=9, missing=nan,
              monotone_constraints='()', n_estimators=400, n_jobs=16,
              num_parallel_tree=1, random_state=0, reg_alpha=0, reg_lambda=1,
              scale_pos_weight=1, subsample=1, tree_method='approx',
              use_label_encoder=False, validate_parameters=1, verbosity=3)

In [26]:
# preds_no_log = xgb_fit_no_log.predict(X_test)

In [27]:
# xgb_no_log_score = f1_score(y_test, preds_no_log)
# xgb_no_log_score

0.2720328996646104

`xgb_no_log_score` F-1 score: `0.2720328996646104`

In [28]:
# with open(f"models/xgboost_fit_no_log.pickle", "wb") as pfile:
#         pickle.dump(xgb_fit_no_log, pfile)

In [29]:
# X_3 = X.drop(columns="streak")
# X_train, X_test, y_train, y_test = split_users(X_3, subset=False, test_size=0.2)

# xgb_no_streak = XGBClassifier(colsample_bytree=0.8,
#                                min_child_weight=9,
#                                n_estimators=400,
#                                max_depth=7,
#                                learning_rate=0.009,
#                                eval_metric='logloss',
#                                verbosity=3,
#                                use_label_encoder =False)

# xgb_fit_no_streak = xgb_no_streak.fit(X_train, y_train)



    X_train sample size: 6782401
    X_test sample size: 1692260
[03:20:20] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:146: Tree method is automatically selected to be 'approx' for faster speed. To use old behavior (exact greedy algorithm on single machine), set tree_method to 'exact'.
[03:20:20] DEBUG: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:154: Using tree method: 1
[03:20:20] DEBUG: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:119: Using updaters: grow_histmaker,prune
[03:20:26] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:20:30] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:20:34] INFO: /h

[03:23:24] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:23:28] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:23:33] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:23:37] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:23:41] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:23:45] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:26:40] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:26:44] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:26:48] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:26:52] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:26:57] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:27:01] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:29:57] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:30:01] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:30:05] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:30:09] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:30:14] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:30:18] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:33:13] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[03:33:17] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:33:22] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:33:26] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[03:33:30] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:33:35] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:36:33] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:36:37] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:36:41] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:36:45] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:36:50] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:36:54] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:39:51] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:39:55] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:40:00] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:40:04] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:40:09] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:40:13] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:43:09] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:43:13] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:43:18] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:43:22] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:43:26] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:43:30] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:46:27] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:46:31] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:46:36] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:46:40] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[03:46:44] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:46:48] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

In [30]:
# preds_no_streak = xgb_fit_no_streak.predict(X_test)


In [31]:

# xgb_score_no_streak = f1_score(y_test, preds_no_streak)
# xgb_score_no_streak

0.2726189454819306

`xgb_score_no_streak` F-1 score: `0.2726189454819306`

In [32]:
# with open(f"models/xgboost_fit_no_streak.pickle", "wb") as pfile:
#         pickle.dump(xgb_fit_no_streak, pfile)

In [33]:
X_4 = X.drop(columns="streak_abs")
X_train, X_test, y_train, y_test = split_users(X_4, subset=False, test_size=0.2)

xgb_no_streak_abs = XGBClassifier(colsample_bytree=0.8,
                               min_child_weight=9,
                               n_estimators=400,
                               max_depth=7,
                               learning_rate=0.009,
                               eval_metric='logloss',
                               verbosity=3,
                               use_label_encoder =False)





    X_train sample size: 6782401
    X_test sample size: 1692260


In [34]:
xgb_fit_no_streak_abs = xgb_no_streak_abs.fit(X_train, y_train)



[03:49:07] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:146: Tree method is automatically selected to be 'approx' for faster speed. To use old behavior (exact greedy algorithm on single machine), set tree_method to 'exact'.
[03:49:07] DEBUG: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:154: Using tree method: 1
[03:49:07] DEBUG: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:119: Using updaters: grow_histmaker,prune
[03:49:13] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:49:17] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:49:22] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/wor

[03:52:12] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:52:16] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:52:21] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:52:25] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:52:29] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:52:34] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:55:28] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:55:32] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:55:36] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:55:41] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:55:45] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:55:50] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[03:58:47] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:58:52] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:58:56] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:59:00] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:59:05] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[03:59:09] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[04:02:06] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:02:11] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:02:15] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:02:19] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:02:23] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:02:28] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[04:05:29] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 250 extra nodes, 0 pruned nodes, max_depth=7
[04:05:33] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[04:05:37] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:05:42] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[04:05:46] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:05:50] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[04:08:51] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:08:55] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:09:00] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:09:04] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:09:09] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:09:13] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[04:12:11] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:12:16] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:12:20] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:12:25] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:12:29] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:12:33] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[04:15:32] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:15:37] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:15:41] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[04:15:45] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[04:15:49] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[04:15:53] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

In [35]:
preds_no_streak_abs = xgb_fit_no_streak_abs.predict(X_test)

In [36]:
xgb_score_no_streak_abs = f1_score(y_test, preds_no_streak_abs)
xgb_score_no_streak_abs

0.2711982545843419

In [37]:
with open(f"models/xgboost_fit_no_streak_abs.pickle", "wb") as pfile:
        pickle.dump(xgb_fit_no_streak_abs, pfile)

And now we'll do the same without `colsample_bytree` and `min_child_weight`. We'll call this model `xgb_fit_2`.

We'll then compare results.

In [None]:
X_5 = X.drop(columns=['prod_dpt_mkt_share', 'prod_dpt_mkt_share_log', 'dpt_total_sales'])
X_train, X_test, y_train, y_test = split_users(X_5, subset=False, test_size=0.2)

xgb_no_dpt = XGBClassifier(colsample_bytree=0.8,
                               min_child_weight=9,
                               n_estimators=400,
                               max_depth=7,
                               learning_rate=0.009,
                               eval_metric='logloss',
                               verbosity=3,
                               use_label_encoder =False)


xgb_fit_no_dpt = xgb_no_dpt.fit(X_train, y_train)


In [None]:
preds_no_dpt = xgb_fit_no_dpt.predict(X_test)

xgb_score_no_dpt = f1_score(y_test, preds_no_dpt)
xgb_score_no_dpt

In [None]:
with open(f"models/xgb_score_no_dpt.pickle", "wb") as pfile:
        pickle.dump(xgb_score_no_dpt, pfile)