In [1]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBClassifier

from splits import split_users # contains split_users func

# Raise pandas' display limits to 101 columns and 100 rows.
pd.options.display.max_columns = 101
pd.options.display.max_rows = 100

In [2]:
# # read in pickled DFs generated by query_dfs.py

# df_orders = pd.read_pickle("./pickle/df_orders.pickle")
# df_train = pd.read_pickle("./pickle/df_train.pickle")
# df_prior = pd.read_pickle("./pickle/df_prior.pickle")
# df_prod_detail = pd.read_pickle("./pickle/df_prod_detail.pickle")

In [3]:
# Load the pickled feature DataFrame produced by feature_engineering_1.ipynb,
# then display its column index as the cell output.
features_pickle = "./pickle/X_F.pickle"
X = pd.read_pickle(features_pickle)
X.columns

Index(['product_id', 'user_id', 'user_total_prod_orders', 'cart', 'in_cart',
       'last_cart', 'in_last_cart', 'qty_reordered', 'qty_sold',
       'prod_reorder_pct', 'prod_prior_sales', 'prod_pct_reorders',
       'prod_avg_atco', 'user_avg_cart_size', 'days_since_prior_order',
       'order_hour_of_day', 'user_avg_spacing', 'streak_nan', 'up_buy_streak',
       'up_n5_n_buys', 'up_n5_buy_ratio', 'up_atco_sum', 'up_atco_avg',
       'prod_total_mkt_share', 'prod_total_mkt_share_log', 'aisle_total_sales',
       'prod_aisle_mkt_share', 'prod_aisle_mkt_share_log', 'streak_abs'],
      dtype='object')

In [4]:
# Print the scikit-learn version this notebook runs against.
# `sklearn` is imported in the first cell together with the other imports.
print(sklearn.__version__)

0.24.1


## Let's start with Random Forest

In [5]:
# X_train, X_test, y_train, y_test = split_users(X, test_size=0.2)

In [6]:
# rf = RandomForestClassifier(n_estimators=500, max_depth=8, max_features=8, n_jobs=-1)
# rf_fit = rf.fit(X_train, y_train)

In [7]:
# preds = rf_fit.predict(X_test)

In [8]:
# f1_score(y_test, preds)

In [9]:
# with open(f"models/rf_fit.pickle", "wb") as pfile:
#         pickle.dump(rf_fit, pfile)

## Grid Search

In [10]:
# X_train, X_test, y_train, y_test = split_users(X, subset=0.035, test_size=0.2)

In [11]:
# estimator = XGBClassifier(objective='binary:logistic',
#                           use_label_encoder=False,
#                           eval_metric='logloss',
#                           random_state=54,
#                           learning_rate=0.009,
# )

# params = {
#     'max_depth': [6, 7, 8],
#     'n_estimators': [325, 375],
#     'colsample_bytree': [0.7, 0.8, 0.9],
#     'min_child_weight': [8, 9, 10]}

In [12]:
# grid_search = GridSearchCV(
#     estimator = estimator,
#     param_grid = params,
#     verbose=10
# )

In [13]:
# %%time
# grid_xgb_fit = grid_search.fit(X_train, y_train)
# print("The best parameters are: \n", grid_search.best_params_)

In [14]:
# with open(f"models/grid_xgb_fit.pickle", "wb") as pfile:
#         pickle.dump(grid_xgb_fit, pfile)

## Grid Search Results

**On a different VM, I used grid search to tune min_child_weight and colsample_bytree parameters. Here was the grid & results:**

```
estimator = XGBClassifier(objective='binary:logistic',
                          use_label_encoder=False,
                          eval_metric='logloss',
                          random_state=54,
                          max_depth=3,
                          learning_rate=0.01,
                          n_estimators=500
)

params = {
    'min_child_weight': range(1, 10, 1),
    'colsample_bytree': [.6, .7, .8, .9, 1.0]
}

# results
Fitting 5 folds for each of 45 candidates, totalling 225 fits
The best parameters are: 
 {'colsample_bytree': 0.7, 'min_child_weight': 8}
CPU times: user 1h 47min 29s, sys: 1.55 s, total: 1h 47min 30s
Wall time: 10min 54s

```

**Below were the parameters for our grid search.**
```
estimator = XGBClassifier(objective='binary:logistic',
                          use_label_encoder=False,
                          eval_metric='logloss',
                          random_state=54,
                          learning_rate=0.01,
)

params = {
    'max_depth': [7, 8, 9],
    'n_estimators': [400, 500],
    'colsample_bytree': [0.6, 0.7, 0.8],
    'min_child_weight': [7, 8, 9]}
```

**And the results:**

```
Best results are:
{'colsample_bytree': 0.8, 'max_depth': 7, 'min_child_weight': 9, 'n_estimators': 400}
CPU times: user 21h 33min 24s, sys: 18.2 s, total: 21h 33min 42s
```

**Final (final) grid search results**

```
The best parameters are: 
 {'colsample_bytree': 0.8, 'max_depth': 7, 'min_child_weight': 9, 'n_estimators': 325}
CPU times: user 1d 2h 54min 9s, sys: 32.2 s, total: 1d 2h 54min 41s
```

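Great. Now we have our parameters. Let's run the model! (Note: the cell below trains with `n_estimators=400` rather than the 325 reported by the final search.)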


In [59]:
# Train/test split of the full feature frame via the project helper in splits.py.
# test_size=0.2 requests a 20% hold-out; the grid-search cell earlier passed
# subset=0.035, so subset=False presumably disables sub-sampling — TODO confirm
# against split_users.
X_train, X_test, y_train, y_test = split_users(X, subset=False, test_size=0.2)


    X_train sample size: 6782401
    X_test sample size: 1692260


In [60]:
%%time
xgb = XGBClassifier(colsample_bytree=0.8,
                    min_child_weight=9,
                    n_estimators=400,
                    max_depth=7,
                    learning_rate=0.009,
                    eval_metric='logloss',
                    verbosity=3,
                    use_label_encoder =False)

xgb_fit = xgb.fit(X_train, y_train)

[19:40:38] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:146: Tree method is automatically selected to be 'approx' for faster speed. To use old behavior (exact greedy algorithm on single machine), set tree_method to 'exact'.
[19:40:38] DEBUG: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:154: Using tree method: 1
[19:40:38] DEBUG: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/gbm/gbtree.cc:119: Using updaters: grow_histmaker,prune
[19:40:44] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:40:48] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:40:53] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/wor

[19:43:51] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:43:56] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:44:00] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:44:05] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:44:09] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 248 extra nodes, 0 pruned nodes, max_depth=7
[19:44:14] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[19:47:17] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:47:21] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:47:26] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:47:31] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:47:35] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:47:40] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[19:50:43] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:50:47] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:50:52] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:50:56] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:51:01] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:51:05] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[19:54:05] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:54:10] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:54:14] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:54:19] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:54:23] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:54:28] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[19:57:31] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:57:35] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:57:39] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[19:57:44] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[19:57:49] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[19:57:53] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[20:00:58] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[20:01:02] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[20:01:07] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[20:01:11] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[20:01:15] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[20:01:20] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[20:04:23] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[20:04:27] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[20:04:32] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[20:04:36] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[20:04:41] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[20:04:45] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

[20:07:49] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[20:07:53] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 252 extra nodes, 0 pruned nodes, max_depth=7
[20:07:58] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[20:08:02] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[20:08:07] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree/updater_prune.cc:101: tree pruning end, 254 extra nodes, 0 pruned nodes, max_depth=7
[20:08:11] INFO: /home/conda/feedstock_root/build_artifacts/xgboost_1607604574104/work/src/tree

In [61]:
# Persist the fitted full-data model to disk; the commented-out cell that
# follows shows the matching load.
with open("models/xgboost_fit_all.pickle", "wb") as pfile:
    pickle.dump(xgb_fit, pfile)

In [62]:
# with open(f"models/xgboost_fit_all.pickle", "rb") as pfile:
#     xgb_fit = pickle.load(pfile)

In [63]:
# Predict on the held-out split with the fitted model.
# NOTE(review): these predictions feed f1_score directly; if predict() returns
# hard labels at a default cut-off, tuning a threshold on predict_proba() may
# be worth testing — confirm.
preds_all = xgb_fit.predict(X_test)

In [64]:
# F1 score of the full-feature model on the held-out split; shown as the cell output.
xgb_all_score = f1_score(y_true=y_test, y_pred=preds_all)
xgb_all_score

0.2723744684097107

`xgb_fit` F-1 score: `0.2722773221438108`
* this is before the following feature changes:
    * remove `streak`
    * remove department features
    * add `prod_reorder_pct`

After those changes, our score is: `0.2723744684097107`

Now let's try a couple more, each time without one of the following features that may/may not be helpful
* log features
* streak
* streak_abs

In [21]:
# X.columns

In [22]:
# X_2 = X.drop(columns=["prod_dpt_mkt_share_log", "prod_aisle_mkt_share_log"])

In [23]:
# X_train, X_test, y_train, y_test = split_users(X_2, subset=False, test_size=0.2)

In [24]:
# xgb_no_log = XGBClassifier(colsample_bytree=0.8,
#                                min_child_weight=9,
#                                n_estimators=400,
#                                max_depth=7,
#                                learning_rate=0.009,
#                                eval_metric='logloss',
#                                verbosity=3,
#                                use_label_encoder =False)

In [25]:
# xgb_fit_no_log = xgb_no_log.fit(X_train, y_train)

In [26]:
# xgb_fit_no_log

In [27]:
# preds_no_log = xgb_fit_no_log.predict(X_test)

In [28]:
# xgb_no_log_score = f1_score(y_test, preds_no_log)
# xgb_no_log_score

`xgb_no_log_score` F-1 score: `0.2720328996646104`

In [29]:
# with open(f"models/xgboost_fit_no_log.pickle", "wb") as pfile:
#         pickle.dump(xgb_fit_no_log, pfile)

In [30]:
# X_3 = X.drop(columns="streak")
# X_train, X_test, y_train, y_test = split_users(X_3, subset=False, test_size=0.2)

# xgb_no_streak = XGBClassifier(colsample_bytree=0.8,
#                                min_child_weight=9,
#                                n_estimators=400,
#                                max_depth=7,
#                                learning_rate=0.009,
#                                eval_metric='logloss',
#                                verbosity=3,
#                                use_label_encoder =False)

# xgb_fit_no_streak = xgb_no_streak.fit(X_train, y_train)


In [31]:
# preds_no_streak = xgb_fit_no_streak.predict(X_test)

In [32]:
# xgb_score_no_streak = f1_score(y_test, preds_no_streak)
# xgb_score_no_streak

`xgb_score_no_streak` F-1 score: `0.2726189454819306`

Our best score yet! Going forward, we'll run the model without the `streak` column. It is redundant, as its information is captured in `streak_abs`, which is well-accompanied by `streak_nan` and `up_buy_streak`.

In [33]:
# with open(f"models/xgboost_fit_no_streak.pickle", "wb") as pfile:
#         pickle.dump(xgb_fit_no_streak, pfile)

In [34]:
# X_4 = X.drop(columns="streak_abs")
# X_train, X_test, y_train, y_test = split_users(X_4, subset=False, test_size=0.2)

# xgb_no_streak_abs = XGBClassifier(colsample_bytree=0.8,
#                                min_child_weight=9,
#                                n_estimators=400,
#                                max_depth=7,
#                                learning_rate=0.009,
#                                eval_metric='logloss',
#                                verbosity=3,
#                                use_label_encoder =False)

In [35]:
# xgb_fit_no_streak_abs = xgb_no_streak_abs.fit(X_train, y_train)

In [36]:
# preds_no_streak_abs = xgb_fit_no_streak_abs.predict(X_test)

In [37]:
# xgb_score_no_streak_abs = f1_score(y_test, preds_no_streak_abs)
# xgb_score_no_streak_abs

`xgb_score_no_streak_abs` F-1 score: `0.2711982545843419`

In [38]:
# with open(f"models/xgboost_fit_no_streak_abs.pickle", "wb") as pfile:
#         pickle.dump(xgb_fit_no_streak_abs, pfile)

And now we'll do the same without the department features (`prod_dpt_mkt_share`, `prod_dpt_mkt_share_log` and `dpt_total_sales`). We'll call this model `xgb_fit_no_dpt`.

We'll then compare results.

In [39]:
# X_5 = X.drop(columns=['prod_dpt_mkt_share', 'prod_dpt_mkt_share_log', 'dpt_total_sales'])
# X_train, X_test, y_train, y_test = split_users(X_5, subset=False, test_size=0.2)

# xgb_no_dpt = XGBClassifier(colsample_bytree=0.8,
#                                min_child_weight=9,
#                                n_estimators=400,
#                                max_depth=7,
#                                learning_rate=0.009,
#                                eval_metric='logloss',
#                                verbosity=3,
#                                use_label_encoder =False)


# xgb_fit_no_dpt = xgb_no_dpt.fit(X_train, y_train)

In [40]:
# with open(f"models/xgb_fit_no_dpt.pickle", "wb") as pfile:
#         pickle.dump(xgb_fit_no_dpt, pfile)

In [41]:
# preds_no_dpt = xgb_fit_no_dpt.predict(X_test)

# xgb_score_no_dpt = f1_score(y_test, preds_no_dpt)
# xgb_score_no_dpt

`xgb_score_no_dpt` F-1 score: `0.27230669439304045`

**Now let's test out removing the `up_atco` (user-purchase add-to-cart-order) columns**

We'll start with just dropping the `up_atco_sum` column, since that's the one that makes the least sense (to me, anyways).

In [42]:
# X_6 = X.drop(columns='up_atco_sum')
# X_train, X_test, y_train, y_test = split_users(X_6, subset=False, test_size=0.2)

# xgb_no_atco_sum = XGBClassifier(colsample_bytree=0.8,
#                                min_child_weight=9,
#                                n_estimators=400,
#                                max_depth=7,
#                                learning_rate=0.009,
#                                eval_metric='logloss',
#                                verbosity=3,
#                                use_label_encoder =False)

In [43]:
# xgb_fit_no_atco_sum = xgb_no_atco_sum.fit(X_train, y_train)

In [44]:
# with open(f"models/xgb_fit_no_atco_sum.pickle", "wb") as pfile:
#         pickle.dump(xgb_fit_no_atco_sum, pfile)

In [45]:
# preds_no_atco_sum = xgb_fit_no_atco_sum.predict(X_test)

# xgb_score_no_atco_sum = f1_score(y_test, preds_no_atco_sum)
# xgb_score_no_atco_sum

`xgb_score_no_atco_sum` F-1 score: `0.27225540149568883`

And now let's try with `atco_sum`, but without `atco_avg`.

In [46]:
# X_7 = X.drop(columns='up_atco_avg')
# X_train, X_test, y_train, y_test = split_users(X_7, subset=False, test_size=0.2)

# xgb_no_atco_avg = XGBClassifier(colsample_bytree=0.8,
#                                min_child_weight=9,
#                                n_estimators=400,
#                                max_depth=7,
#                                learning_rate=0.009,
#                                eval_metric='logloss',
#                                verbosity=3,
#                                use_label_encoder =False)

# xgb_fit_no_atco_avg = xgb_no_atco_avg.fit(X_train, y_train)

In [47]:
# with open(f"models/xgb_no_atco_avg.pickle", "wb") as pfile:
#         pickle.dump(xgb_fit_no_atco_avg, pfile)

In [48]:
# preds_no_atco_avg = xgb_fit_no_atco_avg.predict(X_test)

# xgb_score_no_atco_avg = f1_score(y_test, preds_no_atco_avg)
# xgb_score_no_atco_avg

`xgb_score_no_atco_avg` F-1 score: `0.27166193689933943`

And now without both ATCO columns...

In [49]:
# X_8 = X.drop(columns=['up_atco_avg', 'up_atco_sum'])
# X_train, X_test, y_train, y_test = split_users(X_8, subset=False, test_size=0.2)

# xgb_no_atco = XGBClassifier(colsample_bytree=0.8,
#                                min_child_weight=9,
#                                n_estimators=400,
#                                max_depth=7,
#                                learning_rate=0.009,
#                                eval_metric='logloss',
#                                verbosity=3,
#                                use_label_encoder =False)

# xgb_fit_no_atco = xgb_no_atco.fit(X_train, y_train)

In [50]:
# with open(f"models/xgb_no_atco.pickle", "wb") as pfile:
#         pickle.dump(xgb_fit_no_atco, pfile)

In [51]:
# preds_no_atco = xgb_fit_no_atco.predict(X_test)

# xgb_score_no_atco = f1_score(y_test, preds_no_atco)
# xgb_score_no_atco

`xgb_score_no_atco` F-1 score: `0.2717455536704855`

Alright, it turns out the model actually does perform better with both `up_atco_sum` and `up_atco_avg`. We'll leave them in for now.