In [None]:
# Import needed packages
import pandas as pd
from xgboost import XGBClassifier
import random
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, StratifiedKFold

# Seed shared by every stochastic step in this cell (fold shuffling and
# candidate sampling) so that Restart & Run All reproduces the same result.
SEED = 42

# Define path to the data and read the data into dataframe
data = ".../xgboost_data_full.csv"
df = pd.read_csv(data)

# Split the data into features (X) and target (y):
# column 1 is the target, columns 2.. are the features; column 0 is not used.
X = df.iloc[:, 2:]
y = df.iloc[:, 1]

# Define XGBoost base hyperparameters (shared by every search below)
xgb_model = XGBClassifier(
    tree_method='hist',
    objective='binary:logistic',
    eval_metric="logloss",
    # NOTE(review): `use_label_encoder` is deprecated in newer XGBoost
    # releases — confirm it is still needed for the installed version.
    use_label_encoder=False,
)

# Randomized search: coarse exploration of the hyperparameter space.
# The space below holds 6 * 6 * 4 * 4 * 4 = 2304 combinations, of which
# n_iter=30 are sampled and scored by ROC AUC.
param_grid = {
    'max_depth': range(2, 8, 1),
    'n_estimators': range(200, 800, 100),
    'reg_alpha': [0, 1, 15, 50],
    'max_delta_step': [0, 1, 10, 30],
    'learning_rate': [0.001, 0.1, 0.3, 0.5],
}

# shuffle=True needs a fixed random_state, otherwise every run scores the
# candidates on different folds and the reported best score is not comparable.
kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=SEED)
opt = RandomizedSearchCV(
    xgb_model,
    param_grid,
    scoring='roc_auc',
    n_iter=30,
    cv=kfold,
    verbose=3,
    random_state=SEED,  # fixes which 30 candidates are drawn
)
# `verbose=3` here is forwarded to XGBClassifier.fit as a fit parameter.
opt.fit(X, y, verbose=3)
print("Best: %f using %s" % (opt.best_score_, opt.best_params_))

In [None]:
# Grid search: exhaustive evaluation of a narrowed hyperparameter grid
# (2 * 2 * 2 * 2 * 1 = 16 candidates), scored by ROC AUC.
# Relies on `xgb_model`, `X` and `y` defined in an earlier cell.
param_grid = {
    'max_depth': [4, 6],
    'n_estimators': [400, 500],
    'reg_alpha': [15, 50],
    'max_delta_step': [1, 30],
    'learning_rate': [0.1],
}

# shuffle=True needs a fixed random_state, otherwise the folds (and with them
# the best score / best params) differ on every run of this cell.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
opt = GridSearchCV(xgb_model, param_grid, scoring='roc_auc', cv=kfold, verbose=3)
# `verbose=3` here is forwarded to XGBClassifier.fit as a fit parameter.
opt.fit(X, y, verbose=3)
print("Best: %f using %s" % (opt.best_score_, opt.best_params_))

In [None]:
# Final GridSearchCV parameters: everything is pinned except the learning
# rate, so this search only chooses between 0.1 and 0.3.
# Relies on `xgb_model`, `X` and `y` defined in an earlier cell.
param_grid = {
    'max_depth': [4],
    'n_estimators': [400],
    'reg_alpha': [15],
    'max_delta_step': [1],
    'learning_rate': [0.1, 0.3],
}

# shuffle=True needs a fixed random_state, otherwise the selected learning
# rate — and therefore the model saved below — can change from run to run.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
opt = GridSearchCV(xgb_model, param_grid, scoring='roc_auc', cv=kfold, verbose=3)
# `verbose=3` here is forwarded to XGBClassifier.fit as a fit parameter.
opt.fit(X, y, verbose=3)
print("Best: %f using %s" % (opt.best_score_, opt.best_params_))

# Keep the winning estimator (GridSearchCV refits it on all of X, y by
# default, since `refit` is not overridden above) and persist it to disk.
optimized_xgb_model = opt.best_estimator_
# NOTE(review): XGBoost picks the on-disk format from the file extension;
# confirm that '.model' yields the intended format for the installed version
# before relying on this file elsewhere.
optimized_xgb_model.save_model('TrainedXGBF.model')