In [None]:
# Function: Find best estimator
def param_tuning(estimator, parameters, data, target, scoring="roc_auc", cv=10, n_jobs=10):
    """Run an exhaustive grid search and return the best estimator it found.

    Parameters
    ----------
    estimator : estimator object
        Model handed to ``GridSearchCV`` as ``estimator``.
    parameters : dict or list of dict
        Search space handed to ``GridSearchCV`` as ``param_grid``.
    data, target
        Training features and labels passed to ``GridSearchCV.fit``.
    scoring : str, default "roc_auc"
        Metric used to rank parameter combinations.
    cv : int, default 10
        Cross-validation setting forwarded to ``GridSearchCV``.
    n_jobs : int or None, default 10
        Number of parallel workers forwarded to ``GridSearchCV``. The default
        is kept for backward compatibility; pass ``-1`` to use every available
        core, or a smaller number on machines with fewer than 10 cores.

    Returns
    -------
    The ``best_estimator_`` attribute of the fitted ``GridSearchCV``.
    """
    # Set up grid search
    grid_search = GridSearchCV(
        estimator=estimator,
        param_grid=parameters,
        scoring=scoring,
        n_jobs=n_jobs,
        cv=cv,
        verbose=1,  # same verbosity level as the previous ``verbose=True``
    )

    # Train on training data
    grid_search.fit(data, target)

    # Output best estimator
    return grid_search.best_estimator_

In [None]:
# Function: Train classifier and evaluate performance
def model_fit(model, data_train, data_test, target_train, target_test, cv=10):
    """Fit ``model`` and report cross-validated AUC plus test-set metrics.

    The function prints the per-fold and mean ROC AUC obtained with
    ``cross_val_score`` on the training data, prints a classification report
    for the test data, and draws a confusion matrix of the test predictions.

    Parameters
    ----------
    model : estimator object
        Classifier exposing ``fit`` and ``predict``; it is fitted in place.
    data_train, target_train
        Training features and labels.
    data_test, target_test
        Test features and labels used for the report and confusion matrix.
    cv : int, default 10
        Cross-validation setting forwarded to ``cross_val_score``.

    Returns
    -------
    None
    """
    # Imported locally: ``plot_confusion_matrix`` was removed in scikit-learn 1.2,
    # while ``ConfusionMatrixDisplay`` exists in every release that shipped it.
    from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

    # Fit on training data
    model.fit(data_train, target_train)

    # Cross validate on training data
    fold_scores = cross_val_score(
        model, data_train, target_train, cv=cv, scoring="roc_auc"
    )
    print("AUC in each fold: {}; mean: {}\n".format(fold_scores, fold_scores.mean()))

    # Predict outcome for testing data
    y_pred = model.predict(data_test)

    # Display performance metrics
    report = classification_report(target_test, y_pred, labels=[0, 1])
    print(report)

    # Confusion matrix, built from the predictions computed above. The labels
    # are pinned to [0, 1] (as in the report) so the matrix is always 2x2 and
    # lines up with the two display labels.
    matrix = confusion_matrix(target_test, y_pred, labels=[0, 1])
    cm_display = ConfusionMatrixDisplay(
        confusion_matrix=matrix,
        display_labels=["survived", "dead"],
    )
    cm_display.plot(cmap=plt.cm.Blues)
    cm_display.ax_.set_title("Confusion matrix")

In [None]:
# Default model
# NOTE: early_stopping_rounds is deliberately not set on the estimator. XGBoost
# documents that early stopping requires at least one item in ``eval_set`` when
# calling fit(), and none of model_fit, cross_val_score or GridSearchCV passes
# one, so the setting could never take effect and makes fit() raise on recent
# XGBoost releases. Every later tuning step starts from this estimator, so the
# setting would propagate to all of them.
xgb_base = XGBClassifier(learning_rate=0.1)
model_fit(xgb_base, X_train, X_test, y_train, y_test)

In [None]:
# Step 1: search tree depth and minimum child weight, starting from the baseline
param_step1 = dict(
    max_depth=range(2, 10),
    min_child_weight=range(1, 6, 2),
)
xgb_step1 = param_tuning(
    estimator=xgb_base,
    parameters=param_step1,
    data=X_train,
    target=y_train,
)
model_fit(
    model=xgb_step1,
    data_train=X_train,
    data_test=X_test,
    target_train=y_train,
    target_test=y_test,
)

In [None]:
# Step 2: search gamma on top of the best step-1 model
# (0.0 up to, but not including, 10.0 in increments of 0.05)
param_step2 = dict(gamma=np.arange(0.0, 10.0, 0.05))
xgb_step2 = param_tuning(
    estimator=xgb_step1,
    parameters=param_step2,
    data=X_train,
    target=y_train,
)
model_fit(
    model=xgb_step2,
    data_train=X_train,
    data_test=X_test,
    target_train=y_train,
    target_test=y_test,
)

In [None]:
# Step 3: search row and column subsampling ratios on top of the step-2 model
param_step3 = dict(
    subsample=[0.5, 0.6, 0.7, 0.8, 0.9],
    colsample_bytree=[0.5, 0.6, 0.7, 0.8, 0.9],
)
xgb_step3 = param_tuning(
    estimator=xgb_step2,
    parameters=param_step3,
    data=X_train,
    target=y_train,
)
model_fit(
    model=xgb_step3,
    data_train=X_train,
    data_test=X_test,
    target_train=y_train,
    target_test=y_test,
)

In [None]:
# Step 4: search the reg_alpha regularisation term on top of the step-3 model
param_step4 = dict(reg_alpha=[1e-5, 1e-2, 0.1, 0.5, 1, 5, 100])
xgb_step4 = param_tuning(
    estimator=xgb_step3,
    parameters=param_step4,
    data=X_train,
    target=y_train,
)
model_fit(
    model=xgb_step4,
    data_train=X_train,
    data_test=X_test,
    target_train=y_train,
    target_test=y_test,
)

In [None]:
# Step 5: search the learning rate on top of the step-4 model
# (0.001 up to, but not including, 0.1 in increments of 0.0001)
param_step5 = dict(learning_rate=np.arange(0.001, 0.1, 0.0001))
xgb_step5 = param_tuning(
    estimator=xgb_step4,
    parameters=param_step5,
    data=X_train,
    target=y_train,
)
model_fit(
    model=xgb_step5,
    data_train=X_train,
    data_test=X_test,
    target_train=y_train,
    target_test=y_test,
)