# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

# Baseline XGBoost classifier, constructed with no hyperparameter overrides.
xgb_model = XGBClassifier()

# X (features) and y (target) are expected to exist already. Hold out 20% of
# the samples for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Hyperparameter tuning options
# Uncomment one of the following blocks to choose the tuning method

# Option 1: Grid Search (specify the hyperparameter grid)
# Grid Search exhaustively evaluates every combination in the specified
# parameter grid. It explores the hyperparameter space comprehensively but can
# be computationally expensive, especially with many hyperparameters (the grid
# below has 3**5 = 243 combinations, i.e. 1,215 fits with cv=5).
# param_grid = {
#     'max_depth': [3, 5, 7],
#     'learning_rate': [0.1, 0.01, 0.001],
#     'n_estimators': [100, 200, 300],
#     'min_child_weight': [1, 3, 5],
#     'gamma': [0, 0.1, 0.2]
# }
# grid_search = GridSearchCV(estimator=xgb_model, param_grid=param_grid, scoring='accuracy', cv=5)
# grid_search.fit(X_train, y_train)
# xgb_model = grid_search.best_estimator_

# Option 2: Randomized Search (specify the hyperparameter distributions)
# Randomized Search evaluates a fixed number of candidates (n_iter) sampled at
# random from the specified parameter distributions. It still explores the
# hyperparameter space well and is less computationally expensive than Grid
# Search, but it may miss the optimal hyperparameters.
# param_distributions = {
#     'max_depth': [3, 5, 7, 10],
#     'learning_rate': [0.1, 0.01, 0.001],
#     'n_estimators': [100, 200, 300, 400],
#     'min_child_weight': [1, 3, 5],
#     'gamma': [0, 0.1, 0.2, 0.3, 0.4]
# }
# randomized_search = RandomizedSearchCV(estimator=xgb_model, param_distributions=param_distributions,
#                                        scoring='accuracy', cv=5, n_iter=10, random_state=42)
# randomized_search.fit(X_train, y_train)
# xgb_model = randomized_search.best_estimator_

# Note: Uncomment only one of the above options for hyperparameter tuning.

# Fit the classifier on the training split, then predict labels for the test
# split. fit() hands back the fitted estimator, so the two steps chain.
# NOTE(review): if one of the search options above is enabled, best_estimator_
# has presumably already been fit on X_train, so this fit repeats that work.
y_pred_xgb = xgb_model.fit(X_train, y_train).predict(X_test)

# Evaluate the XGBoost predictions against the held-out labels.
xgb_accuracy = accuracy_score(y_test, y_pred_xgb)

# Without an explicit `labels` argument the confusion matrix has one row per
# distinct label found in y_test or y_pred_xgb, so its size separates the
# binary case from the multiclass case.
xgb_confusion_matrix = confusion_matrix(y_test, y_pred_xgb)

# precision/recall/F1 default to average="binary", which raises ValueError on
# multiclass targets. Keep that default for up to two labels (same results as
# before) and otherwise report the unweighted mean of the per-class scores.
xgb_average = "binary" if xgb_confusion_matrix.shape[0] <= 2 else "macro"
xgb_precision = precision_score(y_test, y_pred_xgb, average=xgb_average)
xgb_recall = recall_score(y_test, y_pred_xgb, average=xgb_average)
xgb_f1 = f1_score(y_test, y_pred_xgb, average=xgb_average)

# Print the evaluation metrics
print("XGBoost Accuracy:", xgb_accuracy)
print("XGBoost Precision:", xgb_precision)
print("XGBoost Recall:", xgb_recall)
print("XGBoost F1-score:", xgb_f1)
print("XGBoost Confusion Matrix:")
print(xgb_confusion_matrix)
