In [1]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.datasets import load_breast_cancer

In [2]:
# Load the breast cancer dataset shipped with scikit-learn and separate
# the feature matrix from the target vector.
cancer_data = load_breast_cancer()
X, y = cancer_data.data, cancer_data.target

In [3]:
# XGBoost classifier constructed with the library's default settings
xg_classifier = XGBClassifier()

# Hold out 20% of the samples as a test set; the fixed seed keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Candidate values for the hyperparameter search: two options for each of
# the five parameters, i.e. 2**5 = 32 possible combinations.
xg_params = dict(
    n_estimators=[50, 100],
    learning_rate=[0.01, 0.1],
    max_depth=[3, 5],
    subsample=[0.8, 1.0],
    colsample_bytree=[0.8, 1.0],
)

In [5]:
# Randomized search over xg_params: 10 sampled parameter settings, each
# scored by accuracy with cv=5; the seed fixes which settings are drawn.
randomized_cv = RandomizedSearchCV(
    estimator=xg_classifier,
    param_distributions=xg_params,
    n_iter=10,
    scoring='accuracy',
    cv=5,
    random_state=42,
)
randomized_cv.fit(X_train, y_train)

In [6]:
# Look up the winning parameter combination once, then pull out each value
best_params_xg = randomized_cv.best_params_
best_n_estimators_xg = best_params_xg['n_estimators']
best_learning_rate_xg = best_params_xg['learning_rate']
best_max_depth_xg = best_params_xg['max_depth']
best_subsample_xg = best_params_xg['subsample']
best_colsample_bytree_xg = best_params_xg['colsample_bytree']

# Report the selected hyperparameters, one per line
print(
    f'Best n_estimators for XGBoost: {best_n_estimators_xg}',
    f'Best learning_rate for XGBoost: {best_learning_rate_xg}',
    f'Best max_depth for XGBoost: {best_max_depth_xg}',
    f'Best subsample for XGBoost: {best_subsample_xg}',
    f'Best colsample_bytree for XGBoost: {best_colsample_bytree_xg}',
    sep='\n',
)


Best n_estimators for XGBoost: 100
Best learning_rate for XGBoost: 0.1
Best max_depth for XGBoost: 5
Best subsample for XGBoost: 1.0
Best colsample_bytree for XGBoost: 0.8


In [8]:

# Take the estimator the randomized search selected as best and use it to
# predict labels for the held-out test features.
best_model_randomized = randomized_cv.best_estimator_
y_pred_randomized = best_model_randomized.predict(X_test)

In [9]:
# Compare the test-set predictions against the true labels
accuracy_randomized = accuracy_score(y_true=y_test, y_pred=y_pred_randomized)
report_randomized = classification_report(y_true=y_test, y_pred=y_pred_randomized)

# Summary: headline accuracy (two decimals) followed by the per-class report
print(
    "RandomizedSearchCV - XGBoost Classifier:",
    f'Accuracy on test set: {accuracy_randomized:.2f}',
    f'Classification Report:\n{report_randomized}',
    sep='\n',
)

RandomizedSearchCV - XGBoost Classifier:
Accuracy on test set: 0.96
Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114

