In [None]:
import matplotlib.pyplot as plt
import numpy as np
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from xgboost import plot_importance

In [None]:
# Load the breast-cancer dataset bundled with scikit-learn and pull out the
# pieces used below: the feature matrix, the labels, and the column names.
data = load_breast_cancer()
X, y = data.data, data.target
feature_names = data.feature_names

In [None]:
# Hold out 20% of the samples for testing and train on the remaining 80%.
# A fixed random_state keeps the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# XGBoost ships a scikit-learn compatible wrapper (XGBClassifier), so the model
# is configured and used like any other sklearn estimator.
#
# NOTE: `use_label_encoder=False` is intentionally not passed. The flag was only
# needed to silence a warning on XGBoost 1.3-1.5; it has been deprecated since
# 1.6 and is not a recognised parameter in 2.x, where supplying it produces a
# "Parameters: { "use_label_encoder" } are not used" warning instead.
# `eval_metric='logloss'` is kept so the evaluation metric is stated explicitly.

model = xgb.XGBClassifier(
    n_estimators=100,      # Number of boosted trees
    learning_rate=0.1,     # Step size shrinkage applied to each tree's contribution
    max_depth=3,           # Maximum depth of each tree
    eval_metric='logloss',
    random_state=42        # Fixed seed for reproducible training
)

In [None]:
# Train the classifier on the training split only; the test split stays unseen.
model.fit(X=X_train, y=y_train)

In [None]:
# Predict labels for the held-out samples, then report overall accuracy
# followed by the per-class precision / recall / F1 breakdown.
y_pred = model.predict(X_test)

accuracy_pct = accuracy_score(y_test, y_pred) * 100
print(f"Accuracy: {accuracy_pct:.2f}%")

print("\n--- Classification Report ---")
print(classification_report(y_test, y_pred))

In [None]:
# XGBoost provides tools to see which features drove the model's decisions.
#
# plot_importance() opens a brand-new figure when it is not handed an Axes, so
# a preceding plt.figure(figsize=...) would be left blank and the real plot
# would fall back to the default size. Create the Axes explicitly and pass it
# in so the requested size applies to the plot that is actually drawn.
fig, ax = plt.subplots(figsize=(10, 8))

# X is passed to fit() without column names, so the booster labels features
# generically as f0, f1, ...; map those keys back to the dataset's feature
# names so the chart is readable. "weight" (number of splits using the
# feature) is plot_importance's default importance type.
split_counts = model.get_booster().get_score(importance_type="weight")
named_counts = {
    str(feature_names[int(key[1:])]): count
    for key, count in split_counts.items()
}

plot_importance(named_counts, ax=ax, max_num_features=10)  # Top 10 features
ax.set_title("XGBoost: Top 10 Important Features")
plt.show()

In [None]:
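# OBSERVATION:
# The plot shows each feature's "F score", i.e. XGBoost's default "weight"
# importance: how many times the feature was used to split the data across all
# trees. (This is unrelated to the F1 classification metric.)
# Knowing which features the model relies on is crucial for business explainability.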