In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score, roc_curve
from xgboost import XGBClassifier,XGBRegressor ## KEY CHANGE
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [3]:
cancer = load_breast_cancer()
X = pd.DataFrame(cancer.data, columns=cancer.feature_names)
y = cancer.target

In [6]:
display(X.describe().T.head())

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
mean radius,569.0,14.127292,3.524049,6.981,11.7,13.37,15.78,28.11
mean texture,569.0,19.289649,4.301036,9.71,16.17,18.84,21.8,39.28
mean perimeter,569.0,91.969033,24.298981,43.79,75.17,86.24,104.1,188.5
mean area,569.0,654.889104,351.914129,143.5,420.3,551.1,782.7,2501.0
mean smoothness,569.0,0.09636,0.014064,0.05263,0.08637,0.09587,0.1053,0.1634


In [7]:
corr = X.corr()
fig = px.imshow(corr, color_continuous_scale='Viridis', title='Feature Correlation Heatmap')
fig.show()

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)


In [10]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
xgb_model = XGBClassifier(
    n_estimators=250,
    learning_rate=0.1,
    max_depth=5,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    objective='binary:logistic',
    eval_metric='logloss',
    use_label_encoder=False
)

In [12]:
xgb_model.fit(X_train_scaled, y_train)


Parameters: { "use_label_encoder" } are not used.




In [13]:
y_pred = xgb_model.predict(X_test_scaled)
y_prob = xgb_model.predict_proba(X_test_scaled)[:, 1]

In [14]:
accuracy = accuracy_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_prob)

In [15]:
print(f"Accuracy: {accuracy:.3f}")
print(f"ROC AUC: {roc_auc:.3f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.965
ROC AUC: 0.996

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.92      0.95        53
           1       0.96      0.99      0.97        90

    accuracy                           0.97       143
   macro avg       0.97      0.96      0.96       143
weighted avg       0.97      0.97      0.96       143



In [16]:
cm = confusion_matrix(y_test, y_pred)
cm_df = pd.DataFrame(cm, index=["Malignant (0)", "Benign (1)"], columns=["Predicted 0", "Predicted 1"])


In [17]:
fig = px.imshow(cm_df, text_auto=True, color_continuous_scale='Blues', title="Confusion Matrix (XGBoost)")
fig.show()

In [18]:
fpr, tpr, _ = roc_curve(y_test, y_prob)
fig = go.Figure()
fig.add_trace(go.Scatter(x=fpr, y=tpr, mode='lines', name='ROC Curve'))
fig.add_trace(go.Scatter(x=[0,1], y=[0,1], mode='lines', name='Baseline', line=dict(dash='dash')))
fig.update_layout(title=f"ROC Curve (AUC = {roc_auc:.3f})", xaxis_title='False Positive Rate', yaxis_title='True Positive Rate')
fig.show()

In [19]:
xgb_model.feature_importances_

array([0.0027175 , 0.0177395 , 0.04975429, 0.0309447 , 0.00772217,
       0.00659995, 0.00429517, 0.13262087, 0.00354972, 0.00827313,
       0.00843634, 0.01081198, 0.01660089, 0.01287023, 0.00761245,
       0.00467301, 0.00990754, 0.00339351, 0.00319824, 0.00299103,
       0.11599503, 0.01755204, 0.21662848, 0.07505312, 0.01773515,
       0.03803241, 0.01519857, 0.14910701, 0.00731431, 0.00267159],
      dtype=float32)

In [21]:
importance = xgb_model.feature_importances_
importance_df = pd.DataFrame({'Feature': cancer.feature_names, 'Importance': importance})
importance_df = importance_df.sort_values(by='Importance', ascending=False)#.head(15)

fig = px.bar(importance_df, x='Importance', y='Feature', orientation='h',
             color='Importance', color_continuous_scale='Viridis',
             title=' Most Important Features (XGBoost)')
fig.update_layout(yaxis=dict(categoryorder='total ascending'))
fig.show()