# Boosting, Bagging and Stacking

In [None]:
!pip install -r requirements.txt --quiet

In [None]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score

## Load and prepare data

In [None]:
# Load the breast-cancer dataset and hold out 20% of the samples for testing.
data = load_breast_cancer()
X = data.data
y = data.target
# Stratify on the labels so the train and test splits keep the class
# proportions of `y`; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into it.
# NOTE(review): the cross-validated searches further down receive the already
# scaled training data, so their validation folds share these statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Shared experiment settings used by the model-selection cells below.
FOLDS: int = 5          # number of cross-validation folds (passed as `cv`)
N_ITERATIONS: int = 5   # candidates sampled by the randomized search (`n_iter`)
RANDOM_SEED: int = 42   # seed passed as `random_state` for reproducibility

## Bootstrap Aggregation

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import RandomizedSearchCV

# Bagging ensemble: bootstrap-aggregated decision trees. Both the tree and the
# ensemble are seeded so repeated runs give the same model.
base_estimator = DecisionTreeClassifier(random_state=RANDOM_SEED)
bagging = BaggingClassifier(
    estimator=base_estimator,
    random_state=RANDOM_SEED,
)

In [None]:
# Hyperparameter search space for the bagging ensemble.
bagging_params = {
    # number of base estimators in the ensemble
    "n_estimators": [10, 50, 100, 150, 200],
    # fraction of training samples drawn for each base estimator
    "max_samples": [0.1, 0.3, 0.5, 1.0],
    # fraction of features drawn for each base estimator
    "max_features": [0.5, 1.0],
}

In [None]:
# Randomized search: evaluate N_ITERATIONS sampled configurations of the
# bagging ensemble with FOLDS-fold cross-validation on the scaled training set.
bagging_search = RandomizedSearchCV(
    estimator=bagging,
    param_distributions=bagging_params,
    n_iter=N_ITERATIONS,
    cv=FOLDS,
    random_state=RANDOM_SEED,
)
bagging_search.fit(X_train_scaled, y_train)

## Boosting

In [None]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
# Gradient-boosted trees tuned with an exhaustive grid search.
# `use_label_encoder` is deliberately not passed: the option is deprecated and
# no longer used by current XGBoost releases, where supplying it only triggers
# an "unused parameter" warning.
boosting = XGBClassifier(
    random_state=RANDOM_SEED,
    eval_metric="logloss",
)
# 2 x 3 x 2 = 12 candidate configurations, each evaluated with FOLDS-fold CV.
boosting_params = {
    "n_estimators": [50, 100],
    "learning_rate": [0.05, 0.1, 0.2],
    "max_depth": [3, 5],
}

boosting_search = GridSearchCV(
    estimator=boosting,
    param_grid=boosting_params,
    cv=FOLDS,
)
boosting_search.fit(X_train_scaled, y_train)

## Stacking 

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import StackingClassifier

# Base learners for the stacking ensemble, as (name, estimator) pairs.
# Every learner that draws random numbers is seeded with RANDOM_SEED so the
# stacked model is reproducible.
estimators = [
    ('lr', LogisticRegression(max_iter=1000)),
    # probability=True fits an internal cross-validated calibration that
    # shuffles the data, so SVC needs a seed to give repeatable probabilities.
    ('svc', SVC(probability=True, random_state=RANDOM_SEED)),
    ('dt', DecisionTreeClassifier(random_state=RANDOM_SEED)),
    ('knn', KNeighborsClassifier()),
    ('rf', RandomForestClassifier(n_estimators=150, random_state=RANDOM_SEED)),
    ('naive_bayes', GaussianNB()),
    ('ada_boost', AdaBoostClassifier(n_estimators=150, random_state=RANDOM_SEED)),
]

In [None]:
# Stacking ensemble: a logistic-regression meta-learner is trained on the
# cross-validated predictions of the base learners.
stacking = StackingClassifier(
    estimators=estimators,
    # Same iteration budget as the base logistic regression, so the
    # meta-learner has room to converge on the wider passthrough input.
    final_estimator=LogisticRegression(max_iter=1000),
    # Use the notebook-wide fold count instead of the implicit default.
    cv=FOLDS,
    # Also feed the original features to the meta-learner, not only the
    # base learners' predictions.
    passthrough=True,
)

In [None]:
# Train the base learners and the meta-learner on the scaled training split.
stacking.fit(X=X_train_scaled, y=y_train)

## Evaluate methods

In [None]:
# Predict the held-out test labels with each fitted ensemble (the two search
# objects predict with the estimator they selected).
bagging_preds, boosting_preds, stacking_preds = (
    fitted_model.predict(X_test_scaled)
    for fitted_model in (bagging_search, boosting_search, stacking)
)

In [None]:
from sklearn.metrics import (recall_score, precision_score, 
                             f1_score, accuracy_score)
# Test-set predictions keyed by the label shown in the "Model" column.
predictions_by_model = {
    "Bagging": bagging_preds,
    "Boosting": boosting_preds,
    "Stacking": stacking_preds,
}
# Column title -> scoring function; every scorer is called as (y_true, y_pred).
scorers_by_column = {
    "Precision": precision_score,
    "Recall": recall_score,
    "F1 Score": f1_score,
    "Accuracy Score": accuracy_score,
}
# One row per model, one column per metric, in the insertion order above.
metrics = {
    "Model": list(predictions_by_model),
    **{
        column: [scorer(y_test, preds) for preds in predictions_by_model.values()]
        for column, scorer in scorers_by_column.items()
    },
}
metrics_df = pd.DataFrame(metrics)

In [None]:
# Display the comparison table (one row per ensemble method) as the cell output.
metrics_df

In [None]:
from hyperparameter.visualize import plot_hp_comparison
# Plot the four test-set metrics for each ensemble method, using the model
# name as the x-axis category.
metric_columns = ["Precision", "Recall", "F1 Score", "Accuracy Score"]
plot_hp_comparison(
    results_df=metrics_df,
    metric_cols=metric_columns,
    x_col_name="Model",
    title="Comparison of Ensemble Methods Performance",
    x_label_rotation=0,
    palette='Greys',
)