In [None]:
# Step 1: Imports and data loading

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
import warnings
warnings.filterwarnings('ignore')

# Load a freely available dataset from scikit-learn
data = load_breast_cancer()  # features + target in memory [web:17]
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

# Separate features and target
X = df.drop('target', axis=1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)


In [2]:
# Aggregating bootstrap models
bagged_model = BaggingClassifier(random_state=42).fit(X_train, y_train)
bagged_score = bagged_model.score(X_test, y_test)

bagged_score

0.9440559440559441

In [None]:
# Pasting vs. Bagging
# BaggingClassifier to change from bagging to pasting
pasted_model = BaggingClassifier(random_state=42, 
                                bootstrap=False).fit(X_train, y_train)
pasted_score = pasted_model.score(X_test, y_test)

pasted_score

0.9440559440559441

In [4]:
# Random Subspaces
# change from bagging to random subspaces with at most 10 features sampled

subspace_model = BaggingClassifier(random_state=42, 
                                bootstrap=False, 
                                  max_features=10).fit(X_train, y_train)
subspace_score = subspace_model.score(X_test, y_test)

subspace_score

0.958041958041958

In [5]:
# Random Patches
# change from bagging to random patches
patches_model = BaggingClassifier(random_state=42, 
                                bootstrap=False, 
                                  max_features=10,
                                 max_samples=0.3).fit(X_train, y_train)
patches_score = patches_model.score(X_test, y_test)

patches_score

0.965034965034965

Bagging typically works with complex tree models