# Ensemble Learning: Bagging and Boosting

In [9]:
# Import required libraries
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [16]:
# Load the breast cancer dataset that ships with scikit-learn
data = load_breast_cancer()

# One column per feature, plus the class labels appended as a "target" column
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)

# Preview 10 random rows.
# NOTE: sample() is called without a seed, so the rows shown change on every run.
print(df.sample(10))

     mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
28         15.30         25.27          102.40      732.4          0.10820   
402        12.96         18.29           84.18      525.2          0.07351   
45         18.65         17.60          123.70     1076.0          0.10990   
236        23.21         26.97          153.50     1670.0          0.09509   
205        15.12         16.68           98.78      716.6          0.08876   
322        12.86         13.32           82.82      504.8          0.11340   
183        11.41         14.92           73.53      402.0          0.09059   
508        16.30         15.70          104.70      819.8          0.09427   
116         8.95         15.76           58.74      245.2          0.09462   
256        19.55         28.77          133.60     1207.0          0.09260   

     mean compactness  mean concavity  mean concave points  mean symmetry  \
28            0.16970         0.16830              0.08751      

In [12]:
# Split the frame into labels (the "target" column) and features (every other column)
y = df["target"]
X = df.drop(columns=["target"])

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [17]:
# Show (rows, columns) of the training features, then of the test features
print(X_train.shape, X_test.shape, sep="\n")

(455, 30)
(114, 30)


# WITHOUT Ensemble Learning (Single Decision Tree)

In [18]:
# Baseline: a single decision tree, used as the reference point for the
# ensemble models. DecisionTreeClassifier is imported in the notebook's
# import cell at the top.

# Create the model; the fixed seed keeps the fitted tree reproducible
dt_model = DecisionTreeClassifier(random_state=42)

# Train on the training split
dt_model.fit(X_train, y_train)

# Predict labels for the held-out test split.
# NOTE: y_pred is overwritten by each later model cell, so it only holds the
# decision tree's predictions until the next model cell runs.
y_pred = dt_model.predict(X_test)

# Fraction of test rows whose predicted label matches the true label
dt_accuracy = accuracy_score(y_test, y_pred)

print("Decision Tree Accuracy (No Ensemble):", dt_accuracy)


Decision Tree Accuracy (No Ensemble): 0.9473684210526315


# WITH Ensemble Learning: Random Forest (Bagging)

In [14]:
# Random forest: an ensemble of 100 decision trees. RandomForestClassifier
# is imported in the notebook's import cell at the top.

# Create the model; the fixed seed keeps the fitted forest reproducible
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42
)

# Train on the training split
rf_model.fit(X_train, y_train)

# Predict labels for the held-out test split.
# NOTE: this overwrites y_pred from the decision tree cell, and the next
# model cell overwrites it again.
y_pred = rf_model.predict(X_test)

# Fraction of test rows whose predicted label matches the true label
rf_accuracy = accuracy_score(y_test, y_pred)

print("Random Forest Accuracy (Ensemble - Bagging):", rf_accuracy)


Random Forest Accuracy (Ensemble - Bagging): 0.9649122807017544


# STOP

# WITH Ensemble Learning: Gradient Boosting (Boosting)

In [15]:
# Gradient boosting with scikit-learn's default hyperparameters.
# GradientBoostingClassifier is imported in the notebook's import cell at
# the top.

# Create the model; the fixed seed keeps the fit reproducible
gb_model = GradientBoostingClassifier(random_state=42)

# Train on the training split
gb_model.fit(X_train, y_train)

# Predict labels for the held-out test split.
# NOTE: this overwrites y_pred from the earlier model cells.
y_pred = gb_model.predict(X_test)

# Fraction of test rows whose predicted label matches the true label
gb_accuracy = accuracy_score(y_test, y_pred)

print("Gradient Boosting Accuracy (Ensemble - Boosting):", gb_accuracy)


Gradient Boosting Accuracy (Ensemble - Boosting): 0.956140350877193
