In [1]:

###############################################################################
#                          1. Importing Libraries                             #
###############################################################################

import numpy as np
# The usual stuff
import pandas as pd
# To measure performance
from sklearn import metrics
# Iris dataset
from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier
# Data preprocessing and machine learning
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [2]:
###############################################################################
#                 2. Stump vs Ensemble of 1000 Decision Stumps                #
###############################################################################

# Load data and store it into pandas DataFrame objects
iris = load_iris()
X = pd.DataFrame(iris.data[:, :], columns=iris.feature_names[:])
y = pd.DataFrame(iris.target, columns=["Species"])

# Splitting Dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=20, random_state=100)

# Defining the stump
stump = DecisionTreeClassifier(max_depth=1)

# Creating an ensemble
ensemble = BaggingClassifier(base_estimator=stump, n_estimators=1000,
                             bootstrap=False)

# Training classifiers
stump.fit(X_train, np.ravel(y_train))
ensemble.fit(X_train, np.ravel(y_train))

# Making predictions
y_pred_stump = stump.predict(X_test)
y_pred_ensemble = ensemble.predict(X_test)

# Determine performance
stump_accuracy = metrics.accuracy_score(y_test, y_pred_stump)
ensemble_accuracy = metrics.accuracy_score(y_test, y_pred_ensemble)

# Print message to user
print(f"The accuracy of the stump is {stump_accuracy * 100:.1f} %")
print(f"The accuracy of the ensemble is {ensemble_accuracy * 100:.1f} %")

The accuracy of the stump is 55.0 %
The accuracy of the ensemble is 55.0 %


In [3]:
###############################################################################
#                          3. Stump vs Random Forest                          #
###############################################################################

# Defining the stump
stump = DecisionTreeClassifier(max_depth=1, splitter="best", max_features="sqrt")

# Create Random Forest
ensemble = BaggingClassifier(base_estimator=stump, n_estimators=1000,
                             bootstrap=True)

# Training classifiers
stump.fit(X_train, np.ravel(y_train))
ensemble.fit(X_train, np.ravel(y_train))

# Making predictions
y_pred_tree = stump.predict(X_test)
y_pred_ensemble = ensemble.predict(X_test)

# Determine performance
stump_accuracy = metrics.accuracy_score(y_test, y_pred_stump)
ensemble_accuracy = metrics.accuracy_score(y_test, y_pred_ensemble)

# Print message to user
print(f"The accuracy of the stump is {stump_accuracy * 100:.1f} %")
print(f"The accuracy of the Random Forest is {ensemble_accuracy * 100:.1f} %")

The accuracy of the stump is 55.0 %
The accuracy of the Random Forest is 90.0 %


In [4]:
###############################################################################
#                            4. Stump vs Extra Trees                          #
###############################################################################

# Defining the stump
stump = DecisionTreeClassifier(max_depth=1, splitter="random", max_features="sqrt")

# Create Extra Trees
ensemble = BaggingClassifier(base_estimator=stump, n_estimators=1000,
                             bootstrap=False)

# Training classifiers
stump.fit(X_train, np.ravel(y_train))
ensemble.fit(X_train, np.ravel(y_train))

# Making predictions
y_pred_tree = stump.predict(X_test)
y_pred_ensemble = ensemble.predict(X_test)

# Determine performance
stump_accuracy = metrics.accuracy_score(y_test, y_pred_stump)
ensemble_accuracy = metrics.accuracy_score(y_test, y_pred_ensemble)

# Print message to user
print(f"The accuracy of the stump is {stump_accuracy * 100:.1f} %")
print(f"The accuracy of the Extra Trees is {ensemble_accuracy * 100:.1f} %")

The accuracy of the stump is 55.0 %
The accuracy of the Extra Trees is 95.0 %
