In [3]:
##############################################################
######################### TUTORIAL 1 #########################
##############################################################
#############################################################
######################### Libraries #########################
#############################################################

import numpy as np
import pandas as pd

# Iris dataset
from sklearn.datasets import load_iris

# Data preprocessing and machine learning
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier

# To measure performance
from sklearn import metrics

In [4]:
#############################################################################################
######################### Stump vs Ensemble of 1000 Decision Stumps #########################
#############################################################################################
# Load data and store it into pandas DataFrame objects
iris = load_iris()
X = pd.DataFrame(iris.data[:, :], columns = iris.feature_names[:])
y = pd.DataFrame(iris.target, columns =["Species"])

# Splitting Dataset: an integer test_size holds out that many rows (20 samples);
# the fixed random_state keeps the split identical between runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 20, random_state = 100)

# Defining the stump (a decision tree restricted to a single split)
stump = DecisionTreeClassifier(max_depth = 1)

# Creating an ensemble of 1000 stumps.
# The stump is passed positionally on purpose: the keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was removed
# in 1.4, while the first positional parameter is the base model in every version.
# bootstrap = False disables sampling with replacement; presumably, with the default
# max_samples, every stump then sees the whole training set, which would explain why
# the ensemble scores the same as the single stump.
ensemble = BaggingClassifier(stump, n_estimators = 1000, bootstrap = False)

# Training classifiers (np.ravel flattens the single-column label frame to 1-D)
stump.fit(X_train, np.ravel(y_train))
ensemble.fit(X_train, np.ravel(y_train))

# Making predictions
y_pred_stump = stump.predict(X_test)
y_pred_ensemble = ensemble.predict(X_test)

# Determine performance
stump_accuracy = metrics.accuracy_score(y_test, y_pred_stump)
ensemble_accuracy = metrics.accuracy_score(y_test, y_pred_ensemble)

# Print message to user
print(f"The accuracy of the stump is {stump_accuracy*100:.1f} %")
print(f"The accuracy of the ensemble is {ensemble_accuracy*100:.1f} %")


The accuracy of the stump is 55.0 %
The accuracy of the ensemble is 55.0 %


In [5]:
###########################################################################
######################### Stump vs Random Forest  #########################
###########################################################################
# Defining the stump: max_features = "sqrt" makes each split consider only a random
# subset of the features, so a random_state is set to keep the result repeatable
stump = DecisionTreeClassifier(max_depth = 1, splitter = "best", max_features = "sqrt",
                               random_state = 100)

# Create Random Forest: bagging (bootstrap = True) over feature-subsampling stumps.
# The stump is passed positionally because the keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and removed in 1.4.
ensemble = BaggingClassifier(stump, n_estimators = 1000, bootstrap = True,
                             random_state = 100)

# Training classifiers (np.ravel flattens the single-column label frame to 1-D)
stump.fit(X_train, np.ravel(y_train))
ensemble.fit(X_train, np.ravel(y_train))

# Making predictions.
# The stump's predictions must be stored in y_pred_stump: the accuracy below reads
# that name, and any other name would silently score the previous cell's stump.
y_pred_stump = stump.predict(X_test)
y_pred_ensemble = ensemble.predict(X_test)

# Determine performance
stump_accuracy = metrics.accuracy_score(y_test, y_pred_stump)
ensemble_accuracy = metrics.accuracy_score(y_test, y_pred_ensemble)

# Print message to user
print(f"The accuracy of the stump is {stump_accuracy*100:.1f} %")
print(f"The accuracy of the Random Forest is {ensemble_accuracy*100:.1f} %")

The accuracy of the stump is 55.0 %
The accuracy of the Random Forest is 95.0 %


In [6]:
#########################################################################
######################### Stump vs Extra Trees  #########################
#########################################################################
# Defining the stump: splitter = "random" picks split thresholds at random and
# max_features = "sqrt" restricts each split to a random feature subset, so a
# random_state is set to keep the result repeatable
stump = DecisionTreeClassifier(max_depth = 1, splitter = "random", max_features = "sqrt",
                               random_state = 100)

# Create Extra Trees: randomised stumps, no bootstrap resampling of the rows.
# The stump is passed positionally because the keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and removed in 1.4.
ensemble = BaggingClassifier(stump, n_estimators = 1000, bootstrap = False,
                             random_state = 100)

# Training classifiers (np.ravel flattens the single-column label frame to 1-D)
stump.fit(X_train, np.ravel(y_train))
ensemble.fit(X_train, np.ravel(y_train))

# Making predictions.
# The stump's predictions must be stored in y_pred_stump: the accuracy below reads
# that name, and any other name would silently score a stump from an earlier cell.
y_pred_stump = stump.predict(X_test)
y_pred_ensemble = ensemble.predict(X_test)

# Determine performance
stump_accuracy = metrics.accuracy_score(y_test, y_pred_stump)
ensemble_accuracy = metrics.accuracy_score(y_test, y_pred_ensemble)

# Print message to user
print(f"The accuracy of the stump is {stump_accuracy*100:.1f} %")
print(f"The accuracy of the Extra Trees is {ensemble_accuracy*100:.1f} %")

The accuracy of the stump is 55.0 %
The accuracy of the Extra Trees is 95.0 %


In [7]:
##############################################################
######################### TUTORIAL 2 #########################
##############################################################
# Importing Libraries
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

# Fetch the iris dataset and wrap the features and the labels in pandas DataFrames
iris = load_iris()
X = pd.DataFrame(data = iris.data, columns = list(iris.feature_names))
y = pd.DataFrame({"Species": iris.target})

# Build a decision tree capped at depth 2 and fit it on the complete dataset
# NOTE(review): no random_state is given; scikit-learn documents that equally good
# candidate splits are tie-broken randomly, so the fitted tree may differ between
# runs -- confirm whether a fixed seed is wanted here.
tree = DecisionTreeClassifier(max_depth = 2).fit(X, y)
tree

DecisionTreeClassifier(max_depth=2)

In [10]:
# Visualize Decision Tree
from sklearn.tree import export_graphviz

# Creates a Graphviz dot file named myTreeName.dot describing the fitted tree.
# feature_names and class_names are both passed as plain lists, the documented
# type for these parameters; filled colours the nodes and rounded draws them
# with rounded corners.
export_graphviz(
            tree,
            out_file =  "myTreeName.dot",
            feature_names = list(X.columns),
            class_names = list(iris.target_names),
            filled = True,
            rounded = True)


In [11]:
# Making a Prediction On a New Sample
# The samples are wrapped in a DataFrame that reuses the training column names, so
# the tree receives the same named feature layout it was fitted on.
new_samples = pd.DataFrame([[5, 5, 1, 3], [5, 5, 2.6, 1.5]], columns = X.columns)

# predict returns one class index per row. Each element is indexed out before the
# int() conversion because calling int() on a whole array (even a 1-element one)
# is deprecated since NumPy 1.25.
predicted_classes = tree.predict(new_samples)
sample_one_pred = int(predicted_classes[0])
sample_two_pred = int(predicted_classes[1])
print(f"The first sample most likely belongs a {iris.target_names[sample_one_pred]} flower.")
print(f"The second sample most likely belongs a {iris.target_names[sample_two_pred]} flower.")

The first sample most likely belongs a virginica flower.
The second sample most likely belongs a versicolor flower.


In [None]:
# Setting parameters: entropy criterion, random split thresholds, depth capped at 2,
# at least 5 samples to split a node, at least 2 samples per leaf, and 2 candidate
# features per split.
# splitter = "random" and max_features = 2 make the fit stochastic, so a fixed
# random_state is supplied to get the same tree on every run.
tree = DecisionTreeClassifier(criterion = "entropy", splitter = "random", max_depth = 2,  min_samples_split = 5,
                              min_samples_leaf = 2, max_features = 2, random_state = 100)
tree.fit(X,y)


