In [1]:
import warnings
warnings.filterwarnings('ignore')

from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn import datasets
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn import metrics

In [2]:
# Load scikit-learn's bundled iris data set, keeping every feature column
# as the design matrix and the integer targets as the labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target
print('Class labels:', np.unique(y))

Class labels: [0 1 2]


In [3]:
from sklearn.preprocessing import StandardScaler

# Standardise the features in one step (fit the scaler, then transform).
# NOTE(review): the scaler is fitted on the full X, i.e. before the
# train/test split performed in the next cell, so held-out rows contribute
# to the scaling statistics.
sc = StandardScaler()
X = sc.fit_transform(X)

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the partition
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [5]:
def plot_decision_regions(X, y, classifier, resolution=0.02):
    """Draw a classifier's decision regions over the first two features of X.

    The classifier is evaluated on a regular grid covering the range of
    ``X[:, 0]`` and ``X[:, 1]`` (padded by 1 on every side). The predictions
    are rendered as filled contours and the samples are scattered on top,
    one marker/colour pair per class.

    Parameters
    ----------
    X : array
        Sample matrix; only columns 0 and 1 are plotted.
    y : array
        Class label for each row of ``X``.
    classifier : object
        Anything with a ``predict`` method that accepts a two-column array
        (the grid points passed to it have exactly two features).
    resolution : float, default 0.02
        Step size of the mesh on both axes.

    Returns
    -------
    None
        Everything is drawn through ``plt`` (the current pyplot figure).

    Notes
    -----
    Only five markers/colours are defined, so more than five distinct
    labels in ``y`` raises ``IndexError`` when the markers run out.
    """
    # Marker and colour palettes, indexed by the position of each class
    # in the sorted unique labels.
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # Build the evaluation grid, padded by one unit around the data.
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))

    # One prediction per grid point, folded back into the grid's shape.
    grid_points = np.column_stack([xx1.ravel(), xx2.ravel()])
    Z = classifier.predict(grid_points).reshape(xx1.shape)

    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    for idx, cl in enumerate(classes):
        members = y == cl
        # `color=` (not `c=`): a lone RGBA tuple given as `c` is ambiguous
        # and is value-mapped when the class happens to contain 4 samples.
        plt.scatter(x=X[members, 0], y=X[members, 1],
                    alpha=0.8, color=cmap(idx),
                    marker=markers[idx], label=cl)

In [6]:
# Decision tree that splits on entropy; the seed makes the fitted tree
# reproducible.
clf = tree.DecisionTreeClassifier(
    criterion='entropy',
    random_state=0,
)
clf.fit(X_train, y_train)

In [7]:
# Accuracy on the same rows the tree was fitted to.
y_train_pred = clf.predict(X_train)
train_accuracy = metrics.accuracy_score(y_train, y_train_pred)
print("Train - Accuracy :", train_accuracy)

Train - Accuracy : 1.0


In [8]:
# Confusion matrix for the training rows, computed from true vs. predicted labels.
y_train_pred = clf.predict(X_train)
train_confusion = metrics.confusion_matrix(y_train, y_train_pred)
print("Train - Confusion matrix :", train_confusion)

Train - Confusion matrix : [[34  0  0]
 [ 0 32  0]
 [ 0  0 39]]


In [9]:
# Per-class precision / recall / F1 on the training rows.
y_train_pred = clf.predict(X_train)
train_report = metrics.classification_report(y_train, y_train_pred)
print("Train - classification report :", train_report)

Train - classification report :               precision    recall  f1-score   support

           0       1.00      1.00      1.00        34
           1       1.00      1.00      1.00        32
           2       1.00      1.00      1.00        39

    accuracy                           1.00       105
   macro avg       1.00      1.00      1.00       105
weighted avg       1.00      1.00      1.00       105



In [10]:
# Accuracy on the held-out rows.
y_test_pred = clf.predict(X_test)
test_accuracy = metrics.accuracy_score(y_test, y_test_pred)
print("Test - Accuracy :", test_accuracy)

Test - Accuracy : 0.9777777777777777


In [11]:
# Confusion matrix for the held-out rows, computed from true vs. predicted labels.
y_test_pred = clf.predict(X_test)
test_confusion = metrics.confusion_matrix(y_test, y_test_pred)
print("Test - Confusion matrix :", test_confusion)

Test - Confusion matrix : [[16  0  0]
 [ 0 17  1]
 [ 0  0 11]]


In [12]:
# Per-class precision / recall / F1 on the held-out rows.
y_test_pred = clf.predict(X_test)
test_report = metrics.classification_report(y_test, y_test_pred)
print("Test - classification report :", test_report)

Test - classification report :               precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      0.94      0.97        18
           2       0.92      1.00      0.96        11

    accuracy                           0.98        45
   macro avg       0.97      0.98      0.98        45
weighted avg       0.98      0.98      0.98        45



## Visualize Decision Tree

Export the fitted tree to a Graphviz DOT file and render it as a PDF.

In [18]:
from pathlib import Path

from sklearn.tree import export_graphviz
import pydotplus
from IPython.display import Image, display

# Where the intermediate Graphviz description of the tree is written.
dot_path = Path('data/tree.dot')
# The DOT file is written into a sub-folder; create it first so the export
# does not fail on a fresh checkout where `data/` is missing.
dot_path.parent.mkdir(parents=True, exist_ok=True)

# Export the fitted classifier `clf` to the DOT file, labelling nodes with
# the iris feature names and the class labels rendered as strings.
export_graphviz(clf, out_file=str(dot_path),
                feature_names=iris.feature_names,
                class_names=[str(int(label)) for label in clf.classes_],
                filled=True, rounded=True,
                special_characters=True,
                node_ids=True)  # boolean flag (was the integer 1)

# Convert the DOT file to a PDF.
graph = pydotplus.graph_from_dot_file(str(dot_path))

# NOTE(review): inline display of the PDF via display(Image(filename="iris.pdf"))
# was left disabled in the original cell; IPython's Image targets raster
# formats, so rendering to PNG would be needed to show the tree inline.

# Kept as the last expression so the cell still echoes write_pdf's result.
graph.write_pdf("iris.pdf")

True