# Resources

https://stackoverflow.com/questions/61901365/modulenotfounderror-no-module-named-sklearn-externals-six

https://stackoverflow.com/questions/28312534/graphvizs-executables-are-not-found-python-3-4

https://mljar.com/blog/visualize-decision-tree/


# A graphical example

In [None]:
# %pip install scikit-learn
# %pip install mglearn
# %pip install six pydotplus
# !brew install graphviz

In [None]:
# Compatibility shim: register the standalone `six` package in sys.modules
# under the name `sklearn.externals.six`, so that any later import of
# `sklearn.externals.six` resolves to `six`. This must run before such an
# import is executed.
import six
import sys
sys.modules['sklearn.externals.six'] = six

In [None]:
import numpy as np
from matplotlib import pyplot

# Some functions to plot our points and draw the models
def plot_points(features, labels, size_of_points=100):
    """Scatter-plot 2-D points, styled by their binary label.

    Points labelled 0 are drawn as red triangles and points labelled 1 as
    green squares, both with a black edge. The markers are added with
    ``pyplot.scatter``; nothing is returned and the figure is not shown here.

    Args:
        features: Array-like of points; the first two columns are used as
            the horizontal and vertical coordinates.
        labels: Array-like of labels aligned with ``features``.
        size_of_points: Marker size forwarded to ``pyplot.scatter`` as ``s``.
    """
    points = np.array(features)
    classes = np.array(labels)
    # np.argwhere yields one index row per match, so for 1-D labels every
    # selected point keeps a singleton leading axis (hence row[0][...] below).
    selections = [points[np.argwhere(classes == value)] for value in (0, 1)]
    styles = (('red', '^'), ('green', 's'))
    for selected, (colour, shape) in zip(selections, styles):
        pyplot.scatter(
            [row[0][0] for row in selected],
            [row[0][1] for row in selected],
            s=size_of_points,
            color=colour,
            edgecolor='k',
            marker=shape,
        )

def plot_model(X, y, model, size_of_points=100):
    """Plot a classifier's decision regions together with the data points.

    The model is evaluated on a regular grid (step 0.2) spanning the range of
    the first two columns of ``X`` padded by 1 on every side. The predictions
    are drawn as translucent filled regions (red / blue) with black boundary
    lines, the points are overlaid with ``plot_points``, and the figure is
    shown with ``pyplot.show()``.

    Args:
        X: Array-like of points; the first two columns are used.
        y: Array-like of labels aligned with ``X`` (forwarded to
            ``plot_points``).
        model: Object with a ``predict`` method that accepts an
            ``(n_points, 2)`` input and returns one prediction per row.
        size_of_points: Marker size forwarded to ``plot_points``.
    """
    X = np.array(X)
    y = np.array(y)
    plot_step = 0.2
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                         np.arange(y_min, y_max, plot_step))
    grid = np.c_[xx.ravel(), yy.ravel()]
    # A scikit-learn estimator fitted on a DataFrame records the column names
    # in `feature_names_in_` and warns when it is later given a bare array.
    # Re-attach those names so prediction matches how the model was fitted.
    feature_names = getattr(model, 'feature_names_in_', None)
    if feature_names is not None and len(feature_names) == grid.shape[1]:
        # The model was fitted on a DataFrame, so pandas is available.
        import pandas as pd
        grid = pd.DataFrame(grid, columns=feature_names)
    # np.asarray also accepts models whose predict returns a plain list.
    Z = np.asarray(model.predict(grid)).reshape(xx.shape)
    pyplot.contourf(xx, yy, Z, colors=['red', 'blue'], alpha=0.2, levels=range(-1,2))
    pyplot.contour(xx, yy, Z,colors = 'k',linewidths = 1)
    plot_points(X, y, size_of_points)
    pyplot.show()
    
def display_tree(dt):
    """Render a fitted decision tree as a PNG image for inline display.

    The tree is exported to Graphviz DOT source with filled, rounded nodes,
    parsed with pydotplus and rendered to PNG.

    Args:
        dt: Fitted decision tree estimator accepted by
            ``sklearn.tree.export_graphviz``.

    Returns:
        An ``IPython.display.Image`` wrapping the rendered PNG bytes.
    """
    from IPython.display import Image
    from sklearn.tree import export_graphviz
    import pydotplus
    # With out_file=None, export_graphviz returns the DOT source as a string,
    # so no StringIO buffer (and no `sklearn.externals.six` import) is needed.
    dot_source = export_graphviz(dt, out_file=None,
                                 filled=True, rounded=True,
                                 special_characters=True)
    graph = pydotplus.graph_from_dot_data(dot_source)
    # NOTE: rendering presumably needs the Graphviz executables to be
    # installed and discoverable (see the Graphviz link in the resources).
    return Image(graph.create_png())


## A Toy Example

In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import utils

In [None]:
# Toy data set: twelve 2-D points, one per row as (x_0, x_1, y),
# where y is the 0/1 class label.
dataset = pd.DataFrame(
    [
        (7, 1, 0),
        (3, 2, 0),
        (2, 3, 0),
        (1, 5, 0),
        (2, 6, 0),
        (4, 7, 0),
        (1, 9, 1),
        (8, 10, 1),
        (6, 5, 1),
        (7, 8, 1),
        (8, 4, 1),
        (9, 6, 1),
    ],
    columns=['x_0', 'x_1', 'y'],
)

dataset

In [None]:
# Split the table into the two coordinate columns (model inputs) and the
# label column (target), then scatter-plot the labelled points.
features = dataset[['x_0', 'x_1']]
labels = dataset['y']

plot_points(features, labels)

# Building a decision tree using Gini index

In [None]:
# Fit a tree with the classifier's default settings, then score it on the
# very same points it was fitted on (training fit, not held-out performance).
decision_tree = DecisionTreeClassifier()
decision_tree.fit(features, labels)
decision_tree.score(features, labels)

In [None]:
# Render the fitted tree's structure as an image.
display_tree(decision_tree)

In [None]:
# Shade the regions the tree predicts for each class and overlay the points.
plot_model(features, labels, decision_tree)

# Building a decision tree using entropy

In [None]:
# Same fit as before, but with criterion='entropy' for choosing splits;
# the score is again computed on the points the tree was fitted on.
decision_tree_entropy = DecisionTreeClassifier(criterion='entropy')
decision_tree_entropy.fit(features, labels)
decision_tree_entropy.score(features, labels)

In [None]:
# Render the entropy-based tree's structure as an image.
display_tree(decision_tree_entropy)

In [None]:
# Shade the entropy-based tree's predicted regions and overlay the points.
plot_model(features, labels, decision_tree_entropy)

# Building a decision tree of depth one (a vertical or horizontal line)

In [None]:
# Limit the tree to max_depth=1, fit it, and report its score on the points
# it was fitted on before plotting its decision regions.
decision_tree_depth_1 = DecisionTreeClassifier(max_depth=1)
decision_tree_depth_1.fit(features, labels)
# A notebook only echoes a cell's last expression, so the score has to be
# printed explicitly or it is silently discarded.
print(decision_tree_depth_1.score(features, labels))
plot_model(features, labels, decision_tree_depth_1)

In [None]:
# Render the depth-limited (max_depth=1) tree's structure as an image.
display_tree(decision_tree_depth_1)

In [None]:
# Limit the tree to max_depth=2, fit it, and report its score on the points
# it was fitted on before plotting its decision regions.
decision_tree_depth_2 = DecisionTreeClassifier(max_depth=2)
decision_tree_depth_2.fit(features, labels)
# A notebook only echoes a cell's last expression, so the score has to be
# printed explicitly or it is silently discarded.
print(decision_tree_depth_2.score(features, labels))
plot_model(features, labels, decision_tree_depth_2)

In [None]:
# Render the depth-limited (max_depth=2) tree's structure as an image.
display_tree(decision_tree_depth_2)