**Chapter 6 – Decision Trees**

In [None]:
import numpy as np
import os
np.random.seed(42)
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

# Training and visualizing

In [None]:
from sklearn.datasets import load_iris

# Load the iris dataset; features are read from `.data`, labels from `.target`.
iris = load_iris()
# Keep only the feature columns from index 2 onward. The same slice of
# iris.feature_names is used to label these columns when the tree is exported.
X = iris.data[:, 2:]
y = iris.target

In [None]:
def plot_data(X, y, axes=[0, 7.5, 0, 3]):
    """Scatter-plot a two-column dataset with one marker style per class label.

    Parameters
    ----------
    X : array indexed as ``X[:, 0]`` and ``X[:, 1]``
        Column 0 is drawn on the horizontal axis, column 1 on the vertical axis.
    y : array compared element-wise against 0, 1 and 2
        Class label of each row of ``X``; rows with other labels are not drawn.
    axes : sequence passed unchanged to ``plt.axis``
        Plot limits. The default list is only read, never modified.
    """
    # (class label, matplotlib format string), drawn in this order.
    class_markers = ((0, "yo"), (1, "bs"), (2, "g^"))
    for label, marker in class_markers:
        in_class = y == label
        plt.plot(X[:, 0][in_class], X[:, 1][in_class], marker)
    plt.axis(axes)
    label_size = 18
    plt.xlabel("$x_1$", fontsize=label_size)
    plt.ylabel("$x_2$", fontsize=label_size, rotation=90)
plot_data(X,y)

Train a `DecisionTreeClassifier` on `X`, `y` (replace the `??` placeholders in the next cell):

In [None]:
from sklearn.tree import DecisionTreeClassifier
tree_clf = ??
??

Export the trained tree to a Graphviz `.dot` file:

In [None]:
from sklearn.tree import export_graphviz

# Write the fitted classifier to a Graphviz DOT file so it can be rendered
# to an image with the `dot` command-line tool.
export_graphviz(
    tree_clf,
    out_file="iris_tree.dot",
    feature_names=iris.feature_names[2:],  # names for feature columns 2 onward
    class_names=iris.target_names,
    rounded=True,
    filled=True,
)

Download Graphviz from <https://www.graphviz.org/>.

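On Windows, add the Graphviz install directory to the `PATH` environment variable, for example:

`C:\Program Files (x86)\Graphviz2.38`

(If the `dot` command is still not found, add the `bin` subfolder of that directory instead — that is where the executables are usually installed.)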


Alternatively, install Graphviz with conda: `conda install -c conda-forge graphviz`

In [None]:
!dot -Tpng iris_tree.dot -o iris_tree9.png

![](iris_tree9.png)

### Feature importance

In [None]:
tree_clf.feature_importances_

In [None]:
# Per-node arrays read from the fitted tree's low-level `tree_` object;
# the next cell indexes both of them by node id.
ginis = tree_clf.tree_.impurity  # impurity of each node (Gini only if the tree uses the gini criterion — confirm)
samples = tree_clf.tree_.n_node_samples  # number of training samples reaching each node

In [None]:
# Manual re-computation of the feature importances: for each split, take
# impurity * samples of the parent node minus the same product for its two
# children (the sample-weighted impurity decrease of that split).
# NOTE(review): the hard-coded node ids assume node 0 splits into nodes 1 and 2
# and node 2 splits into nodes 3 and 4 (node 1 being a leaf) — confirm against
# tree_clf.tree_.children_left / children_right for the tree actually fitted.
# NOTE(review): "petal" presumably stands for the first feature column and
# "width" for the second — confirm which feature each node splits on.
fea_imp_petal = ginis[0]*samples[0] - ginis[1]*samples[1] - ginis[2]*samples[2]
fea_imp_width = ginis[2]*samples[2] - ginis[3]*samples[3] - ginis[4]*samples[4]
# Share of the total decrease contributed by the split at node 0; compare it
# with the matching entry of tree_clf.feature_importances_.
fea_imp_petal/(fea_imp_petal+fea_imp_width)

In [None]:
def plot_decision_boundary(clf, X, y, axes=[0, 7.5, 0, 3]):
    """Shade the regions predicted by ``clf`` and overlay the data points.

    Parameters
    ----------
    clf : object with a ``predict`` method
        Called once on an array with one row per grid point and two columns.
    X, y : arrays
        Dataset forwarded unchanged to ``plot_data``.
    axes : sequence indexed 0..3 as [x1 min, x1 max, x2 min, x2 max]
        Extent of the prediction grid and final plot limits. The default list
        is only read, never modified.
    """
    resolution = 100  # grid points per axis
    horizontal = np.linspace(axes[0], axes[1], resolution)
    vertical = np.linspace(axes[2], axes[3], resolution)
    grid_x1, grid_x2 = np.meshgrid(horizontal, vertical)
    # One row per grid point: (x1, x2).
    grid_points = np.column_stack((grid_x1.ravel(), grid_x2.ravel()))
    predicted = clf.predict(grid_points).reshape(grid_x1.shape)
    plt.contourf(grid_x1, grid_x2, predicted, alpha=0.3)
    plot_data(X, y)
    # plot_data applies its own default limits, so set the requested ones last.
    plt.axis(axes)
    
# Draw the classifier's decision regions for the current X, y on a wide
# figure, using the default axis limits of plot_decision_boundary.
plt.figure(figsize=(8, 4))
plot_decision_boundary(tree_clf, X, y)
plt.show()

# Predicting classes and class probabilities

In [None]:
# A single new instance (one row, two feature values) to run predictions on.
# NOTE(review): presumably in the same column order as the X used for training — confirm.
x_new = [[5, 1.5]]

In [None]:
# predict probability
??

In [None]:
# predict class
??

# Regularization Hyperparameters

- `max_depth`: the maximum depth of the Decision Tree
- `min_samples_split`: the minimum number of samples a node must have before it can be split
- `min_samples_leaf`: the minimum number of samples a leaf node must have
- `min_weight_fraction_leaf`: same as `min_samples_leaf`, but expressed as a fraction of the total number of weighted instances
- `max_leaf_nodes`: the maximum number of leaf nodes
- `max_features`: the maximum number of features that are evaluated for splitting at each node

Increasing the `min_*` hyperparameters or reducing the `max_*` hyperparameters regularizes the model.


In [None]:
from sklearn.datasets import make_moons
# Small, noisy two-class toy dataset (two interleaving half circles).
Xm, ym = make_moons(
    n_samples=100,
    noise=0.25,
    random_state=53,  # fixed seed so the same points are generated on every run
)
plt.figure(figsize=(8, 4))
# Axis limits are passed explicitly instead of using plot_data's defaults.
plot_data(Xm, ym, axes=[-1.5, 2.5, -1, 1.5])

Train a `DecisionTreeClassifier` on the moons data (`Xm`, `ym`) and regularize it with one of the hyperparameters listed above:

In [None]:
deep_tree_clf = ??
??

plt.figure(figsize=(8, 4))
plot_decision_boundary(deep_tree_clf, Xm, ym, axes=[-1.5, 2.5, -1, 1.5])

plt.show()

# Regression trees

In [None]:
# Noisy quadratic training set: y = 4 * (x - 0.5)^2 plus Gaussian noise.
# NOTE: this rebinds X and y, replacing the iris arrays defined earlier.
np.random.seed(42)  # reseed so re-running this cell yields the same sample
m = 200  # number of training instances
X = np.random.rand(m, 1)
# Noise is drawn after X, so the random stream is consumed in the same order.
y = 4 * (X - 0.5) ** 2 + np.random.randn(m, 1) / 10

def plot_quad(X, y):
    """Plot ``y`` against ``X`` as blue dots and label both axes.

    Both arguments are passed straight to ``plt.plot``.
    """
    label_style = {"fontsize": 18}
    plt.plot(X, y, "b.")
    plt.xlabel("$x_1$", **label_style)
    plt.ylabel("$y$", **label_style)
plot_quad(X, y)

Train a `DecisionTreeRegressor` on the quadratic data and regularize it:

In [None]:
from sklearn.tree import DecisionTreeRegressor

tree_reg = ??
??

x1 = np.linspace(0, 1, 500).reshape(-1, 1)
y_pred = tree_reg.predict(x1)

plot_quad(X, y)
plt.plot(x1, y_pred, 'r')

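Export the regression tree to a `.dot` file and render it as `regression_tree.png` (the image shown at the end of this section):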

In [None]:
??

In [None]:
??

![](regression_tree.png)