# Decision Trees
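* Decision trees produce orthogonal (axis-aligned) decision boundaries, which makes them sensitive to rotations of the training set (i.e. it can help to combine them with a dimensionality reduction such as PCA, which re-orients the data first)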

In [11]:
from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier, export_graphviz

# Load the iris dataset and keep only the last two feature columns
# (petal length and petal width), so the tree works on two features.
iris = datasets.load_iris()
X = iris.data[:, 2:]
y = iris.target

# Fit a shallow classifier; max_depth=2 limits the tree to two levels of splits.
tree_clf = DecisionTreeClassifier(max_depth=2)
tree_clf.fit(X, y)

# Write the fitted tree to "iris.tree.dot" (relative to the current working
# directory), labelling nodes with the matching feature and class names.
export_graphviz(
    tree_clf,
    out_file="iris.tree.dot",
    feature_names=iris.feature_names[2:],
    class_names=iris.target_names,
    rounded=True,
    filled=True,
)
                

In [12]:
# Query the fitted classifier for a single flower; the two values follow the
# column order of X (petal length, petal width).
petal_sample = [[5, 1.5]]
tree_clf.predict_proba(petal_sample), tree_clf.predict(petal_sample)

(array([[0.        , 0.90740741, 0.09259259]]), array([1]))

### Gini impurity

\begin{equation}
G_i = 1 - \sum^{n}_{k=1} p_{i,k}^2
\end{equation}

where $p_{i,k}$ is the ratio of class $k$ instances among the training instances in the $i$-th node. $G_i = 0$ for a "pure" node, i.e. a node whose training instances all belong to the same class.

### CART (Classification and Regression Tree)

\begin{equation}
J(k, t_k) = \frac{m_{\rm left}}{m}G_{\rm left} + \frac{m_{\rm right}}{m}G_{\rm right}
\end{equation}

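i.e. find the feature $k$ and threshold $t_k$ that split the training data into the purest possible subsets (minimal Gini impurity, weighted by subset size). $m_{\rm left}$ and $m_{\rm right}$ are the numbers of instances in the left and right subsets, and $m$ is the number of instances in the node being split.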

### Entropy

\begin{equation}
H_i = - \sum_{\substack{k=1 \\ p_{i,k} \neq 0}}^{n} p_{i,k} \log_2(p_{i,k})
\end{equation}


In [13]:
# Refit the depth-2 classifier on the same data, this time using entropy
# instead of the default criterion to measure node impurity.
tree_clf = DecisionTreeClassifier(criterion="entropy", max_depth=2)
tree_clf.fit(X, y)

DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=2,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [14]:
# Same query point as before, now against the entropy-based tree, so the two
# classifiers can be compared directly.
petal_sample = [[5, 1.5]]
tree_clf.predict_proba(petal_sample), tree_clf.predict(petal_sample)

(array([[0.        , 0.90740741, 0.09259259]]), array([1]))

## Regression

In [18]:
from sklearn.tree import DecisionTreeRegressor

# Fit a depth-2 regression tree on the same X and y used for classification.
# NOTE(review): y is the class-label array (iris.target), so the regressor is
# fitted to label indices rather than a continuous quantity -- presumably
# intentional for this demo; confirm.
tree_reg = DecisionTreeRegressor(max_depth=2)
tree_reg.fit(X, y)

DecisionTreeRegressor(criterion='mse', max_depth=2, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')

In [20]:
# Predict the regression target for the same query point used with the
# classifiers (petal length 5, petal width 1.5).
petal_sample = [[5, 1.5]]
tree_reg.predict(petal_sample)

array([1.09259259])

# Exercises