# Classification with Decision Trees

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.inspection import DecisionBoundaryDisplay

from sklearn.linear_model import LogisticRegression

import sklearn.tree
from sklearn.tree import DecisionTreeClassifier

import dtreeviz

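We use `make_blobs` to generate some synthetic data for classification. With its default settings it produces two-dimensional points drawn from three Gaussian clusters, one cluster per class:
https://docs.w3cub.com/scikit_learn/modules/generated/sklearn.datasets.make_blobs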

In [None]:
# Synthetic data set: 300 points; cluster_std=2 widens the blobs and
# random_state=0 makes the draw repeatable.
# x holds the feature columns, y the class label of every point.
x, y = make_blobs(n_samples=300, cluster_std=2, random_state=0)

In [None]:
# Scatter the two feature columns against each other, coloured by class label.
# The trailing semicolon suppresses the textual repr of the returned artist.
plt.scatter(x=x[:, 0], y=x[:, 1], c=y);

## Train/test split

In [None]:
# Hold out part of the data for evaluation; the seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

## First, Logistic Regression

In [None]:
# Baseline model: logistic regression with default settings,
# fitted on the training split only.
logreg_clf = LogisticRegression()
logreg_clf.fit(X=x_train, y=y_train)

In [None]:
# Shade the plane by the class the logistic regression predicts at each point;
# the grid extent is derived from x.
DecisionBoundaryDisplay.from_estimator(
    estimator=logreg_clf,
    X=x,
    response_method="predict",
    cmap="RdBu",
    alpha=0.5,
)

# Overlay all data points, coloured by their true label.
plt.scatter(x=x[:, 0], y=x[:, 1], c=y);

In [None]:
# Mean accuracy of the logistic regression on the held-out test split.
test_score = logreg_clf.score(X=x_test, y=y_test)
print(f"Accuracy of Logistic Regression: {test_score:.2f}")

## Decision Tree

In [None]:
# Fully grown decision tree (no depth limit).
# scikit-learn permutes the features at every split, so ties between equally
# good splits are broken at random. Pinning random_state keeps the fitted tree
# (and every plot derived from it) reproducible across runs, in line with the
# seeds already used for make_blobs and train_test_split.
tree_clf = DecisionTreeClassifier(random_state=0)

# Alternative to experiment with: a shallow tree with much coarser regions.
# tree_clf = DecisionTreeClassifier(max_depth=2, random_state=0)

In [None]:
# Grow the tree on the training split only; the test split stays untouched.
tree_clf.fit(X=x_train, y=y_train)

In [None]:
# Shade the plane by the class the decision tree predicts at each point;
# the grid extent is derived from x.
DecisionBoundaryDisplay.from_estimator(
    estimator=tree_clf,
    X=x,
    response_method="predict",
    cmap="RdBu",
    alpha=0.5,
)

# Overlay all data points, coloured by their true label.
plt.scatter(x=x[:, 0], y=x[:, 1], c=y);

In [None]:
# Plain-text dump of the learned decision rules.
# The features are labelled x0/x1 so this output uses the same names as the
# plot_tree and dtreeviz views of the same tree (the default labels would be
# feature_0/feature_1).
text_representation = sklearn.tree.export_text(tree_clf,
                                               feature_names=['x0', 'x1'])
print(text_representation)

In [None]:
# Class labels known to the fitted tree; the columns of predict_proba
# follow this order.
tree_clf.classes_

In [None]:
# Draw the fitted tree with matplotlib on a large canvas.
plt.figure(figsize=(12, 8))
sklearn.tree.plot_tree(
    tree_clf,
    feature_names=['x0', 'x1'],
    # plot_tree expects class names as strings, hence the conversion.
    class_names=list(map(str, tree_clf.classes_)),
    filled=True,
);

In [None]:
%%capture --no-display

vizmodel = dtreeviz.model(tree_clf, 
         x,
         y,
         feature_names=['x0','x1'],
         class_names=[i for i in tree_clf.classes_],
         target_name="y")

vizmodel.view()

In [None]:
# A single observation as a flat [x0, x1] feature vector.
sample = [0.0, 1.0]

# Print the explanation dtreeviz produces for this sample's prediction path.
print(
    vizmodel.explain_prediction_path(sample)
)

In [None]:
# Mean accuracy of the decision tree on the held-out test split.
# Note: this rebinds test_score, replacing the logistic regression value.
test_score = tree_clf.score(X=x_test, y=y_test)
print(f"Accuracy of Decision Tree: {test_score:.2f}")

### How to make predictions?

In [None]:
# predict works on a batch of samples, so the single point is wrapped in an
# outer list: one row, two features.
sample = [[0, 1]]
print('Predicted Class:', tree_clf.predict(X=sample))

In [None]:
# Class probabilities for the sample: one row per sample, one column per class.
y_pred_proba = tree_clf.predict_proba(X=sample)
print(y_pred_proba)

In [None]:
# One bar per class; its height is the predicted probability of that class
# for the single sample (row 0 of y_pred_proba).
plt.bar(x=tree_clf.classes_, height=y_pred_proba[0])