In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
from sklearn.metrics import classification_report

from sklearn.datasets import make_moons
from sklearn.inspection import DecisionBoundaryDisplay

from sklearn.ensemble import RandomForestClassifier

## Classification of nice and ugly shapes

In [None]:
# Training set: one row per shape, described by three boolean features
# (circle, filled, red) and its class label ("nice" / "ugly").
df_train = pd.DataFrame(
    [
        (True,  False, False, "nice"),
        (False, True,  False, "nice"),
        (False, True,  False, "nice"),
        (False, True,  True,  "ugly"),
        (True,  False, True,  "ugly"),
        (True,  True,  False, "ugly"),
    ],
    columns=["circle", "filled", "red", "label"],
)
df_train

Fit a decision tree to the training data above. Then use the model on the test data below and evaluate its quality.

In [None]:
# Test set with the same schema as the training data: three boolean
# features per shape plus the true class label.
df_test = pd.DataFrame(
    [
        (False, False, True,  "nice"),
        (False, False, False, "nice"),
        (True,  True,  False, "ugly"),
        (True,  True,  True,  "ugly"),
    ],
    columns=["circle", "filled", "red", "label"],
)
df_test

## Two Moons

First, we create the data set with two entangled moons.

In [8]:
# Draw 100 noisy samples from the two-moons toy data set (seeded, so the
# same points are generated on every run).
X, y = make_moons(
    n_samples=100,
    noise=0.25,
    random_state=0,
)
# Categorical view of the integer labels: code 0 -> "A", code 1 -> "B".
y_cat = pd.Categorical.from_codes(codes=y, categories=["A", "B"])

In [None]:
sns.set_theme()

# Scatter plot of the raw two-moons samples.
fig, ax = plt.subplots(figsize=(4, 4))
# Colour by the categorical labels (y_cat) instead of the raw 0/1 codes so the
# legend reads "A"/"B", consistent with the other plots in this notebook.
sns.scatterplot(
    ax=ax,
    x=X[:, 0],
    y=X[:, 1],
    hue=y_cat,
    palette=["tab:red", "tab:blue"],
)
ax.set(xlabel="X[0]", ylabel="X[1]")
plt.show()

Now fit a decision tree of depth 1.

In [None]:
# Decision stump (a single split). The seed is fixed because scikit-learn
# permutes the features at every split, so equally good splits could
# otherwise be resolved differently from run to run.
tree_1 = DecisionTreeClassifier(max_depth=1, random_state=0)
tree_1.fit(X, y)

In [None]:
fig, ax = plt.subplots(figsize=(4, 4))

# Shade the plane according to the class predicted by the depth-1 tree ...
DecisionBoundaryDisplay.from_estimator(
    tree_1,
    X,
    ax=ax,
    response_method="predict",
    grid_resolution=200,
    eps=0.1,
    cmap="RdBu",
    alpha=0.3,
)

# ... and overlay the training points, coloured by their true class.
sns.scatterplot(
    x=X[:, 0],
    y=X[:, 1],
    hue=y_cat,
    palette=["tab:red", "tab:blue"],
    ax=ax,
)
ax.set(xlabel="X[0]", ylabel="X[1]")
plt.show()

In [None]:
# Draw the fitted depth-1 tree; every node is labelled ("all") and the
# impurity values are left out of the node boxes.
plot_tree(
    tree_1,
    label="all",
    impurity=False,
    class_names=["A", "B"],
    fontsize=10,
)
plt.show()

For comparison we train a decision tree of depth 2 and also show the corresponding decision boundaries.

In [None]:
# Depth-2 tree. The seed is fixed so that ties between equally good splits
# are always resolved the same way and the figure below is reproducible.
tree_2 = DecisionTreeClassifier(max_depth=2, random_state=0)
tree_2.fit(X, y)

# Left panel: decision regions plus training points; right panel: the tree.
fig, axs = plt.subplots(1, 2, figsize=(12, 5), layout="constrained")

DecisionBoundaryDisplay.from_estimator(
    tree_2,
    X,
    ax=axs[0],
    response_method="predict",
    grid_resolution=200,
    eps=0.1,
    alpha=0.3,
    cmap="RdBu",
)

sns.scatterplot(ax=axs[0], x=X[:, 0], y=X[:, 1], hue=y_cat, palette=["tab:red", "tab:blue"])
axs[0].set(xlabel="X[0]", ylabel="X[1]")

plot_tree(
    tree_2,
    ax=axs[1],
    class_names=["A", "B"],
    label="all",
    impurity=False,
)

plt.show()

Finally, a decision tree of unbounded depth.

In [None]:
# Tree without a depth limit (max_depth is left at its default). The seed is
# fixed so that ties between equally good splits are always resolved the same
# way and the figure below is reproducible.
tree_full = DecisionTreeClassifier(random_state=0)
tree_full.fit(X, y)

# Left panel: decision regions plus training points; right panel: the tree.
fig, axs = plt.subplots(1, 2, figsize=(12, 5), layout="constrained")

DecisionBoundaryDisplay.from_estimator(
    tree_full,
    X,
    ax=axs[0],
    response_method="predict",
    grid_resolution=200,
    eps=0.1,
    alpha=0.3,
    cmap="RdBu",
)

sns.scatterplot(ax=axs[0], x=X[:, 0], y=X[:, 1], hue=y_cat, palette=["tab:red", "tab:blue"])
axs[0].set(xlabel="X[0]", ylabel="X[1]")

plot_tree(
    tree_full,
    ax=axs[1],
    class_names=["A", "B"],
    label="all",
    impurity=False,
)

plt.show()

### Random Forests

We train an ensemble of 100 slightly different decision trees, where each tree only has a relatively small depth.

In [None]:
# Seeded ensemble of 100 trees, each restricted to a depth of at most 3.
rf = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=0)
rf.fit(X, y)

fig, ax = plt.subplots(figsize=(6, 5), layout="constrained")

# Shade the plane by the forest's predicted class ...
DecisionBoundaryDisplay.from_estimator(
    rf,
    X,
    ax=ax,
    response_method="predict",
    grid_resolution=200,
    eps=0.1,
    cmap="RdBu",
    alpha=0.3,
)

# ... and draw the training points on top, coloured by their true class.
sns.scatterplot(
    x=X[:, 0],
    y=X[:, 1],
    hue=y_cat,
    palette=["tab:red", "tab:blue"],
    ax=ax,
)
ax.set(xlabel="X[0]", ylabel="X[1]")

plt.show()