In [None]:
import numpy as np
from numpy import pi
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.inspection import DecisionBoundaryDisplay

In [None]:
# Generate a noisy two-arm spiral dataset
# (adapted from https://gist.github.com/45deg/e731d9e7f478de134def5668324c44c5).
N = 200    # number of samples per class
SEED = 0   # fixed seed so that Restart & Run All reproduces the same data and figures
rng = np.random.default_rng(SEED)

# sqrt of a uniform draw puts more samples at larger angles, where the radius
# (and therefore the arm) is larger.
theta = np.sqrt(rng.random(N)) * 2 * pi

# Arm a: radius grows linearly with the angle; unit-variance Gaussian noise is added.
r_a = 2 * theta + pi
data_a = np.column_stack([np.cos(theta) * r_a, np.sin(theta) * r_a])
x_a = data_a + rng.standard_normal((N, 2))

# Arm b: r_b == -r_a, i.e. arm a reflected through the origin, with its own noise.
r_b = -2 * theta - pi
data_b = np.column_stack([np.cos(theta) * r_b, np.sin(theta) * r_b])
x_b = data_b + rng.standard_normal((N, 2))

# Shuffled (x1, x2, label) table.
# NOTE(review): `res` is not read by the cells visible here; it is kept in case
# a later cell expects it.
res_a = np.append(x_a, np.zeros((N, 1)), axis=1)
res_b = np.append(x_b, np.ones((N, 1)), axis=1)
res = np.append(res_a, res_b, axis=0)
rng.shuffle(res)

# Feature matrix and labels used for modelling (class 0 = arm a, class 1 = arm b).
X = np.r_[x_a, x_b]
y = np.r_[np.zeros(N), np.ones(N)]

print("Input data")
fig, ax = plt.subplots()
ax.scatter(x_a[:, 0], x_a[:, 1], label="class 0")
ax.scatter(x_b[:, 0], x_b[:, 1], label="class 1")
ax.set(xlabel="x1", ylabel="x2", title="Spiral input data")
ax.legend()
plt.show()

In [None]:
# Evaluate an unpruned decision tree on the spiral data, then visualise the
# fitted tree and the decision boundary it induces.
clf = DecisionTreeClassifier(random_state=0)

print("10 fold cross validation")
# cross_val_score fits clones of `clf`, so `clf` itself is still unfitted afterwards.
cv_scores = cross_val_score(clf, X, y, cv=10)
print(cv_scores)
print(f"mean accuracy: {cv_scores.mean():.3f} (std {cv_scores.std():.3f})")

print("Decision Tree")
clf = clf.fit(X, y)
plt.figure(figsize=(15, 15))
plot_tree(clf)
plt.show()

print("Decision Boundary Plot")
GRID_POINTS = 50  # grid resolution per axis (same as np.linspace's default)
feature_1, feature_2 = np.meshgrid(
    np.linspace(X[:, 0].min(), X[:, 0].max(), GRID_POINTS),
    np.linspace(X[:, 1].min(), X[:, 1].max(), GRID_POINTS),
)
grid = np.column_stack([feature_1.ravel(), feature_2.ravel()])

# Reuse the fitted, seeded classifier so the boundary belongs to the very tree
# drawn above. A second, unseeded DecisionTreeClassifier is not guaranteed to
# produce the same splits, because features are randomly permuted at each split
# and ties between equally good splits can be broken differently.
tree = clf  # alias kept in case later cells refer to `tree`
y_pred = tree.predict(grid).reshape(feature_1.shape)

# Named `boundary` rather than `display` to avoid shadowing IPython's display().
boundary = DecisionBoundaryDisplay(
    xx0=feature_1, xx1=feature_2, response=y_pred
)
boundary.plot(alpha=0.5, cmap="plasma")
boundary.ax_.scatter(
    X[:, 0], X[:, 1], c=y, edgecolor="black"
)
boundary.ax_.set(xlabel="x1", ylabel="x2", title="Decision tree decision boundary")
plt.show()