# Random forest

## References


In [None]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell runs, so edits to random_tree_models are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import math

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import sklearn.datasets as sk_datasets

import random_tree_models.decisiontree as dtree
import random_tree_models.randomforest as rf

In [None]:
# Seeded RandomState handed to both dataset generators in this notebook.
# NOTE(review): because the same instance is passed to each generator, the
# generated data presumably depends on the order in which the cells are run.
rng = np.random.RandomState(42)

## Classification

split score:
* gini
* entropy

In [None]:
# Toy binary classification problem: 1000 samples, two features (none of them
# redundant) and a class separation of 2.
X, y = sk_datasets.make_classification(
    random_state=rng,
    n_samples=1_000,
    n_classes=2,
    n_features=2,
    n_redundant=0,
    class_sep=2,
)
# Samples in feature space, coloured by class label. The trailing semicolon
# keeps the Axes repr out of the cell output.
sns.scatterplot(hue=y, x=X[:, 0], y=X[:, 1], alpha=0.3);

In [None]:
# Sampling fractions passed to both forests built in this notebook.
frac_subsamples = 2 / 3
frac_features = 1  # alternative heuristic: math.sqrt(X.shape[1]) / X.shape[1]
# Echo the chosen fractions together with the number of features.
frac_subsamples, frac_features, X.shape[1]

In [None]:
# Random forest classifier: 10 trees of depth at most 4, split score "gini",
# using the sampling fractions defined earlier in the notebook.
model = rf.RandomForestClassifier(
    n_trees=10,
    max_depth=4,
    measure_name="gini",
    frac_subsamples=frac_subsamples,
    frac_features=frac_features,
    random_state=42,
)

In [None]:
# Fit the forest on the classification data.
model.fit(X, y)

In [None]:
# Inspect the subsampling fraction stored in the first tree's growth parameters.
model.trees_[0].growth_params_.frac_subsamples

In [None]:
# Show the first tree of the fitted forest.
dtree.show_tree(model.trees_[0])

In [None]:
# Predicted class probabilities for the training samples; show the first five.
y_prob = model.predict_proba(X)
y_prob[:5]

In [None]:
# Regular 100 x 100 grid spanning the observed range of both features,
# flattened into one row per grid point (columns: feature 0, feature 1).
x0, x1 = (np.linspace(X[:, col].min(), X[:, col].max(), 100) for col in range(2))
X0, X1 = np.meshgrid(x0, x1)
X_plot = np.column_stack((X0.ravel(), X1.ravel()))

In [None]:
# Second probability column (presumably class 1 — confirm against
# predict_proba's column order) at every grid point; show the first five.
y_prob = model.predict_proba(X_plot)[:, 1]
y_prob[:5]

In [None]:
# Predicted-probability surface as a faint background with the training
# samples, coloured by class label, drawn on top.
fig, ax = plt.subplots()
im = ax.pcolormesh(X0, X1, np.reshape(y_prob, X0.shape), alpha=0.2)
fig.colorbar(im, ax=ax)
sns.scatterplot(ax=ax, hue=y, x=X[:, 0], y=X[:, 1], alpha=0.3)
plt.show()

## Regression

split score:

* variance

In [None]:
# Toy regression problem with two features and a single target. Because
# coef=True, the coefficients of the underlying linear model are returned too.
X, y, coefs = sk_datasets.make_regression(
    n_samples=1_000, n_features=2, n_targets=1, coef=True, random_state=rng
)
# Target against the first feature. The trailing semicolon keeps the Axes repr
# out of the cell output, as in the classification scatter plot.
sns.scatterplot(x=X[:, 0], y=y, alpha=0.3);

In [None]:
# Random forest regressor: 10 trees of depth at most 2, split score
# "variance", using the same sampling fractions as the classifier.
model = rf.RandomForestRegressor(
    n_trees=10,
    max_depth=2,
    measure_name="variance",
    frac_subsamples=frac_subsamples,
    frac_features=frac_features,
    random_state=42,
)

In [None]:
# Fit the forest on the regression data.
model.fit(X, y)

In [None]:
# Show the first tree of the fitted regression forest.
dtree.show_tree(model.trees_[0])

In [None]:
# Regular 100 x 100 grid spanning the observed range of both regression
# features, flattened into one row per grid point (columns: feature 0, 1).
x0, x1 = (np.linspace(X[:, col].min(), X[:, col].max(), 100) for col in range(2))
X0, X1 = np.meshgrid(x0, x1)
X_plot = np.column_stack((X0.ravel(), X1.ravel()))

In [None]:
# Forest prediction at every grid point; show the first five values.
y_pred = model.predict(X_plot)
y_pred[:5]

In [None]:
# Grid predictions plotted against each feature in turn, one panel per feature.
fig, axs = plt.subplots(nrows=2, figsize=(12, 6))

for feature_idx, ax in enumerate(axs):
    sns.scatterplot(
        x=X_plot[:, feature_idx], y=y_pred, ax=ax, alpha=0.1, label="prediction"
    )
    # Label the axes so the two otherwise similar panels can be told apart.
    ax.set_xlabel(f"feature {feature_idx}")
    ax.set_ylabel("y")

plt.tight_layout()

In [None]:
# Prediction surface as a faint background with the observed targets on top.
# Both layers share one colormap and one normalisation, so a point's colour
# can be compared directly with the surface beneath it and with the colorbar.
norm = plt.Normalize(
    vmin=min(y.min(), y_pred.min()), vmax=max(y.max(), y_pred.max())
)

fig, ax = plt.subplots()
im = ax.pcolormesh(
    X0, X1, y_pred.reshape(X0.shape), cmap="viridis", norm=norm, alpha=0.2
)
fig.colorbar(im, ax=ax, label="y")
sns.scatterplot(
    x=X[:, 0],
    y=X[:, 1],
    hue=y,
    palette="viridis",
    hue_norm=norm,
    legend=False,  # the colorbar already explains the colours
    ax=ax,
    alpha=0.3,
)
plt.show()

In [None]:
# Predictions on the training inputs compared with the actual targets,
# plotted against each feature in turn (one panel per feature).
y_pred = model.predict(X)

fig, axs = plt.subplots(nrows=2, figsize=(12, 6))

for feature_idx, ax in enumerate(axs):
    feature_values = X[:, feature_idx]
    sns.scatterplot(x=feature_values, y=y_pred, ax=ax, alpha=0.1, label="prediction")
    sns.scatterplot(x=feature_values, y=y, ax=ax, alpha=0.1, label="actual")
    # Label the axes so the two otherwise similar panels can be told apart.
    ax.set_xlabel(f"feature {feature_idx}")
    ax.set_ylabel("y")

plt.tight_layout()