https://en.wikipedia.org/wiki/Gradient_boosting

https://blog.paperspace.com/gradient-boosting-for-classification/

Friedman, J. H. (2001). Greedy Function Approximation: A Gradient Boosting Machine. The Annals of Statistics, 29(5), 1189–1232.



In [None]:
# Enable IPython's autoreload extension so edits to imported modules
# (e.g. the local `src` package) are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules (except those excluded via %aimport) before
# executing the code in each cell.
%autoreload 2

In [None]:
import sklearn.datasets as sk_datasets

import seaborn as sns
import matplotlib.pyplot as plt

import numpy as np

import src.gradientboostedtrees as gbt
import src.decisiontree as dt
# Seeded legacy NumPy random state, reused by later cells (dataset generation
# and model construction) so that results can be reproduced.
rng = np.random.RandomState(42)

## Regression

In [None]:
# Synthetic regression problem: 1,000 samples, 2 features, a single target.
# coef=True additionally returns the coefficients of the generating linear
# model. random_state=rng makes the draw reproducible from the notebook seed,
# consistent with the classification dataset further down.
X, y, coefs = sk_datasets.make_regression(
    n_samples=1_000,
    n_features=2,
    n_targets=1,
    coef=True,
    random_state=rng,
)
# Quick look at the target against the first feature only.
sns.scatterplot(x=X[:, 0], y=y, alpha=.3)

In [None]:
# Display the coefficients returned by make_regression (requested via
# coef=True), i.e. the ground-truth weights of the generating linear model.
coefs

In [None]:
# Gradient-boosted regressor: five trees, each limited to depth 2.
# NOTE(review): `factor` is presumably the shrinkage / learning rate applied
# to each tree's contribution — confirm in src.gradientboostedtrees.
model = gbt.GradientBoostedTreeRegressor(
    n_trees=5,
    max_depth=2,
    factor=1.0,
    rng=rng,
)

In [None]:
# Train the regressor on the complete synthetic data set generated above.
model.fit(X, y)

In [None]:
# Display the fitted trees stored on the model
# (presumably one entry per boosting round — verify in src.gradientboostedtrees).
model.trees_

In [None]:
# Render the first tree of the fitted ensemble.
first_tree = model.trees_[0]
dt.show_tree(first_tree)

In [None]:
# Regular 100 x 100 grid spanning the observed range of both features.
x0, x1 = (np.linspace(X[:, j].min(), X[:, j].max(), 100) for j in (0, 1))
X0, X1 = np.meshgrid(x0, x1)
# Flatten the grid into one row per node, columns ordered (feature 0, feature 1).
X_plot = np.vstack((X0.ravel(), X1.ravel())).T

In [None]:
# Evaluate the fitted model on every node of the plotting grid.
y_pred = model.predict(X_plot)
# Show the first five predictions as a quick sanity check.
y_pred[:5]

In [None]:
# One panel per feature: grid predictions plotted against that feature's value.
fig, axs = plt.subplots(nrows=2, figsize=(12, 6))

for feature_idx, ax in enumerate(axs):
    sns.scatterplot(
        x=X_plot[:, feature_idx],
        y=y_pred,
        ax=ax,
        alpha=0.1,
        label="prediction",
    )

plt.tight_layout()

In [None]:
# Predicted surface (background mesh) overlaid with the training points
# coloured by their true target value.
pred_grid = y_pred.reshape(X0.shape)

# Use one colormap and one value range for both layers. With the defaults,
# pcolormesh and seaborn's numeric hue pick different colormaps and normalise
# independently, so point colours could not be compared with the background
# or read off the colorbar.
vmin = min(y.min(), pred_grid.min())
vmax = max(y.max(), pred_grid.max())

fig, ax = plt.subplots()
im = ax.pcolormesh(
    X0, X1, pred_grid, cmap="viridis", vmin=vmin, vmax=vmax, alpha=.2
)
fig.colorbar(im, ax=ax)
sns.scatterplot(
    x=X[:, 0],
    y=X[:, 1],
    hue=y,
    palette="viridis",
    hue_norm=(vmin, vmax),
    ax=ax,
    alpha=.3,
)
plt.show()

In [None]:
# In-sample predictions, compared with the true targets feature by feature.
y_pred = model.predict(X)

fig, axs = plt.subplots(nrows=2, figsize=(12, 6))

for feature_idx, ax in enumerate(axs):
    feature_values = X[:, feature_idx]
    # Draw predictions first and actuals second, on the same axes.
    for series, series_label in ((y_pred, "prediction"), (y, "actual")):
        sns.scatterplot(
            x=feature_values,
            y=series,
            ax=ax,
            alpha=0.1,
            label=series_label,
        )

plt.tight_layout()

## Binary Classification

In [None]:
# Synthetic two-class problem in two dimensions with no redundant features;
# class_sep=2 pushes the classes further apart than the library default.
# The draw is seeded from the notebook-wide random state.
X, y = sk_datasets.make_classification(
    n_samples=1_000,
    n_features=2,
    n_classes=2,
    n_redundant=0,
    class_sep=2,
    random_state=rng,
)
# Feature space coloured by class label.
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y, alpha=0.3)

In [None]:
# Gradient-boosted classifier: five trees, each limited to depth 2,
# seeded from the notebook-wide random state.
model = gbt.GradientBoostedTreeClassifier(
    n_trees=5,
    max_depth=2,
    rng=rng,
)

In [None]:
# Train the classifier on the complete synthetic data set generated above.
model.fit(X, y)

In [None]:
# Display the fitted trees stored on the model
# (presumably one entry per boosting round — verify in src.gradientboostedtrees).
model.trees_

In [None]:
# Render the first tree of the fitted ensemble.
first_tree = model.trees_[0]
dt.show_tree(first_tree)

In [None]:
# Regular 100 x 100 grid spanning the observed range of both features.
x0, x1 = (np.linspace(X[:, j].min(), X[:, j].max(), 100) for j in (0, 1))
X0, X1 = np.meshgrid(x0, x1)
# Flatten the grid into one row per node, columns ordered (feature 0, feature 1).
X_plot = np.vstack((X0.ravel(), X1.ravel())).T

In [None]:
# Evaluate the fitted classifier on every node of the plotting grid.
# NOTE(review): whether predict returns labels, probabilities or raw scores
# is not visible here — confirm in src.gradientboostedtrees.
y_pred = model.predict(X_plot)
# Show the first five predictions as a quick sanity check.
y_pred[:5]

In [None]:
# Model output over the feature plane (background mesh) with the training
# points drawn on top, coloured by their true class.
pred_grid = y_pred.reshape(X0.shape)

fig, ax = plt.subplots()
im = ax.pcolormesh(X0, X1, pred_grid, alpha=0.2)
fig.colorbar(im, ax=ax)
sns.scatterplot(
    x=X[:, 0],
    y=X[:, 1],
    hue=y,
    ax=ax,
    alpha=0.3,
)
plt.show()