In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, \
    GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.linear_model import SGDRegressor
from sklearn.linear_model import RidgeCV, LogisticRegressionCV, LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeRegressor

In [2]:
# create a random data matrix X with 1000 rows, 10 features, all N(0, 1)
np.random.seed(0)
X = np.random.randn(1000, 10)
# create a binary target vector y which is a linear combination of the first 3 features
y = X[:, 0] + 2*X[:, 1] + 3*X[:, 2] > 0
# y = X[:, 0] + 2*X[:, 1] + 3*X[:, 2]

In [3]:
# train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [4]:
gb = GradientBoostingClassifier(random_state=42, verbose=2, n_estimators=1, init="zero")
gb.fit(X_train, y_train)

      Iter       Train Loss   Remaining Time 
         1           1.2563            0.00s


In [5]:
# get predictions and mse
gb_preds = gb.predict(X_test)
# get misclassification rates
gb_misclassification = np.mean(gb_preds != y_test)
print(f"GB misclassification: {gb_misclassification}")
# gb_mse = np.mean((gb_preds - y_test) ** 2)
# rf_mse = np.mean((rf_preds - y_test) ** 2)
# print(f"GB MSE: {gb_mse}, RF MSE: {rf_mse}")

GB misclassification: 0.14


In [6]:
y_train = y_train.astype(float)
neg_gradient = -gb._loss.gradient(y_train, np.zeros(y_train.shape[0])) * 4

In [7]:
# fit decision tree regressor
tree = DecisionTreeRegressor(
                criterion=gb.criterion,
                splitter="best",
                max_depth=gb.max_depth,
                min_samples_split=gb.min_samples_split,
                min_samples_leaf=gb.min_samples_leaf,
                min_weight_fraction_leaf=gb.min_weight_fraction_leaf,
                min_impurity_decrease=gb.min_impurity_decrease,
                max_features=gb.max_features,
                max_leaf_nodes=gb.max_leaf_nodes,
                random_state=gb.random_state,
                ccp_alpha=gb.ccp_alpha,
            )
tree.fit(X_train, neg_gradient)

In [8]:
gb.estimators_[0][0]

In [9]:
gb.estimators_[0][0].predict(X_train)

array([ 0.98039216,  1.35483871, -1.86086957, -1.86086957,  1.        ,
        0.98039216, -1.86086957,  1.35483871, -1.68      , -1.86086957,
       -1.86086957,  1.95675676, -1.65957447, -1.86086957,  0.98039216,
        0.98039216, -1.86086957,  0.98039216,  1.95675676, -0.55555556,
       -1.86086957, -1.86086957, -1.86086957,  0.98039216,  0.98039216,
       -1.86086957,  0.98039216,  0.98039216,  1.95675676, -1.86086957,
        0.98039216,  1.95675676,  1.95675676, -0.55555556,  1.95675676,
       -1.65957447,  1.95675676, -0.55555556, -1.86086957,  1.95675676,
       -1.86086957,  1.35483871,  1.        ,  1.95675676,  1.        ,
        0.98039216, -1.86086957, -1.65957447, -1.68      , -1.86086957,
        1.95675676, -0.55555556, -1.86086957, -1.65957447, -1.86086957,
       -1.86086957,  0.98039216, -1.65957447,  1.95675676,  1.        ,
       -1.86086957, -1.65957447,  1.95675676, -0.55555556,  0.98039216,
        0.98039216, -1.65957447, -1.86086957,  1.95675676, -1.86

In [10]:
tree.predict(X_train)

array([ 0.98039216,  1.35483871, -1.86086957, -1.86086957,  1.        ,
        0.98039216, -1.86086957,  1.35483871, -1.68      , -1.86086957,
       -1.86086957,  1.95675676, -1.65957447, -1.86086957,  0.98039216,
        0.98039216, -1.86086957,  0.98039216,  1.95675676, -0.55555556,
       -1.86086957, -1.86086957, -1.86086957,  0.98039216,  0.98039216,
       -1.86086957,  0.98039216,  0.98039216,  1.95675676, -1.86086957,
        0.98039216,  1.95675676,  1.95675676, -0.55555556,  1.95675676,
       -1.65957447,  1.95675676, -0.55555556, -1.86086957,  1.95675676,
       -1.86086957,  1.35483871,  1.        ,  1.95675676,  1.        ,
        0.98039216, -1.86086957, -1.65957447, -1.68      , -1.86086957,
        1.95675676, -0.55555556, -1.86086957, -1.65957447, -1.86086957,
       -1.86086957,  0.98039216, -1.65957447,  1.95675676,  1.        ,
       -1.86086957, -1.65957447,  1.95675676, -0.55555556,  0.98039216,
        0.98039216, -1.65957447, -1.86086957,  1.95675676, -1.86

In [11]:
np.all(tree.predict(X_train)*4 == gb.estimators_[0][0].predict(X_train))

False

In [12]:
np.mean(y)

0.503

In [13]:
# round np.mean(y) to the nearest integer
np.round(np.mean(y))

1.0

In [14]:
# predictions = np.zeros(y_train.shape[0])
# get majority class of y
majority_class = np.round(np.mean(y))
predictions = np.full(y_train.shape[0], majority_class)
eps = np.finfo(np.float32).eps  # FIXME: This is quite large!
predictions = np.clip(predictions, eps, 1 - eps, dtype=np.float64)

In [15]:
approx_y = np.clip(y_train, eps, 1 - eps, dtype=np.float64)

In [16]:
approx_y

array([9.99999881e-01, 9.99999881e-01, 9.99999881e-01, 1.19209290e-07,
       9.99999881e-01, 1.19209290e-07, 1.19209290e-07, 9.99999881e-01,
       1.19209290e-07, 1.19209290e-07, 1.19209290e-07, 9.99999881e-01,
       1.19209290e-07, 1.19209290e-07, 9.99999881e-01, 9.99999881e-01,
       1.19209290e-07, 1.19209290e-07, 9.99999881e-01, 9.99999881e-01,
       1.19209290e-07, 1.19209290e-07, 1.19209290e-07, 9.99999881e-01,
       9.99999881e-01, 1.19209290e-07, 9.99999881e-01, 9.99999881e-01,
       9.99999881e-01, 1.19209290e-07, 1.19209290e-07, 9.99999881e-01,
       9.99999881e-01, 1.19209290e-07, 9.99999881e-01, 1.19209290e-07,
       9.99999881e-01, 1.19209290e-07, 1.19209290e-07, 9.99999881e-01,
       1.19209290e-07, 9.99999881e-01, 9.99999881e-01, 9.99999881e-01,
       9.99999881e-01, 1.19209290e-07, 1.19209290e-07, 1.19209290e-07,
       1.19209290e-07, 1.19209290e-07, 9.99999881e-01, 1.19209290e-07,
       1.19209290e-07, 1.19209290e-07, 1.19209290e-07, 1.19209290e-07,
      

In [17]:
predictions

array([0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999

In [18]:
gb._loss.link.link(predictions)

array([15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94238503,
       15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94238503,
       15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94238503,
       15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94238503,
       15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94238503,
       15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94238503,
       15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94238503,
       15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94238503,
       15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94238503,
       15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94238503,
       15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94238503,
       15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94238503,
       15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94238503,
       15.94238503, 15.94238503, 15.94238503, 15.94238503, 15.94

In [19]:
predictions

array([0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999988,
       0.99999988, 0.99999988, 0.99999988, 0.99999988, 0.99999

In [20]:
gb.validation_fraction

0.1

In [21]:
tree.predict(X_train)

array([ 0.98039216,  1.35483871, -1.86086957, -1.86086957,  1.        ,
        0.98039216, -1.86086957,  1.35483871, -1.68      , -1.86086957,
       -1.86086957,  1.95675676, -1.65957447, -1.86086957,  0.98039216,
        0.98039216, -1.86086957,  0.98039216,  1.95675676, -0.55555556,
       -1.86086957, -1.86086957, -1.86086957,  0.98039216,  0.98039216,
       -1.86086957,  0.98039216,  0.98039216,  1.95675676, -1.86086957,
        0.98039216,  1.95675676,  1.95675676, -0.55555556,  1.95675676,
       -1.65957447,  1.95675676, -0.55555556, -1.86086957,  1.95675676,
       -1.86086957,  1.35483871,  1.        ,  1.95675676,  1.        ,
        0.98039216, -1.86086957, -1.65957447, -1.68      , -1.86086957,
        1.95675676, -0.55555556, -1.86086957, -1.65957447, -1.86086957,
       -1.86086957,  0.98039216, -1.65957447,  1.95675676,  1.        ,
       -1.86086957, -1.65957447,  1.95675676, -0.55555556,  0.98039216,
        0.98039216, -1.65957447, -1.86086957,  1.95675676, -1.86

In [22]:
# get type of X_train
X_train.dtype
# convert to float32
X_train = X_train.astype(np.float32)

In [23]:
gb._raw_predict_init(X_train)

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],

In [24]:
rawpreds = gb._raw_predict(X_train)

In [25]:
gb.init_.predict(X_train)

AttributeError: 'str' object has no attribute 'predict'

In [58]:
predictions = np.zeros(y_train.shape[0])
gb._loss.link.link(predictions)

array([-inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf,
       -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf,
       -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf,
       -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf,
       -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf,
       -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf,
       -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf,
       -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf,
       -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf,
       -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf,
       -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf,
       -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf,
       -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf, -inf,
       -inf, -inf, -inf, -inf, -inf, -inf, -inf, -i

In [59]:
-gb._loss.gradient(y_train, predictions)

array([ 0.5,  0.5,  0.5, -0.5,  0.5, -0.5, -0.5,  0.5, -0.5, -0.5, -0.5,
        0.5, -0.5, -0.5,  0.5,  0.5, -0.5, -0.5,  0.5,  0.5, -0.5, -0.5,
       -0.5,  0.5,  0.5, -0.5,  0.5,  0.5,  0.5, -0.5, -0.5,  0.5,  0.5,
       -0.5,  0.5, -0.5,  0.5, -0.5, -0.5,  0.5, -0.5,  0.5,  0.5,  0.5,
        0.5, -0.5, -0.5, -0.5, -0.5, -0.5,  0.5, -0.5, -0.5, -0.5, -0.5,
       -0.5,  0.5,  0.5,  0.5,  0.5, -0.5, -0.5,  0.5,  0.5,  0.5, -0.5,
       -0.5, -0.5,  0.5, -0.5,  0.5, -0.5, -0.5,  0.5, -0.5,  0.5,  0.5,
       -0.5, -0.5, -0.5,  0.5,  0.5, -0.5, -0.5,  0.5, -0.5, -0.5, -0.5,
       -0.5,  0.5,  0.5, -0.5,  0.5,  0.5, -0.5,  0.5, -0.5,  0.5, -0.5,
       -0.5, -0.5,  0.5, -0.5,  0.5,  0.5,  0.5, -0.5,  0.5, -0.5, -0.5,
        0.5,  0.5,  0.5,  0.5, -0.5, -0.5,  0.5,  0.5,  0.5,  0.5, -0.5,
       -0.5, -0.5,  0.5,  0.5, -0.5,  0.5, -0.5, -0.5,  0.5,  0.5, -0.5,
       -0.5, -0.5,  0.5, -0.5,  0.5,  0.5, -0.5,  0.5, -0.5,  0.5,  0.5,
       -0.5, -0.5,  0.5, -0.5, -0.5,  0.5, -0.5,  0

In [57]:
gb._loss.link.link(-gb._loss.gradient(y_train, np.zeros(y_train.shape[0])))

array([ 0.,  0.,  0., nan,  0., nan, nan,  0., nan, nan, nan,  0., nan,
       nan,  0.,  0., nan, nan,  0.,  0., nan, nan, nan,  0.,  0., nan,
        0.,  0.,  0., nan, nan,  0.,  0., nan,  0., nan,  0., nan, nan,
        0., nan,  0.,  0.,  0.,  0., nan, nan, nan, nan, nan,  0., nan,
       nan, nan, nan, nan,  0.,  0.,  0.,  0., nan, nan,  0.,  0.,  0.,
       nan, nan, nan,  0., nan,  0., nan, nan,  0., nan,  0.,  0., nan,
       nan, nan,  0.,  0., nan, nan,  0., nan, nan, nan, nan,  0.,  0.,
       nan,  0.,  0., nan,  0., nan,  0., nan, nan, nan,  0., nan,  0.,
        0.,  0., nan,  0., nan, nan,  0.,  0.,  0.,  0., nan, nan,  0.,
        0.,  0.,  0., nan, nan, nan,  0.,  0., nan,  0., nan, nan,  0.,
        0., nan, nan, nan,  0., nan,  0.,  0., nan,  0., nan,  0.,  0.,
       nan, nan,  0., nan, nan,  0., nan,  0.,  0., nan,  0.,  0., nan,
        0., nan, nan, nan,  0.,  0., nan, nan, nan,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., nan, na

In [51]:
-gb._loss.gradient(y_train, np.zeros(y_train.shape[0]))

array([ 0.5,  0.5,  0.5, -0.5,  0.5, -0.5, -0.5,  0.5, -0.5, -0.5, -0.5,
        0.5, -0.5, -0.5,  0.5,  0.5, -0.5, -0.5,  0.5,  0.5, -0.5, -0.5,
       -0.5,  0.5,  0.5, -0.5,  0.5,  0.5,  0.5, -0.5, -0.5,  0.5,  0.5,
       -0.5,  0.5, -0.5,  0.5, -0.5, -0.5,  0.5, -0.5,  0.5,  0.5,  0.5,
        0.5, -0.5, -0.5, -0.5, -0.5, -0.5,  0.5, -0.5, -0.5, -0.5, -0.5,
       -0.5,  0.5,  0.5,  0.5,  0.5, -0.5, -0.5,  0.5,  0.5,  0.5, -0.5,
       -0.5, -0.5,  0.5, -0.5,  0.5, -0.5, -0.5,  0.5, -0.5,  0.5,  0.5,
       -0.5, -0.5, -0.5,  0.5,  0.5, -0.5, -0.5,  0.5, -0.5, -0.5, -0.5,
       -0.5,  0.5,  0.5, -0.5,  0.5,  0.5, -0.5,  0.5, -0.5,  0.5, -0.5,
       -0.5, -0.5,  0.5, -0.5,  0.5,  0.5,  0.5, -0.5,  0.5, -0.5, -0.5,
        0.5,  0.5,  0.5,  0.5, -0.5, -0.5,  0.5,  0.5,  0.5,  0.5, -0.5,
       -0.5, -0.5,  0.5,  0.5, -0.5,  0.5, -0.5, -0.5,  0.5,  0.5, -0.5,
       -0.5, -0.5,  0.5, -0.5,  0.5,  0.5, -0.5,  0.5, -0.5,  0.5,  0.5,
       -0.5, -0.5,  0.5, -0.5, -0.5,  0.5, -0.5,  0

In [None]:
rawpreds = gb._loss.link.link(np.zeros(y_train.shape[0]))

In [43]:
tree.predict(X_train)

array([ 0.2416367 ,  0.30489043,  0.2416367 , -0.45361205,  0.22502081,
        0.2416367 , -0.45361205,  0.30489043, -0.37809851, -0.45361205,
        0.2416367 ,  0.44042576, -0.31411594, -0.45361205,  0.2416367 ,
        0.2416367 , -0.31411594, -0.31411594,  0.44042576,  0.2416367 ,
       -0.45361205, -0.45361205, -0.45361205, -0.31411594,  0.2416367 ,
       -0.45361205,  0.2416367 ,  0.2416367 ,  0.44042576, -0.45361205,
       -0.31411594,  0.44042576,  0.44042576, -0.32611468,  0.44042576,
       -0.31411594,  0.44042576, -0.32611468, -0.31411594,  0.44042576,
       -0.45361205,  0.30489043,  0.22502081,  0.44042576,  0.22502081,
        0.2416367 , -0.31411594, -0.31411594, -0.37809851, -0.45361205,
        0.44042576, -0.32611468, -0.45361205, -0.31411594, -0.45361205,
       -0.45361205,  0.2416367 , -0.31411594,  0.44042576,  0.22502081,
       -0.31411594, -0.31411594,  0.44042576, -0.32611468,  0.2416367 ,
       -0.31411594, -0.31411594, -0.45361205,  0.44042576, -0.45