# Imports

In [448]:
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import load_wine

# Functions

In [476]:
def multiclass_initial_prediction(y, n_classes):
    class_counts = np.bincount(y, minlength=n_classes)
    class_probs = class_counts / len(y)

    log_odds = np.log(class_probs + 1e-15)
    return log_odds - log_odds.mean()

In [450]:
def binary_initial_prediction(y):
    pi = np.mean(y)

    initial_log_odds = np.log(pi / (1 - pi))

    return initial_log_odds

In [451]:
def compare_decision_function(model, X, initial_prediction=None):
    learning_rate = model.learning_rate
    estimators = model.estimators_
    estimator_results = []

    if initial_prediction is None and model.init_ != 'zero':
        print("Error - Missing initial_prediction")
        return None

    for estimator in estimators:
        class_predictions = [tree.predict(X) for tree in estimator]
        estimator_results.append(np.array(class_predictions).T)

    final_predictions = np.sum(estimator_results, axis=0) * learning_rate

    if model.init_ != 'zero':
        final_predictions += initial_prediction

    if np.isscalar(initial_prediction) and initial_prediction != None:
      final_predictions = final_predictions.flatten()

    if not np.allclose(final_predictions, gb.decision_function(X)):
      print("Error - Deicison Function does not match")

    return final_predictions

In [493]:
def get_decision_function(model, X, y):
  if len(np.unique(y)) > 2:
    initial_prediction = multiclass_initial_prediction(y, len(np.unique(y)))
  else:
    initial_prediction = binary_initial_prediction(y)

  final_predictions = compare_decision_function(model, X, initial_prediction)
  return final_predictions

In [494]:
get_decision_function(gb, X_cancer_test, y_cancer_test)

Error - Deicison Function does not match


array([1.90880322, 1.90880322, 1.90880322, 1.11940938, 1.90880322,
       1.90880322])

# Test Function

## Multiclass

In [452]:
gb = GradientBoostingClassifier(n_estimators = 2)

In [453]:
iris = load_iris()
X_iris, y_iris = iris.data, iris.target
X_iris_train, X_iris_test, y_iris_train, y_iris_test = train_test_split(X_iris, y_iris, test_size=0.1, random_state=101)

In [454]:
gb.fit(X_iris_train, y_iris_train)

In [455]:
gb.init_

In [456]:
gb.decision_function(X_iris_test)

array([[ 0.35084085, -0.15511048, -0.21511215],
       [ 0.35084085, -0.15511048, -0.21511215],
       [ 0.35084085, -0.15511048, -0.21511215],
       [-0.21526034,  0.07323811, -0.21789365],
       [-0.21501958,  0.38382644, -0.21511549],
       [-0.21526034, -0.15511048,  0.3564349 ],
       [-0.21501958,  0.38382644, -0.21511549],
       [-0.21501958,  0.38382644, -0.21511549],
       [-0.21526034, -0.15511048,  0.3564349 ],
       [ 0.35084085, -0.15511048, -0.21511215],
       [-0.21526034, -0.15511048,  0.35084085],
       [ 0.35084085, -0.15511048, -0.21511215],
       [ 0.35084085, -0.15511048, -0.21511215],
       [-0.21526034, -0.15511048,  0.35084085],
       [-0.21526034, -0.15511048,  0.18139282]])

In [457]:
mydecision = compare_decision_function(gb, X_iris_test)
mydecision

Error - Missing initial_prediction


In [458]:
multiclass_init = multiclass_initial_prediction(y_iris_train, 3)
multiclass_init

[0.32592593 0.34814815 0.32592593]


array([-0.02198599,  0.04397198, -0.02198599])

In [459]:
np.isscalar(multiclass_init)

False

In [460]:
initprediction = compare_decision_function(gb, X_iris_test, multiclass_init)
initprediction

array([[ 0.35084085, -0.15511048, -0.21511215],
       [ 0.35084085, -0.15511048, -0.21511215],
       [ 0.35084085, -0.15511048, -0.21511215],
       [-0.21526034,  0.07323811, -0.21789365],
       [-0.21501958,  0.38382644, -0.21511549],
       [-0.21526034, -0.15511048,  0.3564349 ],
       [-0.21501958,  0.38382644, -0.21511549],
       [-0.21501958,  0.38382644, -0.21511549],
       [-0.21526034, -0.15511048,  0.3564349 ],
       [ 0.35084085, -0.15511048, -0.21511215],
       [-0.21526034, -0.15511048,  0.35084085],
       [ 0.35084085, -0.15511048, -0.21511215],
       [ 0.35084085, -0.15511048, -0.21511215],
       [-0.21526034, -0.15511048,  0.35084085],
       [-0.21526034, -0.15511048,  0.18139282]])

## Binary

In [461]:
gb = GradientBoostingClassifier(n_estimators = 2)

In [462]:
cancer = load_breast_cancer()
X_cancer, y_cancer = cancer.data, cancer.target
X_cancer_train, X_cancer_test, y_cancer_train, y_cancer_test = train_test_split(X_cancer, y_cancer, test_size=0.01, random_state=101)

In [463]:
gb.fit(X_cancer_train, y_cancer_train)

In [464]:
gb.init_

In [465]:
gb.decision_function(X_cancer_test)

array([0.81113835, 0.81113835, 0.81113835, 0.02174451, 0.81113835,
       0.81113835])

In [466]:
binary_init = binary_initial_prediction(y_cancer_train)
binary_init

0.5117730421220308

In [468]:
initprediction = compare_decision_function(gb, X_cancer_test, binary_init)
initprediction

array([0.81113835, 0.81113835, 0.81113835, 0.02174451, 0.81113835,
       0.81113835])