In [1]:
import ngboost

In [2]:
from ngboost import NGBRegressor
from sklearn.base import BaseEstimator
from sklearn.utils import check_array
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import GroupShuffleSplit

from ngboost.distns import (
    Bernoulli,
    ClassificationDistn,
    LogNormal,
    Normal,
    RegressionDistn,
)
from ngboost.distns.utils import SurvivalDistnClass
from ngboost.helpers import Y_from_censored
from ngboost.learners import default_tree_learner
from ngboost.manifold import manifold
from ngboost.ngboost import NGBoost
from ngboost.scores import LogScore

In [3]:
from sklearn.datasets import load_boston
from ngboost import NGBRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


# Usage guide this notebook follows:
# https://stanfordmlgroup.github.io/ngboost/1-useage.html

# `return_X_y` is passed by keyword: newer scikit-learn releases accept it only
# as a keyword argument, and the keyword form also works on older releases.
# NOTE(review): `load_boston` itself was removed in scikit-learn 1.2 — pin the
# scikit-learn version or switch data sets if this must run on a newer release.
X, y = load_boston(return_X_y=True)
# Hold out 10% of the rows as a test split; the fixed seed keeps the partition repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size=0.1, random_state=41)



In [4]:
# Decision-tree base learner with a fixed seed.
base_learner = DecisionTreeRegressor(random_state = 41)
# NOTE(review): `auto_early_stopping_rounds` is not explained anywhere in this
# notebook — presumably the early-stopping patience; confirm against the
# installed ngboost build.
ngb = NGBRegressor(Base = base_learner, validation_fraction=0.1, random_state = 41, auto_early_stopping_rounds=10)
# Fits on the full X, y rather than on the X_train / Y_train split; the call is
# the last expression, so its return value becomes the cell output.
ngb.fit(X, y)

[iter 0] loss=3.6579 val_loss=3.4377 scale=2.0000 norm=13.6804
== Early stopping achieved.
== Best iteration / VAL59 (val_loss=3.0712)


NGBRegressor(Base=DecisionTreeRegressor(random_state=41),
             auto_early_stopping_rounds=10,
             random_state=RandomState(MT19937) at 0x1FE523E2C40)

In [None]:
# Number of rows in the held-out test split.
len(X_test)

51

In [None]:
# This seeded tree is created but not passed to the regressor on the next line.
base_learner = DecisionTreeRegressor(random_state = 55)
# Regressor built with no arguments, i.e. with whatever defaults the installed
# NGBRegressor provides.
ngb = NGBRegressor()

In [None]:
# NOTE(review): the saved output of this cell is a ValueError; its cause is not
# visible in the notebook — investigate before relying on this cell.
ngb.fit(X_train, Y_train)

ValueError: ignored

In [None]:
# Seeded tree learner used as the booster's base estimator.
base_learner = DecisionTreeRegressor(random_state = 55)
# Construct and fit in one expression, supplying the test split as the
# validation set together with an early-stopping setting of 10 rounds.
# NOTE(review): the saved output of this cell is a ValueError, so `ngb` is not
# rebound by this line in that run; the cause is not visible in the notebook.
ngb = NGBRegressor(random_state = 55, Base = base_learner).fit(X_train, Y_train, X_val = X_test, Y_val = Y_test, early_stopping_rounds = 10)

ValueError: ignored

In [None]:
# Seeded tree learner used as the booster's base estimator.
base_learner = DecisionTreeRegressor(random_state = 42)
# NOTE(review): `NGBHistGradientBoostingRegressor` is neither imported nor
# defined in any visible cell, so this line depends on hidden kernel state and
# would raise NameError on a fresh run — add its import/definition or drop the cell.
# Both early-stopping arguments are passed as None (presumably disabling
# validation-based early stopping — confirm).
ngbh = NGBHistGradientBoostingRegressor(random_state = 42, Base = base_learner, early_stopping_rounds = None, validation_fraction = None).fit(X_train, Y_train)

[iter 0] loss=3.6441 val_loss=0.0000 scale=2.0000 norm=13.5053
[iter 100] loss=2.3805 val_loss=0.0000 scale=2.0000 norm=2.1737
[iter 200] loss=1.1195 val_loss=0.0000 scale=4.0000 norm=2.0294
[iter 300] loss=-0.9605 val_loss=0.0000 scale=8.0000 norm=3.9999
[iter 400] loss=-4.5948 val_loss=0.0000 scale=4.0000 norm=1.9884


In [None]:
# Predictions of the `ngbh` model for the held-out test rows.
Y_preds = ngbh.predict(X_test)

# scikit-learn documents the argument order as (y_true, y_pred). The squared
# error is symmetric, so the value is unchanged, but the documented order keeps
# the call correct if a non-symmetric metric is swapped in later.
test_MSE = mean_squared_error(Y_test, Y_preds)
test_MSE

14.61412374848873

In [None]:
# Seeded tree learner; later cells that mention `base_learner` pick up this
# object unless another cell rebinds the name first.
base_learner = DecisionTreeRegressor(random_state = 42)

# Construct and fit on the training split only; no validation data or
# early-stopping arguments are passed to fit here.
ngb = NGBRegressor(random_state = 42, Base = base_learner).fit(X_train, Y_train)

[iter 0] loss=3.6441 val_loss=0.0000 scale=2.0000 norm=13.5053
[iter 100] loss=2.3805 val_loss=0.0000 scale=2.0000 norm=2.1737
[iter 200] loss=1.1195 val_loss=0.0000 scale=4.0000 norm=2.0294
[iter 300] loss=-0.9605 val_loss=0.0000 scale=8.0000 norm=3.9999
[iter 400] loss=-4.5948 val_loss=0.0000 scale=4.0000 norm=1.9884


In [None]:
# Predictions of the `ngb` model for the held-out test rows.
Y_preds = ngb.predict(X_test)

# scikit-learn documents the argument order as (y_true, y_pred). The squared
# error is symmetric, so the value is unchanged, but the documented order keeps
# the call correct if a non-symmetric metric is swapped in later.
test_MSE = mean_squared_error(Y_test, Y_preds)
test_MSE

14.61412374848873

In [None]:
class NGBRegressor2(NGBoost, BaseEstimator):
    """
    NGBoost regression estimator that additionally records early-stopping settings.

    NGBRegressor2 wraps the generic NGBoost class for regression problems, i.e.
    for outcomes that could take an infinite number of (ordered) values. Besides
    the usual NGBoost arguments, the constructor stores `validation_fraction`
    and `early_stopping_rounds` on the instance; these two values are not
    forwarded to the parent constructor.

    Parameters:
        Dist                  : assumed distributional form of Y|X=x.
                                A distribution from ngboost.distns, e.g. Normal
        Score                 : rule to compare probabilistic predictions P̂ to the observed data y.
                                A score from ngboost.scores, e.g. LogScore
        Base                  : base learner to use in the boosting algorithm.
                                Any instantiated sklearn regressor, e.g. DecisionTreeRegressor()
        natural_gradient      : logical flag indicating whether the natural gradient should be used
        n_estimators          : the number of boosting iterations to fit
        learning_rate         : the learning rate
        minibatch_frac        : the percent subsample of rows to use in each boosting iteration
        col_sample            : the percent subsample of columns to use in each boosting iteration
        verbose               : flag indicating whether output should be printed during fitting
        verbose_eval          : increment (in boosting iterations) at which output should be printed
        tol                   : numerical tolerance to be used in optimization
        random_state          : seed for reproducibility. See
                                https://stackoverflow.com/questions/28064634/random-state-pseudo-random-number-in-scikit-learn
        validation_fraction   : kept as an instance attribute only.
                                NOTE(review): presumably the share of training rows to hold
                                out for validation — confirm how (or whether) fit reads it
        early_stopping_rounds : kept as an instance attribute only.
                                NOTE(review): presumably the early-stopping patience —
                                confirm how (or whether) fit reads it
    Output:
        An NGBRegressor2 object that can be fit.
    """

    def __init__(
        self,
        Dist=Normal,
        Score=LogScore,
        Base=default_tree_learner,
        natural_gradient=True,
        n_estimators=500,
        learning_rate=0.01,
        minibatch_frac=1.0,
        col_sample=1.0,
        verbose=True,
        verbose_eval=100,
        tol=1e-4,
        random_state=None,
        validation_fraction=0.1,
        early_stopping_rounds=10,
    ):
        # The two extra settings live on the instance under their parameter
        # names; they are set before any validation, as in the original.
        self.validation_fraction = validation_fraction
        self.early_stopping_rounds = early_stopping_rounds

        assert issubclass(Dist, RegressionDistn), f"{Dist.__name__} is not useable for regression."

        # A distribution without a "scores" attribute only has censored scores
        # implemented, so replace it with its uncensored variant for this Score.
        lacks_scores = not hasattr(Dist, "scores")
        if lacks_scores:
            Dist = Dist.uncensor(Score)

        # Positional hand-off to NGBoost, in the parent's declared order.
        parent_args = (
            Dist,
            Score,
            Base,
            natural_gradient,
            n_estimators,
            learning_rate,
            minibatch_frac,
            col_sample,
            verbose,
            verbose_eval,
            tol,
            random_state,
        )
        super().__init__(*parent_args)

    def __getstate__(self):
        """Return the parent's pickling state, swapping out the uncensored wrapper Dist."""
        state = super().__getstate__()
        if self.Dist.__name__ != "DistWithUncensoredScore":
            return state
        # The wrapper class is the unpicklable entry: store its base class
        # instead and leave a marker so __setstate__ can rebuild the wrapper.
        state["Dist"] = self.Dist.__base__
        state["uncensor"] = True
        return state

    def __setstate__(self, state_dict):
        """Rebuild the uncensored Dist when the marker is present, then defer to the parent."""
        if "uncensor" in state_dict:
            stored_dist = state_dict["Dist"]
            state_dict["Dist"] = stored_dist.uncensor(state_dict["Score"])
        super().__setstate__(state_dict)

In [None]:
# `base_learner` is whatever an earlier cell last bound (hidden-state
# dependency). The test split is supplied as the validation set, and the
# early-stopping setting is passed to fit explicitly rather than taken from
# the constructor default.
ngb = NGBRegressor2(random_state = 55, Base = base_learner).fit(X_train, Y_train, X_val = X_test, Y_val = Y_test, early_stopping_rounds = 10)

[iter 0] loss=3.6497 val_loss=3.4925 scale=2.0000 norm=13.6949
[iter 100] loss=2.3897 val_loss=2.6101 scale=2.0000 norm=2.1963
== Early stopping achieved.
== Best iteration / VAL128 (val_loss=2.5474)


In [None]:
# Rebinds `ngb` to a fresh, unfitted NGBRegressor2; `base_learner` is whatever
# an earlier cell last bound.
ngb = NGBRegressor2(random_state = 55, Base = base_learner)

In [None]:
    # NOTE(review): this cell holds only an indented method with no enclosing
    # class statement; it looks like a draft of NGBRegressor2.__init__ without
    # the Dist check — confirm whether it is still needed or delete the cell.
    def __init__(
        self,
        Dist=Normal,
        Score=LogScore,
        Base=default_tree_learner,
        natural_gradient=True,
        n_estimators=500,
        learning_rate=0.01,
        minibatch_frac=1.0,
        col_sample=1.0,
        verbose=True,
        verbose_eval=100,
        tol=1e-4,
        random_state=None,
        validation_fraction=0.1,
        early_stopping_rounds=10
    ):
        """
        Store the two early-stopping settings and delegate the rest to the parent.

        `validation_fraction` and `early_stopping_rounds` are kept as instance
        attributes; the remaining twelve arguments are forwarded positionally,
        unchanged, to the parent constructor.
        """
        self.validation_fraction = validation_fraction
        self.early_stopping_rounds = early_stopping_rounds
        # Unlike the class defined earlier in the notebook, no Dist validation
        # or uncensoring happens before the hand-off.
        super().__init__(
            Dist,
            Score,
            Base,
            natural_gradient,
            n_estimators,
            learning_rate,
            minibatch_frac,
            col_sample,
            verbose,
            verbose_eval,
            tol,
            random_state,
        )