In [3]:
from sklearn.ensemble import RandomForestRegressor, HistGradientBoostingRegressor
from sklearn.linear_model import RidgeCV
import numpy as np
from sklearn.utils.validation import (
    check_is_fitted,
    check_consistent_length,
    _check_sample_weight,
)
from sklearn.datasets import load_iris
from sklearn.metrics import mean_squared_error

In [5]:
class LRBoostRegressor():
    
    def __init__(self, linear_model=RidgeCV(), non_linear_model=HistGradientBoostingRegressor()):
        """
        Args:
            linear_model (optional): Linear model (not enforced) for initial fit. MUST BE SKLEARN COMPLIANT. Defaults to RidgeCV().
            non_linear_model (optional): Non-Linear model (not enforced) for initial fit. MUST BE SKLEARN COMPLIANT. Defaults to HistGradientBoostingRegressor().
        """
        self.linear_model = linear_model
        self.non_linear_model = non_linear_model

    def __sklearn_is_fitted__(self):
        return True

    def fit(self, X, y, sample_weight=None):
        self.linear_model.fit(X, y, sample_weight=sample_weight)
        linear_prediction = self.linear_model.predict(X)
        linear_residual = np.subtract(linear_prediction, y)
        self.non_linear_model.fit(X, y=linear_residual, sample_weight=sample_weight)
        
        return self

    def predict(self, X) -> np.array:
        check_is_fitted(self)
        non_linear_prediction = self.non_linear_model.predict(X)
        linear_prediction = self.linear_model.predict(X)

        return np.add(non_linear_prediction, linear_prediction)

In [7]:
X, y = load_iris(return_X_y=True)
lrb = LRBoostRegressor().fit(X, y)
predictions = lrb.predict(X)
mean_squared_error(predictions, y)

0.125064631553949

In [9]:
X, y = load_iris(return_X_y=True)

## Example of how to pass arguments to constituent estimators
ridge_args = {"alphas": np.logspace(-4, 3, 10, endpoint=True),
               "cv": 5}

rf_args = {"n_estimators": 50, 
            "n_jobs": -1}

lrb = LRBoostRegressor(linear_model=RidgeCV(**ridge_args),
                        non_linear_model=RandomForestRegressor(**rf_args))
lrb = lrb.fit(X, y)
predictions = lrb.predict(X)
mean_squared_error(predictions, y)

0.15024071867057287

In [19]:
from sklearn.datasets import load_iris
from sklearn.linear_model import RidgeCV

X, y = load_iris(return_X_y=True)

def ridge(X, y):
    args = {"alphas": (1000, 100000), "cv": (5)}
    mod = RidgeCV(**args).fit(X, y)
    preds = mod.predict(X)
    return(preds)

In [7]:
lrb.non_linear_model.get_params()

{'categorical_features': None,
 'early_stopping': 'auto',
 'l2_regularization': 0.0,
 'learning_rate': 0.1,
 'loss': 'squared_error',
 'max_bins': 255,
 'max_depth': None,
 'max_iter': 50,
 'max_leaf_nodes': 31,
 'min_samples_leaf': 10,
 'monotonic_cst': None,
 'n_iter_no_change': 10,
 'random_state': None,
 'scoring': 'loss',
 'tol': 1e-07,
 'validation_fraction': 0.1,
 'verbose': 0,
 'warm_start': False}

In [4]:
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)

## Example of how to pass arguments to constituent estimators
ridge_args = {"alphas": np.logspace(-4, 3, 10, endpoint=True),
               "cv": 5}

rf_args = {"n_estimators": 50, 
            "n_jobs": -1}

lrb = LRBoostRegressor(linear_model=RidgeCV(**ridge_args),
                        non_linear_model=RandomForestRegressor(**rf_args))
lrb = lrb.fit(X, y)
predictions = lrb.predict(X)

NameError: name 'LRBoostRegressor' is not defined