In [6]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestRegressor
from lightgbm import LGBMRegressor
from lrboost import LRBoostRegressor
from lrboost.rongba import RONGBA
import numpy as np
from sklearn.linear_model import RidgeCV, Ridge
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint, uniform

X, y = load_iris(return_X_y=True)


## Default Usage

In [None]:
lrb = LRBoostRegressor().fit(X, y)
preds = lrb.predict(X)[1:10]
#BUG -- need to fix ridge errors here
 

## Directly Provide Model Parameters

In [None]:
ridge_args = {"alphas": np.logspace(-4, 3, 10, endpoint=True), "cv": 5}
rf_args = {"n_estimators": 50, "n_jobs": -1}
lrb = LRBoostRegressor(primary_model=RidgeCV(**ridge_args),
                    secondary_model=RandomForestRegressor(**rf_args))
lrb = lrb.fit(X, y)
preds = lrb.predict(X)

## Tune Hyperparamters

- Note that when doing a tuning search such asn RandomSearchCV(), the primary model cannot be also a CV'd model. Therefore we replace RidgeCV() with Ridge().

In [10]:
from tabnanny import verbose


X_train = X[0:140, ]
X_val = X[140:150, ]
y_train = y[0:140]
y_val = y[140:150]

fit_params = {
    "early_stopping_rounds": 3, 
    "eval_metric": 'rmse', 
    "eval_set": [(X_val, y_val)],
    "eval_names": ['validation'],
    "verbose": 100
    }

lrb = LRBoostRegressor(primary_model=Ridge(),
                        secondary_model=LGBMRegressor())

param_grid = {
    'primary_model__alpha': np.logspace(-4, 3, 10, endpoint=True), 
    'secondary_model__num_leaves': randint(6, 50), 
    'secondary_model__min_child_samples': randint(100, 500), 
    'secondary_model__min_child_weight': [1e-5, 1e-3, 1e-2, 1e-1, 1, 1e1, 1e2, 1e3, 1e4],
    'secondary_model__learning_rate': list(np.logspace(np.log10(0.005), np.log10(0.5), base = 10, num = 100)),
    'secondary_model__subsample': uniform(loc = 0.2, scale = 0.8), 
    'secondary_model__colsample_bytree': uniform(loc = 0.4, scale = 0.6),
    'secondary_model__reg_alpha': [0, 1e-1, 1, 2, 10, 100],
    'secondary_model__reg_lambda': [0, 1e-1, 1, 2, 10, 100]
    }
        
rand_search = RandomizedSearchCV(
    estimator = lrb, 
    param_distributions = param_grid).fit(X, y, secondary_fit_params = fit_params)

best_model = rand_search.best_estimator_

preds = best_model.predict(X)


Training until validation scores don't improve for 3 rounds
Early stopping, best iteration is:
[1]	validation's rmse: 2	validation's l2: 4
Training until validation scores don't improve for 3 rounds
Early stopping, best iteration is:
[1]	validation's rmse: 2	validation's l2: 4
Training until validation scores don't improve for 3 rounds
Early stopping, best iteration is:
[1]	validation's rmse: 2	validation's l2: 4
Training until validation scores don't improve for 3 rounds
Early stopping, best iteration is:
[1]	validation's rmse: 2	validation's l2: 4
Training until validation scores don't improve for 3 rounds
Early stopping, best iteration is:
[1]	validation's rmse: 2	validation's l2: 4
Training until validation scores don't improve for 3 rounds
Early stopping, best iteration is:
[1]	validation's rmse: 2	validation's l2: 4
Training until validation scores don't improve for 3 rounds
Early stopping, best iteration is:
[1]	validation's rmse: 2	validation's l2: 4
Training until validation s