## Eval Sets

This notebook can be used too test the behavior of LightGBM when using evaluation datasets.

In [1]:
import dask.array as da
from dask.distributed import Client, LocalCluster, wait
from lightgbm.dask import DaskLGBMRegressor
from sklearn.datasets import make_regression

In [2]:
n_workers = 3
cluster = LocalCluster(n_workers=n_workers)

client = Client(cluster)
client.wait_for_workers(n_workers)

print(f"View the dashboard: {cluster.dashboard_link}")

View the dashboard: http://127.0.0.1:8787/status


In [3]:
def _make_dataset(n_samples):
    X, y = make_regression(n_samples=n_samples)
    dX = da.from_array(X, chunks=(1000, X.shape[1]))
    dy = da.from_array(y, chunks=1000)
    return dX, dy

# training data
dX, dy = _make_dataset(10_000)

# eval data
dX_e, dy_e = _make_dataset(2_000)

In [7]:
reg_params = {
    "client": client,
    "max_depth": 5,
    "objective": "regression_l1",
    "learning_rate": 0.1,
    "tree_learner": "data",
    "n_estimators": 100,
    "min_child_samples": 1
}

In [8]:
# model with eval sets
dask_reg = DaskLGBMRegressor(**reg_params)
dask_reg.fit(
    X=dX,
    y=dy,
    eval_set=[
        (dX, dy),
        (dX_e, dy_e)
    ]
)
print(dask_reg.best_score_)

Finding random open ports for workers
defaultdict(<class 'collections.OrderedDict'>, {'valid_0': OrderedDict([('l1', 23.068264930953998)]), 'valid_1': OrderedDict([('l1', 213.37329453022264)])})


In [9]:
# model without eval sets
dask_reg = DaskLGBMRegressor(**reg_params)
dask_reg.fit(
    X=dX,
    y=dy,
)
print(dask_reg.best_score_)

Finding random open ports for workers
defaultdict(<class 'collections.OrderedDict'>, {})
