This notebook tests `lightgbm.dask`'s behavior when sparse inputs are passed to `predict()` with `pred_contrib=True`.

In [None]:
import dask.array as da
import numpy as np
from dask.distributed import Client, LocalCluster
from lightgbm.dask import DaskLGBMClassifier
from lightgbm.sklearn import LGBMClassifier
from scipy.sparse import csc_matrix
from sklearn.datasets import make_blobs

In [None]:
# Start a local Dask cluster and attach a client to it.
n_workers = 3
cluster = LocalCluster(n_workers=n_workers)
client = Client(cluster)

# Do not continue until all requested workers are available.
client.wait_for_workers(n_workers=n_workers)

print(f"View the dashboard: {cluster.dashboard_link}")

In [None]:
# Build a small 3-cluster dataset and wrap it in Dask arrays. Every block of
# the feature array is converted to a scipy CSC matrix so that the estimator
# receives sparse input.
chunk_size = 50
X, y = make_blobs(n_samples=100, centers=3, random_state=42)

# Chunk along rows only: each block holds `chunk_size` rows and all columns.
dX = da.from_array(X, chunks=(chunk_size, X.shape[1])).map_blocks(csc_matrix)
dy = da.from_array(y, chunks=chunk_size)

In [None]:
# Train a small distributed classifier on the sparse Dask input, then request
# per-feature contributions (pred_contrib=True) for the same data.
dask_clf = DaskLGBMClassifier(n_estimators=5, num_leaves=2, tree_learner="data")
dask_clf.fit(dX, dy)

preds = dask_clf.predict(dX, pred_contrib=True)
# Materialize the full prediction once; it is reused for the type report and
# for the shape shown at the end of the cell.
preds_computed = preds.compute()

# Report the type of the lazy result, of one materialized partition, and of
# the fully materialized result, alongside the class and feature counts.
print(
    type(preds),
    type(preds.partitions[0].compute()),
    type(preds_computed),
    f"{dask_clf.n_classes_} classes, {dX.shape[1]} features",
)
print("---")
print(dX.partitions[0].compute())
print("---")
preds_computed.shape

In [None]:
# Materialize only the first partition of the distributed pred_contrib output.
first_pred_partition = preds.partitions[0]
first_pred_partition.compute()

In [None]:
# Fit a non-distributed classifier on the same data so its pred_contrib output
# can be inspected next to the Dask estimator's.
# Materialize each Dask collection exactly once and reuse the results.
X = dX.compute()
y = dy.compute()

# NOTE(review): this estimator uses default hyperparameters, whereas dask_clf
# was built with n_estimators=5 and num_leaves=2, so contribution values are
# not directly comparable between the two -- confirm this is intended.
local_clf = LGBMClassifier()
local_clf.fit(X=X, y=y)

# Convert explicitly to CSC before predicting; presumably the materialized
# matrix is not guaranteed to already be in CSC format -- TODO confirm.
local_preds = local_clf.predict(X.tocsc(), pred_contrib=True)

print(local_clf.n_classes_, type(local_preds))
print("---")
print(local_preds)

In [None]:
# Show the first element of the local pred_contrib output (presumably the
# contributions for the first class when a list is returned -- confirm).
first_local_contrib = local_preds[0]
first_local_contrib