In [2]:
import numpy as np
import sklearn.datasets
import sklearn.metrics
from sklearn.model_selection import train_test_split
import xgboost as xgb

def objective(trial):
    """Train an XGBoost classifier with trial-suggested settings and score it.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample ``booster``, ``lambda``, ``alpha``,
        ``max_depth``, ``eta``, ``gamma`` and ``grow_policy``.

    Returns
    -------
    float
        Accuracy of the trained model on the held-out 25% of the
        breast-cancer dataset.
    """
    # Load our dataset
    X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
    # Fix the split seed so every trial is evaluated on the same held-out
    # rows; otherwise accuracy differences between trials mix the effect of
    # the hyperparameters with the noise of a different random split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=0
    )
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

    # Get set of hyperparameters
    param = {
        # "verbosity" replaces the deprecated "silent" flag, which current
        # XGBoost releases report as an unused parameter.
        "verbosity": 0,
        "objective": "binary:logistic",
        "booster": trial.suggest_categorical("booster", ["gbtree", "dart"]),
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
        "max_depth": trial.suggest_int("max_depth", 1, 9),
        "eta": trial.suggest_float("eta", 1e-8, 1.0, log=True),
        "gamma": trial.suggest_float("gamma", 1e-8, 1.0, log=True),
        "grow_policy": trial.suggest_categorical("grow_policy", ["depthwise", "lossguide"]),
    }

    # Train XGBoost model (default number of boosting rounds)
    bst = xgb.train(param, dtrain)
    preds = bst.predict(dtest)

    # binary:logistic yields probabilities; round them to 0/1 class labels
    pred_labels = np.rint(preds)
    accuracy = sklearn.metrics.accuracy_score(y_test, pred_labels)
    return accuracy

from dask.distributed import Client
import coiled
import dask_optuna
import joblib
import optuna

# Number of workers requested from Coiled. The same value is used when
# waiting for workers below, so the two can no longer drift apart.
N_WORKERS = 10
# Number of Optuna trials to run across the cluster
N_TRIALS = 500

# Create a Dask cluster with Coiled
cluster = coiled.Cluster(n_workers=N_WORKERS, software="coiled-examples/ml-with-dask")
# Connect Dask to our cluster
client = Client(cluster)
print(f"Dask dashboard is available at {client.dashboard_link}")
# Block until every requested worker has joined before submitting trials
client.wait_for_workers(N_WORKERS)

# Create Dask-compatible Optuna storage class
storage = dask_optuna.DaskStorage()

# Run the optimization trials on our cluster; joblib's "dask" backend
# dispatches them to the connected client.
study = optuna.create_study(direction="maximize", storage=storage)
with joblib.parallel_backend("dask"):
    study.optimize(objective, n_trials=N_TRIALS, n_jobs=-1)

Output()

distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client


Dask dashboard is available at http://44.193.195.50:8787


Traceback (most recent call last):
  File "/Users/rpelgrim/mambaforge/envs/coiled_taxi/lib/python3.9/site-packages/distributed/comm/tcp.py", line 409, in connect
    stream = await self.client.connect(
  File "/Users/rpelgrim/mambaforge/envs/coiled_taxi/lib/python3.9/site-packages/tornado/tcpclient.py", line 275, in connect
    af, addr, stream = await connector.start(connect_timeout=timeout)
asyncio.exceptions.CancelledError

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/rpelgrim/mambaforge/envs/coiled_taxi/lib/python3.9/asyncio/tasks.py", line 490, in wait_for
    return fut.result()
asyncio.exceptions.CancelledError

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/Users/rpelgrim/mambaforge/envs/coiled_taxi/lib/python3.9/site-packages/distributed/comm/core.py", line 289, in connect
    comm = await asyncio.wait_for(
  File "/Users/rpelgrim/mambafo