# TPOT on Dask on CDSW Workers

In [None]:
!pip3 install dask[complete]==2021.1.1 dask-ml==1.8.0 tpot==0.11.7

In [9]:
import os
import time

import cdsw
from dask.distributed import Client
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from tpot import TPOTClassifier



In [2]:
os.makedirs("_scheduler_", exist_ok=True)
os.makedirs("_worker_", exist_ok=True)

## Start Dask scheduler

In [4]:
dask_scheduler = cdsw.launch_workers(
  n=1,
  cpu=1,
  memory=2,
  kernel="python3",
  code=f"!dask-scheduler --host 0.0.0.0 --dashboard-address 8080 --scheduler-file _scheduler_/dask.log"
)

Wait for the scheduler to start.

In [5]:
time.sleep(10)

Find the IP address of the scheduler.

In [7]:
workers = cdsw.list_workers()
scheduler_id = dask_scheduler[0]['id']
scheduler_ip = [worker['ip_address'] for worker in workers
                if worker['id'] == scheduler_id][0]

scheduler_url = f"tcp://{scheduler_ip}:8786"

scheduler_url

'tcp://100.66.192.2:8786'

## Start Dask workers

In [8]:
dask_workers = cdsw.launch_workers(
  n=2,
  cpu=1,
  memory=0.5,
  kernel="python3",
  code=f"!dask-worker {scheduler_url} --local-directory _worker_"
)

# Wait for the workers to start.
time.sleep(10)

## Connect Dask client

In [12]:
client = Client(scheduler_url)

In [13]:
client

0,1
Client  Scheduler: tcp://100.66.192.2:8786  Dashboard: http://100.66.192.2:8080/status,Cluster  Workers: 2  Cores: 16  Memory: 1000.00 MB


## Load data

In [11]:
digits = load_digits()
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, train_size=0.75, test_size=0.25)

## Define estimator (using Dask!)

In [14]:
estimator = TPOTClassifier(use_dask=True)

## Fit estimator (using Dask workers!)

In [15]:
estimator.fit(X_train, y_train)

TPOTClassifier(use_dask=True)

In [17]:
estimator.predict(X_train)

array([9, 3, 8, ..., 2, 8, 3])

## Close workers

In [19]:
cdsw.stop_workers()

[<Response [204]>, <Response [204]>, <Response [204]>]

distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
_GatheringFuture exception was never retrieved
future: <_GatheringFuture finished exception=CancelledError()>
concurrent.futures._base.CancelledError
