## Using Dask-ML for Hyper-Parameter Tuning

In [1]:
from dask.distributed import Client, progress
client = Client(processes=False, threads_per_worker=4,
                n_workers=3, memory_limit='2GB')
client

Perhaps you already have a cluster running?
Hosting the HTTP server on port 61949 instead


0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://10.0.0.227:61949/status,

0,1
Dashboard: http://10.0.0.227:61949/status,Workers: 3
Total threads: 12,Total memory: 5.59 GiB
Status: running,Using processes: False

0,1
Comm: inproc://10.0.0.227/28175/1,Workers: 3
Dashboard: http://10.0.0.227:61949/status,Total threads: 12
Started: Just now,Total memory: 5.59 GiB

0,1
Comm: inproc://10.0.0.227/28175/4,Total threads: 4
Dashboard: http://10.0.0.227:61950/status,Memory: 1.86 GiB
Nanny: None,
Local directory: /var/folders/08/zn3bgg793fndlb4lnlcv_sxr0000gn/T/dask-scratch-space/worker-pcgmuynj,Local directory: /var/folders/08/zn3bgg793fndlb4lnlcv_sxr0000gn/T/dask-scratch-space/worker-pcgmuynj

0,1
Comm: inproc://10.0.0.227/28175/6,Total threads: 4
Dashboard: http://10.0.0.227:61951/status,Memory: 1.86 GiB
Nanny: None,
Local directory: /var/folders/08/zn3bgg793fndlb4lnlcv_sxr0000gn/T/dask-scratch-space/worker-5iw6yy90,Local directory: /var/folders/08/zn3bgg793fndlb4lnlcv_sxr0000gn/T/dask-scratch-space/worker-5iw6yy90

0,1
Comm: inproc://10.0.0.227/28175/8,Total threads: 4
Dashboard: http://10.0.0.227:61952/status,Memory: 1.86 GiB
Nanny: None,
Local directory: /var/folders/08/zn3bgg793fndlb4lnlcv_sxr0000gn/T/dask-scratch-space/worker-b15lsj3p,Local directory: /var/folders/08/zn3bgg793fndlb4lnlcv_sxr0000gn/T/dask-scratch-space/worker-b15lsj3p


In [2]:
from sklearn.svm import SVC
from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV

In [3]:
X, y = make_classification(n_samples=500,
                           random_state=0,
                           n_classes=3,
                           n_features=5,
                           n_informative=3,
                           n_redundant=2)
print(len(X[0]))
X[:2]

5


array([[ 1.35660265,  0.01737486, -0.39541825,  1.73600924,  0.69678037],
       [-3.13368837, -3.58305728, -0.05784148,  0.47604655,  3.79569034]])

In [4]:
y[:2]

array([2, 1])

In [5]:
param_grid = {
    "C": [0.00001, 0.0001, 0.001, 0.01, 0.1, 1],
    "kernel": ['rbf', 'poly', 'sigmoid'],
    "degree": [1, 2, 3, 4],
    "coef0": [1, 0.5, 0.3, 0.2 , 0.1],
    "gamma": ["auto", "scale"]
             }


In [6]:
clf = SVC(random_state=0, probability=True)

grid_search = GridSearchCV(clf,
                           param_grid=param_grid,
                           cv=3,
                           n_jobs=-1)

In [7]:
%time grid_search.fit(X, y)

CPU times: user 599 ms, sys: 125 ms, total: 724 ms
Wall time: 3.64 s


In [9]:
# from sklearn.externals import joblib
import joblib

with joblib.parallel_backend('dask'):
    %time _ = grid_search.fit(X, y)

CPU times: user 25.2 s, sys: 612 ms, total: 25.9 s
Wall time: 3.75 s


In [10]:
grid_search.predict(X)[:10]

array([2, 1, 2, 2, 1, 1, 2, 2, 0, 0])