In [1]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC      

## Data

In [2]:
digits = load_digits()

## Initializations

In [3]:
param_space = {
    'C': np.logspace(-6, 6, 30),
    'gamma': np.logspace(-8, 8, 30),
    'tol': np.logspace(-4, -1, 30),
    'class_weight': [None, 'balanced'],
}

In [4]:
model = SVC(kernel='rbf')
search = RandomizedSearchCV(model, param_space, cv=5, n_iter=100, verbose=1)

# Option 1: Distributed on three nodes with Ray

## Initializing Ray

In [6]:
import ray
from ray.util.joblib import register_ray
from joblib import parallel_backend
register_ray()
ray.init(address='auto')



{'node_ip_address': '172.31.54.229',
 'raylet_ip_address': '172.31.54.229',
 'redis_address': '172.31.54.229:6379',
 'object_store_address': '/tmp/ray/session_2020-04-17_07-31-07_412730_3320/sockets/plasma_store',
 'raylet_socket_name': '/tmp/ray/session_2020-04-17_07-31-07_412730_3320/sockets/raylet',
 'webui_url': 'localhost:8265',
 'session_dir': '/tmp/ray/session_2020-04-17_07-31-07_412730_3320'}

## Executing with Ray

In [8]:
with parallel_backend('ray'):
    search.fit(digits.data, digits.target)



Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=-1)]: Using backend RayBackend with 24 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 tasks      | elapsed:    0.8s
[Parallel(n_jobs=-1)]: Done 152 tasks      | elapsed:    4.4s
[Parallel(n_jobs=-1)]: Done 402 tasks      | elapsed:   10.4s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:   14.4s finished


# Option 2: Default sklearn

In [10]:
search.set_params(n_jobs=-1)
search.fit(digits.data, digits.target)

Fitting 5 folds for each of 100 candidates, totalling 500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    4.1s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:   13.9s
[Parallel(n_jobs=-1)]: Done 434 tasks      | elapsed:   29.9s
[Parallel(n_jobs=-1)]: Done 500 out of 500 | elapsed:   34.2s finished


RandomizedSearchCV(cv=5, error_score='raise-deprecating',
                   estimator=SVC(C=1.0, cache_size=200, class_weight=None,
                                 coef0=0.0, decision_function_shape='ovr',
                                 degree=3, gamma='auto_deprecated',
                                 kernel='rbf', max_iter=-1, probability=False,
                                 random_state=None, shrinking=True, tol=0.001,
                                 verbose=False),
                   iid='warn', n_iter=100, n_jobs=-1,
                   param_distributions={'C': array([1.0...
       0.00032903, 0.00041753, 0.00052983, 0.00067234, 0.00085317,
       0.00108264, 0.00137382, 0.00174333, 0.00221222, 0.00280722,
       0.00356225, 0.00452035, 0.00573615, 0.00727895, 0.00923671,
       0.01172102, 0.01487352, 0.01887392, 0.02395027, 0.03039195,
       0.0385662 , 0.04893901, 0.06210169, 0.07880463, 0.1       ])},
                   pre_dispatch='2*n_jobs', random_state=None, ref