In [1]:
from dask.distributed import Client, progress
import os

In [2]:
from sklearn.datasets import make_classification
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
import pandas as pd

In [3]:
service_port = os.environ['DASK_SCHEDULER_SERVICE_PORT']
service_host = os.environ['DASK_SCHEDULER_SERVICE_HOST']
client = Client(f'{service_host}:{service_port}')
client


+-------------+----------------+---------------+---------------+
| Package     | client         | scheduler     | workers       |
+-------------+----------------+---------------+---------------+
| dask        | 2021.06.0      | 2021.06.2     | 2021.06.2     |
| distributed | 2021.06.0      | 2021.06.2     | 2021.06.2     |
| python      | 3.8.10.final.0 | 3.8.0.final.0 | 3.8.0.final.0 |
+-------------+----------------+---------------+---------------+


0,1
Client  Scheduler: tcp://dask-60fb57296e4ed90f61bab71d-dask-scheduler.dask-qe1643-compute.svc.cluster.local:8786  Dashboard: http://dask-60fb57296e4ed90f61bab71d-dask-scheduler.dask-qe1643-compute.svc.cluster.local:8787/status,Cluster  Workers: 3  Cores: 3  Memory: 12.00 GiB


In [4]:
X, y = make_classification(n_samples=1000, random_state=0)
X[:5]

array([[-1.06377997,  0.67640868,  1.06935647, -0.21758002,  0.46021477,
        -0.39916689, -0.07918751,  1.20938491, -0.78531472, -0.17218611,
        -1.08535744, -0.99311895,  0.30693511,  0.06405769, -1.0542328 ,
        -0.52749607, -0.0741832 , -0.35562842,  1.05721416, -0.90259159],
       [ 0.0708476 , -1.69528125,  2.44944917, -0.5304942 , -0.93296221,
         2.86520354,  2.43572851, -1.61850016,  1.30071691,  0.34840246,
         0.54493439,  0.22532411,  0.60556322, -0.19210097, -0.06802699,
         0.9716812 , -1.79204799,  0.01708348, -0.37566904, -0.62323644],
       [ 0.94028404, -0.49214582,  0.67795602, -0.22775445,  1.40175261,
         1.23165333, -0.77746425,  0.01561602,  1.33171299,  1.08477266,
        -0.97805157, -0.05012039,  0.94838552, -0.17342825, -0.47767184,
         0.76089649,  1.00115812, -0.06946407,  1.35904607, -1.18958963],
       [-0.29951677,  0.75988955,  0.18280267, -1.55023271,  0.33821802,
         0.36324148, -2.10052547, -0.4380675 , -

In [6]:
param_grid = {"C": [0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0],
              "kernel": ['rbf', 'poly', 'sigmoid'],
              "shrinking": [True, False]}

grid_search = GridSearchCV(SVC(gamma='auto', random_state=0, probability=True),
                           param_grid=param_grid,
                           return_train_score=False,
                           cv=3,
                           n_jobs=-1)

In [7]:
grid_search.fit(X, y)

GridSearchCV(cv=3,
             estimator=SVC(gamma='auto', probability=True, random_state=0),
             n_jobs=-1,
             param_grid={'C': [0.001, 0.01, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0],
                         'kernel': ['rbf', 'poly', 'sigmoid'],
                         'shrinking': [True, False]})

In [9]:
import joblib

with joblib.parallel_backend():
    grid_search.fit(X, y)

TypeError: __init__() missing 1 required positional argument: 'backend'

In [None]:
pd.DataFrame(grid_search.cv_results_).head()

In [None]:
grid_search.predict(X)[:5]

In [None]:
grid_search.score(X, y)