# Use a local ipyparallel cluster

Start the controller and some engines:

1. In a terminal start your controller with `ipcontroller`:
```shell
$ ipcontroller
```

2. In another terminal start your engines with `ipcluster`:
```shell
$ ipcluster engines -n 8
```

IPython Parallel structure:

<img src="https://ipyparallel.readthedocs.io/en/latest/_images/wideView.png" alt='ipyparallel' style="width: 300px;"/>

### 1. Install development versions of Joblib and IPython parallel

In [None]:
import subprocess
import sys

# Install through the interpreter that runs this kernel
# (``sys.executable -m pip``) rather than whichever ``pip`` executable is
# first on PATH; otherwise the packages may be installed into a different
# environment than the one this notebook imports from.
for repo_url in (
    'git+https://github.com/ipython/ipyparallel',
    'git+https://github.com/joblib/joblib',
):
    # check_call raises CalledProcessError when pip exits with a non-zero
    # status, so a failed install stops the cell instead of passing silently.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', repo_url])

### 2. Monkey patch internal joblib in scikit-learn

In [None]:
# Force scikit-learn to use the development branch of joblib by copying
# every module-level attribute of the standalone package over the copy
# embedded in scikit-learn. This will no longer be necessary once
# scikit-learn is in sync with joblib 0.10+.
import joblib
from sklearn.externals import joblib as skl_joblib

print('Monkeypatching scikit-learn embedded joblib')
for attr_name, attr_value in vars(joblib).items():
    setattr(skl_joblib, attr_name, attr_value)

### 3. Parameter search with scikit-learn

In [None]:
from sklearn.datasets import load_digits

# Load the digits dataset; the cells below read its ``data`` and ``target``
# attributes.
digits = load_digits()

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Display one sample as an 8x8 grayscale image, titled with its true label.
sample_idx = 124
sample_image = digits.data[sample_idx].reshape(8, 8)
plt.imshow(sample_image, cmap=plt.cm.gray, interpolation='nearest')
# The trailing semicolon keeps the notebook from echoing the call's return
# value below the figure.
plt.title("True label: %d" % digits.target[sample_idx]);

In [None]:
from pprint import pprint

import numpy as np
from sklearn.svm import SVC

# ``sklearn.grid_search`` was deprecated in scikit-learn 0.18 and removed in
# 0.20 in favour of ``sklearn.model_selection``. Prefer the new location and
# fall back to the old one so the notebook still runs on older releases.
try:
    from sklearn.model_selection import RandomizedSearchCV
except ImportError:
    from sklearn.grid_search import RandomizedSearchCV

# Estimator whose hyper-parameters are searched in the cells below.
model = SVC(kernel='rbf')

# Candidate values for each SVC parameter; the randomized searches below
# sample their settings from this space.
param_space = {
    'C': np.logspace(-6, 6, 13),        # 1e-6 .. 1e6, one value per decade
    'gamma': np.logspace(-8, 8, 17),    # 1e-8 .. 1e8, one value per decade
    'tol': np.logspace(-4, -1, 4),      # 1e-4 .. 1e-1, one value per decade
    'class_weight': [None, 'balanced'],
}

## Run using a single job

In [None]:
%%time
# Randomized search with one sampled parameter setting and 2-fold
# cross-validation, run without selecting a joblib backend.
search = RandomizedSearchCV(
    model,
    param_space,
    n_iter=1,
    cv=2,
    random_state=0,
)
search.fit(digits.data, digits.target)

# Report the best cross-validation score and the parameters that produced it.
print("Best parameter score: %0.3f" % search.best_score_)
pprint(search.best_params_)

## Run using the ipyparallel cluster

In [None]:
import ipyparallel as ipp
from ipyparallel.joblib import IPythonParallelBackend
from joblib import parallel_backend, register_parallel_backend
# Register the ipyparallel backend class with joblib under the name
# 'ipyparallel' so it can be selected with parallel_backend('ipyparallel').
register_parallel_backend('ipyparallel', IPythonParallelBackend)

In [None]:
%%time
# Randomized search with five sampled parameter settings, fitted inside the
# 'ipyparallel' joblib backend context.
with parallel_backend('ipyparallel'):
    search = RandomizedSearchCV(
        model,
        param_space,
        n_iter=5,
        cv=2,
        random_state=0,
        verbose=10,
    )
    search.fit(digits.data, digits.target)

In [None]:
# Report the best cross-validation score and the parameters that produced it.
print("Best parameter score: %0.3f" % search.best_score_)
pprint(search.best_params_)