# En este ejercicio vamos a optimizar parámetros #

(Credits to https://github.com/codiply/blog-ipython-notebooks/blob/master/scikit-learn-estimator-selection-helper.ipynb )

Para optimizar los parámetros usaremos un GridSearch.

Y comparar clasificadores.



In [1]:
import sys
import IPython
import numpy as np
import pandas as pd
import sklearn as sk


This is a helper class for running paramater grid search across different classification or regression models. The helper takes two dictionaries as its constructor parameters. The first dictionary contains the models to be scored, while the second contains the parameters for each model (see examples below or the [GridSearchCV documentation](http://scikit-learn.org/stable/modules/generated/sklearn.grid_search.GridSearchCV.html) for the expected format). The `fit(X, y)` method runs a parameter grid search with cross validation for each model and for the given training data. After calling `fit(X, y)`, the `score_summary()` method returns a data frame with a summary of the scores.

In [2]:
from sklearn.grid_search import GridSearchCV

class EstimatorSelectionHelper:
    def __init__(self, models, params):
        if not set(models.keys()).issubset(set(params.keys())):
            missing_params = list(set(models.keys()) - set(params.keys()))
            raise ValueError("Some estimators are missing parameters: %s" % missing_params)
        self.models = models
        self.params = params
        self.keys = models.keys()
        self.grid_searches = {}
    
    def fit(self, X, y, cv=3, n_jobs=1, verbose=1, scoring=None, refit=False):
        for key in self.keys:
            print("Running GridSearchCV for %s." % key)
            model = self.models[key]
            params = self.params[key]
            gs = GridSearchCV(model, params, cv=cv, n_jobs=n_jobs, 
                              verbose=verbose, scoring=scoring, refit=refit)
            gs.fit(X,y)
            self.grid_searches[key] = gs    
    
    def score_summary(self, sort_by='mean_score'):
        def row(key, scores, params):
            d = {
                 'estimator': key,
                 'min_score': min(scores),
                 'max_score': max(scores),
                 'mean_score': mean(scores),
                 'std_score': std(scores),
            }
            return pd.Series(dict(params.items() + d.items()))
                      
        rows = [row(k, gsc.cv_validation_scores, gsc.parameters) 
                for k in self.keys
                for gsc in self.grid_searches[k].grid_scores_]
        df = pd.concat(rows, axis=1).T.sort([sort_by], ascending=False)
        
        columns = ['estimator', 'min_score', 'mean_score', 'max_score', 'std_score']
        columns = columns + [c for c in df.columns if c not in columns]
        
        return df[columns]



Classification example
----

I load the data.

In [3]:
from sklearn import datasets

iris = datasets.load_iris()
X_iris = iris.data
y_iris = iris.target

print ("Los datos son : " , iris.data[0:5])


Los datos son :  [[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]


Definimos dos diccionarios.

- Diccionario de modelos.
- Diccionario de juegos de parámetros (GridSearch) a probar con cada modelo.



In [4]:
from sklearn.ensemble import (ExtraTreesClassifier, RandomForestClassifier, 
                              AdaBoostClassifier, GradientBoostingClassifier)
from sklearn.svm import SVC

models1 = { 
    'ExtraTreesClassifier': ExtraTreesClassifier(),
    'RandomForestClassifier': RandomForestClassifier(),
    'AdaBoostClassifier': AdaBoostClassifier(),
    'GradientBoostingClassifier': GradientBoostingClassifier(),
    'SVC': SVC()
}

params1 = { 
    'ExtraTreesClassifier': { 'n_estimators': [16, 32] },
    'RandomForestClassifier': { 'n_estimators': [16, 32] },
    'AdaBoostClassifier':  { 'n_estimators': [16, 32] },
    'GradientBoostingClassifier': { 'n_estimators': [16, 32], 'learning_rate': [0.8, 1.0] },
    'SVC': [
        {'kernel': ['linear'], 'C': [1, 10]},
        {'kernel': ['rbf'], 'C': [1, 10], 'gamma': [0.001, 0.0001]},
    ]
}

I create the helper and fit the data.

In [5]:
helper1 = EstimatorSelectionHelper(models1, params1)
helper1.fit(X_iris, y_iris, scoring='f1', n_jobs=-1)

Running GridSearchCV for ExtraTreesClassifier.
Fitting 3 folds for each of 2 candidates, totalling 6 fits


JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/runpy.py in _run_code(code=<code object <module> at 0x7fe508aa19c0, file "/...3.6/site-packages/ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/root/anaconda2/envs/py36/lib/python3.6/site-packages/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/root/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/root/anacon.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x7fe508aa19c0, file "/...3.6/site-packages/ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/root/anaconda2/envs/py36/lib/python3.6/site-packages/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/root/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/root/anacon.../python3.6/site-packages/ipykernel/kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    473         if self.poller is not None:
    474             self.poller.start()
    475         self.kernel.start()
    476         self.io_loop = ioloop.IOLoop.current()
    477         try:
--> 478             self.io_loop.start()
        self.io_loop.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    479         except KeyboardInterrupt:
    480             pass
    481 
    482 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 5
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 5), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 5)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=5)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': "helper1 = EstimatorSelectionHelper(models1, para...per1.fit(X_iris, y_iris, scoring='f1', n_jobs=-1)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 4, 23, 8, 54, 46, 522037, tzinfo=tzutc()), 'msg_id': 'ca85a829e4e046069648ab17c80e6dbe', 'msg_type': 'execute_request', 'session': '00bde99bd4e44ca4851ab252493ca9e3', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'ca85a829e4e046069648ab17c80e6dbe', 'msg_type': 'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'00bde99bd4e44ca4851ab252493ca9e3']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': "helper1 = EstimatorSelectionHelper(models1, para...per1.fit(X_iris, y_iris, scoring='f1', n_jobs=-1)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 4, 23, 8, 54, 46, 522037, tzinfo=tzutc()), 'msg_id': 'ca85a829e4e046069648ab17c80e6dbe', 'msg_type': 'execute_request', 'session': '00bde99bd4e44ca4851ab252493ca9e3', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'ca85a829e4e046069648ab17c80e6dbe', 'msg_type': 'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'00bde99bd4e44ca4851ab252493ca9e3'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': "helper1 = EstimatorSelectionHelper(models1, para...per1.fit(X_iris, y_iris, scoring='f1', n_jobs=-1)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 4, 23, 8, 54, 46, 522037, tzinfo=tzutc()), 'msg_id': 'ca85a829e4e046069648ab17c80e6dbe', 'msg_type': 'execute_request', 'session': '00bde99bd4e44ca4851ab252493ca9e3', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'ca85a829e4e046069648ab17c80e6dbe', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code="helper1 = EstimatorSelectionHelper(models1, para...per1.fit(X_iris, y_iris, scoring='f1', n_jobs=-1)", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = "helper1 = EstimatorSelectionHelper(models1, para...per1.fit(X_iris, y_iris, scoring='f1', n_jobs=-1)"
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=("helper1 = EstimatorSelectionHelper(models1, para...per1.fit(X_iris, y_iris, scoring='f1', n_jobs=-1)",), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ("helper1 = EstimatorSelectionHelper(models1, para...per1.fit(X_iris, y_iris, scoring='f1', n_jobs=-1)",)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell="helper1 = EstimatorSelectionHelper(models1, para...per1.fit(X_iris, y_iris, scoring='f1', n_jobs=-1)", store_history=True, silent=False, shell_futures=True)
   2723                 self.displayhook.exec_result = result
   2724 
   2725                 # Execute the user code
   2726                 interactivity = "none" if silent else self.ast_node_interactivity
   2727                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2728                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2729                 
   2730                 self.last_execution_succeeded = not has_raised
   2731                 self.last_execution_result = result
   2732 

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Expr object>], cell_name='<ipython-input-5-d1740e7aad54>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 7fe4d4e1cc88, executi..._before_exec=None error_in_exec=None result=None>)
   2851                     return True
   2852 
   2853             for i, node in enumerate(to_run_interactive):
   2854                 mod = ast.Interactive([node])
   2855                 code = compiler(mod, cell_name, "single")
-> 2856                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7fe4d4de2030, file "<ipython-input-5-d1740e7aad54>", line 2>
        result = <ExecutionResult object at 7fe4d4e1cc88, executi..._before_exec=None error_in_exec=None result=None>
   2857                     return True
   2858 
   2859             # Flush softspace
   2860             if softspace(sys.stdout, 0):

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7fe4d4de2030, file "<ipython-input-5-d1740e7aad54>", line 2>, result=<ExecutionResult object at 7fe4d4e1cc88, executi..._before_exec=None error_in_exec=None result=None>)
   2905         outflag = True  # happens in more places, so it's easier as default
   2906         try:
   2907             try:
   2908                 self.hooks.pre_run_code_hook()
   2909                 #rprint('Running code', repr(code_obj)) # dbg
-> 2910                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7fe4d4de2030, file "<ipython-input-5-d1740e7aad54>", line 2>
        self.user_global_ns = {'AdaBoostClassifier': <class 'sklearn.ensemble.weight_boosting.AdaBoostClassifier'>, 'EstimatorSelectionHelper': <class '__main__.EstimatorSelectionHelper'>, 'ExtraTreesClassifier': <class 'sklearn.ensemble.forest.ExtraTreesClassifier'>, 'GradientBoostingClassifier': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingClassifier'>, 'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'IPython': <module 'IPython' from '/root/anaconda2/envs/py36/lib/python3.6/site-packages/IPython/__init__.py'>, 'In': ['', 'import sys\nimport IPython\nimport numpy as np\nimport pandas as pd\nimport sklearn as sk', 'from sklearn.grid_search import GridSearchCV\n\ncl...t in columns]\n        \n        return df[columns]', 'from sklearn import datasets\n\niris = datasets.lo...rget\n\nprint ("Los datos son : " , iris.data[0:5])', "from sklearn.ensemble import (ExtraTreesClassifi... 'C': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", "helper1 = EstimatorSelectionHelper(models1, para...per1.fit(X_iris, y_iris, scoring='f1', n_jobs=-1)"], 'Out': {}, 'RandomForestClassifier': <class 'sklearn.ensemble.forest.RandomForestClassifier'>, 'SVC': <class 'sklearn.svm.classes.SVC'>, ...}
        self.user_ns = {'AdaBoostClassifier': <class 'sklearn.ensemble.weight_boosting.AdaBoostClassifier'>, 'EstimatorSelectionHelper': <class '__main__.EstimatorSelectionHelper'>, 'ExtraTreesClassifier': <class 'sklearn.ensemble.forest.ExtraTreesClassifier'>, 'GradientBoostingClassifier': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingClassifier'>, 'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'IPython': <module 'IPython' from '/root/anaconda2/envs/py36/lib/python3.6/site-packages/IPython/__init__.py'>, 'In': ['', 'import sys\nimport IPython\nimport numpy as np\nimport pandas as pd\nimport sklearn as sk', 'from sklearn.grid_search import GridSearchCV\n\ncl...t in columns]\n        \n        return df[columns]', 'from sklearn import datasets\n\niris = datasets.lo...rget\n\nprint ("Los datos son : " , iris.data[0:5])', "from sklearn.ensemble import (ExtraTreesClassifi... 'C': [1, 10], 'gamma': [0.001, 0.0001]},\n    ]\n}", "helper1 = EstimatorSelectionHelper(models1, para...per1.fit(X_iris, y_iris, scoring='f1', n_jobs=-1)"], 'Out': {}, 'RandomForestClassifier': <class 'sklearn.ensemble.forest.RandomForestClassifier'>, 'SVC': <class 'sklearn.svm.classes.SVC'>, ...}
   2911             finally:
   2912                 # Reset our crash handler in place
   2913                 sys.excepthook = old_excepthook
   2914         except SystemExit as e:

...........................................................................
/home/curso/python-machine-learning-book-2nd-edition/ejercicios/<ipython-input-5-d1740e7aad54> in <module>()
      1 helper1 = EstimatorSelectionHelper(models1, params1)
----> 2 helper1.fit(X_iris, y_iris, scoring='f1', n_jobs=-1)

...........................................................................
/home/curso/python-machine-learning-book-2nd-edition/ejercicios/<ipython-input-2-f9b5431195ed> in fit(self=<__main__.EstimatorSelectionHelper object>, X=array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1....2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]]), y=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), cv=3, n_jobs=-1, verbose=1, scoring='f1', refit=False)
     15             print("Running GridSearchCV for %s." % key)
     16             model = self.models[key]
     17             params = self.params[key]
     18             gs = GridSearchCV(model, params, cv=cv, n_jobs=n_jobs, 
     19                               verbose=verbose, scoring=scoring, refit=refit)
---> 20             gs.fit(X,y)
     21             self.grid_searches[key] = gs    
     22     
     23     def score_summary(self, sort_by='mean_score'):
     24         def row(key, scores, params):

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/sklearn/grid_search.py in fit(self=GridSearchCV(cv=3, error_score='raise',
       e...bs',
       refit=False, scoring='f1', verbose=1), X=array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1....2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]]), y=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]))
    833         y : array-like, shape = [n_samples] or [n_samples, n_output], optional
    834             Target relative to X for classification or regression;
    835             None for unsupervised learning.
    836 
    837         """
--> 838         return self._fit(X, y, ParameterGrid(self.param_grid))
        self._fit = <bound method BaseSearchCV._fit of GridSearchCV(...s',
       refit=False, scoring='f1', verbose=1)>
        X = array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1....2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]])
        y = array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
        self.param_grid = {'n_estimators': [16, 32]}
    839 
    840 
    841 class RandomizedSearchCV(BaseSearchCV):
    842     """Randomized search on hyper parameters.

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/sklearn/grid_search.py in _fit(self=GridSearchCV(cv=3, error_score='raise',
       e...bs',
       refit=False, scoring='f1', verbose=1), X=array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1....2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]]), y=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), parameter_iterable=<sklearn.grid_search.ParameterGrid object>)
    569         )(
    570             delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
    571                                     train, test, self.verbose, parameters,
    572                                     self.fit_params, return_parameters=True,
    573                                     error_score=self.error_score)
--> 574                 for parameters in parameter_iterable
        parameters = undefined
        parameter_iterable = <sklearn.grid_search.ParameterGrid object>
    575                 for train, test in cv)
    576 
    577         # Out is a list of triplet: score, estimator, n_test_samples
    578         n_fits = len(out)

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV._fit.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Mon Apr 23 10:54:47 2018
PID: 23422               Python 3.6.4: /root/anaconda2/envs/py36/bin/python
...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _fit_and_score>, (ExtraTreesClassifier(bootstrap=False, class_weig..., random_state=None, verbose=0, warm_start=False), array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1....2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), make_scorer(f1_score), array([ 17,  18,  19,  20,  21,  22,  23,  24,  ...,
       142, 143, 144, 145, 146, 147, 148, 149]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ...07, 108, 109, 110, 111, 112, 113, 114, 115, 116]), 1, {'n_estimators': 16}, {}), {'error_score': 'raise', 'return_parameters': True})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (ExtraTreesClassifier(bootstrap=False, class_weig..., random_state=None, verbose=0, warm_start=False), array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1....2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]]), array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), make_scorer(f1_score), array([ 17,  18,  19,  20,  21,  22,  23,  24,  ...,
       142, 143, 144, 145, 146, 147, 148, 149]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ...07, 108, 109, 110, 111, 112, 113, 114, 115, 116]), 1, {'n_estimators': 16}, {})
        kwargs = {'error_score': 'raise', 'return_parameters': True}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/sklearn/cross_validation.py in _fit_and_score(estimator=ExtraTreesClassifier(bootstrap=False, class_weig..., random_state=None, verbose=0, warm_start=False), X=array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1....2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]]), y=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]), scorer=make_scorer(f1_score), train=array([ 17,  18,  19,  20,  21,  22,  23,  24,  ...,
       142, 143, 144, 145, 146, 147, 148, 149]), test=array([  0,   1,   2,   3,   4,   5,   6,   7,  ...07, 108, 109, 110, 111, 112, 113, 114, 115, 116]), verbose=1, parameters={'n_estimators': 16}, fit_params={}, return_train_score=False, return_parameters=True, error_score='raise')
   1689                              " numeric value. (Hint: if using 'raise', please"
   1690                              " make sure that it has been spelled correctly.)"
   1691                              )
   1692 
   1693     else:
-> 1694         test_score = _score(estimator, X_test, y_test, scorer)
        test_score = undefined
        estimator = ExtraTreesClassifier(bootstrap=False, class_weig..., random_state=None, verbose=0, warm_start=False)
        X_test = array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1....4, 3.2, 5.3, 2.3],
       [6.5, 3. , 5.5, 1.8]])
        y_test = array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2])
        scorer = make_scorer(f1_score)
   1695         if return_train_score:
   1696             train_score = _score(estimator, X_train, y_train, scorer)
   1697 
   1698     scoring_time = time.time() - start_time

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/sklearn/cross_validation.py in _score(estimator=ExtraTreesClassifier(bootstrap=False, class_weig..., random_state=None, verbose=0, warm_start=False), X_test=array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1....4, 3.2, 5.3, 2.3],
       [6.5, 3. , 5.5, 1.8]]), y_test=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2]), scorer=make_scorer(f1_score))
   1746 def _score(estimator, X_test, y_test, scorer):
   1747     """Compute the score of an estimator on a given test set."""
   1748     if y_test is None:
   1749         score = scorer(estimator, X_test)
   1750     else:
-> 1751         score = scorer(estimator, X_test, y_test)
        score = undefined
        scorer = make_scorer(f1_score)
        estimator = ExtraTreesClassifier(bootstrap=False, class_weig..., random_state=None, verbose=0, warm_start=False)
        X_test = array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1....4, 3.2, 5.3, 2.3],
       [6.5, 3. , 5.5, 1.8]])
        y_test = array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2])
   1752     if hasattr(score, 'item'):
   1753         try:
   1754             # e.g. unwrap memmapped scalars
   1755             score = score.item()

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/sklearn/metrics/scorer.py in __call__(self=make_scorer(f1_score), estimator=ExtraTreesClassifier(bootstrap=False, class_weig..., random_state=None, verbose=0, warm_start=False), X=array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1....4, 3.2, 5.3, 2.3],
       [6.5, 3. , 5.5, 1.8]]), y_true=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2]), sample_weight=None)
    103             return self._sign * self._score_func(y_true, y_pred,
    104                                                  sample_weight=sample_weight,
    105                                                  **self._kwargs)
    106         else:
    107             return self._sign * self._score_func(y_true, y_pred,
--> 108                                                  **self._kwargs)
        self._kwargs = {}
    109 
    110 
    111 class _ProbaScorer(_BaseScorer):
    112     def __call__(self, clf, X, y, sample_weight=None):

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/sklearn/metrics/classification.py in f1_score(y_true=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2]), y_pred=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2, 2, 1, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2]), labels=None, pos_label=1, average='binary', sample_weight=None)
    709 
    710 
    711     """
    712     return fbeta_score(y_true, y_pred, 1, labels=labels,
    713                        pos_label=pos_label, average=average,
--> 714                        sample_weight=sample_weight)
        sample_weight = None
    715 
    716 
    717 def fbeta_score(y_true, y_pred, beta, labels=None, pos_label=1,
    718                 average='binary', sample_weight=None):

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/sklearn/metrics/classification.py in fbeta_score(y_true=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2]), y_pred=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2, 2, 1, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2]), beta=1, labels=None, pos_label=1, average='binary', sample_weight=None)
    823                                                  beta=beta,
    824                                                  labels=labels,
    825                                                  pos_label=pos_label,
    826                                                  average=average,
    827                                                  warn_for=('f-score',),
--> 828                                                  sample_weight=sample_weight)
        sample_weight = None
    829     return f
    830 
    831 
    832 def _prf_divide(numerator, denominator, metric, modifier, average, warn_for):

...........................................................................
/root/anaconda2/envs/py36/lib/python3.6/site-packages/sklearn/metrics/classification.py in precision_recall_fscore_support(y_true=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2]), y_pred=array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...2, 2, 2, 1, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2]), beta=1, labels=None, pos_label=1, average='binary', warn_for=('f-score',), sample_weight=None)
   1035                     raise ValueError("pos_label=%r is not a valid label: %r" %
   1036                                      (pos_label, present_labels))
   1037             labels = [pos_label]
   1038         else:
   1039             raise ValueError("Target is %s but average='binary'. Please "
-> 1040                              "choose another average setting." % y_type)
        y_type = 'multiclass'
   1041     elif pos_label not in (None, 1):
   1042         warnings.warn("Note that pos_label (set to %r) is ignored when "
   1043                       "average != 'binary' (got %r). You may use "
   1044                       "labels=[pos_label] to specify a single positive class."

ValueError: Target is multiclass but average='binary'. Please choose another average setting.
___________________________________________________________________________

Finally, I print the summary.

In [None]:
helper1.score_summary(sort_by='min_score')

Regression example
----

I load the data.

In [None]:
diabetes = datasets.load_diabetes()
X_diabetes = diabetes.data
y_diabetes = diabetes.target

I define the models and the grid search parameters.

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso

models2 = { 
    'LinearRegression': LinearRegression(),
    'Ridge': Ridge(),
    'Lasso': Lasso()
}

params2 = { 
    'LinearRegression': { },
    'Ridge': { 'alpha': [0.1, 1.0] },
    'Lasso': { 'alpha': [0.1, 1.0] }
}

I create the helper and fit the data.

In [None]:
helper2 = EstimatorSelectionHelper(models2, params2)
helper2.fit(X_diabetes, y_diabetes, n_jobs=-1)

Finally, I print the summary.

In [None]:
helper2.score_summary()