# Modeling

In [16]:
%matplotlib inline
import cPickle
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.cross_validation import train_test_split
from sklearn.grid_search import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import log_loss
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import LinearSVC

Grab the engineered data

In [2]:
def read_pickle(file_name):
    f = open(file_name, 'rb')
    p = cPickle.load(f)
    f.close()
    return p


train = read_pickle('data/train.engineered')
test = read_pickle('data/test.engineered')
outcomes = read_pickle('data/outcomes.engineered')
outcomes_le = read_pickle('data/outcomes_le.engineered')

Split the `train` data into training/test sets using the hold-out method. Though there is a DataFrame labeled `test`, this is really the set that we want to make predictions against (and, we don't have labeled examples for this set).

In [3]:
X_train, X_test, y_train, y_test = train_test_split(
    np.array(train), outcomes, test_size = 0.2, random_state = 10)

## Baseline model

Though I suspect other models will make more accurate predictions, let me quickly try out a logistic regression model w/ different regularization hyperparameters.

In [7]:
def train_test_model(model, hyperparameters, X_train, X_test, y_train, y_test):
    """
    Given a [model] and a set of possible [hyperparameters], along with 
    matricies corresponding to hold-out cross-validation, returns a model w/ 
    optimized hyperparameters using log-loss scoring and 5-fold cross-validation.
    """
    optimized_model = GridSearchCV(
        model, hyperparameters, cv = 5, n_jobs = -1, scoring = 'log_loss')
    optimized_model.fit(X_train, y_train)
    print 'Optimized parameters:', optimized_model.best_params_
    print 'Log loss:', np.absolute(optimized_model.score(X_test, y_test))
    return optimized_model


def create_submission(name, model, train, outcomes, outcomes_le, test):
    """
    Train [model] on [train] and predict the probabilties on [test], and
    format the submission according to Kaggle.
    """
    clf = model.best_estimator_
    clf.fit(np.array(train), outcomes)
    probs = clf.predict_proba(np.array(test))
    results = pd.DataFrame(probs)
    results.columns = list(outcomes_le.inverse_transform(list(results)))
    results['ID'] = pd.read_csv('data/test.csv')[['ID']].astype(int)
    results = results[['ID', 'Adoption', 'Died', 'Euthanasia', 
                       'Return_to_owner', 'Transfer']]
    results.to_csv('submissions/' + name, index = False)
    return None

In [5]:
%%time
logit_model = train_test_model(
    LogisticRegression(), 
    {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000], 'penalty': ['l1', 'l2']}, 
    X_train, X_test, y_train, y_test)

Optimized parameters: {'penalty': 'l1', 'C': 1}
Log loss: 0.88836929837
CPU times: user 12.3 s, sys: 48 ms, total: 12.4 s
Wall time: 2min 2s




In [8]:
create_submission('first_submission.csv', logit_model, 
                  train, outcomes, outcomes_le, test)

My estimate of the test error was much lower than the actual error - the log loss on the public leaderboard for this model is 1.83, compared to 0.89 here. Let's also try a logistic regression using an elastic net penalty instead of an L1 penalty.

In [11]:
%%time
logit_en_model = train_test_model(
    SGDClassifier(penalty = 'elasticnet', loss = 'log'), 
    {'alpha': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}, 
    X_train, X_test, y_train, y_test)

Optimized parameters: {'alpha': 0.001}
Log loss: 0.896135463298
CPU times: user 2.43 s, sys: 56 ms, total: 2.48 s
Wall time: 10.2 s


The result was a higher error, time to try a more advanced model.

## SVM

I am guessing that a Random Forest will do a better job making predictions than an SVM, but I want to try an SVM first. Fist up is a linear SVM using stochastic gradient descent.

In [19]:
%%time
lin_svm_sgd_model = train_test_model(
    LinearSVC(), 
    {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]}, 
    X_train, X_test, y_train, y_test)

JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/runpy.py in _run_module_as_main(mod_name='ipykernel.__main__', alter_argv=1)
    157     pkg_name = mod_name.rpartition('.')[0]
    158     main_globals = sys.modules["__main__"].__dict__
    159     if alter_argv:
    160         sys.argv[0] = fname
    161     return _run_code(code, main_globals, None,
--> 162                      "__main__", fname, loader, pkg_name)
        fname = '/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/ipykernel/__main__.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = 'ipykernel'
    163 
    164 def run_module(mod_name, init_globals=None,
    165                run_name=None, alter_sys=False):
    166     """Execute a module's code without importing it

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/runpy.py in _run_code(code=<code object <module> at 0x7f6b89d72e30, file "/...2.7/site-packages/ipykernel/__main__.py", line 1>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/home/jake/m...python2.7/site-packages/ipykernel/kernelapp.pyc'>}, init_globals=None, mod_name='__main__', mod_fname='/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/ipykernel/__main__.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='ipykernel')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 0x7f6b89d72e30, file "/...2.7/site-packages/ipykernel/__main__.py", line 1>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/home/jake/m...python2.7/site-packages/ipykernel/kernelapp.pyc'>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/ipykernel/__main__.py in <module>()
      1 
      2 
----> 3 
      4 if __name__ == '__main__':
      5     from ipykernel import kernelapp as app
      6     app.launch_new_instance()
      7 
      8 
      9 
     10 

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    591         
    592         If a global instance already exists, this reinitializes and starts it
    593         """
    594         app = cls.instance(**kwargs)
    595         app.initialize(argv)
--> 596         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    597 
    598 #-----------------------------------------------------------------------------
    599 # utility functions, for convenience
    600 #-----------------------------------------------------------------------------

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    437         
    438         if self.poller is not None:
    439             self.poller.start()
    440         self.kernel.start()
    441         try:
--> 442             ioloop.IOLoop.instance().start()
    443         except KeyboardInterrupt:
    444             pass
    445 
    446 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    157             PollIOLoop.configure(ZMQIOLoop)
    158         return PollIOLoop.current(*args, **kwargs)
    159     
    160     def start(self):
    161         try:
--> 162             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    163         except ZMQError as e:
    164             if e.errno == ETERM:
    165                 # quietly return on ETERM
    166                 pass

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    878                 self._events.update(event_pairs)
    879                 while self._events:
    880                     fd, events = self._events.popitem()
    881                     try:
    882                         fd_obj, handler_func = self._handlers[fd]
--> 883                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    884                     except (OSError, IOError) as e:
    885                         if errno_from_exception(e) == errno.EPIPE:
    886                             # Happens when the client closes the connection
    887                             pass

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    271         if self.control_stream:
    272             self.control_stream.on_recv(self.dispatch_control, copy=False)
    273 
    274         def make_dispatcher(stream):
    275             def dispatcher(msg):
--> 276                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    277             return dispatcher
    278 
    279         for s in self.shell_streams:
    280             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u"%%time\nlin_svm_sgd_model = train_test_model(\...10000]}, \n    X_train, X_test, y_train, y_test)", u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-06-02T21:20:43.656075', u'msg_id': u'DD694A6CE74D4BFE8281E40469656CFE', u'msg_type': u'execute_request', u'session': u'BCA0561FD636481F83ED377C8E2B9391', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'DD694A6CE74D4BFE8281E40469656CFE', 'msg_type': u'execute_request', 'parent_header': {}})
    223             self.log.error("UNKNOWN MESSAGE TYPE: %r", msg_type)
    224         else:
    225             self.log.debug("%s: %s", msg_type, msg)
    226             self.pre_handler_hook()
    227             try:
--> 228                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['BCA0561FD636481F83ED377C8E2B9391']
        msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u"%%time\nlin_svm_sgd_model = train_test_model(\...10000]}, \n    X_train, X_test, y_train, y_test)", u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-06-02T21:20:43.656075', u'msg_id': u'DD694A6CE74D4BFE8281E40469656CFE', u'msg_type': u'execute_request', u'session': u'BCA0561FD636481F83ED377C8E2B9391', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'DD694A6CE74D4BFE8281E40469656CFE', 'msg_type': u'execute_request', 'parent_header': {}}
    229             except Exception:
    230                 self.log.error("Exception in message handler:", exc_info=True)
    231             finally:
    232                 self.post_handler_hook()

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['BCA0561FD636481F83ED377C8E2B9391'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u"%%time\nlin_svm_sgd_model = train_test_model(\...10000]}, \n    X_train, X_test, y_train, y_test)", u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-06-02T21:20:43.656075', u'msg_id': u'DD694A6CE74D4BFE8281E40469656CFE', u'msg_type': u'execute_request', u'session': u'BCA0561FD636481F83ED377C8E2B9391', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'DD694A6CE74D4BFE8281E40469656CFE', 'msg_type': u'execute_request', 'parent_header': {}})
    386         if not silent:
    387             self.execution_count += 1
    388             self._publish_execute_input(code, parent, self.execution_count)
    389 
    390         reply_content = self.do_execute(code, silent, store_history,
--> 391                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    392 
    393         # Flush output before sending the reply.
    394         sys.stdout.flush()
    395         sys.stderr.flush()

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u"%%time\nlin_svm_sgd_model = train_test_model(\...10000]}, \n    X_train, X_test, y_train, y_test)", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    194 
    195         reply_content = {}
    196         # FIXME: the shell calls the exception handler itself.
    197         shell._reply_content = None
    198         try:
--> 199             shell.run_cell(code, store_history=store_history, silent=silent)
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u"%%time\nlin_svm_sgd_model = train_test_model(\...10000]}, \n    X_train, X_test, y_train, y_test)"
        store_history = True
        silent = False
    200         except:
    201             status = u'error'
    202             # FIXME: this code right now isn't being used yet by default,
    203             # because the run_cell() call above directly fires off exception

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u"%%time\nlin_svm_sgd_model = train_test_model(\...10000]}, \n    X_train, X_test, y_train, y_test)", store_history=True, silent=False, shell_futures=True)
   2718                 self.displayhook.exec_result = result
   2719 
   2720                 # Execute the user code
   2721                 interactivity = "none" if silent else self.ast_node_interactivity
   2722                 self.run_ast_nodes(code_ast.body, cell_name,
-> 2723                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2724 
   2725                 # Reset this so later displayed values do not modify the
   2726                 # ExecutionResult
   2727                 self.displayhook.exec_result = None

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Expr object>], cell_name='<ipython-input-19-36e3fd199d44>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<IPython.core.interactiveshell.ExecutionResult object>)
   2826                     return True
   2827 
   2828             for i, node in enumerate(to_run_interactive):
   2829                 mod = ast.Interactive([node])
   2830                 code = compiler(mod, cell_name, "single")
-> 2831                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7f6b4d675930, file "<ipython-input-19-36e3fd199d44>", line 1>
        result = <IPython.core.interactiveshell.ExecutionResult object>
   2832                     return True
   2833 
   2834             # Flush softspace
   2835             if softspace(sys.stdout, 0):

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7f6b4d675930, file "<ipython-input-19-36e3fd199d44>", line 1>, result=<IPython.core.interactiveshell.ExecutionResult object>)
   2880         outflag = 1  # happens in more places, so it's easier as default
   2881         try:
   2882             try:
   2883                 self.hooks.pre_run_code_hook()
   2884                 #rprint('Running code', repr(code_obj)) # dbg
-> 2885                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7f6b4d675930, file "<ipython-input-19-36e3fd199d44>", line 1>
        self.user_global_ns = {'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'In': ['', u'import cPickle\nimport numpy as np\nimport pan...m sklearn.linear_model import LogisticRegression', u"def read_pickle(file_name):\n    f = open(file..._le = read_pickle('data/outcomes_le.engineered')", u'X_train, X_test, y_train, y_test = train_test_...), outcomes, test_size = 0.2, random_state = 10)', u'def train_test_model(model, hyperparameters, X...sions/\' + name, index = False)\n    return None', u'get_ipython().run_cell_magic(u\'time\', u\'\',...\']}, \\n    X_train, X_test, y_train, y_test)")', u"create_submission('first_submission.csv', logi...             train, outcomes, outcomes_le, test)", u'def train_test_model(model, hyperparameters, X...sions/\' + name, index = False)\n    return None', u"create_submission('first_submission.csv', logi...             train, outcomes, outcomes_le, test)", u'import cPickle\nimport numpy as np\nimport pan...r_model import LogisticRegression, SGDClassifier', u'get_ipython().run_cell_magic(u\'time\', u\'\',...00]}, \\n    X_train, X_test, y_train, y_test)")', u'get_ipython().run_cell_magic(u\'time\', u\'\',...00]}, \\n    X_train, X_test, y_train, y_test)")', u'get_ipython().run_cell_magic(u\'time\', u\'\',...00]}, \\n    X_train, X_test, y_train, y_test)")', u'import cPickle\nimport numpy as np\nimport pan...SGDClassifier\nfrom sklearn.svm import LinearSVC', u"get_ipython().magic(u'matplotlib inlineZ')\n\n...SGDClassifier\nfrom sklearn.svm import LinearSVC", u"get_ipython().magic(u'matplotlib inline')\n\ni...SGDClassifier\nfrom sklearn.svm import LinearSVC", u"get_ipython().magic(u'matplotlib inline')\nimp...SGDClassifier\nfrom sklearn.svm import LinearSVC", u'np.logspace(-2, 10, 10)', u'np.logspace(-2, 10, 5)', u'get_ipython().run_cell_magic(u\'time\', u\'\',...00]}, \\n    X_train, X_test, y_train, y_test)")'], 'LinearSVC': <class 'sklearn.svm.classes.LinearSVC'>, 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'Out': {17: array([  1.00000000e-02,   2.15443469e-01,   4.6...e+07,   4.64158883e+08,
         1.00000000e+10]), 18: array([  1.00000000e-02,   1.00000000e+01,   1.0...e+04,
         1.00000000e+07,   1.00000000e+10])}, 'RandomizedSearchCV': <class 'sklearn.grid_search.RandomizedSearchCV'>, 'SGDClassifier': <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'>, 'X_test': array([[ 0.        ,  2.988028  ,  0.        , ....  0.        ,
         1.        ,  0.        ]]), 'X_train': array([[ 0.        , -0.07773497,  0.        , ....  0.        ,
         1.        ,  0.        ]]), '_': array([  1.00000000e-02,   1.00000000e+01,   1.0...e+04,
         1.00000000e+07,   1.00000000e+10]), ...}
        self.user_ns = {'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'In': ['', u'import cPickle\nimport numpy as np\nimport pan...m sklearn.linear_model import LogisticRegression', u"def read_pickle(file_name):\n    f = open(file..._le = read_pickle('data/outcomes_le.engineered')", u'X_train, X_test, y_train, y_test = train_test_...), outcomes, test_size = 0.2, random_state = 10)', u'def train_test_model(model, hyperparameters, X...sions/\' + name, index = False)\n    return None', u'get_ipython().run_cell_magic(u\'time\', u\'\',...\']}, \\n    X_train, X_test, y_train, y_test)")', u"create_submission('first_submission.csv', logi...             train, outcomes, outcomes_le, test)", u'def train_test_model(model, hyperparameters, X...sions/\' + name, index = False)\n    return None', u"create_submission('first_submission.csv', logi...             train, outcomes, outcomes_le, test)", u'import cPickle\nimport numpy as np\nimport pan...r_model import LogisticRegression, SGDClassifier', u'get_ipython().run_cell_magic(u\'time\', u\'\',...00]}, \\n    X_train, X_test, y_train, y_test)")', u'get_ipython().run_cell_magic(u\'time\', u\'\',...00]}, \\n    X_train, X_test, y_train, y_test)")', u'get_ipython().run_cell_magic(u\'time\', u\'\',...00]}, \\n    X_train, X_test, y_train, y_test)")', u'import cPickle\nimport numpy as np\nimport pan...SGDClassifier\nfrom sklearn.svm import LinearSVC', u"get_ipython().magic(u'matplotlib inlineZ')\n\n...SGDClassifier\nfrom sklearn.svm import LinearSVC", u"get_ipython().magic(u'matplotlib inline')\n\ni...SGDClassifier\nfrom sklearn.svm import LinearSVC", u"get_ipython().magic(u'matplotlib inline')\nimp...SGDClassifier\nfrom sklearn.svm import LinearSVC", u'np.logspace(-2, 10, 10)', u'np.logspace(-2, 10, 5)', u'get_ipython().run_cell_magic(u\'time\', u\'\',...00]}, \\n    X_train, X_test, y_train, y_test)")'], 'LinearSVC': <class 'sklearn.svm.classes.LinearSVC'>, 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'Out': {17: array([  1.00000000e-02,   2.15443469e-01,   4.6...e+07,   4.64158883e+08,
         1.00000000e+10]), 18: array([  1.00000000e-02,   1.00000000e+01,   1.0...e+04,
         1.00000000e+07,   1.00000000e+10])}, 'RandomizedSearchCV': <class 'sklearn.grid_search.RandomizedSearchCV'>, 'SGDClassifier': <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'>, 'X_test': array([[ 0.        ,  2.988028  ,  0.        , ....  0.        ,
         1.        ,  0.        ]]), 'X_train': array([[ 0.        , -0.07773497,  0.        , ....  0.        ,
         1.        ,  0.        ]]), '_': array([  1.00000000e-02,   1.00000000e+01,   1.0...e+04,
         1.00000000e+07,   1.00000000e+10]), ...}
   2886             finally:
   2887                 # Reset our crash handler in place
   2888                 sys.excepthook = old_excepthook
   2889         except SystemExit as e:

...........................................................................
/home/jake/shelter-animals-kaggle/<ipython-input-19-36e3fd199d44> in <module>()
----> 1 
      2 
      3 
      4 
      5 
      6 get_ipython().run_cell_magic(u'time', u'', u"lin_svm_sgd_model = train_test_model(\n    LinearSVC(), \n    {'alpha': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]}, \n    X_train, X_test, y_train, y_test)")
      7 
      8 
      9 
     10 

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, magic_name=u'time', line=u'', cell=u"lin_svm_sgd_model = train_test_model(\n    Lin...10000]}, \n    X_train, X_test, y_train, y_test)")
   2115             # This will need to be updated if the internal calling logic gets
   2116             # refactored, or else we'll be expanding the wrong variables.
   2117             stack_depth = 2
   2118             magic_arg_s = self.var_expand(line, stack_depth)
   2119             with self.builtin_trap:
-> 2120                 result = fn(magic_arg_s, cell)
        result = undefined
        fn = <bound method ExecutionMagics.time of <IPython.core.magics.execution.ExecutionMagics object>>
        magic_arg_s = u''
        cell = u"lin_svm_sgd_model = train_test_model(\n    Lin...10000]}, \n    X_train, X_test, y_train, y_test)"
   2121             return result
   2122 
   2123     def find_line_magic(self, magic_name):
   2124         """Find and return a line magic by name.

...........................................................................
/home/jake/shelter-animals-kaggle/<decorator-gen-60> in time(self=<IPython.core.magics.execution.ExecutionMagics object>, line=u'', cell=u"lin_svm_sgd_model = train_test_model(\n    Lin...10000]}, \n    X_train, X_test, y_train, y_test)", local_ns=None)
      1 
----> 2 
      3 
      4 
      5 
      6 
      7 
      8 
      9 
     10 

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/IPython/core/magic.py in <lambda>(f=<function time>, *a=(<IPython.core.magics.execution.ExecutionMagics object>, u'', u"lin_svm_sgd_model = train_test_model(\n    Lin...10000]}, \n    X_train, X_test, y_train, y_test)", None), **k={})
    188     validate_type(magic_kind)
    189 
    190     # This is a closure to capture the magic_kind.  We could also use a class,
    191     # but it's overkill for just that one bit of state.
    192     def magic_deco(arg):
--> 193         call = lambda f, *a, **k: f(*a, **k)
        f = <function time>
        a = (<IPython.core.magics.execution.ExecutionMagics object>, u'', u"lin_svm_sgd_model = train_test_model(\n    Lin...10000]}, \n    X_train, X_test, y_train, y_test)", None)
        k = {}
    194 
    195         if callable(arg):
    196             # "Naked" decorator call (just @foo, no args)
    197             func = arg

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/IPython/core/magics/execution.py in time(self=<IPython.core.magics.execution.ExecutionMagics object>, line=u'', cell=u"lin_svm_sgd_model = train_test_model(\n    Lin...10000]}, \n    X_train, X_test, y_train, y_test)", local_ns=None)
   1172             st = clock2()
   1173             out = eval(code, glob, local_ns)
   1174             end = clock2()
   1175         else:
   1176             st = clock2()
-> 1177             exec(code, glob, local_ns)
        code = <code object <module> at 0x7f6b495fec30, file "<timed exec>", line 1>
        glob = {'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'In': ['', u'import cPickle\nimport numpy as np\nimport pan...m sklearn.linear_model import LogisticRegression', u"def read_pickle(file_name):\n    f = open(file..._le = read_pickle('data/outcomes_le.engineered')", u'X_train, X_test, y_train, y_test = train_test_...), outcomes, test_size = 0.2, random_state = 10)', u'def train_test_model(model, hyperparameters, X...sions/\' + name, index = False)\n    return None', u'get_ipython().run_cell_magic(u\'time\', u\'\',...\']}, \\n    X_train, X_test, y_train, y_test)")', u"create_submission('first_submission.csv', logi...             train, outcomes, outcomes_le, test)", u'def train_test_model(model, hyperparameters, X...sions/\' + name, index = False)\n    return None', u"create_submission('first_submission.csv', logi...             train, outcomes, outcomes_le, test)", u'import cPickle\nimport numpy as np\nimport pan...r_model import LogisticRegression, SGDClassifier', u'get_ipython().run_cell_magic(u\'time\', u\'\',...00]}, \\n    X_train, X_test, y_train, y_test)")', u'get_ipython().run_cell_magic(u\'time\', u\'\',...00]}, \\n    X_train, X_test, y_train, y_test)")', u'get_ipython().run_cell_magic(u\'time\', u\'\',...00]}, \\n    X_train, X_test, y_train, y_test)")', u'import cPickle\nimport numpy as np\nimport pan...SGDClassifier\nfrom sklearn.svm import LinearSVC', u"get_ipython().magic(u'matplotlib inlineZ')\n\n...SGDClassifier\nfrom sklearn.svm import LinearSVC", u"get_ipython().magic(u'matplotlib inline')\n\ni...SGDClassifier\nfrom sklearn.svm import LinearSVC", u"get_ipython().magic(u'matplotlib inline')\nimp...SGDClassifier\nfrom sklearn.svm import LinearSVC", u'np.logspace(-2, 10, 10)', u'np.logspace(-2, 10, 5)', u'get_ipython().run_cell_magic(u\'time\', u\'\',...00]}, \\n    X_train, X_test, y_train, y_test)")'], 'LinearSVC': <class 'sklearn.svm.classes.LinearSVC'>, 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'Out': {17: array([  1.00000000e-02,   2.15443469e-01,   4.6...e+07,   4.64158883e+08,
         1.00000000e+10]), 18: array([  1.00000000e-02,   1.00000000e+01,   1.0...e+04,
         1.00000000e+07,   1.00000000e+10])}, 'RandomizedSearchCV': <class 'sklearn.grid_search.RandomizedSearchCV'>, 'SGDClassifier': <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'>, 'X_test': array([[ 0.        ,  2.988028  ,  0.        , ....  0.        ,
         1.        ,  0.        ]]), 'X_train': array([[ 0.        , -0.07773497,  0.        , ....  0.        ,
         1.        ,  0.        ]]), '_': array([  1.00000000e-02,   1.00000000e+01,   1.0...e+04,
         1.00000000e+07,   1.00000000e+10]), ...}
        local_ns = None
   1178             end = clock2()
   1179             out = None
   1180         wall_end = wtime()
   1181         # Compute actual times and report

...........................................................................
/home/jake/shelter-animals-kaggle/<timed exec> in <module>()
      1 
      2 
      3 
----> 4 
      5 
      6 
      7 
      8 
      9 
     10 

...........................................................................
/home/jake/shelter-animals-kaggle/<ipython-input-7-cda99598c230> in train_test_model(model=LinearSVC(C=1.0, class_weight=None, dual=True, f...', random_state=None, tol=0.0001,
     verbose=0), hyperparameters={'alpha': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]}, X_train=array([[ 0.        , -0.07773497,  0.        , ....  0.        ,
         1.        ,  0.        ]]), X_test=array([[ 0.        ,  2.988028  ,  0.        , ....  0.        ,
         1.        ,  0.        ]]), y_train=array([3, 0, 3, ..., 3, 4, 2]), y_test=array([3, 0, 4, ..., 3, 0, 0]))
      4     matricies corresponding to hold-out cross-validation, returns a model w/ 
      5     optimized hyperparameters using log-loss scoring and 5-fold cross-validation.
      6     """
      7     optimized_model = GridSearchCV(
      8         model, hyperparameters, cv = 5, n_jobs = -1, scoring = 'log_loss')
----> 9     optimized_model.fit(X_train, y_train)
     10     print 'Optimized parameters:', optimized_model.best_params_
     11     print 'Log loss:', np.absolute(optimized_model.score(X_test, y_test))
     12     return optimized_model
     13 

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/sklearn/grid_search.py in fit(self=GridSearchCV(cv=5, error_score='raise',
       e...jobs', refit=True, scoring='log_loss', verbose=0), X=array([[ 0.        , -0.07773497,  0.        , ....  0.        ,
         1.        ,  0.        ]]), y=array([3, 0, 3, ..., 3, 4, 2]))
    799         y : array-like, shape = [n_samples] or [n_samples, n_output], optional
    800             Target relative to X for classification or regression;
    801             None for unsupervised learning.
    802 
    803         """
--> 804         return self._fit(X, y, ParameterGrid(self.param_grid))
        self._fit = <bound method GridSearchCV._fit of GridSearchCV(...obs', refit=True, scoring='log_loss', verbose=0)>
        X = array([[ 0.        , -0.07773497,  0.        , ....  0.        ,
         1.        ,  0.        ]])
        y = array([3, 0, 3, ..., 3, 4, 2])
        self.param_grid = {'alpha': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]}
    805 
    806 
    807 class RandomizedSearchCV(BaseSearchCV):
    808     """Randomized search on hyper parameters.

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/sklearn/grid_search.py in _fit(self=GridSearchCV(cv=5, error_score='raise',
       e...jobs', refit=True, scoring='log_loss', verbose=0), X=array([[ 0.        , -0.07773497,  0.        , ....  0.        ,
         1.        ,  0.        ]]), y=array([3, 0, 3, ..., 3, 4, 2]), parameter_iterable=<sklearn.grid_search.ParameterGrid object>)
    548         )(
    549             delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
    550                                     train, test, self.verbose, parameters,
    551                                     self.fit_params, return_parameters=True,
    552                                     error_score=self.error_score)
--> 553                 for parameters in parameter_iterable
        parameters = undefined
        parameter_iterable = <sklearn.grid_search.ParameterGrid object>
    554                 for train, test in cv)
    555 
    556         # Out is a list of triplet: score, estimator, n_test_samples
    557         n_fits = len(out)

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object <genexpr>>)
    805             if pre_dispatch == "all" or n_jobs == 1:
    806                 # The iterable was consumed all at once by the above for loop.
    807                 # No need to wait for async callbacks to trigger to
    808                 # consumption.
    809                 self._iterating = False
--> 810             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    811             # Make sure that we get a last message telling us we are done
    812             elapsed_time = time.time() - self._start_time
    813             self._print('Done %3i out of %3i | elapsed: %s finished',
    814                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Thu Jun  2 21:20:43 2016
PID: 18435Python 2.7.11: /home/jake/miniconda2/envs/shelter-animals/bin/python
...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
     67     def __init__(self, iterator_slice):
     68         self.items = list(iterator_slice)
     69         self._size = len(self.items)
     70 
     71     def __call__(self):
---> 72         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (LinearSVC(C=1.0, class_weight=None, dual=True, f...', random_state=None, tol=0.0001,
     verbose=0), memmap([[ 0.        , -0.07773497,  0.        , ...  0.        ,
         1.        ,  0.        ]]), array([3, 0, 3, ..., 3, 4, 2]), make_scorer(log_loss, greater_is_better=False, needs_proba=True), array([ 4067,  4071,  4078, ..., 21380, 21381, 21382]), array([   0,    1,    2, ..., 4710, 4863, 4966]), 0, {'alpha': 0.001}, {})
        kwargs = {'error_score': 'raise', 'return_parameters': True}
        self.items = [(<function _fit_and_score>, (LinearSVC(C=1.0, class_weight=None, dual=True, f...', random_state=None, tol=0.0001,
     verbose=0), memmap([[ 0.        , -0.07773497,  0.        , ...  0.        ,
         1.        ,  0.        ]]), array([3, 0, 3, ..., 3, 4, 2]), make_scorer(log_loss, greater_is_better=False, needs_proba=True), array([ 4067,  4071,  4078, ..., 21380, 21381, 21382]), array([   0,    1,    2, ..., 4710, 4863, 4966]), 0, {'alpha': 0.001}, {}), {'error_score': 'raise', 'return_parameters': True})]
     73 
     74     def __len__(self):
     75         return self._size
     76 

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/sklearn/cross_validation.py in _fit_and_score(estimator=LinearSVC(C=1.0, class_weight=None, dual=True, f...', random_state=None, tol=0.0001,
     verbose=0), X=memmap([[ 0.        , -0.07773497,  0.        , ...  0.        ,
         1.        ,  0.        ]]), y=array([3, 0, 3, ..., 3, 4, 2]), scorer=make_scorer(log_loss, greater_is_better=False, needs_proba=True), train=array([ 4067,  4071,  4078, ..., 21380, 21381, 21382]), test=array([   0,    1,    2, ..., 4710, 4863, 4966]), verbose=0, parameters={'alpha': 0.001}, fit_params={}, return_train_score=False, return_parameters=True, error_score='raise')
   1515     fit_params = fit_params if fit_params is not None else {}
   1516     fit_params = dict([(k, _index_param_value(X, v, train))
   1517                       for k, v in fit_params.items()])
   1518 
   1519     if parameters is not None:
-> 1520         estimator.set_params(**parameters)
        estimator.set_params = <bound method LinearSVC.set_params of LinearSVC(..., random_state=None, tol=0.0001,
     verbose=0)>
        parameters = {'alpha': 0.001}
   1521 
   1522     start_time = time.time()
   1523 
   1524     X_train, y_train = _safe_split(estimator, X, y, train)

...........................................................................
/home/jake/miniconda2/envs/shelter-animals/lib/python2.7/site-packages/sklearn/base.py in set_params(self=LinearSVC(C=1.0, class_weight=None, dual=True, f...', random_state=None, tol=0.0001,
     verbose=0), **params={'alpha': 0.001})
    265                 # simple objects case
    266                 if key not in valid_params:
    267                     raise ValueError('Invalid parameter %s for estimator %s. '
    268                                      'Check the list of available parameters '
    269                                      'with `estimator.get_params().keys()`.' %
--> 270                                      (key, self.__class__.__name__))
        key = 'alpha'
        self.__class__.__name__ = 'LinearSVC'
    271                 setattr(self, key, value)
    272         return self
    273 
    274     def __repr__(self):

ValueError: Invalid parameter alpha for estimator LinearSVC. Check the list of available parameters with `estimator.get_params().keys()`.
___________________________________________________________________________