In [2]:
from sklearn.datasets import fetch_20newsgroups
twenty_train = fetch_20newsgroups(subset='train', shuffle=True)

In [3]:
from sklearn.feature_extraction.text import CountVectorizer
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(twenty_train.data)    
X_train_counts.shape

(11314, 130107)

In [4]:
from sklearn.feature_extraction.text import TfidfTransformer
tfdif_transformer = TfidfTransformer()
X_train_tfdif = tfdif_transformer.fit_transform(X_train_counts)
X_train_tfdif.shape

(11314, 130107)

The below is a Naive Bayesian model

In [5]:
from sklearn.naive_bayes import MultinomialNB
clf = MultinomialNB().fit(X_train_tfdif, twenty_train.target)

All of the above can be written much quicker like so

In [6]:
from sklearn.pipeline import Pipeline
text_clf = Pipeline([('vect', CountVectorizer()),
                     ('tfdif', TfidfTransformer()),
                     ('clf', MultinomialNB()),
                     ])
text_clf = text_clf.fit(twenty_train.data, twenty_train.target)

In [7]:
import numpy as np
twenty_test = fetch_20newsgroups(subset='test', shuffle=True)
predicted = text_clf.predict(twenty_test.data)
np.mean(predicted == twenty_test.target)

0.7738980350504514

Now we will try a support vector machine model to see if it gets better results (it does)

In [8]:
from sklearn.linear_model import SGDClassifier

text_clf_svm = Pipeline([('vect', CountVectorizer()),
                         ('tfdif', TfidfTransformer()),
                         ('clf-svm', SGDClassifier(loss='hinge', penalty='l2',
                                                   alpha=1e-3, n_iter=5, random_state=42)),
                         ])

_ = text_clf_svm.fit(twenty_train.data, twenty_train.target)

predicted_svm = text_clf_svm.predict(twenty_test.data)
np.mean(predicted_svm == twenty_test.target)



0.8238183749336165

Various parameters can be tuned to give optimal performance, scikit provides GridSearchCV t do so

In [9]:
from sklearn.model_selection import GridSearchCV
parameters = {'vect_ngram_range': [(1, 1), (1, 2)],
              'tfidf_use_idf': (True, False),
              'clf_alpha': (1e-2, 1e-3),
              }

In [10]:
gs_clf = GridSearchCV(text_clf, parameters, n_jobs=-1)
gs_clf = gs_clf.fit(twenty_train.data, twenty_train.target)

JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\Users\Jamie\AppData\Local\Programs\Python\Python36\lib\runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...venv\\lib\\site-packages\\ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
C:\Users\Jamie\AppData\Local\Programs\Python\Python36\lib\runpy.py in _run_code(code=<code object <module> at 0x0000025DC92E7810, fil...lib\site-packages\ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\Jamie\PycharmProjects\DevProject\venv\l...ges\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...venv\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\J...nv\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...venv\\lib\\site-packages\\ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x0000025DC92E7810, fil...lib\site-packages\ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\Jamie\PycharmProjects\DevProject\venv\l...ges\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...venv\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\J...nv\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    481         if self.poller is not None:
    482             self.poller.start()
    483         self.kernel.start()
    484         self.io_loop = ioloop.IOLoop.current()
    485         try:
--> 486             self.io_loop.start()
        self.io_loop.start = <bound method BaseAsyncIOLoop.start of <tornado.platform.asyncio.AsyncIOMainLoop object>>
    487         except KeyboardInterrupt:
    488             pass
    489 
    490 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\tornado\platform\asyncio.py in start(self=<tornado.platform.asyncio.AsyncIOMainLoop object>)
    122         except (RuntimeError, AssertionError):
    123             old_loop = None
    124         try:
    125             self._setup_logging()
    126             asyncio.set_event_loop(self.asyncio_loop)
--> 127             self.asyncio_loop.run_forever()
        self.asyncio_loop.run_forever = <bound method BaseEventLoop.run_forever of <_Win...EventLoop running=True closed=False debug=False>>
    128         finally:
    129             asyncio.set_event_loop(old_loop)
    130 
    131     def stop(self):

...........................................................................
C:\Users\Jamie\AppData\Local\Programs\Python\Python36\lib\asyncio\base_events.py in run_forever(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
    416             sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook,
    417                                    finalizer=self._asyncgen_finalizer_hook)
    418         try:
    419             events._set_running_loop(self)
    420             while True:
--> 421                 self._run_once()
        self._run_once = <bound method BaseEventLoop._run_once of <_Windo...EventLoop running=True closed=False debug=False>>
    422                 if self._stopping:
    423                     break
    424         finally:
    425             self._stopping = False

...........................................................................
C:\Users\Jamie\AppData\Local\Programs\Python\Python36\lib\asyncio\base_events.py in _run_once(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
   1426                         logger.warning('Executing %s took %.3f seconds',
   1427                                        _format_handle(handle), dt)
   1428                 finally:
   1429                     self._current_handle = None
   1430             else:
-> 1431                 handle._run()
        handle._run = <bound method Handle._run of <Handle BaseAsyncIOLoop._handle_events(832, 1)>>
   1432         handle = None  # Needed to break cycles when an exception occurs.
   1433 
   1434     def _set_coroutine_wrapper(self, enabled):
   1435         try:

...........................................................................
C:\Users\Jamie\AppData\Local\Programs\Python\Python36\lib\asyncio\events.py in _run(self=<Handle BaseAsyncIOLoop._handle_events(832, 1)>)
    140             self._callback = None
    141             self._args = None
    142 
    143     def _run(self):
    144         try:
--> 145             self._callback(*self._args)
        self._callback = <bound method BaseAsyncIOLoop._handle_events of <tornado.platform.asyncio.AsyncIOMainLoop object>>
        self._args = (832, 1)
    146         except Exception as exc:
    147             cb = _format_callback_source(self._callback, self._args)
    148             msg = 'Exception in callback {}'.format(cb)
    149             context = {

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\tornado\platform\asyncio.py in _handle_events(self=<tornado.platform.asyncio.AsyncIOMainLoop object>, fd=832, events=1)
    112             self.writers.remove(fd)
    113         del self.handlers[fd]
    114 
    115     def _handle_events(self, fd, events):
    116         fileobj, handler_func = self.handlers[fd]
--> 117         handler_func(fileobj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fileobj = <zmq.sugar.socket.Socket object>
        events = 1
    118 
    119     def start(self):
    120         try:
    121             old_loop = asyncio.get_event_loop()

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    445             return
    446         zmq_events = self.socket.EVENTS
    447         try:
    448             # dispatch events:
    449             if zmq_events & zmq.POLLIN and self.receiving():
--> 450                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    451                 if not self.socket:
    452                     return
    453             if zmq_events & zmq.POLLOUT and self.sending():
    454                 self._handle_send()

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    475             else:
    476                 raise
    477         else:
    478             if self._recv_callback:
    479                 callback = self._recv_callback
--> 480                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    481         
    482 
    483     def _handle_send(self):
    484         """Handle a send event."""

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\zmq\eventloop\zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    427         close our socket."""
    428         try:
    429             # Use a NullContext to ensure that all StackContexts are run
    430             # inside our blanket exception handler rather than outside.
    431             with stack_context.NullContext():
--> 432                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    433         except:
    434             gen_log.error("Uncaught exception in ZMQStream callback",
    435                           exc_info=True)
    436             # Re-raise the exception so that IOLoop.handle_callback_exception

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel\kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': False, 'code': 'gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)', 'output_type': '', 'silent': False, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 7, 1, 20, 54, 47, 819652, tzinfo=tzutc()), 'msg_id': '51590775-122a-431c-8909-ee5d6ce5f612', 'msg_type': 'execute_request', 'session': 'd192dbcf-5f26-4238-9f59-d04b9cf0f5b0', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '51590775-122a-431c-8909-ee5d6ce5f612', 'msg_type': 'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'c2dceb38-806e008885734c5005c90417']
        msg = {'buffers': [], 'content': {'allow_stdin': False, 'code': 'gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)', 'output_type': '', 'silent': False, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 7, 1, 20, 54, 47, 819652, tzinfo=tzutc()), 'msg_id': '51590775-122a-431c-8909-ee5d6ce5f612', 'msg_type': 'execute_request', 'session': 'd192dbcf-5f26-4238-9f59-d04b9cf0f5b0', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '51590775-122a-431c-8909-ee5d6ce5f612', 'msg_type': 'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'c2dceb38-806e008885734c5005c90417'], parent={'buffers': [], 'content': {'allow_stdin': False, 'code': 'gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)', 'output_type': '', 'silent': False, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 7, 1, 20, 54, 47, 819652, tzinfo=tzutc()), 'msg_id': '51590775-122a-431c-8909-ee5d6ce5f612', 'msg_type': 'execute_request', 'session': 'd192dbcf-5f26-4238-9f59-d04b9cf0f5b0', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '51590775-122a-431c-8909-ee5d6ce5f612', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = False
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code='gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)', silent=False, store_history=True, user_expressions={}, allow_stdin=False)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = 'gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=('gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)',), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ('gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)',)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)', store_history=True, silent=False, shell_futures=True)
   2657         -------
   2658         result : :class:`ExecutionResult`
   2659         """
   2660         try:
   2661             result = self._run_cell(
-> 2662                 raw_cell, store_history, silent, shell_futures)
        raw_cell = 'gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)'
        store_history = True
        silent = False
        shell_futures = True
   2663         finally:
   2664             self.events.trigger('post_execute')
   2665             if not silent:
   2666                 self.events.trigger('post_run_cell', result)

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\IPython\core\interactiveshell.py in _run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)', store_history=True, silent=False, shell_futures=True)
   2780                 self.displayhook.exec_result = result
   2781 
   2782                 # Execute the user code
   2783                 interactivity = 'none' if silent else self.ast_node_interactivity
   2784                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2785                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2786                 
   2787                 self.last_execution_succeeded = not has_raised
   2788                 self.last_execution_result = result
   2789 

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Assign object>], cell_name='<ipython-input-10-ca23821459f7>', interactivity='none', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 25dce133780, executio...rue silent=False shell_futures=True> result=None>)
   2898 
   2899         try:
   2900             for i, node in enumerate(to_run_exec):
   2901                 mod = ast.Module([node])
   2902                 code = compiler(mod, cell_name, "exec")
-> 2903                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x0000025DCDF978A0, file "<ipython-input-10-ca23821459f7>", line 2>
        result = <ExecutionResult object at 25dce133780, executio...rue silent=False shell_futures=True> result=None>
   2904                     return True
   2905 
   2906             for i, node in enumerate(to_run_interactive):
   2907                 mod = ast.Interactive([node])

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x0000025DCDF978A0, file "<ipython-input-10-ca23821459f7>", line 2>, result=<ExecutionResult object at 25dce133780, executio...rue silent=False shell_futures=True> result=None>)
   2958         outflag = True  # happens in more places, so it's easier as default
   2959         try:
   2960             try:
   2961                 self.hooks.pre_run_code_hook()
   2962                 #rprint('Running code', repr(code_obj)) # dbg
-> 2963                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x0000025DCDF978A0, file "<ipython-input-10-ca23821459f7>", line 2>
        self.user_global_ns = {'CountVectorizer': <class 'sklearn.feature_extraction.text.CountVectorizer'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import nltk\n\nfrom nltk.stem.snowball import Snow...n((predicted_mnb_stemmed == twenty_train.target))', "from sklearn.datasets import fetch_20newsgroups\n... fetch_20newsgroups(subset='train', shuffle=True)", 'from sklearn.feature_extraction.text import Coun...transform(twenty_train.data)\nX_train_counts.shape', 'from sklearn.feature_extraction.text import Tfid...fit_transform(X_train_counts)\nX_train_tfdif.shape', 'from sklearn.naive_bayes import MultinomialNB\ncl...omialNB().fit(X_train_tfdif, twenty_train.target)', 'from sklearn.pipeline import Pipeline\ntext_clf =...t_clf.fit(twenty_train.data, twenty_train.target)', 'import numpy as np\ntwenty_test = fetch_20newsgro...st.data)\nnp.mean(predicted == twenty_test.target)', 'from sklearn.linear_model import SGDClassifier\n\n...ata)\nnp.mean(predicted_svm == twenty_test.target)', "from sklearn.model_selection import GridSearchCV...       'clf_alpha': (1e-2, 1e-3),\n              }", 'gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)'], 'MultinomialNB': <class 'sklearn.naive_bayes.MultinomialNB'>, 'Out': {3: (11314, 130107), 4: (11314, 130107), 7: 0.7738980350504514, 8: 0.8238183749336165}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'SGDClassifier': <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'>, 'SnowballStemmer': <class 'nltk.stem.snowball.SnowballStemmer'>, 'TfidfTransformer': <class 'sklearn.feature_extraction.text.TfidfTransformer'>, 'X_train_counts': <11314x130107 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format>, ...}
        self.user_ns = {'CountVectorizer': <class 'sklearn.feature_extraction.text.CountVectorizer'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import nltk\n\nfrom nltk.stem.snowball import Snow...n((predicted_mnb_stemmed == twenty_train.target))', "from sklearn.datasets import fetch_20newsgroups\n... fetch_20newsgroups(subset='train', shuffle=True)", 'from sklearn.feature_extraction.text import Coun...transform(twenty_train.data)\nX_train_counts.shape', 'from sklearn.feature_extraction.text import Tfid...fit_transform(X_train_counts)\nX_train_tfdif.shape', 'from sklearn.naive_bayes import MultinomialNB\ncl...omialNB().fit(X_train_tfdif, twenty_train.target)', 'from sklearn.pipeline import Pipeline\ntext_clf =...t_clf.fit(twenty_train.data, twenty_train.target)', 'import numpy as np\ntwenty_test = fetch_20newsgro...st.data)\nnp.mean(predicted == twenty_test.target)', 'from sklearn.linear_model import SGDClassifier\n\n...ata)\nnp.mean(predicted_svm == twenty_test.target)', "from sklearn.model_selection import GridSearchCV...       'clf_alpha': (1e-2, 1e-3),\n              }", 'gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)'], 'MultinomialNB': <class 'sklearn.naive_bayes.MultinomialNB'>, 'Out': {3: (11314, 130107), 4: (11314, 130107), 7: 0.7738980350504514, 8: 0.8238183749336165}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'SGDClassifier': <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'>, 'SnowballStemmer': <class 'nltk.stem.snowball.SnowballStemmer'>, 'TfidfTransformer': <class 'sklearn.feature_extraction.text.TfidfTransformer'>, 'X_train_counts': <11314x130107 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format>, ...}
   2964             finally:
   2965                 # Reset our crash handler in place
   2966                 sys.excepthook = old_excepthook
   2967         except SystemExit as e:

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\<ipython-input-10-ca23821459f7> in <module>()
      1 gs_clf = GridSearchCV(text_clf, parameters, n_jobs=-1)
----> 2 gs_clf = gs_clf.fit(twenty_train.data, twenty_train.target)

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\model_selection\_search.py in fit(self=GridSearchCV(cv=None, error_score='raise',
     ...ain_score='warn',
       scoring=None, verbose=0), X=["From: lerxst@wam.umd.edu (where's my thing)\nSubj...ught to you by your neighborhood Lerxst ----\n\n\n\n\n", 'From: guykuo@carson.u.washington.edu (Guy Kuo)\nS...poll. Thanks.\n\nGuy Kuo <guykuo@u.washington.edu>\n', 'From: twillis@ec.ecn.purdue.edu (Thomas E Willis... enemies of truth than lies."  - F. W.\nNietzsche\n', 'From: jgreen@amber (Joe Green)\nSubject: Re: Weit...th no sense of humor."\n\t\t\t\t\t\t-- Jonathan Winters\n', "From: jcm@head-cfa.harvard.edu (Jonathan McDowel.... 213 before liftoff, ignore it'.\n\n - Jonathan\n\n\n", 'From: dfo@vttoulu.tko.vtt.fi (Foxvog Douglas)\nSu...cket...\n\n\n\n-- \ndoug foxvog\ndouglas.foxvog@vtt.fi\n', 'From: bmdelane@quads.uchicago.edu (brian manning...ber. "Hmmm... \'News?\' What\'s\nthis?"....)\n\n-Brian\n', 'From: bgrubb@dante.nmsu.edu (GRUBB)\nSubject: Re:...is \n(Digital Review, Oct 21, 1991 v8 n33 p8(1)).\n', 'From: holmes7000@iscsvax.uni.edu\nSubject: WIn 3....eciated.\n\n\nThanx,\n\n-Brando\n\nPS Please E-mail me\n\n', 'From: kerr@ux1.cso.uiuc.edu (Stan Kerr)\nSubject:...Phone: 217-333-5217  Email: stankerr@uiuc.edu   \n', 'From: irwin@cmptrc.lonestar.org (Irwin Arnstein)...------------------------------------------------\n', 'From: david@terminus.ericsson.se (David Bold)\nSu...shes cried,\nAs they swam its clearness through.\n\n', 'From: rodc@fc.hp.com (Rod Cerkoney)\nSubject: *$G...____________________/      \\           \\/    \\__\n', 'From: dbm0000@tm0006.lerc.nasa.gov (David B. Mck...ilization\n  - 1 micro-g thru out the core module\n', 'From: jllee@acsu.buffalo.edu (Johnny L Lee)\nSubj...onable.Very Reasonable.\n\n\t\t\t\t\tThanks,\n\t\t\t\t\t\tJohn\n', 'From: mathew <mathew@mantis.co.uk>\nSubject: Re: ...erally died of\nmalnutrition or disease.\n\n\nmathew\n', 'From: ab@nova.cc.purdue.edu (Allen B)\nSubject: R...soning- not\nthat anyone does, of course! :-)\n\nab\n', 'From: CPKJP@vm.cc.latech.edu (Kevin Parker)\nSubj...s to insurance\ncompanies 8^).\n \nGood luck,\nSerge\n', 'From: ritley@uimrl7.mrl.uiuc.edu ()\nSubject: SEE...er).\n\nAny pointers would be greatly appreciated!\n', 'From: abarden@tybse1.uucp (Ann Marie Barden)\nSub...!\n\nAnn Marie Barden  \tabarden@afseo.eglin.af.mil\n', ...], y=array([7, 4, 4, ..., 3, 1, 8]), groups=None, **fit_params={})
    634                                   return_train_score=self.return_train_score,
    635                                   return_n_test_samples=True,
    636                                   return_times=True, return_parameters=False,
    637                                   error_score=self.error_score)
    638           for parameters, (train, test) in product(candidate_params,
--> 639                                                    cv.split(X, y, groups)))
        cv.split = <bound method StratifiedKFold.split of Stratifie...ld(n_splits=3, random_state=None, shuffle=False)>
        X = ["From: lerxst@wam.umd.edu (where's my thing)\nSubj...ught to you by your neighborhood Lerxst ----\n\n\n\n\n", 'From: guykuo@carson.u.washington.edu (Guy Kuo)\nS...poll. Thanks.\n\nGuy Kuo <guykuo@u.washington.edu>\n', 'From: twillis@ec.ecn.purdue.edu (Thomas E Willis... enemies of truth than lies."  - F. W.\nNietzsche\n', 'From: jgreen@amber (Joe Green)\nSubject: Re: Weit...th no sense of humor."\n\t\t\t\t\t\t-- Jonathan Winters\n', "From: jcm@head-cfa.harvard.edu (Jonathan McDowel.... 213 before liftoff, ignore it'.\n\n - Jonathan\n\n\n", 'From: dfo@vttoulu.tko.vtt.fi (Foxvog Douglas)\nSu...cket...\n\n\n\n-- \ndoug foxvog\ndouglas.foxvog@vtt.fi\n', 'From: bmdelane@quads.uchicago.edu (brian manning...ber. "Hmmm... \'News?\' What\'s\nthis?"....)\n\n-Brian\n', 'From: bgrubb@dante.nmsu.edu (GRUBB)\nSubject: Re:...is \n(Digital Review, Oct 21, 1991 v8 n33 p8(1)).\n', 'From: holmes7000@iscsvax.uni.edu\nSubject: WIn 3....eciated.\n\n\nThanx,\n\n-Brando\n\nPS Please E-mail me\n\n', 'From: kerr@ux1.cso.uiuc.edu (Stan Kerr)\nSubject:...Phone: 217-333-5217  Email: stankerr@uiuc.edu   \n', 'From: irwin@cmptrc.lonestar.org (Irwin Arnstein)...------------------------------------------------\n', 'From: david@terminus.ericsson.se (David Bold)\nSu...shes cried,\nAs they swam its clearness through.\n\n', 'From: rodc@fc.hp.com (Rod Cerkoney)\nSubject: *$G...____________________/      \\           \\/    \\__\n', 'From: dbm0000@tm0006.lerc.nasa.gov (David B. Mck...ilization\n  - 1 micro-g thru out the core module\n', 'From: jllee@acsu.buffalo.edu (Johnny L Lee)\nSubj...onable.Very Reasonable.\n\n\t\t\t\t\tThanks,\n\t\t\t\t\t\tJohn\n', 'From: mathew <mathew@mantis.co.uk>\nSubject: Re: ...erally died of\nmalnutrition or disease.\n\n\nmathew\n', 'From: ab@nova.cc.purdue.edu (Allen B)\nSubject: R...soning- not\nthat anyone does, of course! :-)\n\nab\n', 'From: CPKJP@vm.cc.latech.edu (Kevin Parker)\nSubj...s to insurance\ncompanies 8^).\n \nGood luck,\nSerge\n', 'From: ritley@uimrl7.mrl.uiuc.edu ()\nSubject: SEE...er).\n\nAny pointers would be greatly appreciated!\n', 'From: abarden@tybse1.uucp (Ann Marie Barden)\nSub...!\n\nAnn Marie Barden  \tabarden@afseo.eglin.af.mil\n', ...]
        y = array([7, 4, 4, ..., 3, 1, 8])
        groups = None
    640 
    641         # if one choose to see train score, "out" will contain train score info
    642         if self.return_train_score:
    643             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV.fit.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Sun Jul  1 21:54:50 2018
PID: 7612Python 3.6.4: C:\Users\Jamie\PycharmProjects\DevProject\venv\Scripts\python.exe
...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _fit_and_score>, (Pipeline(memory=None,
     steps=[('vect', Count...B(alpha=1.0, class_prior=None, fit_prior=True))]), ["From: lerxst@wam.umd.edu (where's my thing)\nSubj...ught to you by your neighborhood Lerxst ----\n\n\n\n\n", 'From: guykuo@carson.u.washington.edu (Guy Kuo)\nS...poll. Thanks.\n\nGuy Kuo <guykuo@u.washington.edu>\n', 'From: twillis@ec.ecn.purdue.edu (Thomas E Willis... enemies of truth than lies."  - F. W.\nNietzsche\n', 'From: jgreen@amber (Joe Green)\nSubject: Re: Weit...th no sense of humor."\n\t\t\t\t\t\t-- Jonathan Winters\n', "From: jcm@head-cfa.harvard.edu (Jonathan McDowel.... 213 before liftoff, ignore it'.\n\n - Jonathan\n\n\n", 'From: dfo@vttoulu.tko.vtt.fi (Foxvog Douglas)\nSu...cket...\n\n\n\n-- \ndoug foxvog\ndouglas.foxvog@vtt.fi\n', 'From: bmdelane@quads.uchicago.edu (brian manning...ber. "Hmmm... \'News?\' What\'s\nthis?"....)\n\n-Brian\n', 'From: bgrubb@dante.nmsu.edu (GRUBB)\nSubject: Re:...is \n(Digital Review, Oct 21, 1991 v8 n33 p8(1)).\n', 'From: holmes7000@iscsvax.uni.edu\nSubject: WIn 3....eciated.\n\n\nThanx,\n\n-Brando\n\nPS Please E-mail me\n\n', 'From: kerr@ux1.cso.uiuc.edu (Stan Kerr)\nSubject:...Phone: 217-333-5217  Email: stankerr@uiuc.edu   \n', 'From: irwin@cmptrc.lonestar.org (Irwin Arnstein)...------------------------------------------------\n', 'From: david@terminus.ericsson.se (David Bold)\nSu...shes cried,\nAs they swam its clearness through.\n\n', 'From: rodc@fc.hp.com (Rod Cerkoney)\nSubject: *$G...____________________/      \\           \\/    \\__\n', 'From: dbm0000@tm0006.lerc.nasa.gov (David B. Mck...ilization\n  - 1 micro-g thru out the core module\n', 'From: jllee@acsu.buffalo.edu (Johnny L Lee)\nSubj...onable.Very Reasonable.\n\n\t\t\t\t\tThanks,\n\t\t\t\t\t\tJohn\n', 'From: mathew <mathew@mantis.co.uk>\nSubject: Re: ...erally died of\nmalnutrition or disease.\n\n\nmathew\n', 'From: ab@nova.cc.purdue.edu (Allen B)\nSubject: R...soning- not\nthat anyone does, of course! :-)\n\nab\n', 'From: CPKJP@vm.cc.latech.edu (Kevin Parker)\nSubj...s to insurance\ncompanies 8^).\n \nGood luck,\nSerge\n', 'From: ritley@uimrl7.mrl.uiuc.edu ()\nSubject: SEE...er).\n\nAny pointers would be greatly appreciated!\n', 'From: abarden@tybse1.uucp (Ann Marie Barden)\nSub...!\n\nAnn Marie Barden  \tabarden@afseo.eglin.af.mil\n', ...], array([7, 4, 4, ..., 3, 1, 8]), {'score': <function _passthrough_scorer>}, array([ 3578,  3609,  3617, ..., 11311, 11312, 11313]), array([   0,    1,    2, ..., 4088, 4095, 4111]), 0, {'clf_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (Pipeline(memory=None,
     steps=[('vect', Count...B(alpha=1.0, class_prior=None, fit_prior=True))]), ["From: lerxst@wam.umd.edu (where's my thing)\nSubj...ught to you by your neighborhood Lerxst ----\n\n\n\n\n", 'From: guykuo@carson.u.washington.edu (Guy Kuo)\nS...poll. Thanks.\n\nGuy Kuo <guykuo@u.washington.edu>\n', 'From: twillis@ec.ecn.purdue.edu (Thomas E Willis... enemies of truth than lies."  - F. W.\nNietzsche\n', 'From: jgreen@amber (Joe Green)\nSubject: Re: Weit...th no sense of humor."\n\t\t\t\t\t\t-- Jonathan Winters\n', "From: jcm@head-cfa.harvard.edu (Jonathan McDowel.... 213 before liftoff, ignore it'.\n\n - Jonathan\n\n\n", 'From: dfo@vttoulu.tko.vtt.fi (Foxvog Douglas)\nSu...cket...\n\n\n\n-- \ndoug foxvog\ndouglas.foxvog@vtt.fi\n', 'From: bmdelane@quads.uchicago.edu (brian manning...ber. "Hmmm... \'News?\' What\'s\nthis?"....)\n\n-Brian\n', 'From: bgrubb@dante.nmsu.edu (GRUBB)\nSubject: Re:...is \n(Digital Review, Oct 21, 1991 v8 n33 p8(1)).\n', 'From: holmes7000@iscsvax.uni.edu\nSubject: WIn 3....eciated.\n\n\nThanx,\n\n-Brando\n\nPS Please E-mail me\n\n', 'From: kerr@ux1.cso.uiuc.edu (Stan Kerr)\nSubject:...Phone: 217-333-5217  Email: stankerr@uiuc.edu   \n', 'From: irwin@cmptrc.lonestar.org (Irwin Arnstein)...------------------------------------------------\n', 'From: david@terminus.ericsson.se (David Bold)\nSu...shes cried,\nAs they swam its clearness through.\n\n', 'From: rodc@fc.hp.com (Rod Cerkoney)\nSubject: *$G...____________________/      \\           \\/    \\__\n', 'From: dbm0000@tm0006.lerc.nasa.gov (David B. Mck...ilization\n  - 1 micro-g thru out the core module\n', 'From: jllee@acsu.buffalo.edu (Johnny L Lee)\nSubj...onable.Very Reasonable.\n\n\t\t\t\t\tThanks,\n\t\t\t\t\t\tJohn\n', 'From: mathew <mathew@mantis.co.uk>\nSubject: Re: ...erally died of\nmalnutrition or disease.\n\n\nmathew\n', 'From: ab@nova.cc.purdue.edu (Allen B)\nSubject: R...soning- not\nthat anyone does, of course! :-)\n\nab\n', 'From: CPKJP@vm.cc.latech.edu (Kevin Parker)\nSubj...s to insurance\ncompanies 8^).\n \nGood luck,\nSerge\n', 'From: ritley@uimrl7.mrl.uiuc.edu ()\nSubject: SEE...er).\n\nAny pointers would be greatly appreciated!\n', 'From: abarden@tybse1.uucp (Ann Marie Barden)\nSub...!\n\nAnn Marie Barden  \tabarden@afseo.eglin.af.mil\n', ...], array([7, 4, 4, ..., 3, 1, 8]), {'score': <function _passthrough_scorer>}, array([ 3578,  3609,  3617, ..., 11311, 11312, 11313]), array([   0,    1,    2, ..., 4088, 4095, 4111]), 0, {'clf_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator=Pipeline(memory=None,
     steps=[('vect', Count...B(alpha=1.0, class_prior=None, fit_prior=True))]), X=["From: lerxst@wam.umd.edu (where's my thing)\nSubj...ught to you by your neighborhood Lerxst ----\n\n\n\n\n", 'From: guykuo@carson.u.washington.edu (Guy Kuo)\nS...poll. Thanks.\n\nGuy Kuo <guykuo@u.washington.edu>\n', 'From: twillis@ec.ecn.purdue.edu (Thomas E Willis... enemies of truth than lies."  - F. W.\nNietzsche\n', 'From: jgreen@amber (Joe Green)\nSubject: Re: Weit...th no sense of humor."\n\t\t\t\t\t\t-- Jonathan Winters\n', "From: jcm@head-cfa.harvard.edu (Jonathan McDowel.... 213 before liftoff, ignore it'.\n\n - Jonathan\n\n\n", 'From: dfo@vttoulu.tko.vtt.fi (Foxvog Douglas)\nSu...cket...\n\n\n\n-- \ndoug foxvog\ndouglas.foxvog@vtt.fi\n', 'From: bmdelane@quads.uchicago.edu (brian manning...ber. "Hmmm... \'News?\' What\'s\nthis?"....)\n\n-Brian\n', 'From: bgrubb@dante.nmsu.edu (GRUBB)\nSubject: Re:...is \n(Digital Review, Oct 21, 1991 v8 n33 p8(1)).\n', 'From: holmes7000@iscsvax.uni.edu\nSubject: WIn 3....eciated.\n\n\nThanx,\n\n-Brando\n\nPS Please E-mail me\n\n', 'From: kerr@ux1.cso.uiuc.edu (Stan Kerr)\nSubject:...Phone: 217-333-5217  Email: stankerr@uiuc.edu   \n', 'From: irwin@cmptrc.lonestar.org (Irwin Arnstein)...------------------------------------------------\n', 'From: david@terminus.ericsson.se (David Bold)\nSu...shes cried,\nAs they swam its clearness through.\n\n', 'From: rodc@fc.hp.com (Rod Cerkoney)\nSubject: *$G...____________________/      \\           \\/    \\__\n', 'From: dbm0000@tm0006.lerc.nasa.gov (David B. Mck...ilization\n  - 1 micro-g thru out the core module\n', 'From: jllee@acsu.buffalo.edu (Johnny L Lee)\nSubj...onable.Very Reasonable.\n\n\t\t\t\t\tThanks,\n\t\t\t\t\t\tJohn\n', 'From: mathew <mathew@mantis.co.uk>\nSubject: Re: ...erally died of\nmalnutrition or disease.\n\n\nmathew\n', 'From: ab@nova.cc.purdue.edu (Allen B)\nSubject: R...soning- not\nthat anyone does, of course! :-)\n\nab\n', 'From: CPKJP@vm.cc.latech.edu (Kevin Parker)\nSubj...s to insurance\ncompanies 8^).\n \nGood luck,\nSerge\n', 'From: ritley@uimrl7.mrl.uiuc.edu ()\nSubject: SEE...er).\n\nAny pointers would be greatly appreciated!\n', 'From: abarden@tybse1.uucp (Ann Marie Barden)\nSub...!\n\nAnn Marie Barden  \tabarden@afseo.eglin.af.mil\n', ...], y=array([7, 4, 4, ..., 3, 1, 8]), scorer={'score': <function _passthrough_scorer>}, train=array([ 3578,  3609,  3617, ..., 11311, 11312, 11313]), test=array([   0,    1,    2, ..., 4088, 4095, 4111]), verbose=0, parameters={'clf_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)}, fit_params={}, return_train_score='warn', return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    439                       for k, v in fit_params.items()])
    440 
    441     test_scores = {}
    442     train_scores = {}
    443     if parameters is not None:
--> 444         estimator.set_params(**parameters)
        estimator.set_params = <bound method Pipeline.set_params of Pipeline(me...(alpha=1.0, class_prior=None, fit_prior=True))])>
        parameters = {'clf_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)}
    445 
    446     start_time = time.time()
    447 
    448     X_train, y_train = _safe_split(estimator, X, y, train)

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\pipeline.py in set_params(self=Pipeline(memory=None,
     steps=[('vect', Count...B(alpha=1.0, class_prior=None, fit_prior=True))]), **kwargs={'clf_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)})
    137 
    138         Returns
    139         -------
    140         self
    141         """
--> 142         self._set_params('steps', **kwargs)
        self._set_params = <bound method _BaseComposition._set_params of Pi...(alpha=1.0, class_prior=None, fit_prior=True))])>
        kwargs = {'clf_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)}
    143         return self
    144 
    145     def _validate_steps(self):
    146         names, estimators = zip(*self.steps)

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\utils\metaestimators.py in _set_params(self=Pipeline(memory=None,
     steps=[('vect', Count...B(alpha=1.0, class_prior=None, fit_prior=True))]), attr='steps', **params={'clf_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)})
     44         names, _ = zip(*getattr(self, attr))
     45         for name in list(six.iterkeys(params)):
     46             if '__' not in name and name in names:
     47                 self._replace_estimator(attr, name, params.pop(name))
     48         # 3. Step parameters and other initilisation arguments
---> 49         super(_BaseComposition, self).set_params(**params)
        self.set_params = <bound method Pipeline.set_params of Pipeline(me...(alpha=1.0, class_prior=None, fit_prior=True))])>
        params = {'clf_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)}
     50         return self
     51 
     52     def _replace_estimator(self, attr, name, new_val):
     53         # assumes `name` is a valid estimator name

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\base.py in set_params(self=Pipeline(memory=None,
     steps=[('vect', Count...B(alpha=1.0, class_prior=None, fit_prior=True))]), **params={'clf_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)})
    269             key, delim, sub_key = key.partition('__')
    270             if key not in valid_params:
    271                 raise ValueError('Invalid parameter %s for estimator %s. '
    272                                  'Check the list of available parameters '
    273                                  'with `estimator.get_params().keys()`.' %
--> 274                                  (key, self))
        key = 'clf_alpha'
        self = Pipeline(memory=None,
     steps=[('vect', Count...B(alpha=1.0, class_prior=None, fit_prior=True))])
    275 
    276             if delim:
    277                 nested_params[key][sub_key] = value
    278             else:

ValueError: Invalid parameter clf_alpha for estimator Pipeline(memory=None,
     steps=[('vect', CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), preprocessor=None, stop_words=None,
        strip...inear_tf=False, use_idf=True)), ('clf', MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))]). Check the list of available parameters with `estimator.get_params().keys()`.
___________________________________________________________________________

In [11]:
gs_clf.best_score_
gs_clf.best_params_

AttributeError: 'GridSearchCV' object has no attribute 'best_score_'

In [12]:
from sklearn.model_selection import GridSearchCV
parameters_svm = {'vect_ngram_range': [(1, 1), (1, 2)],
              'tfidf_use_idf': (True, False),
              'clf-svm_alpha': (1e-2, 1e-3),
              }

gs_clf_svm = GridSearchCV(text_clf_svm, parameters_svm, n_jobs=-1)
gs_clf_svm = gs_clf_svm.fit(twenty_train.data, twenty_train.target)
gs_clf_svm.best_score_
gs_clf_svm.best_params_

JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\Users\Jamie\AppData\Local\Programs\Python\Python36\lib\runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...venv\\lib\\site-packages\\ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
C:\Users\Jamie\AppData\Local\Programs\Python\Python36\lib\runpy.py in _run_code(code=<code object <module> at 0x0000025DC92E7810, fil...lib\site-packages\ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\Jamie\PycharmProjects\DevProject\venv\l...ges\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...venv\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\J...nv\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...venv\\lib\\site-packages\\ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x0000025DC92E7810, fil...lib\site-packages\ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\Jamie\PycharmProjects\DevProject\venv\l...ges\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...venv\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\J...nv\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    481         if self.poller is not None:
    482             self.poller.start()
    483         self.kernel.start()
    484         self.io_loop = ioloop.IOLoop.current()
    485         try:
--> 486             self.io_loop.start()
        self.io_loop.start = <bound method BaseAsyncIOLoop.start of <tornado.platform.asyncio.AsyncIOMainLoop object>>
    487         except KeyboardInterrupt:
    488             pass
    489 
    490 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\tornado\platform\asyncio.py in start(self=<tornado.platform.asyncio.AsyncIOMainLoop object>)
    122         except (RuntimeError, AssertionError):
    123             old_loop = None
    124         try:
    125             self._setup_logging()
    126             asyncio.set_event_loop(self.asyncio_loop)
--> 127             self.asyncio_loop.run_forever()
        self.asyncio_loop.run_forever = <bound method BaseEventLoop.run_forever of <_Win...EventLoop running=True closed=False debug=False>>
    128         finally:
    129             asyncio.set_event_loop(old_loop)
    130 
    131     def stop(self):

...........................................................................
C:\Users\Jamie\AppData\Local\Programs\Python\Python36\lib\asyncio\base_events.py in run_forever(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
    416             sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook,
    417                                    finalizer=self._asyncgen_finalizer_hook)
    418         try:
    419             events._set_running_loop(self)
    420             while True:
--> 421                 self._run_once()
        self._run_once = <bound method BaseEventLoop._run_once of <_Windo...EventLoop running=True closed=False debug=False>>
    422                 if self._stopping:
    423                     break
    424         finally:
    425             self._stopping = False

...........................................................................
C:\Users\Jamie\AppData\Local\Programs\Python\Python36\lib\asyncio\base_events.py in _run_once(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
   1426                         logger.warning('Executing %s took %.3f seconds',
   1427                                        _format_handle(handle), dt)
   1428                 finally:
   1429                     self._current_handle = None
   1430             else:
-> 1431                 handle._run()
        handle._run = <bound method Handle._run of <Handle BaseAsyncIOLoop._handle_events(832, 1)>>
   1432         handle = None  # Needed to break cycles when an exception occurs.
   1433 
   1434     def _set_coroutine_wrapper(self, enabled):
   1435         try:

...........................................................................
C:\Users\Jamie\AppData\Local\Programs\Python\Python36\lib\asyncio\events.py in _run(self=<Handle BaseAsyncIOLoop._handle_events(832, 1)>)
    140             self._callback = None
    141             self._args = None
    142 
    143     def _run(self):
    144         try:
--> 145             self._callback(*self._args)
        self._callback = <bound method BaseAsyncIOLoop._handle_events of <tornado.platform.asyncio.AsyncIOMainLoop object>>
        self._args = (832, 1)
    146         except Exception as exc:
    147             cb = _format_callback_source(self._callback, self._args)
    148             msg = 'Exception in callback {}'.format(cb)
    149             context = {

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\tornado\platform\asyncio.py in _handle_events(self=<tornado.platform.asyncio.AsyncIOMainLoop object>, fd=832, events=1)
    112             self.writers.remove(fd)
    113         del self.handlers[fd]
    114 
    115     def _handle_events(self, fd, events):
    116         fileobj, handler_func = self.handlers[fd]
--> 117         handler_func(fileobj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fileobj = <zmq.sugar.socket.Socket object>
        events = 1
    118 
    119     def start(self):
    120         try:
    121             old_loop = asyncio.get_event_loop()

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    445             return
    446         zmq_events = self.socket.EVENTS
    447         try:
    448             # dispatch events:
    449             if zmq_events & zmq.POLLIN and self.receiving():
--> 450                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    451                 if not self.socket:
    452                     return
    453             if zmq_events & zmq.POLLOUT and self.sending():
    454                 self._handle_send()

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    475             else:
    476                 raise
    477         else:
    478             if self._recv_callback:
    479                 callback = self._recv_callback
--> 480                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    481         
    482 
    483     def _handle_send(self):
    484         """Handle a send event."""

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\zmq\eventloop\zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    427         close our socket."""
    428         try:
    429             # Use a NullContext to ensure that all StackContexts are run
    430             # inside our blanket exception handler rather than outside.
    431             with stack_context.NullContext():
--> 432                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    433         except:
    434             gen_log.error("Uncaught exception in ZMQStream callback",
    435                           exc_info=True)
    436             # Re-raise the exception so that IOLoop.handle_callback_exception

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel\kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': False, 'code': 'from sklearn.model_selection import GridSearchCV...t)\ngs_clf_svm.best_score_\ngs_clf_svm.best_params_', 'output_type': '', 'silent': False, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 7, 1, 20, 54, 55, 745053, tzinfo=tzutc()), 'msg_id': '2b96a7c3-6970-438d-80b5-d30bbd324f94', 'msg_type': 'execute_request', 'session': 'd192dbcf-5f26-4238-9f59-d04b9cf0f5b0', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '2b96a7c3-6970-438d-80b5-d30bbd324f94', 'msg_type': 'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'c2dceb38-806e008885734c5005c90417']
        msg = {'buffers': [], 'content': {'allow_stdin': False, 'code': 'from sklearn.model_selection import GridSearchCV...t)\ngs_clf_svm.best_score_\ngs_clf_svm.best_params_', 'output_type': '', 'silent': False, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 7, 1, 20, 54, 55, 745053, tzinfo=tzutc()), 'msg_id': '2b96a7c3-6970-438d-80b5-d30bbd324f94', 'msg_type': 'execute_request', 'session': 'd192dbcf-5f26-4238-9f59-d04b9cf0f5b0', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '2b96a7c3-6970-438d-80b5-d30bbd324f94', 'msg_type': 'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'c2dceb38-806e008885734c5005c90417'], parent={'buffers': [], 'content': {'allow_stdin': False, 'code': 'from sklearn.model_selection import GridSearchCV...t)\ngs_clf_svm.best_score_\ngs_clf_svm.best_params_', 'output_type': '', 'silent': False, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 7, 1, 20, 54, 55, 745053, tzinfo=tzutc()), 'msg_id': '2b96a7c3-6970-438d-80b5-d30bbd324f94', 'msg_type': 'execute_request', 'session': 'd192dbcf-5f26-4238-9f59-d04b9cf0f5b0', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '2b96a7c3-6970-438d-80b5-d30bbd324f94', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = False
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code='from sklearn.model_selection import GridSearchCV...t)\ngs_clf_svm.best_score_\ngs_clf_svm.best_params_', silent=False, store_history=True, user_expressions={}, allow_stdin=False)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = 'from sklearn.model_selection import GridSearchCV...t)\ngs_clf_svm.best_score_\ngs_clf_svm.best_params_'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=('from sklearn.model_selection import GridSearchCV...t)\ngs_clf_svm.best_score_\ngs_clf_svm.best_params_',), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ('from sklearn.model_selection import GridSearchCV...t)\ngs_clf_svm.best_score_\ngs_clf_svm.best_params_',)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='from sklearn.model_selection import GridSearchCV...t)\ngs_clf_svm.best_score_\ngs_clf_svm.best_params_', store_history=True, silent=False, shell_futures=True)
   2657         -------
   2658         result : :class:`ExecutionResult`
   2659         """
   2660         try:
   2661             result = self._run_cell(
-> 2662                 raw_cell, store_history, silent, shell_futures)
        raw_cell = 'from sklearn.model_selection import GridSearchCV...t)\ngs_clf_svm.best_score_\ngs_clf_svm.best_params_'
        store_history = True
        silent = False
        shell_futures = True
   2663         finally:
   2664             self.events.trigger('post_execute')
   2665             if not silent:
   2666                 self.events.trigger('post_run_cell', result)

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\IPython\core\interactiveshell.py in _run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='from sklearn.model_selection import GridSearchCV...t)\ngs_clf_svm.best_score_\ngs_clf_svm.best_params_', store_history=True, silent=False, shell_futures=True)
   2780                 self.displayhook.exec_result = result
   2781 
   2782                 # Execute the user code
   2783                 interactivity = 'none' if silent else self.ast_node_interactivity
   2784                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2785                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2786                 
   2787                 self.last_execution_succeeded = not has_raised
   2788                 self.last_execution_result = result
   2789 

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.ImportFrom object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Expr object>], cell_name='<ipython-input-12-a8bffab43345>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 25dd43f31d0, executio...rue silent=False shell_futures=True> result=None>)
   2898 
   2899         try:
   2900             for i, node in enumerate(to_run_exec):
   2901                 mod = ast.Module([node])
   2902                 code = compiler(mod, cell_name, "exec")
-> 2903                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x0000025DD42C06F0, file "<ipython-input-12-a8bffab43345>", line 8>
        result = <ExecutionResult object at 25dd43f31d0, executio...rue silent=False shell_futures=True> result=None>
   2904                     return True
   2905 
   2906             for i, node in enumerate(to_run_interactive):
   2907                 mod = ast.Interactive([node])

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x0000025DD42C06F0, file "<ipython-input-12-a8bffab43345>", line 8>, result=<ExecutionResult object at 25dd43f31d0, executio...rue silent=False shell_futures=True> result=None>)
   2958         outflag = True  # happens in more places, so it's easier as default
   2959         try:
   2960             try:
   2961                 self.hooks.pre_run_code_hook()
   2962                 #rprint('Running code', repr(code_obj)) # dbg
-> 2963                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x0000025DD42C06F0, file "<ipython-input-12-a8bffab43345>", line 8>
        self.user_global_ns = {'CountVectorizer': <class 'sklearn.feature_extraction.text.CountVectorizer'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import nltk\n\nfrom nltk.stem.snowball import Snow...n((predicted_mnb_stemmed == twenty_train.target))', "from sklearn.datasets import fetch_20newsgroups\n... fetch_20newsgroups(subset='train', shuffle=True)", 'from sklearn.feature_extraction.text import Coun...transform(twenty_train.data)\nX_train_counts.shape', 'from sklearn.feature_extraction.text import Tfid...fit_transform(X_train_counts)\nX_train_tfdif.shape', 'from sklearn.naive_bayes import MultinomialNB\ncl...omialNB().fit(X_train_tfdif, twenty_train.target)', 'from sklearn.pipeline import Pipeline\ntext_clf =...t_clf.fit(twenty_train.data, twenty_train.target)', 'import numpy as np\ntwenty_test = fetch_20newsgro...st.data)\nnp.mean(predicted == twenty_test.target)', 'from sklearn.linear_model import SGDClassifier\n\n...ata)\nnp.mean(predicted_svm == twenty_test.target)', "from sklearn.model_selection import GridSearchCV...       'clf_alpha': (1e-2, 1e-3),\n              }", 'gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)', 'gs_clf.best_score_\ngs_clf.best_params_', 'from sklearn.model_selection import GridSearchCV...t)\ngs_clf_svm.best_score_\ngs_clf_svm.best_params_'], 'MultinomialNB': <class 'sklearn.naive_bayes.MultinomialNB'>, 'Out': {3: (11314, 130107), 4: (11314, 130107), 7: 0.7738980350504514, 8: 0.8238183749336165}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'SGDClassifier': <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'>, 'SnowballStemmer': <class 'nltk.stem.snowball.SnowballStemmer'>, 'TfidfTransformer': <class 'sklearn.feature_extraction.text.TfidfTransformer'>, 'X_train_counts': <11314x130107 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format>, ...}
        self.user_ns = {'CountVectorizer': <class 'sklearn.feature_extraction.text.CountVectorizer'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import nltk\n\nfrom nltk.stem.snowball import Snow...n((predicted_mnb_stemmed == twenty_train.target))', "from sklearn.datasets import fetch_20newsgroups\n... fetch_20newsgroups(subset='train', shuffle=True)", 'from sklearn.feature_extraction.text import Coun...transform(twenty_train.data)\nX_train_counts.shape', 'from sklearn.feature_extraction.text import Tfid...fit_transform(X_train_counts)\nX_train_tfdif.shape', 'from sklearn.naive_bayes import MultinomialNB\ncl...omialNB().fit(X_train_tfdif, twenty_train.target)', 'from sklearn.pipeline import Pipeline\ntext_clf =...t_clf.fit(twenty_train.data, twenty_train.target)', 'import numpy as np\ntwenty_test = fetch_20newsgro...st.data)\nnp.mean(predicted == twenty_test.target)', 'from sklearn.linear_model import SGDClassifier\n\n...ata)\nnp.mean(predicted_svm == twenty_test.target)', "from sklearn.model_selection import GridSearchCV...       'clf_alpha': (1e-2, 1e-3),\n              }", 'gs_clf = GridSearchCV(text_clf, parameters, n_jo...s_clf.fit(twenty_train.data, twenty_train.target)', 'gs_clf.best_score_\ngs_clf.best_params_', 'from sklearn.model_selection import GridSearchCV...t)\ngs_clf_svm.best_score_\ngs_clf_svm.best_params_'], 'MultinomialNB': <class 'sklearn.naive_bayes.MultinomialNB'>, 'Out': {3: (11314, 130107), 4: (11314, 130107), 7: 0.7738980350504514, 8: 0.8238183749336165}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'SGDClassifier': <class 'sklearn.linear_model.stochastic_gradient.SGDClassifier'>, 'SnowballStemmer': <class 'nltk.stem.snowball.SnowballStemmer'>, 'TfidfTransformer': <class 'sklearn.feature_extraction.text.TfidfTransformer'>, 'X_train_counts': <11314x130107 sparse matrix of type '<class 'num... stored elements in Compressed Sparse Row format>, ...}
   2964             finally:
   2965                 # Reset our crash handler in place
   2966                 sys.excepthook = old_excepthook
   2967         except SystemExit as e:

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\<ipython-input-12-a8bffab43345> in <module>()
      3               'tfidf_use_idf': (True, False),
      4               'clf-svm_alpha': (1e-2, 1e-3),
      5               }
      6 
      7 gs_clf_svm = GridSearchCV(text_clf_svm, parameters_svm, n_jobs=-1)
----> 8 gs_clf_svm = gs_clf_svm.fit(twenty_train.data, twenty_train.target)
      9 gs_clf_svm.best_score_
     10 gs_clf_svm.best_params_

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\model_selection\_search.py in fit(self=GridSearchCV(cv=None, error_score='raise',
     ...ain_score='warn',
       scoring=None, verbose=0), X=["From: lerxst@wam.umd.edu (where's my thing)\nSubj...ught to you by your neighborhood Lerxst ----\n\n\n\n\n", 'From: guykuo@carson.u.washington.edu (Guy Kuo)\nS...poll. Thanks.\n\nGuy Kuo <guykuo@u.washington.edu>\n', 'From: twillis@ec.ecn.purdue.edu (Thomas E Willis... enemies of truth than lies."  - F. W.\nNietzsche\n', 'From: jgreen@amber (Joe Green)\nSubject: Re: Weit...th no sense of humor."\n\t\t\t\t\t\t-- Jonathan Winters\n', "From: jcm@head-cfa.harvard.edu (Jonathan McDowel.... 213 before liftoff, ignore it'.\n\n - Jonathan\n\n\n", 'From: dfo@vttoulu.tko.vtt.fi (Foxvog Douglas)\nSu...cket...\n\n\n\n-- \ndoug foxvog\ndouglas.foxvog@vtt.fi\n', 'From: bmdelane@quads.uchicago.edu (brian manning...ber. "Hmmm... \'News?\' What\'s\nthis?"....)\n\n-Brian\n', 'From: bgrubb@dante.nmsu.edu (GRUBB)\nSubject: Re:...is \n(Digital Review, Oct 21, 1991 v8 n33 p8(1)).\n', 'From: holmes7000@iscsvax.uni.edu\nSubject: WIn 3....eciated.\n\n\nThanx,\n\n-Brando\n\nPS Please E-mail me\n\n', 'From: kerr@ux1.cso.uiuc.edu (Stan Kerr)\nSubject:...Phone: 217-333-5217  Email: stankerr@uiuc.edu   \n', 'From: irwin@cmptrc.lonestar.org (Irwin Arnstein)...------------------------------------------------\n', 'From: david@terminus.ericsson.se (David Bold)\nSu...shes cried,\nAs they swam its clearness through.\n\n', 'From: rodc@fc.hp.com (Rod Cerkoney)\nSubject: *$G...____________________/      \\           \\/    \\__\n', 'From: dbm0000@tm0006.lerc.nasa.gov (David B. Mck...ilization\n  - 1 micro-g thru out the core module\n', 'From: jllee@acsu.buffalo.edu (Johnny L Lee)\nSubj...onable.Very Reasonable.\n\n\t\t\t\t\tThanks,\n\t\t\t\t\t\tJohn\n', 'From: mathew <mathew@mantis.co.uk>\nSubject: Re: ...erally died of\nmalnutrition or disease.\n\n\nmathew\n', 'From: ab@nova.cc.purdue.edu (Allen B)\nSubject: R...soning- not\nthat anyone does, of course! :-)\n\nab\n', 'From: CPKJP@vm.cc.latech.edu (Kevin Parker)\nSubj...s to insurance\ncompanies 8^).\n \nGood luck,\nSerge\n', 'From: ritley@uimrl7.mrl.uiuc.edu ()\nSubject: SEE...er).\n\nAny pointers would be greatly appreciated!\n', 'From: abarden@tybse1.uucp (Ann Marie Barden)\nSub...!\n\nAnn Marie Barden  \tabarden@afseo.eglin.af.mil\n', ...], y=array([7, 4, 4, ..., 3, 1, 8]), groups=None, **fit_params={})
    634                                   return_train_score=self.return_train_score,
    635                                   return_n_test_samples=True,
    636                                   return_times=True, return_parameters=False,
    637                                   error_score=self.error_score)
    638           for parameters, (train, test) in product(candidate_params,
--> 639                                                    cv.split(X, y, groups)))
        cv.split = <bound method StratifiedKFold.split of Stratifie...ld(n_splits=3, random_state=None, shuffle=False)>
        X = ["From: lerxst@wam.umd.edu (where's my thing)\nSubj...ught to you by your neighborhood Lerxst ----\n\n\n\n\n", 'From: guykuo@carson.u.washington.edu (Guy Kuo)\nS...poll. Thanks.\n\nGuy Kuo <guykuo@u.washington.edu>\n', 'From: twillis@ec.ecn.purdue.edu (Thomas E Willis... enemies of truth than lies."  - F. W.\nNietzsche\n', 'From: jgreen@amber (Joe Green)\nSubject: Re: Weit...th no sense of humor."\n\t\t\t\t\t\t-- Jonathan Winters\n', "From: jcm@head-cfa.harvard.edu (Jonathan McDowel.... 213 before liftoff, ignore it'.\n\n - Jonathan\n\n\n", 'From: dfo@vttoulu.tko.vtt.fi (Foxvog Douglas)\nSu...cket...\n\n\n\n-- \ndoug foxvog\ndouglas.foxvog@vtt.fi\n', 'From: bmdelane@quads.uchicago.edu (brian manning...ber. "Hmmm... \'News?\' What\'s\nthis?"....)\n\n-Brian\n', 'From: bgrubb@dante.nmsu.edu (GRUBB)\nSubject: Re:...is \n(Digital Review, Oct 21, 1991 v8 n33 p8(1)).\n', 'From: holmes7000@iscsvax.uni.edu\nSubject: WIn 3....eciated.\n\n\nThanx,\n\n-Brando\n\nPS Please E-mail me\n\n', 'From: kerr@ux1.cso.uiuc.edu (Stan Kerr)\nSubject:...Phone: 217-333-5217  Email: stankerr@uiuc.edu   \n', 'From: irwin@cmptrc.lonestar.org (Irwin Arnstein)...------------------------------------------------\n', 'From: david@terminus.ericsson.se (David Bold)\nSu...shes cried,\nAs they swam its clearness through.\n\n', 'From: rodc@fc.hp.com (Rod Cerkoney)\nSubject: *$G...____________________/      \\           \\/    \\__\n', 'From: dbm0000@tm0006.lerc.nasa.gov (David B. Mck...ilization\n  - 1 micro-g thru out the core module\n', 'From: jllee@acsu.buffalo.edu (Johnny L Lee)\nSubj...onable.Very Reasonable.\n\n\t\t\t\t\tThanks,\n\t\t\t\t\t\tJohn\n', 'From: mathew <mathew@mantis.co.uk>\nSubject: Re: ...erally died of\nmalnutrition or disease.\n\n\nmathew\n', 'From: ab@nova.cc.purdue.edu (Allen B)\nSubject: R...soning- not\nthat anyone does, of course! :-)\n\nab\n', 'From: CPKJP@vm.cc.latech.edu (Kevin Parker)\nSubj...s to insurance\ncompanies 8^).\n \nGood luck,\nSerge\n', 'From: ritley@uimrl7.mrl.uiuc.edu ()\nSubject: SEE...er).\n\nAny pointers would be greatly appreciated!\n', 'From: abarden@tybse1.uucp (Ann Marie Barden)\nSub...!\n\nAnn Marie Barden  \tabarden@afseo.eglin.af.mil\n', ...]
        y = array([7, 4, 4, ..., 3, 1, 8])
        groups = None
    640 
    641         # if one choose to see train score, "out" will contain train score info
    642         if self.return_train_score:
    643             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV.fit.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Sun Jul  1 21:54:58 2018
PID: 15748Python 3.6.4: C:\Users\Jamie\PycharmProjects\DevProject\venv\Scripts\python.exe
...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _fit_and_score>, (Pipeline(memory=None,
     steps=[('vect', Count...
       tol=None, verbose=0, warm_start=False))]), ["From: lerxst@wam.umd.edu (where's my thing)\nSubj...ught to you by your neighborhood Lerxst ----\n\n\n\n\n", 'From: guykuo@carson.u.washington.edu (Guy Kuo)\nS...poll. Thanks.\n\nGuy Kuo <guykuo@u.washington.edu>\n', 'From: twillis@ec.ecn.purdue.edu (Thomas E Willis... enemies of truth than lies."  - F. W.\nNietzsche\n', 'From: jgreen@amber (Joe Green)\nSubject: Re: Weit...th no sense of humor."\n\t\t\t\t\t\t-- Jonathan Winters\n', "From: jcm@head-cfa.harvard.edu (Jonathan McDowel.... 213 before liftoff, ignore it'.\n\n - Jonathan\n\n\n", 'From: dfo@vttoulu.tko.vtt.fi (Foxvog Douglas)\nSu...cket...\n\n\n\n-- \ndoug foxvog\ndouglas.foxvog@vtt.fi\n', 'From: bmdelane@quads.uchicago.edu (brian manning...ber. "Hmmm... \'News?\' What\'s\nthis?"....)\n\n-Brian\n', 'From: bgrubb@dante.nmsu.edu (GRUBB)\nSubject: Re:...is \n(Digital Review, Oct 21, 1991 v8 n33 p8(1)).\n', 'From: holmes7000@iscsvax.uni.edu\nSubject: WIn 3....eciated.\n\n\nThanx,\n\n-Brando\n\nPS Please E-mail me\n\n', 'From: kerr@ux1.cso.uiuc.edu (Stan Kerr)\nSubject:...Phone: 217-333-5217  Email: stankerr@uiuc.edu   \n', 'From: irwin@cmptrc.lonestar.org (Irwin Arnstein)...------------------------------------------------\n', 'From: david@terminus.ericsson.se (David Bold)\nSu...shes cried,\nAs they swam its clearness through.\n\n', 'From: rodc@fc.hp.com (Rod Cerkoney)\nSubject: *$G...____________________/      \\           \\/    \\__\n', 'From: dbm0000@tm0006.lerc.nasa.gov (David B. Mck...ilization\n  - 1 micro-g thru out the core module\n', 'From: jllee@acsu.buffalo.edu (Johnny L Lee)\nSubj...onable.Very Reasonable.\n\n\t\t\t\t\tThanks,\n\t\t\t\t\t\tJohn\n', 'From: mathew <mathew@mantis.co.uk>\nSubject: Re: ...erally died of\nmalnutrition or disease.\n\n\nmathew\n', 'From: ab@nova.cc.purdue.edu (Allen B)\nSubject: R...soning- not\nthat anyone does, of course! :-)\n\nab\n', 'From: CPKJP@vm.cc.latech.edu (Kevin Parker)\nSubj...s to insurance\ncompanies 8^).\n \nGood luck,\nSerge\n', 'From: ritley@uimrl7.mrl.uiuc.edu ()\nSubject: SEE...er).\n\nAny pointers would be greatly appreciated!\n', 'From: abarden@tybse1.uucp (Ann Marie Barden)\nSub...!\n\nAnn Marie Barden  \tabarden@afseo.eglin.af.mil\n', ...], array([7, 4, 4, ..., 3, 1, 8]), {'score': <function _passthrough_scorer>}, array([ 3578,  3609,  3617, ..., 11311, 11312, 11313]), array([   0,    1,    2, ..., 4088, 4095, 4111]), 0, {'clf-svm_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (Pipeline(memory=None,
     steps=[('vect', Count...
       tol=None, verbose=0, warm_start=False))]), ["From: lerxst@wam.umd.edu (where's my thing)\nSubj...ught to you by your neighborhood Lerxst ----\n\n\n\n\n", 'From: guykuo@carson.u.washington.edu (Guy Kuo)\nS...poll. Thanks.\n\nGuy Kuo <guykuo@u.washington.edu>\n', 'From: twillis@ec.ecn.purdue.edu (Thomas E Willis... enemies of truth than lies."  - F. W.\nNietzsche\n', 'From: jgreen@amber (Joe Green)\nSubject: Re: Weit...th no sense of humor."\n\t\t\t\t\t\t-- Jonathan Winters\n', "From: jcm@head-cfa.harvard.edu (Jonathan McDowel.... 213 before liftoff, ignore it'.\n\n - Jonathan\n\n\n", 'From: dfo@vttoulu.tko.vtt.fi (Foxvog Douglas)\nSu...cket...\n\n\n\n-- \ndoug foxvog\ndouglas.foxvog@vtt.fi\n', 'From: bmdelane@quads.uchicago.edu (brian manning...ber. "Hmmm... \'News?\' What\'s\nthis?"....)\n\n-Brian\n', 'From: bgrubb@dante.nmsu.edu (GRUBB)\nSubject: Re:...is \n(Digital Review, Oct 21, 1991 v8 n33 p8(1)).\n', 'From: holmes7000@iscsvax.uni.edu\nSubject: WIn 3....eciated.\n\n\nThanx,\n\n-Brando\n\nPS Please E-mail me\n\n', 'From: kerr@ux1.cso.uiuc.edu (Stan Kerr)\nSubject:...Phone: 217-333-5217  Email: stankerr@uiuc.edu   \n', 'From: irwin@cmptrc.lonestar.org (Irwin Arnstein)...------------------------------------------------\n', 'From: david@terminus.ericsson.se (David Bold)\nSu...shes cried,\nAs they swam its clearness through.\n\n', 'From: rodc@fc.hp.com (Rod Cerkoney)\nSubject: *$G...____________________/      \\           \\/    \\__\n', 'From: dbm0000@tm0006.lerc.nasa.gov (David B. Mck...ilization\n  - 1 micro-g thru out the core module\n', 'From: jllee@acsu.buffalo.edu (Johnny L Lee)\nSubj...onable.Very Reasonable.\n\n\t\t\t\t\tThanks,\n\t\t\t\t\t\tJohn\n', 'From: mathew <mathew@mantis.co.uk>\nSubject: Re: ...erally died of\nmalnutrition or disease.\n\n\nmathew\n', 'From: ab@nova.cc.purdue.edu (Allen B)\nSubject: R...soning- not\nthat anyone does, of course! :-)\n\nab\n', 'From: CPKJP@vm.cc.latech.edu (Kevin Parker)\nSubj...s to insurance\ncompanies 8^).\n \nGood luck,\nSerge\n', 'From: ritley@uimrl7.mrl.uiuc.edu ()\nSubject: SEE...er).\n\nAny pointers would be greatly appreciated!\n', 'From: abarden@tybse1.uucp (Ann Marie Barden)\nSub...!\n\nAnn Marie Barden  \tabarden@afseo.eglin.af.mil\n', ...], array([7, 4, 4, ..., 3, 1, 8]), {'score': <function _passthrough_scorer>}, array([ 3578,  3609,  3617, ..., 11311, 11312, 11313]), array([   0,    1,    2, ..., 4088, 4095, 4111]), 0, {'clf-svm_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator=Pipeline(memory=None,
     steps=[('vect', Count...
       tol=None, verbose=0, warm_start=False))]), X=["From: lerxst@wam.umd.edu (where's my thing)\nSubj...ught to you by your neighborhood Lerxst ----\n\n\n\n\n", 'From: guykuo@carson.u.washington.edu (Guy Kuo)\nS...poll. Thanks.\n\nGuy Kuo <guykuo@u.washington.edu>\n', 'From: twillis@ec.ecn.purdue.edu (Thomas E Willis... enemies of truth than lies."  - F. W.\nNietzsche\n', 'From: jgreen@amber (Joe Green)\nSubject: Re: Weit...th no sense of humor."\n\t\t\t\t\t\t-- Jonathan Winters\n', "From: jcm@head-cfa.harvard.edu (Jonathan McDowel.... 213 before liftoff, ignore it'.\n\n - Jonathan\n\n\n", 'From: dfo@vttoulu.tko.vtt.fi (Foxvog Douglas)\nSu...cket...\n\n\n\n-- \ndoug foxvog\ndouglas.foxvog@vtt.fi\n', 'From: bmdelane@quads.uchicago.edu (brian manning...ber. "Hmmm... \'News?\' What\'s\nthis?"....)\n\n-Brian\n', 'From: bgrubb@dante.nmsu.edu (GRUBB)\nSubject: Re:...is \n(Digital Review, Oct 21, 1991 v8 n33 p8(1)).\n', 'From: holmes7000@iscsvax.uni.edu\nSubject: WIn 3....eciated.\n\n\nThanx,\n\n-Brando\n\nPS Please E-mail me\n\n', 'From: kerr@ux1.cso.uiuc.edu (Stan Kerr)\nSubject:...Phone: 217-333-5217  Email: stankerr@uiuc.edu   \n', 'From: irwin@cmptrc.lonestar.org (Irwin Arnstein)...------------------------------------------------\n', 'From: david@terminus.ericsson.se (David Bold)\nSu...shes cried,\nAs they swam its clearness through.\n\n', 'From: rodc@fc.hp.com (Rod Cerkoney)\nSubject: *$G...____________________/      \\           \\/    \\__\n', 'From: dbm0000@tm0006.lerc.nasa.gov (David B. Mck...ilization\n  - 1 micro-g thru out the core module\n', 'From: jllee@acsu.buffalo.edu (Johnny L Lee)\nSubj...onable.Very Reasonable.\n\n\t\t\t\t\tThanks,\n\t\t\t\t\t\tJohn\n', 'From: mathew <mathew@mantis.co.uk>\nSubject: Re: ...erally died of\nmalnutrition or disease.\n\n\nmathew\n', 'From: ab@nova.cc.purdue.edu (Allen B)\nSubject: R...soning- not\nthat anyone does, of course! :-)\n\nab\n', 'From: CPKJP@vm.cc.latech.edu (Kevin Parker)\nSubj...s to insurance\ncompanies 8^).\n \nGood luck,\nSerge\n', 'From: ritley@uimrl7.mrl.uiuc.edu ()\nSubject: SEE...er).\n\nAny pointers would be greatly appreciated!\n', 'From: abarden@tybse1.uucp (Ann Marie Barden)\nSub...!\n\nAnn Marie Barden  \tabarden@afseo.eglin.af.mil\n', ...], y=array([7, 4, 4, ..., 3, 1, 8]), scorer={'score': <function _passthrough_scorer>}, train=array([ 3578,  3609,  3617, ..., 11311, 11312, 11313]), test=array([   0,    1,    2, ..., 4088, 4095, 4111]), verbose=0, parameters={'clf-svm_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)}, fit_params={}, return_train_score='warn', return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    439                       for k, v in fit_params.items()])
    440 
    441     test_scores = {}
    442     train_scores = {}
    443     if parameters is not None:
--> 444         estimator.set_params(**parameters)
        estimator.set_params = <bound method Pipeline.set_params of Pipeline(me...       tol=None, verbose=0, warm_start=False))])>
        parameters = {'clf-svm_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)}
    445 
    446     start_time = time.time()
    447 
    448     X_train, y_train = _safe_split(estimator, X, y, train)

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\pipeline.py in set_params(self=Pipeline(memory=None,
     steps=[('vect', Count...
       tol=None, verbose=0, warm_start=False))]), **kwargs={'clf-svm_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)})
    137 
    138         Returns
    139         -------
    140         self
    141         """
--> 142         self._set_params('steps', **kwargs)
        self._set_params = <bound method _BaseComposition._set_params of Pi...       tol=None, verbose=0, warm_start=False))])>
        kwargs = {'clf-svm_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)}
    143         return self
    144 
    145     def _validate_steps(self):
    146         names, estimators = zip(*self.steps)

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\utils\metaestimators.py in _set_params(self=Pipeline(memory=None,
     steps=[('vect', Count...
       tol=None, verbose=0, warm_start=False))]), attr='steps', **params={'clf-svm_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)})
     44         names, _ = zip(*getattr(self, attr))
     45         for name in list(six.iterkeys(params)):
     46             if '__' not in name and name in names:
     47                 self._replace_estimator(attr, name, params.pop(name))
     48         # 3. Step parameters and other initilisation arguments
---> 49         super(_BaseComposition, self).set_params(**params)
        self.set_params = <bound method Pipeline.set_params of Pipeline(me...       tol=None, verbose=0, warm_start=False))])>
        params = {'clf-svm_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)}
     50         return self
     51 
     52     def _replace_estimator(self, attr, name, new_val):
     53         # assumes `name` is a valid estimator name

...........................................................................
C:\Users\Jamie\PycharmProjects\DevProject\venv\lib\site-packages\sklearn\base.py in set_params(self=Pipeline(memory=None,
     steps=[('vect', Count...
       tol=None, verbose=0, warm_start=False))]), **params={'clf-svm_alpha': 0.01, 'tfidf_use_idf': True, 'vect_ngram_range': (1, 1)})
    269             key, delim, sub_key = key.partition('__')
    270             if key not in valid_params:
    271                 raise ValueError('Invalid parameter %s for estimator %s. '
    272                                  'Check the list of available parameters '
    273                                  'with `estimator.get_params().keys()`.' %
--> 274                                  (key, self))
        key = 'clf-svm_alpha'
        self = Pipeline(memory=None,
     steps=[('vect', Count...
       tol=None, verbose=0, warm_start=False))])
    275 
    276             if delim:
    277                 nested_params[key][sub_key] = value
    278             else:

ValueError: Invalid parameter clf-svm_alpha for estimator Pipeline(memory=None,
     steps=[('vect', CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 1), preprocessor=None, stop_words=None,
        strip...ty='l2', power_t=0.5, random_state=42, shuffle=True,
       tol=None, verbose=0, warm_start=False))]). Check the list of available parameters with `estimator.get_params().keys()`.
___________________________________________________________________________

In [13]:
from sklearn.pipeline import Pipeline
text_clf = Pipeline([('vect', CountVectorizer(stop_words='english')),
                     ('tfidf', TfidfTransformer()),
                     ('clf', MultinomialNB()),
                     ])

text_clf = text_clf.fit(twenty_train.data, twenty_train.target)

In [14]:
import numpy as np
twenty_test = fetch_20newsgroups(subset='test', shuffle=True)
predicted = text_clf.predict(twenty_test.data)
np.mean(predicted == twenty_test.target)

0.8169144981412639

In [18]:
import nltk

from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer('english', ignore_stopwords=True)

class StemmedCountVectorizer(CountVectorizer):
    def build_analyzer(self):
        analyzer = super(StemmedCountVectorizer, self).build_analyzer()
        return lambda doc: ([stemmer.stem(w) for w in analyzer(doc)])
    
stemmed_count_vect = StemmedCountVectorizer(stop_words='english')

text_mnb_stemmed = Pipeline([('vext', stemmed_count_vect),
                             ('tfidf', TfidfTransformer()),
                             ('mnb', MultinomialNB(fit_prior=False)),
                             ])
    
text_mnb_stemmed = text_mnb_stemmed.fit(twenty_train.data, twenty_train.target)
    
    
predicted_mnb_stemmed = text_mnb_stemmed.predict(twenty_test.data)
    
np.mean(predicted_mnb_stemmed == twenty_train.target)



0.0