# Import Libraries

In [1]:
import pandas as pd
pd.set_option("display.max_columns",500)
pd.set_option("display.max_rows",500)

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, VotingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn import svm

# Basic Model
## Import Train/Test Data

In [2]:
#import data
test = pd.read_csv('./data/west_nile_test.csv', index_col=0)
train = pd.read_csv('./data/west_nile_train.csv', index_col=0)

## Set X and y variables

In [3]:
#id X and y
X = train.drop(['Date', 'NumMosquitos', 'WnvPresent'], axis=1)
y = train.WnvPresent

## Train Test Split

In [4]:
#train split
X_train, X_test, y_train, y_test = train_test_split(X, y)

## Cross Val Score Potential Classification Models

In [5]:
#cross val score all potential classification models
lr = LogisticRegression() 
knn = KNeighborsClassifier() 
#nb = MultinomialNB() won't take neg vals
dt = DecisionTreeClassifier() 
et = ExtraTreeClassifier()
bag = BaggingClassifier()
rf = RandomForestClassifier()
gb = GradientBoostingClassifier()
ad = AdaBoostClassifier()
svm1 = svm.SVC()


print('lr', cross_val_score(lr, X_train, y_train).mean())
print('knn', cross_val_score(knn, X_train, y_train).mean())
#print('nb', cross_val_score(nb, X_train, y_train).mean()) 
print('dt', cross_val_score(dt, X_train, y_train).mean()) 
print('et', cross_val_score(et, X_train, y_train).mean()) 
print('bag', cross_val_score(bag, X_train, y_train).mean())
print('rf', cross_val_score(rf, X_train, y_train).mean()) 
print('gb', cross_val_score(gb, X_train, y_train).mean()) 
print('ad', cross_val_score(ad, X_train, y_train).mean()) 
print('svm', cross_val_score(svm1, X_train, y_train).mean())

lr 0.9472014791451411
knn 0.9436473737318808
dt 0.9458047504244688
et 0.9458047504244688
bag 0.9458047504244688
rf 0.9449162482401919
gb 0.9465666551863735
ad 0.9472014791451411
svm 0.9472014791451411


- All models seem to reveal similar enough results. 
- LogReg choosen for ease of interpretability.

In [6]:
#fit and score
lr.fit(X_train, y_train)
lr.score(X_test, y_test)

0.9486105824133994

In [7]:
lr_coefs = pd.DataFrame(lr.coef_, columns=X.columns, index=['coef']).T
lr_coefs.coef.sort_values(ascending=False)
#higher coefs are more likely to indicate WNV

Species_CULEX PIPIENS             1.415700
zipcode_60631                     1.179664
Trap_T003                         1.130706
Trap_T228                         0.923288
Species_CULEX PIPIENS/RESTUANS    0.876572
Trap_T096                         0.825998
Trap_T225                         0.799235
Trap_T002                         0.776093
zipcode_60656                     0.772163
Trap_T061                         0.766347
Trap_T006                         0.750869
Trap_T231                         0.729822
Trap_T128                         0.716716
Trap_T107                         0.649172
Trap_T008                         0.606829
Trap_T005                         0.603370
Trap_T900                         0.596111
zipcode_60666                     0.596111
Trap_T086                         0.566806
Trap_T013                         0.560434
Trap_T070                         0.551483
Trap_T030                         0.548906
Trap_T014                         0.526919
Trap_T009  

# Balanced Model
## Import Data

In [8]:
#import data
balenced_train = pd.read_csv('./data/BALANCED_west_nile_train.csv', index_col=0)

In [9]:
balenced_train.WnvPresent.value_counts()

0    9955
1    9951
Name: WnvPresent, dtype: int64

# Set X and y variables

In [10]:
#id X and y
X = balenced_train.drop(['Date', 'NumMosquitos', 'WnvPresent', 'Latitude', 'Longitude'], axis=1)
y = balenced_train.WnvPresent

# Train Test Split

In [11]:
#train split
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Cross Val Score Potential Classification Models

In [12]:
#cross val score all potential classification models
lr = LogisticRegression() 
knn = KNeighborsClassifier() 
#nb = MultinomialNB() won't take neg vals
dt = DecisionTreeClassifier() 
et = ExtraTreeClassifier()
bag = BaggingClassifier()
rf = RandomForestClassifier()
gb = GradientBoostingClassifier()
ad = AdaBoostClassifier()
#svm1 = svm.SVC() #takes a long time, doesn't score partic well.


print('lr', cross_val_score(lr, X_train, y_train).mean())
print('knn', cross_val_score(knn, X_train, y_train).mean())
#print('nb', cross_val_score(nb, X_train, y_train).mean()) 
print('dt', cross_val_score(dt, X_train, y_train).mean()) 
print('et', cross_val_score(et, X_train, y_train).mean()) 
print('bag', cross_val_score(bag, X_train, y_train).mean())
print('rf', cross_val_score(rf, X_train, y_train).mean()) 
print('gb', cross_val_score(gb, X_train, y_train).mean()) 
print('ad', cross_val_score(ad, X_train, y_train).mean()) 
#print('svm', cross_val_score(svm1, X_train, y_train).mean()) #.644


lr 0.6870525825834012
knn 0.6672926079732262
dt 0.7043333605944128
et 0.7039984599172947
bag 0.7056059347033203
rf 0.7033951755966895
gb 0.6822958136742248
ad 0.6680951472249737


In [13]:
lr.fit(X_train, y_train)
lr_coefs = pd.DataFrame(lr.coef_, columns=X.columns, index=['coef']).T
lr_coefs.coef.sort_values(ascending=False)
#higher coefs are more likely to indicate WNV

Trap_T225                         2.213412
Trap_T003                         2.213201
Species_CULEX PIPIENS             2.139395
Trap_T228                         2.123916
zipcode_60631                     1.579941
Trap_T230                         1.577707
Species_CULEX PIPIENS/RESTUANS    1.572804
Trap_T096                         1.525884
Trap_T231                         1.501315
Trap_T128                         1.496060
Trap_T014                         1.358716
Trap_T215                         1.353844
Trap_T155                         1.337028
Trap_T047                         1.209893
Trap_T002                         1.194072
zipcode_60656                     1.152549
Trap_T013                         1.127963
Trap_T027                         1.088643
Trap_T107                         1.049298
Trap_T086                         1.033620
Trap_T082                         0.980592
Trap_T035                         0.967150
Trap_T070                         0.960735
Trap_T114  

## Gridsearch

In [14]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import recall_score, make_scorer, f1_score

In [39]:
#Logistic Regression
grid_params = {
    'max_iter': [50,100,300,1000],
    'class_weight':['balanced'],
    'solver':['newton-cg','lbfgs','liblinear','sag','saga'],
    'multi_class':['ovr','multinomial']
    
}

gs = GridSearchCV(
    LogisticRegression(),
    grid_params,
    verbose = 1,
    cv = 5,
    n_jobs = -1
)

gs_results = gs.fit(X_train,y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits




JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/anaconda3/lib/python3.6/runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
/anaconda3/lib/python3.6/runpy.py in _run_code(code=<code object <module> at 0x1028306f0, file "/ana...3.6/site-packages/ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/anaconda3/lib/python3.6/site-packages/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x1028306f0, file "/ana...3.6/site-packages/ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': '/anaconda3/lib/python3.6/site-packages/__pycache__/ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.6/site-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    481         if self.poller is not None:
    482             self.poller.start()
    483         self.kernel.start()
    484         self.io_loop = ioloop.IOLoop.current()
    485         try:
--> 486             self.io_loop.start()
        self.io_loop.start = <bound method BaseAsyncIOLoop.start of <tornado.platform.asyncio.AsyncIOMainLoop object>>
    487         except KeyboardInterrupt:
    488             pass
    489 
    490 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py in start(self=<tornado.platform.asyncio.AsyncIOMainLoop object>)
    122         except (RuntimeError, AssertionError):
    123             old_loop = None
    124         try:
    125             self._setup_logging()
    126             asyncio.set_event_loop(self.asyncio_loop)
--> 127             self.asyncio_loop.run_forever()
        self.asyncio_loop.run_forever = <bound method BaseEventLoop.run_forever of <_Uni...EventLoop running=True closed=False debug=False>>
    128         finally:
    129             asyncio.set_event_loop(old_loop)
    130 
    131     def stop(self):

...........................................................................
/anaconda3/lib/python3.6/asyncio/base_events.py in run_forever(self=<_UnixSelectorEventLoop running=True closed=False debug=False>)
    417             sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook,
    418                                    finalizer=self._asyncgen_finalizer_hook)
    419         try:
    420             events._set_running_loop(self)
    421             while True:
--> 422                 self._run_once()
        self._run_once = <bound method BaseEventLoop._run_once of <_UnixS...EventLoop running=True closed=False debug=False>>
    423                 if self._stopping:
    424                     break
    425         finally:
    426             self._stopping = False

...........................................................................
/anaconda3/lib/python3.6/asyncio/base_events.py in _run_once(self=<_UnixSelectorEventLoop running=True closed=False debug=False>)
   1427                         logger.warning('Executing %s took %.3f seconds',
   1428                                        _format_handle(handle), dt)
   1429                 finally:
   1430                     self._current_handle = None
   1431             else:
-> 1432                 handle._run()
        handle._run = <bound method Handle._run of <Handle BaseAsyncIOLoop._handle_events(15, 1)>>
   1433         handle = None  # Needed to break cycles when an exception occurs.
   1434 
   1435     def _set_coroutine_wrapper(self, enabled):
   1436         try:

...........................................................................
/anaconda3/lib/python3.6/asyncio/events.py in _run(self=<Handle BaseAsyncIOLoop._handle_events(15, 1)>)
    140             self._callback = None
    141             self._args = None
    142 
    143     def _run(self):
    144         try:
--> 145             self._callback(*self._args)
        self._callback = <bound method BaseAsyncIOLoop._handle_events of <tornado.platform.asyncio.AsyncIOMainLoop object>>
        self._args = (15, 1)
    146         except Exception as exc:
    147             cb = _format_callback_source(self._callback, self._args)
    148             msg = 'Exception in callback {}'.format(cb)
    149             context = {

...........................................................................
/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py in _handle_events(self=<tornado.platform.asyncio.AsyncIOMainLoop object>, fd=15, events=1)
    112             self.writers.remove(fd)
    113         del self.handlers[fd]
    114 
    115     def _handle_events(self, fd, events):
    116         fileobj, handler_func = self.handlers[fd]
--> 117         handler_func(fileobj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fileobj = <zmq.sugar.socket.Socket object>
        events = 1
    118 
    119     def start(self):
    120         try:
    121             old_loop = asyncio.get_event_loop()

...........................................................................
/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    445             return
    446         zmq_events = self.socket.EVENTS
    447         try:
    448             # dispatch events:
    449             if zmq_events & zmq.POLLIN and self.receiving():
--> 450                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    451                 if not self.socket:
    452                     return
    453             if zmq_events & zmq.POLLOUT and self.sending():
    454                 self._handle_send()

...........................................................................
/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    475             else:
    476                 raise
    477         else:
    478             if self._recv_callback:
    479                 callback = self._recv_callback
--> 480                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    481         
    482 
    483     def _handle_send(self):
    484         """Handle a send event."""

...........................................................................
/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    427         close our socket."""
    428         try:
    429             # Use a NullContext to ensure that all StackContexts are run
    430             # inside our blanket exception handler rather than outside.
    431             with stack_context.NullContext():
--> 432                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    433         except:
    434             gen_log.error("Uncaught exception in ZMQStream callback",
    435                           exc_info=True)
    436             # Re-raise the exception so that IOLoop.handle_callback_exception

...........................................................................
/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': "#Logistic Regression\ngrid_params = {\n    'max_it...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 9, 21, 14, 12, 58, 920945, tzinfo=tzutc()), 'msg_id': '95b8814d33034ad88654ae6c6f33099b', 'msg_type': 'execute_request', 'session': '0f4788f72e904a668b00a37e34569968', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '95b8814d33034ad88654ae6c6f33099b', 'msg_type': 'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'0f4788f72e904a668b00a37e34569968']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': "#Logistic Regression\ngrid_params = {\n    'max_it...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 9, 21, 14, 12, 58, 920945, tzinfo=tzutc()), 'msg_id': '95b8814d33034ad88654ae6c6f33099b', 'msg_type': 'execute_request', 'session': '0f4788f72e904a668b00a37e34569968', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '95b8814d33034ad88654ae6c6f33099b', 'msg_type': 'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'0f4788f72e904a668b00a37e34569968'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': "#Logistic Regression\ngrid_params = {\n    'max_it...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 9, 21, 14, 12, 58, 920945, tzinfo=tzutc()), 'msg_id': '95b8814d33034ad88654ae6c6f33099b', 'msg_type': 'execute_request', 'session': '0f4788f72e904a668b00a37e34569968', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': '95b8814d33034ad88654ae6c6f33099b', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code="#Logistic Regression\ngrid_params = {\n    'max_it...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = "#Logistic Regression\ngrid_params = {\n    'max_it...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)"
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=("#Logistic Regression\ngrid_params = {\n    'max_it...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)",), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ("#Logistic Regression\ngrid_params = {\n    'max_it...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)",)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell="#Logistic Regression\ngrid_params = {\n    'max_it...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)", store_history=True, silent=False, shell_futures=True)
   2657         -------
   2658         result : :class:`ExecutionResult`
   2659         """
   2660         try:
   2661             result = self._run_cell(
-> 2662                 raw_cell, store_history, silent, shell_futures)
        raw_cell = "#Logistic Regression\ngrid_params = {\n    'max_it...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)"
        store_history = True
        silent = False
        shell_futures = True
   2663         finally:
   2664             self.events.trigger('post_execute')
   2665             if not silent:
   2666                 self.events.trigger('post_run_cell', result)

...........................................................................
/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in _run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell="#Logistic Regression\ngrid_params = {\n    'max_it...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)", store_history=True, silent=False, shell_futures=True)
   2780                 self.displayhook.exec_result = result
   2781 
   2782                 # Execute the user code
   2783                 interactivity = 'none' if silent else self.ast_node_interactivity
   2784                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2785                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2786                 
   2787                 self.last_execution_succeeded = not has_raised
   2788                 self.last_execution_result = result
   2789 

...........................................................................
/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>], cell_name='<ipython-input-39-8716d0b5f611>', interactivity='none', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 1a0a3f86a0, execution...rue silent=False shell_futures=True> result=None>)
   2898 
   2899         try:
   2900             for i, node in enumerate(to_run_exec):
   2901                 mod = ast.Module([node])
   2902                 code = compiler(mod, cell_name, "exec")
-> 2903                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x1a09ff0270, file "<ipython-input-39-8716d0b5f611>", line 18>
        result = <ExecutionResult object at 1a0a3f86a0, execution...rue silent=False shell_futures=True> result=None>
   2904                     return True
   2905 
   2906             for i, node in enumerate(to_run_interactive):
   2907                 mod = ast.Interactive([node])

...........................................................................
/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x1a09ff0270, file "<ipython-input-39-8716d0b5f611>", line 18>, result=<ExecutionResult object at 1a0a3f86a0, execution...rue silent=False shell_futures=True> result=None>)
   2958         outflag = True  # happens in more places, so it's easier as default
   2959         try:
   2960             try:
   2961                 self.hooks.pre_run_code_hook()
   2962                 #rprint('Running code', repr(code_obj)) # dbg
-> 2963                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x1a09ff0270, file "<ipython-input-39-8716d0b5f611>", line 18>
        self.user_global_ns = {'AdaBoostClassifier': <class 'sklearn.ensemble.weight_boosting.AdaBoostClassifier'>, 'BaggingClassifier': <class 'sklearn.ensemble.bagging.BaggingClassifier'>, 'DecisionTreeClassifier': <class 'sklearn.tree.tree.DecisionTreeClassifier'>, 'ExtraTreeClassifier': <class 'sklearn.tree.tree.ExtraTreeClassifier'>, 'GradientBoostingClassifier': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingClassifier'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import pandas as pd\npd.set_option("display.max_c...ifier, AdaBoostClassifier\nfrom sklearn import svm', "#import data\ntest = pd.read_csv('./data/west_nil...ad_csv('./data/west_nile_train.csv', index_col=0)", "#id X and y\nX = train.drop(['Date', 'NumMosquitos', 'WnvPresent'], axis=1)\ny = train.WnvPresent", '#train split\nX_train, X_test, y_train, y_test = train_test_split(X, y)', '#cross val score all potential classification mo..., cross_val_score(svm1, X_train, y_train).mean())', '#fit and score\nlr.fit(X_train, y_train)\nlr.score(X_test, y_test)', 'lr_coefs = pd.DataFrame(lr.coef_, columns=X.colu...se)\n#higher coefs are more likely to indicate WNV', "#import data\nbalenced_train = pd.read_csv('./data/BALANCED_west_nile_train.csv', index_col=0)", 'balenced_train.WnvPresent.value_counts()', "#id X and y\nX = balenced_train.drop(['Date', 'Nu...ongitude'], axis=1)\ny = balenced_train.WnvPresent", '#train split\nX_train, X_test, y_train, y_test = train_test_split(X, y)', '#cross val score all potential classification mo...s_val_score(svm1, X_train, y_train).mean()) #.644', 'lr.fit(X_train, y_train)\nlr_coefs = pd.DataFrame...se)\n#higher coefs are more likely to indicate WNV', 'from sklearn.model_selection import GridSearchCV...etrics import recall_score, make_scorer, f1_score', "#Logistic Regression\ngrid_params = {\n    'max_it...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)", "print('best param',gs.best_params_)\nprint('best score',gs.best_score_)", "#knn\ngrid_params = {\n    'n_neighbors':[3,5,7],\n...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)", "print('best param',gs.best_params_)\nprint('best score',gs.best_score_)", "#dt\ngrid_params = {\n    'criterion':['gini','ent...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)", ...], 'KNeighborsClassifier': <class 'sklearn.neighbors.classification.KNeighborsClassifier'>, 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'MultinomialNB': <class 'sklearn.naive_bayes.MultinomialNB'>, ...}
        self.user_ns = {'AdaBoostClassifier': <class 'sklearn.ensemble.weight_boosting.AdaBoostClassifier'>, 'BaggingClassifier': <class 'sklearn.ensemble.bagging.BaggingClassifier'>, 'DecisionTreeClassifier': <class 'sklearn.tree.tree.DecisionTreeClassifier'>, 'ExtraTreeClassifier': <class 'sklearn.tree.tree.ExtraTreeClassifier'>, 'GradientBoostingClassifier': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingClassifier'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import pandas as pd\npd.set_option("display.max_c...ifier, AdaBoostClassifier\nfrom sklearn import svm', "#import data\ntest = pd.read_csv('./data/west_nil...ad_csv('./data/west_nile_train.csv', index_col=0)", "#id X and y\nX = train.drop(['Date', 'NumMosquitos', 'WnvPresent'], axis=1)\ny = train.WnvPresent", '#train split\nX_train, X_test, y_train, y_test = train_test_split(X, y)', '#cross val score all potential classification mo..., cross_val_score(svm1, X_train, y_train).mean())', '#fit and score\nlr.fit(X_train, y_train)\nlr.score(X_test, y_test)', 'lr_coefs = pd.DataFrame(lr.coef_, columns=X.colu...se)\n#higher coefs are more likely to indicate WNV', "#import data\nbalenced_train = pd.read_csv('./data/BALANCED_west_nile_train.csv', index_col=0)", 'balenced_train.WnvPresent.value_counts()', "#id X and y\nX = balenced_train.drop(['Date', 'Nu...ongitude'], axis=1)\ny = balenced_train.WnvPresent", '#train split\nX_train, X_test, y_train, y_test = train_test_split(X, y)', '#cross val score all potential classification mo...s_val_score(svm1, X_train, y_train).mean()) #.644', 'lr.fit(X_train, y_train)\nlr_coefs = pd.DataFrame...se)\n#higher coefs are more likely to indicate WNV', 'from sklearn.model_selection import GridSearchCV...etrics import recall_score, make_scorer, f1_score', "#Logistic Regression\ngrid_params = {\n    'max_it...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)", "print('best param',gs.best_params_)\nprint('best score',gs.best_score_)", "#knn\ngrid_params = {\n    'n_neighbors':[3,5,7],\n...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)", "print('best param',gs.best_params_)\nprint('best score',gs.best_score_)", "#dt\ngrid_params = {\n    'criterion':['gini','ent...jobs = -1\n)\n\ngs_results = gs.fit(X_train,y_train)", ...], 'KNeighborsClassifier': <class 'sklearn.neighbors.classification.KNeighborsClassifier'>, 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'MultinomialNB': <class 'sklearn.naive_bayes.MultinomialNB'>, ...}
   2964             finally:
   2965                 # Reset our crash handler in place
   2966                 sys.excepthook = old_excepthook
   2967         except SystemExit as e:

...........................................................................
/Users/Celis/GA_work/Assignments/7_assignment/last_updated_project/west-nile-project-4/<ipython-input-39-8716d0b5f611> in <module>()
     13     verbose = 1,
     14     cv = 5,
     15     n_jobs = -1
     16 )
     17 
---> 18 gs_results = gs.fit(X_train,y_train)

...........................................................................
/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_search.py in fit(self=GridSearchCV(cv=5, error_score='raise',
       e...ain_score='warn',
       scoring=None, verbose=1), X=       Block  AddressAccuracy  Species_CULEX ERR...       0          0  

[14929 rows x 210 columns], y=94       0
9757     1
10237    1
10109    0
1009...  1
Name: WnvPresent, Length: 14929, dtype: int64, groups=None, **fit_params={})
    634                                   return_train_score=self.return_train_score,
    635                                   return_n_test_samples=True,
    636                                   return_times=True, return_parameters=False,
    637                                   error_score=self.error_score)
    638           for parameters, (train, test) in product(candidate_params,
--> 639                                                    cv.split(X, y, groups)))
        cv.split = <bound method StratifiedKFold.split of Stratifie...ld(n_splits=5, random_state=None, shuffle=False)>
        X =        Block  AddressAccuracy  Species_CULEX ERR...       0          0  

[14929 rows x 210 columns]
        y = 94       0
9757     1
10237    1
10109    0
1009...  1
Name: WnvPresent, Length: 14929, dtype: int64
        groups = None
    640 
    641         # if one choose to see train score, "out" will contain train score info
    642         if self.return_train_score:
    643             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV.fit.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Fri Sep 21 10:13:18 2018
PID: 6233                               Python 3.6.5: /anaconda3/bin/python
...........................................................................
/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _fit_and_score>, (LogisticRegression(C=1.0, class_weight='balanced...ol=0.0001, verbose=0,
          warm_start=False),        Block  AddressAccuracy  Species_CULEX ERR...       0          0  

[14929 rows x 210 columns], 94       0
9757     1
10237    1
10109    0
1009...  1
Name: WnvPresent, Length: 14929, dtype: int64, {'score': <function _passthrough_scorer>}, array([ 2941,  2942,  2943, ..., 14926, 14927, 14928]), array([   0,    1,    2, ..., 3035, 3036, 3037]), 1, {'class_weight': 'balanced', 'max_iter': 50, 'multi_class': 'multinomial', 'solver': 'liblinear'}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/anaconda3/lib/python3.6/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (LogisticRegression(C=1.0, class_weight='balanced...ol=0.0001, verbose=0,
          warm_start=False),        Block  AddressAccuracy  Species_CULEX ERR...       0          0  

[14929 rows x 210 columns], 94       0
9757     1
10237    1
10109    0
1009...  1
Name: WnvPresent, Length: 14929, dtype: int64, {'score': <function _passthrough_scorer>}, array([ 2941,  2942,  2943, ..., 14926, 14927, 14928]), array([   0,    1,    2, ..., 3035, 3036, 3037]), 1, {'class_weight': 'balanced', 'max_iter': 50, 'multi_class': 'multinomial', 'solver': 'liblinear'})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/anaconda3/lib/python3.6/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator=LogisticRegression(C=1.0, class_weight='balanced...ol=0.0001, verbose=0,
          warm_start=False), X=       Block  AddressAccuracy  Species_CULEX ERR...       0          0  

[14929 rows x 210 columns], y=94       0
9757     1
10237    1
10109    0
1009...  1
Name: WnvPresent, Length: 14929, dtype: int64, scorer={'score': <function _passthrough_scorer>}, train=array([ 2941,  2942,  2943, ..., 14926, 14927, 14928]), test=array([   0,    1,    2, ..., 3035, 3036, 3037]), verbose=1, parameters={'class_weight': 'balanced', 'max_iter': 50, 'multi_class': 'multinomial', 'solver': 'liblinear'}, fit_params={}, return_train_score='warn', return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    453 
    454     try:
    455         if y_train is None:
    456             estimator.fit(X_train, **fit_params)
    457         else:
--> 458             estimator.fit(X_train, y_train, **fit_params)
        estimator.fit = <bound method LogisticRegression.fit of Logistic...l=0.0001, verbose=0,
          warm_start=False)>
        X_train =        Block  AddressAccuracy  Species_CULEX ERR...       0          0  

[11942 rows x 210 columns]
        y_train = 970      1
3049     1
9532     1
2711     1
1423...  1
Name: WnvPresent, Length: 11942, dtype: int64
        fit_params = {}
    459 
    460     except Exception as e:
    461         # Note fit time as time until error
    462         fit_time = time.time() - start_time

...........................................................................
/anaconda3/lib/python3.6/site-packages/sklearn/linear_model/logistic.py in fit(self=LogisticRegression(C=1.0, class_weight='balanced...ol=0.0001, verbose=0,
          warm_start=False), X=array([[45.,  9.,  0., ...,  0.,  0.,  0.],
    ...0.],
       [41.,  9.,  0., ...,  0.,  0.,  0.]]), y=array([1, 1, 1, ..., 1, 1, 1]), sample_weight=None)
   1217         check_classification_targets(y)
   1218         self.classes_ = np.unique(y)
   1219         n_samples, n_features = X.shape
   1220 
   1221         _check_solver_option(self.solver, self.multi_class, self.penalty,
-> 1222                              self.dual)
        self.dual = False
   1223 
   1224         if self.solver == 'liblinear':
   1225             if self.n_jobs != 1:
   1226                 warnings.warn("'n_jobs' > 1 does not have any effect when"

...........................................................................
/anaconda3/lib/python3.6/site-packages/sklearn/linear_model/logistic.py in _check_solver_option(solver='liblinear', multi_class='multinomial', penalty='l2', dual=False)
    433         raise ValueError("multi_class should be either multinomial or "
    434                          "ovr, got %s" % multi_class)
    435 
    436     if multi_class == 'multinomial' and solver == 'liblinear':
    437         raise ValueError("Solver %s does not support "
--> 438                          "a multinomial backend." % solver)
        solver = 'liblinear'
    439 
    440     if solver not in ['liblinear', 'saga']:
    441         if penalty != 'l2':
    442             raise ValueError("Solver %s supports only l2 penalties, "

ValueError: Solver liblinear does not support a multinomial backend.
___________________________________________________________________________

In [None]:
print('best param',gs.best_params_)
print('best score',gs.best_score_)

In [17]:
#knn
grid_params = {
    'n_neighbors':[3,5,7],
    'weights':['uniform','distance'],
    'metric':['minkowski']
}

gs = GridSearchCV(
    KNeighborsClassifier(),
    grid_params,
    verbose = 1,
    cv = 5,
    n_jobs = -1
)

gs_results = gs.fit(X_train,y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


[Parallel(n_jobs=-1)]: Done  30 out of  30 | elapsed:   28.3s finished


In [18]:
print('best param',gs.best_params_)
print('best score',gs.best_score_)

best param {'metric': 'minkowski', 'n_neighbors': 7, 'weights': 'distance'}
best score 0.6779422600308125


In [19]:
#dt
grid_params = {
    'criterion':['gini','entropy'],
    'splitter':['best','random'],
    'min_samples_leaf':[1,2,4],
    'max_depth':[50,100,115,200,300],
    'min_samples_split':[2,3,5]
}

gs = GridSearchCV(
    DecisionTreeClassifier(),
    grid_params,
    verbose = 1,
    cv = 5,
    n_jobs = -1
)

gs_results = gs.fit(X_train,y_train)

Fitting 5 folds for each of 108 candidates, totalling 540 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    3.9s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   17.6s
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:   43.3s
[Parallel(n_jobs=-1)]: Done 540 out of 540 | elapsed:   54.4s finished


In [20]:
print('best param',gs.best_params_)
print('best score',gs.best_score_)

best param {'criterion': 'gini', 'max_depth': 100, 'min_samples_leaf': 1, 'min_samples_split': 2, 'splitter': 'best'}
best score 0.7079509679147967


In [21]:
#et
grid_params = {
    'criterion':['gini','entropy'],
    'splitter':['best','random'],
    'min_samples_leaf':[1,2,4,5,8,],
    'max_depth':[50,100,300,500],
    'min_samples_split':[2,3,5]
    
}

gs = GridSearchCV(
    ExtraTreeClassifier(),
    grid_params,
    verbose = 1,
    cv = 5,
    n_jobs = -1
)

gs_results = gs.fit(X_train,y_train)

Fitting 5 folds for each of 240 candidates, totalling 1200 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    1.7s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:    7.5s
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:   17.4s
[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:   30.9s
[Parallel(n_jobs=-1)]: Done 1200 out of 1200 | elapsed:   46.8s finished


In [22]:
print('best param',gs.best_params_)
print('best score',gs.best_score_)

best param {'criterion': 'entropy', 'max_depth': 100, 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'random'}
best score 0.7083528702525287


In [34]:
#bag
grid_params = {
    'n_estimators':[5,10,13],
    'max_samples':[3,5,10,50,100],
    #'max_features':[0,2,3],
    'bootstrap_features':['False','True'],
    
}

gs = GridSearchCV(
    BaggingClassifier(),
    grid_params,
    verbose = 1,
    cv = 5,
    n_jobs = -1
)

gs_results = gs.fit(X_train,y_train)

Fitting 5 folds for each of 30 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   15.8s finished


In [35]:
print('best param',gs.best_params_)
print('best score',gs.best_score_)

best param {'bootstrap_features': 'False', 'max_samples': 100, 'n_estimators': 10}
best score 0.638086944872396


In [25]:
#rf
grid_params = {
    'criterion':['gini','entropy'],
    'n_estimators':[5,10,15],
    'min_samples_leaf':[1,2,4,5,8,],
    'max_depth':[50,100,300,500],
    'min_samples_split':[2,3,5]
    
}

gs = GridSearchCV(
    RandomForestClassifier(),
    grid_params,
    verbose = 1,
    cv = 5,
    n_jobs = -1
)

gs_results = gs.fit(X_train,y_train)

Fitting 5 folds for each of 360 candidates, totalling 1800 fits


[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed:   21.3s
[Parallel(n_jobs=-1)]: Done 442 tasks      | elapsed:   51.3s
[Parallel(n_jobs=-1)]: Done 792 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 1242 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 1792 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 1800 out of 1800 | elapsed:  3.4min finished


In [26]:
print('best param',gs.best_params_)
print('best score',gs.best_score_)

best param {'criterion': 'gini', 'max_depth': 300, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 10}
best score 0.7106973005559649


In [None]:
#gb
#hadn't looked at this because nothing has had a better score

In [None]:
#ad
##hadn't looked at this because nothing has had a better score