In [8]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler

from sklearn import linear_model
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

import warnings
warnings.filterwarnings('ignore')

np.random.seed(27)

In [9]:
train = pd.read_csv('C:/Users/O45820/Downloads/dont-overfit-ii/train.csv')
test = pd.read_csv('C:/Users/O45820/Downloads/dont-overfit-ii/test.csv')

print('Train Shape: ', train.shape)
print('Test Shape: ', test.shape)

Train Shape:  (250, 302)
Test Shape:  (19750, 301)


In [10]:
X_train = train.drop(['id', 'target'], axis=1)
y_train = train['target']

X_test = test.drop(['id'], axis=1)

# scaling data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [11]:
# define models
ridge = linear_model.Ridge()
lasso = linear_model.Lasso()
elastic = linear_model.ElasticNet()
lasso_lars = linear_model.LassoLars()
bayesian_ridge = linear_model.BayesianRidge()
logistic = linear_model.LogisticRegression(solver='liblinear')
sgd = linear_model.SGDClassifier()

In [12]:
models = [ridge, lasso, elastic, lasso_lars, bayesian_ridge, logistic, sgd]

In [15]:
def get_cv_scores(model):
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='roc_auc')
    print('CV Mean: ', np.mean(scores))
    #print('STD: ', np.std(scores))
    print('\n')

In [16]:
for model in models:
    #print(model)
    get_cv_scores(model)

CV Mean:  0.6759762475523124


CV Mean:  0.5


CV Mean:  0.5


CV Mean:  0.5


CV Mean:  0.688224616492365


CV Mean:  0.7447916666666666


CV Mean:  0.7225694444444445




In [17]:
penalty = ['l1', 'l2']
C = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
class_weight = [{1:0.5, 0:0.5}, {1:0.4, 0:0.6}, {1:0.6, 0:0.4}, {1:0.7, 0:0.3}]
solver = ['liblinear', 'saga']

param_grid = dict(penalty=penalty,
                  C=C,
                  class_weight=class_weight,
                  solver=solver)

grid = GridSearchCV(estimator=logistic, param_grid=param_grid, scoring='roc_auc', verbose=1, n_jobs=-1)
grid_result = grid.fit(X_train, y_train)

print('Best Score: ', grid_result.best_score_)
print('Best Params: ', grid_result.best_params_)

Fitting 3 folds for each of 128 candidates, totalling 384 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    8.3s
[Parallel(n_jobs=-1)]: Done 384 out of 384 | elapsed:   13.6s finished


Best Score:  0.7901274633123689
Best Params:  {'C': 1, 'class_weight': {1: 0.6, 0: 0.4}, 'penalty': 'l1', 'solver': 'liblinear'}


In [18]:
logistic = linear_model.LogisticRegression(C=1, class_weight={1:0.6, 0:0.4}, penalty='l1', solver='liblinear')
get_cv_scores(logistic)

CV Mean:  0.8166666666666667




In [19]:
predictions = logistic.fit(X_train, y_train).predict_proba(X_test)

In [21]:
submission = pd.read_csv('C:/Users/O45820/Downloads/dont-overfit-ii/sample_submission.csv')
submission['target'] = predictions
submission.to_csv('submission.csv', index=False)
submission.head()

Unnamed: 0,id,target
0,250,0.232713
1,251,0.528766
2,252,0.222767
3,253,0.001162
4,254,0.13132


In [23]:
loss = ['hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron']
penalty = ['l1', 'l2', 'elasticnet']
alpha = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
learning_rate = ['constant', 'optimal', 'invscaling', 'adaptive']
class_weight = [{1:0.5, 0:0.5}, {1:0.4, 0:0.6}, {1:0.6, 0:0.4}, {1:0.7, 0:0.3}]
eta0 = [1, 10, 100]

param_distributions = dict(loss=loss,
                           penalty=penalty,
                           alpha=alpha,
                           learning_rate=learning_rate,
                           class_weight=class_weight,
                           eta0=eta0)

random = RandomizedSearchCV(estimator=sgd, param_distributions=param_distributions, 
                            scoring='roc_auc', verbose=1, n_jobs=-1, n_iter=1000)
random_result = random.fit(X_train, y_train)

print('Best Score: ', random_result.best_score_)
print('Best Params: ', random_result.best_params_)

Fitting 3 folds for each of 1000 candidates, totalling 3000 fits


JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\runpy.py in _run_code(code=<code object <module> at 0x000001469234AED0, fil...lib\site-packages\ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\O45820\AppData\Local\Continuum\anaconda...ges\__pycache__\ipykernel_launcher.cpython-37.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\O...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x000001469234AED0, fil...lib\site-packages\ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\O45820\AppData\Local\Continuum\anaconda...ges\__pycache__\ipykernel_launcher.cpython-37.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\O...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    494         if self.poller is not None:
    495             self.poller.start()
    496         self.kernel.start()
    497         self.io_loop = ioloop.IOLoop.current()
    498         try:
--> 499             self.io_loop.start()
        self.io_loop.start = <bound method BaseAsyncIOLoop.start of <tornado.platform.asyncio.AsyncIOMainLoop object>>
    500         except KeyboardInterrupt:
    501             pass
    502 
    503 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\platform\asyncio.py in start(self=<tornado.platform.asyncio.AsyncIOMainLoop object>)
    127         except (RuntimeError, AssertionError):
    128             old_loop = None
    129         try:
    130             self._setup_logging()
    131             asyncio.set_event_loop(self.asyncio_loop)
--> 132             self.asyncio_loop.run_forever()
        self.asyncio_loop.run_forever = <bound method BaseEventLoop.run_forever of <_Win...EventLoop running=True closed=False debug=False>>
    133         finally:
    134             asyncio.set_event_loop(old_loop)
    135 
    136     def stop(self):

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\asyncio\base_events.py in run_forever(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
    518         sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook,
    519                                finalizer=self._asyncgen_finalizer_hook)
    520         try:
    521             events._set_running_loop(self)
    522             while True:
--> 523                 self._run_once()
        self._run_once = <bound method BaseEventLoop._run_once of <_Windo...EventLoop running=True closed=False debug=False>>
    524                 if self._stopping:
    525                     break
    526         finally:
    527             self._stopping = False

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\asyncio\base_events.py in _run_once(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
   1753                         logger.warning('Executing %s took %.3f seconds',
   1754                                        _format_handle(handle), dt)
   1755                 finally:
   1756                     self._current_handle = None
   1757             else:
-> 1758                 handle._run()
        handle._run = <bound method Handle._run of <Handle BaseAsyncIOLoop._handle_events(912, 1)>>
   1759         handle = None  # Needed to break cycles when an exception occurs.
   1760 
   1761     def _set_coroutine_origin_tracking(self, enabled):
   1762         if bool(enabled) == bool(self._coroutine_origin_tracking_enabled):

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\asyncio\events.py in _run(self=<Handle BaseAsyncIOLoop._handle_events(912, 1)>)
     83     def cancelled(self):
     84         return self._cancelled
     85 
     86     def _run(self):
     87         try:
---> 88             self._context.run(self._callback, *self._args)
        self._context.run = <built-in method run of Context object>
        self._callback = <bound method BaseAsyncIOLoop._handle_events of <tornado.platform.asyncio.AsyncIOMainLoop object>>
        self._args = (912, 1)
     89         except Exception as exc:
     90             cb = format_helpers._format_callback_source(
     91                 self._callback, self._args)
     92             msg = f'Exception in callback {cb}'

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\platform\asyncio.py in _handle_events(self=<tornado.platform.asyncio.AsyncIOMainLoop object>, fd=912, events=1)
    117             self.writers.remove(fd)
    118         del self.handlers[fd]
    119 
    120     def _handle_events(self, fd, events):
    121         fileobj, handler_func = self.handlers[fd]
--> 122         handler_func(fileobj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fileobj = <zmq.sugar.socket.Socket object>
        events = 1
    123 
    124     def start(self):
    125         try:
    126             old_loop = asyncio.get_event_loop()

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    295         # Fast path when there are no active contexts.
    296         def null_wrapper(*args, **kwargs):
    297             try:
    298                 current_state = _state.contexts
    299                 _state.contexts = cap_contexts[0]
--> 300                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    301             finally:
    302                 _state.contexts = current_state
    303         null_wrapper._wrapped = True
    304         return null_wrapper

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    445             return
    446         zmq_events = self.socket.EVENTS
    447         try:
    448             # dispatch events:
    449             if zmq_events & zmq.POLLIN and self.receiving():
--> 450                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    451                 if not self.socket:
    452                     return
    453             if zmq_events & zmq.POLLOUT and self.sending():
    454                 self._handle_send()

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    475             else:
    476                 raise
    477         else:
    478             if self._recv_callback:
    479                 callback = self._recv_callback
--> 480                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    481         
    482 
    483     def _handle_send(self):
    484         """Handle a send event."""

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    427         close our socket."""
    428         try:
    429             # Use a NullContext to ensure that all StackContexts are run
    430             # inside our blanket exception handler rather than outside.
    431             with stack_context.NullContext():
--> 432                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    433         except:
    434             gen_log.error("Uncaught exception in ZMQStream callback",
    435                           exc_info=True)
    436             # Re-raise the exception so that IOLoop.handle_callback_exception

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    295         # Fast path when there are no active contexts.
    296         def null_wrapper(*args, **kwargs):
    297             try:
    298                 current_state = _state.contexts
    299                 _state.contexts = cap_contexts[0]
--> 300                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    301             finally:
    302                 _state.contexts = current_state
    303         null_wrapper._wrapped = True
    304         return null_wrapper

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': "loss = ['hinge', 'log', 'modified_huber', 'squar...rint('Best Params: ', random_result.best_params_)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2019, 8, 4, 23, 33, 7, 73523, tzinfo=tzutc()), 'msg_id': 'e4e8c28587994d078fa76eb1ee1ecca1', 'msg_type': 'execute_request', 'session': '22bb11d5d7ba4aba858dd7ffb1319a93', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'e4e8c28587994d078fa76eb1ee1ecca1', 'msg_type': 'execute_request', 'parent_header': {}})
    228             self.log.warning("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'22bb11d5d7ba4aba858dd7ffb1319a93']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': "loss = ['hinge', 'log', 'modified_huber', 'squar...rint('Best Params: ', random_result.best_params_)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2019, 8, 4, 23, 33, 7, 73523, tzinfo=tzutc()), 'msg_id': 'e4e8c28587994d078fa76eb1ee1ecca1', 'msg_type': 'execute_request', 'session': '22bb11d5d7ba4aba858dd7ffb1319a93', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'e4e8c28587994d078fa76eb1ee1ecca1', 'msg_type': 'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'22bb11d5d7ba4aba858dd7ffb1319a93'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': "loss = ['hinge', 'log', 'modified_huber', 'squar...rint('Best Params: ', random_result.best_params_)", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2019, 8, 4, 23, 33, 7, 73523, tzinfo=tzutc()), 'msg_id': 'e4e8c28587994d078fa76eb1ee1ecca1', 'msg_type': 'execute_request', 'session': '22bb11d5d7ba4aba858dd7ffb1319a93', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'e4e8c28587994d078fa76eb1ee1ecca1', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code="loss = ['hinge', 'log', 'modified_huber', 'squar...rint('Best Params: ', random_result.best_params_)", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = "loss = ['hinge', 'log', 'modified_huber', 'squar...rint('Best Params: ', random_result.best_params_)"
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=("loss = ['hinge', 'log', 'modified_huber', 'squar...rint('Best Params: ', random_result.best_params_)",), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ("loss = ['hinge', 'log', 'modified_huber', 'squar...rint('Best Params: ', random_result.best_params_)",)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell="loss = ['hinge', 'log', 'modified_huber', 'squar...rint('Best Params: ', random_result.best_params_)", store_history=True, silent=False, shell_futures=True)
   2657         -------
   2658         result : :class:`ExecutionResult`
   2659         """
   2660         try:
   2661             result = self._run_cell(
-> 2662                 raw_cell, store_history, silent, shell_futures)
        raw_cell = "loss = ['hinge', 'log', 'modified_huber', 'squar...rint('Best Params: ', random_result.best_params_)"
        store_history = True
        silent = False
        shell_futures = True
   2663         finally:
   2664             self.events.trigger('post_execute')
   2665             if not silent:
   2666                 self.events.trigger('post_run_cell', result)

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in _run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell="loss = ['hinge', 'log', 'modified_huber', 'squar...rint('Best Params: ', random_result.best_params_)", store_history=True, silent=False, shell_futures=True)
   2780                 self.displayhook.exec_result = result
   2781 
   2782                 # Execute the user code
   2783                 interactivity = 'none' if silent else self.ast_node_interactivity
   2784                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2785                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2786                 
   2787                 self.last_execution_succeeded = not has_raised
   2788                 self.last_execution_result = result
   2789 

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Expr object>], cell_name='<ipython-input-23-e0f1a27cbfa2>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 14699538518, executio...rue silent=False shell_futures=True> result=None>)
   2896             raise ValueError("Interactivity was %r" % interactivity)
   2897         try:
   2898             for i, node in enumerate(to_run_exec):
   2899                 mod = ast.Module([node])
   2900                 code = compiler(mod, cell_name, "exec")
-> 2901                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x00000146997B8810, file "<ipython-input-23-e0f1a27cbfa2>", line 16>
        result = <ExecutionResult object at 14699538518, executio...rue silent=False shell_futures=True> result=None>
   2902                     return True
   2903 
   2904             for i, node in enumerate(to_run_interactive):
   2905                 mod = ast.Interactive([node])

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x00000146997B8810, file "<ipython-input-23-e0f1a27cbfa2>", line 16>, result=<ExecutionResult object at 14699538518, executio...rue silent=False shell_futures=True> result=None>)
   2956         outflag = True  # happens in more places, so it's easier as default
   2957         try:
   2958             try:
   2959                 self.hooks.pre_run_code_hook()
   2960                 #rprint('Running code', repr(code_obj)) # dbg
-> 2961                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x00000146997B8810, file "<ipython-input-23-e0f1a27cbfa2>", line 16>
        self.user_global_ns = {'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000], 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'for model in models:\n    #print(model)\n    get_cv_scores(model)', "def get_cv_scores(model):\n    scores = cross_val...   print('STD: ', np.std(scores))\n    print('\\n')", 'for model in models:\n    #print(model)\n    get_cv_scores(model)', 'for model in models:\n    get_cv_scores(model)', "def get_cv_scores(model):\n    scores = cross_val...   print('STD: ', np.std(scores))\n    print('\\n')", 'for model in models:\n    get_cv_scores(model)', 'for model in models:\n    print(model)\n    get_cv_scores(model)', "import numpy as np\nimport pandas as pd\n\nimport m...ings.filterwarnings('ignore')\n\nnp.random.seed(27)", "train = pd.read_csv('C:/Users/O45820/Downloads/d...', train.shape)\nprint('Test Shape: ', test.shape)", "X_train = train.drop(['id', 'target'], axis=1)\ny...nsform(X_train)\nX_test = scaler.transform(X_test)", "# define models\nridge = linear_model.Ridge()\nlas...r='liblinear')\nsgd = linear_model.SGDClassifier()", 'models = [ridge, lasso, elastic, lasso_lars, bayesian_ridge, logistic, sgd]', "def get_cv_scores(model):\n    scores = cross_val...   print('STD: ', np.std(scores))\n    print('\\n')", 'for model in models:\n    #print(model)\n    get_cv_scores(model)', "def get_cv_scores(model):\n    scores = cross_val...  #print('STD: ', np.std(scores))\n    print('\\n')", 'for model in models:\n    #print(model)\n    get_cv_scores(model)', "penalty = ['l1', 'l2']\nC = [0.0001, 0.001, 0.01,...\nprint('Best Params: ', grid_result.best_params_)", "logistic = linear_model.LogisticRegression(C=1, ...'l1', solver='liblinear')\nget_cv_scores(logistic)", 'predictions = logistic.fit(X_train, y_train).predict_proba(X_test)', ...], 'Out': {21:     id    target
0  250  0.232713
1  251  0.5287...  252  0.222767
3  253  0.001162
4  254  0.131320}, 'RandomizedSearchCV': <class 'sklearn.model_selection._search.RandomizedSearchCV'>, 'StandardScaler': <class 'sklearn.preprocessing.data.StandardScaler'>, 'X_test': array([[ 0.47845174, -0.99884336, -1.72841739, .... -0.27528232,
         0.1228808 ,  0.98522335]]), 'X_train': array([[-0.12173567,  2.17600225,  0.5036917 , .... -1.46487041,
        -0.63053791,  1.65376453]]), '_':     id    target
0  250  0.232713
1  251  0.5287...  252  0.222767
3  253  0.001162
4  254  0.131320, '_21':     id    target
0  250  0.232713
1  251  0.5287...  252  0.222767
3  253  0.001162
4  254  0.131320, ...}
        self.user_ns = {'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000], 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'for model in models:\n    #print(model)\n    get_cv_scores(model)', "def get_cv_scores(model):\n    scores = cross_val...   print('STD: ', np.std(scores))\n    print('\\n')", 'for model in models:\n    #print(model)\n    get_cv_scores(model)', 'for model in models:\n    get_cv_scores(model)', "def get_cv_scores(model):\n    scores = cross_val...   print('STD: ', np.std(scores))\n    print('\\n')", 'for model in models:\n    get_cv_scores(model)', 'for model in models:\n    print(model)\n    get_cv_scores(model)', "import numpy as np\nimport pandas as pd\n\nimport m...ings.filterwarnings('ignore')\n\nnp.random.seed(27)", "train = pd.read_csv('C:/Users/O45820/Downloads/d...', train.shape)\nprint('Test Shape: ', test.shape)", "X_train = train.drop(['id', 'target'], axis=1)\ny...nsform(X_train)\nX_test = scaler.transform(X_test)", "# define models\nridge = linear_model.Ridge()\nlas...r='liblinear')\nsgd = linear_model.SGDClassifier()", 'models = [ridge, lasso, elastic, lasso_lars, bayesian_ridge, logistic, sgd]', "def get_cv_scores(model):\n    scores = cross_val...   print('STD: ', np.std(scores))\n    print('\\n')", 'for model in models:\n    #print(model)\n    get_cv_scores(model)', "def get_cv_scores(model):\n    scores = cross_val...  #print('STD: ', np.std(scores))\n    print('\\n')", 'for model in models:\n    #print(model)\n    get_cv_scores(model)', "penalty = ['l1', 'l2']\nC = [0.0001, 0.001, 0.01,...\nprint('Best Params: ', grid_result.best_params_)", "logistic = linear_model.LogisticRegression(C=1, ...'l1', solver='liblinear')\nget_cv_scores(logistic)", 'predictions = logistic.fit(X_train, y_train).predict_proba(X_test)', ...], 'Out': {21:     id    target
0  250  0.232713
1  251  0.5287...  252  0.222767
3  253  0.001162
4  254  0.131320}, 'RandomizedSearchCV': <class 'sklearn.model_selection._search.RandomizedSearchCV'>, 'StandardScaler': <class 'sklearn.preprocessing.data.StandardScaler'>, 'X_test': array([[ 0.47845174, -0.99884336, -1.72841739, .... -0.27528232,
         0.1228808 ,  0.98522335]]), 'X_train': array([[-0.12173567,  2.17600225,  0.5036917 , .... -1.46487041,
        -0.63053791,  1.65376453]]), '_':     id    target
0  250  0.232713
1  251  0.5287...  252  0.222767
3  253  0.001162
4  254  0.131320, '_21':     id    target
0  250  0.232713
1  251  0.5287...  252  0.222767
3  253  0.001162
4  254  0.131320, ...}
   2962             finally:
   2963                 # Reset our crash handler in place
   2964                 sys.excepthook = old_excepthook
   2965         except SystemExit as e:

...........................................................................
C:\Users\O45820\<ipython-input-23-e0f1a27cbfa2> in <module>()
     11                            learning_rate=learning_rate,
     12                            class_weight=class_weight,
     13                            eta0=eta0)
     14 
     15 random = RandomizedSearchCV(estimator=sgd, param_distributions=param_distributions, scoring='roc_auc', verbose=1, n_jobs=-1, n_iter=1000)
---> 16 random_result = random.fit(X_train, y_train)
     17 
     18 print('Best Score: ', random_result.best_score_)
     19 print('Best Params: ', random_result.best_params_)

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self=RandomizedSearchCV(cv=None, error_score='raise',...train_score='warn', scoring='roc_auc', verbose=1), X=array([[-0.12173567,  2.17600225,  0.5036917 , .... -1.46487041,
        -0.63053791,  1.65376453]]), y=0      1.0
1      0.0
2      1.0
3      1.0
4   ...    0.0
Name: target, Length: 250, dtype: float64, groups=None, **fit_params={})
    635                                   return_train_score=self.return_train_score,
    636                                   return_n_test_samples=True,
    637                                   return_times=True, return_parameters=False,
    638                                   error_score=self.error_score)
    639           for parameters, (train, test) in product(candidate_params,
--> 640                                                    cv.split(X, y, groups)))
        cv.split = <bound method StratifiedKFold.split of Stratifie...ld(n_splits=3, random_state=None, shuffle=False)>
        X = array([[-0.12173567,  2.17600225,  0.5036917 , .... -1.46487041,
        -0.63053791,  1.65376453]])
        y = 0      1.0
1      0.0
2      1.0
3      1.0
4   ...    0.0
Name: target, Length: 250, dtype: float64
        groups = None
    641 
    642         # if one choose to see train score, "out" will contain train score info
    643         if self.return_train_score:
    644             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV.fit.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Sun Aug  4 20:33:13 2019
PID: 45232Python 3.7.0: C:\Users\O45820\AppData\Local\Continuum\anaconda3\python.exe
...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _fit_and_score>, (SGDClassifier(alpha=0.001, average=False, class_...ffle=True, tol=None, verbose=0, warm_start=False), array([[-0.12173567,  2.17600225,  0.5036917 , .... -1.46487041,
        -0.63053791,  1.65376453]]), 0      1.0
1      0.0
2      1.0
3      1.0
4   ...    0.0
Name: target, Length: 250, dtype: float64, {'score': make_scorer(roc_auc_score, needs_threshold=True)}, array([ 74,  76,  77,  79,  80,  83,  84,  85,  ...40, 241, 242, 243, 244, 245, 246, 247, 248, 249]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ...,  81,  82,
        86,  91,  96, 101, 102, 104]), 1, {'alpha': 0.001, 'class_weight': {0: 0.6, 1: 0.4}, 'eta0': 100, 'learning_rate': 'adaptive', 'loss': 'hinge', 'penalty': 'l2'}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (SGDClassifier(alpha=0.001, average=False, class_...ffle=True, tol=None, verbose=0, warm_start=False), array([[-0.12173567,  2.17600225,  0.5036917 , .... -1.46487041,
        -0.63053791,  1.65376453]]), 0      1.0
1      0.0
2      1.0
3      1.0
4   ...    0.0
Name: target, Length: 250, dtype: float64, {'score': make_scorer(roc_auc_score, needs_threshold=True)}, array([ 74,  76,  77,  79,  80,  83,  84,  85,  ...40, 241, 242, 243, 244, 245, 246, 247, 248, 249]), array([  0,   1,   2,   3,   4,   5,   6,   7,  ...,  81,  82,
        86,  91,  96, 101, 102, 104]), 1, {'alpha': 0.001, 'class_weight': {0: 0.6, 1: 0.4}, 'eta0': 100, 'learning_rate': 'adaptive', 'loss': 'hinge', 'penalty': 'l2'})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator=SGDClassifier(alpha=0.001, average=False, class_...ffle=True, tol=None, verbose=0, warm_start=False), X=array([[-0.12173567,  2.17600225,  0.5036917 , .... -1.46487041,
        -0.63053791,  1.65376453]]), y=0      1.0
1      0.0
2      1.0
3      1.0
4   ...    0.0
Name: target, Length: 250, dtype: float64, scorer={'score': make_scorer(roc_auc_score, needs_threshold=True)}, train=array([ 74,  76,  77,  79,  80,  83,  84,  85,  ...40, 241, 242, 243, 244, 245, 246, 247, 248, 249]), test=array([  0,   1,   2,   3,   4,   5,   6,   7,  ...,  81,  82,
        86,  91,  96, 101, 102, 104]), verbose=1, parameters={'alpha': 0.001, 'class_weight': {0: 0.6, 1: 0.4}, 'eta0': 100, 'learning_rate': 'adaptive', 'loss': 'hinge', 'penalty': 'l2'}, fit_params={}, return_train_score='warn', return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    439                       for k, v in fit_params.items()])
    440 
    441     test_scores = {}
    442     train_scores = {}
    443     if parameters is not None:
--> 444         estimator.set_params(**parameters)
        estimator.set_params = <bound method BaseSGD.set_params of SGDClassifie...fle=True, tol=None, verbose=0, warm_start=False)>
        parameters = {'alpha': 0.001, 'class_weight': {0: 0.6, 1: 0.4}, 'eta0': 100, 'learning_rate': 'adaptive', 'loss': 'hinge', 'penalty': 'l2'}
    445 
    446     start_time = time.time()
    447 
    448     X_train, y_train = _safe_split(estimator, X, y, train)

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\linear_model\stochastic_gradient.py in set_params(self=SGDClassifier(alpha=0.001, average=False, class_...ffle=True, tol=None, verbose=0, warm_start=False), *args=(), **kwargs={'alpha': 0.001, 'class_weight': {0: 0.6, 1: 0.4}, 'eta0': 100, 'learning_rate': 'adaptive', 'loss': 'hinge', 'penalty': 'l2'})
     73         # but we are not allowed to set attributes
     74         self._validate_params(set_max_iter=False)
     75 
     76     def set_params(self, *args, **kwargs):
     77         super(BaseSGD, self).set_params(*args, **kwargs)
---> 78         self._validate_params(set_max_iter=False)
        self._validate_params = <bound method BaseSGD._validate_params of SGDCla...fle=True, tol=None, verbose=0, warm_start=False)>
     79         return self
     80 
     81     @abstractmethod
     82     def fit(self, X, y):

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\linear_model\stochastic_gradient.py in _validate_params(self=SGDClassifier(alpha=0.001, average=False, class_...ffle=True, tol=None, verbose=0, warm_start=False), set_max_iter=False)
    100                              "learning_rate is 'optimal'. alpha is used "
    101                              "to compute the optimal learning rate.")
    102 
    103         # raises ValueError if not registered
    104         self._get_penalty_type(self.penalty)
--> 105         self._get_learning_rate_type(self.learning_rate)
        self._get_learning_rate_type = <bound method BaseSGD._get_learning_rate_type of...fle=True, tol=None, verbose=0, warm_start=False)>
        self.learning_rate = 'adaptive'
    106 
    107         if self.loss not in self.loss_functions:
    108             raise ValueError("The loss %s is not supported. " % self.loss)
    109 

...........................................................................
C:\Users\O45820\AppData\Local\Continuum\anaconda3\lib\site-packages\sklearn\linear_model\stochastic_gradient.py in _get_learning_rate_type(self=SGDClassifier(alpha=0.001, average=False, class_...ffle=True, tol=None, verbose=0, warm_start=False), learning_rate='adaptive')
    147     def _get_learning_rate_type(self, learning_rate):
    148         try:
    149             return LEARNING_RATE_TYPES[learning_rate]
    150         except KeyError:
    151             raise ValueError("learning rate %s "
--> 152                              "is not supported. " % learning_rate)
        learning_rate = 'adaptive'
    153 
    154     def _get_penalty_type(self, penalty):
    155         penalty = str(penalty).lower()
    156         try:

ValueError: learning rate adaptive is not supported. 
___________________________________________________________________________

In [24]:
sgd = linear_model.SGDClassifier(alpha=0.1,
                                 class_weight={1:0.7, 0:0.3},
                                 eta0=100,
                                 learning_rate='optimal',
                                 loss='log',
                                 penalty='elasticnet')
get_cv_scores(sgd)

CV Mean:  0.7684027777777778




In [25]:
predictions = sgd.fit(X_train, y_train).predict_proba(X_test)

In [27]:
submission = pd.read_csv('C:/Users/O45820/Downloads/dont-overfit-ii/sample_submission.csv')
submission['target'] = predictions
submission.to_csv('submission.csv', index=False)
submission.head()

Unnamed: 0,id,target
0,250,0.096457
1,251,0.172581
2,252,0.165242
3,253,0.092918
4,254,0.138168
