In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import zipfile
import category_encoders as ce
%matplotlib inline

In [2]:
# Define the dtype of every column that may be loaded.
#
# column_dtypes maps a CSV column name to the dtype handed to pd.read_csv.
# Compact dtypes (int8/int16/int32, float16/float32, category) are used
# throughout, presumably to keep the loaded frames small.
# NOTE(review): float16 represents integers exactly only up to 2048 and tops
# out at 65504; if any float16 identifier/resolution column holds larger
# values they are rounded (distinct ids collide) or become inf -- confirm the
# value ranges against the data.
# NOTE(review): int8 holds -128..127 -- confirm that every int8 column
# (e.g. LocaleEnglishNameIdentifier) really fits.
column_dtypes = {
        'MachineIdentifier':                                    'object',
        'ProductName':                                          'category',
        'EngineVersion':                                        'category',
        'AppVersion':                                           'category',
        'AvSigVersion':                                         'category',
        'IsBeta':                                               'int8',
        'RtpStateBitfield':                                     'float16',
        'IsSxsPassiveMode':                                     'int8',
        'DefaultBrowsersIdentifier':                            'float16',
        'AVProductStatesIdentifier':                            'float32',
        'AVProductsInstalled':                                  'float16',
        'AVProductsEnabled':                                    'float16',
        'HasTpm':                                               'int8',
        'CountryIdentifier':                                    'int16',
        'CityIdentifier':                                       'float32',
        'OrganizationIdentifier':                               'float16',
        'GeoNameIdentifier':                                    'float16',
        'LocaleEnglishNameIdentifier':                          'int8',
        'Platform':                                             'category',
        'Processor':                                            'category',
        'OsVer':                                                'category',
        'OsBuild':                                              'int16',
        'OsSuite':                                              'int16',
        'OsPlatformSubRelease':                                 'category',
        'OsBuildLab':                                           'category',
        'SkuEdition':                                           'category',
        'IsProtected':                                          'float16',
        'AutoSampleOptIn':                                      'int8',
        'PuaMode':                                              'category',
        'SMode':                                                'float16',
        'IeVerIdentifier':                                      'float16',
        'SmartScreen':                                          'category',
        'Firewall':                                             'float16',
        'UacLuaenable':                                         'float32',
        'Census_MDC2FormFactor':                                'category',
        'Census_DeviceFamily':                                  'category',
        'Census_OEMNameIdentifier':                             'float16',
        'Census_OEMModelIdentifier':                            'float32',
        'Census_ProcessorCoreCount':                            'float16',
        'Census_ProcessorManufacturerIdentifier':               'float16',
        'Census_ProcessorModelIdentifier':                      'float16',
        'Census_ProcessorClass':                                'category',
        'Census_PrimaryDiskTotalCapacity':                      'float32',
        'Census_PrimaryDiskTypeName':                           'category',
        'Census_SystemVolumeTotalCapacity':                     'float32',
        'Census_HasOpticalDiskDrive':                           'int8',
        'Census_TotalPhysicalRAM':                              'float32',
        'Census_ChassisTypeName':                               'category',
        'Census_InternalPrimaryDiagonalDisplaySizeInInches':    'float16',
        'Census_InternalPrimaryDisplayResolutionHorizontal':    'float16',
        'Census_InternalPrimaryDisplayResolutionVertical':      'float16',
        'Census_PowerPlatformRoleName':                         'category',
        'Census_InternalBatteryType':                           'category',
        'Census_InternalBatteryNumberOfCharges':                'float32',
        'Census_OSVersion':                                     'category',
        'Census_OSArchitecture':                                'category',
        'Census_OSBranch':                                      'category',
        'Census_OSBuildNumber':                                 'int16',
        'Census_OSBuildRevision':                               'int32',
        'Census_OSEdition':                                     'category',
        'Census_OSSkuName':                                     'category',
        'Census_OSInstallTypeName':                             'category',
        'Census_OSInstallLanguageIdentifier':                   'float16',
        'Census_OSUILocaleIdentifier':                          'int16',
        'Census_OSWUAutoUpdateOptionsName':                     'category',
        'Census_IsPortableOperatingSystem':                     'int8',
        'Census_GenuineStateName':                              'category',
        'Census_ActivationChannel':                             'category',
        'Census_IsFlightingInternal':                           'float16',
        'Census_IsFlightsDisabled':                             'float16',
        'Census_FlightRing':                                    'category',
        'Census_ThresholdOptIn':                                'float16',
        'Census_FirmwareManufacturerIdentifier':                'float16',
        'Census_FirmwareVersionIdentifier':                     'float32',
        'Census_IsSecureBootEnabled':                           'int8',
        'Census_IsWIMBootEnabled':                              'float16',
        'Census_IsVirtualDevice':                               'float16',
        'Census_IsTouchEnabled':                                'int8',
        'Census_IsPenCapable':                                  'int8',
        'Census_IsAlwaysOnAlwaysConnectedCapable':              'float16',
        'Wdft_IsGamer':                                         'float16',
        'Wdft_RegionIdentifier':                                'float16',
        'HasDetections':                                        'int8'  # label column; split off as y_train after loading
        }

# Columns actually requested from the CSV files (passed as `usecols`).
# The label 'HasDetections' must stay last: the test set is loaded with
# use_columns[:-1].
# Three columns that have an entry in column_dtypes are not listed here:
# Census_InternalPrimaryDisplayResolutionVertical, Census_OSBuildNumber and
# Census_OSUILocaleIdentifier.
# NOTE(review): confirm that leaving them out is intentional.
use_columns = [
    'MachineIdentifier',
    'ProductName',
    'EngineVersion',
    'AppVersion',
    'AvSigVersion',
    'IsBeta',
    'RtpStateBitfield',
    'IsSxsPassiveMode',
    'DefaultBrowsersIdentifier',
    'AVProductStatesIdentifier',
    'AVProductsInstalled',
    'AVProductsEnabled',
    'HasTpm',
    'CountryIdentifier',
    'CityIdentifier',
    'OrganizationIdentifier',
    'GeoNameIdentifier',
    'LocaleEnglishNameIdentifier',
    'Platform',
    'Processor',
    'OsVer',
    'OsBuild',
    'OsSuite',
    'OsPlatformSubRelease',
    'OsBuildLab',
    'SkuEdition',
    'IsProtected',
    'AutoSampleOptIn',
    'PuaMode',
    'SMode',
    'IeVerIdentifier',
    'SmartScreen',
    'Firewall',
    'UacLuaenable',
    'Census_MDC2FormFactor',
    'Census_DeviceFamily',
    'Census_OEMNameIdentifier',
    'Census_OEMModelIdentifier',
    'Census_ProcessorCoreCount',
    'Census_ProcessorManufacturerIdentifier',
    'Census_ProcessorModelIdentifier',
    'Census_ProcessorClass',
    'Census_PrimaryDiskTotalCapacity',
    'Census_PrimaryDiskTypeName',
    'Census_SystemVolumeTotalCapacity',
    'Census_HasOpticalDiskDrive',
    'Census_TotalPhysicalRAM',
    'Census_ChassisTypeName',
    'Census_InternalPrimaryDiagonalDisplaySizeInInches',
    'Census_InternalPrimaryDisplayResolutionHorizontal',
    'Census_PowerPlatformRoleName',
    'Census_InternalBatteryType',
    'Census_InternalBatteryNumberOfCharges',
    'Census_OSVersion',
    'Census_OSArchitecture',
    'Census_OSBranch',
    'Census_OSBuildRevision',
    'Census_OSEdition',
    'Census_OSSkuName',
    'Census_OSInstallTypeName',
    'Census_OSInstallLanguageIdentifier',
    'Census_OSWUAutoUpdateOptionsName',
    'Census_IsPortableOperatingSystem',
    'Census_GenuineStateName',
    'Census_ActivationChannel',
    'Census_IsFlightingInternal',
    'Census_IsFlightsDisabled',
    'Census_FlightRing',
    'Census_ThresholdOptIn',
    'Census_FirmwareManufacturerIdentifier',
    'Census_FirmwareVersionIdentifier',
    'Census_IsSecureBootEnabled',
    'Census_IsWIMBootEnabled',
    'Census_IsVirtualDevice',
    'Census_IsTouchEnabled',
    'Census_IsPenCapable',
    'Census_IsAlwaysOnAlwaysConnectedCapable',
    'Wdft_IsGamer',
    'Wdft_RegionIdentifier',
    'HasDetections',
]

In [4]:
# Load the data.
# The archive and each member stream are opened with context managers so they
# are closed deterministically, even when read_csv raises.
with zipfile.ZipFile('../input/all.zip', 'r') as zf:
    with zf.open('train.csv') as train_file:
        tr_train = pd.read_csv(train_file, dtype=column_dtypes, usecols=use_columns)
    # The test file is read without the last entry of use_columns, which is
    # the label 'HasDetections'.
    with zf.open('test.csv') as test_file:
        X_test = pd.read_csv(test_file, dtype=column_dtypes, usecols=use_columns[:-1])

In [5]:
# Split the training data into features (X_train) and label (y_train).
# pop() removes the label column from the frame in place and returns it, so
# no second copy of the feature frame is built (tr_train.drop(...) returns a
# new frame while the original is still alive).
y_train = tr_train.pop('HasDetections')
X_train = tr_train
# Only the name is removed; X_train keeps referring to the label-free frame.
del tr_train

In [6]:
# Run the target encoder.
# The columns to encode are the ones that carry the pandas 'category' dtype.
category_columns = X_train.select_dtypes(include=['category']).columns.tolist()

# Fit on the training features and labels, then apply the fitted encoder to
# both frames.
# NOTE(review): the encoder is fit on all of X_train before the
# cross-validation split further down, so each validation fold is encoded
# with statistics that include its own labels -- confirm this is acceptable.
enc = ce.TargetEncoder(cols=category_columns)
enc.fit(X_train, y_train)
X_train = enc.transform(X_train)
X_test = enc.transform(X_test)

# Store the encoded columns as float16 in both frames.
for frame in (X_train, X_test):
    for column in category_columns:
        frame[column] = frame[column].astype('float16')

# Cache the encoded frames on disk.
X_train.to_pickle("../result/X_train_target_encoded.pkl")
X_test.to_pickle("../result/X_test_target_encoded.pkl")

In [None]:
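# Optional shortcut: uncomment to reload the encoded frames that the previous
# cell pickled, instead of re-running the loading and encoding cells.
# X_train = pd.read_pickle("../result/X_train_target_encoded.pkl")
# X_test = pd.read_pickle("../result/X_test_target_encoded.pkl")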

In [47]:
# Fill missing values with the mean.
def fill_nan(df, column, mean):
    """Replace the NaNs in ``df[column]`` with ``mean``, modifying ``df`` in place.

    A float16 column is filled in float32 and cast back to float16 afterwards;
    a column of any other dtype is filled as is.

    Args:
        df: DataFrame that owns the column; it is mutated.
        column: Label of the column to fill.
        mean: Value written into the missing entries.

    Returns:
        None.
    """
    filled = df[column]
    is_half = filled.dtype == 'float16'
    if is_half:
        # NOTE(review): presumably a workaround for limited float16 support in
        # pandas' fill routines -- confirm it is still needed.
        filled = filled.astype('float32')
    # fillna() returns a new Series that is assigned back explicitly.
    # Calling df[column].fillna(..., inplace=True) acts on an intermediate
    # object and does not update df when pandas Copy-on-Write is active.
    filled = filled.fillna(mean)
    if is_half:
        filled = filled.astype('float16')
    df[column] = filled

# Impute every column that has NaNs in the training set. The fill value is
# the training-set mean and is reused for the test set.
# NOTE(review): a column with NaNs only in X_test is not visited by this loop.
for column in list(X_train.columns[X_train.isna().any()]):
    # Compute the mean in float64. A float16 column reduced in its own dtype
    # overflows once the running sum (or the row count) exceeds 65504, which
    # gives inf/NaN instead of a usable fill value.
    mean = X_train[column].astype('float64').mean()
    fill_nan(X_train, column, mean)
    fill_nan(X_test, column, mean)

In [52]:
# Save the test-set IDs, then remove the ID column from both frames.
# pop() returns the column and deletes it from X_test in one step.
ID_test = X_test.pop('MachineIdentifier')
del X_train['MachineIdentifier']

In [58]:
# LightGBM training parameters, passed unchanged to lgb.train.
lgb_params = {
    'num_leaves': 60,
    'min_data_in_leaf': 60,
    'objective': 'binary',      # binary classification
    'max_depth': -1,            # <= 0 means no depth limit
    'learning_rate': 0.1,
    'boosting': 'gbdt',
    'feature_fraction': 0.8,    # column subsampling
    'bagging_freq': 1,          # re-sample rows every iteration ...
    'bagging_fraction': 0.8,    # ... using 80% of them
    'bagging_seed': 11,
    'metric': 'auc',
    'lambda_l1': 0.1,           # L1 regularisation
    'random_state': 133,
    'verbosity': -1,
}

In [54]:
from sklearn.model_selection import cross_val_score
import lightgbm as lgb
from sklearn.model_selection import KFold, StratifiedKFold

# 5-fold stratified cross-validation with a fixed shuffle seed.
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=15)

# Loop-invariant training limits.
num_round = 10000   # upper bound on boosting rounds per fold
patience = 200      # stop when the validation metric has not improved for this many rounds

# One entry per fold, so the trained boosters and their validation AUC are
# kept instead of being overwritten on every iteration.
fold_models = []
fold_scores = []

for fold_, (trn_idx, val_idx) in enumerate(folds.split(X_train, y_train.values)):
    trn_data = lgb.Dataset(X_train.iloc[trn_idx], label=y_train.iloc[trn_idx])
    val_data = lgb.Dataset(X_train.iloc[val_idx], label=y_train.iloc[val_idx])

    # valid_names fixes the keys under which the metrics are reported, so the
    # validation score can be read back by name below.
    # NOTE(review): verbose_eval / early_stopping_rounds are keyword arguments
    # of the LightGBM 2.x/3.x train() API; LightGBM 4.x expects callbacks
    # (lgb.early_stopping, lgb.log_evaluation) instead -- adjust when upgrading.
    clf = lgb.train(
        lgb_params,
        trn_data,
        num_round,
        valid_sets=[trn_data, val_data],
        valid_names=['train', 'valid'],
        verbose_eval=-1,
        early_stopping_rounds=patience,
    )

    fold_models.append(clf)
    fold_scores.append(clf.best_score['valid']['auc'])
    print("fold %d: valid AUC %0.4f" % (fold_, fold_scores[-1]))

print("CV AUC: %0.4f (+/- %0.4f)" % (np.mean(fold_scores), np.std(fold_scores)))

JoblibMemoryError: JoblibMemoryError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
C:\Users\southman\Anaconda3\lib\runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    188         sys.exit(msg)
    189     main_globals = sys.modules["__main__"].__dict__
    190     if alter_argv:
    191         sys.argv[0] = mod_spec.origin
    192     return _run_code(code, main_globals, None,
--> 193                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py')
    194 
    195 def run_module(mod_name, init_globals=None,
    196                run_name=None, alter_sys=False):
    197     """Execute a module's code without importing it

...........................................................................
C:\Users\southman\Anaconda3\lib\runpy.py in _run_code(code=<code object <module> at 0x0000000002E11AE0, fil...lib\site-packages\ipykernel_launcher.py", line 5>, run_globals={'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\southman\Anaconda3\lib\site-packages\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\southman\Anaconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\s...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x0000000002E11AE0, fil...lib\site-packages\ipykernel_launcher.py", line 5>
        run_globals = {'__annotations__': {}, '__builtins__': <module 'builtins' (built-in)>, '__cached__': r'C:\Users\southman\Anaconda3\lib\site-packages\__pycache__\ipykernel_launcher.cpython-36.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': r'C:\Users\southman\Anaconda3\lib\site-packages\ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...nda3\\lib\\site-packages\\ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from 'C:\\Users\\s...a3\\lib\\site-packages\\ipykernel\\kernelapp.py'>, ...}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\traitlets\config\application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\ipykernel\kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    481         if self.poller is not None:
    482             self.poller.start()
    483         self.kernel.start()
    484         self.io_loop = ioloop.IOLoop.current()
    485         try:
--> 486             self.io_loop.start()
        self.io_loop.start = <bound method BaseAsyncIOLoop.start of <tornado.platform.asyncio.AsyncIOMainLoop object>>
    487         except KeyboardInterrupt:
    488             pass
    489 
    490 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\tornado\platform\asyncio.py in start(self=<tornado.platform.asyncio.AsyncIOMainLoop object>)
    122         except (RuntimeError, AssertionError):
    123             old_loop = None
    124         try:
    125             self._setup_logging()
    126             asyncio.set_event_loop(self.asyncio_loop)
--> 127             self.asyncio_loop.run_forever()
        self.asyncio_loop.run_forever = <bound method BaseEventLoop.run_forever of <_Win...EventLoop running=True closed=False debug=False>>
    128         finally:
    129             asyncio.set_event_loop(old_loop)
    130 
    131     def stop(self):

...........................................................................
C:\Users\southman\Anaconda3\lib\asyncio\base_events.py in run_forever(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
    417             sys.set_asyncgen_hooks(firstiter=self._asyncgen_firstiter_hook,
    418                                    finalizer=self._asyncgen_finalizer_hook)
    419         try:
    420             events._set_running_loop(self)
    421             while True:
--> 422                 self._run_once()
        self._run_once = <bound method BaseEventLoop._run_once of <_Windo...EventLoop running=True closed=False debug=False>>
    423                 if self._stopping:
    424                     break
    425         finally:
    426             self._stopping = False

...........................................................................
C:\Users\southman\Anaconda3\lib\asyncio\base_events.py in _run_once(self=<_WindowsSelectorEventLoop running=True closed=False debug=False>)
   1427                         logger.warning('Executing %s took %.3f seconds',
   1428                                        _format_handle(handle), dt)
   1429                 finally:
   1430                     self._current_handle = None
   1431             else:
-> 1432                 handle._run()
        handle._run = <bound method Handle._run of <Handle BaseAsyncIOLoop._handle_events(808, 1)>>
   1433         handle = None  # Needed to break cycles when an exception occurs.
   1434 
   1435     def _set_coroutine_wrapper(self, enabled):
   1436         try:

...........................................................................
C:\Users\southman\Anaconda3\lib\asyncio\events.py in _run(self=<Handle BaseAsyncIOLoop._handle_events(808, 1)>)
    140             self._callback = None
    141             self._args = None
    142 
    143     def _run(self):
    144         try:
--> 145             self._callback(*self._args)
        self._callback = <bound method BaseAsyncIOLoop._handle_events of <tornado.platform.asyncio.AsyncIOMainLoop object>>
        self._args = (808, 1)
    146         except Exception as exc:
    147             cb = _format_callback_source(self._callback, self._args)
    148             msg = 'Exception in callback {}'.format(cb)
    149             context = {

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\tornado\platform\asyncio.py in _handle_events(self=<tornado.platform.asyncio.AsyncIOMainLoop object>, fd=808, events=1)
    112             self.writers.remove(fd)
    113         del self.handlers[fd]
    114 
    115     def _handle_events(self, fd, events):
    116         fileobj, handler_func = self.handlers[fd]
--> 117         handler_func(fileobj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fileobj = <zmq.sugar.socket.Socket object>
        events = 1
    118 
    119     def start(self):
    120         try:
    121             old_loop = asyncio.get_event_loop()

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    445             return
    446         zmq_events = self.socket.EVENTS
    447         try:
    448             # dispatch events:
    449             if zmq_events & zmq.POLLIN and self.receiving():
--> 450                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    451                 if not self.socket:
    452                     return
    453             if zmq_events & zmq.POLLOUT and self.sending():
    454                 self._handle_send()

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    475             else:
    476                 raise
    477         else:
    478             if self._recv_callback:
    479                 callback = self._recv_callback
--> 480                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    481         
    482 
    483     def _handle_send(self):
    484         """Handle a send event."""

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    427         close our socket."""
    428         try:
    429             # Use a NullContext to ensure that all StackContexts are run
    430             # inside our blanket exception handler rather than outside.
    431             with stack_context.NullContext():
--> 432                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    433         except:
    434             gen_log.error("Uncaught exception in ZMQStream callback",
    435                           exc_info=True)
    436             # Re-raise the exception so that IOLoop.handle_callback_exception

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\tornado\stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    271         # Fast path when there are no active contexts.
    272         def null_wrapper(*args, **kwargs):
    273             try:
    274                 current_state = _state.contexts
    275                 _state.contexts = cap_contexts[0]
--> 276                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    277             finally:
    278                 _state.contexts = current_state
    279         null_wrapper._wrapped = True
    280         return null_wrapper

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': 'from sklearn.model_selection import cross_val_sc...%0.4f) [%s]" % (score.mean(), score.std(), "eb"))', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 12, 26, 8, 26, 25, 483888, tzinfo=tzutc()), 'msg_id': 'f66e83e344934610baf7d8ca28e308f3', 'msg_type': 'execute_request', 'session': '1b53fc4358ec4b12874bd800e728ea34', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'f66e83e344934610baf7d8ca28e308f3', 'msg_type': 'execute_request', 'parent_header': {}})
    228             self.log.warn("Unknown message type: %r", msg_type)
    229         else:
    230             self.log.debug("%s: %s", msg_type, msg)
    231             self.pre_handler_hook()
    232             try:
--> 233                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'1b53fc4358ec4b12874bd800e728ea34']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': 'from sklearn.model_selection import cross_val_sc...%0.4f) [%s]" % (score.mean(), score.std(), "eb"))', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 12, 26, 8, 26, 25, 483888, tzinfo=tzutc()), 'msg_id': 'f66e83e344934610baf7d8ca28e308f3', 'msg_type': 'execute_request', 'session': '1b53fc4358ec4b12874bd800e728ea34', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'f66e83e344934610baf7d8ca28e308f3', 'msg_type': 'execute_request', 'parent_header': {}}
    234             except Exception:
    235                 self.log.error("Exception in message handler:", exc_info=True)
    236             finally:
    237                 self.post_handler_hook()

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\ipykernel\kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'1b53fc4358ec4b12874bd800e728ea34'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': 'from sklearn.model_selection import cross_val_sc...%0.4f) [%s]" % (score.mean(), score.std(), "eb"))', 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2018, 12, 26, 8, 26, 25, 483888, tzinfo=tzutc()), 'msg_id': 'f66e83e344934610baf7d8ca28e308f3', 'msg_type': 'execute_request', 'session': '1b53fc4358ec4b12874bd800e728ea34', 'username': 'username', 'version': '5.2'}, 'metadata': {}, 'msg_id': 'f66e83e344934610baf7d8ca28e308f3', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\ipykernel\ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code='from sklearn.model_selection import cross_val_sc...%0.4f) [%s]" % (score.mean(), score.std(), "eb"))', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    203 
    204         self._forward_input(allow_stdin)
    205 
    206         reply_content = {}
    207         try:
--> 208             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = 'from sklearn.model_selection import cross_val_sc...%0.4f) [%s]" % (score.mean(), score.std(), "eb"))'
        store_history = True
        silent = False
    209         finally:
    210             self._restore_input()
    211 
    212         if res.error_before_exec is not None:

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\ipykernel\zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=('from sklearn.model_selection import cross_val_sc...%0.4f) [%s]" % (score.mean(), score.std(), "eb"))',), **kwargs={'silent': False, 'store_history': True})
    532             )
    533         self.payload_manager.write_payload(payload)
    534 
    535     def run_cell(self, *args, **kwargs):
    536         self._last_traceback = None
--> 537         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ('from sklearn.model_selection import cross_val_sc...%0.4f) [%s]" % (score.mean(), score.std(), "eb"))',)
        kwargs = {'silent': False, 'store_history': True}
    538 
    539     def _showtraceback(self, etype, evalue, stb):
    540         # try to preserve ordering of tracebacks and print statements
    541         sys.stdout.flush()

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='from sklearn.model_selection import cross_val_sc...%0.4f) [%s]" % (score.mean(), score.std(), "eb"))', store_history=True, silent=False, shell_futures=True)
   2657         -------
   2658         result : :class:`ExecutionResult`
   2659         """
   2660         try:
   2661             result = self._run_cell(
-> 2662                 raw_cell, store_history, silent, shell_futures)
        raw_cell = 'from sklearn.model_selection import cross_val_sc...%0.4f) [%s]" % (score.mean(), score.std(), "eb"))'
        store_history = True
        silent = False
        shell_futures = True
   2663         finally:
   2664             self.events.trigger('post_execute')
   2665             if not silent:
   2666                 self.events.trigger('post_run_cell', result)

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in _run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell='from sklearn.model_selection import cross_val_sc...%0.4f) [%s]" % (score.mean(), score.std(), "eb"))', store_history=True, silent=False, shell_futures=True)
   2780                 self.displayhook.exec_result = result
   2781 
   2782                 # Execute the user code
   2783                 interactivity = 'none' if silent else self.ast_node_interactivity
   2784                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2785                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2786                 
   2787                 self.last_execution_succeeded = not has_raised
   2788                 self.last_execution_result = result
   2789 

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Assign object>, <_ast.Expr object>], cell_name='<ipython-input-54-054461f0af81>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at d649aa90, execution_c...rue silent=False shell_futures=True> result=None>)
   2898 
   2899         try:
   2900             for i, node in enumerate(to_run_exec):
   2901                 mod = ast.Module([node])
   2902                 code = compiler(mod, cell_name, "exec")
-> 2903                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x00000000CA23E150, file "<ipython-input-54-054461f0af81>", line 26>
        result = <ExecutionResult object at d649aa90, execution_c...rue silent=False shell_futures=True> result=None>
   2904                     return True
   2905 
   2906             for i, node in enumerate(to_run_interactive):
   2907                 mod = ast.Interactive([node])

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x00000000CA23E150, file "<ipython-input-54-054461f0af81>", line 26>, result=<ExecutionResult object at d649aa90, execution_c...rue silent=False shell_futures=True> result=None>)
   2958         outflag = True  # happens in more places, so it's easier as default
   2959         try:
   2960             try:
   2961                 self.hooks.pre_run_code_hook()
   2962                 #rprint('Running code', repr(code_obj)) # dbg
-> 2963                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x00000000CA23E150, file "<ipython-input-54-054461f0af81>", line 26>
        self.user_global_ns = {'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'ID_test': 0          0000010489e3af074adeac69c53e555e
1   ...MachineIdentifier, Length: 7853253, dtype: object, 'In': ['', "import pandas as pd\nimport numpy as np\nimport ma..._ipython().run_line_magic('matplotlib', 'inline')", "# 불러올 데이터의 타입과 컬럼을 정의한다.\ncolumn_dtypes = {\n     ...   ,'Wdft_RegionIdentifier'\n    ,'HasDetections']", "# 데이터를 불러온다.\nzf = zipfile.ZipFile('../input/all....=column_dtypes, usecols= use_columns[:-1])\ndel zf", "# 데이터를 불러온다.\nzf = zipfile.ZipFile('../input/all....=column_dtypes, usecols= use_columns[:-1])\ndel zf", "# 학습 데이터의 xy를 분리한다.\ny_train = tr_train.HasDetect...rain.drop(['HasDetections'], axis=1)\ndel tr_train", '# Target Encoder를 실행한다.\ncategory_columns = list(....to_pickle("../result/X_test_target_encoded.pkl")', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...llna(X_train.mean(), inplace=True)\nX_train.isna()', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()..._train.mean(), inplace=True)\nX_train.isna().any()', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...), inplace=True)\ndf.columns[X_train.isna().any()]', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...place=True)\nX_train.columns[X_train.isna().any()]', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...=True)\nX_train.columns[X_train.isna().any()].size', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...ce=True)\nX_test.columns[X_test.isna().any()].size', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...train.isna().any()]):\n    print(column)\n    break', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...a().any()]):\n    X_train[column].mean()\n    break', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...()]):\n    print(X_train[column].mean())\n    break', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...a().any()]):\n    print(X_train[column])\n    break', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...train.isna().any()]):\n    print(column)\n    break', 'X_train.RtpStateBitfield.mean()', 'X_train.RtpStateBitfield.mean(skipna=False)', ...], 'MLPClassifier': <class 
'sklearn.neural_network.multilayer_perceptron.MLPClassifier'>, 'Out': {7:          MachineIdentifier  ProductName  EngineV...             False  

[8921483 rows x 79 columns], 8: MachineIdentifier                               ...                     True
Length: 79, dtype: bool, 10: Index(['RtpStateBitfield', 'DefaultBrowsersIdent..., 'Wdft_RegionIdentifier'],
      dtype='object'), 11: 35, 12: 35, 18: nan, 19: nan, 20: nan, 21: 0          7.0
1          7.0
2          7.0
3  ...RtpStateBitfield, Length: 8921483, dtype: float16, 22: nan, ...}, 'RandomForestClassifier': <class 'sklearn.ensemble.forest.RandomForestClassifier'>, 'RandomizedSearchCV': <class 'sklearn.model_selection._search.RandomizedSearchCV'>, 'VotingClassifier': <class 'sklearn.ensemble.voting_classifier.VotingClassifier'>, 'XGBClassifier': <class 'xgboost.sklearn.XGBClassifier'>, 'X_test':          ProductName  EngineVersion  AppVersion ...              10.0  

[7853253 rows x 78 columns], ...}
        self.user_ns = {'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'ID_test': 0          0000010489e3af074adeac69c53e555e
1   ...MachineIdentifier, Length: 7853253, dtype: object, 'In': ['', "import pandas as pd\nimport numpy as np\nimport ma..._ipython().run_line_magic('matplotlib', 'inline')", "# 불러올 데이터의 타입과 컬럼을 정의한다.\ncolumn_dtypes = {\n     ...   ,'Wdft_RegionIdentifier'\n    ,'HasDetections']", "# 데이터를 불러온다.\nzf = zipfile.ZipFile('../input/all....=column_dtypes, usecols= use_columns[:-1])\ndel zf", "# 데이터를 불러온다.\nzf = zipfile.ZipFile('../input/all....=column_dtypes, usecols= use_columns[:-1])\ndel zf", "# 학습 데이터의 xy를 분리한다.\ny_train = tr_train.HasDetect...rain.drop(['HasDetections'], axis=1)\ndel tr_train", '# Target Encoder를 실행한다.\ncategory_columns = list(....to_pickle("../result/X_test_target_encoded.pkl")', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...llna(X_train.mean(), inplace=True)\nX_train.isna()', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()..._train.mean(), inplace=True)\nX_train.isna().any()', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...), inplace=True)\ndf.columns[X_train.isna().any()]', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...place=True)\nX_train.columns[X_train.isna().any()]', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...=True)\nX_train.columns[X_train.isna().any()].size', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...ce=True)\nX_test.columns[X_test.isna().any()].size', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...train.isna().any()]):\n    print(column)\n    break', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...a().any()]):\n    X_train[column].mean()\n    break', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...()]):\n    print(X_train[column].mean())\n    break', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...a().any()]):\n    print(X_train[column])\n    break', '# 결측치를 평균값으로 채운다.\n#X_train.fillna(X_train.mean()...train.isna().any()]):\n    print(column)\n    break', 'X_train.RtpStateBitfield.mean()', 'X_train.RtpStateBitfield.mean(skipna=False)', ...], 'MLPClassifier': <class 
'sklearn.neural_network.multilayer_perceptron.MLPClassifier'>, 'Out': {7:          MachineIdentifier  ProductName  EngineV...             False  

[8921483 rows x 79 columns], 8: MachineIdentifier                               ...                     True
Length: 79, dtype: bool, 10: Index(['RtpStateBitfield', 'DefaultBrowsersIdent..., 'Wdft_RegionIdentifier'],
      dtype='object'), 11: 35, 12: 35, 18: nan, 19: nan, 20: nan, 21: 0          7.0
1          7.0
2          7.0
3  ...RtpStateBitfield, Length: 8921483, dtype: float16, 22: nan, ...}, 'RandomForestClassifier': <class 'sklearn.ensemble.forest.RandomForestClassifier'>, 'RandomizedSearchCV': <class 'sklearn.model_selection._search.RandomizedSearchCV'>, 'VotingClassifier': <class 'sklearn.ensemble.voting_classifier.VotingClassifier'>, 'XGBClassifier': <class 'xgboost.sklearn.XGBClassifier'>, 'X_test':          ProductName  EngineVersion  AppVersion ...              10.0  

[7853253 rows x 78 columns], ...}
   2964             finally:
   2965                 # Reset our crash handler in place
   2966                 sys.excepthook = old_excepthook
   2967         except SystemExit as e:

...........................................................................
C:\Users\southman\Documents\GitHub\mmp\source\<ipython-input-54-054461f0af81> in <module>()
     21 #    "rf__n_estimators":[45], "rf__max_depth":[20], "rf__min_samples_leaf":[3],
     22 #    'mlp__solver':['adam'], 'mlp__max_iter':[1000], 'mlp__early_stopping':[True], 
     23 #    'mlp__hidden_layer_sizes':[(128,64,32)],'mlp__activation':['logistic'],
     24 }
     25 clf = GridSearchCV(clf_eb, parameters, n_jobs=-1, cv=5)
---> 26 clf.fit(X_train, y_train)
     27 #print(clf.best_params_)
     28 score = cross_val_score(clf, X_train, y_train, cv=5, scoring='accuracy')
     29 print("Accuracy: %0.4f (+/- %0.4f) [%s]" % (score.mean(), score.std(), "eb"))

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\model_selection\_search.py in fit(self=GridSearchCV(cv=5, error_score='raise',
       e...ain_score='warn',
       scoring=None, verbose=0), X=         ProductName  EngineVersion  AppVersion ...              11.0  

[8921483 rows x 78 columns], y=0          0
1          0
2          0
3        ...Name: HasDetections, Length: 8921483, dtype: int8, groups=None, **fit_params={})
    634                                   return_train_score=self.return_train_score,
    635                                   return_n_test_samples=True,
    636                                   return_times=True, return_parameters=False,
    637                                   error_score=self.error_score)
    638           for parameters, (train, test) in product(candidate_params,
--> 639                                                    cv.split(X, y, groups)))
        cv.split = <bound method StratifiedKFold.split of Stratifie...ld(n_splits=5, random_state=None, shuffle=False)>
        X =          ProductName  EngineVersion  AppVersion ...              11.0  

[8921483 rows x 78 columns]
        y = 0          0
1          0
2          0
3        ...Name: HasDetections, Length: 8921483, dtype: int8
        groups = None
    640 
    641         # if one choose to see train score, "out" will contain train score info
    642         if self.return_train_score:
    643             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=-1), iterable=<generator object BaseSearchCV.fit.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=-1)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
MemoryError                                        Wed Dec 26 17:29:25 2018
PID: 6116              Python 3.6.5: C:\Users\southman\Anaconda3\python.exe
...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = <class 'list'> instance
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = <class 'tuple'> instance
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': 'warn'}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py in _fit_and_score(estimator=VotingClassifier(estimators=[('xgb', XGBClassifi...form=None, n_jobs=1, voting='soft', weights=None), X=<class 'pandas.core.frame.DataFrame'> instance, y=0          0
1          0
2          0
3        ...Name: HasDetections, Length: 8921483, dtype: int8, scorer={'score': <function _passthrough_scorer>}, train=memmap([1783781, 1783782, 1783784, ..., 8921480, 8921481, 8921482]), test=memmap([      0,       1,       2, ..., 1784812, 1784813, 1784814]), verbose=0, parameters={'xgb__colsample_bytree': 0.84, 'xgb__gamma': 0.2, 'xgb__learning_rate': 0.2, 'xgb__max_depth': 4, 'xgb__min_child_weight': 4, 'xgb__reg_alpha': 0.01, 'xgb__subsample': 0.9}, fit_params={}, return_train_score='warn', return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    453 
    454     try:
    455         if y_train is None:
    456             estimator.fit(X_train, **fit_params)
    457         else:
--> 458             estimator.fit(X_train, y_train, **fit_params)
        estimator.fit = <bound method VotingClassifier.fit of VotingClas...orm=None, n_jobs=1, voting='soft', weights=None)>
        X_train =          ProductName          ...            Wdf...                11.0

[7137185 rows x 78 columns]
        y_train = 1783781    1
1783782    1
1783784    1
1783787  ...Name: HasDetections, Length: 7137185, dtype: int8
        fit_params = {}
    459 
    460     except Exception as e:
    461         # Note fit time as time until error
    462         fit_time = time.time() - start_time

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\ensemble\voting_classifier.py in fit(self=VotingClassifier(estimators=[('xgb', XGBClassifi...form=None, n_jobs=1, voting='soft', weights=None), X=         ProductName          ...            Wdf...                11.0

[7137185 rows x 78 columns], y=1783781    1
1783782    1
1783784    1
1783787  ...Name: HasDetections, Length: 7137185, dtype: int8, sample_weight=None)
    184         transformed_y = self.le_.transform(y)
    185 
    186         self.estimators_ = Parallel(n_jobs=self.n_jobs)(
    187                 delayed(_parallel_fit_estimator)(clone(clf), X, transformed_y,
    188                                                  sample_weight=sample_weight)
--> 189                 for clf in clfs if clf is not None)
        clfs = (XGBClassifier(base_score=0.5, booster='gbtree', ...=1, seed=None, silent=True,
       subsample=0.9),)
    190 
    191         return self
    192 
    193     @property

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=Parallel(n_jobs=1), iterable=<generator object VotingClassifier.fit.<locals>.<genexpr>>)
    774         self.n_completed_tasks = 0
    775         try:
    776             # Only set self._iterating to True if at least a batch
    777             # was dispatched. In particular this covers the edge
    778             # case of Parallel used with an exhausted iterator.
--> 779             while self.dispatch_one_batch(iterator):
        self.dispatch_one_batch = <bound method Parallel.dispatch_one_batch of Parallel(n_jobs=1)>
        iterator = <generator object VotingClassifier.fit.<locals>.<genexpr>>
    780                 self._iterating = True
    781             else:
    782                 self._iterating = False
    783 

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in dispatch_one_batch(self=Parallel(n_jobs=1), iterator=<generator object VotingClassifier.fit.<locals>.<genexpr>>)
    620             tasks = BatchedCalls(itertools.islice(iterator, batch_size))
    621             if len(tasks) == 0:
    622                 # No more tasks available in the iterator: tell caller to stop.
    623                 return False
    624             else:
--> 625                 self._dispatch(tasks)
        self._dispatch = <bound method Parallel._dispatch of Parallel(n_jobs=1)>
        tasks = <sklearn.externals.joblib.parallel.BatchedCalls object>
    626                 return True
    627 
    628     def _print(self, msg, msg_args):
    629         """Display the message on stout or stderr depending on verbosity"""

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in _dispatch(self=Parallel(n_jobs=1), batch=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    583         self.n_dispatched_tasks += len(batch)
    584         self.n_dispatched_batches += 1
    585 
    586         dispatch_timestamp = time.time()
    587         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588         job = self._backend.apply_async(batch, callback=cb)
        job = undefined
        self._backend.apply_async = <bound method SequentialBackend.apply_async of <...lib._parallel_backends.SequentialBackend object>>
        batch = <sklearn.externals.joblib.parallel.BatchedCalls object>
        cb = <sklearn.externals.joblib.parallel.BatchCompletionCallBack object>
    589         self._jobs.append(job)
    590 
    591     def dispatch_next(self):
    592         """Dispatch more data for parallel processing

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in apply_async(self=<sklearn.externals.joblib._parallel_backends.SequentialBackend object>, func=<sklearn.externals.joblib.parallel.BatchedCalls object>, callback=<sklearn.externals.joblib.parallel.BatchCompletionCallBack object>)
    106             raise ValueError('n_jobs == 0 in Parallel has no meaning')
    107         return 1
    108 
    109     def apply_async(self, func, callback=None):
    110         """Schedule a func to be run"""
--> 111         result = ImmediateResult(func)
        result = undefined
        func = <sklearn.externals.joblib.parallel.BatchedCalls object>
    112         if callback:
    113             callback(result)
    114         return result
    115 

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\externals\joblib\_parallel_backends.py in __init__(self=<sklearn.externals.joblib._parallel_backends.ImmediateResult object>, batch=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    327 
    328 class ImmediateResult(object):
    329     def __init__(self, batch):
    330         # Don't delay the application, to avoid keeping the input
    331         # arguments in memory
--> 332         self.results = batch()
        self.results = undefined
        batch = <sklearn.externals.joblib.parallel.BatchedCalls object>
    333 
    334     def get(self):
    335         return self.results
    336 

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _parallel_fit_estimator>, (XGBClassifier(base_score=0.5, booster='gbtree', ...=1, seed=None, silent=True,
       subsample=0.9),          ProductName          ...            Wdf...                11.0

[7137185 rows x 78 columns], array([1, 1, 1, ..., 1, 1, 0], dtype=int64)), {'sample_weight': None})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\externals\joblib\parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _parallel_fit_estimator>
        args = (XGBClassifier(base_score=0.5, booster='gbtree', ...=1, seed=None, silent=True,
       subsample=0.9),          ProductName          ...            Wdf...                11.0

[7137185 rows x 78 columns], array([1, 1, 1, ..., 1, 1, 0], dtype=int64))
        kwargs = {'sample_weight': None}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\sklearn\ensemble\voting_classifier.py in _parallel_fit_estimator(estimator=XGBClassifier(base_score=0.5, booster='gbtree', ...=1, seed=None, silent=True,
       subsample=0.9), X=         ProductName          ...            Wdf...                11.0

[7137185 rows x 78 columns], y=array([1, 1, 1, ..., 1, 1, 0], dtype=int64), sample_weight=None)
     26 def _parallel_fit_estimator(estimator, X, y, sample_weight=None):
     27     """Private function used to fit an estimator within a job."""
     28     if sample_weight is not None:
     29         estimator.fit(X, y, sample_weight=sample_weight)
     30     else:
---> 31         estimator.fit(X, y)
        estimator.fit = <bound method XGBClassifier.fit of XGBClassifier...1, seed=None, silent=True,
       subsample=0.9)>
        X =          ProductName          ...            Wdf...                11.0

[7137185 rows x 78 columns]
        y = array([1, 1, 1, ..., 1, 1, 0], dtype=int64)
     32     return estimator
     33 
     34 
     35 class VotingClassifier(_BaseComposition, ClassifierMixin, TransformerMixin):

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\xgboost\sklearn.py in fit(self=XGBClassifier(base_score=0.5, booster='gbtree', ...=1, seed=None, silent=True,
       subsample=0.9), X=         ProductName          ...            Wdf...                11.0

[7137185 rows x 78 columns], y=array([1, 1, 1, ..., 1, 1, 0], dtype=int64), sample_weight=None, eval_set=None, eval_metric=None, early_stopping_rounds=None, verbose=True, xgb_model=None, sample_weight_eval_set=None, callbacks=None)
    688         if sample_weight is not None:
    689             train_dmatrix = DMatrix(X, label=training_labels, weight=sample_weight,
    690                                     missing=self.missing, nthread=self.n_jobs)
    691         else:
    692             train_dmatrix = DMatrix(X, label=training_labels,
--> 693                                     missing=self.missing, nthread=self.n_jobs)
        self.missing = nan
        self.n_jobs = 1
    694 
    695         self._Booster = train(xgb_options, train_dmatrix, self.n_estimators,
    696                               evals=evals,
    697                               early_stopping_rounds=early_stopping_rounds,

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\xgboost\core.py in __init__(self=<xgboost.core.DMatrix object>, data=         ProductName          ...            Wdf...                11.0

[7137185 rows x 78 columns], label=array([1, 1, 1, ..., 1, 1, 0], dtype=int64), missing=nan, weight=None, silent=False, feature_names=None, feature_types=None, nthread=1)
    358                 self._feature_types = feature_types
    359             return
    360 
    361         data, feature_names, feature_types = _maybe_pandas_data(data,
    362                                                                 feature_names,
--> 363                                                                 feature_types)
        feature_types = None
    364 
    365         data, feature_names, feature_types = _maybe_dt_data(data,
    366                                                             feature_names,
    367                                                             feature_types)

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\xgboost\core.py in _maybe_pandas_data(data=         ProductName          ...            Wdf...                11.0

[7137185 rows x 78 columns], feature_names=['ProductName', 'EngineVersion', 'AppVersion', 'AvSigVersion', 'IsBeta', 'RtpStateBitfield', 'IsSxsPassiveMode', 'DefaultBrowsersIdentifier', 'AVProductStatesIdentifier', 'AVProductsInstalled', 'AVProductsEnabled', 'HasTpm', 'CountryIdentifier', 'CityIdentifier', 'OrganizationIdentifier', 'GeoNameIdentifier', 'LocaleEnglishNameIdentifier', 'Platform', 'Processor', 'OsVer', ...], feature_types=['float', 'float', 'float', 'float', 'int', 'float', 'int', 'float', 'float', 'float', 'float', 'int', 'int', 'float', 'float', 'float', 'int', 'float', 'float', 'float', ...])
    237             feature_names = data.columns.format()
    238 
    239     if feature_types is None:
    240         feature_types = [PANDAS_DTYPE_MAPPER[dtype.name] for dtype in data_dtypes]
    241 
--> 242     data = data.values.astype('float')
        data =          ProductName          ...            Wdf...                11.0

[7137185 rows x 78 columns]
        data.values.astype = undefined
    243 
    244     return data, feature_names, feature_types
    245 
    246 

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\pandas\core\generic.py in values(self=         ProductName          ...            Wdf...                11.0

[7137185 rows x 78 columns])
   4624         --------
   4625         pandas.DataFrame.index : Retrievie the index labels
   4626         pandas.DataFrame.columns : Retrieving the column names
   4627         """
   4628         self._consolidate_inplace()
-> 4629         return self._data.as_array(transpose=self._AXIS_REVERSED)
        self._data.as_array = <bound method BlockManager.as_array of BlockMana...ock: slice(55, 56, 1), 1 x 7137185, dtype: int32>
        self._AXIS_REVERSED = True
   4630 
   4631     @property
   4632     def _values(self):
   4633         """internal implementation"""

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\pandas\core\internals.py in as_array(self=BlockManager
Items: Index(['ProductName', 'Engin...lock: slice(55, 56, 1), 1 x 7137185, dtype: int32, transpose=True, items=None)
   3944             mgr = self
   3945 
   3946         if self._is_single_block or not self.is_mixed_type:
   3947             arr = mgr.blocks[0].get_values()
   3948         else:
-> 3949             arr = mgr._interleave()
        arr = undefined
        mgr._interleave = <bound method BlockManager._interleave of BlockM...ock: slice(55, 56, 1), 1 x 7137185, dtype: int32>
   3950 
   3951         return arr.transpose() if transpose else arr
   3952 
   3953     def _interleave(self):

...........................................................................
C:\Users\southman\Anaconda3\lib\site-packages\pandas\core\internals.py in _interleave(self=BlockManager
Items: Index(['ProductName', 'Engin...lock: slice(55, 56, 1), 1 x 7137185, dtype: int32)
   3955         Return ndarray from blocks with specified item order
   3956         Items must be contained in the blocks
   3957         """
   3958         dtype = _interleaved_dtype(self.blocks)
   3959 
-> 3960         result = np.empty(self.shape, dtype=dtype)
        result = undefined
        self.shape = (78, 7137185)
        dtype = dtype('float64')
   3961 
   3962         if result.shape[0] == 0:
   3963             # Workaround for numpy 1.7 bug:
   3964             #

MemoryError: 
___________________________________________________________________________

In [None]:
# Predict labels for the test set and write the submission file.
# Requires `clf` to have been fitted successfully in an earlier cell.
pred = clf.predict(X_test)
# clf.predict returns a NumPy array, which pd.concat cannot combine with a
# Series directly. Wrap it in a Series that shares ID_test's index so the
# two columns line up row by row.
pred_series = pd.Series(pred, index=ID_test.index, name='HasDetections')
submission = pd.concat([ID_test, pred_series], axis=1)
# Write the frame built above (the original referenced an undefined
# `submission_pca`).
submission.to_csv('../result/submission.csv', index=False)