In [7]:
import pdb
import numpy as np
import gcp.bigquery as bq
import gcp.storage as storage
from sklearn.pipeline import Pipeline
try:
   import cPickle as pickle
except:
   import pickle
EST_PICKLE_FILENAME = 'baseline_final_estimator.pkl'

# First feature HAS to be 'district_id' for MAPE calculation.
fields_str = """
district_id	timeofday_slot	day_in_week	is_sunday	sum_price	avg_price	poi1	poi2	poi3
	poi4	poi5	traffic_tj_level1	traffic_tj_level2	traffic_tj_level3	traffic_tj_level4
	weather	weather_pm25	weather_temperature	gap
"""
fields = map(lambda x: x.strip(), fields_str.split('\t'))
features = fields[1:]

# Scorer Creation (MAPE)

In [8]:
def mape(X, predictions, y):
  num_timeslots = 43
  num_districts = 66
  if len(y.shape) == 1:
    y = np.asmatrix(y)
  if len(predictions.shape) == 1:
    predictions = np.asmatrix(predictions)
  Xy = np.concatenate((X, y.T, predictions.T), axis=1)
  districts = np.unique(X[:,0])
  district_scores = np.zeros(len(districts))
  for counter, key in enumerate(districts):
    group = np.compress((Xy[:,0] == key).flat, Xy, axis=0)
    district_scores[counter] = np.sum(np.absolute(
        (group[:,-2] -
         group[:,-1])/
        group[:,-2]
      )) / num_timeslots
  return np.sum(district_scores) / num_districts

def mape_scorer(estimator, X, y):
  predictions = estimator.predict(X)
  return -mape(X, predictions, y)

Testing MAPE

In [9]:
from sklearn.linear_model import LogisticRegression

est = LogisticRegression()
X = np.array([[1, 1], [1, 2], [2, 3], [2, 4]])
predictions = np.array([1, 2, 3, 4])
y = np.array([1, 2, 3, 4])

# Should return 0.0
mape(X, predictions, y)

0.0

# Feature Selection

In [10]:
%%sql --module q_all

SELECT *, HASH(CAST(district_id AS STRING) +timeslot) AS hash_value,
  IF(ABS(HASH(CAST(district_id AS STRING) + timeslot)) % 2 == 1, 'True', 'False')
    AS included_in_sample
FROM [datalab-projects-1331:xjk_algo_comp.future_gaps_final1]
WHERE gap > 0

# The above query randomizes its outputs.

In [11]:
query = bq.Query(q_all)
tableresult = query.results()

In [12]:
all_data = np.zeros((tableresult.length, len(fields)))
print 'there are {} rows'.format(tableresult.length)
for rcounter, row in enumerate(tableresult):
  for fcounter, field in enumerate(fields):
    all_data[rcounter, fcounter] = row[field]
  if rcounter % 5000 == 0:
    print 'processed {} rows'.format(rcounter)

there are 102680 rows
there are 102680 rows
there are 102680 rows
processed 0 rows
processed 0 rows
processed 0 rows
processed 5000 rows
processed 5000 rows
processed 5000 rows
processed 10000 rows
processed 10000 rows
processed 10000 rows
processed 15000 rows
processed 15000 rows
processed 15000 rows
processed 20000 rows
processed 20000 rows
processed 20000 rows
processed 25000 rows
processed 25000 rows
processed 25000 rows
processed 30000 rows
processed 30000 rows
processed 30000 rows
processed 35000 rows
processed 35000 rows
processed 35000 rows
processed 40000 rows
processed 40000 rows
processed 40000 rows
processed 45000 rows
processed 45000 rows
processed 45000 rows
processed 50000 rows
processed 50000 rows
processed 50000 rows
processed 55000 rows
processed 55000 rows
processed 55000 rows
processed 60000 rows
processed 60000 rows
processed 60000 rows
processed 65000 rows
processed 65000 rows
processed 65000 rows
processed 70000 rows
processed 70000 rows
processed 70000 rows
proc

In [13]:
# Get timeslots to test from GCS
item = storage.Item('datalab-projects-1331-datalab','data/timeslots_to_test.txt')
timeslots_to_test = item.read_from().strip().split('\n')
tquery = ','.join(map(lambda x: "'{}'".format(x), timeslots_to_test))
print(tquery)

'2016-01-22-46','2016-01-22-58','2016-01-22-70','2016-01-22-82','2016-01-22-94','2016-01-22-106','2016-01-22-118','2016-01-22-130','2016-01-22-142','2016-01-24-58','2016-01-24-70','2016-01-24-82','2016-01-24-94','2016-01-24-106','2016-01-24-118','2016-01-24-130','2016-01-24-142','2016-01-26-46','2016-01-26-58','2016-01-26-70','2016-01-26-82','2016-01-26-94','2016-01-26-106','2016-01-26-118','2016-01-26-130','2016-01-26-142','2016-01-28-58','2016-01-28-70','2016-01-28-82','2016-01-28-94','2016-01-28-106','2016-01-28-118','2016-01-28-130','2016-01-28-142','2016-01-30-46','2016-01-30-58','2016-01-30-70','2016-01-30-82','2016-01-30-94','2016-01-30-106','2016-01-30-118','2016-01-30-130','2016-01-30-142'


In [14]:
%%sql --module q_all_t

SELECT *
FROM [datalab-projects-1331:xjk_algo_comp_test.future_gaps_final1]
WHERE gap > 0 AND timeslot NOT IN ('2016-01-22-46','2016-01-22-58','2016-01-22-70','2016-01-22-82',
    '2016-01-22-94','2016-01-22-106','2016-01-22-118','2016-01-22-130','2016-01-22-142',
    '2016-01-24-58','2016-01-24-70','2016-01-24-82','2016-01-24-94','2016-01-24-106',
    '2016-01-24-118','2016-01-24-130','2016-01-24-142','2016-01-26-46','2016-01-26-58',
    '2016-01-26-70','2016-01-26-82','2016-01-26-94','2016-01-26-106','2016-01-26-118',
    '2016-01-26-130','2016-01-26-142','2016-01-28-58','2016-01-28-70','2016-01-28-82',
    '2016-01-28-94','2016-01-28-106','2016-01-28-118','2016-01-28-130','2016-01-28-142',
    '2016-01-30-46','2016-01-30-58','2016-01-30-70','2016-01-30-82','2016-01-30-94',
    '2016-01-30-106','2016-01-30-118','2016-01-30-130','2016-01-30-142')
ORDER BY timeslot, district_id

# Test dataset - used to check if estimator can generalize well to new data.

In [15]:
query_t = bq.Query(q_all_t)
tableresult_t = query_t.results()

In [16]:
all_data_t = np.zeros((tableresult_t.length, len(fields)))
print 'there are {} rows'.format(tableresult_t.length)
for rcounter, row in enumerate(tableresult_t):
  for fcounter, field in enumerate(fields):
    all_data_t[rcounter, fcounter] = row[field]
  if rcounter % 1000 == 0:
    print 'processed {} rows'.format(rcounter)

there are 3509 rows
there are 3509 rows
there are 3509 rows
processed 0 rows
processed 1000 rows
processed 0 rows
processed 1000 rows
processed 0 rows
processed 1000 rows
processed 2000 rows
processed 2000 rows
processed 2000 rows
processed 3000 rows
processed 3000 rows
processed 3000 rows


# Building and Testing Algorithm(s)

In [17]:
# Useful code to check NaN and Inf values. This is needed since these values would
# cause "Input contains NaN, infinity or a value too large for dtype('float32')
# errors when left unchecked.
print "Checkinf for NaN and Inf"
print "np.nan=", np.where(np.isnan(all_data))
print "is.inf=", np.where(np.isinf(all_data))
print "np.max=", np.max(abs(all_data))

Checkinf for NaN and Inf
np.nan= (array([     2,      2,      2, ..., 102679, 102679, 102679]), array([15, 16, 17, ..., 15, 16, 17]))
is.inf= (array([], dtype=int64), array([], dtype=int64))
np.max= nan
Checkinf for NaN and Inf
np.nan= (array([     2,      2,      2, ..., 102679, 102679, 102679]), array([15, 16, 17, ..., 15, 16, 17]))
is.inf= (array([], dtype=int64), array([], dtype=int64))
np.max= nan


In [18]:
all_data[np.isnan(all_data)] = 0
all_data_t[np.isnan(all_data_t)] = 0

In [19]:
# Useful code to check NaN and Inf values. This is needed since these values would
# cause "Input contains NaN, infinity or a value too large for dtype('float32')
# errors when left unchecked.
print "Checkinf for NaN and Inf"
print "np.nan=", np.where(np.isnan(all_data))
print "is.inf=", np.where(np.isinf(all_data))
print "np.max=", np.max(abs(all_data))

Checkinf for NaN and Inf
np.nan= (array([], dtype=int64), array([], dtype=int64))
is.inf= (array([], dtype=int64), array([], dtype=int64))
np.max= 708222.866131
Checkinf for NaN and Inf
np.nan= (array([], dtype=int64), array([], dtype=int64))
is.inf= (array([], dtype=int64), array([], dtype=int64))
np.max= 708222.866131
Checkinf for NaN and Inf
np.nan= (array([], dtype=int64), array([], dtype=int64))
is.inf= (array([], dtype=int64), array([], dtype=int64))
np.max= 708222.866131


In [31]:
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
# from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.preprocessing import Imputer
# from sklearn.grid_search import RandomizedSearchCV
from sklearn.grid_search import GridSearchCV


steps = [
#   ('impute', Imputer(0)),
#   ('feature_selection', SelectKBest(f_classif)),
  ('estimate', AdaBoostRegressor())
]

est = Pipeline(steps)

data_train = all_data[:,1:]
targets_train = all_data[:,0]
data_test = all_data_t[:,1:]
targets_test = all_data_t[:,0]

params = {
#   "feature_selection__k": [i for i in range(1, len(features) - 1)]
#   'estimate__max_features': [i for i in range(1, len(features))],
#   'estimate__n_estimators': [5, 10, 15, 20, 30]
  'estimate_learning_rate': [0.1, 0.3, 0.7, 1, 3, 5, 10],
  'estimate__n_estimators': [5, 10, 20, 50, 60, 80]
}
# cross_validation_iter = StratifiedShuffleSplit(y=targets_train, test_size=0.3,
#                                                random_state=RANDOM_STATE, n_iter=10)
# search_params = RandomizedSearchCV(
#   estimator=est,
#   param_distributions=params,
# #   cv=10,
#   scoring=mape_scorer,
#   n_jobs=2,
#   n_iter=5
# )

search_params = GridSearchCV(
  estimator=est,
  param_grid=params,
  cv=5,
  scoring=mape_scorer,
  n_jobs=2,
  verbose=1
)

search_params.fit(data_train, targets_train)
print(search_params.grid_scores_)
print(search_params.best_params_)
print(search_params.best_score_)
search_params.best_estimator_

Fitting 5 folds for each of 42 candidates, totalling 210 fits
Fitting 5 folds for each of 42 candidates, totalling 210 fits
Fitting 5 folds for each of 42 candidates, totalling 210 fits
An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line statement', (2, 0))

An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line statement', (2, 0))

An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line statement', (2, 0))

An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line statement', (2, 0))

An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line 

JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
    ...........................................................................
/usr/lib/python2.7/runpy.py in _run_module_as_main(mod_name='ipykernel.__main__', alter_argv=1)
    157     pkg_name = mod_name.rpartition('.')[0]
    158     main_globals = sys.modules["__main__"].__dict__
    159     if alter_argv:
    160         sys.argv[0] = fname
    161     return _run_code(code, main_globals, None,
--> 162                      "__main__", fname, loader, pkg_name)
        fname = '/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = 'ipykernel'
    163 
    164 def run_module(mod_name, init_globals=None,
    165                run_name=None, alter_sys=False):
    166     """Execute a module's code without importing it

...........................................................................
/usr/lib/python2.7/runpy.py in _run_code(code=<code object <module> at 0x7f99f453b330, file "/...2.7/dist-packages/ipykernel/__main__.py", line 1>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.pyc'>}, init_globals=None, mod_name='__main__', mod_fname='/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='ipykernel')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 0x7f99f453b330, file "/...2.7/dist-packages/ipykernel/__main__.py", line 1>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.pyc'>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py in <module>()
      1 
      2 
----> 3 
      4 if __name__ == '__main__':
      5     from ipykernel import kernelapp as app
      6     app.launch_new_instance()
      7 
      8 
      9 
     10 

...........................................................................
/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    591         
    592         If a global instance already exists, this reinitializes and starts it
    593         """
    594         app = cls.instance(**kwargs)
    595         app.initialize(argv)
--> 596         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    597 
    598 #-----------------------------------------------------------------------------
    599 # utility functions, for convenience
    600 #-----------------------------------------------------------------------------

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    437         
    438         if self.poller is not None:
    439             self.poller.start()
    440         self.kernel.start()
    441         try:
--> 442             ioloop.IOLoop.instance().start()
    443         except KeyboardInterrupt:
    444             pass
    445 
    446 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    157             PollIOLoop.configure(ZMQIOLoop)
    158         return PollIOLoop.current(*args, **kwargs)
    159     
    160     def start(self):
    161         try:
--> 162             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    163         except ZMQError as e:
    164             if e.errno == ETERM:
    165                 # quietly return on ETERM
    166                 pass

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    878                 self._events.update(event_pairs)
    879                 while self._events:
    880                     fd, events = self._events.popitem()
    881                     try:
    882                         fd_obj, handler_func = self._handlers[fd]
--> 883                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    884                     except (OSError, IOError) as e:
    885                         if errno_from_exception(e) == errno.EPIPE:
    886                             # Happens when the client closes the connection
    887                             pass

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    271         if self.control_stream:
    272             self.control_stream.on_recv(self.dispatch_control, copy=False)
    273 
    274         def make_dispatcher(stream):
    275             def dispatcher(msg):
--> 276                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    277             return dispatcher
    278 
    279         for s in self.shell_streams:
    280             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-06-12T12:52:44.380983', u'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', u'msg_type': u'execute_request', u'session': u'CEF40C66C22B4B51AB5041666FFC5A00', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', 'msg_type': u'execute_request', 'parent_header': {}})
    223             self.log.error("UNKNOWN MESSAGE TYPE: %r", msg_type)
    224         else:
    225             self.log.debug("%s: %s", msg_type, msg)
    226             self.pre_handler_hook()
    227             try:
--> 228                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['CEF40C66C22B4B51AB5041666FFC5A00']
        msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-06-12T12:52:44.380983', u'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', u'msg_type': u'execute_request', u'session': u'CEF40C66C22B4B51AB5041666FFC5A00', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', 'msg_type': u'execute_request', 'parent_header': {}}
    229             except Exception:
    230                 self.log.error("Exception in message handler:", exc_info=True)
    231             finally:
    232                 self.post_handler_hook()

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['CEF40C66C22B4B51AB5041666FFC5A00'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-06-12T12:52:44.380983', u'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', u'msg_type': u'execute_request', u'session': u'CEF40C66C22B4B51AB5041666FFC5A00', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', 'msg_type': u'execute_request', 'parent_header': {}})
    386         if not silent:
    387             self.execution_count += 1
    388             self._publish_execute_input(code, parent, self.execution_count)
    389 
    390         reply_content = self.do_execute(code, silent, store_history,
--> 391                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    392 
    393         # Flush output before sending the reply.
    394         sys.stdout.flush()
    395         sys.stderr.flush()

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    194 
    195         reply_content = {}
    196         # FIXME: the shell calls the exception handler itself.
    197         shell._reply_content = None
    198         try:
--> 199             shell.run_cell(code, store_history=store_history, silent=silent)
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_'
        store_history = True
        silent = False
    200         except:
    201             status = u'error'
    202             # FIXME: this code right now isn't being used yet by default,
    203             # because the run_cell() call above directly fires off exception

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', store_history=True, silent=False, shell_futures=True)
   2718                 self.displayhook.exec_result = result
   2719 
   2720                 # Execute the user code
   2721                 interactivity = "none" if silent else self.ast_node_interactivity
   2722                 self.run_ast_nodes(code_ast.body, cell_name,
-> 2723                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2724 
   2725                 # Reset this so later displayed values do not modify the
   2726                 # ExecutionResult
   2727                 self.displayhook.exec_result = None

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Print object>, <_ast.Print object>, <_ast.Print object>, <_ast.Expr object>], cell_name='<ipython-input-31-77a67d18d2e5>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<IPython.core.interactiveshell.ExecutionResult object>)
   2820 
   2821         try:
   2822             for i, node in enumerate(to_run_exec):
   2823                 mod = ast.Module([node])
   2824                 code = compiler(mod, cell_name, "exec")
-> 2825                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7f99bfbd0a30, file "<ipython-input-31-77a67d18d2e5>", line 50>
        result = <IPython.core.interactiveshell.ExecutionResult object>
   2826                     return True
   2827 
   2828             for i, node in enumerate(to_run_interactive):
   2829                 mod = ast.Interactive([node])

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7f99bfbd0a30, file "<ipython-input-31-77a67d18d2e5>", line 50>, result=<IPython.core.interactiveshell.ExecutionResult object>)
   2880         outflag = 1  # happens in more places, so it's easier as default
   2881         try:
   2882             try:
   2883                 self.hooks.pre_run_code_hook()
   2884                 #rprint('Running code', repr(code_obj)) # dbg
-> 2885                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7f99bfbd0a30, file "<ipython-input-31-77a67d18d2e5>", line 50>
        self.user_global_ns = {'AdaBoostRegressor': <class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>, 'DecisionTreeRegressor': <class 'sklearn.tree.tree.DecisionTreeRegressor'>, 'EST_PICKLE_FILENAME': 'baseline_final_estimator.pkl', 'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'Imputer': <class 'sklearn.preprocessing.imputation.Imputer'>, 'In': ['', u'import pdb\nimport numpy as np\nimport gcp.big...ields_str.split(\'\\t\'))\nfeatures = fields[1:]', u'def mape(X, predictions, y):\n  num_timeslots ...or.predict(X)\n  return -mape(X, predictions, y)', u'from sklearn.linear_model import LogisticRegre...\n\n# Should return 0.0\nmape(X, predictions, y)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo...\n\\n# The above query randomizes its outputs.")', u'query = bq.Query(q_all)\ntableresult = query.results()', u"all_data = np.zeros((tableresult.length, len(f...\n    print 'processed {} rows'.format(rcounter)", u'import pdb\nimport numpy as np\nimport gcp.big...ields_str.split(\'\\t\'))\nfeatures = fields[1:]', u'def mape(X, predictions, y):\n  num_timeslots ...or.predict(X)\n  return -mape(X, predictions, y)', u'from sklearn.linear_model import LogisticRegre...\n\n# Should return 0.0\nmape(X, predictions, y)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo...\n\\n# The above query randomizes its outputs.")', u'query = bq.Query(q_all)\ntableresult = query.results()', u"all_data = np.zeros((tableresult.length, len(f...\n    print 'processed {} rows'.format(rcounter)", u'# Get timeslots to test from GCS\nitem = stora...'".format(x), timeslots_to_test))\nprint(tquery)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo... if estimator can generalize well to new data.")', u'query_t = bq.Query(q_all_t)\ntableresult_t = query_t.results()', u"all_data_t = np.zeros((tableresult_t.length, l...\n    print 'processed {} rows'.format(rcounter)", u'# Useful code to check NaN and Inf values. Thi...l_data))\nprint "np.max=", np.max(abs(all_data))', u'all_data[np.isnan(all_data)] = 0\nall_data_t[np.isnan(all_data_t)] = 0', u'# Useful code to check NaN and Inf values. Thi...l_data))\nprint "np.max=", np.max(abs(all_data))', ...], 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'Out': {3: 0.0, 9: 0.0, 20: Pipeline(steps=[('estimate', DecisionTreeRegress...random_state=None,
           splitter='best'))]), 24: Pipeline(steps=[('estimate', RandomForestRegress...=None,
           verbose=0, warm_start=False))]), 27: Pipeline(steps=[('estimate', RandomForestRegress...=None,
           verbose=0, warm_start=False))])}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'RandomForestRegressor': <class 'sklearn.ensemble.forest.RandomForestRegressor'>, ...}
        self.user_ns = {'AdaBoostRegressor': <class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>, 'DecisionTreeRegressor': <class 'sklearn.tree.tree.DecisionTreeRegressor'>, 'EST_PICKLE_FILENAME': 'baseline_final_estimator.pkl', 'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'Imputer': <class 'sklearn.preprocessing.imputation.Imputer'>, 'In': ['', u'import pdb\nimport numpy as np\nimport gcp.big...ields_str.split(\'\\t\'))\nfeatures = fields[1:]', u'def mape(X, predictions, y):\n  num_timeslots ...or.predict(X)\n  return -mape(X, predictions, y)', u'from sklearn.linear_model import LogisticRegre...\n\n# Should return 0.0\nmape(X, predictions, y)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo...\n\\n# The above query randomizes its outputs.")', u'query = bq.Query(q_all)\ntableresult = query.results()', u"all_data = np.zeros((tableresult.length, len(f...\n    print 'processed {} rows'.format(rcounter)", u'import pdb\nimport numpy as np\nimport gcp.big...ields_str.split(\'\\t\'))\nfeatures = fields[1:]', u'def mape(X, predictions, y):\n  num_timeslots ...or.predict(X)\n  return -mape(X, predictions, y)', u'from sklearn.linear_model import LogisticRegre...\n\n# Should return 0.0\nmape(X, predictions, y)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo...\n\\n# The above query randomizes its outputs.")', u'query = bq.Query(q_all)\ntableresult = query.results()', u"all_data = np.zeros((tableresult.length, len(f...\n    print 'processed {} rows'.format(rcounter)", u'# Get timeslots to test from GCS\nitem = stora...'".format(x), timeslots_to_test))\nprint(tquery)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo... if estimator can generalize well to new data.")', u'query_t = bq.Query(q_all_t)\ntableresult_t = query_t.results()', u"all_data_t = np.zeros((tableresult_t.length, l...\n    print 'processed {} rows'.format(rcounter)", u'# Useful code to check NaN and Inf values. Thi...l_data))\nprint "np.max=", np.max(abs(all_data))', u'all_data[np.isnan(all_data)] = 0\nall_data_t[np.isnan(all_data_t)] = 0', u'# Useful code to check NaN and Inf values. Thi...l_data))\nprint "np.max=", np.max(abs(all_data))', ...], 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'Out': {3: 0.0, 9: 0.0, 20: Pipeline(steps=[('estimate', DecisionTreeRegress...random_state=None,
           splitter='best'))]), 24: Pipeline(steps=[('estimate', RandomForestRegress...=None,
           verbose=0, warm_start=False))]), 27: Pipeline(steps=[('estimate', RandomForestRegress...=None,
           verbose=0, warm_start=False))])}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'RandomForestRegressor': <class 'sklearn.ensemble.forest.RandomForestRegressor'>, ...}
   2886             finally:
   2887                 # Reset our crash handler in place
   2888                 sys.excepthook = old_excepthook
   2889         except SystemExit as e:

...........................................................................
/content/TeguhWPurwanto@gmail.com/Xiaojukeji Algorithm Competition/<ipython-input-31-77a67d18d2e5> in <module>()
     45   scoring=mape_scorer,
     46   n_jobs=2,
     47   verbose=1
     48 )
     49 
---> 50 search_params.fit(data_train, targets_train)
     51 print(search_params.grid_scores_)
     52 print(search_params.best_params_)
     53 print(search_params.best_score_)
     54 search_params.best_estimator_

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in fit(self=GridSearchCV(cv=5, error_score='raise',
       e...nction mape_scorer at 0x7f99d626d398>, verbose=1), X=array([[  17.,    3.,    0., ...,  106.,    1., ...[  56.,    5.,    0., ...,    0.,    0.,    4.]]), y=array([ 46.,  32.,  33., ...,   9.,  34.,  42.]))
    727         y : array-like, shape = [n_samples] or [n_samples, n_output], optional
    728             Target relative to X for classification or regression;
    729             None for unsupervised learning.
    730 
    731         """
--> 732         return self._fit(X, y, ParameterGrid(self.param_grid))
        self._fit = <bound method GridSearchCV._fit of GridSearchCV(...ction mape_scorer at 0x7f99d626d398>, verbose=1)>
        X = array([[  17.,    3.,    0., ...,  106.,    1., ...[  56.,    5.,    0., ...,    0.,    0.,    4.]])
        y = array([ 46.,  32.,  33., ...,   9.,  34.,  42.])
        self.param_grid = {'estimate__n_estimators': [5, 10, 20, 50, 60, 80], 'estimate_learning_rate': [0.1, 0.3, 0.7, 1, 3, 5, 10]}
    733 
    734 
    735 class RandomizedSearchCV(BaseSearchCV):
    736     """Randomized search on hyper parameters.

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in _fit(self=GridSearchCV(cv=5, error_score='raise',
       e...nction mape_scorer at 0x7f99d626d398>, verbose=1), X=array([[  17.,    3.,    0., ...,  106.,    1., ...[  56.,    5.,    0., ...,    0.,    0.,    4.]]), y=array([ 46.,  32.,  33., ...,   9.,  34.,  42.]), parameter_iterable=<sklearn.grid_search.ParameterGrid object>)
    500         )(
    501             delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
    502                                     train, test, self.verbose, parameters,
    503                                     self.fit_params, return_parameters=True,
    504                                     error_score=self.error_score)
--> 505                 for parameters in parameter_iterable
        parameters = undefined
        parameter_iterable = <sklearn.grid_search.ParameterGrid object>
    506                 for train, test in cv)
    507 
    508         # Out is a list of triplet: score, estimator, n_test_samples
    509         n_fits = len(out)

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=2), iterable=<itertools.islice object>)
    661             if pre_dispatch == "all" or n_jobs == 1:
    662                 # The iterable was consumed all at once by the above for loop.
    663                 # No need to wait for async callbacks to trigger to
    664                 # consumption.
    665                 self._iterating = False
--> 666             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=2)>
    667             # Make sure that we get a last message telling us we are done
    668             elapsed_time = time.time() - self._start_time
    669             self._print('Done %3i out of %3i | elapsed: %s finished',
    670                         (len(self._output),

    ---------------------------------------------------------------------------
    Sub-process traceback:
    ---------------------------------------------------------------------------
    ValueError                                         Sun Jun 12 12:52:44 2016
PID: 4116                                     Python 2.7.9: /usr/bin/python
...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/cross_validation.pyc in _fit_and_score(estimator=Pipeline(steps=[('estimate', AdaBoostRegressor(b...,
         n_estimators=50, random_state=None))]), X=array([[  17.,    3.,    0., ...,  106.,    1., ...[  56.,    5.,    0., ...,    0.,    0.,    4.]]), y=array([ 46.,  32.,  33., ...,   9.,  34.,  42.]), scorer=<function mape_scorer>, train=array([ 20536,  20537,  20538, ..., 102677, 102678, 102679]), test=array([    0,     1,     2, ..., 20533, 20534, 20535]), verbose=1, parameters={'estimate__n_estimators': 5, 'estimate_learning_rate': 0.1}, fit_params={}, return_train_score=False, return_parameters=True, error_score='raise')
   1443     fit_params = fit_params if fit_params is not None else {}
   1444     fit_params = dict([(k, _index_param_value(X, v, train))
   1445                       for k, v in fit_params.items()])
   1446 
   1447     if parameters is not None:
-> 1448         estimator.set_params(**parameters)
   1449 
   1450     start_time = time.time()
   1451 
   1452     X_train, y_train = _safe_split(estimator, X, y, train)

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/base.pyc in set_params(self=Pipeline(steps=[('estimate', AdaBoostRegressor(b...,
         n_estimators=50, random_state=None))]), **params={'estimate__n_estimators': 5, 'estimate_learning_rate': 0.1})
    251                 sub_object.set_params(**{sub_name: value})
    252             else:
    253                 # simple objects case
    254                 if not key in valid_params:
    255                     raise ValueError('Invalid parameter %s ' 'for estimator %s'
--> 256                                      % (key, self.__class__.__name__))
    257                 setattr(self, key, value)
    258         return self
    259 
    260     def __repr__(self):

ValueError: Invalid parameter estimate_learning_rate for estimator Pipeline
___________________________________________________________________________

JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
    ...........................................................................
/usr/lib/python2.7/runpy.py in _run_module_as_main(mod_name='ipykernel.__main__', alter_argv=1)
    157     pkg_name = mod_name.rpartition('.')[0]
    158     main_globals = sys.modules["__main__"].__dict__
    159     if alter_argv:
    160         sys.argv[0] = fname
    161     return _run_code(code, main_globals, None,
--> 162                      "__main__", fname, loader, pkg_name)
        fname = '/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = 'ipykernel'
    163 
    164 def run_module(mod_name, init_globals=None,
    165                run_name=None, alter_sys=False):
    166     """Execute a module's code without importing it

...........................................................................
/usr/lib/python2.7/runpy.py in _run_code(code=<code object <module> at 0x7f99f453b330, file "/...2.7/dist-packages/ipykernel/__main__.py", line 1>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.pyc'>}, init_globals=None, mod_name='__main__', mod_fname='/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='ipykernel')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 0x7f99f453b330, file "/...2.7/dist-packages/ipykernel/__main__.py", line 1>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.pyc'>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py in <module>()
      1 
      2 
----> 3 
      4 if __name__ == '__main__':
      5     from ipykernel import kernelapp as app
      6     app.launch_new_instance()
      7 
      8 
      9 
     10 

...........................................................................
/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    591         
    592         If a global instance already exists, this reinitializes and starts it
    593         """
    594         app = cls.instance(**kwargs)
    595         app.initialize(argv)
--> 596         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    597 
    598 #-----------------------------------------------------------------------------
    599 # utility functions, for convenience
    600 #-----------------------------------------------------------------------------

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    437         
    438         if self.poller is not None:
    439             self.poller.start()
    440         self.kernel.start()
    441         try:
--> 442             ioloop.IOLoop.instance().start()
    443         except KeyboardInterrupt:
    444             pass
    445 
    446 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    157             PollIOLoop.configure(ZMQIOLoop)
    158         return PollIOLoop.current(*args, **kwargs)
    159     
    160     def start(self):
    161         try:
--> 162             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    163         except ZMQError as e:
    164             if e.errno == ETERM:
    165                 # quietly return on ETERM
    166                 pass

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    878                 self._events.update(event_pairs)
    879                 while self._events:
    880                     fd, events = self._events.popitem()
    881                     try:
    882                         fd_obj, handler_func = self._handlers[fd]
--> 883                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    884                     except (OSError, IOError) as e:
    885                         if errno_from_exception(e) == errno.EPIPE:
    886                             # Happens when the client closes the connection
    887                             pass

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    271         if self.control_stream:
    272             self.control_stream.on_recv(self.dispatch_control, copy=False)
    273 
    274         def make_dispatcher(stream):
    275             def dispatcher(msg):
--> 276                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    277             return dispatcher
    278 
    279         for s in self.shell_streams:
    280             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-06-12T12:52:44.380983', u'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', u'msg_type': u'execute_request', u'session': u'CEF40C66C22B4B51AB5041666FFC5A00', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', 'msg_type': u'execute_request', 'parent_header': {}})
    223             self.log.error("UNKNOWN MESSAGE TYPE: %r", msg_type)
    224         else:
    225             self.log.debug("%s: %s", msg_type, msg)
    226             self.pre_handler_hook()
    227             try:
--> 228                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['CEF40C66C22B4B51AB5041666FFC5A00']
        msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-06-12T12:52:44.380983', u'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', u'msg_type': u'execute_request', u'session': u'CEF40C66C22B4B51AB5041666FFC5A00', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', 'msg_type': u'execute_request', 'parent_header': {}}
    229             except Exception:
    230                 self.log.error("Exception in message handler:", exc_info=True)
    231             finally:
    232                 self.post_handler_hook()

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['CEF40C66C22B4B51AB5041666FFC5A00'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-06-12T12:52:44.380983', u'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', u'msg_type': u'execute_request', u'session': u'CEF40C66C22B4B51AB5041666FFC5A00', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', 'msg_type': u'execute_request', 'parent_header': {}})
    386         if not silent:
    387             self.execution_count += 1
    388             self._publish_execute_input(code, parent, self.execution_count)
    389 
    390         reply_content = self.do_execute(code, silent, store_history,
--> 391                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    392 
    393         # Flush output before sending the reply.
    394         sys.stdout.flush()
    395         sys.stderr.flush()

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    194 
    195         reply_content = {}
    196         # FIXME: the shell calls the exception handler itself.
    197         shell._reply_content = None
    198         try:
--> 199             shell.run_cell(code, store_history=store_history, silent=silent)
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_'
        store_history = True
        silent = False
    200         except:
    201             status = u'error'
    202             # FIXME: this code right now isn't being used yet by default,
    203             # because the run_cell() call above directly fires off exception

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', store_history=True, silent=False, shell_futures=True)
   2718                 self.displayhook.exec_result = result
   2719 
   2720                 # Execute the user code
   2721                 interactivity = "none" if silent else self.ast_node_interactivity
   2722                 self.run_ast_nodes(code_ast.body, cell_name,
-> 2723                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2724 
   2725                 # Reset this so later displayed values do not modify the
   2726                 # ExecutionResult
   2727                 self.displayhook.exec_result = None

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Print object>, <_ast.Print object>, <_ast.Print object>, <_ast.Expr object>], cell_name='<ipython-input-31-77a67d18d2e5>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<IPython.core.interactiveshell.ExecutionResult object>)
   2820 
   2821         try:
   2822             for i, node in enumerate(to_run_exec):
   2823                 mod = ast.Module([node])
   2824                 code = compiler(mod, cell_name, "exec")
-> 2825                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7f99bfbd0a30, file "<ipython-input-31-77a67d18d2e5>", line 50>
        result = <IPython.core.interactiveshell.ExecutionResult object>
   2826                     return True
   2827 
   2828             for i, node in enumerate(to_run_interactive):
   2829                 mod = ast.Interactive([node])

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7f99bfbd0a30, file "<ipython-input-31-77a67d18d2e5>", line 50>, result=<IPython.core.interactiveshell.ExecutionResult object>)
   2880         outflag = 1  # happens in more places, so it's easier as default
   2881         try:
   2882             try:
   2883                 self.hooks.pre_run_code_hook()
   2884                 #rprint('Running code', repr(code_obj)) # dbg
-> 2885                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7f99bfbd0a30, file "<ipython-input-31-77a67d18d2e5>", line 50>
        self.user_global_ns = {'AdaBoostRegressor': <class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>, 'DecisionTreeRegressor': <class 'sklearn.tree.tree.DecisionTreeRegressor'>, 'EST_PICKLE_FILENAME': 'baseline_final_estimator.pkl', 'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'Imputer': <class 'sklearn.preprocessing.imputation.Imputer'>, 'In': ['', u'import pdb\nimport numpy as np\nimport gcp.big...ields_str.split(\'\\t\'))\nfeatures = fields[1:]', u'def mape(X, predictions, y):\n  num_timeslots ...or.predict(X)\n  return -mape(X, predictions, y)', u'from sklearn.linear_model import LogisticRegre...\n\n# Should return 0.0\nmape(X, predictions, y)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo...\n\\n# The above query randomizes its outputs.")', u'query = bq.Query(q_all)\ntableresult = query.results()', u"all_data = np.zeros((tableresult.length, len(f...\n    print 'processed {} rows'.format(rcounter)", u'import pdb\nimport numpy as np\nimport gcp.big...ields_str.split(\'\\t\'))\nfeatures = fields[1:]', u'def mape(X, predictions, y):\n  num_timeslots ...or.predict(X)\n  return -mape(X, predictions, y)', u'from sklearn.linear_model import LogisticRegre...\n\n# Should return 0.0\nmape(X, predictions, y)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo...\n\\n# The above query randomizes its outputs.")', u'query = bq.Query(q_all)\ntableresult = query.results()', u"all_data = np.zeros((tableresult.length, len(f...\n    print 'processed {} rows'.format(rcounter)", u'# Get timeslots to test from GCS\nitem = stora...'".format(x), timeslots_to_test))\nprint(tquery)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo... if estimator can generalize well to new data.")', u'query_t = bq.Query(q_all_t)\ntableresult_t = query_t.results()', u"all_data_t = np.zeros((tableresult_t.length, l...\n    print 'processed {} rows'.format(rcounter)", u'# Useful code to check NaN and Inf values. Thi...l_data))\nprint "np.max=", np.max(abs(all_data))', u'all_data[np.isnan(all_data)] = 0\nall_data_t[np.isnan(all_data_t)] = 0', u'# Useful code to check NaN and Inf values. Thi...l_data))\nprint "np.max=", np.max(abs(all_data))', ...], 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'Out': {3: 0.0, 9: 0.0, 20: Pipeline(steps=[('estimate', DecisionTreeRegress...random_state=None,
           splitter='best'))]), 24: Pipeline(steps=[('estimate', RandomForestRegress...=None,
           verbose=0, warm_start=False))]), 27: Pipeline(steps=[('estimate', RandomForestRegress...=None,
           verbose=0, warm_start=False))])}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'RandomForestRegressor': <class 'sklearn.ensemble.forest.RandomForestRegressor'>, ...}
        self.user_ns = {'AdaBoostRegressor': <class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>, 'DecisionTreeRegressor': <class 'sklearn.tree.tree.DecisionTreeRegressor'>, 'EST_PICKLE_FILENAME': 'baseline_final_estimator.pkl', 'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'Imputer': <class 'sklearn.preprocessing.imputation.Imputer'>, 'In': ['', u'import pdb\nimport numpy as np\nimport gcp.big...ields_str.split(\'\\t\'))\nfeatures = fields[1:]', u'def mape(X, predictions, y):\n  num_timeslots ...or.predict(X)\n  return -mape(X, predictions, y)', u'from sklearn.linear_model import LogisticRegre...\n\n# Should return 0.0\nmape(X, predictions, y)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo...\n\\n# The above query randomizes its outputs.")', u'query = bq.Query(q_all)\ntableresult = query.results()', u"all_data = np.zeros((tableresult.length, len(f...\n    print 'processed {} rows'.format(rcounter)", u'import pdb\nimport numpy as np\nimport gcp.big...ields_str.split(\'\\t\'))\nfeatures = fields[1:]', u'def mape(X, predictions, y):\n  num_timeslots ...or.predict(X)\n  return -mape(X, predictions, y)', u'from sklearn.linear_model import LogisticRegre...\n\n# Should return 0.0\nmape(X, predictions, y)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo...\n\\n# The above query randomizes its outputs.")', u'query = bq.Query(q_all)\ntableresult = query.results()', u"all_data = np.zeros((tableresult.length, len(f...\n    print 'processed {} rows'.format(rcounter)", u'# Get timeslots to test from GCS\nitem = stora...'".format(x), timeslots_to_test))\nprint(tquery)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo... if estimator can generalize well to new data.")', u'query_t = bq.Query(q_all_t)\ntableresult_t = query_t.results()', u"all_data_t = np.zeros((tableresult_t.length, l...\n    print 'processed {} rows'.format(rcounter)", u'# Useful code to check NaN and Inf values. Thi...l_data))\nprint "np.max=", np.max(abs(all_data))', u'all_data[np.isnan(all_data)] = 0\nall_data_t[np.isnan(all_data_t)] = 0', u'# Useful code to check NaN and Inf values. Thi...l_data))\nprint "np.max=", np.max(abs(all_data))', ...], 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'Out': {3: 0.0, 9: 0.0, 20: Pipeline(steps=[('estimate', DecisionTreeRegress...random_state=None,
           splitter='best'))]), 24: Pipeline(steps=[('estimate', RandomForestRegress...=None,
           verbose=0, warm_start=False))]), 27: Pipeline(steps=[('estimate', RandomForestRegress...=None,
           verbose=0, warm_start=False))])}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'RandomForestRegressor': <class 'sklearn.ensemble.forest.RandomForestRegressor'>, ...}
   2886             finally:
   2887                 # Reset our crash handler in place
   2888                 sys.excepthook = old_excepthook
   2889         except SystemExit as e:

...........................................................................
/content/TeguhWPurwanto@gmail.com/Xiaojukeji Algorithm Competition/<ipython-input-31-77a67d18d2e5> in <module>()
     45   scoring=mape_scorer,
     46   n_jobs=2,
     47   verbose=1
     48 )
     49 
---> 50 search_params.fit(data_train, targets_train)
     51 print(search_params.grid_scores_)
     52 print(search_params.best_params_)
     53 print(search_params.best_score_)
     54 search_params.best_estimator_

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in fit(self=GridSearchCV(cv=5, error_score='raise',
       e...nction mape_scorer at 0x7f99d626d398>, verbose=1), X=array([[  17.,    3.,    0., ...,  106.,    1., ...[  56.,    5.,    0., ...,    0.,    0.,    4.]]), y=array([ 46.,  32.,  33., ...,   9.,  34.,  42.]))
    727         y : array-like, shape = [n_samples] or [n_samples, n_output], optional
    728             Target relative to X for classification or regression;
    729             None for unsupervised learning.
    730 
    731         """
--> 732         return self._fit(X, y, ParameterGrid(self.param_grid))
        self._fit = <bound method GridSearchCV._fit of GridSearchCV(...ction mape_scorer at 0x7f99d626d398>, verbose=1)>
        X = array([[  17.,    3.,    0., ...,  106.,    1., ...[  56.,    5.,    0., ...,    0.,    0.,    4.]])
        y = array([ 46.,  32.,  33., ...,   9.,  34.,  42.])
        self.param_grid = {'estimate__n_estimators': [5, 10, 20, 50, 60, 80], 'estimate_learning_rate': [0.1, 0.3, 0.7, 1, 3, 5, 10]}
    733 
    734 
    735 class RandomizedSearchCV(BaseSearchCV):
    736     """Randomized search on hyper parameters.

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in _fit(self=GridSearchCV(cv=5, error_score='raise',
       e...nction mape_scorer at 0x7f99d626d398>, verbose=1), X=array([[  17.,    3.,    0., ...,  106.,    1., ...[  56.,    5.,    0., ...,    0.,    0.,    4.]]), y=array([ 46.,  32.,  33., ...,   9.,  34.,  42.]), parameter_iterable=<sklearn.grid_search.ParameterGrid object>)
    500         )(
    501             delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
    502                                     train, test, self.verbose, parameters,
    503                                     self.fit_params, return_parameters=True,
    504                                     error_score=self.error_score)
--> 505                 for parameters in parameter_iterable
        parameters = undefined
        parameter_iterable = <sklearn.grid_search.ParameterGrid object>
    506                 for train, test in cv)
    507 
    508         # Out is a list of triplet: score, estimator, n_test_samples
    509         n_fits = len(out)

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=2), iterable=<itertools.islice object>)
    661             if pre_dispatch == "all" or n_jobs == 1:
    662                 # The iterable was consumed all at once by the above for loop.
    663                 # No need to wait for async callbacks to trigger to
    664                 # consumption.
    665                 self._iterating = False
--> 666             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=2)>
    667             # Make sure that we get a last message telling us we are done
    668             elapsed_time = time.time() - self._start_time
    669             self._print('Done %3i out of %3i | elapsed: %s finished',
    670                         (len(self._output),

    ---------------------------------------------------------------------------
    Sub-process traceback:
    ---------------------------------------------------------------------------
    ValueError                                         Sun Jun 12 12:52:44 2016
PID: 4116                                     Python 2.7.9: /usr/bin/python
...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/cross_validation.pyc in _fit_and_score(estimator=Pipeline(steps=[('estimate', AdaBoostRegressor(b...,
         n_estimators=50, random_state=None))]), X=array([[  17.,    3.,    0., ...,  106.,    1., ...[  56.,    5.,    0., ...,    0.,    0.,    4.]]), y=array([ 46.,  32.,  33., ...,   9.,  34.,  42.]), scorer=<function mape_scorer>, train=array([ 20536,  20537,  20538, ..., 102677, 102678, 102679]), test=array([    0,     1,     2, ..., 20533, 20534, 20535]), verbose=1, parameters={'estimate__n_estimators': 5, 'estimate_learning_rate': 0.1}, fit_params={}, return_train_score=False, return_parameters=True, error_score='raise')
   1443     fit_params = fit_params if fit_params is not None else {}
   1444     fit_params = dict([(k, _index_param_value(X, v, train))
   1445                       for k, v in fit_params.items()])
   1446 
   1447     if parameters is not None:
-> 1448         estimator.set_params(**parameters)
   1449 
   1450     start_time = time.time()
   1451 
   1452     X_train, y_train = _safe_split(estimator, X, y, train)

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/base.pyc in set_params(self=Pipeline(steps=[('estimate', AdaBoostRegressor(b...,
         n_estimators=50, random_state=None))]), **params={'estimate__n_estimators': 5, 'estimate_learning_rate': 0.1})
    251                 sub_object.set_params(**{sub_name: value})
    252             else:
    253                 # simple objects case
    254                 if not key in valid_params:
    255                     raise ValueError('Invalid parameter %s ' 'for estimator %s'
--> 256                                      % (key, self.__class__.__name__))
    257                 setattr(self, key, value)
    258         return self
    259 
    260     def __repr__(self):

ValueError: Invalid parameter estimate_learning_rate for estimator Pipeline
___________________________________________________________________________

JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
    ...........................................................................
/usr/lib/python2.7/runpy.py in _run_module_as_main(mod_name='ipykernel.__main__', alter_argv=1)
    157     pkg_name = mod_name.rpartition('.')[0]
    158     main_globals = sys.modules["__main__"].__dict__
    159     if alter_argv:
    160         sys.argv[0] = fname
    161     return _run_code(code, main_globals, None,
--> 162                      "__main__", fname, loader, pkg_name)
        fname = '/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py'
        loader = <pkgutil.ImpLoader instance>
        pkg_name = 'ipykernel'
    163 
    164 def run_module(mod_name, init_globals=None,
    165                run_name=None, alter_sys=False):
    166     """Execute a module's code without importing it

...........................................................................
/usr/lib/python2.7/runpy.py in _run_code(code=<code object <module> at 0x7f99f453b330, file "/...2.7/dist-packages/ipykernel/__main__.py", line 1>, run_globals={'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.pyc'>}, init_globals=None, mod_name='__main__', mod_fname='/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py', mod_loader=<pkgutil.ImpLoader instance>, pkg_name='ipykernel')
     67         run_globals.update(init_globals)
     68     run_globals.update(__name__ = mod_name,
     69                        __file__ = mod_fname,
     70                        __loader__ = mod_loader,
     71                        __package__ = pkg_name)
---> 72     exec code in run_globals
        code = <code object <module> at 0x7f99f453b330, file "/...2.7/dist-packages/ipykernel/__main__.py", line 1>
        run_globals = {'__builtins__': <module '__builtin__' (built-in)>, '__doc__': None, '__file__': '/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py', '__loader__': <pkgutil.ImpLoader instance>, '__name__': '__main__', '__package__': 'ipykernel', 'app': <module 'ipykernel.kernelapp' from '/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.pyc'>}
     73     return run_globals
     74 
     75 def _run_module_code(code, init_globals=None,
     76                     mod_name=None, mod_fname=None,

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/__main__.py in <module>()
      1 
      2 
----> 3 
      4 if __name__ == '__main__':
      5     from ipykernel import kernelapp as app
      6     app.launch_new_instance()
      7 
      8 
      9 
     10 

...........................................................................
/usr/local/lib/python2.7/dist-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    591         
    592         If a global instance already exists, this reinitializes and starts it
    593         """
    594         app = cls.instance(**kwargs)
    595         app.initialize(argv)
--> 596         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    597 
    598 #-----------------------------------------------------------------------------
    599 # utility functions, for convenience
    600 #-----------------------------------------------------------------------------

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    437         
    438         if self.poller is not None:
    439             self.poller.start()
    440         self.kernel.start()
    441         try:
--> 442             ioloop.IOLoop.instance().start()
    443         except KeyboardInterrupt:
    444             pass
    445 
    446 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    157             PollIOLoop.configure(ZMQIOLoop)
    158         return PollIOLoop.current(*args, **kwargs)
    159     
    160     def start(self):
    161         try:
--> 162             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    163         except ZMQError as e:
    164             if e.errno == ETERM:
    165                 # quietly return on ETERM
    166                 pass

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    878                 self._events.update(event_pairs)
    879                 while self._events:
    880                     fd, events = self._events.popitem()
    881                     try:
    882                         fd_obj, handler_func = self._handlers[fd]
--> 883                         handler_func(fd_obj, events)
        handler_func = <function null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    884                     except (OSError, IOError) as e:
    885                         if errno_from_exception(e) == errno.EPIPE:
    886                             # Happens when the client closes the connection
    887                             pass

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/usr/local/lib/python2.7/dist-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/usr/local/lib/python2.7/dist-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    270         # Fast path when there are no active contexts.
    271         def null_wrapper(*args, **kwargs):
    272             try:
    273                 current_state = _state.contexts
    274                 _state.contexts = cap_contexts[0]
--> 275                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    276             finally:
    277                 _state.contexts = current_state
    278         null_wrapper._wrapped = True
    279         return null_wrapper

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    271         if self.control_stream:
    272             self.control_stream.on_recv(self.dispatch_control, copy=False)
    273 
    274         def make_dispatcher(stream):
    275             def dispatcher(msg):
--> 276                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    277             return dispatcher
    278 
    279         for s in self.shell_streams:
    280             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-06-12T12:52:44.380983', u'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', u'msg_type': u'execute_request', u'session': u'CEF40C66C22B4B51AB5041666FFC5A00', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', 'msg_type': u'execute_request', 'parent_header': {}})
    223             self.log.error("UNKNOWN MESSAGE TYPE: %r", msg_type)
    224         else:
    225             self.log.debug("%s: %s", msg_type, msg)
    226             self.pre_handler_hook()
    227             try:
--> 228                 handler(stream, idents, msg)
        handler = <bound method IPythonKernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = ['CEF40C66C22B4B51AB5041666FFC5A00']
        msg = {'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-06-12T12:52:44.380983', u'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', u'msg_type': u'execute_request', u'session': u'CEF40C66C22B4B51AB5041666FFC5A00', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', 'msg_type': u'execute_request', 'parent_header': {}}
    229             except Exception:
    230                 self.log.error("Exception in message handler:", exc_info=True)
    231             finally:
    232                 self.post_handler_hook()

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=['CEF40C66C22B4B51AB5041666FFC5A00'], parent={'buffers': [], 'content': {u'allow_stdin': True, u'code': u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', u'silent': False, u'stop_on_error': True, u'store_history': True, u'user_expressions': {}}, 'header': {'date': '2016-06-12T12:52:44.380983', u'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', u'msg_type': u'execute_request', u'session': u'CEF40C66C22B4B51AB5041666FFC5A00', u'username': u'username', u'version': u'5.0'}, 'metadata': {}, 'msg_id': u'51BC7D2045F34C1DBFB596E8921EC318', 'msg_type': u'execute_request', 'parent_header': {}})
    386         if not silent:
    387             self.execution_count += 1
    388             self._publish_execute_input(code, parent, self.execution_count)
    389 
    390         reply_content = self.do_execute(code, silent, store_history,
--> 391                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    392 
    393         # Flush output before sending the reply.
    394         sys.stdout.flush()
    395         sys.stderr.flush()

...........................................................................
/usr/local/lib/python2.7/dist-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code=u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    194 
    195         reply_content = {}
    196         # FIXME: the shell calls the exception handler itself.
    197         shell._reply_content = None
    198         try:
--> 199             shell.run_cell(code, store_history=store_history, silent=silent)
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_'
        store_history = True
        silent = False
    200         except:
    201             status = u'error'
    202             # FIXME: this code right now isn't being used yet by default,
    203             # because the run_cell() call above directly fires off exception

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell=u'from sklearn.feature_selection import SelectKB...rams.best_score_)\nsearch_params.best_estimator_', store_history=True, silent=False, shell_futures=True)
   2718                 self.displayhook.exec_result = result
   2719 
   2720                 # Execute the user code
   2721                 interactivity = "none" if silent else self.ast_node_interactivity
   2722                 self.run_ast_nodes(code_ast.body, cell_name,
-> 2723                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler instance>
   2724 
   2725                 # Reset this so later displayed values do not modify the
   2726                 # ExecutionResult
   2727                 self.displayhook.exec_result = None

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.ImportFrom object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Assign object>, <_ast.Expr object>, <_ast.Print object>, <_ast.Print object>, <_ast.Print object>, <_ast.Expr object>], cell_name='<ipython-input-31-77a67d18d2e5>', interactivity='last', compiler=<IPython.core.compilerop.CachingCompiler instance>, result=<IPython.core.interactiveshell.ExecutionResult object>)
   2820 
   2821         try:
   2822             for i, node in enumerate(to_run_exec):
   2823                 mod = ast.Module([node])
   2824                 code = compiler(mod, cell_name, "exec")
-> 2825                 if self.run_code(code, result):
        self.run_code = <bound method ZMQInteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7f99bfbd0a30, file "<ipython-input-31-77a67d18d2e5>", line 50>
        result = <IPython.core.interactiveshell.ExecutionResult object>
   2826                     return True
   2827 
   2828             for i, node in enumerate(to_run_interactive):
   2829                 mod = ast.Interactive([node])

...........................................................................
/usr/local/lib/python2.7/dist-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7f99bfbd0a30, file "<ipython-input-31-77a67d18d2e5>", line 50>, result=<IPython.core.interactiveshell.ExecutionResult object>)
   2880         outflag = 1  # happens in more places, so it's easier as default
   2881         try:
   2882             try:
   2883                 self.hooks.pre_run_code_hook()
   2884                 #rprint('Running code', repr(code_obj)) # dbg
-> 2885                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7f99bfbd0a30, file "<ipython-input-31-77a67d18d2e5>", line 50>
        self.user_global_ns = {'AdaBoostRegressor': <class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>, 'DecisionTreeRegressor': <class 'sklearn.tree.tree.DecisionTreeRegressor'>, 'EST_PICKLE_FILENAME': 'baseline_final_estimator.pkl', 'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'Imputer': <class 'sklearn.preprocessing.imputation.Imputer'>, 'In': ['', u'import pdb\nimport numpy as np\nimport gcp.big...ields_str.split(\'\\t\'))\nfeatures = fields[1:]', u'def mape(X, predictions, y):\n  num_timeslots ...or.predict(X)\n  return -mape(X, predictions, y)', u'from sklearn.linear_model import LogisticRegre...\n\n# Should return 0.0\nmape(X, predictions, y)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo...\n\\n# The above query randomizes its outputs.")', u'query = bq.Query(q_all)\ntableresult = query.results()', u"all_data = np.zeros((tableresult.length, len(f...\n    print 'processed {} rows'.format(rcounter)", u'import pdb\nimport numpy as np\nimport gcp.big...ields_str.split(\'\\t\'))\nfeatures = fields[1:]', u'def mape(X, predictions, y):\n  num_timeslots ...or.predict(X)\n  return -mape(X, predictions, y)', u'from sklearn.linear_model import LogisticRegre...\n\n# Should return 0.0\nmape(X, predictions, y)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo...\n\\n# The above query randomizes its outputs.")', u'query = bq.Query(q_all)\ntableresult = query.results()', u"all_data = np.zeros((tableresult.length, len(f...\n    print 'processed {} rows'.format(rcounter)", u'# Get timeslots to test from GCS\nitem = stora...'".format(x), timeslots_to_test))\nprint(tquery)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo... if estimator can generalize well to new data.")', u'query_t = bq.Query(q_all_t)\ntableresult_t = query_t.results()', u"all_data_t = np.zeros((tableresult_t.length, l...\n    print 'processed {} rows'.format(rcounter)", u'# Useful code to check NaN and Inf values. Thi...l_data))\nprint "np.max=", np.max(abs(all_data))', u'all_data[np.isnan(all_data)] = 0\nall_data_t[np.isnan(all_data_t)] = 0', u'# Useful code to check NaN and Inf values. Thi...l_data))\nprint "np.max=", np.max(abs(all_data))', ...], 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'Out': {3: 0.0, 9: 0.0, 20: Pipeline(steps=[('estimate', DecisionTreeRegress...random_state=None,
           splitter='best'))]), 24: Pipeline(steps=[('estimate', RandomForestRegress...=None,
           verbose=0, warm_start=False))]), 27: Pipeline(steps=[('estimate', RandomForestRegress...=None,
           verbose=0, warm_start=False))])}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'RandomForestRegressor': <class 'sklearn.ensemble.forest.RandomForestRegressor'>, ...}
        self.user_ns = {'AdaBoostRegressor': <class 'sklearn.ensemble.weight_boosting.AdaBoostRegressor'>, 'DecisionTreeRegressor': <class 'sklearn.tree.tree.DecisionTreeRegressor'>, 'EST_PICKLE_FILENAME': 'baseline_final_estimator.pkl', 'GridSearchCV': <class 'sklearn.grid_search.GridSearchCV'>, 'Imputer': <class 'sklearn.preprocessing.imputation.Imputer'>, 'In': ['', u'import pdb\nimport numpy as np\nimport gcp.big...ields_str.split(\'\\t\'))\nfeatures = fields[1:]', u'def mape(X, predictions, y):\n  num_timeslots ...or.predict(X)\n  return -mape(X, predictions, y)', u'from sklearn.linear_model import LogisticRegre...\n\n# Should return 0.0\nmape(X, predictions, y)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo...\n\\n# The above query randomizes its outputs.")', u'query = bq.Query(q_all)\ntableresult = query.results()', u"all_data = np.zeros((tableresult.length, len(f...\n    print 'processed {} rows'.format(rcounter)", u'import pdb\nimport numpy as np\nimport gcp.big...ields_str.split(\'\\t\'))\nfeatures = fields[1:]', u'def mape(X, predictions, y):\n  num_timeslots ...or.predict(X)\n  return -mape(X, predictions, y)', u'from sklearn.linear_model import LogisticRegre...\n\n# Should return 0.0\nmape(X, predictions, y)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo...\n\\n# The above query randomizes its outputs.")', u'query = bq.Query(q_all)\ntableresult = query.results()', u"all_data = np.zeros((tableresult.length, len(f...\n    print 'processed {} rows'.format(rcounter)", u'# Get timeslots to test from GCS\nitem = stora...'".format(x), timeslots_to_test))\nprint(tquery)', u'get_ipython().run_cell_magic(u\'sql\', u\'--mo... if estimator can generalize well to new data.")', u'query_t = bq.Query(q_all_t)\ntableresult_t = query_t.results()', u"all_data_t = np.zeros((tableresult_t.length, l...\n    print 'processed {} rows'.format(rcounter)", u'# Useful code to check NaN and Inf values. Thi...l_data))\nprint "np.max=", np.max(abs(all_data))', u'all_data[np.isnan(all_data)] = 0\nall_data_t[np.isnan(all_data_t)] = 0', u'# Useful code to check NaN and Inf values. Thi...l_data))\nprint "np.max=", np.max(abs(all_data))', ...], 'LogisticRegression': <class 'sklearn.linear_model.logistic.LogisticRegression'>, 'Out': {3: 0.0, 9: 0.0, 20: Pipeline(steps=[('estimate', DecisionTreeRegress...random_state=None,
           splitter='best'))]), 24: Pipeline(steps=[('estimate', RandomForestRegress...=None,
           verbose=0, warm_start=False))]), 27: Pipeline(steps=[('estimate', RandomForestRegress...=None,
           verbose=0, warm_start=False))])}, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'RandomForestRegressor': <class 'sklearn.ensemble.forest.RandomForestRegressor'>, ...}
   2886             finally:
   2887                 # Reset our crash handler in place
   2888                 sys.excepthook = old_excepthook
   2889         except SystemExit as e:

...........................................................................
/content/TeguhWPurwanto@gmail.com/Xiaojukeji Algorithm Competition/<ipython-input-31-77a67d18d2e5> in <module>()
     45   scoring=mape_scorer,
     46   n_jobs=2,
     47   verbose=1
     48 )
     49 
---> 50 search_params.fit(data_train, targets_train)
     51 print(search_params.grid_scores_)
     52 print(search_params.best_params_)
     53 print(search_params.best_score_)
     54 search_params.best_estimator_

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in fit(self=GridSearchCV(cv=5, error_score='raise',
       e...nction mape_scorer at 0x7f99d626d398>, verbose=1), X=array([[  17.,    3.,    0., ...,  106.,    1., ...[  56.,    5.,    0., ...,    0.,    0.,    4.]]), y=array([ 46.,  32.,  33., ...,   9.,  34.,  42.]))
    727         y : array-like, shape = [n_samples] or [n_samples, n_output], optional
    728             Target relative to X for classification or regression;
    729             None for unsupervised learning.
    730 
    731         """
--> 732         return self._fit(X, y, ParameterGrid(self.param_grid))
        self._fit = <bound method GridSearchCV._fit of GridSearchCV(...ction mape_scorer at 0x7f99d626d398>, verbose=1)>
        X = array([[  17.,    3.,    0., ...,  106.,    1., ...[  56.,    5.,    0., ...,    0.,    0.,    4.]])
        y = array([ 46.,  32.,  33., ...,   9.,  34.,  42.])
        self.param_grid = {'estimate__n_estimators': [5, 10, 20, 50, 60, 80], 'estimate_learning_rate': [0.1, 0.3, 0.7, 1, 3, 5, 10]}
    733 
    734 
    735 class RandomizedSearchCV(BaseSearchCV):
    736     """Randomized search on hyper parameters.

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/grid_search.py in _fit(self=GridSearchCV(cv=5, error_score='raise',
       e...nction mape_scorer at 0x7f99d626d398>, verbose=1), X=array([[  17.,    3.,    0., ...,  106.,    1., ...[  56.,    5.,    0., ...,    0.,    0.,    4.]]), y=array([ 46.,  32.,  33., ...,   9.,  34.,  42.]), parameter_iterable=<sklearn.grid_search.ParameterGrid object>)
    500         )(
    501             delayed(_fit_and_score)(clone(base_estimator), X, y, self.scorer_,
    502                                     train, test, self.verbose, parameters,
    503                                     self.fit_params, return_parameters=True,
    504                                     error_score=self.error_score)
--> 505                 for parameters in parameter_iterable
        parameters = undefined
        parameter_iterable = <sklearn.grid_search.ParameterGrid object>
    506                 for train, test in cv)
    507 
    508         # Out is a list of triplet: score, estimator, n_test_samples
    509         n_fits = len(out)

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=2), iterable=<itertools.islice object>)
    661             if pre_dispatch == "all" or n_jobs == 1:
    662                 # The iterable was consumed all at once by the above for loop.
    663                 # No need to wait for async callbacks to trigger to
    664                 # consumption.
    665                 self._iterating = False
--> 666             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=2)>
    667             # Make sure that we get a last message telling us we are done
    668             elapsed_time = time.time() - self._start_time
    669             self._print('Done %3i out of %3i | elapsed: %s finished',
    670                         (len(self._output),

    ---------------------------------------------------------------------------
    Sub-process traceback:
    ---------------------------------------------------------------------------
    ValueError                                         Sun Jun 12 12:52:44 2016
PID: 4116                                     Python 2.7.9: /usr/bin/python
...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/cross_validation.pyc in _fit_and_score(estimator=Pipeline(steps=[('estimate', AdaBoostRegressor(b...,
         n_estimators=50, random_state=None))]), X=array([[  17.,    3.,    0., ...,  106.,    1., ...[  56.,    5.,    0., ...,    0.,    0.,    4.]]), y=array([ 46.,  32.,  33., ...,   9.,  34.,  42.]), scorer=<function mape_scorer>, train=array([ 20536,  20537,  20538, ..., 102677, 102678, 102679]), test=array([    0,     1,     2, ..., 20533, 20534, 20535]), verbose=1, parameters={'estimate__n_estimators': 5, 'estimate_learning_rate': 0.1}, fit_params={}, return_train_score=False, return_parameters=True, error_score='raise')
   1443     fit_params = fit_params if fit_params is not None else {}
   1444     fit_params = dict([(k, _index_param_value(X, v, train))
   1445                       for k, v in fit_params.items()])
   1446 
   1447     if parameters is not None:
-> 1448         estimator.set_params(**parameters)
   1449 
   1450     start_time = time.time()
   1451 
   1452     X_train, y_train = _safe_split(estimator, X, y, train)

...........................................................................
/usr/local/lib/python2.7/dist-packages/sklearn/base.pyc in set_params(self=Pipeline(steps=[('estimate', AdaBoostRegressor(b...,
         n_estimators=50, random_state=None))]), **params={'estimate__n_estimators': 5, 'estimate_learning_rate': 0.1})
    251                 sub_object.set_params(**{sub_name: value})
    252             else:
    253                 # simple objects case
    254                 if not key in valid_params:
    255                     raise ValueError('Invalid parameter %s ' 'for estimator %s'
--> 256                                      % (key, self.__class__.__name__))
    257                 setattr(self, key, value)
    258         return self
    259 
    260     def __repr__(self):

ValueError: Invalid parameter estimate_learning_rate for estimator Pipeline
___________________________________________________________________________

Test data's prediction MAPE score:

In [28]:
final_est = search_params.best_estimator_
test_predictions = final_est.predict(data_test)
print(mape(data_test, test_predictions, targets_test))

2.34569095992
2.34569095992
2.34569095992


In [29]:
pickle.dump(final_est, open(EST_PICKLE_FILENAME, "w") )

Run "Process Final Test Data With Final Algorithm" to use pickled final algorithm against final test data to produce csv required by this competition.

In [23]:
# Just testing Imputer. Turns out somehow Imputer causes number of features reduced, weird.

# imputer = Imputer()
est = DecisionTreeRegressor(max_features=len(features))

data_train_i = np.copy(data_train)
print(data_train.shape)
print(data_train[0:10])
# data_train_i = imputer.fit_transform(data_train)
data_train_i[np.isnan(data_train_i)] = 0
data_train_i.astype('float32')
print(data_train_i.shape)
print(data_train_i[0:10])
est.fit(data_train_i, targets_train)
predictions = est.predict(data_test)
print(mape(data_test, predictions, targets_test))

(102680, 18)
[[  1.70000000e+01   3.00000000e+00   0.00000000e+00   3.27000000e+02
    2.97272727e+01   1.87671617e+04  -2.45115880e+03  -1.62980717e+04
   -2.21621266e+04  -1.19145310e+03   7.28000000e+02   7.40000000e+01
    2.30000000e+01   2.90000000e+01   2.00000000e+00   1.06000000e+02
    1.00000000e+00   3.00000000e+00]
 [  1.09000000e+02   4.00000000e+00   0.00000000e+00   1.27000000e+02
    1.81428571e+01  -6.11945684e+04   1.66019762e+03   2.40659554e+02
    7.81203839e+03   3.14309459e+02   1.23000000e+02   1.90000000e+01
    3.00000000e+00   1.00000000e+00   3.00000000e+00   1.24000000e+02
    8.00000000e+00   4.00000000e+00]
 [  9.70000000e+01   4.00000000e+00   0.00000000e+00   2.49000000e+02
    1.55625000e+01  -5.73741656e+04  -3.33111579e+03   2.33390366e+03
    3.77453856e+03  -7.61532298e+02   2.34000000e+02   6.60000000e+01
    1.80000000e+01   8.00000000e+00   0.00000000e+00   0.00000000e+00
    0.00000000e+00   2.00000000e+00]
 [  3.50000000e+01   6.00000000e+00 