In [46]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from df_utils import get_companies_list, get_X_y
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.feature_selection import SelectKBest, VarianceThreshold
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, roc_auc_score, \
                            precision_score, accuracy_score, confusion_matrix
from sklearn.decomposition import PCA
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.feature_selection import SelectKBest

#### Load the dataframe and companies with not too many nans

In [37]:
df, companies = get_companies_list(2)
# learner history parameters
nhist = 10
nfut = 2
totHist = int(3*365)

comp_dict = {}
for i, comp in enumerate(companies):
    comp_dict[comp] = get_X_y(df, comp, nfut, nhist, totHist)

#### Drop some columns:

In [38]:
X_orig, y, ysim = comp_dict[comp]
print(X_orig.columns.values)
print(y.columns.values)
print(ysim.columns.values)

include = ['c_oend_%', 'c_slow_%', 'c_shigh_%', 'change_Me', 'offer_sell_', 'offer_buy_'] #['c_shigh_', 'c_slow_', 'offer_sell_', 'offer_buy_', 'sales_low_', 'sales_high_']
cols_keep = [col for col in X_orig.columns if col[:-4] in include] 
y_cols = ['offer_end_change', 'sale_low_change', 'sale_high_change']

['offer_end_-01' 'offer_end_-02' 'offer_end_-03' 'offer_end_-04'
 'offer_end_-05' 'offer_end_-06' 'offer_end_-07' 'offer_end_-08'
 'offer_end_-09' 'offer_end_-10' 'offer_buy_-01' 'offer_buy_-02'
 'offer_buy_-03' 'offer_buy_-04' 'offer_buy_-05' 'offer_buy_-06'
 'offer_buy_-07' 'offer_buy_-08' 'offer_buy_-09' 'offer_buy_-10'
 'offer_sell_-01' 'offer_sell_-02' 'offer_sell_-03' 'offer_sell_-04'
 'offer_sell_-05' 'offer_sell_-06' 'offer_sell_-07' 'offer_sell_-08'
 'offer_sell_-09' 'offer_sell_-10' 'sales_low_-01' 'sales_low_-02'
 'sales_low_-03' 'sales_low_-04' 'sales_low_-05' 'sales_low_-06'
 'sales_low_-07' 'sales_low_-08' 'sales_low_-09' 'sales_low_-10'
 'sales_high_-01' 'sales_high_-02' 'sales_high_-03' 'sales_high_-04'
 'sales_high_-05' 'sales_high_-06' 'sales_high_-07' 'sales_high_-08'
 'sales_high_-09' 'sales_high_-10' 'change_Me_-01' 'change_Me_-02'
 'change_Me_-03' 'change_Me_-04' 'change_Me_-05' 'change_Me_-06'
 'change_Me_-07' 'change_Me_-08' 'change_Me_-09' 'change_Me_-10'
 'c_s

### Plot pictures:

In [39]:
def plot_sim(comp, ysim, ndays, *args):
    
    
    plt.figure(figsize = (12,6))
    x = pd.to_datetime(ysim.index, format = '%d.%m.%Y')[:ndays]
    yl = ysim['sales_low_000'].values[:ndays]
    yh = ysim['sales_high_000'].values[:ndays]
    #y = ysim['offer_end_prev'].values[:ndays]
    
    #plt.plot_date(x,y, linestyle = '-', marker = None)
    plt.fill_between(x,yl,yh, linestyle = '-')
    plt.ylabel('Sales lowest to highest filled')
    #[plt.gca().axvline(xi, alpha = .1) for xi in x]
    
    if len(args) == 3:
        pred, truth, col = [arg[:ndays] for arg in args]
        
        correct = pred & truth
        fp = pred & np.logical_not(truth)
        
        plt.scatter(x[correct], yh[correct], s = 40, alpha = .7, 
                    c = 'green', label = col + ' correct')
        plt.scatter(x[truth], yh[truth], s = 20, c = 'red', alpha = .2,
                    label = col + ' true')
        
    else:    
        plt.twinx()
        y2 = ysim['offer_end_change']*100
        plt.plot_date(x,y2, linestyle = '-', marker = None, color = 'red')
        plt.ylabel('Offer end percentage change')
    
    plt.legend(frameon = False)
    plt.title(comp, fontsize = 14)
    plt.show()
   

### Make estimator pipe

In [49]:
pipe_rf = Pipeline([('pol', PolynomialFeatures(degree = 2, interaction_only = True)),
                    ('var', VarianceThreshold()),
                    ('sel', SelectKBest())
                    ('rf', RandomForestClassifier())]) 

params_rf = [{'sel__k': np.arange(40,100,10),
              'rf__max_features': np.arange(5,30,5),
              'rf__max_depth': [5,10,20],
              'rf__n_estimators': [20,50,100]}]

pipe_gbm = Pipeline([('pol', PolynomialFeatures(degree = 2, interaction_only = True)),
                     ('var', VarianceThreshold()),
                     #('scale', StandardScaler()),
                     #('pca', PCA()),
                     ('gbm', GradientBoostingClassifier())]) 

params_gbm = [{'pca__n_components': np.arange(40,100,10),
                'rf__max_features': np.arange(5,30,5),
                'rf__max_depth': [5,10,20],
                'rf__n_estimators': [20,50,100]}]


def get_pipe(key):
    if key == 'rf':
        return pipe_rf, params_rf
    elif key == 'gbm':
        return pipe_gbm, params_gbm


TypeError: 'tuple' object is not callable

### Make custom fit for each company:

In [41]:
def comp_estimator(comp, y, threshold, pipe, params, ntest = 50, metric = 'roc_auc'):
    
    splitter = StratifiedKFold(n_splits = 5, shuffle = True) #, random_state = 0)
    X,_,ysim = comp_dict[comp]
    X = X[cols_keep]
    
    y_bin = threshold < y
    
    X_test, y_test = X[:ntest], y_bin[:ntest]
    X_train, y_train = X[ntest:], y_bin[ntest:]
    Xy = [X_train, X_test, y_train, y_test, ysim, y_bin]
    
    
    grid = GridSearchCV(pipe, params, scoring = metric, n_jobs = 5, 
                        cv = splitter, verbose = 1)
    grid.fit(X_train, y_train)
    
    return Xy, grid.best_estimator_, grid.best_params_, grid.best_score_


def get_scores(y_pred_train, y_pred_test, y_pred_train_p, 
               y_pred_test_p, y_train, y_test, show_report = False):
    try:
        roc_auc_test = roc_auc_score(y_test, y_pred_test_p)
    except ValueError as e:
        print(e)
        roc_auc_test = -1
    
    precision_test = precision_score(y_test, y_pred_test)
    
    if show_report:
        print('Train set classification')
        print(classification_report(y_train, y_pred_train, target_names = ['not rise', 'rise']))
        print(confusion_matrix(y_train, y_pred_train))
        print('Train roc_auc = {:.3f}'.format(roc_auc_score(y_train, y_pred_train_p)))
        print('\nTest set classification')
        print(classification_report(y_test, y_pred_test, target_names = ['not rise', 'rise']))
        print(confusion_matrix(y_test, y_pred_test))
        print('Test roc_auc = {:.3f}\n'.format(roc_auc_test))
    
    return roc_auc_test, precision_test

def get_prediction(Xy, estimator, show_report = False):
    
    X_train, X_test, y_train, y_test = Xy[:4]
    y_pred_train = estimator.predict(X_train)
    y_pred_test = estimator.predict(X_test)
    y_pred_train_p = estimator.predict_proba(X_train)[:,1]
    y_pred_test_p = estimator.predict_proba(X_test)[:,1]
    
    
    return y_pred_train, y_pred_test, y_pred_train_p, y_pred_test_p


### Feed the pipe to fit for each company and collect the results

In [48]:
def fit_and_report(companies, key = 'rf', ntest = 100, show = False):
    
    thres = {'offer_end_change':.01, 
             'sale_low_change+1':.0,
             'sale_low_change':.01,
             'sale_high_change':.01}
    
    pipe, params = get_pipe(key)
    try:
        results_dict = np.load('opm_params_{}.npy'.format(key)).item()        
    except FileNotFoundError:
        results_dict = {}
    
    for comp in companies:
        y = comp_dict[comp][1]
        for col in y.columns.values:
            try:
                results_dict[comp][col]
            except KeyError:
                
                
                print(comp, ' ', col)

                X,_,ysim = comp_dict[comp]
                X = X[cols_keep]

                Xy, estimator_r, params_opm_r, score_r \
                    = comp_estimator(comp, y[col].values,
                                     thres[col], pipe, params,
                                     ntest = ntest,
                                     metric = 'roc_auc')

                y_pred_train, y_pred_test, y_pred_train_p, y_pred_test_p \
                    = get_prediction(Xy, estimator_r)

                roc_auc, precision = get_scores(y_pred_train, 
                                                y_pred_test, 
                                                y_pred_train_p, 
                                                y_pred_test_p, 
                                                Xy[2], Xy[3], 
                                                show_report = show)

                if show:
                    print(params_opm_r)
                    y_pred_tot = np.concatenate((y_pred_test, 
                                                 y_pred_train))
                    plot_sim(comp, Xy[4], ntest*4, y_pred_tot, 
                             Xy[5], col)


                col_dict = {col: {'roc_auc':roc_auc, 
                                  'precision':precision,
                                  'threshold':thres[col],
                                  'opm_params':params_opm_r}}
                try:
                    results_dict[comp].update(col_dict)
                except KeyError:
                    results_dict[comp] = col_dict
                 
                print(results_dict[comp])
                np.save('opm_params_{}'.format(key), results_dict)
    return results_dict

results_dict = fit_and_report(companies, 'rf', ntest = 50, show = True)
    


Aktia Pankki A   offer_end_change
Fitting 5 folds for each of 270 candidates, totalling 1350 fits


JoblibValueError: JoblibValueError
___________________________________________________________________________
Multiprocessing exception:
...........................................................................
/usr/lib/python3.5/runpy.py in _run_module_as_main(mod_name='ipykernel_launcher', alter_argv=1)
    179         sys.exit(msg)
    180     main_globals = sys.modules["__main__"].__dict__
    181     if alter_argv:
    182         sys.argv[0] = mod_spec.origin
    183     return _run_code(code, main_globals, None,
--> 184                      "__main__", mod_spec)
        mod_spec = ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.5/dist-packages/ipykernel_launcher.py')
    185 
    186 def run_module(mod_name, init_globals=None,
    187                run_name=None, alter_sys=False):
    188     """Execute a module's code without importing it

...........................................................................
/usr/lib/python3.5/runpy.py in _run_code(code=<code object <module> at 0x7f73c6c829c0, file "/...3.5/dist-packages/ipykernel_launcher.py", line 5>, run_globals={'__builtins__': <module 'builtins' (built-in)>, '__cached__': '/usr/local/lib/python3.5/dist-packages/__pycache__/ipykernel_launcher.cpython-35.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.5/dist-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/usr/local/lib/python3.5/dist-packages/ipykernel/kernelapp.py'>, 'sys': <module 'sys' (built-in)>}, init_globals=None, mod_name='__main__', mod_spec=ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.5/dist-packages/ipykernel_launcher.py'), pkg_name='', script_name=None)
     80                        __cached__ = cached,
     81                        __doc__ = None,
     82                        __loader__ = loader,
     83                        __package__ = pkg_name,
     84                        __spec__ = mod_spec)
---> 85     exec(code, run_globals)
        code = <code object <module> at 0x7f73c6c829c0, file "/...3.5/dist-packages/ipykernel_launcher.py", line 5>
        run_globals = {'__builtins__': <module 'builtins' (built-in)>, '__cached__': '/usr/local/lib/python3.5/dist-packages/__pycache__/ipykernel_launcher.cpython-35.pyc', '__doc__': 'Entry point for launching an IPython kernel.\n\nTh...orts until\nafter removing the cwd from sys.path.\n', '__file__': '/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py', '__loader__': <_frozen_importlib_external.SourceFileLoader object>, '__name__': '__main__', '__package__': '', '__spec__': ModuleSpec(name='ipykernel_launcher', loader=<_f...b/python3.5/dist-packages/ipykernel_launcher.py'), 'app': <module 'ipykernel.kernelapp' from '/usr/local/lib/python3.5/dist-packages/ipykernel/kernelapp.py'>, 'sys': <module 'sys' (built-in)>}
     86     return run_globals
     87 
     88 def _run_module_code(code, init_globals=None,
     89                     mod_name=None, mod_spec=None,

...........................................................................
/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py in <module>()
     11     # This is added back by InteractiveShellApp.init_path()
     12     if sys.path[0] == '':
     13         del sys.path[0]
     14 
     15     from ipykernel import kernelapp as app
---> 16     app.launch_new_instance()

...........................................................................
/usr/local/lib/python3.5/dist-packages/traitlets/config/application.py in launch_instance(cls=<class 'ipykernel.kernelapp.IPKernelApp'>, argv=None, **kwargs={})
    653 
    654         If a global instance already exists, this reinitializes and starts it
    655         """
    656         app = cls.instance(**kwargs)
    657         app.initialize(argv)
--> 658         app.start()
        app.start = <bound method IPKernelApp.start of <ipykernel.kernelapp.IPKernelApp object>>
    659 
    660 #-----------------------------------------------------------------------------
    661 # utility functions, for convenience
    662 #-----------------------------------------------------------------------------

...........................................................................
/usr/local/lib/python3.5/dist-packages/ipykernel/kernelapp.py in start(self=<ipykernel.kernelapp.IPKernelApp object>)
    472             return self.subapp.start()
    473         if self.poller is not None:
    474             self.poller.start()
    475         self.kernel.start()
    476         try:
--> 477             ioloop.IOLoop.instance().start()
    478         except KeyboardInterrupt:
    479             pass
    480 
    481 launch_new_instance = IPKernelApp.launch_instance

...........................................................................
/usr/local/lib/python3.5/dist-packages/zmq/eventloop/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    172             )
    173         return loop
    174     
    175     def start(self):
    176         try:
--> 177             super(ZMQIOLoop, self).start()
        self.start = <bound method ZMQIOLoop.start of <zmq.eventloop.ioloop.ZMQIOLoop object>>
    178         except ZMQError as e:
    179             if e.errno == ETERM:
    180                 # quietly return on ETERM
    181                 pass

...........................................................................
/usr/local/lib/python3.5/dist-packages/tornado/ioloop.py in start(self=<zmq.eventloop.ioloop.ZMQIOLoop object>)
    883                 self._events.update(event_pairs)
    884                 while self._events:
    885                     fd, events = self._events.popitem()
    886                     try:
    887                         fd_obj, handler_func = self._handlers[fd]
--> 888                         handler_func(fd_obj, events)
        handler_func = <function wrap.<locals>.null_wrapper>
        fd_obj = <zmq.sugar.socket.Socket object>
        events = 1
    889                     except (OSError, IOError) as e:
    890                         if errno_from_exception(e) == errno.EPIPE:
    891                             # Happens when the client closes the connection
    892                             pass

...........................................................................
/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py in null_wrapper(*args=(<zmq.sugar.socket.Socket object>, 1), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = (<zmq.sugar.socket.Socket object>, 1)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py in _handle_events(self=<zmq.eventloop.zmqstream.ZMQStream object>, fd=<zmq.sugar.socket.Socket object>, events=1)
    435             # dispatch events:
    436             if events & IOLoop.ERROR:
    437                 gen_log.error("got POLLERR event on ZMQStream, which doesn't make sense")
    438                 return
    439             if events & IOLoop.READ:
--> 440                 self._handle_recv()
        self._handle_recv = <bound method ZMQStream._handle_recv of <zmq.eventloop.zmqstream.ZMQStream object>>
    441                 if not self.socket:
    442                     return
    443             if events & IOLoop.WRITE:
    444                 self._handle_send()

...........................................................................
/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py in _handle_recv(self=<zmq.eventloop.zmqstream.ZMQStream object>)
    467                 gen_log.error("RECV Error: %s"%zmq.strerror(e.errno))
    468         else:
    469             if self._recv_callback:
    470                 callback = self._recv_callback
    471                 # self._recv_callback = None
--> 472                 self._run_callback(callback, msg)
        self._run_callback = <bound method ZMQStream._run_callback of <zmq.eventloop.zmqstream.ZMQStream object>>
        callback = <function wrap.<locals>.null_wrapper>
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    473                 
    474         # self.update_state()
    475         
    476 

...........................................................................
/usr/local/lib/python3.5/dist-packages/zmq/eventloop/zmqstream.py in _run_callback(self=<zmq.eventloop.zmqstream.ZMQStream object>, callback=<function wrap.<locals>.null_wrapper>, *args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    409         close our socket."""
    410         try:
    411             # Use a NullContext to ensure that all StackContexts are run
    412             # inside our blanket exception handler rather than outside.
    413             with stack_context.NullContext():
--> 414                 callback(*args, **kwargs)
        callback = <function wrap.<locals>.null_wrapper>
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    415         except:
    416             gen_log.error("Uncaught exception, closing connection.",
    417                           exc_info=True)
    418             # Close the socket on an uncaught exception from a user callback

...........................................................................
/usr/local/lib/python3.5/dist-packages/tornado/stack_context.py in null_wrapper(*args=([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],), **kwargs={})
    272         # Fast path when there are no active contexts.
    273         def null_wrapper(*args, **kwargs):
    274             try:
    275                 current_state = _state.contexts
    276                 _state.contexts = cap_contexts[0]
--> 277                 return fn(*args, **kwargs)
        args = ([<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>],)
        kwargs = {}
    278             finally:
    279                 _state.contexts = current_state
    280         null_wrapper._wrapped = True
    281         return null_wrapper

...........................................................................
/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py in dispatcher(msg=[<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>])
    278         if self.control_stream:
    279             self.control_stream.on_recv(self.dispatch_control, copy=False)
    280 
    281         def make_dispatcher(stream):
    282             def dispatcher(msg):
--> 283                 return self.dispatch_shell(stream, msg)
        msg = [<zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>, <zmq.sugar.frame.Frame object>]
    284             return dispatcher
    285 
    286         for s in self.shell_streams:
    287             s.on_recv(make_dispatcher(s), copy=False)

...........................................................................
/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py in dispatch_shell(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, msg={'buffers': [], 'content': {'allow_stdin': True, 'code': "def fit_and_report(companies, key = 'rf', ntest ...t(companies, 'rf', ntest = 50, show = True)\n    \n", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2017, 11, 4, 16, 55, 6, 588393, tzinfo=tzutc()), 'msg_id': '26DC9D1BEC1F4A44982D62D5A3B8A4E5', 'msg_type': 'execute_request', 'session': '2B4D6E377E97435C8BEBB6731E86503B', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '26DC9D1BEC1F4A44982D62D5A3B8A4E5', 'msg_type': 'execute_request', 'parent_header': {}})
    230             self.log.warn("Unknown message type: %r", msg_type)
    231         else:
    232             self.log.debug("%s: %s", msg_type, msg)
    233             self.pre_handler_hook()
    234             try:
--> 235                 handler(stream, idents, msg)
        handler = <bound method Kernel.execute_request of <ipykernel.ipkernel.IPythonKernel object>>
        stream = <zmq.eventloop.zmqstream.ZMQStream object>
        idents = [b'2B4D6E377E97435C8BEBB6731E86503B']
        msg = {'buffers': [], 'content': {'allow_stdin': True, 'code': "def fit_and_report(companies, key = 'rf', ntest ...t(companies, 'rf', ntest = 50, show = True)\n    \n", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2017, 11, 4, 16, 55, 6, 588393, tzinfo=tzutc()), 'msg_id': '26DC9D1BEC1F4A44982D62D5A3B8A4E5', 'msg_type': 'execute_request', 'session': '2B4D6E377E97435C8BEBB6731E86503B', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '26DC9D1BEC1F4A44982D62D5A3B8A4E5', 'msg_type': 'execute_request', 'parent_header': {}}
    236             except Exception:
    237                 self.log.error("Exception in message handler:", exc_info=True)
    238             finally:
    239                 self.post_handler_hook()

...........................................................................
/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py in execute_request(self=<ipykernel.ipkernel.IPythonKernel object>, stream=<zmq.eventloop.zmqstream.ZMQStream object>, ident=[b'2B4D6E377E97435C8BEBB6731E86503B'], parent={'buffers': [], 'content': {'allow_stdin': True, 'code': "def fit_and_report(companies, key = 'rf', ntest ...t(companies, 'rf', ntest = 50, show = True)\n    \n", 'silent': False, 'stop_on_error': True, 'store_history': True, 'user_expressions': {}}, 'header': {'date': datetime.datetime(2017, 11, 4, 16, 55, 6, 588393, tzinfo=tzutc()), 'msg_id': '26DC9D1BEC1F4A44982D62D5A3B8A4E5', 'msg_type': 'execute_request', 'session': '2B4D6E377E97435C8BEBB6731E86503B', 'username': 'username', 'version': '5.0'}, 'metadata': {}, 'msg_id': '26DC9D1BEC1F4A44982D62D5A3B8A4E5', 'msg_type': 'execute_request', 'parent_header': {}})
    394         if not silent:
    395             self.execution_count += 1
    396             self._publish_execute_input(code, parent, self.execution_count)
    397 
    398         reply_content = self.do_execute(code, silent, store_history,
--> 399                                         user_expressions, allow_stdin)
        user_expressions = {}
        allow_stdin = True
    400 
    401         # Flush output before sending the reply.
    402         sys.stdout.flush()
    403         sys.stderr.flush()

...........................................................................
/usr/local/lib/python3.5/dist-packages/ipykernel/ipkernel.py in do_execute(self=<ipykernel.ipkernel.IPythonKernel object>, code="def fit_and_report(companies, key = 'rf', ntest ...t(companies, 'rf', ntest = 50, show = True)\n    \n", silent=False, store_history=True, user_expressions={}, allow_stdin=True)
    191 
    192         self._forward_input(allow_stdin)
    193 
    194         reply_content = {}
    195         try:
--> 196             res = shell.run_cell(code, store_history=store_history, silent=silent)
        res = undefined
        shell.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = "def fit_and_report(companies, key = 'rf', ntest ...t(companies, 'rf', ntest = 50, show = True)\n    \n"
        store_history = True
        silent = False
    197         finally:
    198             self._restore_input()
    199 
    200         if res.error_before_exec is not None:

...........................................................................
/usr/local/lib/python3.5/dist-packages/ipykernel/zmqshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, *args=("def fit_and_report(companies, key = 'rf', ntest ...t(companies, 'rf', ntest = 50, show = True)\n    \n",), **kwargs={'silent': False, 'store_history': True})
    528             )
    529         self.payload_manager.write_payload(payload)
    530 
    531     def run_cell(self, *args, **kwargs):
    532         self._last_traceback = None
--> 533         return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
        self.run_cell = <bound method ZMQInteractiveShell.run_cell of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        args = ("def fit_and_report(companies, key = 'rf', ntest ...t(companies, 'rf', ntest = 50, show = True)\n    \n",)
        kwargs = {'silent': False, 'store_history': True}
    534 
    535     def _showtraceback(self, etype, evalue, stb):
    536         # try to preserve ordering of tracebacks and print statements
    537         sys.stdout.flush()

...........................................................................
/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py in run_cell(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, raw_cell="def fit_and_report(companies, key = 'rf', ntest ...t(companies, 'rf', ntest = 50, show = True)\n    \n", store_history=True, silent=False, shell_futures=True)
   2693                 self.displayhook.exec_result = result
   2694 
   2695                 # Execute the user code
   2696                 interactivity = "none" if silent else self.ast_node_interactivity
   2697                 has_raised = self.run_ast_nodes(code_ast.body, cell_name,
-> 2698                    interactivity=interactivity, compiler=compiler, result=result)
        interactivity = 'last_expr'
        compiler = <IPython.core.compilerop.CachingCompiler object>
   2699                 
   2700                 self.last_execution_succeeded = not has_raised
   2701 
   2702                 # Reset this so later displayed values do not modify the

...........................................................................
/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py in run_ast_nodes(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, nodelist=[<_ast.FunctionDef object>, <_ast.Assign object>], cell_name='<ipython-input-48-0c6de7a59909>', interactivity='none', compiler=<IPython.core.compilerop.CachingCompiler object>, result=<ExecutionResult object at 7f7384284e10, executi..._before_exec=None error_in_exec=None result=None>)
   2797 
   2798         try:
   2799             for i, node in enumerate(to_run_exec):
   2800                 mod = ast.Module([node])
   2801                 code = compiler(mod, cell_name, "exec")
-> 2802                 if self.run_code(code, result):
        self.run_code = <bound method InteractiveShell.run_code of <ipykernel.zmqshell.ZMQInteractiveShell object>>
        code = <code object <module> at 0x7f7382ce8f60, file "<ipython-input-48-0c6de7a59909>", line 62>
        result = <ExecutionResult object at 7f7384284e10, executi..._before_exec=None error_in_exec=None result=None>
   2803                     return True
   2804 
   2805             for i, node in enumerate(to_run_interactive):
   2806                 mod = ast.Interactive([node])

...........................................................................
/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py in run_code(self=<ipykernel.zmqshell.ZMQInteractiveShell object>, code_obj=<code object <module> at 0x7f7382ce8f60, file "<ipython-input-48-0c6de7a59909>", line 62>, result=<ExecutionResult object at 7f7384284e10, executi..._before_exec=None error_in_exec=None result=None>)
   2857         outflag = True  # happens in more places, so it's easier as default
   2858         try:
   2859             try:
   2860                 self.hooks.pre_run_code_hook()
   2861                 #rprint('Running code', repr(code_obj)) # dbg
-> 2862                 exec(code_obj, self.user_global_ns, self.user_ns)
        code_obj = <code object <module> at 0x7f7382ce8f60, file "<ipython-input-48-0c6de7a59909>", line 62>
        self.user_global_ns = {'GradientBoostingClassifier': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingClassifier'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import numpy as np\nimport pandas as pd\nimport ma...el_selection import GridSearchCV, StratifiedKFold', 'df, companies = get_companies_list(2)\n# learner ...t[comp] = get_X_y(df, comp, nfut, nhist, totHist)', "X_orig, y, ysim = comp_dict[comp]\nprint(X_orig.c...d_change', 'sale_low_change', 'sale_high_change']", 'def plot_sim(comp, ysim, ndays, *args):\n    \n   ...plt.title(comp, fontsize = 14)\n    plt.show()\n   ', "pipe_rf = Pipeline([('pol', PolynomialFeatures(d...key == 'gbm':\n        return pipe_gbm, params_gbm", 'def comp_estimator(comp, y, threshold, pipe, par...train, y_pred_test, y_pred_train_p, y_pred_test_p', "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...rn pred_df\n\nget_prediction_df()['Aktia Pankki A']", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...rn pred_df\n\nget_prediction_df()['Aktia Pankki A']", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...)\n    \n    return pred_df\n\nget_prediction_df(100)", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...)\n    \n    return pred_df\n\nget_prediction_df(100)", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...)\n    \n    return pred_df\n\nget_prediction_df(100)", ...], 'Out': {7:    (Aktia Pankki A, offer_end_change)  (Aktia Pa...                   0.0  

[10 rows x 244 columns], 9:    (Aktia Pankki A, offer_end_change)  (Aktia Pa...                   0.0  

[10 rows x 244 columns], 12:    (Aktia Pankki A, offer_end_change)  (Aktia Pa...                   0.0  

[10 rows x 244 columns], 13:    (Aktia Pankki A, offer_end_change)  (Aktia Pa...                   0.0  

[10 rows x 244 columns], 14:    (Aktia Pankki A, offer_end_change)  (Aktia Pa...                   0.0  

[10 rows x 244 columns], 15:     Aktia Pankki A                              ...0  
9              0.0  

[10 rows x 244 columns], 22:     Aktia Pankki A                              ...0  
9              0.0  

[10 rows x 244 columns], 26:              Aktia Pankki A                     ...   NaN            NaN  

[100 rows x 366 columns], 27:              Aktia Pankki A                     ...   NaN            NaN  

[100 rows x 366 columns], 28:             offer_end_change  sale_low_change+1 ...    NaN             NaN  

[100 rows x 6 columns], ...}, 'PCA': <class 'sklearn.decomposition.pca.PCA'>, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'PolynomialFeatures': <class 'sklearn.preprocessing.data.PolynomialFeatures'>, 'RandomForestClassifier': <class 'sklearn.ensemble.forest.RandomForestClassifier'>, 'SelectKBest': <class 'sklearn.feature_selection.univariate_selection.SelectKBest'>, 'StandardScaler': <class 'sklearn.preprocessing.data.StandardScaler'>, ...}
        self.user_ns = {'GradientBoostingClassifier': <class 'sklearn.ensemble.gradient_boosting.GradientBoostingClassifier'>, 'GridSearchCV': <class 'sklearn.model_selection._search.GridSearchCV'>, 'In': ['', 'import numpy as np\nimport pandas as pd\nimport ma...el_selection import GridSearchCV, StratifiedKFold', 'df, companies = get_companies_list(2)\n# learner ...t[comp] = get_X_y(df, comp, nfut, nhist, totHist)', "X_orig, y, ysim = comp_dict[comp]\nprint(X_orig.c...d_change', 'sale_low_change', 'sale_high_change']", 'def plot_sim(comp, ysim, ndays, *args):\n    \n   ...plt.title(comp, fontsize = 14)\n    plt.show()\n   ', "pipe_rf = Pipeline([('pol', PolynomialFeatures(d...key == 'gbm':\n        return pipe_gbm, params_gbm", 'def comp_estimator(comp, y, threshold, pipe, par...train, y_pred_test, y_pred_train_p, y_pred_test_p', "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...rn pred_df\n\nget_prediction_df()['Aktia Pankki A']", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...rn pred_df\n\nget_prediction_df()['Aktia Pankki A']", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...col)\n    \n    return pred_df\n\nget_prediction_df()", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...)\n    \n    return pred_df\n\nget_prediction_df(100)", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...)\n    \n    return pred_df\n\nget_prediction_df(100)", "# optimal parameters:\nkey = 'rf'\n#opm_params = n...)\n    \n    return pred_df\n\nget_prediction_df(100)", ...], 'Out': {7:    (Aktia Pankki A, offer_end_change)  (Aktia Pa...                   0.0  

[10 rows x 244 columns], 9:    (Aktia Pankki A, offer_end_change)  (Aktia Pa...                   0.0  

[10 rows x 244 columns], 12:    (Aktia Pankki A, offer_end_change)  (Aktia Pa...                   0.0  

[10 rows x 244 columns], 13:    (Aktia Pankki A, offer_end_change)  (Aktia Pa...                   0.0  

[10 rows x 244 columns], 14:    (Aktia Pankki A, offer_end_change)  (Aktia Pa...                   0.0  

[10 rows x 244 columns], 15:     Aktia Pankki A                              ...0  
9              0.0  

[10 rows x 244 columns], 22:     Aktia Pankki A                              ...0  
9              0.0  

[10 rows x 244 columns], 26:              Aktia Pankki A                     ...   NaN            NaN  

[100 rows x 366 columns], 27:              Aktia Pankki A                     ...   NaN            NaN  

[100 rows x 366 columns], 28:             offer_end_change  sale_low_change+1 ...    NaN             NaN  

[100 rows x 6 columns], ...}, 'PCA': <class 'sklearn.decomposition.pca.PCA'>, 'Pipeline': <class 'sklearn.pipeline.Pipeline'>, 'PolynomialFeatures': <class 'sklearn.preprocessing.data.PolynomialFeatures'>, 'RandomForestClassifier': <class 'sklearn.ensemble.forest.RandomForestClassifier'>, 'SelectKBest': <class 'sklearn.feature_selection.univariate_selection.SelectKBest'>, 'StandardScaler': <class 'sklearn.preprocessing.data.StandardScaler'>, ...}
   2863             finally:
   2864                 # Reset our crash handler in place
   2865                 sys.excepthook = old_excepthook
   2866         except SystemExit as e:

...........................................................................
/home/topiko/Workspace/FinStk2/<ipython-input-48-0c6de7a59909> in <module>()
     57                  
     58                 print(results_dict[comp])
     59                 np.save('opm_params_{}'.format(key), results_dict)
     60     return results_dict
     61 
---> 62 results_dict = fit_and_report(companies, 'rf', ntest = 50, show = True)
     63     

...........................................................................
/home/topiko/Workspace/FinStk2/<ipython-input-48-0c6de7a59909> in fit_and_report(companies=['Aktia Pankki A', 'Alma Media', 'Amer Sports A', 'Aspo', 'Atria A', 'Basware', 'Bittium', 'CapMan', 'Cargotec', 'Citycon', 'Cramo', 'Elisa', 'F-Secure', 'Finnair', 'Fiskars', 'Fortum', 'Glaston', 'HKScan A', 'Huhtamäki', 'Kemira', ...], key='rf', ntest=50, show=True)
     25                 X = X[cols_keep]
     26 
     27                 Xy, estimator_r, params_opm_r, score_r                     = comp_estimator(comp, y[col].values,
     28                                      thres[col], pipe, params,
     29                                      ntest = ntest,
---> 30                                      metric = 'roc_auc')
     31 
     32                 y_pred_train, y_pred_test, y_pred_train_p, y_pred_test_p                     = get_prediction(Xy, estimator_r)
     33 
     34                 roc_auc, precision = get_scores(y_pred_train, 

...........................................................................
/home/topiko/Workspace/FinStk2/<ipython-input-41-3feac741d67b> in comp_estimator(comp='Aktia Pankki A', y=array([ 0.00106952, -0.00106724,  0.00107066, ...,  0.01620029,
       -0.02305476,  0.00147493]), threshold=0.01, pipe=Pipeline(memory=None,
     steps=[('pol', Polyno...None, verbose=0,
            warm_start=False))]), params=[{'pca__n_components': array([40, 50, 60, 70, 80, 90]), 'rf__max_depth': [5, 10, 20], 'rf__max_features': array([ 5, 10, 15, 20, 25]), 'rf__n_estimators': [20, 50, 100]}], ntest=50, metric='roc_auc')
     11     Xy = [X_train, X_test, y_train, y_test, ysim, y_bin]
     12     
     13     
     14     grid = GridSearchCV(pipe, params, scoring = metric, n_jobs = 5, 
     15                         cv = splitter, verbose = 1)
---> 16     grid.fit(X_train, y_train)
     17     
     18     return Xy, grid.best_estimator_, grid.best_params_, grid.best_score_
     19 
     20 

...........................................................................
/usr/local/lib/python3.5/dist-packages/sklearn/model_selection/_search.py in fit(self=GridSearchCV(cv=StratifiedKFold(n_splits=5, rand..._score=True,
       scoring='roc_auc', verbose=1), X=            change_Me_-01  change_Me_-02  change...06.2013     -0.011364  

[1045 rows x 40 columns], y=array([ True,  True, False, ...,  True, False, False], dtype=bool), groups=None, **fit_params={})
    633                                   return_train_score=self.return_train_score,
    634                                   return_n_test_samples=True,
    635                                   return_times=True, return_parameters=False,
    636                                   error_score=self.error_score)
    637           for parameters, (train, test) in product(candidate_params,
--> 638                                                    cv.split(X, y, groups)))
        cv.split = <bound method StratifiedKFold.split of StratifiedKFold(n_splits=5, random_state=None, shuffle=True)>
        X =             change_Me_-01  change_Me_-02  change...06.2013     -0.011364  

[1045 rows x 40 columns]
        y = array([ True,  True, False, ...,  True, False, False], dtype=bool)
        groups = None
    639 
    640         # if one choose to see train score, "out" will contain train score info
    641         if self.return_train_score:
    642             (train_score_dicts, test_score_dicts, test_sample_counts, fit_time,

...........................................................................
/usr/local/lib/python3.5/dist-packages/sklearn/externals/joblib/parallel.py in __call__(self=Parallel(n_jobs=5), iterable=<generator object BaseSearchCV.fit.<locals>.<genexpr>>)
    784             if pre_dispatch == "all" or n_jobs == 1:
    785                 # The iterable was consumed all at once by the above for loop.
    786                 # No need to wait for async callbacks to trigger to
    787                 # consumption.
    788                 self._iterating = False
--> 789             self.retrieve()
        self.retrieve = <bound method Parallel.retrieve of Parallel(n_jobs=5)>
    790             # Make sure that we get a last message telling us we are done
    791             elapsed_time = time.time() - self._start_time
    792             self._print('Done %3i out of %3i | elapsed: %s finished',
    793                         (len(self._output), len(self._output),

---------------------------------------------------------------------------
Sub-process traceback:
---------------------------------------------------------------------------
ValueError                                         Sat Nov  4 17:55:06 2017
PID: 8761                                    Python 3.5.2: /usr/bin/python3
...........................................................................
/usr/local/lib/python3.5/dist-packages/sklearn/externals/joblib/parallel.py in __call__(self=<sklearn.externals.joblib.parallel.BatchedCalls object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        self.items = [(<function _fit_and_score>, (Pipeline(memory=None,
     steps=[('pol', Polyno...None, verbose=0,
            warm_start=False))]),             change_Me_-01  change_Me_-02  change...06.2013     -0.011364  

[1045 rows x 40 columns], array([ True,  True, False, ...,  True, False, False], dtype=bool), {'score': make_scorer(roc_auc_score, needs_threshold=True)}, array([   0,    1,    2,    3,    4,    5,    6,... 1036, 1037, 1039, 1040, 1041, 1042, 1043, 1044]), array([   7,   10,   17,   18,   21,   25,   29,... 998, 1003, 1005, 1007, 1013, 1018,
       1038]), 1, {'pca__n_components': 40, 'rf__max_depth': 5, 'rf__max_features': 5, 'rf__n_estimators': 20}), {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True})]
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/usr/local/lib/python3.5/dist-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0=<list_iterator object>)
    126     def __init__(self, iterator_slice):
    127         self.items = list(iterator_slice)
    128         self._size = len(self.items)
    129 
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
        func = <function _fit_and_score>
        args = (Pipeline(memory=None,
     steps=[('pol', Polyno...None, verbose=0,
            warm_start=False))]),             change_Me_-01  change_Me_-02  change...06.2013     -0.011364  

[1045 rows x 40 columns], array([ True,  True, False, ...,  True, False, False], dtype=bool), {'score': make_scorer(roc_auc_score, needs_threshold=True)}, array([   0,    1,    2,    3,    4,    5,    6,... 1036, 1037, 1039, 1040, 1041, 1042, 1043, 1044]), array([   7,   10,   17,   18,   21,   25,   29,... 998, 1003, 1005, 1007, 1013, 1018,
       1038]), 1, {'pca__n_components': 40, 'rf__max_depth': 5, 'rf__max_features': 5, 'rf__n_estimators': 20})
        kwargs = {'error_score': 'raise', 'fit_params': {}, 'return_n_test_samples': True, 'return_parameters': False, 'return_times': True, 'return_train_score': True}
    132 
    133     def __len__(self):
    134         return self._size
    135 

...........................................................................
/usr/local/lib/python3.5/dist-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator=Pipeline(memory=None,
     steps=[('pol', Polyno...None, verbose=0,
            warm_start=False))]), X=            change_Me_-01  change_Me_-02  change...06.2013     -0.011364  

[1045 rows x 40 columns], y=array([ True,  True, False, ...,  True, False, False], dtype=bool), scorer={'score': make_scorer(roc_auc_score, needs_threshold=True)}, train=array([   0,    1,    2,    3,    4,    5,    6,... 1036, 1037, 1039, 1040, 1041, 1042, 1043, 1044]), test=array([   7,   10,   17,   18,   21,   25,   29,... 998, 1003, 1005, 1007, 1013, 1018,
       1038]), verbose=1, parameters={'pca__n_components': 40, 'rf__max_depth': 5, 'rf__max_features': 5, 'rf__n_estimators': 20}, fit_params={}, return_train_score=True, return_parameters=False, return_n_test_samples=True, return_times=True, error_score='raise')
    418                       for k, v in fit_params.items()])
    419 
    420     test_scores = {}
    421     train_scores = {}
    422     if parameters is not None:
--> 423         estimator.set_params(**parameters)
        estimator.set_params = <bound method Pipeline.set_params of Pipeline(me...one, verbose=0,
            warm_start=False))])>
        parameters = {'pca__n_components': 40, 'rf__max_depth': 5, 'rf__max_features': 5, 'rf__n_estimators': 20}
    424 
    425     start_time = time.time()
    426 
    427     X_train, y_train = _safe_split(estimator, X, y, train)

...........................................................................
/usr/local/lib/python3.5/dist-packages/sklearn/pipeline.py in set_params(self=Pipeline(memory=None,
     steps=[('pol', Polyno...None, verbose=0,
            warm_start=False))]), **kwargs={'pca__n_components': 40, 'rf__max_depth': 5, 'rf__max_features': 5, 'rf__n_estimators': 20})
    139 
    140         Returns
    141         -------
    142         self
    143         """
--> 144         self._set_params('steps', **kwargs)
        self._set_params = <bound method _BaseComposition._set_params of Pi...one, verbose=0,
            warm_start=False))])>
        kwargs = {'pca__n_components': 40, 'rf__max_depth': 5, 'rf__max_features': 5, 'rf__n_estimators': 20}
    145         return self
    146 
    147     def _validate_steps(self):
    148         names, estimators = zip(*self.steps)

...........................................................................
/usr/local/lib/python3.5/dist-packages/sklearn/utils/metaestimators.py in _set_params(self=Pipeline(memory=None,
     steps=[('pol', Polyno...None, verbose=0,
            warm_start=False))]), attr='steps', **params={'pca__n_components': 40, 'rf__max_depth': 5, 'rf__max_features': 5, 'rf__n_estimators': 20})
     44         names, _ = zip(*getattr(self, attr))
     45         for name in list(six.iterkeys(params)):
     46             if '__' not in name and name in names:
     47                 self._replace_estimator(attr, name, params.pop(name))
     48         # 3. Step parameters and other initilisation arguments
---> 49         super(_BaseComposition, self).set_params(**params)
        self.set_params = <bound method Pipeline.set_params of Pipeline(me...one, verbose=0,
            warm_start=False))])>
        params = {'pca__n_components': 40, 'rf__max_depth': 5, 'rf__max_features': 5, 'rf__n_estimators': 20}
     50         return self
     51 
     52     def _replace_estimator(self, attr, name, new_val):
     53         # assumes `name` is a valid estimator name

...........................................................................
/usr/local/lib/python3.5/dist-packages/sklearn/base.py in set_params(self=Pipeline(memory=None,
     steps=[('pol', Polyno...None, verbose=0,
            warm_start=False))]), **params={'pca__n_components': 40, 'rf__max_depth': 5, 'rf__max_features': 5, 'rf__n_estimators': 20})
    269                 name, sub_name = split
    270                 if name not in valid_params:
    271                     raise ValueError('Invalid parameter %s for estimator %s. '
    272                                      'Check the list of available parameters '
    273                                      'with `estimator.get_params().keys()`.' %
--> 274                                      (name, self))
        name = 'pca'
        self = Pipeline(memory=None,
     steps=[('pol', Polyno...None, verbose=0,
            warm_start=False))])
    275                 sub_object = valid_params[name]
    276                 sub_object.set_params(**{sub_name: value})
    277             else:
    278                 # simple objects case

ValueError: Invalid parameter pca for estimator Pipeline(memory=None,
     steps=[('pol', PolynomialFeatures(degree=2, include_bias=True, interaction_only=True)), ('var', VarianceThreshold(threshold=0.0)), ('rf', RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=5, max_features=5, max_leaf_nodes=None,
            min_impurity...n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False))]). Check the list of available parameters with `estimator.get_params().keys()`.
___________________________________________________________________________

## Then test the obtained results:
### First make the prediction dataframe

In [35]:
# optimal parameters:
key = 'rf'
#opm_params = np.load('opm_params_{}.npy'.format(key)).item()

def get_predictions(ntest, comp, col):
    
    X, y, ysim = comp_dict[comp]
    print(X)
    X = X[cols_keep]
    
    y_bin = opm_params[comp][col]['threshold'] < y[col]    
    X_test, y_test = X[:ntest], y_bin[:ntest]
    X_train, y_train = X[ntest:], y_bin[ntest:]
    
    pipe, _ = get_pipe(key)
    pipe.set_params(**opm_params[comp][col]['opm_params']) 
    pipe.fit(X_train, y_train)
    
    return pipe.predict(X_test)

def get_prediction_df(ntest):
    idx_tuples = [(comp, col) for comp in companies for col in y.columns]
    index = pd.MultiIndex.from_tuples(idx_tuples)
    
    
    pred_df = pd.DataFrame(index = comp_dict['Alma Media'][0].index[:ntest], columns = index)
    
    for comp in companies:
        ysim = comp_dict[comp][-1]

        for col in y.columns:
            pred_df[(comp, col)] = np.zeros(ntest) #get_predictions(ntest, comp, col)
        
        pred_df[(comp, 'sales_low_000')] = ysim['sales_low_000']
        pred_df[(comp, 'sales_high_{:03d}'.format(nfut-1))] = ysim['sales_high_{:03d}'.format(nfut-1)]
    pred_df.index = comp_dict[comp][0].index.values[:ntest]
    return pred_df

get_prediction_df(100) #['Aktia Pankki A']

Unnamed: 0_level_0,Aktia Pankki A,Aktia Pankki A,Aktia Pankki A,Aktia Pankki A,Alma Media,Alma Media,Alma Media,Alma Media,Amer Sports A,Amer Sports A,...,UPM-Kymmene,UPM-Kymmene,Uponor,Uponor,Vaisala A,Vaisala A,Wärtsilä,Wärtsilä,YIT,YIT
Unnamed: 0_level_1,offer_end_change,sale_low_change+1,sale_low_change,sale_high_change,offer_end_change,sale_low_change+1,sale_low_change,sale_high_change,offer_end_change,sale_low_change+1,...,sales_low_000,sales_high_001,sales_low_000,sales_high_001,sales_low_000,sales_high_001,sales_low_000,sales_high_001,sales_low_000,sales_high_001
16.10.2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,23.82,23.82,14.83,15.05,43.23,44.70,60.35,62.30,7.04,7.08
13.10.2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,23.91,24.03,14.81,14.99,43.85,44.40,59.30,62.00,7.02,7.14
12.10.2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,23.83,24.22,14.76,14.95,43.95,44.00,58.75,60.75,7.04,7.10
11.10.2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,23.70,24.23,14.76,14.94,44.00,44.19,58.95,59.40,7.10,7.14
10.10.2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,23.72,23.87,14.74,14.92,43.34,44.42,59.00,59.60,7.10,7.14
09.10.2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,23.54,23.98,14.63,14.88,44.17,45.00,58.85,59.50,7.08,7.16
06.10.2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,23.63,23.93,14.66,14.79,44.43,45.00,58.60,59.65,7.14,7.18
05.10.2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,23.69,24.07,14.52,14.78,43.91,45.45,60.45,59.75,7.09,7.29
04.10.2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,23.40,23.95,14.51,14.76,43.80,45.00,60.60,61.00,6.94,7.24
03.10.2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,23.21,23.80,14.61,14.69,43.30,44.88,60.05,61.30,6.98,7.06
