shuffle between layers if asked

flennerhag · Sep 12, 2017 · ada1134 · ada1134
1 parent a88bc6a
commit ada1134
Show file tree

Hide file tree

Showing 2 changed files with 22 additions and 17 deletions.
diff --git a/mlens/ensemble/base.py b/mlens/ensemble/base.py
@@ -16,7 +16,6 @@
 from ..base import INDEXERS
 from ..parallel import ParallelProcessing
 from ..externals.sklearn.base import BaseEstimator
-from ..externals.sklearn.validation import check_random_state
 from ..utils import assert_correct_format, check_ensemble_build, \
     check_inputs, check_instances, print_time, safe_print
 try:
@@ -94,7 +93,6 @@ class LayerContainer(BaseEstimator):
 
         If ``verbose >= 50`` prints to ``sys.stdout``, else ``sys.stderr``.
         For verbosity in the layers themselves, use ``fit_params``.
-
     """
 
     def __init__(self,
@@ -531,6 +529,9 @@ class Layer(BaseEstimator):
         If ``verbose >= 50`` prints to ``sys.stdout``, else ``sys.stderr``.
         For verbosity in the layers themselves, use ``fit_params``.
 
+    shuffle : bool (default = False)
+        Whether to shuffle data before fitting layer.
+
     dtype : numpy dtype class, default = :class:`numpy.float32`
         dtype format of prediction array.
 
@@ -559,6 +560,7 @@ def __init__(self,
                  scorer=None,
                  raise_on_exception=False,
                  name=None,
+                 shuffle=False,
                  dtype=None,
                  verbose=False,
                  cls_kwargs=None):
@@ -578,6 +580,7 @@ def __init__(self,
         self.scorer = scorer
         self.raise_on_exception = raise_on_exception
         self.name = name
+        self.shuffle = shuffle
         self.dtype = dtype if dtype is not None else config.DTYPE
         self.verbose = verbose
 
@@ -685,7 +688,6 @@ def __init__(self,
                  array_check=2,
                  backend=None
                  ):
-
         self.shuffle = shuffle
         self.random_state = random_state
         self.scorer = scorer
@@ -734,6 +736,7 @@ def _add(self,
         # Add layer to Layer Container
         verbose = kwargs.pop('verbose', self.verbose)
         scorer = kwargs.pop('scorer', self.scorer)
+        shuffle = kwargs.pop('shuffle', self.shuffle)
 
         if 'proba' in kwargs:
             if kwargs['proba'] and scorer is not None:
@@ -746,6 +749,7 @@ def _add(self,
                         indexer=indexer,
                         preprocessing=preprocessing,
                         scorer=scorer,
+                        shuffle=shuffle,
                         verbose=verbose,
                         **kwargs)
 
@@ -778,11 +782,6 @@ class instance with fitted estimators.
 
         X, y = check_inputs(X, y, self.array_check)
 
-        if self.shuffle:
-            r = check_random_state(self.random_state)
-            idx = r.permutation(X.shape[0])
-            X, y = X[idx], y[idx]
-
         self.scores_ = self.layers.fit(X, y)
 
         return self
@@ -806,13 +805,7 @@ def predict(self, X):
 
         X, _ = check_inputs(X, check_level=self.array_check)
 
-        if self.shuffle:
-            r = check_random_state(self.random_state)
-            idx = r.permutation(X.shape[0])
-            X = X[idx]
-
         y = self.layers.predict(X)
-
         if y.shape[1] == 1:
             # The meta estimator is treated as a layer and thus a prediction
             # matrix with shape [n_samples, 1] is created. Ravel before return

diff --git a/mlens/parallel/manager.py b/mlens/parallel/manager.py
@@ -19,6 +19,7 @@
 from . import Blender, Evaluation, SingleRun, Stacker, SubStacker
 from .. import config
 from ..externals.joblib import Parallel, dump, load
+from ..externals.sklearn.validation import check_random_state
 from ..utils import check_initialized
 from ..utils.exceptions import (ParallelProcessingError,
                                 ParallelProcessingWarning)
@@ -109,15 +110,26 @@ def __init__(self, job):
         self.tmp = None
         self.dir = None
 
-    def update(self):
-        """Shift output array to input array."""
+    def update(self, shuffle):
+        """Shift output array to input array.
+
+        Parameters
+        ----------
+        shuffle : boolean
+            whether to shuffle the new input data.
+        """
         # Enforce csr on sparse matrices
         if issparse(self.predict_out) and not \
                 self.predict_out.__class__.__name__.startswith('csr'):
             self.predict_out = self.predict_out.tocsr()
 
         self.predict_in = self.predict_out
 
+        if shuffle:
+            r = check_random_state(self.random_state)
+            idx = r.permutation(self.y.shape[0])
+            self.predict_in = self.predict_in[idx]
+            self.y = self.y[idx]
 
 ###############################################################################
 class BaseProcessor(object):
@@ -242,7 +254,7 @@ def process(self):
                 self._partial_process(name, lyr, parallel)
 
                 # Update input array with output array
-                self.job.update()
+                self.job.update(lyr.shuffle)
 
     def _partial_process(self, name, lyr, parallel):
         """Generate prediction matrix for a given :class:`layer`."""