Skip to content

Commit

Permalink
shuffle between layers if asked
Browse files Browse the repository at this point in the history
  • Loading branch information
flennerhag committed Sep 12, 2017
1 parent a88bc6a commit ada1134
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 17 deletions.
21 changes: 7 additions & 14 deletions mlens/ensemble/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from ..base import INDEXERS
from ..parallel import ParallelProcessing
from ..externals.sklearn.base import BaseEstimator
from ..externals.sklearn.validation import check_random_state
from ..utils import assert_correct_format, check_ensemble_build, \
check_inputs, check_instances, print_time, safe_print
try:
Expand Down Expand Up @@ -94,7 +93,6 @@ class LayerContainer(BaseEstimator):
If ``verbose >= 50`` prints to ``sys.stdout``, else ``sys.stderr``.
For verbosity in the layers themselves, use ``fit_params``.
"""

def __init__(self,
Expand Down Expand Up @@ -531,6 +529,9 @@ class Layer(BaseEstimator):
If ``verbose >= 50`` prints to ``sys.stdout``, else ``sys.stderr``.
For verbosity in the layers themselves, use ``fit_params``.
shuffle : bool (default = False)
Whether to shuffle the layer's output (together with the targets) before it is passed on as input to the next layer.
dtype : numpy dtype class, default = :class:`numpy.float32`
dtype format of prediction array.
Expand Down Expand Up @@ -559,6 +560,7 @@ def __init__(self,
scorer=None,
raise_on_exception=False,
name=None,
shuffle=False,
dtype=None,
verbose=False,
cls_kwargs=None):
Expand All @@ -578,6 +580,7 @@ def __init__(self,
self.scorer = scorer
self.raise_on_exception = raise_on_exception
self.name = name
self.shuffle = shuffle
self.dtype = dtype if dtype is not None else config.DTYPE
self.verbose = verbose

Expand Down Expand Up @@ -685,7 +688,6 @@ def __init__(self,
array_check=2,
backend=None
):

self.shuffle = shuffle
self.random_state = random_state
self.scorer = scorer
Expand Down Expand Up @@ -734,6 +736,7 @@ def _add(self,
# Add layer to Layer Container
verbose = kwargs.pop('verbose', self.verbose)
scorer = kwargs.pop('scorer', self.scorer)
shuffle = kwargs.pop('shuffle', self.shuffle)

if 'proba' in kwargs:
if kwargs['proba'] and scorer is not None:
Expand All @@ -746,6 +749,7 @@ def _add(self,
indexer=indexer,
preprocessing=preprocessing,
scorer=scorer,
shuffle=shuffle,
verbose=verbose,
**kwargs)

Expand Down Expand Up @@ -778,11 +782,6 @@ class instance with fitted estimators.

X, y = check_inputs(X, y, self.array_check)

if self.shuffle:
r = check_random_state(self.random_state)
idx = r.permutation(X.shape[0])
X, y = X[idx], y[idx]

self.scores_ = self.layers.fit(X, y)

return self
Expand All @@ -806,13 +805,7 @@ def predict(self, X):

X, _ = check_inputs(X, check_level=self.array_check)

if self.shuffle:
r = check_random_state(self.random_state)
idx = r.permutation(X.shape[0])
X = X[idx]

y = self.layers.predict(X)

if y.shape[1] == 1:
# The meta estimator is treated as a layer and thus a prediction
# matrix with shape [n_samples, 1] is created. Ravel before return
Expand Down
18 changes: 15 additions & 3 deletions mlens/parallel/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from . import Blender, Evaluation, SingleRun, Stacker, SubStacker
from .. import config
from ..externals.joblib import Parallel, dump, load
from ..externals.sklearn.validation import check_random_state
from ..utils import check_initialized
from ..utils.exceptions import (ParallelProcessingError,
ParallelProcessingWarning)
Expand Down Expand Up @@ -109,15 +110,26 @@ def __init__(self, job):
self.tmp = None
self.dir = None

def update(self):
"""Shift output array to input array."""
def update(self, shuffle):
"""Shift output array to input array.

Parameters
----------
shuffle : boolean
whether to shuffle the new input data. When True, the new input array
and the targets are reordered with the same random permutation.
"""
# Enforce csr on sparse matrices
if issparse(self.predict_out) and not \
self.predict_out.__class__.__name__.startswith('csr'):
self.predict_out = self.predict_out.tocsr()

self.predict_in = self.predict_out

if shuffle:
r = check_random_state(self.random_state)
idx = r.permutation(self.y.shape[0])
self.predict_in = self.predict_in[idx]
self.y = self.y[idx]

###############################################################################
class BaseProcessor(object):
Expand Down Expand Up @@ -242,7 +254,7 @@ def process(self):
self._partial_process(name, lyr, parallel)

# Update input array with output array
self.job.update()
self.job.update(lyr.shuffle)

def _partial_process(self, name, lyr, parallel):
"""Generate prediction matrix for a given :class:`layer`."""
Expand Down

0 comments on commit ada1134

Please sign in to comment.