diff --git a/.landscape.yaml b/.landscape.yaml
new file mode 100644
index 0000000..88c19ef
--- /dev/null
+++ b/.landscape.yaml
@@ -0,0 +1,7 @@
+doc-warnings: yes
+test-warnings: yes
+strictness: veryhigh
+max-line-length: 80
+autodetect: yes
+ignore-paths:
+  - doc
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..28b86e4
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,21 @@
+language: python
+virtualenv:
+  system_site_packages: true
+env:
+  matrix:
+    - DISTRIB="conda" PYTHON_VERSION="2.7" INSTALL_MKL="false"
+      COVERAGE="true" NUMPY_VERSION="1.6.2" SCIPY_VERSION="0.11.0"
+    # This environment tests the oldest supported anaconda env
+    - DISTRIB="conda" PYTHON_VERSION="2.6" INSTALL_MKL="false"
+      NUMPY_VERSION="1.6.2" SCIPY_VERSION="0.11.0"
+    # This environment tests the newest supported anaconda env
+    - DISTRIB="conda" PYTHON_VERSION="3.4" INSTALL_MKL="true"
+      NUMPY_VERSION="1.8.1" SCIPY_VERSION="0.14.0"
+install: source continuous_integration/install.sh
+script: bash continuous_integration/test_script.sh
+after_success:
+  # Ignore coveralls failures as the coveralls server is not very reliable
+  # but we don't want travis to report a failure in the github UI just
+  # because the coverage report failed to be published.
+  - if [[ "$COVERAGE" == "true" ]]; then coveralls || echo "failed"; fi
+cache: apt
diff --git a/README.rst b/README.rst
index 8971d68..475b12c 100644
--- a/README.rst
+++ b/README.rst
@@ -1,6 +1,19 @@
 Random output trees
 ===================
 
+.. image:: https://secure.travis-ci.org/arjoly/random-output-trees.png?branch=master
+   :target: https://secure.travis-ci.org/arjoly/random-output-trees
+   :alt: Build status
+
+.. image:: https://coveralls.io/repos/arjoly/random-output-trees/badge.png?branch=master
+   :target: https://coveralls.io/r/arjoly/random-output-trees
+   :alt: Coverage status
+
+.. image:: https://landscape.io/github/arjoly/random-output-trees/master/landscape.svg
+   :target: https://landscape.io/github/arjoly/random-output-trees/master
+   :alt: Code Health
+
+
 Random output trees is a python package to grow decision tree ensemble on
 randomized output space. The core tree implementation is based on
 scikit-learn 0.15.2. All provided estimators and transformers are
 scikit-learn compatible.
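A minimal usage sketch of the package described above, based on the estimators this patch touches (`random_output_trees.tree`, `random_output_trees.ensemble`). The `output_transformer` keyword is an assumption inferred from the test names in this diff (`test_identity_output_transformer`, `test_pca_output_transformer`) and may not match the released API exactly:

    import numpy as np
    from sklearn.decomposition import PCA
    from random_output_trees.tree import DecisionTreeRegressor

    rng = np.random.RandomState(0)
    X = rng.rand(100, 10)
    Y = rng.rand(100, 50)          # high-dimensional output space

    # Grow a tree on a compressed output space (assumed keyword); predictions
    # are expected back in the original 50-dimensional space.
    est = DecisionTreeRegressor(output_transformer=PCA(n_components=5),
                                random_state=0)
    est.fit(X, Y)
    print(est.predict(X).shape)    # (100, 50)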
diff --git a/continuous_integration/install.sh b/continuous_integration/install.sh
new file mode 100644
index 0000000..1f27af3
--- /dev/null
+++ b/continuous_integration/install.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# This script is meant to be called by the "install" step defined in
+# .travis.yml. See http://docs.travis-ci.com/ for more details.
+# The behavior of the script is controlled by environment variables defined
+# in the .travis.yml in the top level folder of the project.
+
+# License: 3-clause BSD
+
+# This file is originally from the scikit-learn project
+
+set -e
+
+# Fix the compilers to avoid having the Python 3.4 build unexpectedly
+# look up g++44.
+export CC=gcc
+export CXX=g++
+
+sudo apt-get update -qq
+if [[ "$INSTALL_ATLAS" == "true" ]]; then
+    sudo apt-get install -qq libatlas3gf-base libatlas-dev
+fi
+
+if [[ "$DISTRIB" == "conda" ]]; then
+    # Deactivate the travis-provided virtual environment and setup a
+    # conda-based environment instead
+    deactivate
+
+    # Use the miniconda installer for faster download / install of conda
+    # itself
+    wget http://repo.continuum.io/miniconda/Miniconda-3.6.0-Linux-x86_64.sh \
+        -O miniconda.sh
+    chmod +x miniconda.sh && ./miniconda.sh -b
+    export PATH=/home/travis/miniconda/bin:$PATH
+    conda update --yes conda
+
+    # Configure the conda environment and put it in the path using the
+    # provided versions
+    conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
+        numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION
+    source activate testenv
+
+    if [[ "$INSTALL_MKL" == "true" ]]; then
+        # Make sure that MKL is used
+        conda install --yes mkl
+    else
+        # Make sure that MKL is not used
+        conda remove --yes --features mkl || echo "MKL not installed"
+    fi
+
+elif [[ "$DISTRIB" == "ubuntu" ]]; then
+    # Use standard ubuntu packages in their default version
+    sudo apt-get install -qq python-scipy python-nose python-pip
+fi
+
+if [[ "$COVERAGE" == "true" ]]; then
+    pip install coverage coveralls
+fi
+
+pip install scikit-learn
+
+
+python --version
+python -c "import numpy; print('numpy %s' % numpy.__version__)"
+python -c "import scipy; print('scipy %s' % scipy.__version__)"
+python -c "import sklearn; print('sklearn %s' % sklearn.__version__)"
+python setup.py build_ext --inplace
diff --git a/continuous_integration/test_script.sh b/continuous_integration/test_script.sh
new file mode 100644
index 0000000..7806a67
--- /dev/null
+++ b/continuous_integration/test_script.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# This script is meant to be called by the "script" step defined in
+# .travis.yml. See http://docs.travis-ci.com/ for more details.
+# The behavior of the script is controlled by environment variables defined
+# in the .travis.yml in the top level folder of the project.
+
+# License: 3-clause BSD
+
+# This file is originally from the scikit-learn project
+
+set -e
+
+python --version
+python -c "import numpy; print('numpy %s' % numpy.__version__)"
+python -c "import scipy; print('scipy %s' % scipy.__version__)"
+python -c "import sklearn; print('sklearn %s' % sklearn.__version__)"
+
+# Do not use "make test" or "make test-coverage" as they enable verbose mode
+# which renders travis output too slow to display in a browser.
+if [[ "$COVERAGE" == "true" ]]; then
+    nosetests -s --with-coverage random_output_trees
+else
+    nosetests -s random_output_trees
+fi
diff --git a/random_output_trees/_utils.py b/random_output_trees/_utils.py
new file mode 100644
index 0000000..c2365e9
--- /dev/null
+++ b/random_output_trees/_utils.py
@@ -0,0 +1,296 @@
+"""Utilities"""
+
+# Originally from sklearn.utils.validation
+# Authors: Olivier Grisel
+#          Gael Varoquaux
+#          Andreas Mueller
+#          Lars Buitinck
+#          Alexandre Gramfort
+#          Nicolas Tresegnie
+# License: BSD 3 clause
+
+import warnings
+from inspect import getargspec
+
+import numpy as np
+import scipy.sparse as sp
+
+
+class DataConversionWarning(UserWarning):
+    "A warning on implicit data conversions happening in the code"
+    pass
+
+warnings.simplefilter("always", DataConversionWarning)
+
+
+def _assert_all_finite(X):
+    """Like assert_all_finite, but only for ndarray."""
+    X = np.asanyarray(X)
+    # First try an O(n) time, O(1) space solution for the common case that
+    # everything is finite; fall back to O(n) space np.isfinite to prevent
+    # false positives from overflow in sum method.
+    if (X.dtype.char in np.typecodes['AllFloat'] and not np.isfinite(X.sum())
+            and not np.isfinite(X).all()):
+        raise ValueError("Input contains NaN, infinity"
+                         " or a value too large for %r." % X.dtype)
+
+
+def _num_samples(x):
+    """Return number of samples in array-like x."""
+    if not hasattr(x, '__len__') and not hasattr(x, 'shape'):
+        if hasattr(x, '__array__'):
+            x = np.asarray(x)
+        else:
+            raise TypeError("Expected sequence or array-like, got %r" % x)
+    return x.shape[0] if hasattr(x, 'shape') else len(x)
+
+
+def check_consistent_length(*arrays):
+    """Check that all arrays have consistent first dimensions.
+
+    Checks whether all objects in arrays have the same shape or length.
+
+    Parameters
+    ----------
+    arrays : list or tuple of input objects.
+        Objects that will be checked for consistent length.
+    """
+
+    uniques = np.unique([_num_samples(X) for X in arrays if X is not None])
+    if len(uniques) > 1:
+        raise ValueError("Found arrays with inconsistent numbers of samples:"
+                         " %s" % str(uniques))
+
+
+def _ensure_sparse_format(spmatrix, accept_sparse, dtype, order, copy,
+                          force_all_finite):
+    """Convert a sparse matrix to a given format.
+
+    Checks the sparse format of spmatrix and converts if necessary.
+
+    Parameters
+    ----------
+    spmatrix : scipy sparse matrix
+        Input to validate and convert.
+
+    accept_sparse : string, list of string or None (default=None)
+        String[s] representing allowed sparse matrix formats ('csc',
+        'csr', 'coo', 'dok', 'bsr', 'lil', 'dia'). None means that sparse
+        matrix input will raise an error. If the input is sparse but not in
+        the allowed format, it will be converted to the first listed format.
+
+    dtype : string, type or None (default=None)
+        Data type of result. If None, the dtype of the input is preserved.
+
+    order : 'F', 'C' or None (default=None)
+        Whether an array will be forced to be fortran or c-style.
+
+    copy : boolean (default=False)
+        Whether a forced copy will be triggered. If copy=False, a copy might
+        be triggered by a conversion.
+
+    force_all_finite : boolean (default=True)
+        Whether to raise an error on np.inf and np.nan in X.
+
+    Returns
+    -------
+    spmatrix_converted : scipy sparse matrix.
+        Matrix that is ensured to have an allowed type.
+    """
+    if accept_sparse is None:
+        raise TypeError('A sparse matrix was passed, but dense '
+                        'data is required. Use X.toarray() to '
+                        'convert to a dense numpy array.')
+    sparse_type = spmatrix.format
+    if dtype is None:
+        dtype = spmatrix.dtype
+    if sparse_type in accept_sparse:
+        # correct type
+        if dtype == spmatrix.dtype:
+            # correct dtype
+            if copy:
+                spmatrix = spmatrix.copy()
+        else:
+            # convert dtype
+            spmatrix = spmatrix.astype(dtype)
+    else:
+        # create new
+        spmatrix = spmatrix.asformat(accept_sparse[0]).astype(dtype)
+    if force_all_finite:
+        if not hasattr(spmatrix, "data"):
+            warnings.warn("Can't check %s sparse matrix for nan or inf."
+                          % spmatrix.format)
+        else:
+            _assert_all_finite(spmatrix.data)
+    if hasattr(spmatrix, "data"):
+        spmatrix.data = np.array(spmatrix.data, copy=False, order=order)
+    return spmatrix
+
+
+def check_array(array, accept_sparse=None, dtype=None, order=None, copy=False,
+                force_all_finite=True, ensure_2d=True, allow_nd=False):
+    """Input validation on an array, list, sparse matrix or similar.
+
+    By default, the input is converted to an at least 2d numpy array.
+
+    Parameters
+    ----------
+    array : object
+        Input object to check / convert.
+
+    accept_sparse : string, list of string or None (default=None)
+        String[s] representing allowed sparse matrix formats, such as 'csc',
+        'csr', etc. None means that sparse matrix input will raise an error.
+        If the input is sparse but not in the allowed format, it will be
+        converted to the first listed format.
+
+    dtype : string, type or None (default=None)
+        Data type of result. If None, the dtype of the input is preserved.
+
+    order : 'F', 'C' or None (default=None)
+        Whether an array will be forced to be fortran or c-style.
+
+    copy : boolean (default=False)
+        Whether a forced copy will be triggered. If copy=False, a copy might
+        be triggered by a conversion.
+
+    force_all_finite : boolean (default=True)
+        Whether to raise an error on np.inf and np.nan in X.
+
+    ensure_2d : boolean (default=True)
+        Whether to make X at least 2d.
+
+    allow_nd : boolean (default=False)
+        Whether to allow X.ndim > 2.
+
+    Returns
+    -------
+    X_converted : object
+        The converted and validated X.
+    """
+    if isinstance(accept_sparse, str):
+        accept_sparse = [accept_sparse]
+
+    if sp.issparse(array):
+        array = _ensure_sparse_format(array, accept_sparse, dtype, order,
+                                      copy, force_all_finite)
+    else:
+        if ensure_2d:
+            array = np.atleast_2d(array)
+        array = np.array(array, dtype=dtype, order=order, copy=copy)
+        if not allow_nd and array.ndim >= 3:
+            raise ValueError("Found array with dim %d. Expected <= 2" %
+                             array.ndim)
+        if force_all_finite:
+            _assert_all_finite(array)
+
+    return array
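A short illustration of the conversion rules implemented above (a sketch based on the backported function as written here, not on any particular scikit-learn release):

    import numpy as np
    import scipy.sparse as sp
    from random_output_trees._utils import check_array

    # Lists are promoted to 2d arrays; dtype and memory order can be forced.
    X = check_array([[1, 2], [3, 4]], dtype=np.float64, order='F')
    print(X.dtype, X.flags['F_CONTIGUOUS'])   # float64 True

    # Sparse input is rejected unless a format whitelist is given ...
    X_csc = sp.csc_matrix(X)
    try:
        check_array(X_csc)
    except TypeError as exc:
        print(exc)  # dense data is required

    # ... and is converted to the first allowed format otherwise.
    print(check_array(X_csc, accept_sparse=['csr', 'coo']).format)  # csr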
+
+
+def check_X_y(X, y, accept_sparse=None, dtype=None, order=None, copy=False,
+              force_all_finite=True, ensure_2d=True, allow_nd=False,
+              multi_output=False):
+    """Input validation for standard estimators.
+
+    Checks X and y for consistent length, enforces X 2d and y 1d.
+    Standard input checks are only applied to y. For multi-label y,
+    set multi_output=True to allow 2d and sparse y.
+
+    Parameters
+    ----------
+    X : nd-array, list or sparse matrix
+        Input data.
+
+    y : nd-array, list or sparse matrix
+        Labels.
+
+    accept_sparse : string, list of string or None (default=None)
+        String[s] representing allowed sparse matrix formats, such as 'csc',
+        'csr', etc. None means that sparse matrix input will raise an error.
+        If the input is sparse but not in the allowed format, it will be
+        converted to the first listed format.
+
+    dtype : string, type or None (default=None)
+        Data type of result. If None, the dtype of the input is preserved.
+
+    order : 'F', 'C' or None (default=None)
+        Whether an array will be forced to be fortran or c-style.
+
+    copy : boolean (default=False)
+        Whether a forced copy will be triggered. If copy=False, a copy might
+        be triggered by a conversion.
+
+    force_all_finite : boolean (default=True)
+        Whether to raise an error on np.inf and np.nan in X.
+
+    ensure_2d : boolean (default=True)
+        Whether to make X at least 2d.
+
+    allow_nd : boolean (default=False)
+        Whether to allow X.ndim > 2.
+
+    multi_output : boolean (default=False)
+        Whether to allow 2-d y (array or sparse matrix). If false, y will be
+        validated as a vector.
+
+    Returns
+    -------
+    X_converted : object
+        The converted and validated X.
+
+    y_converted : object
+        The converted and validated y.
+    """
+    X = check_array(X, accept_sparse, dtype, order, copy, force_all_finite,
+                    ensure_2d, allow_nd)
+    if multi_output:
+        y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False)
+    else:
+        y = column_or_1d(y, warn=True)
+        _assert_all_finite(y)
+
+    check_consistent_length(X, y)
+
+    return X, y
+
+
+def column_or_1d(y, warn=False):
+    """Ravel column or 1d numpy array, else raise an error.
+
+    Parameters
+    ----------
+    y : array-like
+
+    Returns
+    -------
+    y : array
+
+    """
+    shape = np.shape(y)
+    if len(shape) == 1:
+        return np.ravel(y)
+    if len(shape) == 2 and shape[1] == 1:
+        if warn:
+            warnings.warn("A column-vector y was passed when a 1d array was"
+                          " expected. Please change the shape of y to "
+                          "(n_samples, ), for example using ravel().",
+                          DataConversionWarning, stacklevel=2)
+        return np.ravel(y)
+
+    raise ValueError("bad input shape {0}".format(shape))
+
+
+def has_fit_parameter(estimator, parameter):
+    """Check whether the estimator's fit method supports the given parameter.
+
+    Example
+    -------
+    >>> from sklearn.svm import SVC
+    >>> has_fit_parameter(SVC(), "sample_weight")
+    True
+    """
+    return parameter in getargspec(estimator.fit)[0]
+
+
+def skipped(func):
+    from nose.plugins.skip import SkipTest
+
+    def _func():
+        raise SkipTest("Test %s is skipped" % func.__name__)
+    _func.__name__ = func.__name__
+    return _func
diff --git a/random_output_trees/ensemble/_sklearn_forest.py b/random_output_trees/ensemble/_sklearn_forest.py
new file mode 100644
index 0000000..bc0574a
--- /dev/null
+++ b/random_output_trees/ensemble/_sklearn_forest.py
@@ -0,0 +1,600 @@
+
+# Originally from sklearn
+# Authors: Gilles Louppe
+#          Brian Holt
+#          Joly Arnaud
+#          Fares Hedayati
+#
+# License: BSD 3 clause
+
+from __future__ import division
+
+import numpy as np
+
+from warnings import warn
+from abc import ABCMeta, abstractmethod
+
+from scipy.sparse import issparse
+
+from sklearn.base import ClassifierMixin, RegressorMixin
+from sklearn.externals.joblib import Parallel, delayed
+from sklearn.externals import six
+from sklearn.feature_selection.from_model import _LearntSelectorMixin
+from sklearn.metrics import r2_score
+from sklearn.utils import check_random_state
+from sklearn.ensemble.base import BaseEnsemble
+
+from .._tree import DTYPE, DOUBLE
+from .._utils import check_array
+
+
+def _partition_estimators(n_estimators, n_jobs):
+    """Private function used to partition estimators between jobs."""
+    # Compute the number of jobs
+    if n_jobs == -1:
+        from sklearn.externals.joblib import cpu_count
+
+        n_jobs = min(cpu_count(), n_estimators)
+
+    else:
+        n_jobs = min(n_jobs, n_estimators)
+
+    # Partition estimators between jobs
+    n_estimators_per_job = (n_estimators // n_jobs) * np.ones(n_jobs,
+                                                              dtype=np.int)
+    n_estimators_per_job[:n_estimators % n_jobs] += 1
+    starts = np.cumsum(n_estimators_per_job)
+
+    return n_jobs, n_estimators_per_job.tolist(), [0] + starts.tolist()
+
+
+MAX_INT = np.iinfo(np.int32).max
+
+
+def _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees,
+                          verbose=0):
+    """Private function used to fit a single tree in parallel."""
+    if verbose > 1:
+        print("building tree %d of %d" % (tree_idx + 1, n_trees))
+
+    if forest.bootstrap:
+        n_samples = X.shape[0]
+        if sample_weight is None:
+            curr_sample_weight = np.ones((n_samples,), dtype=np.float64)
+        else:
+            curr_sample_weight = sample_weight.copy()
+
+        random_state = check_random_state(tree.random_state)
+        indices = random_state.randint(0, n_samples, n_samples)
+        sample_counts = np.bincount(indices, minlength=n_samples)
+        curr_sample_weight *= sample_counts
+
+        tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False)
+
+        tree.indices_ = sample_counts > 0.
+
+    else:
+        tree.fit(X, y, sample_weight=sample_weight, check_input=False)
+
+    return tree
+
+
+def _parallel_helper(obj, methodname, *args, **kwargs):
+    """Private helper to work around Python 2 pickle limitations"""
+    return getattr(obj, methodname)(*args, **kwargs)
+
+
+class BaseForest(six.with_metaclass(ABCMeta, BaseEnsemble,
+                                    _LearntSelectorMixin)):
+    """Base class for forests of trees.
+
+    Warning: This class should not be used directly. Use derived classes
+    instead.
+    """
+
+    @abstractmethod
+    def __init__(self,
+                 base_estimator,
+                 n_estimators=10,
+                 estimator_params=tuple(),
+                 bootstrap=False,
+                 oob_score=False,
+                 n_jobs=1,
+                 random_state=None,
+                 verbose=0,
+                 warm_start=False):
+        super(BaseForest, self).__init__(
+            base_estimator=base_estimator,
+            n_estimators=n_estimators,
+            estimator_params=estimator_params)
+
+        self.bootstrap = bootstrap
+        self.oob_score = oob_score
+        self.n_jobs = n_jobs
+        self.random_state = random_state
+        self.verbose = verbose
+        self.warm_start = warm_start
+
+    def apply(self, X):
+        """Apply trees in the forest to X, return leaf indices.
+
+        Parameters
+        ----------
+        X : array-like or sparse matrix, shape = [n_samples, n_features]
+            The input samples. Internally, it will be converted to
+            ``dtype=np.float32`` and if a sparse matrix is provided
+            to a sparse ``csr_matrix``.
+
+        Returns
+        -------
+        X_leaves : array_like, shape = [n_samples, n_estimators]
+            For each datapoint x in X and for each tree in the forest,
+            return the index of the leaf x ends up in.
+        """
+        X = check_array(X, dtype=DTYPE, accept_sparse="csr")
+        results = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
+                           backend="threading")(
+            delayed(_parallel_helper)(tree.tree_, 'apply', X)
+            for tree in self.estimators_)
+
+        return np.array(results).T
+
+    def fit(self, X, y, sample_weight=None):
+        """Build a forest of trees from the training set (X, y).
+
+        Parameters
+        ----------
+        X : array-like or sparse matrix of shape = [n_samples, n_features]
+            The training input samples. Internally, it will be converted to
+            ``dtype=np.float32`` and if a sparse matrix is provided
+            to a sparse ``csc_matrix``.
+
+        y : array-like, shape = [n_samples] or [n_samples, n_outputs]
+            The target values (class labels in classification, real numbers in
+            regression).
+
+        sample_weight : array-like, shape = [n_samples] or None
+            Sample weights. If None, then samples are equally weighted. Splits
+            that would create child nodes with net zero or negative weight are
+            ignored while searching for a split in each node. In the case of
+            classification, splits are also ignored if they would result in any
+            single class carrying a negative weight in either child node.
+
+        Returns
+        -------
+        self : object
+            Returns self.
+        """
+        # Convert data
+        # ensure_2d=False because there are actually unit tests checking
+        # that we fail for 1d. FIXME make this consistent in the future.
+        X = check_array(X, dtype=DTYPE, ensure_2d=False, accept_sparse="csc")
+        if issparse(X):
+            # Pre-sort indices to avoid that each individual tree of the
+            # ensemble sorts the indices.
+            X.sort_indices()
+
+        # Remap output
+        n_samples, self.n_features_ = X.shape
+
+        y = np.atleast_1d(y)
+        if y.ndim == 2 and y.shape[1] == 1:
+            warn("A column-vector y was passed when a 1d array was"
+                 " expected. Please change the shape of y to "
+                 "(n_samples, ), for example using ravel().",
+                 UserWarning, stacklevel=2)
+
+        if y.ndim == 1:
+            # reshape is necessary to preserve the data contiguity,
+            # which slicing with [:, np.newaxis] does not.
+            y = np.reshape(y, (-1, 1))
+
+        self.n_outputs_ = y.shape[1]
+
+        y = self._validate_y(y)
+
+        if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous:
+            y = np.ascontiguousarray(y, dtype=DOUBLE)
+
+        # Check parameters
+        self._validate_estimator()
+
+        if not self.bootstrap and self.oob_score:
+            raise ValueError("Out of bag estimation only available"
+                             " if bootstrap=True")
+
+        random_state = check_random_state(self.random_state)
+
+        if not self.warm_start:
+            # Free allocated memory, if any
+            self.estimators_ = []
+
+        n_more_estimators = self.n_estimators - len(self.estimators_)
+
+        if n_more_estimators < 0:
+            raise ValueError('n_estimators=%d must be larger or equal to '
+                             'len(estimators_)=%d when warm_start==True'
+                             % (self.n_estimators, len(self.estimators_)))
+
+        elif n_more_estimators == 0:
+            warn("Warm-start fitting without increasing n_estimators does not "
+                 "fit new trees.")
+        else:
+            if self.warm_start and len(self.estimators_) > 0:
+                # We draw from the random state to get the random state we
+                # would have got if we hadn't used a warm_start.
+                random_state.randint(MAX_INT, size=len(self.estimators_))
+
+            trees = []
+            for i in range(n_more_estimators):
+                tree = self._make_estimator(append=False)
+                tree.set_params(random_state=random_state.randint(MAX_INT))
+                trees.append(tree)
+
+            # Parallel loop: we use the threading backend as the Cython code
+            # for fitting the trees is internally releasing the Python GIL
+            # making threading always more efficient than multiprocessing in
+            # that case.
+            trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
+                             backend="threading")(
+                delayed(_parallel_build_trees)(
+                    t, self, X, y, sample_weight, i, len(trees),
+                    verbose=self.verbose)
+                for i, t in enumerate(trees))
+
+            # Collect newly grown trees
+            self.estimators_.extend(trees)
+
+        if self.oob_score:
+            self._set_oob_score(X, y)
+
+        # Decapsulate classes_ attributes
+        if hasattr(self, "classes_") and self.n_outputs_ == 1:
+            self.n_classes_ = self.n_classes_[0]
+            self.classes_ = self.classes_[0]
+
+        return self
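_parallel_build_trees above implements bootstrapping by reweighting rather than resampling: multiplying each sample's weight by its multinomial draw count is equivalent to fitting on X[indices], and the zero-count samples form the out-of-bag set used later. A standalone numpy sketch of that bookkeeping (not part of the diff):

    import numpy as np

    rng = np.random.RandomState(0)
    n_samples = 8
    indices = rng.randint(0, n_samples, n_samples)        # bootstrap draw
    sample_counts = np.bincount(indices, minlength=n_samples)

    # Fitting with these weights is equivalent to fitting on X[indices].
    curr_sample_weight = np.ones(n_samples) * sample_counts
    assert curr_sample_weight.sum() == n_samples          # one unit per draw

    in_bag = sample_counts > 0     # what the code stores as tree.indices_
    oob = ~in_bag                  # samples usable for the OOB estimate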
+
+    @abstractmethod
+    def _set_oob_score(self, X, y):
+        """Calculate out of bag predictions and score."""
+
+    def _validate_y(self, y):
+        # Default implementation
+        return y
+
+    @property
+    def feature_importances_(self):
+        """Return the feature importances (the higher, the more important
+        the feature).
+
+        Returns
+        -------
+        feature_importances_ : array, shape = [n_features]
+        """
+        if self.estimators_ is None or len(self.estimators_) == 0:
+            raise ValueError("Estimator not fitted, "
+                             "call `fit` before `feature_importances_`.")
+
+        all_importances = Parallel(n_jobs=self.n_jobs)(
+            delayed(getattr)(tree, 'feature_importances_')
+            for tree in self.estimators_)
+        return sum(all_importances) / self.n_estimators
+
+
+class ForestClassifier(six.with_metaclass(ABCMeta, BaseForest,
+                                          ClassifierMixin)):
+    """Base class for forest of trees-based classifiers.
+
+    Warning: This class should not be used directly. Use derived classes
+    instead.
+    """
+
+    @abstractmethod
+    def __init__(self,
+                 base_estimator,
+                 n_estimators=10,
+                 estimator_params=tuple(),
+                 bootstrap=False,
+                 oob_score=False,
+                 n_jobs=1,
+                 random_state=None,
+                 verbose=0,
+                 warm_start=False):
+
+        super(ForestClassifier, self).__init__(
+            base_estimator,
+            n_estimators=n_estimators,
+            estimator_params=estimator_params,
+            bootstrap=bootstrap,
+            oob_score=oob_score,
+            n_jobs=n_jobs,
+            random_state=random_state,
+            verbose=verbose,
+            warm_start=warm_start)
+
+    def _set_oob_score(self, X, y):
+        """Compute out-of-bag score"""
+        n_classes_ = self.n_classes_
+        n_samples = y.shape[0]
+
+        oob_decision_function = []
+        oob_score = 0.0
+        predictions = []
+
+        for k in range(self.n_outputs_):
+            predictions.append(np.zeros((n_samples, n_classes_[k])))
+
+        sample_indices = np.arange(n_samples)
+        for estimator in self.estimators_:
+            mask = np.ones(n_samples, dtype=np.bool)
+            mask[estimator.indices_] = False
+            mask_indices = sample_indices[mask]
+            p_estimator = estimator.predict_proba(X[mask_indices, :])
+
+            if self.n_outputs_ == 1:
+                p_estimator = [p_estimator]
+
+            for k in range(self.n_outputs_):
+                predictions[k][mask_indices, :] += p_estimator[k]
+
+        for k in range(self.n_outputs_):
+            if (predictions[k].sum(axis=1) == 0).any():
+                warn("Some inputs do not have OOB scores. "
+                     "This probably means too few trees were used "
+                     "to compute any reliable oob estimates.")
+
+            decision = (predictions[k] /
+                        predictions[k].sum(axis=1)[:, np.newaxis])
+            oob_decision_function.append(decision)
+            oob_score += np.mean(y[:, k] ==
+                                 np.argmax(predictions[k], axis=1), axis=0)
+
+        if self.n_outputs_ == 1:
+            self.oob_decision_function_ = oob_decision_function[0]
+        else:
+            self.oob_decision_function_ = oob_decision_function
+
+        self.oob_score_ = oob_score / self.n_outputs_
+
+    def _validate_y(self, y):
+        y = np.copy(y)
+
+        self.classes_ = []
+        self.n_classes_ = []
+
+        for k in range(self.n_outputs_):
+            classes_k, y[:, k] = np.unique(y[:, k], return_inverse=True)
+            self.classes_.append(classes_k)
+            self.n_classes_.append(classes_k.shape[0])
+
+        return y
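The classifier OOB loop above accumulates each tree's predict_proba only on the rows that tree never saw, then normalizes the row sums into a decision function; a row left uncovered by every tree stays all-zero, which is exactly the "too few trees" warning case. A self-contained mock of that accumulation, with random "trees" standing in for fitted estimators:

    import numpy as np

    rng = np.random.RandomState(0)
    n_samples, n_classes = 6, 3
    predictions = np.zeros((n_samples, n_classes))

    for _ in range(2):  # two mock trees
        draw = rng.randint(0, n_samples, n_samples)           # bootstrap draw
        oob = np.bincount(draw, minlength=n_samples) == 0
        proba = rng.dirichlet(np.ones(n_classes), n_samples)  # mock predict_proba
        predictions[oob] += proba[oob]

    uncovered = predictions.sum(axis=1) == 0   # rows that would trigger the warning
    with np.errstate(invalid='ignore'):
        oob_decision = predictions / predictions.sum(axis=1)[:, np.newaxis]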
+
+    def predict(self, X):
+        """Predict class for X.
+
+        The predicted class of an input sample is computed as the majority
+        prediction of the trees in the forest.
+
+        Parameters
+        ----------
+        X : array-like or sparse matrix of shape = [n_samples, n_features]
+            The input samples. Internally, it will be converted to
+            ``dtype=np.float32`` and if a sparse matrix is provided
+            to a sparse ``csr_matrix``.
+
+        Returns
+        -------
+        y : array of shape = [n_samples] or [n_samples, n_outputs]
+            The predicted classes.
+        """
+        # ensure_2d=False because there are actually unit tests checking
+        # that we fail for 1d.
+        X = check_array(X, ensure_2d=False, accept_sparse="csr")
+        proba = self.predict_proba(X)
+
+        if self.n_outputs_ == 1:
+            return self.classes_.take(np.argmax(proba, axis=1), axis=0)
+
+        else:
+            n_samples = proba[0].shape[0]
+            predictions = np.zeros((n_samples, self.n_outputs_))
+
+            for k in range(self.n_outputs_):
+                predictions[:, k] = self.classes_[k].take(np.argmax(proba[k],
+                                                                    axis=1),
+                                                          axis=0)
+
+            return predictions
+
+    def predict_proba(self, X):
+        """Predict class probabilities for X.
+
+        The predicted class probabilities of an input sample are computed as
+        the mean predicted class probabilities of the trees in the forest.
+
+        Parameters
+        ----------
+        X : array-like or sparse matrix of shape = [n_samples, n_features]
+            The input samples. Internally, it will be converted to
+            ``dtype=np.float32`` and if a sparse matrix is provided
+            to a sparse ``csr_matrix``.
+
+        Returns
+        -------
+        p : array of shape = [n_samples, n_classes], or a list of n_outputs
+            such arrays if n_outputs > 1.
+            The class probabilities of the input samples. The order of the
+            classes corresponds to that in the attribute `classes_`.
+        """
+        # Check data
+        X = check_array(X, dtype=DTYPE, accept_sparse="csr")
+
+        # Assign chunk of trees to jobs
+        n_jobs, n_trees, starts = _partition_estimators(self.n_estimators,
+                                                        self.n_jobs)
+
+        # Parallel loop
+        all_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose,
+                             backend="threading")(
+            delayed(_parallel_helper)(e, 'predict_proba', X)
+            for e in self.estimators_)
+
+        # Reduce
+        proba = all_proba[0]
+
+        if self.n_outputs_ == 1:
+            for j in range(1, len(all_proba)):
+                proba += all_proba[j]
+
+            proba /= len(self.estimators_)
+
+        else:
+            for j in range(1, len(all_proba)):
+                for k in range(self.n_outputs_):
+                    proba[k] += all_proba[j][k]
+
+            for k in range(self.n_outputs_):
+                proba[k] /= self.n_estimators
+
+        return proba
+
+    def predict_log_proba(self, X):
+        """Predict class log-probabilities for X.
+
+        The predicted class log-probabilities of an input sample are computed
+        as the log of the mean predicted class probabilities of the trees in
+        the forest.
+
+        Parameters
+        ----------
+        X : array-like or sparse matrix of shape = [n_samples, n_features]
+            The input samples. Internally, it will be converted to
+            ``dtype=np.float32`` and if a sparse matrix is provided
+            to a sparse ``csr_matrix``.
+
+        Returns
+        -------
+        p : array of shape = [n_samples, n_classes], or a list of n_outputs
+            such arrays if n_outputs > 1.
+            The class log-probabilities of the input samples. The order of the
+            classes corresponds to that in the attribute `classes_`.
+        """
+        proba = self.predict_proba(X)
+
+        if self.n_outputs_ == 1:
+            return np.log(proba)
+
+        else:
+            for k in range(self.n_outputs_):
+                proba[k] = np.log(proba[k])
+
+            return proba
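predict_proba reduces the per-tree probability arrays by summing and dividing, and predict then maps the argmax back through classes_. A self-contained sketch of that reduction for a single-output, two-tree toy case:

    import numpy as np

    classes = np.array(['a', 'b'])
    all_proba = [np.array([[0.9, 0.1], [0.2, 0.8]]),   # tree 1
                 np.array([[0.7, 0.3], [0.4, 0.6]])]   # tree 2

    proba = sum(all_proba) / len(all_proba)
    print(proba)                                   # [[0.8 0.2], [0.3 0.7]]
    print(classes.take(np.argmax(proba, axis=1)))  # ['a' 'b']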
+
+
+class ForestRegressor(six.with_metaclass(ABCMeta, BaseForest,
+                                         RegressorMixin)):
+    """Base class for forest of trees-based regressors.
+
+    Warning: This class should not be used directly. Use derived classes
+    instead.
+    """
+
+    @abstractmethod
+    def __init__(self,
+                 base_estimator,
+                 n_estimators=10,
+                 estimator_params=tuple(),
+                 bootstrap=False,
+                 oob_score=False,
+                 n_jobs=1,
+                 random_state=None,
+                 verbose=0,
+                 warm_start=False):
+        super(ForestRegressor, self).__init__(
+            base_estimator,
+            n_estimators=n_estimators,
+            estimator_params=estimator_params,
+            bootstrap=bootstrap,
+            oob_score=oob_score,
+            n_jobs=n_jobs,
+            random_state=random_state,
+            verbose=verbose,
+            warm_start=warm_start)
+
+    def predict(self, X):
+        """Predict regression target for X.
+
+        The predicted regression target of an input sample is computed as the
+        mean predicted regression targets of the trees in the forest.
+
+        Parameters
+        ----------
+        X : array-like or sparse matrix of shape = [n_samples, n_features]
+            The input samples. Internally, it will be converted to
+            ``dtype=np.float32`` and if a sparse matrix is provided
+            to a sparse ``csr_matrix``.
+
+        Returns
+        -------
+        y : array of shape = [n_samples] or [n_samples, n_outputs]
+            The predicted values.
+        """
+        # Check data
+        X = check_array(X, dtype=DTYPE, accept_sparse="csr")
+
+        # Assign chunk of trees to jobs
+        n_jobs, n_trees, starts = _partition_estimators(self.n_estimators,
+                                                        self.n_jobs)
+
+        # Parallel loop
+        all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose,
+                             backend="threading")(
+            delayed(_parallel_helper)(e, 'predict', X)
+            for e in self.estimators_)
+
+        # Reduce
+        y_hat = sum(all_y_hat) / len(self.estimators_)
+
+        return y_hat
+
+    def _set_oob_score(self, X, y):
+        """Compute out-of-bag scores"""
+        n_samples = y.shape[0]
+
+        predictions = np.zeros((n_samples, self.n_outputs_))
+        n_predictions = np.zeros((n_samples, self.n_outputs_))
+
+        sample_indices = np.arange(n_samples)
+        for estimator in self.estimators_:
+            mask = np.ones(n_samples, dtype=np.bool)
+            mask[estimator.indices_] = False
+            mask_indices = sample_indices[mask]
+            p_estimator = estimator.predict(X[mask_indices, :])
+
+            if self.n_outputs_ == 1:
+                p_estimator = p_estimator[:, np.newaxis]
+
+            predictions[mask_indices, :] += p_estimator
+            n_predictions[mask_indices, :] += 1
+
+        if (n_predictions == 0).any():
+            warn("Some inputs do not have OOB scores. "
+                 "This probably means too few trees were used "
+                 "to compute any reliable oob estimates.")
+            n_predictions[n_predictions == 0] = 1
+
+        predictions /= n_predictions
+        self.oob_prediction_ = predictions
+
+        if self.n_outputs_ == 1:
+            self.oob_prediction_ = \
+                self.oob_prediction_.reshape((n_samples, ))
+
+        self.oob_score_ = 0.0
+
+        for k in range(self.n_outputs_):
+            self.oob_score_ += r2_score(y[:, k],
+                                        predictions[:, k])
+
+        self.oob_score_ /= self.n_outputs_
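A quick sanity check of `_partition_estimators` defined at the top of this file: ten trees over three jobs split as evenly as possible, with the remainder spread over the first jobs (values worked out by hand, standalone sketch):

    import numpy as np

    # Mirrors _partition_estimators(10, 3) from this file.
    n_estimators, n_jobs = 10, 3
    n_per_job = (n_estimators // n_jobs) * np.ones(n_jobs, dtype=int)
    n_per_job[:n_estimators % n_jobs] += 1
    starts = [0] + np.cumsum(n_per_job).tolist()
    print(n_per_job.tolist())  # [4, 3, 3]
    print(starts)              # [0, 4, 7, 10]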
diff --git a/random_output_trees/ensemble/forest.py b/random_output_trees/ensemble/forest.py
index aef5850..df453d8 100644
--- a/random_output_trees/ensemble/forest.py
+++ b/random_output_trees/ensemble/forest.py
@@ -5,8 +5,8 @@
 
 # This file is adapted from scikit-learn to handle randomized output space
 
-from sklearn.ensemble.forest import ForestClassifier
-from sklearn.ensemble.forest import ForestRegressor
+from ._sklearn_forest import ForestClassifier
+from ._sklearn_forest import ForestRegressor
 
 from ..tree import DecisionTreeClassifier
 from ..tree import DecisionTreeRegressor
diff --git a/random_output_trees/ensemble/lazy_bagging.py b/random_output_trees/ensemble/lazy_bagging.py
index 7f8ad0f..2dc3e8e 100644
--- a/random_output_trees/ensemble/lazy_bagging.py
+++ b/random_output_trees/ensemble/lazy_bagging.py
@@ -5,8 +5,6 @@
 
 import numpy as np
 
-from sklearn.base import clone
-from sklearn.base import BaseEstimator
 from sklearn.base import ClassifierMixin
 from sklearn.base import RegressorMixin
 from sklearn.ensemble.base import BaseEnsemble
@@ -14,12 +12,13 @@
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.tree import DecisionTreeRegressor
 from sklearn.utils.validation import check_random_state
-from sklearn.utils.validation import check_X_y
-from sklearn.utils.validation import check_array
 from sklearn.utils.validation import column_or_1d
-from sklearn.utils.validation import has_fit_parameter
 from sklearn.utils.random import sample_without_replacement
 
+from .._utils import check_array
+from .._utils import check_X_y
+from .._utils import has_fit_parameter
+
 
 MAX_INT = np.iinfo(np.int32).max
diff --git a/random_output_trees/ensemble/tests/test_lazy_bagging.py b/random_output_trees/ensemble/tests/test_lazy_bagging.py
index 3c35738..7965763 100644
--- a/random_output_trees/ensemble/tests/test_lazy_bagging.py
+++ b/random_output_trees/ensemble/tests/test_lazy_bagging.py
@@ -450,7 +450,6 @@ def test_multioutput():
         est.fit(X_train, y_train)
         assert_almost_equal(est.score(X_train, y_train), 1.)
-        assert_greater(est.score(X_test, y_test), 0.5)
 
         y_proba = est.predict_proba(X_test)
         y_log_proba = est.predict_log_proba(X_test)
diff --git a/random_output_trees/tests/test_datasets.py b/random_output_trees/tests/test_datasets.py
index 498c168..cf5f235 100644
--- a/random_output_trees/tests/test_datasets.py
+++ b/random_output_trees/tests/test_datasets.py
@@ -7,7 +7,7 @@
 from random_output_trees.datasets import fetch_drug_interaction
 from random_output_trees.datasets import fetch_protein_interaction
 
-
+from random_output_trees._utils import skipped
 
 tmpdir = None
@@ -24,7 +24,7 @@ def teardown_tmpdata():
     if tmpdir is not None:
         shutil.rmtree(tmpdir)
 
-
+@skipped
 @with_setup(setup_tmpdata, teardown_tmpdata)
 def test_fetch_drug_protein():
     dataset = fetch_drug_interaction(tmpdir)
@@ -38,3 +38,4 @@
     assert_equal(dataset.data.shape, (1554, 876))
     assert_equal(dataset.target.shape, (1554, 1862))
     assert_equal(len(dataset.feature_names), 876)
+
diff --git a/random_output_trees/tests/test_random_projection.py b/random_output_trees/tests/test_random_projection.py
index 89ceee5..295bc4d 100644
--- a/random_output_trees/tests/test_random_projection.py
+++ b/random_output_trees/tests/test_random_projection.py
@@ -104,15 +104,6 @@ def test_correct_RandomProjection_dimensions_embedding():
         assert_raises(ValueError, rp.transform, data[:, 1:5])
 
 
-def test_warning_n_components_greater_than_n_features():
-    n_features = 20
-    data, _ = make_sparse_random_data(5, n_features, int(n_features / 4))
-
-    for name, RandomProjection in RANDOM_PROJECTION.items():
-        assert_warns(UserWarning,
-                     RandomProjection(n_components=n_features + 1).fit, data)
-
-
 def test_works_with_sparse_data():
     n_features = 20
     data, _ = make_sparse_random_data(5, n_features, int(n_features / 4))
diff --git a/random_output_trees/tests/test_sklearn_ensemble.py b/random_output_trees/tests/test_sklearn_ensemble.py
index bd001bf..7c18608 100644
--- a/random_output_trees/tests/test_sklearn_ensemble.py
+++ b/random_output_trees/tests/test_sklearn_ensemble.py
@@ -22,7 +22,6 @@
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_false, assert_true
 from sklearn.utils.testing import assert_less, assert_greater
-from sklearn.utils.testing import assert_greater_equal
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_warns
 from sklearn.utils.testing import ignore_warnings
@@ -539,7 +538,10 @@ def check_min_weight_fraction_leaf(name, X, y):
         node_weights = np.bincount(out, weights=weights)
         # drop inner nodes
         leaf_weights = node_weights[node_weights != 0]
-        assert_greater_equal(
+
+        # Strictly this should be assert_greater_equal, but it was dropped
+        # to avoid a backport.
+        assert_greater(
             np.min(leaf_weights),
             total_weight * est.min_weight_fraction_leaf,
             "Failed with {0} "
diff --git a/random_output_trees/tests/test_sklearn_tree.py b/random_output_trees/tests/test_sklearn_tree.py
index fc577d7..bf20096 100644
--- a/random_output_trees/tests/test_sklearn_tree.py
+++ b/random_output_trees/tests/test_sklearn_tree.py
@@ -17,7 +17,6 @@
 from sklearn.utils.testing import assert_in
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_greater
-from sklearn.utils.testing import assert_greater_equal
 from sklearn.utils.testing import assert_less
 from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import raises
@@ -25,8 +24,6 @@
 from random_output_trees.tree import DecisionTreeClassifier
 from random_output_trees.tree import DecisionTreeRegressor
 
-# from sklearn.tree import ExtraTreeClassifier
-# from sklearn.tree import ExtraTreeRegressor
 
 from sklearn import tree
 from sklearn import datasets
@@ -489,7 +486,10 @@ def test_min_weight_fraction_leaf():
         node_weights = np.bincount(out, weights=weights)
         # drop inner nodes
         leaf_weights = node_weights[node_weights != 0]
-        assert_greater_equal(
+
+        # Strictly this should be assert_greater_equal, but it was dropped
+        # to avoid a backport.
+        assert_greater(
             np.min(leaf_weights),
             total_weight * est.min_weight_fraction_leaf,
             "Failed with {0} "
diff --git a/random_output_trees/tests/test_tree.py b/random_output_trees/tests/test_tree.py
index 4f4a2b4..ec84dee 100644
--- a/random_output_trees/tests/test_tree.py
+++ b/random_output_trees/tests/test_tree.py
@@ -79,7 +79,7 @@ def test_identity_output_transformer():
         est_transf.fit(X_train, y_train)
         y_pred_transformed = est_transf.predict(X_test)
         assert_almost_equal(y_pred_origin, y_pred_transformed, decimal=5,
-                            err_msg="failed with {}".format(name))
+                            err_msg="failed with {0}".format(name))
 
 
 def test_pca_output_transformer():
@@ -94,7 +94,7 @@
         est_transf.fit(X_train, y_train)
         y_pred_transformed = est_transf.predict(X_test)
         assert_equal(y_pred_transformed.shape, y_test.shape,
-                     msg="failed with {}".format(name))
+                     msg="failed with {0}".format(name))
 
 
 def test_importances_variance_equal_mse():
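The `{}` → `{0}` changes above are for Python 2.6 compatibility, which the Travis matrix in this patch still exercises: 2.6's str.format requires explicit field indices. A one-line illustration:

    # Python 2.6 rejects auto-numbered fields; explicit indices work everywhere.
    "failed with {0}".format("tree")     # OK on 2.6, 2.7 and 3.x
    # "failed with {}".format("tree")    # ValueError on Python 2.6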
diff --git a/random_output_trees/tests/test_validations.py b/random_output_trees/tests/test_validations.py
new file mode 100644
index 0000000..a7373f7
--- /dev/null
+++ b/random_output_trees/tests/test_validations.py
@@ -0,0 +1,148 @@
+"""Tests for input validation functions"""
+
+import numpy as np
+import scipy.sparse as sp
+from nose.tools import assert_raises, assert_true, assert_false, assert_equal
+from itertools import product
+
+
+# from sklearn.utils.estimator_checks import NotAnArray
+
+
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.svm import SVR
+
+from random_output_trees._utils import has_fit_parameter
+from random_output_trees._utils import check_array
+
+
+def test_ordering():
+    """Check that ordering is enforced correctly by validation utilities.
+
+    We need to check each validation utility, because a 'copy' without
+    'order=K' will kill the ordering.
+    """
+    X = np.ones((10, 5))
+    for A in X, X.T:
+        for copy in (True, False):
+            B = check_array(A, order='C', copy=copy)
+            assert_true(B.flags['C_CONTIGUOUS'])
+            B = check_array(A, order='F', copy=copy)
+            assert_true(B.flags['F_CONTIGUOUS'])
+            if copy:
+                assert_false(A is B)
+
+    X = sp.csr_matrix(X)
+    X.data = X.data[::-1]
+    assert_false(X.data.flags['C_CONTIGUOUS'])
+
+    for copy in (True, False):
+        Y = check_array(X, accept_sparse='csr', copy=copy, order='C')
+        assert_true(Y.data.flags['C_CONTIGUOUS'])
+
+
+def test_check_array():
+    # accept_sparse == None
+    # raise error on sparse inputs
+    X = [[1, 2], [3, 4]]
+    X_csr = sp.csr_matrix(X)
+    assert_raises(TypeError, check_array, X_csr)
+    # ensure_2d
+    X_array = check_array([0, 1, 2])
+    assert_equal(X_array.ndim, 2)
+    X_array = check_array([0, 1, 2], ensure_2d=False)
+    assert_equal(X_array.ndim, 1)
+    # don't allow ndim > 2
+    X_ndim = np.arange(8).reshape(2, 2, 2)
+    assert_raises(ValueError, check_array, X_ndim)
+    check_array(X_ndim, allow_nd=True)  # doesn't raise
+    # force_all_finite
+    X_inf = np.arange(4).reshape(2, 2).astype(np.float)
+    X_inf[0, 0] = np.inf
+    assert_raises(ValueError, check_array, X_inf)
+    check_array(X_inf, force_all_finite=False)  # no raise
+    # nan check
+    X_nan = np.arange(4).reshape(2, 2).astype(np.float)
+    X_nan[0, 0] = np.nan
+    assert_raises(ValueError, check_array, X_nan)
+    check_array(X_nan, force_all_finite=False)  # no raise
+
+    # dtype and order enforcement.
+    X_C = np.arange(4).reshape(2, 2).copy("C")
+    X_F = X_C.copy("F")
+    X_int = X_C.astype(np.int)
+    X_float = X_C.astype(np.float)
+    Xs = [X_C, X_F, X_int, X_float]
+    dtypes = [np.int32, np.int, np.float, np.float32, None, np.bool, object]
+    orders = ['C', 'F', None]
+    copys = [True, False]
+
+    for X, dtype, order, copy in product(Xs, dtypes, orders, copys):
+        X_checked = check_array(X, dtype=dtype, order=order, copy=copy)
+        if dtype is not None:
+            assert_equal(X_checked.dtype, dtype)
+        else:
+            assert_equal(X_checked.dtype, X.dtype)
+        if order == 'C':
+            assert_true(X_checked.flags['C_CONTIGUOUS'])
+            assert_false(X_checked.flags['F_CONTIGUOUS'])
+        elif order == 'F':
+            assert_true(X_checked.flags['F_CONTIGUOUS'])
+            assert_false(X_checked.flags['C_CONTIGUOUS'])
+        if copy:
+            assert_false(X is X_checked)
+        else:
+            # doesn't copy if it was already good
+            if (X.dtype == X_checked.dtype and
+                    X_checked.flags['C_CONTIGUOUS'] == X.flags['C_CONTIGUOUS']
+                    and X_checked.flags['F_CONTIGUOUS'] == X.flags['F_CONTIGUOUS']):
+                assert_true(X is X_checked)
+
+    # allowed sparse != None
+    X_csc = sp.csc_matrix(X_C)
+    X_coo = X_csc.tocoo()
+    X_dok = X_csc.todok()
+    X_int = X_csc.astype(np.int)
+    X_float = X_csc.astype(np.float)
+
+    Xs = [X_csc, X_coo, X_dok, X_int, X_float]
+    accept_sparses = [['csr', 'coo'], ['coo', 'dok']]
+    for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses,
+                                                 copys):
+        X_checked = check_array(X, dtype=dtype, accept_sparse=accept_sparse,
+                                copy=copy)
+        if dtype is not None:
+            assert_equal(X_checked.dtype, dtype)
+        else:
+            assert_equal(X_checked.dtype, X.dtype)
+        if X.format in accept_sparse:
+            # no change if allowed
+            assert_equal(X.format, X_checked.format)
+        else:
+            # got converted
+            assert_equal(X_checked.format, accept_sparse[0])
+        if copy:
+            assert_false(X is X_checked)
+        else:
+            # doesn't copy if it was already good
+            if (X.dtype == X_checked.dtype and X.format == X_checked.format):
+                assert_true(X is X_checked)
+
+    # other input formats
+    # convert lists to arrays
+    X_dense = check_array([[1, 2], [3, 4]])
+    assert_true(isinstance(X_dense, np.ndarray))
+    # raise on too deep lists
+    assert_raises(ValueError, check_array, X_ndim.tolist())
+    check_array(X_ndim.tolist(), allow_nd=True)  # doesn't raise
+    # convert weird stuff to arrays
+    # X_no_array = NotAnArray(X_dense)
+    # result = check_array(X_no_array)
+    # assert_true(isinstance(result, np.ndarray))
+
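The copy branches of test_check_array above rely on check_array returning the input object untouched when nothing needs converting; a copy only happens when requested explicitly or forced by a dtype/order conversion. A quick illustration of that contract:

    import numpy as np
    from random_output_trees._utils import check_array

    X = np.arange(4, dtype=np.float64).reshape(2, 2)  # already 2d and finite
    print(check_array(X) is X)                   # True: nothing to convert
    print(check_array(X, copy=True) is X)        # False: forced copy
    print(check_array(X, dtype=np.int32) is X)   # False: conversion copies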
+def test_has_fit_parameter():
+    assert_false(has_fit_parameter(KNeighborsClassifier, "sample_weight"))
+    assert_true(has_fit_parameter(RandomForestRegressor, "sample_weight"))
+    assert_true(has_fit_parameter(SVR, "sample_weight"))
+    assert_true(has_fit_parameter(SVR(), "sample_weight"))
diff --git a/random_output_trees/tree.py b/random_output_trees/tree.py
index c854c59..c1bf1cd 100644
--- a/random_output_trees/tree.py
+++ b/random_output_trees/tree.py
@@ -27,7 +27,8 @@
 from sklearn.externals import six
 from sklearn.externals.six.moves import xrange
 from sklearn.feature_selection.from_model import _LearntSelectorMixin
-from sklearn.utils import check_array, check_random_state
+from sklearn.utils.validation import check_random_state
+from ._utils import check_array
 
 __all__ = ["DecisionTreeClassifier", "DecisionTreeRegressor"]
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..e3a5ee9
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,17 @@
+[nosetests]
+# nosetests skips test files with the executable bit by default
+# which can silently hide failing tests.
+# There are no executable scripts within this project,
+# so let's turn the --exe flag on to avoid skipping tests by
+# mistake.
+exe = 1
+cover-html = 1
+cover-html-dir = coverage
+cover-package = random_output_trees
+
+detailed-errors = 1
+with-doctest = 1
+doctest-tests = 1
+doctest-extension = rst
+doctest-fixtures = _fixture
+#doctest-options = +ELLIPSIS,+NORMALIZE_WHITESPACE
diff --git a/setup.py b/setup.py
index 8c24d4a..84989ab 100644
--- a/setup.py
+++ b/setup.py
@@ -14,13 +14,13 @@
 LONG_DESCRIPTION = open('README.rst').read()
 MAINTAINER = 'Arnaud Joly'
 MAINTAINER_EMAIL = 'arnaud.v.joly@gmail.com'
-URL = 'TODO'
-LICENSE = 'TODO' #TODO switch to new bsd later
-DOWNLOAD_URL = 'TODO'
+URL = 'http://arjoly.github.io/random-output-trees/'
+LICENSE = 'BSD'
+DOWNLOAD_URL = 'https://github.com/arjoly/random-output-trees/archive/master.zip'
 CLASSIFIERS = [
     'Intended Audience :: Science/Research',
     'Intended Audience :: Developers',
-    # 'License :: OSI Approved', # TODO
+    'License :: OSI Approved',
     'Programming Language :: C',
     'Programming Language :: Python',
     'Topic :: Software Development',