Setup CI services, packaging (#7)

* Setup CI services, packaging
* Fixups
* Fix warnings, cleanup
* BUG: fix scale assignment
* Added tox
dask · Sep 26, 2017 · 8e2600d · 8e2600d
1 parent 9e85ba2
commit 8e2600d
Show file tree

Hide file tree

Showing 18 changed files with 175 additions and 78 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,18 @@
+sudo: required
+services:
+  - docker
+language: python
+env:
+  matrix:
+    - PYTHON=3.6
+    - PYTHON=3.5
+    - PYTHON=2.7
+before_install:
+  - export PATH="$HOME/miniconda3/bin:$PATH"
+install:
+  - ci/install-travis.sh
+script:
+  - echo "script start"
+  - source activate test-environment
+  - pytest tests
+  - flake8
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,4 @@
+include MANIFEST.in
+include LICENSE.md
+include README.rst
+include setup.py
diff --git a/README.rst b/README.rst
@@ -1,7 +1,7 @@
 dask-ml
 =======
 
-``dask-ml`` is a library for distributed and machine learning using `dask`_.
+``dask-ml`` is a library for distributed and parallel machine learning using `dask`_.
 See the `documentation`_ for more.
 
 .. _dask: http://dask.pydata.org

diff --git a/ci/install-travis.sh b/ci/install-travis.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# Provision Miniconda and the "test-environment" conda env used by the Travis build.
+echo "[install-travis]"
+
+# install miniconda
+MINICONDA_DIR="$HOME/miniconda3"
+time wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1
+time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1
+
+echo
+echo "[show conda]"
+which conda
+
+echo
+echo "[update conda]"
+conda config --set always_yes true --set changeps1 false || exit 1
+conda update -q conda
+
+echo
+echo "[conda build]"
+conda install conda-build anaconda-client --yes
+
+echo
+echo "[add channels]"
+conda config --add channels conda-forge || exit 1
+
+conda create -q -n test-environment "python=${PYTHON}" || exit 1
+source activate test-environment || exit 1
+
+conda install -q \
+      coverage \
+      flake8 \
+      pytest \
+      pytest-cov \
+      pytest-mock \
+      mock \
+      numpy \
+      pandas \
+      scikit-learn || exit 1
+
+# development dask, distributed
+pip install git+https://github.com/dask/dask || exit 1
+pip install git+https://github.com/dask/distributed || exit 1
+
+pip install -e . || exit 1
+conda list -n test-environment  # -n selects the env; a bare argument only filters package names
+exit 0
diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt
@@ -0,0 +1,12 @@
+coverage
+flake8
+pytest
+pytest-cov
+pytest-mock
+mock
+numpy
+pandas
+scikit-learn
+scipy
+git+https://github.com/dask/dask
+git+https://github.com/dask/distributed
diff --git a/daskml/base.py b/daskml/base.py
@@ -3,7 +3,7 @@
 from dask.array import learn
 
 
-class _BigPartialFitMixin:
+class _BigPartialFitMixin(object):
 
     _init_kwargs = []
     _fit_kwargs = []
@@ -17,7 +17,7 @@ def __init__(self, **kwargs):
             )
         for kwarg in self._init_kwargs:
             setattr(self, kwarg, kwargs.pop(kwarg))
-        super().__init__(**kwargs)
+        super(_BigPartialFitMixin, self).__init__(**kwargs)
 
     @classmethod
     def _get_param_names(cls):
@@ -39,7 +39,7 @@ def fit(self, X, y=None, get=None):
             get = dask.threaded.get
 
         fit_kwargs = {k: getattr(self, k) for k in self._fit_kwargs}
-        result = learn.fit(self, X, y, **fit_kwargs, get=get)
+        result = learn.fit(self, X, y, get=get, **fit_kwargs)
 
         # Copy the learned attributes over to self
         # It should go without saying that this is *not* threadsafe
@@ -49,14 +49,14 @@ def fit(self, X, y=None, get=None):
         return self
 
     def predict(self, X, dtype=None):
-        predict = super().predict
+        predict = super(_BigPartialFitMixin, self).predict
         if dtype is None:
             dtype = self._get_predict_dtype(X)
         return X.map_blocks(predict, dtype=dtype, drop_axis=1)
 
     def _get_predict_dtype(self, X):
         xx = np.zeros((1, X.shape[1]), dtype=X.dtype)
-        return super().predict(xx).dtype
+        return super(_BigPartialFitMixin, self).predict(xx).dtype
 
 
 __all__ = [

diff --git a/daskml/ensemble.py b/daskml/ensemble.py
diff --git a/daskml/preprocessing/data.py b/daskml/preprocessing/data.py
@@ -1,6 +1,6 @@
 from collections import OrderedDict
 
-from dask import compute, persist
+from dask import persist
 import dask.array as da
 from sklearn.preprocessing import data as skdata
 
@@ -17,7 +17,8 @@ def fit(self, X, y=None):
             to_persist['mean_'] = mean_
         if self.with_std:
             var_ = X.var(0)
-            scale_ = var_[var_ == 0] = 1
+            scale_ = var_.copy()
+            scale_[scale_ == 0] = 1
             scale_ = da.sqrt(scale_)
             to_persist['scale_'] = scale_
             to_persist['var_'] = var_

diff --git a/daskml/util.py b/daskml/util.py
diff --git a/flit.ini b/flit.ini
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,8 @@
+[bdist_wheel]
+universal=1
+
+[flake8]
+exclude = tests/data,docs,benchmarks,scripts
+
+[tool:pytest]
+addopts = -rsx -v
diff --git a/setup.py b/setup.py
@@ -0,0 +1,59 @@
+import sys
+import os
+from codecs import open  # Python 2's builtin open() has no ``encoding`` argument
+
+from setuptools import setup, find_packages
+
+here = os.path.abspath(os.path.dirname(__file__))
+
+# Get the long description from the README file
+with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:
+    long_description = f.read()
+
+install_requires = ['dask', 'distributed', 'numpy', 'pandas', 'scikit-learn']
+
+# Optional requirements, exposed through ``extras_require`` below
+doc_requires = ['sphinx', 'numpydoc', 'sphinx-rtd-theme']
+test_requires = ['coverage', 'pytest', 'pytest-mock']
+
+# ``mock`` only ships in the standard library (unittest.mock) on Python 3
+if sys.version_info.major == 2:
+    test_requires.append("mock")
+
+# Built after the Python 2 adjustment so the 'dev' extra also includes ``mock``
+dev_requires = doc_requires + test_requires
+
+extra_requires = {
+    'docs': doc_requires,
+    'test': test_requires,
+    'dev': dev_requires,
+}
+
+setup(
+    name='daskml',
+    description='A library for distributed and parallel machine learning',
+    long_description=long_description,
+    url='https://github.com/dask/dask-ml',
+
+    author='Tom Augspurger',
+    author_email='taugspurger@anaconda.com',
+    license='BSD',
+    classifiers=[
+        'Development Status :: 5 - Production/Stable',
+        'Intended Audience :: Developers',
+        'Topic :: Database',
+        'Topic :: Scientific/Engineering',
+        'License :: OSI Approved :: BSD License',
+        'Programming Language :: Python :: 2',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+    ],
+    packages=find_packages(exclude=['docs', 'tests']),
+    use_scm_version=True,
+    setup_requires=['setuptools_scm'],
+    install_requires=install_requires,
+    extras_require=extra_requires,
+)
diff --git a/tests/linear_model/test_neural_network.py b/tests/linear_model/test_neural_network.py
@@ -4,7 +4,7 @@
 from dask.array.utils import assert_eq
 
 
-class TestMLPClassifier:
+class TestMLPClassifier(object):
 
     def test_basic(self, single_chunk_classification):
         X, y = single_chunk_classification
@@ -16,7 +16,7 @@ def test_basic(self, single_chunk_classification):
             assert_eq(a_, b_)
 
 
-class TestMLPRegressor:
+class TestMLPRegressor(object):
 
     def test_basic(self, single_chunk_classification):
         X, y = single_chunk_classification

diff --git a/tests/linear_model/test_passive_aggressive.py b/tests/linear_model/test_passive_aggressive.py
@@ -4,18 +4,20 @@
 from dask.array.utils import assert_eq
 
 
-class TestPassiveAggressiveClassifier:
+class TestPassiveAggressiveClassifier(object):
 
     def test_basic(self, single_chunk_classification):
         X, y = single_chunk_classification
-        a = lm.BigPassiveAggressiveClassifier(classes=[0, 1], random_state=0)
-        b = lm_.PassiveAggressiveClassifier(random_state=0)
+        a = lm.BigPassiveAggressiveClassifier(classes=[0, 1], random_state=0,
+                                              max_iter=100, tol=1e-3)
+        b = lm_.PassiveAggressiveClassifier(random_state=0, max_iter=100,
+                                            tol=1e-3)
         a.fit(X, y)
         b.partial_fit(X, y, classes=[0, 1])
         assert_eq(a.coef_, b.coef_)
 
 
-class TestPassiveAggressiveRegressor:
+class TestPassiveAggressiveRegressor(object):
 
     def test_basic(self, single_chunk_regression):
         X, y = single_chunk_regression

diff --git a/tests/linear_model/test_perceptron.py b/tests/linear_model/test_perceptron.py
@@ -4,7 +4,7 @@
 from dask.array.utils import assert_eq
 
 
-class TestPerceptron:
+class TestPerceptron(object):
 
     def test_basic(self, single_chunk_classification):
         X, y = single_chunk_classification

diff --git a/tests/linear_model/test_stochastic_gradient.py b/tests/linear_model/test_stochastic_gradient.py
@@ -4,7 +4,7 @@
 import numpy.testing as npt
 
 
-class TestStochasticGradientClassifier:
+class TestStochasticGradientClassifier(object):
 
     def test_basic(self, single_chunk_classification):
         X, y = single_chunk_classification
@@ -18,7 +18,7 @@ def test_basic(self, single_chunk_classification):
         npt.assert_almost_equal(a.coef_, b.coef_)
 
 
-class TestStochasticGradientRegressor:
+class TestStochasticGradientRegressor(object):
 
     def test_basic(self, single_chunk_regression):
         X, y = single_chunk_regression

diff --git a/tests/test_naive_bayes.py b/tests/test_naive_bayes.py
@@ -24,7 +24,7 @@ def test_smoke():
     assert_eq(a.predict_log_proba(X).compute(), b.predict_log_proba(X_))
 
 
-class TestBigMultinomialNB:
+class TestBigMultinomialNB(object):
     def test_basic(self, single_chunk_count_classification):
         X, y = single_chunk_count_classification
         a = nb.BigMultinomialNB(classes=[0, 1])
@@ -34,7 +34,7 @@ def test_basic(self, single_chunk_count_classification):
         assert_eq(a.coef_, b.coef_)
 
 
-class TestBigBernoulliNB:
+class TestBigBernoulliNB(object):
     def test_basic(self, single_chunk_binary_classification):
         X, y = single_chunk_binary_classification
         a = nb.BigBernoulliNB(classes=[0, 1])

diff --git a/tox.ini b/tox.ini
@@ -0,0 +1,5 @@
+[tox]
+envlist=py27,py36
+[testenv]
+deps=-rci/requirements_dev.txt
+commands=pytest