Skip to content

Commit

Permalink
Setup CI services, packaging (#7)
Browse files Browse the repository at this point in the history
* Setup CI services, packaging

* Fixups

* Fix warnings, cleanup

* BUG: fix scale assignment

* Added tox
  • Loading branch information
TomAugspurger committed Sep 26, 2017
1 parent 9e85ba2 commit 8e2600d
Show file tree
Hide file tree
Showing 18 changed files with 175 additions and 78 deletions.
18 changes: 18 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Travis CI configuration: runs the test suite and flake8 once per
# PYTHON value listed in the env matrix below.
sudo: required
services:
  - docker
language: python
env:
  matrix:
    - PYTHON=3.6
    - PYTHON=3.5
    - PYTHON=2.7
before_install:
  # ci/install-travis.sh installs Miniconda into $HOME/miniconda3; put its
  # bin directory on PATH so `conda` resolves in the later steps.
  - export PATH="$HOME/miniconda3/bin:$PATH"
install:
  - ci/install-travis.sh
script:
  - echo "script start"
  # The environment is created by ci/install-travis.sh.
  - source activate test-environment
  - pytest tests
  - flake8
4 changes: 4 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Top-level files to ship in the source distribution.
include MANIFEST.in LICENSE.md README.rst setup.py
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
dask-ml
=======

``dask-ml`` is a library for distributed and machine learning using `dask`_.
``dask-ml`` is a library for distributed and parallel machine learning using `dask`_.
See the `documentation`_ for more.

.. _dask: http://dask.pydata.org
Expand Down
47 changes: 47 additions & 0 deletions ci/install-travis.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
# Provision the CI test environment: install Miniconda, create the
# "test-environment" conda env for the requested interpreter, and install
# the test dependencies plus development dask/distributed and this package.
#
# Reads:  PYTHON  - interpreter version passed to `conda create` (e.g. 3.6)
# Exits non-zero as soon as a step the test run depends on fails.

echo "[install-travis]"

# install miniconda (fetched over https so the installer cannot be tampered
# with in transit)
MINICONDA_DIR="$HOME/miniconda3"
time wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1
time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1

echo
echo "[show conda]"
which conda

echo
echo "[update conda]"
conda config --set always_yes true --set changeps1 false || exit 1
conda update -q conda

echo
echo "[conda build]"
conda install conda-build anaconda-client --yes

echo
echo "[add channels]"
conda config --add channels conda-forge || exit 1

# Everything below is required for the tests to run at all, so abort on
# failure instead of letting the final `exit 0` hide it.
conda create -q -n test-environment python="${PYTHON}" || exit 1
source activate test-environment || exit 1

conda install -q \
    coverage \
    flake8 \
    pytest \
    pytest-cov \
    pytest-mock \
    mock \
    numpy \
    pandas \
    scikit-learn || exit 1

# development dask, distributed
pip install git+https://github.com/dask/dask || exit 1
pip install git+https://github.com/dask/distributed || exit 1

pip install -e . || exit 1

# `-n` selects the environment; a bare positional argument would be treated
# as a package-name filter and print nothing useful.
conda list -n test-environment
exit 0
12 changes: 12 additions & 0 deletions ci/requirements_dev.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Development / test requirements (tox.ini installs these via
# `-rci/requirements_dev.txt`).

# Test and lint tooling
coverage
flake8
mock
pytest
pytest-cov
pytest-mock

# Scientific stack
numpy
pandas
scikit-learn
scipy

# Development versions of dask and distributed, straight from GitHub
git+https://github.com/dask/dask
git+https://github.com/dask/distributed
10 changes: 5 additions & 5 deletions daskml/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from dask.array import learn


class _BigPartialFitMixin:
class _BigPartialFitMixin(object):

_init_kwargs = []
_fit_kwargs = []
Expand All @@ -17,7 +17,7 @@ def __init__(self, **kwargs):
)
for kwarg in self._init_kwargs:
setattr(self, kwarg, kwargs.pop(kwarg))
super().__init__(**kwargs)
super(_BigPartialFitMixin, self).__init__(**kwargs)

@classmethod
def _get_param_names(cls):
Expand All @@ -39,7 +39,7 @@ def fit(self, X, y=None, get=None):
get = dask.threaded.get

fit_kwargs = {k: getattr(self, k) for k in self._fit_kwargs}
result = learn.fit(self, X, y, **fit_kwargs, get=get)
result = learn.fit(self, X, y, get=get, **fit_kwargs)

# Copy the learned attributes over to self
# It should go without saying that this is *not* threadsafe
Expand All @@ -49,14 +49,14 @@ def fit(self, X, y=None, get=None):
return self

def predict(self, X, dtype=None):
predict = super().predict
predict = super(_BigPartialFitMixin, self).predict
if dtype is None:
dtype = self._get_predict_dtype(X)
return X.map_blocks(predict, dtype=dtype, drop_axis=1)

def _get_predict_dtype(self, X):
xx = np.zeros((1, X.shape[1]), dtype=X.dtype)
return super().predict(xx).dtype
return super(_BigPartialFitMixin, self).predict(xx).dtype


__all__ = [
Expand Down
27 changes: 0 additions & 27 deletions daskml/ensemble.py

This file was deleted.

5 changes: 3 additions & 2 deletions daskml/preprocessing/data.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections import OrderedDict

from dask import compute, persist
from dask import persist
import dask.array as da
from sklearn.preprocessing import data as skdata

Expand All @@ -17,7 +17,8 @@ def fit(self, X, y=None):
to_persist['mean_'] = mean_
if self.with_std:
var_ = X.var(0)
scale_ = var_[var_ == 0] = 1
scale_ = var_.copy()
scale_[scale_ == 0] = 1
scale_ = da.sqrt(scale_)
to_persist['scale_'] = scale_
to_persist['var_'] = var_
Expand Down
27 changes: 0 additions & 27 deletions daskml/util.py

This file was deleted.

5 changes: 0 additions & 5 deletions flit.ini

This file was deleted.

8 changes: 8 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Wheel build options.
[bdist_wheel]
universal = 1

# Paths flake8 should not lint.
[flake8]
exclude =
    tests/data,
    docs,
    benchmarks,
    scripts

# Default command-line options for pytest.
[tool:pytest]
addopts = -rsx -v
59 changes: 59 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""Packaging script for daskml.

Reads the long description from README.rst, declares the runtime
requirements and the optional ``docs`` / ``test`` / ``dev`` extras, and
hands everything to ``setuptools.setup``. The version is derived from SCM
metadata via ``setuptools_scm``.
"""
import sys
import os
from codecs import open

from setuptools import setup, find_packages

here = os.path.abspath(os.path.dirname(__file__))

# Get the long description from the README file
with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f:
    long_description = f.read()

install_requires = ['dask', 'distributed', 'numpy', 'pandas', 'scikit-learn']

# Optional Requirements

doc_requires = ['sphinx', 'numpydoc', 'sphinx-rtd-theme']
test_requires = ['coverage', 'pytest', 'pytest-mock']

if sys.version_info.major == 2:
    test_requires.append("mock")

# Build the combined list only after test_requires is final: list
# concatenation copies, so doing this before the Python 2 branch above
# would leave "mock" out of the 'dev' extra.
dev_requires = doc_requires + test_requires

extra_requires = {
    'docs': doc_requires,
    'test': test_requires,
    'dev': dev_requires,
}

setup(
    name='daskml',
    description='A library for distributed and parallel machine learning',
    long_description=long_description,
    url='https://github.com/dask/dask-ml',

    author='Tom Augspurger',
    author_email='taugspurger@anaconda.com',
    license='BSD',
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Developers',
        'Topic :: Database',
        'Topic :: Scientific/Engineering',
        'License :: OSI Approved :: BSD License',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
    ],
    packages=find_packages(exclude=['docs', 'tests']),
    # Version comes from SCM metadata (setuptools_scm) rather than a
    # hard-coded string.
    use_scm_version=True,
    setup_requires=['setuptools_scm'],
    install_requires=install_requires,
    extras_require=extra_requires,
)
4 changes: 2 additions & 2 deletions tests/linear_model/test_neural_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dask.array.utils import assert_eq


class TestMLPClassifier:
class TestMLPClassifier(object):

def test_basic(self, single_chunk_classification):
X, y = single_chunk_classification
Expand All @@ -16,7 +16,7 @@ def test_basic(self, single_chunk_classification):
assert_eq(a_, b_)


class TestMLPRegressor:
class TestMLPRegressor(object):

def test_basic(self, single_chunk_classification):
X, y = single_chunk_classification
Expand Down
10 changes: 6 additions & 4 deletions tests/linear_model/test_passive_aggressive.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,20 @@
from dask.array.utils import assert_eq


class TestPassiveAggressiveClassifier:
class TestPassiveAggressiveClassifier(object):

def test_basic(self, single_chunk_classification):
X, y = single_chunk_classification
a = lm.BigPassiveAggressiveClassifier(classes=[0, 1], random_state=0)
b = lm_.PassiveAggressiveClassifier(random_state=0)
a = lm.BigPassiveAggressiveClassifier(classes=[0, 1], random_state=0,
max_iter=100, tol=1e-3)
b = lm_.PassiveAggressiveClassifier(random_state=0, max_iter=100,
tol=1e-3)
a.fit(X, y)
b.partial_fit(X, y, classes=[0, 1])
assert_eq(a.coef_, b.coef_)


class TestPassiveAggressiveRegressor:
class TestPassiveAggressiveRegressor(object):

def test_basic(self, single_chunk_regression):
X, y = single_chunk_regression
Expand Down
2 changes: 1 addition & 1 deletion tests/linear_model/test_perceptron.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dask.array.utils import assert_eq


class TestPerceptron:
class TestPerceptron(object):

def test_basic(self, single_chunk_classification):
X, y = single_chunk_classification
Expand Down
4 changes: 2 additions & 2 deletions tests/linear_model/test_stochastic_gradient.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy.testing as npt


class TestStochasticGradientClassifier:
class TestStochasticGradientClassifier(object):

def test_basic(self, single_chunk_classification):
X, y = single_chunk_classification
Expand All @@ -18,7 +18,7 @@ def test_basic(self, single_chunk_classification):
npt.assert_almost_equal(a.coef_, b.coef_)


class TestStochasticGradientRegressor:
class TestStochasticGradientRegressor(object):

def test_basic(self, single_chunk_regression):
X, y = single_chunk_regression
Expand Down
4 changes: 2 additions & 2 deletions tests/test_naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_smoke():
assert_eq(a.predict_log_proba(X).compute(), b.predict_log_proba(X_))


class TestBigMultinomialNB:
class TestBigMultinomialNB(object):
def test_basic(self, single_chunk_count_classification):
X, y = single_chunk_count_classification
a = nb.BigMultinomialNB(classes=[0, 1])
Expand All @@ -34,7 +34,7 @@ def test_basic(self, single_chunk_count_classification):
assert_eq(a.coef_, b.coef_)


class TestBigBernoulliNB:
class TestBigBernoulliNB(object):
def test_basic(self, single_chunk_binary_classification):
X, y = single_chunk_binary_classification
a = nb.BigBernoulliNB(classes=[0, 1])
Expand Down
5 changes: 5 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Interpreters tox runs the suite against.
[tox]
envlist = py27, py36

# Each environment installs the shared development requirements and runs
# pytest.
[testenv]
deps = -rci/requirements_dev.txt
commands = pytest

0 comments on commit 8e2600d

Please sign in to comment.