Skip to content

Commit

Permalink
Merge pull request #40 from gmrukwa/develop
Browse files Browse the repository at this point in the history
Release v2.3.18
  • Loading branch information
gmrukwa committed Jan 13, 2020
2 parents 3586b28 + 171e157 commit 0a06fb1
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:
env:
MAJOR: ${{ 2 }}
MINOR: ${{ 3 }}
FIXUP: ${{ 17 }}
FIXUP: ${{ 18 }}
PACKAGE_INIT_FILE: ${{ 'divik/__init__.py' }}
DOCKER_REPO: ${{ 'gmrukwa/divik' }}
IS_ALPHA: ${{ github.event_name == 'pull_request' }}
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ docker pull gmrukwa/divik
To install a specific version, you can specify it in the command, e.g.:

```bash
docker pull gmrukwa/divik:2.3.17
docker pull gmrukwa/divik:2.3.18
```

## Python package
Expand All @@ -60,7 +60,7 @@ pip install divik
or any stable tagged version, e.g.:

```bash
pip install divik==2.3.17
pip install divik==2.3.18
```

If you want to have compatibility with
Expand Down
2 changes: 1 addition & 1 deletion divik/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = '2.3.17'
__version__ = '2.3.18'

from divik import core
from divik import feature_selection
Expand Down
9 changes: 6 additions & 3 deletions divik/score/_sampled_gap.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@
_BIG_PRIME = 40013


def _pool_initialize(initializers, *args):
    """Call every sampler's ``initializer`` with its own argument tuple.

    Parameters
    ----------
    initializers
        Iterable of objects exposing an ``initializer(*a)`` method.
    *args
        One iterable of positional arguments per sampler, given in the
        same order as ``initializers``.

    Notes
    -----
    Pairing is done with ``zip``, so it stops at the shorter of the two
    inputs; unmatched samplers or argument tuples are silently ignored.
    NOTE(review): presumably kept at module level (instead of a closure)
    so it can be shipped to pool workers - confirm.
    """
    for init_args, target in zip(args, initializers):
        target.initializer(*init_args)


@seeded(wrapped_requires_seed=True)
def sampled_gap(data: Data, kmeans: KMeans,
sample_size: Union[int, float] = 1000,
Expand All @@ -28,9 +33,7 @@ def sampled_gap(data: Data, kmeans: KMeans,
kmeans_ = clone(kmeans)
seeds = list(seed + np.arange(n_trials) * _BIG_PRIME)
with data_.parallel() as d, reference_.parallel() as r:
def initializer(*args):
for arg, sampler in zip(args, [d, r]):
sampler.initializer(*arg)
initializer = partial(_pool_initialize, [d, r])
with maybe_pool(n_jobs, initializer=initializer,
initargs=(d.initargs, r.initargs)) as pool:
compute_disp = partial(_dispersion, sampler=r, kmeans=kmeans_)
Expand Down
4 changes: 2 additions & 2 deletions docs/instructions/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ To install latest stable version use::

To install a specific version, you can specify it in the command, e.g.::

docker pull gmrukwa/divik:2.3.17
docker pull gmrukwa/divik:2.3.18

Python package
--------------
Expand All @@ -31,7 +31,7 @@ package::

or any stable tagged version, e.g.::

pip install divik==2.3.17
pip install divik==2.3.18

If you want to have compatibility with
`gin-config <https://github.com/google/gin-config>`_, you can install
Expand Down
14 changes: 14 additions & 0 deletions test/cluster/divik/test_dunn_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,20 @@ def test_works_with_pool(self):
npt.assert_array_equal(expected, labels1)
npt.assert_array_equal(expected, labels2)

def test_works_with_pool_and_sampling(self):
    """Labels from the ``n_jobs=-1`` estimator must equal the default ones.

    Both estimators are built with ``sample_size=50``; the only difference
    between them is the ``n_jobs=-1`` argument.
    """
    # Settings shared by both estimators, so only n_jobs differs.
    common = dict(distance='euclidean', distance_percentile=.9,
                  max_iter=10, sample_size=50, rejection_size=5)
    X, _ = make_blobs(n_samples=1000, n_features=100, centers=5,
                      random_state=42)
    sequential = DunnDiviK(**common)
    parallel = DunnDiviK(n_jobs=-1, **common)
    expected = sequential.fit_predict(X)
    # Collect both label vectors before asserting anything.
    fitted_labels = parallel.fit_predict(X)
    predicted_labels = parallel.predict(X)
    for labels in (fitted_labels, predicted_labels):
        npt.assert_array_equal(expected, labels)

def test_has_decent_performance_on_numerous_clusters(self):
X, y = make_blobs(n_samples=200, n_features=100, centers=20,
random_state=42)
Expand Down
14 changes: 14 additions & 0 deletions test/cluster/divik/test_gap_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,20 @@ def test_works_with_pool(self):
npt.assert_array_equal(expected, labels1)
npt.assert_array_equal(expected, labels2)

def test_works_with_pool_and_sampling(self):
    """Labels from the ``n_jobs=-1`` estimator must equal the default ones.

    Both estimators are built with ``sample_size=50``; the only difference
    between them is the ``n_jobs=-1`` argument.
    """
    # Settings shared by both estimators, so only n_jobs differs.
    common = dict(distance='euclidean', distance_percentile=.9,
                  max_iter=10, sample_size=50, rejection_size=5)
    X, _ = make_blobs(n_samples=1000, n_features=100, centers=5,
                      random_state=42)
    sequential = DiviK(**common)
    parallel = DiviK(n_jobs=-1, **common)
    expected = sequential.fit_predict(X)
    # Collect both label vectors before asserting anything.
    fitted_labels = parallel.fit_predict(X)
    predicted_labels = parallel.predict(X)
    for labels in (fitted_labels, predicted_labels):
        npt.assert_array_equal(expected, labels)

def test_transforms_to_n_clusters_dimensions(self):
X, _ = make_blobs(n_samples=600, n_features=100, centers=20,
random_state=42)
Expand Down

0 comments on commit 0a06fb1

Please sign in to comment.