Skip to content

Commit

Permalink
* Add a basic test suite.
Browse files Browse the repository at this point in the history
* Fix empty clusters being created before any items are added
* Fix new clusters starting with mass=1; they now start with mass=0.

Fixes #1
  • Loading branch information
craigds committed Aug 25, 2014
1 parent ee573e2 commit 8471beb
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 12 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
*.pyc
build/
pygvm.egg-info
.tox
dist
MANIFEST
37 changes: 26 additions & 11 deletions pygvm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import sys
from array import array

VERSION = (0, 3, 'x')
VERSION = (0, 3, 'a')


MAX_FLOAT = sys.float_info.max
Expand All @@ -18,7 +18,7 @@
class Cluster(object):
def __init__(self, clusters):
self.removed = False
self.mass = 1.0
self.mass = 0.0

# mass-weighted coordinate sum
self.m1 = [0] * clusters.dimension
Expand All @@ -32,9 +32,9 @@ def __init__(self, clusters):
@property
def center(self):
if self.mass:
return [coord / self.mass for coord in self.m1]
return tuple([coord / self.mass for coord in self.m1])
else:
return self.m1[:]
return None

def __len__(self):
return len(self.members)
Expand Down Expand Up @@ -274,9 +274,6 @@ def __init__(self, dimension, capacity, cluster_factory=Cluster):
self.clusters = []
self.pairs = ClusterPairs()
self.cluster_factory = cluster_factory
for i in range(capacity):
self.clusters.append(self.cluster_factory(self))
self._add_pairs()

def clear(self):
self.clusters = []
Expand All @@ -292,6 +289,17 @@ def add_bulk(self, items, step=1000):
This method can be much faster than calling add() repeatedly.
"""
allow_merge = True
if len(self.clusters) < self.capacity:
# create some clusters
num_to_add = min(len(items), self.capacity - len(self.clusters))
for i in range(num_to_add):
self.clusters.append(self.cluster_factory(self))

allow_merge = len(self.clusters) == self.capacity
if allow_merge:
# finished adding clusters, so we can populate pairs.
self._add_pairs()

# doing individual add/append for each key added is slow,
# so we delay it until a bunch of things have been added instead.
Expand All @@ -301,6 +309,7 @@ def _add_members():
for c, new_members in cluster_keys.items():
c.add_members(new_members)
del cluster_keys[c]

try:
for i, (mass, coords, key) in enumerate(items):
if mass == 0:
Expand All @@ -313,16 +322,22 @@ def _add_members():
break
else:
c = None
if c:

if c is not None:
# a cluster is empty. add to that cluster and continue
c.add(mass, coords, None)
if key is not None:
cluster_keys[c].append(key)
self._update_pairs(c)
continue

#identify cheapest merge
merge_pair = self.pairs.peek()
merge_t = merge_pair and merge_pair.value or MAX_FLOAT
if allow_merge:
# identify cheapest merge
merge_pair = self.pairs.peek()
merge_t = merge_pair.value if merge_pair else MAX_FLOAT
else:
# never merge if we still haven't assigned at least one point per cluster.
merge_t = MAX_FLOAT

# find cheapest addition
addition_c = None
Expand Down
40 changes: 40 additions & 0 deletions pygvm/tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import unittest2
from pygvm import Clusters

class TestClusters(unittest2.TestCase):
    """Checks how Clusters creates and fills clusters as points are added.

    Every test uses a two-dimensional Clusters with a capacity of two.
    """

    def test_empty_clusters(self):
        # A freshly constructed Clusters holds no cluster objects at all.
        clustering = Clusters(dimension=2, capacity=2)

        self.assertEqual(len(clustering.clusters), 0)

    def test_first_point(self):
        # Adding a single point yields exactly one cluster containing it.
        clustering = Clusters(dimension=2, capacity=2)

        clustering.add(1, (5, 5), 'bert')
        self.assertEqual(len(clustering.clusters), 1)

        only_cluster = clustering.clusters[0]
        self.assertEqual(len(only_cluster), 1)

        # The cluster's mass and center must match the one point added.
        self.assertEqual(only_cluster.mass, 1)
        self.assertEqual(only_cluster.center, (5, 5))

    def test_same_point_multiple_times(self):
        # NOTE: seeding both clusters with the same point is a degenerate
        # setup; the resulting clustering is basically just random.
        clustering = Clusters(dimension=2, capacity=2)
        point = (5, 5)

        # First copy of the point: one cluster with one member.
        clustering.add(1, point, 'tom')
        self.assertEqual(len(clustering.clusters), 1)
        self.assertEqual(len(clustering.clusters[0]), 1)

        # Second copy: a second cluster appears, each holding one member.
        clustering.add(1, point, 'dick')
        self.assertEqual(len(clustering.clusters), 2)
        self.assertEqual(len(clustering.clusters[0]), 1)
        self.assertEqual(len(clustering.clusters[1]), 1)

        # Third copy: capacity is reached, so no new cluster is created and
        # the point is expected to land in the first cluster.
        clustering.add(1, point, 'harry')
        self.assertEqual(len(clustering.clusters), 2)
        self.assertEqual(len(clustering.clusters[0]), 2)
        self.assertEqual(len(clustering.clusters[1]), 1)

        self.assertEqual(clustering.clusters[0].mass, 2)
        self.assertEqual(clustering.clusters[1].mass, 1)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
author='Craig de Stigter',
author_email='craig.ds@gmail.com',
url='http://github.com/craigds/pygvm',
packages=['pygvm', 'pygvm.libs'],
packages=['pygvm'],
classifiers = [
"Development Status :: 4 - Beta",
"Environment :: Web Environment",
Expand Down
24 changes: 24 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
[tox]
envlist=
    py26,
    py27,
    py33,
    py34


[testenv]
deps =
    unittest2
commands = unit2.py pygvm.tests {posargs}

[testenv:py26]
basepython=python2.6

[testenv:py27]
basepython=python2.7

[testenv:py33]
basepython=python3.3

[testenv:py34]
basepython=python3.4

0 comments on commit 8471beb

Please sign in to comment.