Skip to content

Commit

Permalink
Use the `itemsize` property to estimate the size of elements in arrays (#19)
Browse files: browse the repository at this point in the history
* Use property `.itemsize` to estimate the size of elements in arrays
* Fix size-tests to be platform-independent
  • Loading branch information
gakhov committed Oct 2, 2019
1 parent e72a20e commit a217f47
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 6 deletions.
3 changes: 1 addition & 2 deletions pdsa/cardinality/hyperloglog.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,7 @@ cdef class HyperLogLog:
Number of bytes allocated for the counter.
"""
cdef uint8_t bytes_per_counter = 4 # 'L' unsigned long takes 4 bytes
return bytes_per_counter * self.num_of_counters
return self.num_of_counters * self._counter.itemsize

def __repr__(self):
return "<HyperLogLog (length: {}, precision: {})>".format(
Expand Down
2 changes: 1 addition & 1 deletion pdsa/frequency/count_min_sketch.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ cdef class CountMinSketch:
Number of bytes allocated for the sketch.
"""
return self._length * sizeof(uint32_t)
return self._length * self._counter.itemsize

def __repr__(self):
return "<CountMinSketch ({} x {})>".format(
Expand Down
2 changes: 1 addition & 1 deletion pdsa/frequency/count_sketch.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ cdef class CountSketch:
Number of bytes allocated for the sketch.
"""
return self._length * sizeof(int32_t)
return self._length * self._counter.itemsize

def __repr__(self):
return "<CountSketch ({} x {})>".format(
Expand Down
10 changes: 9 additions & 1 deletion tests/cardinality/test_hyperloglog.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@

import array
import pytest

from math import sqrt
Expand All @@ -6,14 +8,20 @@

def test_init():
hll = HyperLogLog(10)
assert hll.sizeof() == 4096, "Unexpected size in bytes"

with pytest.raises(ValueError) as excinfo:
hll = HyperLogLog(2)
assert str(excinfo.value) == (
"Precision has to be in range 4...16")


def test_size():
    """Check that sizeof() equals len(hll) times the 'L' array item size.

    The per-item size is read from ``array.array`` at run time rather than
    hard-coded, so the expectation follows whatever the platform reports.
    """
    counter = HyperLogLog(10)

    # Item size of type code 'L' as reported by the array module here.
    bytes_per_item = array.array('L', [1]).itemsize
    expected_bytes = bytes_per_item * len(counter)

    assert counter.sizeof() == expected_bytes, "Unexpected size in bytes"


def test_repr():
hll = HyperLogLog(6)

Expand Down
10 changes: 9 additions & 1 deletion tests/frequency/test_count_min_sketch.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@

import array
import pytest

from pdsa.frequency.count_min_sketch import CountMinSketch


def test_init():
cms = CountMinSketch(2, 4)
assert cms.sizeof() == 32, 'Unexpected size in bytes'

with pytest.raises(ValueError) as excinfo:
cms = CountMinSketch(0, 5)
Expand All @@ -18,6 +19,13 @@ def test_init():
)


def test_size():
    """Check that sizeof() equals len(cms) times the 'I' array item size.

    The per-item size is read from ``array.array`` at run time rather than
    hard-coded, so the expectation follows whatever the platform reports.
    """
    sketch = CountMinSketch(2, 4)

    # Item size of type code 'I' as reported by the array module here.
    bytes_per_item = array.array('I', [1]).itemsize
    expected_bytes = bytes_per_item * len(sketch)

    assert sketch.sizeof() == expected_bytes, "Unexpected size in bytes"


def test_create_from_expected_error():
cms = CountMinSketch.create_from_expected_error(0.000001, 0.01)
assert repr(cms) == "<CountMinSketch (5 x 2718282)>"
Expand Down
8 changes: 8 additions & 0 deletions tests/frequency/test_count_sketch.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import array
import pytest

from pdsa.frequency.count_sketch import CountSketch
Expand All @@ -18,6 +19,13 @@ def test_init():
)


def test_size():
    """Check that sizeof() equals len(cs) times the 'i' array item size.

    The per-item size is read from ``array.array`` at run time rather than
    hard-coded, so the expectation follows whatever the platform reports.
    """
    sketch = CountSketch(2, 4)

    # Item size of type code 'i' as reported by the array module here.
    bytes_per_item = array.array('i', [1]).itemsize
    expected_bytes = bytes_per_item * len(sketch)

    assert sketch.sizeof() == expected_bytes, "Unexpected size in bytes"


def test_create_from_expected_error():
cs = CountSketch.create_from_expected_error(0.0001, 0.01)
assert repr(cs) == "<CountSketch (5 x 271828209)>"
Expand Down

0 comments on commit a217f47

Please sign in to comment.