Skip to content

Commit

Permalink
Add missing examples (#24)
Browse files Browse the repository at this point in the history
* Add example for Count-Min Sketch

* Add example for Count Sketch. Thank you @victox5 

* Add example for HyperLogLog

* Add example for Probabilistic Counter
  • Loading branch information
gakhov committed Oct 3, 2019
1 parent 2d490fa commit 836a67f
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 0 deletions.
28 changes: 28 additions & 0 deletions examples/cardinality/hyperloglog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Example how to use HyperLogLog."""

from pdsa.cardinality.hyperloglog import HyperLogLog


# Filler text; the script below splits it on whitespace and feeds the
# resulting words into the counter. Adjacent literals are concatenated
# by the parser into a single string.
LOREM_IPSUM = (
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
    " Mauris consequat leo ut vehicula placerat. In lacinia, nisl"
    " id maximus auctor, sem elit interdum urna, at efficitur tellus"
    " turpis at quam. Pellentesque eget iaculis turpis. Nam ac ligula"
    " ut nunc porttitor pharetra in non lorem. In purus metus,"
    " sollicitudin tristique sapien."
)

if __name__ == '__main__':
    # NOTE(review): the argument 10 is presumably the precision of the
    # counter -- confirm against the HyperLogLog constructor.
    hll = HyperLogLog(10)

    # Show the freshly created (still empty) counter and its footprint.
    print(hll)
    print("HLL counter uses {} bytes in the memory".format(hll.sizeof()))

    print("Counter contains approx. {} unique elements".format(hll.count()))

    # Strip surrounding punctuation *before* de-duplicating. Otherwise
    # tokens such as "elit." and "elit" are counted as two words here but
    # reach the counter as one element, and the exact number printed
    # below would overstate how many distinct elements were added.
    words = {token.strip(" .,") for token in LOREM_IPSUM.split()}
    for word in words:
        hll.add(word)

    # Exact number of distinct words next to the counter's estimate.
    print("Added {} words, in the counter approx. {} unique elements".format(
        len(words), hll.count()))
28 changes: 28 additions & 0 deletions examples/cardinality/probabilistic_counter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""Example how to use ProbabilisticCounter."""

from pdsa.cardinality.probabilistic_counter import ProbabilisticCounter


# Filler text; the script below splits it on whitespace and feeds the
# resulting words into the counter. Adjacent literals are concatenated
# by the parser into a single string.
LOREM_IPSUM = (
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
    " Mauris consequat leo ut vehicula placerat. In lacinia, nisl"
    " id maximus auctor, sem elit interdum urna, at efficitur tellus"
    " turpis at quam. Pellentesque eget iaculis turpis. Nam ac ligula"
    " ut nunc porttitor pharetra in non lorem. In purus metus,"
    " sollicitudin tristique sapien."
)

if __name__ == '__main__':
    # NOTE(review): 2048 is presumably the number of internal counters --
    # confirm against the ProbabilisticCounter constructor.
    pc = ProbabilisticCounter(2048, with_small_cardinality_correction=True)

    # Show the freshly created (still empty) counter and its footprint.
    print(pc)
    print("PC counter uses {} bytes in the memory".format(pc.sizeof()))

    print("Counter contains approx. {} unique elements".format(pc.count()))

    # Strip surrounding punctuation *before* de-duplicating. Otherwise
    # tokens such as "elit." and "elit" are counted as two words here but
    # reach the counter as one element, and the exact number printed
    # below would overstate how many distinct elements were added.
    words = {token.strip(" .,") for token in LOREM_IPSUM.split()}
    for word in words:
        pc.add(word)

    # Exact number of distinct words next to the counter's estimate.
    print("Added {} words, in the counter approx. {} unique elements".format(
        len(words), pc.count()))
26 changes: 26 additions & 0 deletions examples/frequency/count_min_sketch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""Example how to use Count-Min Sketch."""

from pdsa.frequency.count_min_sketch import CountMinSketch


# Sample stream of integers with repeated values; the script below adds
# each one to the sketch and then compares exact and estimated counts.
DATASET = [
    30, 19, 4, 29, 9, 9, 2, 26, 12, 13, 27, 18, 3, 20, 13, 17, 24, 24, 9, 28,
    20, 30, 10, 5, 8, 2, 6, 28, 20, 17, 26, 23, 25, 26, 1, 30, 28, 20, 7, 26,
    14, 3, 21, 2, 23, 22, 4, 15, 27, 9, 19, 29, 25, 27, 25, 28, 2, 27, 29, 16,
    9, 23, 3, 30, 1, 1, 26, 6, 4, 27, 12, 13, 3, 28, 27, 10, 9, 10, 2, 22, 6,
    8, 5, 30, 21, 9, 29, 6, 5, 2, 3, 1, 16, 17, 15, 5, 3, 6, 9, 12,
]

if __name__ == '__main__':
    # Imported locally so the example needs no change to the module's
    # import section.
    from collections import Counter

    # NOTE(review): the arguments (4, 100) are presumably the number of
    # counter arrays and the length of each -- confirm against the
    # CountMinSketch constructor.
    cms = CountMinSketch(4, 100)

    # Show the freshly created sketch and its memory footprint.
    print(cms)
    print("CMS uses {} bytes in the memory".format(cms.sizeof()))

    for element in DATASET:
        cms.add(element)

    # Tally the exact frequencies in a single pass instead of calling
    # DATASET.count() once per distinct element.
    exact = Counter(DATASET)

    # Report every distinct element in ascending order: exact frequency
    # next to the sketch's estimate.
    for element in sorted(exact):
        print("Element: {}. Freq.: {}, Est. Freq.: {}".format(
            element, exact[element], cms.frequency(element)
        ))

0 comments on commit 836a67f

Please sign in to comment.