Skip to content

Commit

Permalink
Merge pull request #24 from sdsykes/master
Browse files Browse the repository at this point in the history
Make sure toNormal is eventually done even if Count is called often
  • Loading branch information
clarkduvall committed Nov 27, 2017
2 parents c8a7a0b + a0ad9b7 commit a0107a5
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 16 deletions.
32 changes: 20 additions & 12 deletions hyperloglogplus.go
Expand Up @@ -55,6 +55,7 @@ func (h *HyperLogLogPlus) decodeHash(k uint32) (uint32, uint8) {
}

// Merge tmpSet and sparseList in the sparse representation.
// Converts to normal if the sparse list is too large.
func (h *HyperLogLogPlus) mergeSparse() {
keys := make(sortableSlice, 0, len(h.tmpSet))
for k := range h.tmpSet {
Expand Down Expand Up @@ -89,6 +90,17 @@ func (h *HyperLogLogPlus) mergeSparse() {

h.sparseList = newList
h.tmpSet = set{}

if uint32(h.sparseList.Len()) > h.m {
h.toNormal()
}
}

// mergeSparseAndToNormal first folds tmpSet into sparseList via mergeSparse
// and then, if h is still flagged as sparse afterwards, switches it to the
// normal representation with toNormal.
func (h *HyperLogLogPlus) mergeSparseAndToNormal() {
	h.mergeSparse()

	// mergeSparse may already have converted h; nothing more to do then.
	if !h.sparse {
		return
	}
	h.toNormal()
}

// NewPlus returns a new initialized HyperLogLogPlus that uses the HyperLogLog++
Expand Down Expand Up @@ -118,10 +130,6 @@ func (h *HyperLogLogPlus) Clear() {
// Converts HyperLogLogPlus h to the normal representation from the sparse
// representation.
func (h *HyperLogLogPlus) toNormal() {
if len(h.tmpSet) > 0 {
h.mergeSparse()
}

h.reg = make([]uint8, h.m)
for iter := h.sparseList.Iter(); iter.HasNext(); {
i, r := h.decodeHash(iter.Next())
Expand All @@ -140,7 +148,7 @@ func (h *HyperLogLogPlus) Add(item Hash64) {
x := item.Sum64()
if h.sparse {
h.tmpSet.Add(h.encodeHash(x))
h.maybeToNormal()
h.maybeMerge()
} else {
i := eb64(x, 64, 64-h.p) // {x63,...,x64-p}
w := x<<h.p | 1<<(h.p-1) // {x63-p,...,x0}
Expand All @@ -165,12 +173,12 @@ func (h *HyperLogLogPlus) Merge(other *HyperLogLogPlus) error {
for iter := other.sparseList.Iter(); iter.HasNext(); {
h.tmpSet.Add(iter.Next())
}
h.maybeToNormal()
h.maybeMerge()
return nil
}

if h.sparse {
h.toNormal()
h.mergeSparseAndToNormal()
}

if other.sparse {
Expand All @@ -197,13 +205,10 @@ func (h *HyperLogLogPlus) Merge(other *HyperLogLogPlus) error {
return nil
}

// Converts to normal if the sparse list is too large.
func (h *HyperLogLogPlus) maybeToNormal() {
// Merges tmpSet if it exceeds the threshold
func (h *HyperLogLogPlus) maybeMerge() {
if uint32(len(h.tmpSet))*100 > h.m {
h.mergeSparse()
if uint32(h.sparseList.Len()) > h.m {
h.toNormal()
}
}
}

Expand Down Expand Up @@ -235,6 +240,9 @@ func (h *HyperLogLogPlus) estimateBias(est float64) float64 {
func (h *HyperLogLogPlus) Count() uint64 {
if h.sparse {
h.mergeSparse()
}

if h.sparse {
return uint64(linearCounting(mPrime, mPrime-uint32(h.sparseList.Count)))
}

Expand Down
28 changes: 24 additions & 4 deletions hyperloglogplus_test.go
Expand Up @@ -86,7 +86,7 @@ func TestHLLPPPrecisionNoSparse(t *testing.T) {
func TestHLLPPToNormal(t *testing.T) {
h, _ := NewPlus(16)
h.Add(fakeHash64(0x00010fffffffffff))
h.toNormal()
h.mergeSparseAndToNormal()
c := h.Count()
if c != 1 {
t.Error(c)
Expand All @@ -103,8 +103,7 @@ func TestHLLPPToNormal(t *testing.T) {
h.Add(fakeHash64(0x0003000000000001))
h.Add(fakeHash64(0xff03700000000000))
h.Add(fakeHash64(0xff03080000000000))
h.mergeSparse()
h.toNormal()
h.mergeSparseAndToNormal()

n := h.reg[1]
if n != 5 {
Expand Down Expand Up @@ -326,7 +325,7 @@ func TestHLLMergeMixedConvertToNormal(t *testing.T) {
h.Add(fakeHash64(0x00050fffffffffff))
h.Add(fakeHash64(0x00050fffffffffff))
// h is normal, h2 should be converted too.
h.toNormal()
h.mergeSparseAndToNormal()

h2, _ := NewPlus(16)
h2.Merge(h)
Expand Down Expand Up @@ -574,3 +573,24 @@ func TestHLLPPToNormalWhenSparseIsTooBig(t *testing.T) {
t.Error("h should be converted to normal")
}
}

// TestHLLPPToNormalWhenCountIsCalledOften checks that a sketch which has
// Count called after every Add still ends up in the normal representation.
//
// It adds 128 distinct hashes to a precision-7 sketch, calling Count after
// each one, then adds one more hash. At that point the sketch is expected to
// still be sparse; the following Count call is expected to convert it.
func TestHLLPPToNormalWhenCountIsCalledOften(t *testing.T) {
	h, err := NewPlus(7)
	if err != nil {
		t.Fatal(err)
	}

	for i := 0; i < 128; i++ {
		// Widen to uint64 before shifting: int is only 32 bits on 32-bit
		// targets, where i << 39 would evaluate to 0 for every i and the
		// loop would keep adding the same hash.
		h.Add(fakeHash64(uint64(i) << 39))
		h.Count()
	}

	h.Add(fakeHash64(1))

	if !h.sparse {
		t.Error("h should still be sparse")
	}

	h.Count()

	if h.sparse {
		t.Error("h should be converted to normal")
	}
}

0 comments on commit a0107a5

Please sign in to comment.