Skip to content

Commit

Permalink
Merge pull request #17 from ianwilkes/master
Browse files Browse the repository at this point in the history
retain sparse representation during Merge
  • Loading branch information
clarkduvall committed May 29, 2017
2 parents 97d5a14 + 73f64ef commit 6eb69df
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 12 deletions.
32 changes: 22 additions & 10 deletions hyperloglogplus.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,13 +140,7 @@ func (h *HyperLogLogPlus) Add(item Hash64) {
x := item.Sum64()
if h.sparse {
h.tmpSet.Add(h.encodeHash(x))

if uint32(len(h.tmpSet))*100 > h.m {
h.mergeSparse()
if uint32(h.sparseList.Len()) > h.m {
h.toNormal()
}
}
h.toNormalIfSparseTooBig()
} else {
i := eb64(x, 64, 64-h.p) // {x63,...,x64-p}
w := x<<h.p | 1<<(h.p-1) // {x63-p,...,x0}
Expand All @@ -159,13 +153,22 @@ func (h *HyperLogLogPlus) Add(item Hash64) {
}

// Merge takes another HyperLogLogPlus and combines it with HyperLogLogPlus h.
// If both h and other use the sparse representation, h stays sparse unless it
// grows too big; otherwise a sparse h is converted to the normal representation.
func (h *HyperLogLogPlus) Merge(other *HyperLogLogPlus) error {
if h.p != other.p {
return errors.New("precisions must be equal")
}

if h.sparse && other.sparse {
for k := range other.tmpSet {
h.tmpSet.Add(k)
}
for iter := other.sparseList.Iter(); iter.HasNext(); {
h.tmpSet.Add(iter.Next())
}
h.toNormalIfSparseTooBig()
return nil
}

if h.sparse {
h.toNormal()
}
Expand Down Expand Up @@ -194,6 +197,15 @@ func (h *HyperLogLogPlus) Merge(other *HyperLogLogPlus) error {
return nil
}

// toNormalIfSparseTooBig checks the size of the pending sparse entries and,
// when they have grown large enough, compacts them and possibly leaves the
// sparse representation.
//
// Nothing happens while len(h.tmpSet)*100 is at most h.m. Past that point
// mergeSparse is called (presumably folding tmpSet into sparseList; confirm
// against its definition), and if the sparse list then holds more than h.m
// entries the receiver is switched over with toNormal.
func (h *HyperLogLogPlus) toNormalIfSparseTooBig() {
	// Guard: the temporary set is still small relative to m.
	if uint32(len(h.tmpSet))*100 <= h.m {
		return
	}

	h.mergeSparse()

	// Only abandon the sparse form once the merged list outgrows m.
	if uint32(h.sparseList.Len()) > h.m {
		h.toNormal()
	}
}

// Estimates the bias using empirically determined values.
func (h *HyperLogLogPlus) estimateBias(est float64) float64 {
estTable, biasTable := rawEstimateData[h.p-4], biasData[h.p-4]
Expand Down Expand Up @@ -291,7 +303,7 @@ func (h *HyperLogLogPlus) GobDecode(b []byte) error {
if err := dec.Decode(&h.tmpSet); err != nil {
return err
}
h.sparseList = &compressedList{}
h.sparseList = newCompressedList(int(h.m))
if err := dec.Decode(&h.sparseList.Count); err != nil {
return err
}
Expand Down
49 changes: 47 additions & 2 deletions hyperloglogplus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,8 @@ func TestHLLMergeSparse(t *testing.T) {
t.Error(n)
}

if h2.sparse {
t.Error("Merge should convert to normal")
if !h2.sparse {
t.Error("Merge should not convert to normal")
}

if !h.sparse {
Expand Down Expand Up @@ -252,6 +252,51 @@ func TestHLLMergeNormal(t *testing.T) {
}
}

func TestHLLMergeMixed(t *testing.T) {
h, _ := NewPlus(16)
h.Add(fakeHash64(0x00010fffffffffff))
h.Add(fakeHash64(0x00020fffffffffff))
h.Add(fakeHash64(0x00030fffffffffff))
h.Add(fakeHash64(0x00040fffffffffff))
h.Add(fakeHash64(0x00050fffffffffff))
h.Add(fakeHash64(0x00050fffffffffff))

h2, _ := NewPlus(16)
h2.toNormal()
h2.Merge(h)
n := h2.Count()
if n != 5 {
t.Error(n)
}

if !h.sparse {
t.Error("Merge should not modify argument")
}

h2.Merge(h)
n = h2.Count()
if n != 5 {
t.Error(n)
}

h.Add(fakeHash64(0x00060fffffffffff))
h.Add(fakeHash64(0x00070fffffffffff))
h.Add(fakeHash64(0x00080fffffffffff))
h.Add(fakeHash64(0x00090fffffffffff))
h.Add(fakeHash64(0x000a0fffffffffff))
h.Add(fakeHash64(0x000a0fffffffffff))
n = h.Count()
if n != 10 {
t.Error(n)
}

h2.Merge(h)
n = h2.Count()
if n != 10 {
t.Error(n)
}
}

func TestHLLPPClear(t *testing.T) {
h, _ := NewPlus(16)
h.Add(fakeHash64(0x00010fffffffffff))
Expand Down

0 comments on commit 6eb69df

Please sign in to comment.