Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/45715-implement-roaring-bitmaps
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
- Implement roaring bitmaps in historical data collection for improved performance.
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ require (
github.com/Masterminds/semver v1.5.0
github.com/Masterminds/semver/v3 v3.3.1
github.com/MicahParks/jwkset v0.11.0
github.com/RoaringBitmap/roaring v1.9.4
github.com/RobotsAndPencils/buford v0.14.0
github.com/VividCortex/mysqlerr v0.0.0-20170204212430-6c6b55f8796f
github.com/WatchBeam/clock v0.0.0-20170901150240-b08e6b4da7ea
Expand Down Expand Up @@ -226,6 +227,7 @@ require (
github.com/aws/aws-sdk-go-v2/service/sso v1.30.13 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.17 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bits-and-blooms/bitset v1.12.0 // indirect
github.com/c-bata/go-prompt v0.2.3 // indirect
github.com/cavaliergopher/cpio v1.0.1 // indirect
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
Expand Down Expand Up @@ -315,6 +317,7 @@ require (
github.com/moby/sys/signal v0.7.0 // indirect
github.com/moby/sys/user v0.3.0 // indirect
github.com/moby/sys/userns v0.1.0 // indirect
github.com/mschoch/smat v0.2.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/nats-io/jwt/v2 v2.8.1 // indirect
github.com/nats-io/nkeys v0.4.15 // indirect
Expand Down
6 changes: 6 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ github.com/ProtonMail/go-mime v0.0.0-20220302105931-303f85f7fe0f/go.mod h1:NYt+V
github.com/ProtonMail/gopenpgp/v2 v2.2.2 h1:u2m7xt+CZWj88qK1UUNBoXeJCFJwJCZ/Ff4ymGoxEXs=
github.com/ProtonMail/gopenpgp/v2 v2.2.2/go.mod h1:ajUlBGvxMH1UBZnaYO3d1FSVzjiC6kK9XlZYGiDCvpM=
github.com/PuerkitoBio/goquery v1.7.1/go.mod h1:XY0pP4kfraEmmV1O7Uf6XyjoslwsneBbgeDjLYuN8xY=
github.com/RoaringBitmap/roaring v1.9.4 h1:yhEIoH4YezLYT04s1nHehNO64EKFTop/wBhxv2QzDdQ=
github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90=
github.com/RobotsAndPencils/buford v0.14.0 h1:+d18IMEisYlRZZYfe6uFlmQGbT07kWro25V35fGptZM=
github.com/RobotsAndPencils/buford v0.14.0/go.mod h1:F5FvdB/nkMby8Pge6HFpPHgLOeUZne/iE5wKzvx64Y0=
github.com/VividCortex/gohistogram v1.0.0 h1:6+hBz+qvs0JOrrNhhmR7lFxo5sINxBCGXrdtl/UvroE=
Expand Down Expand Up @@ -172,6 +174,8 @@ github.com/beevik/ntp v0.3.0 h1:xzVrPrE4ziasFXgBVBZJDP0Wg/KpMwk2KHJ4Ba8GrDw=
github.com/beevik/ntp v0.3.0/go.mod h1:hIHWr+l3+/clUnF44zdK+CWW7fO8dR5cIylAQ76NRpg=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZtmFVPHmA=
github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/blakesmith/ar v0.0.0-20190502131153-809d4375e1fb h1:m935MPodAbYS46DG4pJSv7WO+VECIWUQ7OJYSoTrMh4=
github.com/blakesmith/ar v0.0.0-20190502131153-809d4375e1fb/go.mod h1:PkYb9DJNAwrSvRx5DYA+gUcOIgTGVMNkfSCbZM8cWpI=
github.com/bmatcuk/doublestar/v4 v4.10.0 h1:zU9WiOla1YA122oLM6i4EXvGW62DvKZVxIe6TYWexEs=
Expand Down Expand Up @@ -674,6 +678,8 @@ github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/nats-io/jwt/v2 v2.8.1 h1:V0xpGuD/N8Mi+fQNDynXohVvp7ZztevW5io8CUWlPmU=
Expand Down
6 changes: 5 additions & 1 deletion server/chart/api/chart.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package api
import (
"context"
"time"

"github.com/RoaringBitmap/roaring"
)

// SampleStrategy describes how a dataset's samples combine within a bucket and
Expand Down Expand Up @@ -92,13 +94,15 @@ type DatasetStore interface {

// RecordBucketData writes one or more entity bitmaps for the given bucket
// using the specified sample strategy. See SampleStrategy for semantics.
// Bitmaps are passed in op form (*roaring.Bitmap); the datastore
// serializes via chart.BitmapToBlob at the storage boundary.
RecordBucketData(
ctx context.Context,
dataset string,
bucketStart time.Time,
bucketSize time.Duration,
strategy SampleStrategy,
entityBitmaps map[string][]byte,
entityBitmaps map[string]*roaring.Bitmap,
) error
}

Expand Down
256 changes: 193 additions & 63 deletions server/chart/blob.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,105 +2,235 @@
// shared constants for the chart bounded context. Public API types live in
// server/chart/api; internal types (HostFilter, Datastore) live in
// server/chart/internal/types.
//
// # Bitmap encoding
//
// Host-set bitmaps are stored in host_scd_data.host_bitmap. Two on-disk
// formats are supported, discriminated by the host_scd_data.encoding_type
// column:
//
// - EncodingDense (0): a raw bit-array sized to (max_id_in_set / 8) + 1.
// Bit n is set iff host n is in the set. This is the original format;
// legacy rows written before the encoding_type column was introduced are
// read back as encoding_type = 0 via the column DEFAULT.
//
// - EncodingRoaring (1): the standard portable RoaringBitmap/roaring
// serialization (Bitmap.ToBytes() output). All new writes use this
// encoding; legacy dense rows are decoded into roaring at the I/O
// boundary via DecodeBitmap and either age out via retention or are
// overwritten on the next state transition.
//
// # Storage form vs op form
//
// Two distinct in-memory representations:
//
// - Blob{Bytes, Encoding} — storage form. Used only at the database I/O
// boundary. Constructed by HostIDsToBlob / BitmapToBlob. Consumed by
// INSERT / UPDATE statements.
//
// - *roaring.Bitmap — op form. Used for all bitwise operations
// (BlobAND/OR/ANDNOT/Popcount) and in-memory bitmap manipulation.
// Constructed by NewBitmap or DecodeBitmap. Encoding-awareness lives
// in DecodeBitmap and BitmapToBlob only.
//
// All BitmapToBlob calls invoke RunOptimize before serializing, so the
// same host set always produces byte-equal Blob.Bytes. This is not
// load-bearing for correctness (change detection uses roaring.Equals on
// op-form bitmaps) but is a desirable storage property.
package chart

import (
"encoding/binary"
"math/bits"
"math"
"strconv"

"github.com/RoaringBitmap/roaring"
)

// Encoding identifies the on-disk format of a host_bitmap blob. The constants
// here correspond directly to the host_scd_data.encoding_type column values.
const (
EncodingDense uint8 = 0
EncodingRoaring uint8 = 1
)

// HostIDsToBlob builds a byte slice with bits set at positions corresponding to
// the given host IDs. Bit N of the blob = host ID N.
func HostIDsToBlob(ids []uint) []byte {
// Blob is the storage form of a host-set bitmap. Bytes is the serialized
// payload as written to host_scd_data.host_bitmap; Encoding is the matching
// host_scd_data.encoding_type column value. A nil Bytes represents the empty
// host set regardless of Encoding.
type Blob struct {
	// Bytes holds the serialized bitmap payload. It is nil for the empty
	// host set.
	Bytes []byte
	// Encoding tags how Bytes must be decoded: EncodingDense or
	// EncodingRoaring.
	Encoding uint8
}

// NewBitmap converts a list of host IDs into an op-form *roaring.Bitmap.
//
// IDs that cannot be stored are dropped silently: 0 (Fleet host IDs are
// AUTO_INCREMENT starting at 1) and anything above math.MaxUint32 (values are
// added to the bitmap as uint32). RunOptimize is applied before returning so
// that a later BitmapToBlob serializes the same host set deterministically.
func NewBitmap(ids []uint) *roaring.Bitmap {
	bm := roaring.New()
	for _, hostID := range ids {
		// Keep only IDs in [1, math.MaxUint32]; everything else is ignored.
		if hostID != 0 && hostID <= math.MaxUint32 {
			bm.Add(uint32(hostID))
		}
	}
	bm.RunOptimize()
	return bm
}

// BitmapToBlob converts an op-form bitmap into its storage form. The result
// always carries Encoding = EncodingRoaring.
//
// A nil or empty bitmap yields a Blob with nil Bytes. Otherwise RunOptimize is
// invoked before serializing, so callers do not need to optimize first. Note
// that RunOptimize is called on rb itself, not on a copy.
func BitmapToBlob(rb *roaring.Bitmap) Blob {
	blob := Blob{Encoding: EncodingRoaring}
	if rb != nil && !rb.IsEmpty() {
		rb.RunOptimize()
		blob.Bytes = serializeBitmap(rb)
	}
	return blob
}

// serializeBitmap is the single place where a roaring bitmap is turned into
// bytes (via Bitmap.ToBytes), so swapping the serialization format later only
// touches this function. It panics if ToBytes reports an error.
func serializeBitmap(rb *roaring.Bitmap) []byte {
	data, err := rb.ToBytes()
	if err == nil {
		return data
	}
	// A failure here is treated as a programmer error rather than a runtime
	// condition: ToBytes is only expected to fail on internal buffer issues
	// that should not be reachable for in-memory bitmaps.
	panic("chart: roaring.Bitmap.ToBytes failed: " + err.Error())
}

// HostIDsToBlob goes straight from a host-ID list to storage form by chaining
// NewBitmap and BitmapToBlob. When no usable IDs are supplied the result is
// Blob{Bytes: nil, Encoding: EncodingRoaring}.
func HostIDsToBlob(ids []uint) Blob {
	bm := NewBitmap(ids)
	return BitmapToBlob(bm)
}

// hostIDsToDenseBlob produces the legacy dense encoding: a bit-array in which
// bit N (byte N/8, bit N%8 counting from the least-significant bit) is set iff
// host ID N is in ids. The slice is sized to hold the largest ID, i.e.
// max/8 + 1 bytes. Empty input returns nil.
//
// This is the pre-roaring encoder, kept for tests and for building legacy-row
// fixtures; production writes go through HostIDsToBlob instead.
func hostIDsToDenseBlob(ids []uint) []byte {
	if len(ids) == 0 {
		return nil
	}

	// The largest ID determines how many bytes the bit-array needs.
	highest := ids[0]
	for _, id := range ids[1:] {
		if id > highest {
			highest = id
		}
	}

	dense := make([]byte, highest/8+1)
	for _, id := range ids {
		byteIdx, bitIdx := id/8, id%8
		dense[byteIdx] |= byte(1) << bitIdx
	}
	return dense
}

// BlobPopcount returns the number of set bits in the blob.
func BlobPopcount(blob []byte) int {
count := 0
// Process 8 bytes at a time for performance.
i := 0
for ; i+8 <= len(blob); i += 8 {
v := binary.LittleEndian.Uint64(blob[i : i+8])
count += bits.OnesCount64(v)
// DecodeBitmap converts storage form to op form. Dispatches on Blob.Encoding:
// roaring blobs are deserialized via the library; legacy dense blobs are
// walked byte-by-byte and each set bit added to a fresh roaring bitmap.
// A nil or empty Bytes slice returns an empty bitmap regardless of Encoding.
// An unknown encoding value returns an error.
func DecodeBitmap(b Blob) (*roaring.Bitmap, error) {
if len(b.Bytes) == 0 {
return roaring.New(), nil
}
for ; i < len(blob); i++ {
count += bits.OnesCount8(blob[i])
switch b.Encoding {
case EncodingRoaring:
rb := roaring.New()
if _, err := rb.FromBuffer(b.Bytes); err != nil {
return nil, err
}
return rb, nil
case EncodingDense:
return decodeDense(b.Bytes), nil
default:
return nil, errUnknownEncoding(b.Encoding)
}
return count
}

// BlobAND returns a new blob that is the bitwise AND of a and b.
// The result length is min(len(a), len(b)) — bits beyond the shorter blob are implicitly zero.
func BlobAND(a, b []byte) []byte {
if a == nil || b == nil {
return nil
// decodeDense walks a dense bitmap byte-by-byte and inserts each set bit's
// position as a uint32 into a fresh roaring bitmap. O(byte count) work.
func decodeDense(blob []byte) *roaring.Bitmap {
rb := roaring.New()
for i, byteVal := range blob {
if byteVal == 0 {
continue
}
base := uint32(i) * 8
for bit := range uint32(8) {
if byteVal&(1<<bit) != 0 {
rb.Add(base + bit)
}
}
}
n := min(len(a), len(b))
if n == 0 {
return rb
}

// errUnknownEncoding is an error value carrying a bitmap encoding tag that is
// not one of the known Encoding* constants.
type errUnknownEncoding uint8

// Error implements the error interface, reporting the offending encoding
// value in decimal.
func (e errUnknownEncoding) Error() string {
	const prefix = "chart: unknown bitmap encoding "
	return prefix + strconv.FormatUint(uint64(e), 10)
}

// BitmapToHostIDs returns the set bits of a *roaring.Bitmap as a sorted []uint.
// Thin convenience over roaring.Bitmap.ToArray (which returns []uint32) for
// callers that work in uint at the Fleet boundary.
func BitmapToHostIDs(rb *roaring.Bitmap) []uint {
if rb == nil {
return nil
}
result := make([]byte, n)
a = a[:n]
b = b[:n]
for i := range n {
result[i] = a[i] & b[i] //nolint:gosec // a and b are bounded to n via slicing above
arr := rb.ToArray()
out := make([]uint, len(arr))
for i, v := range arr {
out[i] = uint(v)
}
return result
return out
}

// BlobANDNOT returns a new blob equal to a with the bits set in mask cleared.
// Result length is len(a). If mask is shorter than a, it zero-extends — high
// bytes of a pass through unchanged. If mask is longer than a, the excess
// bytes of mask are ignored.
func BlobANDNOT(a, mask []byte) []byte {
if len(a) == 0 {
return nil
}
out := make([]byte, len(a))
n := min(len(a), len(mask))
bitsToMask := a[:n]
sizedMask := mask[:n]
for i := range n {
out[i] = bitsToMask[i] &^ sizedMask[i] //nolint:gosec // bitsToMask and sizedMask are bounded to n via slicing above
// BlobPopcount returns the cardinality of the bitmap. A nil bitmap is treated
// as the empty set.
func BlobPopcount(rb *roaring.Bitmap) uint64 {
if rb == nil {
return 0
}
// If mask is shorter than a, copy the remaining high bytes unchanged.
if n < len(a) {
copy(out[n:], a[n:])
return rb.GetCardinality()
}

// BlobAND returns the intersection of a and b as a new bitmap. nil operands
// are treated as the empty set; the result is the empty set.
func BlobAND(a, b *roaring.Bitmap) *roaring.Bitmap {
if a == nil || b == nil {
return roaring.New()
}
return out
return roaring.And(a, b)
}

// BlobOR returns a new blob that is the bitwise OR of a and b.
// The result length is max(len(a), len(b)) — the shorter blob is zero-extended.
func BlobOR(a, b []byte) []byte {
long, short := a, b
if len(b) > len(a) {
long, short = b, a
// BlobOR returns the union of a and b as a new bitmap. nil operands are
// treated as the empty set.
func BlobOR(a, b *roaring.Bitmap) *roaring.Bitmap {
switch {
case a == nil && b == nil:
return roaring.New()
case a == nil:
return b.Clone()
case b == nil:
return a.Clone()
}
if len(long) == 0 {
return nil
return roaring.Or(a, b)
}

// BlobANDNOT returns a \ mask: the bits set in a but not in mask, as a new
// bitmap. nil a returns the empty set; nil mask returns a clone of a.
func BlobANDNOT(a, mask *roaring.Bitmap) *roaring.Bitmap {
if a == nil {
return roaring.New()
}
result := make([]byte, len(long))
copy(result, long)
for i := range short {
result[i] |= short[i]
if mask == nil {
return a.Clone()
}
return result
return roaring.AndNot(a, mask)
}
Loading
Loading