From 0053d7e4316b14ebd3a64fcdd8ccb5f35bae7a07 Mon Sep 17 00:00:00 2001 From: Scott Gress Date: Tue, 19 May 2026 07:34:29 -0700 Subject: [PATCH] Implement roaring bitmaps for historical data collection (#45709) **Related issue:** Resolves #45715 # Details This PR refactors the way the charts module stores historical data to use the [roaring bitmap](https://github.com/RoaringBitmap/roaring) package instead of saving raw bitmaps. See [this blurb](https://github.com/RoaringBitmap/roaring#how-does-roaring-compares-with-the-alternatives) to learn how roaring compresses data, but TL;DR for our purposes it represents a huge improvement especially for larger deployments where host ID numbers may be very large. In testing, some data was reduced by 96%. The majority of the changes in this PR are straight swaps of types from `[]byte` to `*roaring.Bitmap` in vars and function signatures, and updating the internals of our bit math helpers to use roaring methods instead of native AND and OR methods. I've tried to comment on all functional changes. Because the charts have already shipped, there is data in the wild in the prior "dense" format, so the code still handles dense bitmaps on _read_ but always _writes_ roaring bitmaps. The majority of the data will therefore have turned over within 30 days on its own, but I plan on a follow-up PR that will transform open rows when the cron runs so that we should be guaranteed to turn over completely within 30 days. # Checklist for submitter If some of the following don't apply, delete the relevant line. - [X] Changes file added for user-visible changes in `changes/`, `orbit/changes/` or `ee/fleetd-chrome/changes`. See [Changes files](https://github.com/fleetdm/fleet/blob/main/docs/Contributing/guides/committing-changes.md#changes-files) for more information. 
- [X] Input data is properly validated, `SELECT *` is avoided, SQL injection is prevented (using placeholders for values in statements), JS inline code is prevented especially for URL redirects, and untrusted data interpolated into shell scripts/commands is validated against shell metacharacters. ## Testing - [X] Added/updated automated tests - Tests updated to accommodate the new format, and existing unchanged tests act as proof against regression - [X] QA'd all new/changed functionality manually - Using a tool that dumps the `host_scd_data` rows data into a JSON file (with the keys being entity_id+data and the values being host IDs on that date), compared the data from the main branch and this branch and confirmed they're identical - With a host count of ~9000, some of which have IDs of over 1,000,000, the data storage requirements were: * 82,558,976 bytes for dense * 2,867,200 bytes for roaring (a 96% decrease) For unreleased bug fixes in a release candidate, one of: - [X] Confirmed that the fix is not expected to adversely impact load test results - should hugely improve - [X] Alerted the release DRI if additional load testing is needed ## Database migrations - [X] Checked schema for all modified tables for columns that will auto-update timestamps during migration. 
## Summary by CodeRabbit * **New Features** * Implemented roaring bitmaps in historical data collection to optimize bitmap handling for chart data aggregation * Added encoding support to bitmap storage schema for flexible data representation --- changes/45715-implement-roaring-bitmaps | 1 + go.mod | 3 + go.sum | 6 + server/chart/api/chart.go | 6 +- server/chart/blob.go | 256 +++++++--- server/chart/blob_test.go | 440 ++++++++++++++---- server/chart/bootstrap/bootstrap.go | 11 + server/chart/datasets.go | 7 +- server/chart/internal/mysql/data.go | 216 ++++++--- server/chart/internal/mysql/data_test.go | 150 +++--- server/chart/internal/service/host_cache.go | 13 +- .../chart/internal/service/host_cache_test.go | 21 +- server/chart/internal/service/service.go | 9 +- server/chart/internal/service/service_test.go | 57 +-- server/chart/internal/testutils/testutils.go | 62 ++- server/chart/internal/types/chart.go | 12 +- ...0518194422_AddEncodingTypeToHostSCDData.go | 34 ++ ...94422_AddEncodingTypeToHostSCDData_test.go | 58 +++ server/datastore/mysql/schema.sql | 7 +- tools/charts-backfill/README.md | 30 +- tools/charts-backfill/main.go | 254 ++++++++-- tools/charts-collect/main.go | 77 +-- 22 files changed, 1289 insertions(+), 441 deletions(-) create mode 100644 changes/45715-implement-roaring-bitmaps create mode 100644 server/datastore/mysql/migrations/tables/20260518194422_AddEncodingTypeToHostSCDData.go create mode 100644 server/datastore/mysql/migrations/tables/20260518194422_AddEncodingTypeToHostSCDData_test.go diff --git a/changes/45715-implement-roaring-bitmaps b/changes/45715-implement-roaring-bitmaps new file mode 100644 index 00000000000..9a4b6683b96 --- /dev/null +++ b/changes/45715-implement-roaring-bitmaps @@ -0,0 +1 @@ +- Implement roaring bitmaps in historical data collection for improved performance. 
\ No newline at end of file diff --git a/go.mod b/go.mod index a4b4c4ccd65..d2c1afda9dc 100644 --- a/go.mod +++ b/go.mod @@ -11,6 +11,7 @@ require ( github.com/Masterminds/semver v1.5.0 github.com/Masterminds/semver/v3 v3.3.1 github.com/MicahParks/jwkset v0.11.0 + github.com/RoaringBitmap/roaring v1.9.4 github.com/RobotsAndPencils/buford v0.14.0 github.com/VividCortex/mysqlerr v0.0.0-20170204212430-6c6b55f8796f github.com/WatchBeam/clock v0.0.0-20170901150240-b08e6b4da7ea @@ -226,6 +227,7 @@ require ( github.com/aws/aws-sdk-go-v2/service/sso v1.30.13 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.17 // indirect github.com/beorn7/perks v1.0.1 // indirect + github.com/bits-and-blooms/bitset v1.12.0 // indirect github.com/c-bata/go-prompt v0.2.3 // indirect github.com/cavaliergopher/cpio v1.0.1 // indirect github.com/cenkalti/backoff/v5 v5.0.3 // indirect @@ -315,6 +317,7 @@ require ( github.com/moby/sys/signal v0.7.0 // indirect github.com/moby/sys/user v0.3.0 // indirect github.com/moby/sys/userns v0.1.0 // indirect + github.com/mschoch/smat v0.2.0 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/nats-io/jwt/v2 v2.8.1 // indirect github.com/nats-io/nkeys v0.4.15 // indirect diff --git a/go.sum b/go.sum index 078739e83cc..e5a0874a5d2 100644 --- a/go.sum +++ b/go.sum @@ -69,6 +69,8 @@ github.com/ProtonMail/go-mime v0.0.0-20220302105931-303f85f7fe0f/go.mod h1:NYt+V github.com/ProtonMail/gopenpgp/v2 v2.2.2 h1:u2m7xt+CZWj88qK1UUNBoXeJCFJwJCZ/Ff4ymGoxEXs= github.com/ProtonMail/gopenpgp/v2 v2.2.2/go.mod h1:ajUlBGvxMH1UBZnaYO3d1FSVzjiC6kK9XlZYGiDCvpM= github.com/PuerkitoBio/goquery v1.7.1/go.mod h1:XY0pP4kfraEmmV1O7Uf6XyjoslwsneBbgeDjLYuN8xY= +github.com/RoaringBitmap/roaring v1.9.4 h1:yhEIoH4YezLYT04s1nHehNO64EKFTop/wBhxv2QzDdQ= +github.com/RoaringBitmap/roaring v1.9.4/go.mod h1:6AXUsoIEzDTFFQCe1RbGA6uFONMhvejWj5rqITANK90= github.com/RobotsAndPencils/buford v0.14.0 
h1:+d18IMEisYlRZZYfe6uFlmQGbT07kWro25V35fGptZM= github.com/RobotsAndPencils/buford v0.14.0/go.mod h1:F5FvdB/nkMby8Pge6HFpPHgLOeUZne/iE5wKzvx64Y0= github.com/VividCortex/gohistogram v1.0.0 h1:6+hBz+qvs0JOrrNhhmR7lFxo5sINxBCGXrdtl/UvroE= @@ -172,6 +174,8 @@ github.com/beevik/ntp v0.3.0 h1:xzVrPrE4ziasFXgBVBZJDP0Wg/KpMwk2KHJ4Ba8GrDw= github.com/beevik/ntp v0.3.0/go.mod h1:hIHWr+l3+/clUnF44zdK+CWW7fO8dR5cIylAQ76NRpg= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZtmFVPHmA= +github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/blakesmith/ar v0.0.0-20190502131153-809d4375e1fb h1:m935MPodAbYS46DG4pJSv7WO+VECIWUQ7OJYSoTrMh4= github.com/blakesmith/ar v0.0.0-20190502131153-809d4375e1fb/go.mod h1:PkYb9DJNAwrSvRx5DYA+gUcOIgTGVMNkfSCbZM8cWpI= github.com/bmatcuk/doublestar/v4 v4.10.0 h1:zU9WiOla1YA122oLM6i4EXvGW62DvKZVxIe6TYWexEs= @@ -674,6 +678,8 @@ github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= +github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= +github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/nats-io/jwt/v2 v2.8.1 h1:V0xpGuD/N8Mi+fQNDynXohVvp7ZztevW5io8CUWlPmU= diff --git a/server/chart/api/chart.go 
b/server/chart/api/chart.go index 144caed40ff..8340149a0ba 100644 --- a/server/chart/api/chart.go +++ b/server/chart/api/chart.go @@ -3,6 +3,8 @@ package api import ( "context" "time" + + "github.com/RoaringBitmap/roaring" ) // SampleStrategy describes how a dataset's samples combine within a bucket and @@ -92,13 +94,15 @@ type DatasetStore interface { // RecordBucketData writes one or more entity bitmaps for the given bucket // using the specified sample strategy. See SampleStrategy for semantics. + // Bitmaps are passed in op form (*roaring.Bitmap); the datastore + // serializes via chart.BitmapToBlob at the storage boundary. RecordBucketData( ctx context.Context, dataset string, bucketStart time.Time, bucketSize time.Duration, strategy SampleStrategy, - entityBitmaps map[string][]byte, + entityBitmaps map[string]*roaring.Bitmap, ) error } diff --git a/server/chart/blob.go b/server/chart/blob.go index 73b78e3d481..73115e258cc 100644 --- a/server/chart/blob.go +++ b/server/chart/blob.go @@ -2,28 +2,126 @@ // shared constants for the chart bounded context. Public API types live in // server/chart/api; internal types (HostFilter, Datastore) live in // server/chart/internal/types. +// +// # Bitmap encoding +// +// Host-set bitmaps are stored in host_scd_data.host_bitmap. Two on-disk +// formats are supported, discriminated by the host_scd_data.encoding_type +// column: +// +// - EncodingDense (0): a raw bit-array sized to (max_id_in_set / 8) + 1. +// Bit n set iff host n is in the set. The original format; legacy rows +// written before this encoding was introduced read with encoding_type = 0 +// via the column DEFAULT. +// +// - EncodingRoaring (1): the standard portable RoaringBitmap/roaring +// serialization (Bitmap.ToBytes() output). All new writes use this +// encoding; legacy dense rows are decoded into roaring at the I/O +// boundary via DecodeBitmap and either age out via retention or are +// overwritten on the next state transition. 
+// +// # Storage form vs op form +// +// Two distinct in-memory representations: +// +// - Blob{Bytes, Encoding} — storage form. Used only at the database I/O +// boundary. Constructed by HostIDsToBlob / BitmapToBlob. Consumed by +// INSERT / UPDATE statements. +// +// - *roaring.Bitmap — op form. Used for all bitwise operations +// (BlobAND/OR/ANDNOT/Popcount) and in-memory bitmap manipulation. +// Constructed by NewBitmap or DecodeBitmap. Encoding-awareness lives +// in DecodeBitmap and BitmapToBlob only. +// +// All BitmapToBlob calls invoke RunOptimize before serializing, so the +// same host set always produces byte-equal Blob.Bytes. This is not +// load-bearing for correctness (change detection uses roaring.Equals on +// op-form bitmaps) but is a desirable storage property. package chart import ( - "encoding/binary" - "math/bits" + "math" + "strconv" + + "github.com/RoaringBitmap/roaring" +) + +// Encoding identifies the on-disk format of a host_bitmap blob. The constants +// here correspond directly to the host_scd_data.encoding_type column values. +const ( + EncodingDense uint8 = 0 + EncodingRoaring uint8 = 1 ) -// HostIDsToBlob builds a byte slice with bits set at positions corresponding to -// the given host IDs. Bit N of the blob = host ID N. -func HostIDsToBlob(ids []uint) []byte { +// Blob is the storage form of a host-set bitmap. Bytes is the serialized +// payload as written to host_scd_data.host_bitmap; Encoding is the matching +// host_scd_data.encoding_type column value. A nil Bytes represents the empty +// host set regardless of Encoding. +type Blob struct { + Bytes []byte + Encoding uint8 +} + +// NewBitmap builds a *roaring.Bitmap from a host ID list. Calls RunOptimize +// before returning so that subsequent serialization (via BitmapToBlob) is +// byte-deterministic for the input set. Host IDs of 0 are skipped — Fleet +// host IDs are AUTO_INCREMENT starting at 1. 
+func NewBitmap(ids []uint) *roaring.Bitmap { + rb := roaring.New() + for _, id := range ids { + if id == 0 || id > math.MaxUint32 { + continue + } + rb.Add(uint32(id)) + } + rb.RunOptimize() + return rb +} + +// BitmapToBlob serializes a *roaring.Bitmap into the storage form. Always +// returns Encoding = EncodingRoaring. Calls RunOptimize defensively (safe to +// invoke multiple times) so callers do not need to remember to do so. +// Bitmaps with cardinality 0 serialize to a nil byte slice. +func BitmapToBlob(rb *roaring.Bitmap) Blob { + if rb == nil || rb.IsEmpty() { + return Blob{Encoding: EncodingRoaring} + } + rb.RunOptimize() + return Blob{Bytes: serializeBitmap(rb), Encoding: EncodingRoaring} +} + +// serializeBitmap wraps Bitmap.ToBytes; isolated so the encoder path has a +// single call site if we ever swap serialization formats. +func serializeBitmap(rb *roaring.Bitmap) []byte { + out, err := rb.ToBytes() + if err != nil { + // Bitmap.ToBytes only errors on internal buffer issues that aren't + // reachable for in-memory bitmaps; treat as a programmer error. + panic("chart: roaring.Bitmap.ToBytes failed: " + err.Error()) + } + return out +} + +// HostIDsToBlob is the convenience composition of NewBitmap + BitmapToBlob for +// callers going directly from a host-id list to storage form. Empty input +// returns Blob{Bytes: nil, Encoding: EncodingRoaring}. +func HostIDsToBlob(ids []uint) Blob { + return BitmapToBlob(NewBitmap(ids)) +} + +// hostIDsToDenseBlob is the pre-change dense encoder, retained for tests and +// for constructing legacy-row fixtures in the migration tests. Production +// writes go through HostIDsToBlob (which produces roaring) instead. +func hostIDsToDenseBlob(ids []uint) []byte { if len(ids) == 0 { return nil } - - // Find the max ID to size the blob. 
var maxID uint for _, id := range ids { if id > maxID { maxID = id } } - blob := make([]byte, maxID/8+1) for _, id := range ids { blob[id/8] |= 1 << (id % 8) @@ -31,76 +129,108 @@ func HostIDsToBlob(ids []uint) []byte { return blob } -// BlobPopcount returns the number of set bits in the blob. -func BlobPopcount(blob []byte) int { - count := 0 - // Process 8 bytes at a time for performance. - i := 0 - for ; i+8 <= len(blob); i += 8 { - v := binary.LittleEndian.Uint64(blob[i : i+8]) - count += bits.OnesCount64(v) +// DecodeBitmap converts storage form to op form. Dispatches on Blob.Encoding: +// roaring blobs are deserialized via the library; legacy dense blobs are +// walked byte-by-byte and each set bit added to a fresh roaring bitmap. +// A nil or empty Bytes slice returns an empty bitmap regardless of Encoding. +// An unknown encoding value returns an error. +func DecodeBitmap(b Blob) (*roaring.Bitmap, error) { + if len(b.Bytes) == 0 { + return roaring.New(), nil } - for ; i < len(blob); i++ { - count += bits.OnesCount8(blob[i]) + switch b.Encoding { + case EncodingRoaring: + rb := roaring.New() + if _, err := rb.FromBuffer(b.Bytes); err != nil { + return nil, err + } + return rb, nil + case EncodingDense: + return decodeDense(b.Bytes), nil + default: + return nil, errUnknownEncoding(b.Encoding) } - return count } -// BlobAND returns a new blob that is the bitwise AND of a and b. -// The result length is min(len(a), len(b)) — bits beyond the shorter blob are implicitly zero. -func BlobAND(a, b []byte) []byte { - if a == nil || b == nil { - return nil +// decodeDense walks a dense bitmap byte-by-byte and inserts each set bit's +// position as a uint32 into a fresh roaring bitmap. O(byte count) work. 
+func decodeDense(blob []byte) *roaring.Bitmap { + rb := roaring.New() + for i, byteVal := range blob { + if byteVal == 0 { + continue + } + base := uint32(i) * 8 + for bit := range uint32(8) { + if byteVal&(1< len(a) { - long, short = b, a +// BlobOR returns the union of a and b as a new bitmap. nil operands are +// treated as the empty set. +func BlobOR(a, b *roaring.Bitmap) *roaring.Bitmap { + switch { + case a == nil && b == nil: + return roaring.New() + case a == nil: + return b.Clone() + case b == nil: + return a.Clone() } - if len(long) == 0 { - return nil + return roaring.Or(a, b) +} + +// BlobANDNOT returns a \ mask: the bits set in a but not in mask, as a new +// bitmap. nil a returns the empty set; nil mask returns a clone of a. +func BlobANDNOT(a, mask *roaring.Bitmap) *roaring.Bitmap { + if a == nil { + return roaring.New() } - result := make([]byte, len(long)) - copy(result, long) - for i := range short { - result[i] |= short[i] + if mask == nil { + return a.Clone() } - return result + return roaring.AndNot(a, mask) } diff --git a/server/chart/blob_test.go b/server/chart/blob_test.go index ae0f9a0a446..80917680b67 100644 --- a/server/chart/blob_test.go +++ b/server/chart/blob_test.go @@ -1,151 +1,403 @@ package chart import ( + "bytes" "testing" + "github.com/RoaringBitmap/roaring" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +// chunkSize is the host-ID span covered by a single roaring container (2^16). 
+const chunkSize uint = 1 << 16 + +func TestNewBitmap(t *testing.T) { + t.Run("empty input is empty bitmap", func(t *testing.T) { + rb := NewBitmap(nil) + assert.True(t, rb.IsEmpty()) + assert.Equal(t, uint64(0), rb.GetCardinality()) + }) + + t.Run("host id 0 is skipped", func(t *testing.T) { + rb := NewBitmap([]uint{0, 1, 2}) + assert.Equal(t, uint64(2), rb.GetCardinality()) + assert.False(t, rb.Contains(0)) + assert.True(t, rb.Contains(1)) + assert.True(t, rb.Contains(2)) + }) + + t.Run("duplicates collapse", func(t *testing.T) { + rb := NewBitmap([]uint{5, 5, 5, 10}) + assert.Equal(t, uint64(2), rb.GetCardinality()) + }) + + t.Run("multi-chunk host ids", func(t *testing.T) { + rb := NewBitmap([]uint{7, 99, chunkSize + 5, 3*chunkSize + 10}) + assert.Equal(t, uint64(4), rb.GetCardinality()) + }) +} + func TestHostIDsToBlob(t *testing.T) { - t.Run("nil for empty input", func(t *testing.T) { - assert.Nil(t, HostIDsToBlob(nil)) - assert.Nil(t, HostIDsToBlob([]uint{})) + t.Run("empty input produces nil bytes tagged roaring", func(t *testing.T) { + b := HostIDsToBlob(nil) + assert.Nil(t, b.Bytes) + assert.Equal(t, EncodingRoaring, b.Encoding) + + b = HostIDsToBlob([]uint{}) + assert.Nil(t, b.Bytes) + assert.Equal(t, EncodingRoaring, b.Encoding) }) - t.Run("single host", func(t *testing.T) { - blob := HostIDsToBlob([]uint{0}) - require.Len(t, blob, 1) - assert.Equal(t, byte(0x01), blob[0]) + t.Run("non-empty input always tagged roaring", func(t *testing.T) { + b := HostIDsToBlob([]uint{7}) + assert.NotNil(t, b.Bytes) + assert.Equal(t, EncodingRoaring, b.Encoding) }) - t.Run("host ID 7", func(t *testing.T) { - blob := HostIDsToBlob([]uint{7}) - require.Len(t, blob, 1) - assert.Equal(t, byte(0x80), blob[0]) + t.Run("round trip via DecodeBitmap matches input set", func(t *testing.T) { + ids := []uint{1, 5, 10, 42, 100, 255, chunkSize + 7, 2*chunkSize + 3} + blob := HostIDsToBlob(ids) + rb, err := DecodeBitmap(blob) + require.NoError(t, err) + assert.Equal(t, 
uint64(len(ids)), rb.GetCardinality()) + for _, id := range ids { + assert.Truef(t, rb.Contains(uint32(id)), "expected bit %d to be set", id) //nolint:gosec // G115: test IDs fit in uint32 + } }) +} - t.Run("host ID 8 starts second byte", func(t *testing.T) { - blob := HostIDsToBlob([]uint{8}) - require.Len(t, blob, 2) - assert.Equal(t, byte(0x00), blob[0]) - assert.Equal(t, byte(0x01), blob[1]) +func TestBitmapToBlob(t *testing.T) { + t.Run("nil bitmap produces empty blob", func(t *testing.T) { + b := BitmapToBlob(nil) + assert.Nil(t, b.Bytes) + assert.Equal(t, EncodingRoaring, b.Encoding) }) - t.Run("multiple hosts", func(t *testing.T) { - blob := HostIDsToBlob([]uint{0, 1, 8, 16}) - require.Len(t, blob, 3) - assert.Equal(t, byte(0x03), blob[0]) // bits 0,1 - assert.Equal(t, byte(0x01), blob[1]) // bit 8 - assert.Equal(t, byte(0x01), blob[2]) // bit 16 + t.Run("empty bitmap produces empty blob", func(t *testing.T) { + b := BitmapToBlob(roaring.New()) + assert.Nil(t, b.Bytes) + assert.Equal(t, EncodingRoaring, b.Encoding) }) - t.Run("large host ID", func(t *testing.T) { - blob := HostIDsToBlob([]uint{1000}) - require.Len(t, blob, 126) // 1000/8+1 - assert.Equal(t, byte(0x01), blob[125]) + t.Run("non-empty bitmap produces non-nil bytes", func(t *testing.T) { + rb := roaring.BitmapOf(1, 2, 3) + b := BitmapToBlob(rb) + assert.NotEmpty(t, b.Bytes) + assert.Equal(t, EncodingRoaring, b.Encoding) + }) +} + +func TestDecodeBitmap(t *testing.T) { + t.Run("nil bytes returns empty bitmap", func(t *testing.T) { + rb, err := DecodeBitmap(Blob{Encoding: EncodingRoaring}) + require.NoError(t, err) + assert.True(t, rb.IsEmpty()) + + rb, err = DecodeBitmap(Blob{Encoding: EncodingDense}) + require.NoError(t, err) + assert.True(t, rb.IsEmpty()) + }) + + t.Run("roaring round trip", func(t *testing.T) { + original := roaring.BitmapOf(1, 7, 99, 12345) + original.RunOptimize() + bytesData := serializeBitmap(original) + + rb, err := DecodeBitmap(Blob{Bytes: bytesData, Encoding: 
EncodingRoaring}) + require.NoError(t, err) + assert.True(t, rb.Equals(original)) + }) + + t.Run("dense round trip", func(t *testing.T) { + ids := []uint{1, 7, 99, 1234} + dense := hostIDsToDenseBlob(ids) + + rb, err := DecodeBitmap(Blob{Bytes: dense, Encoding: EncodingDense}) + require.NoError(t, err) + assert.Equal(t, uint64(len(ids)), rb.GetCardinality()) + for _, id := range ids { + assert.True(t, rb.Contains(uint32(id))) //nolint:gosec // G115: test IDs fit in uint32 + } + }) + + t.Run("single-byte dense", func(t *testing.T) { + // 0x82 = bits 1 and 7 set + rb, err := DecodeBitmap(Blob{Bytes: []byte{0x82}, Encoding: EncodingDense}) + require.NoError(t, err) + assert.Equal(t, uint64(2), rb.GetCardinality()) + assert.True(t, rb.Contains(1)) + assert.True(t, rb.Contains(7)) + }) + + t.Run("dense spanning chunk boundary", func(t *testing.T) { + // Set a bit just below and one just above the 65536-bit chunk boundary. + ids := []uint{chunkSize - 1, chunkSize, chunkSize + 1} + dense := hostIDsToDenseBlob(ids) + + rb, err := DecodeBitmap(Blob{Bytes: dense, Encoding: EncodingDense}) + require.NoError(t, err) + for _, id := range ids { + assert.Truef(t, rb.Contains(uint32(id)), "expected bit %d to be set", id) //nolint:gosec // G115: test IDs fit in uint32 + } + }) + + t.Run("unknown encoding returns error", func(t *testing.T) { + _, err := DecodeBitmap(Blob{Bytes: []byte{0xFF}, Encoding: 99}) + require.Error(t, err) + }) +} + +func TestBitmapToHostIDs(t *testing.T) { + t.Run("nil bitmap returns nil", func(t *testing.T) { + assert.Nil(t, BitmapToHostIDs(nil)) + }) + + t.Run("empty bitmap returns empty slice", func(t *testing.T) { + out := BitmapToHostIDs(roaring.New()) + assert.Empty(t, out) + }) + + t.Run("populated bitmap returns sorted ids", func(t *testing.T) { + rb := roaring.BitmapOf(99, 7, 1, 65540) + out := BitmapToHostIDs(rb) + assert.Equal(t, []uint{1, 7, 99, 65540}, out) }) } func TestBlobPopcount(t *testing.T) { - assert.Equal(t, 0, BlobPopcount(nil)) - 
assert.Equal(t, 0, BlobPopcount([]byte{})) - assert.Equal(t, 1, BlobPopcount([]byte{0x01})) - assert.Equal(t, 8, BlobPopcount([]byte{0xFF})) - assert.Equal(t, 3, BlobPopcount([]byte{0x07})) - - // Multi-byte - assert.Equal(t, 4, BlobPopcount([]byte{0x0F, 0x00})) - assert.Equal(t, 16, BlobPopcount([]byte{0xFF, 0xFF})) - - // Exercises the uint64 fast path (>= 8 bytes) - blob := make([]byte, 16) - blob[0] = 0xFF // 8 bits - blob[15] = 0x01 // 1 bit - assert.Equal(t, 9, BlobPopcount(blob)) + t.Run("nil is zero", func(t *testing.T) { + assert.Equal(t, uint64(0), BlobPopcount(nil)) + }) + + t.Run("empty bitmap is zero", func(t *testing.T) { + assert.Equal(t, uint64(0), BlobPopcount(roaring.New())) + }) + + t.Run("counts set bits", func(t *testing.T) { + assert.Equal(t, uint64(5), BlobPopcount(roaring.BitmapOf(1, 5, 9, 100, 65540))) + }) } func TestBlobAND(t *testing.T) { - assert.Nil(t, BlobAND([]byte{}, []byte{})) - assert.Nil(t, BlobAND([]byte{0xFF}, []byte{})) + t.Run("nil operands produce empty", func(t *testing.T) { + assert.True(t, BlobAND(nil, nil).IsEmpty()) + assert.True(t, BlobAND(roaring.BitmapOf(1, 2, 3), nil).IsEmpty()) + assert.True(t, BlobAND(nil, roaring.BitmapOf(1, 2, 3)).IsEmpty()) + }) - result := BlobAND([]byte{0xFF, 0x0F}, []byte{0x0F, 0xFF}) - assert.Equal(t, []byte{0x0F, 0x0F}, result) + t.Run("intersection", func(t *testing.T) { + a := roaring.BitmapOf(1, 5, 9, 15) + b := roaring.BitmapOf(5, 9, 99) + got := BlobAND(a, b) + assert.True(t, got.Equals(roaring.BitmapOf(5, 9))) + }) + + t.Run("disjoint", func(t *testing.T) { + a := roaring.BitmapOf(1, 2, 3) + b := roaring.BitmapOf(10, 20, 30) + assert.True(t, BlobAND(a, b).IsEmpty()) + }) + + t.Run("idempotent", func(t *testing.T) { + a := roaring.BitmapOf(3, 7, 11) + assert.True(t, BlobAND(a, a).Equals(a)) + }) - // Different lengths: result is min length - result = BlobAND([]byte{0xFF, 0xFF, 0xFF}, []byte{0x0F}) - assert.Equal(t, []byte{0x0F}, result) + t.Run("does not mutate operands", func(t 
*testing.T) { + a := roaring.BitmapOf(1, 5, 9) + b := roaring.BitmapOf(5, 9, 15) + _ = BlobAND(a, b) + assert.True(t, a.Equals(roaring.BitmapOf(1, 5, 9))) + assert.True(t, b.Equals(roaring.BitmapOf(5, 9, 15))) + }) } func TestBlobOR(t *testing.T) { - assert.Nil(t, BlobOR(nil, nil)) + t.Run("both nil returns empty", func(t *testing.T) { + assert.True(t, BlobOR(nil, nil).IsEmpty()) + }) - // One nil - result := BlobOR([]byte{0x0F}, nil) - assert.Equal(t, []byte{0x0F}, result) + t.Run("one nil returns clone of the other", func(t *testing.T) { + a := roaring.BitmapOf(1, 2, 3) + got := BlobOR(a, nil) + assert.True(t, got.Equals(a)) - result = BlobOR([]byte{0xF0, 0x00}, []byte{0x0F, 0xFF}) - assert.Equal(t, []byte{0xFF, 0xFF}, result) + // Mutating result should not affect the source. + got.Remove(2) + assert.True(t, a.Contains(2)) + }) + + t.Run("union", func(t *testing.T) { + a := roaring.BitmapOf(1, 5) + b := roaring.BitmapOf(5, 9) + assert.True(t, BlobOR(a, b).Equals(roaring.BitmapOf(1, 5, 9))) + }) - // Different lengths: result is max length - result = BlobOR([]byte{0x01}, []byte{0x02, 0xFF}) - assert.Equal(t, []byte{0x03, 0xFF}, result) + t.Run("idempotent", func(t *testing.T) { + a := roaring.BitmapOf(3, 7, 11) + assert.True(t, BlobOR(a, a).Equals(a)) + }) + + t.Run("does not mutate operands", func(t *testing.T) { + a := roaring.BitmapOf(1, 5) + b := roaring.BitmapOf(5, 9) + _ = BlobOR(a, b) + assert.True(t, a.Equals(roaring.BitmapOf(1, 5))) + assert.True(t, b.Equals(roaring.BitmapOf(5, 9))) + }) } func TestBlobANDNOT(t *testing.T) { - t.Run("nil and empty a return nil", func(t *testing.T) { - assert.Nil(t, BlobANDNOT(nil, []byte{0xFF})) - assert.Nil(t, BlobANDNOT([]byte{}, []byte{0xFF})) + t.Run("nil a returns empty", func(t *testing.T) { + assert.True(t, BlobANDNOT(nil, roaring.BitmapOf(1, 2, 3)).IsEmpty()) }) - t.Run("equal-length operands", func(t *testing.T) { - result := BlobANDNOT([]byte{0xFF, 0x0F}, []byte{0x0F, 0xFF}) - assert.Equal(t, []byte{0xF0, 
0x00}, result) + t.Run("nil mask returns clone of a", func(t *testing.T) { + a := roaring.BitmapOf(1, 2, 3) + got := BlobANDNOT(a, nil) + assert.True(t, got.Equals(a)) + got.Remove(2) + assert.True(t, a.Contains(2)) }) - t.Run("mask shorter than a passes high bytes through", func(t *testing.T) { - result := BlobANDNOT([]byte{0xFF, 0xFF, 0xFF}, []byte{0x0F}) - assert.Equal(t, []byte{0xF0, 0xFF, 0xFF}, result) + t.Run("subtraction", func(t *testing.T) { + a := roaring.BitmapOf(1, 5, 9, 15) + mask := roaring.BitmapOf(5, 15) + assert.True(t, BlobANDNOT(a, mask).Equals(roaring.BitmapOf(1, 9))) }) - t.Run("nil mask leaves a unchanged", func(t *testing.T) { - result := BlobANDNOT([]byte{0xFF, 0xAA}, nil) - assert.Equal(t, []byte{0xFF, 0xAA}, result) + t.Run("mask covering a yields empty", func(t *testing.T) { + a := roaring.BitmapOf(1, 2, 3) + assert.True(t, BlobANDNOT(a, a).IsEmpty()) }) - t.Run("mask longer than a ignores excess", func(t *testing.T) { - result := BlobANDNOT([]byte{0xFF}, []byte{0x0F, 0xFF}) - assert.Equal(t, []byte{0xF0}, result) + t.Run("disjoint mask is identity", func(t *testing.T) { + a := roaring.BitmapOf(1, 2, 3) + mask := roaring.BitmapOf(10, 20) + assert.True(t, BlobANDNOT(a, mask).Equals(a)) }) - t.Run("all-zero mask is identity", func(t *testing.T) { - a := []byte{0xAB, 0xCD, 0xEF} - result := BlobANDNOT(a, []byte{0x00, 0x00, 0x00}) - assert.Equal(t, a, result) + t.Run("does not mutate operands", func(t *testing.T) { + a := roaring.BitmapOf(1, 2, 3) + mask := roaring.BitmapOf(2) + _ = BlobANDNOT(a, mask) + assert.True(t, a.Equals(roaring.BitmapOf(1, 2, 3))) + assert.True(t, mask.Equals(roaring.BitmapOf(2))) }) +} + +// TestMixedEncoding exercises the transition case where a legacy dense row is +// decoded at the boundary and used alongside a roaring operand. 
+func TestMixedEncoding(t *testing.T) { + ids := []uint{1, 5, 9} + denseBlob := Blob{Bytes: hostIDsToDenseBlob(ids), Encoding: EncodingDense} + roaringBlob := HostIDsToBlob([]uint{5, 9, 15}) + + a, err := DecodeBitmap(denseBlob) + require.NoError(t, err) + b, err := DecodeBitmap(roaringBlob) + require.NoError(t, err) - t.Run("all-ones equal-length mask clears everything", func(t *testing.T) { - result := BlobANDNOT([]byte{0xFF, 0xFF}, []byte{0xFF, 0xFF}) - assert.Equal(t, []byte{0x00, 0x00}, result) + t.Run("AND mixed-encoding", func(t *testing.T) { + assert.True(t, BlobAND(a, b).Equals(roaring.BitmapOf(5, 9))) }) - t.Run("does not mutate inputs", func(t *testing.T) { - a := []byte{0xFF, 0xFF} - mask := []byte{0x0F, 0xF0} - _ = BlobANDNOT(a, mask) - assert.Equal(t, []byte{0xFF, 0xFF}, a) - assert.Equal(t, []byte{0x0F, 0xF0}, mask) + t.Run("OR mixed-encoding", func(t *testing.T) { + assert.True(t, BlobOR(a, b).Equals(roaring.BitmapOf(1, 5, 9, 15))) }) + + t.Run("ANDNOT mixed-encoding both directions", func(t *testing.T) { + assert.True(t, BlobANDNOT(a, b).Equals(roaring.BitmapOf(1))) + assert.True(t, BlobANDNOT(b, a).Equals(roaring.BitmapOf(15))) + }) + + t.Run("popcount on decoded legacy dense", func(t *testing.T) { + assert.Equal(t, uint64(3), BlobPopcount(a)) + }) +} + +// TestContainerTypes builds bitmaps that force each roaring container type +// (array, bitmap, run) and a multi-chunk bitmap, then exercises all ops over +// the fixture matrix. Without this the bitmap and run paths are silently +// untested when the rest of the suite uses sparse-shaped inputs. +func TestContainerTypes(t *testing.T) { + // Array container: 50 scattered ids within one chunk (cardinality << 4096). + arrayIDs := make([]uint, 0, 50) + for i := range uint(50) { + arrayIDs = append(arrayIDs, 1000+i*7) + } + array := NewBitmap(arrayIDs) + + // Bitmap container: 5000 ids in one chunk (cardinality > 4096 forces bitmap). 
+ bitmapIDs := make([]uint, 0, 5000) + for i := range uint(5000) { + bitmapIDs = append(bitmapIDs, 10000+i) + } + bitmapRB := NewBitmap(bitmapIDs) + + // Run container: a contiguous range of 10000 ids — RunOptimize will pick + // a run container as the compact representation. + runIDs := make([]uint, 0, 10000) + for i := range uint(10000) { + runIDs = append(runIDs, 100+i) + } + run := NewBitmap(runIDs) + + // Multi-chunk: ids spanning ≥3 chunks across the 65,536-bit boundary. + multiIDs := []uint{ + 7, 99, chunkSize / 2, + chunkSize + 7, chunkSize + 99, + 2*chunkSize + 7, 2*chunkSize + 99, + } + multi := NewBitmap(multiIDs) + + fixtures := map[string]*roaring.Bitmap{ + "array": array, + "bitmap": bitmapRB, + "run": run, + "multi": multi, + } + + for nameA, a := range fixtures { + for nameB, b := range fixtures { + t.Run("AND/"+nameA+"_x_"+nameB, func(t *testing.T) { + got := BlobAND(a, b) + want := roaring.And(a, b) + assert.True(t, got.Equals(want)) + }) + t.Run("OR/"+nameA+"_x_"+nameB, func(t *testing.T) { + got := BlobOR(a, b) + want := roaring.Or(a, b) + assert.True(t, got.Equals(want)) + }) + t.Run("ANDNOT/"+nameA+"_x_"+nameB, func(t *testing.T) { + got := BlobANDNOT(a, b) + want := roaring.AndNot(a, b) + assert.True(t, got.Equals(want)) + }) + } + } } -func TestRoundTrip(t *testing.T) { - ids := []uint{1, 5, 10, 42, 100, 255} - blob := HostIDsToBlob(ids) - assert.Equal(t, len(ids), BlobPopcount(blob)) +// TestSerializationDeterminism asserts that the same host set produces +// byte-equal output regardless of which code path built the bitmap. Catches +// any missed RunOptimize call in the encoder chain. +func TestSerializationDeterminism(t *testing.T) { + ids := []uint{2, 100, chunkSize + 4, 2 * chunkSize} + + // Path A: build directly. + bytesA := BitmapToBlob(NewBitmap(ids)).Bytes + + // Path B: round-trip through dense. 
+ denseBlob := Blob{Bytes: hostIDsToDenseBlob(ids), Encoding: EncodingDense} + rbFromDense, err := DecodeBitmap(denseBlob) + require.NoError(t, err) + bytesB := BitmapToBlob(rbFromDense).Bytes + + // Path C: OR an empty with the source bitmap. + bytesC := BitmapToBlob(BlobOR(roaring.New(), NewBitmap(ids))).Bytes - // Filter to only even IDs - filterIDs := []uint{10, 42, 100} - filterBlob := HostIDsToBlob(filterIDs) - filtered := BlobAND(blob, filterBlob) - assert.Equal(t, 3, BlobPopcount(filtered)) + require.True(t, bytes.Equal(bytesA, bytesB), "BitmapToBlob(NewBitmap) vs BitmapToBlob(DecodeBitmap(dense)) differ:\nA=%x\nB=%x", bytesA, bytesB) + require.True(t, bytes.Equal(bytesA, bytesC), "BitmapToBlob(NewBitmap) vs BitmapToBlob(BlobOR(empty, ...)) differ:\nA=%x\nC=%x", bytesA, bytesC) } diff --git a/server/chart/bootstrap/bootstrap.go b/server/chart/bootstrap/bootstrap.go index b815c1efb5f..c94210f095d 100644 --- a/server/chart/bootstrap/bootstrap.go +++ b/server/chart/bootstrap/bootstrap.go @@ -3,6 +3,7 @@ package bootstrap import ( + "context" "log/slog" "github.com/fleetdm/fleet/v4/server/chart/api" @@ -12,6 +13,7 @@ import ( eu "github.com/fleetdm/fleet/v4/server/platform/endpointer" platform_mysql "github.com/fleetdm/fleet/v4/server/platform/mysql" "github.com/go-kit/kit/endpoint" + "github.com/jmoiron/sqlx" ) // New creates a new chart service module and returns its service and route handler. @@ -30,3 +32,12 @@ func New( return svc, routesFn } + +// TrackedCriticalCVEs returns the curated set of CVE IDs that the chart +// collector currently tracks. Exposed for development tools (e.g. +// charts-backfill) that need to mirror the production CVE-selection logic +// without constructing the full bounded context. 
+func TrackedCriticalCVEs(ctx context.Context, db *sqlx.DB, logger *slog.Logger) ([]string, error) { + ds := mysql.NewDatastore(&platform_mysql.DBConnections{Primary: db, Replica: db}, logger) + return ds.TrackedCriticalCVEs(ctx) +} diff --git a/server/chart/datasets.go b/server/chart/datasets.go index f57175eed49..95847c9b71e 100644 --- a/server/chart/datasets.go +++ b/server/chart/datasets.go @@ -4,6 +4,7 @@ import ( "context" "time" + "github.com/RoaringBitmap/roaring" "github.com/fleetdm/fleet/v4/server/chart/api" ) @@ -27,7 +28,7 @@ func (u *UptimeDataset) Collect(ctx context.Context, store api.DatasetStore, now return store.RecordBucketData(ctx, u.Name(), bucketStart, time.Hour, u.SampleStrategy(), // The empty string key means "all entities" since uptime isn't tracked per host. // The value is a bitmap of host IDs that were active in this bucket. - map[string][]byte{"": HostIDsToBlob(hostIDs)}) + map[string]*roaring.Bitmap{"": NewBitmap(hostIDs)}) } // CVEDataset implements api.Dataset for host CVE tracking. 
@@ -50,9 +51,9 @@ func (c *CVEDataset) Collect(ctx context.Context, store api.DatasetStore, now ti if err != nil { return err } - bitmaps := make(map[string][]byte, len(hostIDsByCVE)) + bitmaps := make(map[string]*roaring.Bitmap, len(hostIDsByCVE)) for cve, hostIDs := range hostIDsByCVE { - bitmaps[cve] = HostIDsToBlob(hostIDs) + bitmaps[cve] = NewBitmap(hostIDs) } bucketStart := now.UTC().Truncate(time.Hour) // Always call RecordBucketData, even when bitmaps is empty: snapshot diff --git a/server/chart/internal/mysql/data.go b/server/chart/internal/mysql/data.go index 388eecad18a..cd566742e89 100644 --- a/server/chart/internal/mysql/data.go +++ b/server/chart/internal/mysql/data.go @@ -1,12 +1,12 @@ package mysql import ( - "bytes" "context" "fmt" "strings" "time" + "github.com/RoaringBitmap/roaring" "github.com/fleetdm/fleet/v4/server/chart" "github.com/fleetdm/fleet/v4/server/chart/api" "github.com/fleetdm/fleet/v4/server/contexts/ctxdb" @@ -40,10 +40,11 @@ var scdScrubWriteBatchCap = 1000 // scdRow is a single row of host_scd_data as fetched by GetSCDData. 
type scdRow struct { - EntityID string `db:"entity_id"` - HostBitmap []byte `db:"host_bitmap"` - ValidFrom time.Time `db:"valid_from"` - ValidTo time.Time `db:"valid_to"` + EntityID string `db:"entity_id"` + HostBitmap []byte `db:"host_bitmap"` + EncodingType uint8 `db:"encoding_type"` + ValidFrom time.Time `db:"valid_from"` + ValidTo time.Time `db:"valid_to"` } func (ds *Datastore) RecordBucketData( @@ -52,7 +53,7 @@ func (ds *Datastore) RecordBucketData( bucketStart time.Time, bucketSize time.Duration, strategy api.SampleStrategy, - entityBitmaps map[string][]byte, + entityBitmaps map[string]*roaring.Bitmap, ) error { bucketStart = bucketStart.UTC() @@ -87,7 +88,7 @@ func (ds *Datastore) recordAccumulate( dataset string, bucketStart time.Time, bucketSize time.Duration, - entityBitmaps map[string][]byte, + entityBitmaps map[string]*roaring.Bitmap, ) error { validTo := bucketStart.Add(bucketSize) @@ -97,10 +98,10 @@ func (ds *Datastore) recordAccumulate( } // Fetch the current in-bucket bitmaps so we can OR-merge before writing. - existing := make(map[string][]byte, len(entityIDs)) + existing := make(map[string]*roaring.Bitmap, len(entityIDs)) if len(entityIDs) > 0 { query, args, err := sqlx.In( - `SELECT entity_id, host_bitmap FROM host_scd_data + `SELECT entity_id, host_bitmap, encoding_type FROM host_scd_data WHERE dataset = ? AND valid_from = ? 
AND entity_id IN (?)`, dataset, bucketStart, entityIDs) if err != nil { @@ -109,8 +110,9 @@ func (ds *Datastore) recordAccumulate( query = ds.rebind(query) type row struct { - EntityID string `db:"entity_id"` - HostBitmap []byte `db:"host_bitmap"` + EntityID string `db:"entity_id"` + HostBitmap []byte `db:"host_bitmap"` + EncodingType uint8 `db:"encoding_type"` } var rows []row // Using writer here since a stale read would OR-merge against an older @@ -120,18 +122,22 @@ func (ds *Datastore) recordAccumulate( return ctxerr.Wrap(ctx, err, "fetch in-bucket bitmaps") } for _, r := range rows { - existing[r.EntityID] = r.HostBitmap + rb, err := chart.DecodeBitmap(chart.Blob{Bytes: r.HostBitmap, Encoding: r.EncodingType}) + if err != nil { + return ctxerr.Wrapf(ctx, err, "decode in-bucket bitmap for entity %q", r.EntityID) + } + existing[r.EntityID] = rb } } type upsertRow struct { entityID string - bitmap []byte + blob chart.Blob } toUpsert := make([]upsertRow, 0, len(entityBitmaps)) for entityID, newBitmap := range entityBitmaps { merged := chart.BlobOR(existing[entityID], newBitmap) - toUpsert = append(toUpsert, upsertRow{entityID: entityID, bitmap: merged}) + toUpsert = append(toUpsert, upsertRow{entityID: entityID, blob: chart.BitmapToBlob(merged)}) } for i := 0; i < len(toUpsert); i += scdUpsertBatch { @@ -139,15 +145,15 @@ func (ds *Datastore) recordAccumulate( batch := toUpsert[i:end] placeholders := make([]string, 0, len(batch)) - args := make([]any, 0, len(batch)*5) + args := make([]any, 0, len(batch)*6) for _, r := range batch { - placeholders = append(placeholders, "(?, ?, ?, ?, ?)") - args = append(args, dataset, r.entityID, r.bitmap, bucketStart, validTo) + placeholders = append(placeholders, "(?, ?, ?, ?, ?, ?)") + args = append(args, dataset, r.entityID, r.blob.Bytes, r.blob.Encoding, bucketStart, validTo) } - // Concatenating hardcoded "(?,?,?,?,?)" placeholder strings, not user input. 
- stmt := `INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, valid_from, valid_to) VALUES ` + //nolint:gosec // G202 + // Concatenating hardcoded "(?,?,?,?,?,?)" placeholder strings, not user input. + stmt := `INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, encoding_type, valid_from, valid_to) VALUES ` + //nolint:gosec // G202 strings.Join(placeholders, ", ") + - ` ON DUPLICATE KEY UPDATE host_bitmap = VALUES(host_bitmap)` + ` ON DUPLICATE KEY UPDATE host_bitmap = VALUES(host_bitmap), encoding_type = VALUES(encoding_type)` if _, err := ds.writer(ctx).ExecContext(ctx, stmt, args...); err != nil { return ctxerr.Wrap(ctx, err, "upsert accumulate rows") } @@ -164,46 +170,62 @@ func (ds *Datastore) recordSnapshot( ctx context.Context, dataset string, bucketStart time.Time, - entityBitmaps map[string][]byte, + entityBitmaps map[string]*roaring.Bitmap, ) error { type openRow struct { - EntityID string `db:"entity_id"` - HostBitmap []byte `db:"host_bitmap"` - ValidFrom time.Time `db:"valid_from"` + EntityID string `db:"entity_id"` + HostBitmap []byte `db:"host_bitmap"` + EncodingType uint8 `db:"encoding_type"` + ValidFrom time.Time `db:"valid_from"` } var openRows []openRow // Reader is safe here: the close UPDATE filters by valid_to = sentinel and the // insert uses ODKU on uniq_entity_bucket, so a stale read at worst produces // idempotent re-work (a no-op close or a same-bucket overwrite). if err := sqlx.SelectContext(ctx, ds.reader(ctx), &openRows, - `SELECT entity_id, host_bitmap, valid_from + `SELECT entity_id, host_bitmap, encoding_type, valid_from FROM host_scd_data WHERE dataset = ? AND valid_to = ?`, dataset, scdOpenSentinel); err != nil { return ctxerr.Wrap(ctx, err, "fetch open SCD rows") } - openByEntity := make(map[string]openRow, len(openRows)) + // Decode every open row to op form so change-detection compares semantically + // rather than byte-wise. 
Mixed encodings (a dense legacy row vs an incoming + // roaring bitmap) would never byte-equal even when representing the same host + // set; comparing op-form bitmaps via roaring.Equals sidesteps this. + type openEntity struct { + row openRow + bitmap *roaring.Bitmap + } + openByEntity := make(map[string]openEntity, len(openRows)) for _, r := range openRows { - openByEntity[r.EntityID] = r + rb, err := chart.DecodeBitmap(chart.Blob{Bytes: r.HostBitmap, Encoding: r.EncodingType}) + if err != nil { + return ctxerr.Wrapf(ctx, err, "decode open bitmap for entity %q", r.EntityID) + } + openByEntity[r.EntityID] = openEntity{row: r, bitmap: rb} } var toClose []string type upsertRow struct { entityID string - bitmap []byte + blob chart.Blob } var toUpsert []upsertRow - for entityID, bitmap := range entityBitmaps { + for entityID, incoming := range entityBitmaps { existing, hasOpen := openByEntity[entityID] - if hasOpen && bytes.Equal(existing.HostBitmap, bitmap) { + if hasOpen && existing.bitmap.Equals(incoming) { continue // unchanged state — leave the row alone } - if hasOpen && existing.ValidFrom.Before(bucketStart) { + if hasOpen && existing.row.ValidFrom.Before(bucketStart) { toClose = append(toClose, entityID) } - toUpsert = append(toUpsert, upsertRow{entityID: entityID, bitmap: bitmap}) + toUpsert = append(toUpsert, upsertRow{ + entityID: entityID, + blob: chart.BitmapToBlob(incoming), + }) } // Entities that disappeared entirely — close their open rows. 
If the row @@ -238,15 +260,15 @@ func (ds *Datastore) recordSnapshot( batch := toUpsert[i:end] placeholders := make([]string, 0, len(batch)) - args := make([]any, 0, len(batch)*4) + args := make([]any, 0, len(batch)*5) for _, r := range batch { - placeholders = append(placeholders, "(?, ?, ?, ?)") - args = append(args, dataset, r.entityID, r.bitmap, bucketStart) + placeholders = append(placeholders, "(?, ?, ?, ?, ?)") + args = append(args, dataset, r.entityID, r.blob.Bytes, r.blob.Encoding, bucketStart) } - // Concatenating hardcoded "(?,?,?,?)" placeholder strings, not user input. - stmt := `INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, valid_from) VALUES ` + //nolint:gosec // G202 + // Concatenating hardcoded "(?,?,?,?,?)" placeholder strings, not user input. + stmt := `INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, encoding_type, valid_from) VALUES ` + //nolint:gosec // G202 strings.Join(placeholders, ", ") + - ` ON DUPLICATE KEY UPDATE host_bitmap = VALUES(host_bitmap)` + ` ON DUPLICATE KEY UPDATE host_bitmap = VALUES(host_bitmap), encoding_type = VALUES(encoding_type)` if _, err := ds.writer(ctx).ExecContext(ctx, stmt, args...); err != nil { return ctxerr.Wrap(ctx, err, "upsert snapshot rows") } @@ -284,7 +306,7 @@ func (ds *Datastore) GetSCDData( startDate, endDate time.Time, bucketSize time.Duration, strategy api.SampleStrategy, - filterMask []byte, + filterMask *roaring.Bitmap, entityIDs []string, ) ([]api.DataPoint, error) { startDate = startDate.UTC() @@ -313,7 +335,7 @@ func (ds *Datastore) GetSCDData( } query := fmt.Sprintf(` - SELECT entity_id, host_bitmap, valid_from, valid_to + SELECT entity_id, host_bitmap, encoding_type, valid_from, valid_to FROM host_scd_data WHERE dataset = ? AND valid_from < ? @@ -330,59 +352,96 @@ func (ds *Datastore) GetSCDData( return nil, ctxerr.Wrap(ctx, err, "get SCD data") } + // Decode every row to op form once before the per-bucket walk. 
Decode work + // is O(set bits) for roaring rows and O(byte count) for legacy dense rows; + // doing it once here avoids re-decoding the same row across overlapping + // buckets. + decoded := make([]decodedSCDRow, len(rows)) + for i, r := range rows { + rb, err := chart.DecodeBitmap(chart.Blob{Bytes: r.HostBitmap, Encoding: r.EncodingType}) + if err != nil { + return nil, ctxerr.Wrapf(ctx, err, "decode bitmap for entity %q", r.EntityID) + } + decoded[i] = decodedSCDRow{entityID: r.EntityID, bitmap: rb, validFrom: r.ValidFrom, validTo: r.ValidTo} + } + results := make([]api.DataPoint, numBuckets) for i := range numBuckets { bucketStart := startDate.Add(time.Duration(i+1) * bucketSize) bucketEnd := bucketStart.Add(bucketSize) - merged := aggregateBucket(rows, bucketStart, bucketEnd, strategy) - if merged != nil { + merged := aggregateBucket(decoded, bucketStart, bucketEnd, strategy) + if merged != nil && filterMask != nil { merged = chart.BlobAND(merged, filterMask) } results[i] = api.DataPoint{ Timestamp: bucketStart, - Value: chart.BlobPopcount(merged), + Value: int(chart.BlobPopcount(merged)), //nolint:gosec // host counts fit comfortably in int } } return results, nil } +// decodedSCDRow is the in-memory op-form view of an scdRow, produced by +// decoding the storage-form bytes once at SELECT time and shared across the +// per-bucket aggregation walk. +type decodedSCDRow struct { + entityID string + bitmap *roaring.Bitmap + validFrom time.Time + validTo time.Time +} + // aggregateBucket returns the merged bitmap for a single bucket given the // sample strategy. For Accumulate, ORs every overlapping row (entity dimension // collapses into the union — correct for "distinct hosts seen doing anything // tracked"). For Snapshot, picks the row active at bucketEnd per entity and // ORs across entities. 
-func aggregateBucket(rows []scdRow, bucketStart, bucketEnd time.Time, strategy api.SampleStrategy) []byte { +func aggregateBucket(rows []decodedSCDRow, bucketStart, bucketEnd time.Time, strategy api.SampleStrategy) *roaring.Bitmap { if strategy == api.SampleStrategySnapshot { // Per entity, the row "active at bucketEnd" is the one whose // [valid_from, valid_to) covers the instant bucketEnd-ε. For interval // boundaries, that's valid_from < bucketEnd AND valid_to >= bucketEnd. // Write semantics ensure at most one such row per (entity, moment). - var merged []byte + var merged *roaring.Bitmap seen := make(map[string]struct{}) for _, r := range rows { - if !r.ValidFrom.Before(bucketEnd) || r.ValidTo.Before(bucketEnd) { + if !r.validFrom.Before(bucketEnd) || r.validTo.Before(bucketEnd) { continue } - if _, dup := seen[r.EntityID]; dup { + if _, dup := seen[r.entityID]; dup { continue } - seen[r.EntityID] = struct{}{} - merged = chart.BlobOR(merged, r.HostBitmap) + seen[r.entityID] = struct{}{} + merged = orInto(merged, r.bitmap) } return merged } // Accumulate: OR every row that overlaps the bucket. - var merged []byte + var merged *roaring.Bitmap for _, r := range rows { - if !r.ValidFrom.Before(bucketEnd) || !r.ValidTo.After(bucketStart) { + if !r.validFrom.Before(bucketEnd) || !r.validTo.After(bucketStart) { continue } - merged = chart.BlobOR(merged, r.HostBitmap) + merged = orInto(merged, r.bitmap) } return merged } +// orInto returns merged OR rb. When merged is nil, returns a clone of rb so +// subsequent ORs on merged don't mutate the source row's cached bitmap. Once +// merged is non-nil, future ORs mutate merged in place (cheap; we own it). +func orInto(merged, rb *roaring.Bitmap) *roaring.Bitmap { + if rb == nil || rb.IsEmpty() { + return merged + } + if merged == nil { + return rb.Clone() + } + merged.Or(rb) + return merged +} + // CleanupSCDData deletes closed SCD rows whose valid_to is older than the // retention cutoff. 
Open rows (valid_to = sentinel) are always preserved. // Deletes in batches so each statement holds locks briefly and the concurrent @@ -481,27 +540,31 @@ func (ds *Datastore) HostIDsInFleets(ctx context.Context, fleetIDs []uint) ([]ui // - Surviving updates are flushed in chunked CASE/WHEN UPDATE statements // so a read-page of N rows costs O(N / writeBatch) round trips instead // of O(N). -func (ds *Datastore) ApplyScrubMaskToDataset(ctx context.Context, dataset string, mask []byte, batchSize int) error { +func (ds *Datastore) ApplyScrubMaskToDataset(ctx context.Context, dataset string, mask *roaring.Bitmap, batchSize int) error { if batchSize <= 0 { batchSize = 5000 } - if len(mask) == 0 { + if mask == nil || mask.IsEmpty() { // Nothing to clear; avoid the row walk entirely. return nil } - // Size each CASE/WHEN UPDATE so its payload (~writeBatch * len(mask) bytes - // of new bitmap data) stays under scdScrubWriteByteBudget. Bounded above - // by scdScrubWriteBatchCap to keep parser cost predictable. - writeBatch := min(max(scdScrubWriteByteBudget/len(mask), 1), scdScrubWriteBatchCap) + // Size each CASE/WHEN UPDATE so its payload (~writeBatch * estimated bitmap + // bytes per row) stays under scdScrubWriteByteBudget. Use the mask's + // serialized size as a rough proxy for typical row size; this is only a + // heuristic, since a row's bitmap can be larger than the mask. Bounded above by + // scdScrubWriteBatchCap to keep parser cost predictable.
+ maskBlob := chart.BitmapToBlob(mask) + writeBatch := min(max(scdScrubWriteByteBudget/max(len(maskBlob.Bytes), 1), 1), scdScrubWriteBatchCap) type row struct { - ID uint `db:"id"` - HostBitmap []byte `db:"host_bitmap"` + ID uint `db:"id"` + HostBitmap []byte `db:"host_bitmap"` + EncodingType uint8 `db:"encoding_type"` } type pendingRow struct { - id uint - scrubbed []byte + id uint + bytes []byte } // Paging select reads from the primary: the loop terminates on @@ -524,7 +587,7 @@ func (ds *Datastore) ApplyScrubMaskToDataset(ctx context.Context, dataset string var rows []row // reader(ctx) honors RequirePrimary set above and returns the writer connection. if err := sqlx.SelectContext(ctx, ds.reader(ctx), &rows, - `SELECT id, host_bitmap FROM host_scd_data + `SELECT id, host_bitmap, encoding_type FROM host_scd_data WHERE dataset = ? AND id > ? ORDER BY id LIMIT ?`, dataset, lastID, batchSize); err != nil { @@ -544,9 +607,14 @@ func (ds *Datastore) ApplyScrubMaskToDataset(ctx context.Context, dataset string // produce no UPDATE. 
pending := make([]pendingRow, 0, len(rows)) for _, r := range rows { - scrubbed := chart.BlobANDNOT(r.HostBitmap, mask) - if !bytes.Equal(scrubbed, r.HostBitmap) { - pending = append(pending, pendingRow{id: r.ID, scrubbed: scrubbed}) + rb, err := chart.DecodeBitmap(chart.Blob{Bytes: r.HostBitmap, Encoding: r.EncodingType}) + if err != nil { + return ctxerr.Wrapf(ctx, err, "decode bitmap for scrub row id %d", r.ID) + } + before := rb.GetCardinality() + scrubbed := chart.BlobANDNOT(rb, mask) + if scrubbed.GetCardinality() != before { + pending = append(pending, pendingRow{id: r.ID, bytes: chart.BitmapToBlob(scrubbed).Bytes}) } lastID = r.ID } @@ -555,21 +623,25 @@ func (ds *Datastore) ApplyScrubMaskToDataset(ctx context.Context, dataset string end := min(i+writeBatch, len(pending)) chunk := pending[i:end] - caseClauses := make([]string, 0, len(chunk)) + // Scrubbed bytes are always roaring (chart.BitmapToBlob has no other + // code path), so encoding_type is one bound value (appended between the + // CASE and IN args) rather than a per-row CASE. + caseBitmapClauses := make([]string, 0, len(chunk)) inPlaceholders := make([]string, 0, len(chunk)) - args := make([]any, 0, len(chunk)*3) + args := make([]any, 0, len(chunk)*3+1) for _, p := range chunk { - caseClauses = append(caseClauses, "WHEN ? THEN ?") - args = append(args, p.id, p.scrubbed) + caseBitmapClauses = append(caseBitmapClauses, "WHEN ? THEN ?") + args = append(args, p.id, p.bytes) } + args = append(args, chart.EncodingRoaring) for _, p := range chunk { inPlaceholders = append(inPlaceholders, "?") args = append(args, p.id) } // Concatenating hardcoded "WHEN ? THEN ?" / "?" placeholders, not user input. stmt := `UPDATE host_scd_data SET host_bitmap = CASE id ` + //nolint:gosec // G202 - strings.Join(caseClauses, " ") + - ` END WHERE id IN (` + + strings.Join(caseBitmapClauses, " ") + + ` END, encoding_type = ?
WHERE id IN (` + strings.Join(inPlaceholders, ", ") + `)` if _, err := ds.writer(ctx).ExecContext(ctx, stmt, args...); err != nil { return ctxerr.Wrap(ctx, err, "scrub batch") diff --git a/server/chart/internal/mysql/data_test.go b/server/chart/internal/mysql/data_test.go index 5c6770cdb97..f481bc4b4c7 100644 --- a/server/chart/internal/mysql/data_test.go +++ b/server/chart/internal/mysql/data_test.go @@ -6,6 +6,7 @@ import ( "testing" "time" + "github.com/RoaringBitmap/roaring" "github.com/fleetdm/fleet/v4/server/chart" "github.com/fleetdm/fleet/v4/server/chart/api" "github.com/fleetdm/fleet/v4/server/chart/internal/testutils" @@ -13,20 +14,30 @@ import ( "github.com/stretchr/testify/require" ) +// rowFixture is a compact way to declare a decodedSCDRow in tests. +func rowFixture(entityID string, ids []uint, validFrom, validTo time.Time) decodedSCDRow { + return decodedSCDRow{ + entityID: entityID, + bitmap: chart.NewBitmap(ids), + validFrom: validFrom, + validTo: validTo, + } +} + func TestAggregateBucketAccumulate(t *testing.T) { bucketStart := time.Date(2026, 4, 21, 0, 0, 0, 0, time.UTC) bucketEnd := bucketStart.Add(24 * time.Hour) // Three accumulate rows within the bucket, each observed during a different // hour. Accumulate semantics = union of all overlapping rows. 
- rows := []scdRow{ - {EntityID: "", HostBitmap: chart.HostIDsToBlob([]uint{1, 2}), ValidFrom: bucketStart.Add(2 * time.Hour), ValidTo: bucketStart.Add(3 * time.Hour)}, - {EntityID: "", HostBitmap: chart.HostIDsToBlob([]uint{3}), ValidFrom: bucketStart.Add(10 * time.Hour), ValidTo: bucketStart.Add(11 * time.Hour)}, - {EntityID: "", HostBitmap: chart.HostIDsToBlob([]uint{2, 4}), ValidFrom: bucketStart.Add(15 * time.Hour), ValidTo: bucketStart.Add(16 * time.Hour)}, + rows := []decodedSCDRow{ + rowFixture("", []uint{1, 2}, bucketStart.Add(2*time.Hour), bucketStart.Add(3*time.Hour)), + rowFixture("", []uint{3}, bucketStart.Add(10*time.Hour), bucketStart.Add(11*time.Hour)), + rowFixture("", []uint{2, 4}, bucketStart.Add(15*time.Hour), bucketStart.Add(16*time.Hour)), } got := aggregateBucket(rows, bucketStart, bucketEnd, api.SampleStrategyAccumulate) - assert.Equal(t, 4, chart.BlobPopcount(got), "union of {1,2}, {3}, {2,4} = {1,2,3,4}") + assert.Equal(t, uint64(4), chart.BlobPopcount(got), "union of {1,2}, {3}, {2,4} = {1,2,3,4}") } func TestAggregateBucketAccumulateMultiEntity(t *testing.T) { @@ -36,14 +47,14 @@ func TestAggregateBucketAccumulateMultiEntity(t *testing.T) { // Future-style multi-entity accumulate dataset (e.g. software usage): // entity = software name; bitmap = hosts that used that software this hour. // Bucket value = distinct hosts using any tracked software during the hour. 
- rows := []scdRow{ - {EntityID: "slack", HostBitmap: chart.HostIDsToBlob([]uint{1, 2}), ValidFrom: bucketStart, ValidTo: bucketEnd}, - {EntityID: "zoom", HostBitmap: chart.HostIDsToBlob([]uint{2, 3}), ValidFrom: bucketStart, ValidTo: bucketEnd}, - {EntityID: "chrome", HostBitmap: chart.HostIDsToBlob([]uint{4}), ValidFrom: bucketStart, ValidTo: bucketEnd}, + rows := []decodedSCDRow{ + rowFixture("slack", []uint{1, 2}, bucketStart, bucketEnd), + rowFixture("zoom", []uint{2, 3}, bucketStart, bucketEnd), + rowFixture("chrome", []uint{4}, bucketStart, bucketEnd), } got := aggregateBucket(rows, bucketStart, bucketEnd, api.SampleStrategyAccumulate) - assert.Equal(t, 4, chart.BlobPopcount(got), "union across entities = {1,2,3,4}") + assert.Equal(t, uint64(4), chart.BlobPopcount(got), "union across entities = {1,2,3,4}") } func TestAggregateBucketSnapshotEndOfBucket(t *testing.T) { @@ -53,13 +64,13 @@ func TestAggregateBucketSnapshotEndOfBucket(t *testing.T) { // One entity "cve-A" changed state mid-bucket: affected hosts were {1,2,3} // from hr 0 to hr 14, then {1,2} from hr 14 onward (H3 patched). // End-of-bucket semantics should return only the *latest* state, not the OR. 
- rows := []scdRow{ - {EntityID: "cve-A", HostBitmap: chart.HostIDsToBlob([]uint{1, 2, 3}), ValidFrom: bucketStart, ValidTo: bucketStart.Add(14 * time.Hour)}, - {EntityID: "cve-A", HostBitmap: chart.HostIDsToBlob([]uint{1, 2}), ValidFrom: bucketStart.Add(14 * time.Hour), ValidTo: time.Date(9999, 12, 31, 0, 0, 0, 0, time.UTC)}, + rows := []decodedSCDRow{ + rowFixture("cve-A", []uint{1, 2, 3}, bucketStart, bucketStart.Add(14*time.Hour)), + rowFixture("cve-A", []uint{1, 2}, bucketStart.Add(14*time.Hour), time.Date(9999, 12, 31, 0, 0, 0, 0, time.UTC)), } got := aggregateBucket(rows, bucketStart, bucketEnd, api.SampleStrategySnapshot) - assert.Equal(t, 2, chart.BlobPopcount(got), "end-of-bucket state is {1,2}, not union {1,2,3}") + assert.Equal(t, uint64(2), chart.BlobPopcount(got), "end-of-bucket state is {1,2}, not union {1,2,3}") } func TestAggregateBucketSnapshotMultipleEntities(t *testing.T) { @@ -70,16 +81,16 @@ func TestAggregateBucketSnapshotMultipleEntities(t *testing.T) { // Two entities, each with an end-of-bucket state; snapshot returns OR across // entities of each's latest row. 
- rows := []scdRow{ + rows := []decodedSCDRow{ // cve-A: latest state {1,2} - {EntityID: "cve-A", HostBitmap: chart.HostIDsToBlob([]uint{1, 2, 3}), ValidFrom: bucketStart, ValidTo: bucketStart.Add(14 * time.Hour)}, - {EntityID: "cve-A", HostBitmap: chart.HostIDsToBlob([]uint{1, 2}), ValidFrom: bucketStart.Add(14 * time.Hour), ValidTo: sentinel}, + rowFixture("cve-A", []uint{1, 2, 3}, bucketStart, bucketStart.Add(14*time.Hour)), + rowFixture("cve-A", []uint{1, 2}, bucketStart.Add(14*time.Hour), sentinel), // cve-B: latest state {3,4} - {EntityID: "cve-B", HostBitmap: chart.HostIDsToBlob([]uint{3, 4}), ValidFrom: bucketStart.Add(5 * time.Hour), ValidTo: sentinel}, + rowFixture("cve-B", []uint{3, 4}, bucketStart.Add(5*time.Hour), sentinel), } got := aggregateBucket(rows, bucketStart, bucketEnd, api.SampleStrategySnapshot) - assert.Equal(t, 4, chart.BlobPopcount(got), "union of cve-A end-state {1,2} and cve-B end-state {3,4}") + assert.Equal(t, uint64(4), chart.BlobPopcount(got), "union of cve-A end-state {1,2} and cve-B end-state {3,4}") } func TestAggregateBucketSnapshotEntityDisappears(t *testing.T) { @@ -89,12 +100,12 @@ func TestAggregateBucketSnapshotEntityDisappears(t *testing.T) { // Entity was active early in bucket but its row was closed mid-bucket with // no replacement (entity disappeared — e.g., last affected host patched). // End-of-bucket semantics exclude it: no row is active at bucketEnd. 
- rows := []scdRow{ - {EntityID: "cve-A", HostBitmap: chart.HostIDsToBlob([]uint{1, 2, 3}), ValidFrom: bucketStart, ValidTo: bucketStart.Add(14 * time.Hour)}, + rows := []decodedSCDRow{ + rowFixture("cve-A", []uint{1, 2, 3}, bucketStart, bucketStart.Add(14*time.Hour)), } got := aggregateBucket(rows, bucketStart, bucketEnd, api.SampleStrategySnapshot) - assert.Equal(t, 0, chart.BlobPopcount(got), "entity closed mid-bucket is absent at bucketEnd") + assert.Equal(t, uint64(0), chart.BlobPopcount(got), "entity closed mid-bucket is absent at bucketEnd") } func TestAggregateBucketSnapshotRowClosedExactlyAtBucketEnd(t *testing.T) { @@ -104,12 +115,12 @@ func TestAggregateBucketSnapshotRowClosedExactlyAtBucketEnd(t *testing.T) { // Row's valid_to == bucketEnd. The row represents state up to (but not // including) bucketEnd — i.e., the state just before the bucket ends. // That's exactly what end-of-bucket semantics should pick. - rows := []scdRow{ - {EntityID: "cve-A", HostBitmap: chart.HostIDsToBlob([]uint{1, 2}), ValidFrom: bucketStart, ValidTo: bucketEnd}, + rows := []decodedSCDRow{ + rowFixture("cve-A", []uint{1, 2}, bucketStart, bucketEnd), } got := aggregateBucket(rows, bucketStart, bucketEnd, api.SampleStrategySnapshot) - assert.Equal(t, 2, chart.BlobPopcount(got), "row whose valid_to equals bucketEnd covers bucketEnd-ε") + assert.Equal(t, uint64(2), chart.BlobPopcount(got), "row whose valid_to equals bucketEnd covers bucketEnd-ε") } func TestCleanupSCDData(t *testing.T) { @@ -215,26 +226,24 @@ func TestApplyScrubMaskToDataset(t *testing.T) { func testScrubEmptyMaskNoOp(t *testing.T, tdb *testutils.TestDB, ds *Datastore) { now := time.Now().UTC() - bitmap := chart.HostIDsToBlob([]uint{1, 2, 3}) - id := tdb.InsertSCDRowWithBitmap(t, "uptime", "", bitmap, now.Add(-time.Hour), now) + id := tdb.InsertSCDRowWithHostIDs(t, "uptime", "", []uint{1, 2, 3}, now.Add(-time.Hour), now) + before := tdb.SCDBlob(t, id) require.NoError(t, ds.ApplyScrubMaskToDataset(t.Context(), 
"uptime", nil, 0)) - assert.Equal(t, bitmap, tdb.SCDBitmap(t, id), "nil mask must not modify the row") + assert.Equal(t, before, tdb.SCDBlob(t, id), "nil mask must not modify the row") - require.NoError(t, ds.ApplyScrubMaskToDataset(t.Context(), "uptime", []byte{}, 0)) - assert.Equal(t, bitmap, tdb.SCDBitmap(t, id), "empty mask must not modify the row") + require.NoError(t, ds.ApplyScrubMaskToDataset(t.Context(), "uptime", roaring.New(), 0)) + assert.Equal(t, before, tdb.SCDBlob(t, id), "empty mask must not modify the row") } func testScrubClearsAffectedBits(t *testing.T, tdb *testutils.TestDB, ds *Datastore) { now := time.Now().UTC() - id := tdb.InsertSCDRowWithBitmap(t, "uptime", "", - chart.HostIDsToBlob([]uint{1, 2, 3, 4, 5}), now.Add(-time.Hour), now) + id := tdb.InsertSCDRowWithHostIDs(t, "uptime", "", []uint{1, 2, 3, 4, 5}, now.Add(-time.Hour), now) - mask := chart.HostIDsToBlob([]uint{2, 4}) + mask := chart.NewBitmap([]uint{2, 4}) require.NoError(t, ds.ApplyScrubMaskToDataset(t.Context(), "uptime", mask, 0)) - got := tdb.SCDBitmap(t, id) - assert.Equal(t, chart.HostIDsToBlob([]uint{1, 3, 5}), got) + assert.Equal(t, []uint{1, 3, 5}, tdb.SCDHostIDs(t, id)) } func testScrubSkipsRowsMaskDoesNotTouch(t *testing.T, tdb *testutils.TestDB, ds *Datastore) { @@ -242,17 +251,15 @@ func testScrubSkipsRowsMaskDoesNotTouch(t *testing.T, tdb *testutils.TestDB, ds // untouched row's bitmap MUST be byte-for-byte identical post-scrub — // this is the contract the skip-noop optimization promises. 
now := time.Now().UTC() - hitBitmap := chart.HostIDsToBlob([]uint{1, 2, 3}) - missBitmap := chart.HostIDsToBlob([]uint{10, 11, 12}) + hitID := tdb.InsertSCDRowWithHostIDs(t, "uptime", "a", []uint{1, 2, 3}, now.Add(-time.Hour), now) + missID := tdb.InsertSCDRowWithHostIDs(t, "uptime", "b", []uint{10, 11, 12}, now.Add(-time.Hour), now) + missBefore := tdb.SCDBlob(t, missID) - hitID := tdb.InsertSCDRowWithBitmap(t, "uptime", "a", hitBitmap, now.Add(-time.Hour), now) - missID := tdb.InsertSCDRowWithBitmap(t, "uptime", "b", missBitmap, now.Add(-time.Hour), now) - - mask := chart.HostIDsToBlob([]uint{2}) + mask := chart.NewBitmap([]uint{2}) require.NoError(t, ds.ApplyScrubMaskToDataset(t.Context(), "uptime", mask, 0)) - assert.Equal(t, chart.HostIDsToBlob([]uint{1, 3}), tdb.SCDBitmap(t, hitID)) - assert.Equal(t, missBitmap, tdb.SCDBitmap(t, missID), "mask doesn't intersect — row must remain unchanged") + assert.Equal(t, []uint{1, 3}, tdb.SCDHostIDs(t, hitID)) + assert.Equal(t, missBefore, tdb.SCDBlob(t, missID), "mask doesn't intersect — row must remain unchanged") } func testScrubChunkedAcrossWriteBatches(t *testing.T, tdb *testutils.TestDB, ds *Datastore) { @@ -263,49 +270,74 @@ func testScrubChunkedAcrossWriteBatches(t *testing.T, tdb *testutils.TestDB, ds t.Cleanup(func() { scdScrubWriteBatchCap = prev }) now := time.Now().UTC() - mask := chart.HostIDsToBlob([]uint{1}) + mask := chart.NewBitmap([]uint{1}) // 7 rows, all containing host 1 → 7 affected rows → 3+3+1 across chunks. // Read batch of 4 forces two read pages, each splitting into multiple // CASE/WHEN UPDATEs. 
- bitmap := chart.HostIDsToBlob([]uint{1, 2}) ids := make([]uint, 7) for i := range ids { - ids[i] = tdb.InsertSCDRowWithBitmap(t, "uptime", fmt.Sprintf("e%d", i), - bitmap, now.Add(-time.Hour), now) + ids[i] = tdb.InsertSCDRowWithHostIDs(t, "uptime", fmt.Sprintf("e%d", i), + []uint{1, 2}, now.Add(-time.Hour), now) } require.NoError(t, ds.ApplyScrubMaskToDataset(t.Context(), "uptime", mask, 4)) - want := chart.HostIDsToBlob([]uint{2}) for _, id := range ids { - assert.Equal(t, want, tdb.SCDBitmap(t, id), "row %d", id) + assert.Equal(t, []uint{2}, tdb.SCDHostIDs(t, id), "row %d", id) } } func testScrubHonorsCtxCancellation(t *testing.T, tdb *testutils.TestDB, ds *Datastore) { now := time.Now().UTC() - bitmap := chart.HostIDsToBlob([]uint{1, 2}) - id := tdb.InsertSCDRowWithBitmap(t, "uptime", "", bitmap, now.Add(-time.Hour), now) + id := tdb.InsertSCDRowWithHostIDs(t, "uptime", "", []uint{1, 2}, now.Add(-time.Hour), now) + before := tdb.SCDBlob(t, id) ctx, cancel := context.WithCancel(t.Context()) cancel() - err := ds.ApplyScrubMaskToDataset(ctx, "uptime", chart.HostIDsToBlob([]uint{1}), 0) + err := ds.ApplyScrubMaskToDataset(ctx, "uptime", chart.NewBitmap([]uint{1}), 0) require.ErrorIs(t, err, context.Canceled) - assert.Equal(t, bitmap, tdb.SCDBitmap(t, id), "row must be untouched when ctx was canceled before the first read") + assert.Equal(t, before, tdb.SCDBlob(t, id), "row must be untouched when ctx was canceled before the first read") +} + +// TestGetSCDDataMixedEncoding proves the lazy-migration premise: a dense legacy +// row and a roaring row for the same dataset are both decoded by the chart +// query path and contribute to the bucket's union. Without this, the day-1 +// post-deploy story (mixed encodings coexisting until closed rows age out) is +// only covered by unit tests of DecodeBitmap, not by the wired-up read path. 
+func TestGetSCDDataMixedEncoding(t *testing.T) { + tdb := testutils.SetupTestDB(t, "chart_mysql") + ds := NewDatastore(tdb.Conns(), tdb.Logger) + + startDate := time.Date(2026, 4, 21, 0, 0, 0, 0, time.UTC) + endDate := startDate.Add(24 * time.Hour) + // Rows must be open at bucketEnd (startDate + 2*bucketSize) for snapshot to + // pick them, so seed them as open with valid_from comfortably before the + // query window. + validFrom := startDate.Add(-time.Hour) + + tdb.InsertSCDRowWithBlob(t, "cve", "CVE-A", testutils.DenseBlob([]uint{1, 2, 3}), validFrom, scdOpenSentinel) + tdb.InsertSCDRowWithHostIDs(t, "cve", "CVE-B", []uint{3, 4, 5}, validFrom, scdOpenSentinel) + + pts, err := ds.GetSCDData(t.Context(), "cve", + startDate, endDate, 24*time.Hour, + api.SampleStrategySnapshot, nil, nil) + require.NoError(t, err) + require.Len(t, pts, 1) + assert.Equal(t, 5, pts[0].Value, "union of dense {1,2,3} and roaring {3,4,5} = {1,2,3,4,5}") } func testScrubOtherDatasetUnaffected(t *testing.T, tdb *testutils.TestDB, ds *Datastore) { now := time.Now().UTC() - bitmap := chart.HostIDsToBlob([]uint{1, 2, 3}) - uptimeID := tdb.InsertSCDRowWithBitmap(t, "uptime", "", bitmap, now.Add(-time.Hour), now) - cveID := tdb.InsertSCDRowWithBitmap(t, "cve", "CVE-1", bitmap, now.Add(-time.Hour), now) + uptimeID := tdb.InsertSCDRowWithHostIDs(t, "uptime", "", []uint{1, 2, 3}, now.Add(-time.Hour), now) + cveID := tdb.InsertSCDRowWithHostIDs(t, "cve", "CVE-1", []uint{1, 2, 3}, now.Add(-time.Hour), now) + cveBefore := tdb.SCDBlob(t, cveID) - mask := chart.HostIDsToBlob([]uint{2}) + mask := chart.NewBitmap([]uint{2}) require.NoError(t, ds.ApplyScrubMaskToDataset(t.Context(), "uptime", mask, 0)) - assert.Equal(t, chart.HostIDsToBlob([]uint{1, 3}), tdb.SCDBitmap(t, uptimeID)) - assert.Equal(t, bitmap, tdb.SCDBitmap(t, cveID), "cve dataset must not be touched by an uptime scrub") + assert.Equal(t, []uint{1, 3}, tdb.SCDHostIDs(t, uptimeID)) + assert.Equal(t, cveBefore, tdb.SCDBlob(t, cveID), "cve 
dataset must not be touched by an uptime scrub") } diff --git a/server/chart/internal/service/host_cache.go b/server/chart/internal/service/host_cache.go index bb5c9047bca..8a6f955aff7 100644 --- a/server/chart/internal/service/host_cache.go +++ b/server/chart/internal/service/host_cache.go @@ -8,6 +8,7 @@ import ( "sync" "time" + "github.com/RoaringBitmap/roaring" "github.com/fleetdm/fleet/v4/server/chart/internal/types" "golang.org/x/sync/singleflight" ) @@ -19,7 +20,7 @@ const hostFilterCacheTTL = 60 * time.Second // hostBitmapFetcher is the signature used by cache callers to compute a fresh // bitmap on a miss. Returning an error bypasses caching for that call. -type hostBitmapFetcher func(ctx context.Context) ([]byte, error) +type hostBitmapFetcher func(ctx context.Context) (*roaring.Bitmap, error) // hostFilterCache maps a canonicalized HostFilter to the bitmap of host IDs // that match it. Entries are considered valid for ttl; concurrent misses for @@ -35,7 +36,7 @@ type hostFilterCache struct { } type hostFilterCacheEntry struct { - bitmap []byte + bitmap *roaring.Bitmap expiresAt time.Time } @@ -49,7 +50,11 @@ func newHostFilterCache(ttl time.Duration) *hostFilterCache { // Get returns the cached bitmap for the filter or computes a fresh one via // fetch on miss/expiry. Concurrent misses for the same filter share one fetch. -func (c *hostFilterCache) Get(ctx context.Context, filter *types.HostFilter, fetch hostBitmapFetcher) ([]byte, error) { +// +// The returned *roaring.Bitmap is shared across callers; treat it as read-only +// (use roaring.And/Or/AndNot rather than (*Bitmap).And/Or/AndNot). The library +// is safe for concurrent reads but not concurrent reads-with-writes. 
+func (c *hostFilterCache) Get(ctx context.Context, filter *types.HostFilter, fetch hostBitmapFetcher) (*roaring.Bitmap, error) { key := hashHostFilter(filter) c.mu.RLock() @@ -92,7 +97,7 @@ func (c *hostFilterCache) Get(ctx context.Context, filter *types.HostFilter, fet if err != nil { return nil, err } - return val.([]byte), nil + return val.(*roaring.Bitmap), nil } // hashHostFilter produces a deterministic string key for a HostFilter. Slice diff --git a/server/chart/internal/service/host_cache_test.go b/server/chart/internal/service/host_cache_test.go index 40a4426ecfe..a94097cfe3d 100644 --- a/server/chart/internal/service/host_cache_test.go +++ b/server/chart/internal/service/host_cache_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/RoaringBitmap/roaring" "github.com/fleetdm/fleet/v4/server/chart/internal/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -64,16 +65,16 @@ func TestHostFilterCacheServesFromCacheUntilTTL(t *testing.T) { cache.clock = func() time.Time { return time.Unix(0, now.Load()) } var calls atomic.Int32 - fetch := func(_ context.Context) ([]byte, error) { + fetch := func(_ context.Context) (*roaring.Bitmap, error) { calls.Add(1) - return []byte{0x0F}, nil + return roaring.BitmapOf(1, 2, 3), nil } filter := &types.HostFilter{LabelIDs: []uint{1}} for range 5 { b, err := cache.Get(t.Context(), filter, fetch) require.NoError(t, err) - assert.Equal(t, []byte{0x0F}, b) + assert.Equal(t, uint64(3), b.GetCardinality()) } assert.Equal(t, int32(1), calls.Load(), "repeated gets within TTL should hit the cache") @@ -88,9 +89,9 @@ func TestHostFilterCacheDistinctFiltersMissSeparately(t *testing.T) { cache := newHostFilterCache(time.Minute) var calls atomic.Int32 - fetch := func(_ context.Context) ([]byte, error) { + fetch := func(_ context.Context) (*roaring.Bitmap, error) { calls.Add(1) - return []byte{0xFF}, nil + return roaring.BitmapOf(1), nil } _, err := cache.Get(t.Context(), 
&types.HostFilter{TeamIDs: []uint{1}}, fetch) @@ -106,10 +107,10 @@ func TestHostFilterCacheSingleflightCoalescesConcurrentMisses(t *testing.T) { var calls atomic.Int32 unblock := make(chan struct{}) - fetch := func(_ context.Context) ([]byte, error) { + fetch := func(_ context.Context) (*roaring.Bitmap, error) { calls.Add(1) <-unblock // hold the fetch until all goroutines are parked on singleflight - return []byte{0x01}, nil + return roaring.BitmapOf(1), nil } filter := &types.HostFilter{LabelIDs: []uint{42}} @@ -122,7 +123,7 @@ func TestHostFilterCacheSingleflightCoalescesConcurrentMisses(t *testing.T) { defer wg.Done() b, err := cache.Get(t.Context(), filter, fetch) assert.NoError(t, err) - assert.Equal(t, []byte{0x01}, b) + assert.Equal(t, uint64(1), b.GetCardinality()) }() } @@ -141,7 +142,7 @@ func TestHostFilterCacheSweepsExpiredEntriesOnWrite(t *testing.T) { now.Store(time.Now().UnixNano()) cache.clock = func() time.Time { return time.Unix(0, now.Load()) } - fetch := func(_ context.Context) ([]byte, error) { return []byte{0x01}, nil } + fetch := func(_ context.Context) (*roaring.Bitmap, error) { return roaring.BitmapOf(1), nil } // Seed two entries that will later be expired. 
_, err := cache.Get(t.Context(), &types.HostFilter{LabelIDs: []uint{1}}, fetch) @@ -169,7 +170,7 @@ func TestHostFilterCacheDoesNotCacheErrors(t *testing.T) { var calls atomic.Int32 sentinel := errors.New("boom") - fetch := func(_ context.Context) ([]byte, error) { + fetch := func(_ context.Context) (*roaring.Bitmap, error) { calls.Add(1) return nil, sentinel } diff --git a/server/chart/internal/service/service.go b/server/chart/internal/service/service.go index ab38ae4d2a7..f6db2f8b090 100644 --- a/server/chart/internal/service/service.go +++ b/server/chart/internal/service/service.go @@ -7,6 +7,7 @@ import ( "log/slog" "time" + "github.com/RoaringBitmap/roaring" "github.com/fleetdm/fleet/v4/server/chart" "github.com/fleetdm/fleet/v4/server/chart/api" "github.com/fleetdm/fleet/v4/server/chart/internal/types" @@ -126,12 +127,12 @@ func (s *Service) GetChartData(ctx context.Context, metric string, opts api.Requ ExcludeHostIDs: opts.ExcludeHostIDs, } - filterMask, err := s.hostCache.Get(ctx, hostFilter, func(ctx context.Context) ([]byte, error) { + filterMask, err := s.hostCache.Get(ctx, hostFilter, func(ctx context.Context) (*roaring.Bitmap, error) { hostIDs, err := s.store.GetHostIDsForFilter(ctx, hostFilter) if err != nil { return nil, ctxerr.Wrap(ctx, err, "fetch host IDs for chart filter") } - return chart.HostIDsToBlob(hostIDs), nil + return chart.NewBitmap(hostIDs), nil }) if err != nil { return nil, err @@ -158,7 +159,7 @@ func (s *Service) GetChartData(ctx context.Context, metric string, opts api.Requ return &api.Response{ Metric: metric, Visualization: dataset.DefaultVisualization(), - TotalHosts: chart.BlobPopcount(filterMask), + TotalHosts: int(chart.BlobPopcount(filterMask)), //nolint:gosec // host counts fit comfortably in int Resolution: formatResolution(bucketSize), Days: opts.Days, Filters: api.Filters{ @@ -231,7 +232,7 @@ func (s *Service) ScrubDatasetFleet(ctx context.Context, dataset string, fleetID } return nil } - mask := 
chart.HostIDsToBlob(hostIDs) + mask := chart.NewBitmap(hostIDs) return s.store.ApplyScrubMaskToDataset(ctx, dataset, mask, scrubBatchSize) } diff --git a/server/chart/internal/service/service_test.go b/server/chart/internal/service/service_test.go index 9cb111d35e9..c2dc96976c2 100644 --- a/server/chart/internal/service/service_test.go +++ b/server/chart/internal/service/service_test.go @@ -6,6 +6,7 @@ import ( "testing" "time" + "github.com/RoaringBitmap/roaring" "github.com/fleetdm/fleet/v4/server/chart" "github.com/fleetdm/fleet/v4/server/chart/api" "github.com/fleetdm/fleet/v4/server/chart/internal/types" @@ -56,16 +57,16 @@ func globalViewer() *mockViewerProvider { return &mockViewerProvider{isGlobal: t // mockDatastore implements types.Datastore for unit tests. type mockDatastore struct { - getSCDDataFunc func(ctx context.Context, dataset string, startDate, endDate time.Time, bucketSize time.Duration, strategy api.SampleStrategy, filterMask []byte, entityIDs []string) ([]api.DataPoint, error) + getSCDDataFunc func(ctx context.Context, dataset string, startDate, endDate time.Time, bucketSize time.Duration, strategy api.SampleStrategy, filterMask *roaring.Bitmap, entityIDs []string) ([]api.DataPoint, error) getHostIDsForFilterFunc func(ctx context.Context, hostFilter *types.HostFilter) ([]uint, error) findOnlineHostIDsFn func(ctx context.Context, now time.Time, disabledFleetIDs []uint) ([]uint, error) affectedHostIDsByCVEFn func(ctx context.Context, disabledFleetIDs []uint, cves []string) (map[string][]uint, error) trackedCriticalCVEsFn func(ctx context.Context) ([]string, error) - recordBucketDataFn func(ctx context.Context, dataset string, bucketStart time.Time, bucketSize time.Duration, strategy api.SampleStrategy, entityBitmaps map[string][]byte) error + recordBucketDataFn func(ctx context.Context, dataset string, bucketStart time.Time, bucketSize time.Duration, strategy api.SampleStrategy, entityBitmaps map[string]*roaring.Bitmap) error 
recordBucketDataInvoked bool deleteAllForDatasetFn func(ctx context.Context, dataset string, batchSize int) error hostIDsInFleetsFn func(ctx context.Context, fleetIDs []uint) ([]uint, error) - applyScrubMaskFn func(ctx context.Context, dataset string, mask []byte, batchSize int) error + applyScrubMaskFn func(ctx context.Context, dataset string, mask *roaring.Bitmap, batchSize int) error } func (m *mockDatastore) FindOnlineHostIDs(ctx context.Context, now time.Time, disabledFleetIDs []uint) ([]uint, error) { @@ -89,7 +90,7 @@ func (m *mockDatastore) TrackedCriticalCVEs(ctx context.Context) ([]string, erro return nil, nil } -func (m *mockDatastore) RecordBucketData(ctx context.Context, dataset string, bucketStart time.Time, bucketSize time.Duration, strategy api.SampleStrategy, entityBitmaps map[string][]byte) error { +func (m *mockDatastore) RecordBucketData(ctx context.Context, dataset string, bucketStart time.Time, bucketSize time.Duration, strategy api.SampleStrategy, entityBitmaps map[string]*roaring.Bitmap) error { m.recordBucketDataInvoked = true if m.recordBucketDataFn != nil { return m.recordBucketDataFn(ctx, dataset, bucketStart, bucketSize, strategy, entityBitmaps) @@ -97,7 +98,7 @@ func (m *mockDatastore) RecordBucketData(ctx context.Context, dataset string, bu return nil } -func (m *mockDatastore) GetSCDData(ctx context.Context, dataset string, startDate, endDate time.Time, bucketSize time.Duration, strategy api.SampleStrategy, filterMask []byte, entityIDs []string) ([]api.DataPoint, error) { +func (m *mockDatastore) GetSCDData(ctx context.Context, dataset string, startDate, endDate time.Time, bucketSize time.Duration, strategy api.SampleStrategy, filterMask *roaring.Bitmap, entityIDs []string) ([]api.DataPoint, error) { if m.getSCDDataFunc != nil { return m.getSCDDataFunc(ctx, dataset, startDate, endDate, bucketSize, strategy, filterMask, entityIDs) } @@ -129,7 +130,7 @@ func (m *mockDatastore) HostIDsInFleets(ctx context.Context, fleetIDs []uint) ([ 
return nil, nil } -func (m *mockDatastore) ApplyScrubMaskToDataset(ctx context.Context, dataset string, mask []byte, batchSize int) error { +func (m *mockDatastore) ApplyScrubMaskToDataset(ctx context.Context, dataset string, mask *roaring.Bitmap, batchSize int) error { if m.applyScrubMaskFn != nil { return m.applyScrubMaskFn(ctx, dataset, mask, batchSize) } @@ -202,8 +203,8 @@ func TestGetChartDataUptimeDefault(t *testing.T) { var gotBucketSize time.Duration var gotStart, gotEnd time.Time var gotStrategy api.SampleStrategy - var gotMask []byte - ds.getSCDDataFunc = func(_ context.Context, dataset string, start, end time.Time, bucketSize time.Duration, strategy api.SampleStrategy, mask []byte, _ []string) ([]api.DataPoint, error) { + var gotMask *roaring.Bitmap + ds.getSCDDataFunc = func(_ context.Context, dataset string, start, end time.Time, bucketSize time.Duration, strategy api.SampleStrategy, mask *roaring.Bitmap, _ []string) ([]api.DataPoint, error) { assert.Equal(t, "uptime", dataset) gotBucketSize = bucketSize gotStart = start @@ -222,7 +223,7 @@ func TestGetChartDataUptimeDefault(t *testing.T) { assert.Equal(t, 7, resp.Days) assert.Equal(t, 3*time.Hour, gotBucketSize) assert.Equal(t, api.SampleStrategyAccumulate, gotStrategy) - assert.Equal(t, 200, chart.BlobPopcount(gotMask), "filter mask should encode all 200 host IDs") + assert.Equal(t, uint64(200), chart.BlobPopcount(gotMask), "filter mask should encode all 200 host IDs") // Span must be exactly 7 days. 
assert.Equal(t, 7*24*time.Hour, gotEnd.Sub(gotStart)) } @@ -245,7 +246,7 @@ func TestGetChartDataUptimeResolution(t *testing.T) { svc.RegisterDataset(&chart.UptimeDataset{}) var gotBucketSize time.Duration - ds.getSCDDataFunc = func(_ context.Context, _ string, _, _ time.Time, bucketSize time.Duration, _ api.SampleStrategy, _ []byte, _ []string) ([]api.DataPoint, error) { + ds.getSCDDataFunc = func(_ context.Context, _ string, _, _ time.Time, bucketSize time.Duration, _ api.SampleStrategy, _ *roaring.Bitmap, _ []string) ([]api.DataPoint, error) { gotBucketSize = bucketSize return nil, nil } @@ -278,7 +279,7 @@ func TestGetChartDataCVEResolution(t *testing.T) { var gotBucketSize time.Duration var gotStrategy api.SampleStrategy - ds.getSCDDataFunc = func(_ context.Context, _ string, _, _ time.Time, bucketSize time.Duration, strategy api.SampleStrategy, _ []byte, _ []string) ([]api.DataPoint, error) { + ds.getSCDDataFunc = func(_ context.Context, _ string, _, _ time.Time, bucketSize time.Duration, strategy api.SampleStrategy, _ *roaring.Bitmap, _ []string) ([]api.DataPoint, error) { gotBucketSize = bucketSize gotStrategy = strategy return nil, nil @@ -302,7 +303,7 @@ func TestGetChartDataCVEUsesCuratedFilter(t *testing.T) { return []string{"CVE-A", "CVE-B"}, nil } var gotEntityIDs []string - ds.getSCDDataFunc = func(_ context.Context, _ string, _, _ time.Time, _ time.Duration, _ api.SampleStrategy, _ []byte, entityIDs []string) ([]api.DataPoint, error) { + ds.getSCDDataFunc = func(_ context.Context, _ string, _, _ time.Time, _ time.Duration, _ api.SampleStrategy, _ *roaring.Bitmap, entityIDs []string) ([]api.DataPoint, error) { gotEntityIDs = entityIDs return nil, nil } @@ -325,7 +326,7 @@ func TestGetChartDataCVEEmptySetReturnsZeros(t *testing.T) { } var gotEntityIDs []string gotEntityIDsIsNil := true - ds.getSCDDataFunc = func(_ context.Context, _ string, startDate, endDate time.Time, bucketSize time.Duration, _ api.SampleStrategy, _ []byte, entityIDs []string) 
([]api.DataPoint, error) { + ds.getSCDDataFunc = func(_ context.Context, _ string, startDate, endDate time.Time, bucketSize time.Duration, _ api.SampleStrategy, _ *roaring.Bitmap, entityIDs []string) ([]api.DataPoint, error) { gotEntityIDs = entityIDs gotEntityIDsIsNil = entityIDs == nil numBuckets := int(endDate.Sub(startDate) / bucketSize) @@ -357,7 +358,7 @@ func TestGetChartDataUptimePassesNilEntityIDs(t *testing.T) { return nil, nil } gotEntityIDsIsNil := false - ds.getSCDDataFunc = func(_ context.Context, _ string, _, _ time.Time, _ time.Duration, _ api.SampleStrategy, _ []byte, entityIDs []string) ([]api.DataPoint, error) { + ds.getSCDDataFunc = func(_ context.Context, _ string, _, _ time.Time, _ time.Duration, _ api.SampleStrategy, _ *roaring.Bitmap, entityIDs []string) ([]api.DataPoint, error) { gotEntityIDsIsNil = entityIDs == nil return nil, nil } @@ -377,8 +378,8 @@ func TestGetChartDataWithHostFilters(t *testing.T) { gotFilter = hostFilter return []uint{10, 20}, nil } - ds.getSCDDataFunc = func(_ context.Context, _ string, _, _ time.Time, _ time.Duration, _ api.SampleStrategy, mask []byte, _ []string) ([]api.DataPoint, error) { - assert.Equal(t, 2, chart.BlobPopcount(mask), "mask should encode the 2 host IDs returned") + ds.getSCDDataFunc = func(_ context.Context, _ string, _, _ time.Time, _ time.Duration, _ api.SampleStrategy, mask *roaring.Bitmap, _ []string) ([]api.DataPoint, error) { + assert.Equal(t, uint64(2), chart.BlobPopcount(mask), "mask should encode the 2 host IDs returned") return []api.DataPoint{{Value: 2}}, nil } @@ -577,7 +578,7 @@ func TestCollectDatasetsUptime(t *testing.T) { assert.Equal(t, now, gotNow) return []uint{1, 2, 3}, nil } - ds.recordBucketDataFn = func(_ context.Context, dataset string, bucketStart time.Time, bucketSize time.Duration, strategy api.SampleStrategy, entityBitmaps map[string][]byte) error { + ds.recordBucketDataFn = func(_ context.Context, dataset string, bucketStart time.Time, bucketSize time.Duration, 
strategy api.SampleStrategy, entityBitmaps map[string]*roaring.Bitmap) error { assert.Equal(t, "uptime", dataset) assert.Equal(t, wantBucketStart, bucketStart) assert.Equal(t, time.Hour, bucketSize) @@ -612,7 +613,7 @@ func TestCollectDatasetsCVE(t *testing.T) { "CVE-2024-0002": {2, 4}, }, nil } - ds.recordBucketDataFn = func(_ context.Context, dataset string, bucketStart time.Time, bucketSize time.Duration, strategy api.SampleStrategy, entityBitmaps map[string][]byte) error { + ds.recordBucketDataFn = func(_ context.Context, dataset string, bucketStart time.Time, bucketSize time.Duration, strategy api.SampleStrategy, entityBitmaps map[string]*roaring.Bitmap) error { assert.Equal(t, "cve", dataset) assert.Equal(t, wantBucketStart, bucketStart) assert.Equal(t, time.Hour, bucketSize) @@ -646,8 +647,8 @@ func TestCollectDatasetsCVEEmptyTracked(t *testing.T) { assert.Empty(t, cves, "empty tracked set must propagate as empty cves filter") return map[string][]uint{}, nil } - var gotBitmaps map[string][]byte - ds.recordBucketDataFn = func(_ context.Context, _ string, _ time.Time, _ time.Duration, _ api.SampleStrategy, entityBitmaps map[string][]byte) error { + var gotBitmaps map[string]*roaring.Bitmap + ds.recordBucketDataFn = func(_ context.Context, _ string, _ time.Time, _ time.Duration, _ api.SampleStrategy, entityBitmaps map[string]*roaring.Bitmap) error { gotBitmaps = entityBitmaps return nil } @@ -691,7 +692,7 @@ func TestCollectDatasetsForwardsScope(t *testing.T) { gotDisabled = disabled return []uint{1}, nil } - ds.recordBucketDataFn = func(_ context.Context, _ string, _ time.Time, _ time.Duration, _ api.SampleStrategy, _ map[string][]byte) error { + ds.recordBucketDataFn = func(_ context.Context, _ string, _ time.Time, _ time.Duration, _ api.SampleStrategy, _ map[string]*roaring.Bitmap) error { return nil } err := svc.CollectDatasets(t.Context(), now, func(_ string) (bool, []uint) { @@ -714,7 +715,7 @@ func TestCollectDatasetsForwardsScope(t *testing.T) { 
gotDisabled = disabled return map[string][]uint{"CVE-1": {1}}, nil } - ds.recordBucketDataFn = func(_ context.Context, _ string, _ time.Time, _ time.Duration, _ api.SampleStrategy, _ map[string][]byte) error { + ds.recordBucketDataFn = func(_ context.Context, _ string, _ time.Time, _ time.Duration, _ api.SampleStrategy, _ map[string]*roaring.Bitmap) error { return nil } err := svc.CollectDatasets(t.Context(), now, func(_ string) (bool, []uint) { @@ -734,7 +735,7 @@ func TestCollectDatasetsForwardsScope(t *testing.T) { gotDisabled = disabled return []uint{1}, nil } - ds.recordBucketDataFn = func(_ context.Context, _ string, _ time.Time, _ time.Duration, _ api.SampleStrategy, _ map[string][]byte) error { + ds.recordBucketDataFn = func(_ context.Context, _ string, _ time.Time, _ time.Duration, _ api.SampleStrategy, _ map[string]*roaring.Bitmap) error { return nil } err := svc.CollectDatasets(t.Context(), now, nil) @@ -772,9 +773,9 @@ func TestScrubDatasetFleet(t *testing.T) { } var gotDataset string - var gotMask []byte + var gotMask *roaring.Bitmap var gotBatchSize int - ds.applyScrubMaskFn = func(_ context.Context, dataset string, mask []byte, batchSize int) error { + ds.applyScrubMaskFn = func(_ context.Context, dataset string, mask *roaring.Bitmap, batchSize int) error { gotDataset = dataset gotMask = mask gotBatchSize = batchSize @@ -786,7 +787,7 @@ func TestScrubDatasetFleet(t *testing.T) { assert.Equal(t, "cve", gotDataset) assert.Equal(t, scrubBatchSize, gotBatchSize) // Mask must have bits set at positions 3, 7, 12. 
- assert.Equal(t, 3, chart.BlobPopcount(gotMask)) + assert.Equal(t, uint64(3), chart.BlobPopcount(gotMask)) }) t.Run("empty fleet IDs is no-op", func(t *testing.T) { @@ -796,7 +797,7 @@ func TestScrubDatasetFleet(t *testing.T) { t.Fatal("HostIDsInFleets should not have been called for empty input") return nil, nil } - ds.applyScrubMaskFn = func(_ context.Context, _ string, _ []byte, _ int) error { + ds.applyScrubMaskFn = func(_ context.Context, _ string, _ *roaring.Bitmap, _ int) error { t.Fatal("ApplyScrubMaskToDataset should not have been called for empty input") return nil } @@ -810,7 +811,7 @@ func TestScrubDatasetFleet(t *testing.T) { ds.hostIDsInFleetsFn = func(_ context.Context, _ []uint) ([]uint, error) { return nil, nil } - ds.applyScrubMaskFn = func(_ context.Context, _ string, _ []byte, _ int) error { + ds.applyScrubMaskFn = func(_ context.Context, _ string, _ *roaring.Bitmap, _ int) error { t.Fatal("ApplyScrubMaskToDataset should not be called when no hosts resolved") return nil } diff --git a/server/chart/internal/testutils/testutils.go b/server/chart/internal/testutils/testutils.go index 1ec51f6fd5e..904f2415a2f 100644 --- a/server/chart/internal/testutils/testutils.go +++ b/server/chart/internal/testutils/testutils.go @@ -6,6 +6,7 @@ import ( "testing" "time" + "github.com/fleetdm/fleet/v4/server/chart" common_mysql "github.com/fleetdm/fleet/v4/server/platform/mysql" mysql_testing_utils "github.com/fleetdm/fleet/v4/server/platform/mysql/testing_utils" "github.com/jmoiron/sqlx" @@ -65,16 +66,16 @@ func (tdb *TestDB) InsertSCDRow(t *testing.T, dataset, entityID string, validFro require.NoError(t, err) } -// InsertSCDRowWithBitmap inserts a host_scd_data row with a caller-supplied -// host_bitmap and returns the auto-assigned id. 
-func (tdb *TestDB) InsertSCDRowWithBitmap(t *testing.T, dataset, entityID string, bitmap []byte, validFrom, validTo time.Time) uint { +// InsertSCDRowWithBlob inserts a host_scd_data row with a caller-supplied +// chart.Blob (bytes + encoding) and returns the auto-assigned id. +func (tdb *TestDB) InsertSCDRowWithBlob(t *testing.T, dataset, entityID string, blob chart.Blob, validFrom, validTo time.Time) uint { t.Helper() ctx := t.Context() res, err := tdb.DB.ExecContext(ctx, ` - INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, valid_from, valid_to) - VALUES (?, ?, ?, ?, ?) - `, dataset, entityID, bitmap, validFrom, validTo) + INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, encoding_type, valid_from, valid_to) + VALUES (?, ?, ?, ?, ?, ?) + `, dataset, entityID, blob.Bytes, blob.Encoding, validFrom, validTo) require.NoError(t, err) id, err := res.LastInsertId() require.NoError(t, err) @@ -82,15 +83,54 @@ func (tdb *TestDB) InsertSCDRowWithBitmap(t *testing.T, dataset, entityID string return uint(id) //nolint:gosec // G115: id is a positive AUTO_INCREMENT primary key } -// SCDBitmap returns the host_bitmap column for the given row id. -func (tdb *TestDB) SCDBitmap(t *testing.T, id uint) []byte { +// InsertSCDRowWithHostIDs is a convenience wrapper for tests that just want to +// store a set of host IDs — produces a roaring-encoded row. +func (tdb *TestDB) InsertSCDRowWithHostIDs(t *testing.T, dataset, entityID string, hostIDs []uint, validFrom, validTo time.Time) uint { + t.Helper() + return tdb.InsertSCDRowWithBlob(t, dataset, entityID, chart.HostIDsToBlob(hostIDs), validFrom, validTo) +} + +// DenseBlob builds a legacy dense-encoded chart.Blob for the given host IDs. +// Used to seed pre-migration fixtures that exercise the dense decode path. +// Production writes always go through chart.HostIDsToBlob (roaring). 
+func DenseBlob(ids []uint) chart.Blob { + if len(ids) == 0 { + return chart.Blob{Encoding: chart.EncodingDense} + } + var maxID uint + for _, id := range ids { + if id > maxID { + maxID = id + } + } + bytes := make([]byte, maxID/8+1) + for _, id := range ids { + bytes[id/8] |= 1 << (id % 8) + } + return chart.Blob{Bytes: bytes, Encoding: chart.EncodingDense} +} + +// SCDBlob returns the host_bitmap + encoding_type for the given row id. +func (tdb *TestDB) SCDBlob(t *testing.T, id uint) chart.Blob { t.Helper() ctx := t.Context() - var b []byte - err := tdb.DB.GetContext(ctx, &b, `SELECT host_bitmap FROM host_scd_data WHERE id = ?`, id) + type row struct { + HostBitmap []byte `db:"host_bitmap"` + EncodingType uint8 `db:"encoding_type"` + } + var r row + err := tdb.DB.GetContext(ctx, &r, `SELECT host_bitmap, encoding_type FROM host_scd_data WHERE id = ?`, id) + require.NoError(t, err) + return chart.Blob{Bytes: r.HostBitmap, Encoding: r.EncodingType} +} + +// SCDHostIDs returns the decoded host IDs for the given row id. +func (tdb *TestDB) SCDHostIDs(t *testing.T, id uint) []uint { + t.Helper() + rb, err := chart.DecodeBitmap(tdb.SCDBlob(t, id)) require.NoError(t, err) - return b + return chart.BitmapToHostIDs(rb) } // CountSCDRows returns the total number of rows in host_scd_data. diff --git a/server/chart/internal/types/chart.go b/server/chart/internal/types/chart.go index 6eec1d73ef3..cf06a19020b 100644 --- a/server/chart/internal/types/chart.go +++ b/server/chart/internal/types/chart.go @@ -5,6 +5,7 @@ import ( "context" "time" + "github.com/RoaringBitmap/roaring" "github.com/fleetdm/fleet/v4/server/chart/api" ) @@ -56,14 +57,15 @@ type Datastore interface { // RecordBucketData writes one or more entity bitmaps for the given bucket using // the specified sample strategy. See api.SampleStrategy for the semantics of - // each strategy. + // each strategy. 
Bitmaps are passed in op form (*roaring.Bitmap); the + // datastore serializes via chart.BitmapToBlob at the storage boundary. RecordBucketData( ctx context.Context, dataset string, bucketStart time.Time, bucketSize time.Duration, strategy api.SampleStrategy, - entityBitmaps map[string][]byte, + entityBitmaps map[string]*roaring.Bitmap, ) error // GetSCDData returns per-bucket distinct-host counts for a dataset over the @@ -74,7 +76,7 @@ type Datastore interface { // - Snapshot: for each entity, pick the row active at bucketEnd, then OR // across entities ("state as of the end of the bucket"). // filterMask is always applied via bitmap AND — callers build it via - // GetHostIDsForFilter + chart.HostIDsToBlob, usually through a cache. + // GetHostIDsForFilter + chart.NewBitmap, usually through a cache. // The entity filter is applied via entity_id IN. GetSCDData( ctx context.Context, @@ -82,7 +84,7 @@ type Datastore interface { startDate, endDate time.Time, bucketSize time.Duration, strategy api.SampleStrategy, - filterMask []byte, + filterMask *roaring.Bitmap, entityIDs []string, ) ([]api.DataPoint, error) @@ -109,5 +111,5 @@ type Datastore interface { // dataset in id-order with `batchSize`-row pages, computing // chart.BlobANDNOT(host_bitmap, mask) and writing the result back via // UPDATE. Used by the per-fleet scrub worker. 
- ApplyScrubMaskToDataset(ctx context.Context, dataset string, mask []byte, batchSize int) error + ApplyScrubMaskToDataset(ctx context.Context, dataset string, mask *roaring.Bitmap, batchSize int) error } diff --git a/server/datastore/mysql/migrations/tables/20260518194422_AddEncodingTypeToHostSCDData.go b/server/datastore/mysql/migrations/tables/20260518194422_AddEncodingTypeToHostSCDData.go new file mode 100644 index 00000000000..12f6df131ae --- /dev/null +++ b/server/datastore/mysql/migrations/tables/20260518194422_AddEncodingTypeToHostSCDData.go @@ -0,0 +1,34 @@ +package tables + +import ( + "database/sql" + "fmt" +) + +func init() { + MigrationClient.AddMigration(Up_20260518194422, Down_20260518194422) +} + +// Up_20260518194422 adds the encoding_type column that discriminates between +// the legacy dense bitmap format (encoding_type = 0) and the new roaring +// bitmap format (encoding_type = 1). ALGORITHM=INSTANT is a metadata-only +// change on MySQL 8.0+; existing rows are not rewritten and read back with +// encoding_type = 0 via the column DEFAULT, correctly identifying them as +// dense. New writes always set encoding_type = 1. 
+func Up_20260518194422(tx *sql.Tx) error { + if columnExists(tx, "host_scd_data", "encoding_type") { + return nil + } + if _, err := tx.Exec(` + ALTER TABLE host_scd_data + ADD COLUMN encoding_type TINYINT NOT NULL DEFAULT 0, + ALGORITHM=INSTANT + `); err != nil { + return fmt.Errorf("add encoding_type to host_scd_data: %w", err) + } + return nil +} + +func Down_20260518194422(tx *sql.Tx) error { + return nil +} diff --git a/server/datastore/mysql/migrations/tables/20260518194422_AddEncodingTypeToHostSCDData_test.go b/server/datastore/mysql/migrations/tables/20260518194422_AddEncodingTypeToHostSCDData_test.go new file mode 100644 index 00000000000..f1d96f9c91b --- /dev/null +++ b/server/datastore/mysql/migrations/tables/20260518194422_AddEncodingTypeToHostSCDData_test.go @@ -0,0 +1,58 @@ +package tables + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestUp_20260518194422(t *testing.T) { + db := applyUpToPrev(t) + + // Insert a pre-migration row representing a dense host_bitmap. After the + // migration this row must still be readable, with encoding_type defaulting + // to 0 (dense). + denseBytes := []byte{0x82, 0x05} // bits 1, 7, 8, 10 set: hosts {1, 7, 8, 10} + validFrom := time.Date(2026, 5, 1, 0, 0, 0, 0, time.UTC) + _, err := db.Exec(` + INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, valid_from) + VALUES (?, ?, ?, ?)`, + "cve", "CVE-2026-0001", denseBytes, validFrom, + ) + require.NoError(t, err) + + applyNext(t, db) + + // Pre-existing row reads back with encoding_type = 0 via DEFAULT and + // unchanged host_bitmap bytes. + var encoding int + var bitmap []byte + err = db.QueryRow(` + SELECT encoding_type, host_bitmap FROM host_scd_data + WHERE dataset = ? 
AND entity_id = ?`, + "cve", "CVE-2026-0001", + ).Scan(&encoding, &bitmap) + require.NoError(t, err) + assert.Equal(t, 0, encoding, "legacy row should default to encoding_type=0 (dense)") + assert.Equal(t, denseBytes, bitmap, "INSTANT ALTER must not rewrite row data") + + // New rows may be written with encoding_type = 1 (roaring). + roaringBytes := []byte{0x3A, 0x30, 0x00, 0x00} // arbitrary stand-in; library serializes its own format + _, err = db.Exec(` + INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, encoding_type, valid_from) + VALUES (?, ?, ?, ?, ?)`, + "cve", "CVE-2026-0002", roaringBytes, 1, validFrom, + ) + require.NoError(t, err) + + err = db.QueryRow(` + SELECT encoding_type, host_bitmap FROM host_scd_data + WHERE dataset = ? AND entity_id = ?`, + "cve", "CVE-2026-0002", + ).Scan(&encoding, &bitmap) + require.NoError(t, err) + assert.Equal(t, 1, encoding) + assert.Equal(t, roaringBytes, bitmap) +} diff --git a/server/datastore/mysql/schema.sql b/server/datastore/mysql/schema.sql index b5710d7966e..c1b67d14cb7 100644 --- a/server/datastore/mysql/schema.sql +++ b/server/datastore/mysql/schema.sql @@ -1114,11 +1114,12 @@ CREATE TABLE `host_scd_data` ( `valid_to` datetime NOT NULL DEFAULT '9999-12-31 00:00:00', `created_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP, `updated_at` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + `encoding_type` tinyint NOT NULL DEFAULT '0', PRIMARY KEY (`id`), UNIQUE KEY `uniq_entity_bucket` (`dataset`,`entity_id`,`valid_from`), KEY `idx_dataset_range` (`dataset`,`valid_from`,`valid_to`), KEY `idx_valid_to_dataset` (`valid_to`,`dataset`,`entity_id`) -) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; +) /*!50100 TABLESPACE `innodb_system` */ ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; /*!40101 SET character_set_client = @saved_cs_client */; /*!40101 SET @saved_cs_client = @@character_set_client */; /*!50503 SET character_set_client = utf8mb4 */; @@ 
-1974,9 +1975,9 @@ CREATE TABLE `migration_status_tables` ( `is_applied` tinyint(1) NOT NULL, `tstamp` timestamp NULL DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (`id`) -) /*!50100 TABLESPACE `innodb_system` */ ENGINE=InnoDB AUTO_INCREMENT=529 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; +) /*!50100 TABLESPACE `innodb_system` */ ENGINE=InnoDB AUTO_INCREMENT=530 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; /*!40101 SET character_set_client = @saved_cs_client */; -INSERT INTO `migration_status_tables` VALUES (1,0,1,'2020-01-01 01:01:01'),(2,20161118193812,1,'2020-01-01 01:01:01'),(3,20161118211713,1,'2020-01-01 01:01:01'),(4,20161118212436,1,'2020-01-01 01:01:01'),(5,20161118212515,1,'2020-01-01 01:01:01'),(6,20161118212528,1,'2020-01-01 01:01:01'),(7,20161118212538,1,'2020-01-01 01:01:01'),(8,20161118212549,1,'2020-01-01 01:01:01'),(9,20161118212557,1,'2020-01-01 01:01:01'),(10,20161118212604,1,'2020-01-01 01:01:01'),(11,20161118212613,1,'2020-01-01 01:01:01'),(12,20161118212621,1,'2020-01-01 01:01:01'),(13,20161118212630,1,'2020-01-01 01:01:01'),(14,20161118212641,1,'2020-01-01 01:01:01'),(15,20161118212649,1,'2020-01-01 01:01:01'),(16,20161118212656,1,'2020-01-01 01:01:01'),(17,20161118212758,1,'2020-01-01 01:01:01'),(18,20161128234849,1,'2020-01-01 01:01:01'),(19,20161230162221,1,'2020-01-01 01:01:01'),(20,20170104113816,1,'2020-01-01 01:01:01'),(21,20170105151732,1,'2020-01-01 01:01:01'),(22,20170108191242,1,'2020-01-01 01:01:01'),(23,20170109094020,1,'2020-01-01 01:01:01'),(24,20170109130438,1,'2020-01-01 01:01:01'),(25,20170110202752,1,'2020-01-01 01:01:01'),(26,20170111133013,1,'2020-01-01 01:01:01'),(27,20170117025759,1,'2020-01-01 01:01:01'),(28,20170118191001,1,'2020-01-01 01:01:01'),(29,20170119234632,1,'2020-01-01 01:01:01'),(30,20170124230432,1,'2020-01-01 01:01:01'),(31,20170127014618,1,'2020-01-01 01:01:01'),(32,20170131232841,1,'2020-01-01 01:01:01'),(33,20170223094154,1,'2020-01-01 01:01:01'),(34,20170306075207,1,'2020-01-01 
01:01:01'),(35,20170309100733,1,'2020-01-01 01:01:01'),(36,20170331111922,1,'2020-01-01 01:01:01'),(37,20170502143928,1,'2020-01-01 01:01:01'),(38,20170504130602,1,'2020-01-01 01:01:01'),(39,20170509132100,1,'2020-01-01 01:01:01'),(40,20170519105647,1,'2020-01-01 01:01:01'),(41,20170519105648,1,'2020-01-01 01:01:01'),(42,20170831234300,1,'2020-01-01 01:01:01'),(43,20170831234301,1,'2020-01-01 01:01:01'),(44,20170831234303,1,'2020-01-01 01:01:01'),(45,20171116163618,1,'2020-01-01 01:01:01'),(46,20171219164727,1,'2020-01-01 01:01:01'),(47,20180620164811,1,'2020-01-01 01:01:01'),(48,20180620175054,1,'2020-01-01 01:01:01'),(49,20180620175055,1,'2020-01-01 01:01:01'),(50,20191010101639,1,'2020-01-01 01:01:01'),(51,20191010155147,1,'2020-01-01 01:01:01'),(52,20191220130734,1,'2020-01-01 01:01:01'),(53,20200311140000,1,'2020-01-01 01:01:01'),(54,20200405120000,1,'2020-01-01 01:01:01'),(55,20200407120000,1,'2020-01-01 01:01:01'),(56,20200420120000,1,'2020-01-01 01:01:01'),(57,20200504120000,1,'2020-01-01 01:01:01'),(58,20200512120000,1,'2020-01-01 01:01:01'),(59,20200707120000,1,'2020-01-01 01:01:01'),(60,20201011162341,1,'2020-01-01 01:01:01'),(61,20201021104586,1,'2020-01-01 01:01:01'),(62,20201102112520,1,'2020-01-01 01:01:01'),(63,20201208121729,1,'2020-01-01 01:01:01'),(64,20201215091637,1,'2020-01-01 01:01:01'),(65,20210119174155,1,'2020-01-01 01:01:01'),(66,20210326182902,1,'2020-01-01 01:01:01'),(67,20210421112652,1,'2020-01-01 01:01:01'),(68,20210506095025,1,'2020-01-01 01:01:01'),(69,20210513115729,1,'2020-01-01 01:01:01'),(70,20210526113559,1,'2020-01-01 01:01:01'),(71,20210601000001,1,'2020-01-01 01:01:01'),(72,20210601000002,1,'2020-01-01 01:01:01'),(73,20210601000003,1,'2020-01-01 01:01:01'),(74,20210601000004,1,'2020-01-01 01:01:01'),(75,20210601000005,1,'2020-01-01 01:01:01'),(76,20210601000006,1,'2020-01-01 01:01:01'),(77,20210601000007,1,'2020-01-01 01:01:01'),(78,20210601000008,1,'2020-01-01 01:01:01'),(79,20210606151329,1,'2020-01-01 
01:01:01'),(80,20210616163757,1,'2020-01-01 01:01:01'),(81,20210617174723,1,'2020-01-01 01:01:01'),(82,20210622160235,1,'2020-01-01 01:01:01'),(83,20210623100031,1,'2020-01-01 01:01:01'),(84,20210623133615,1,'2020-01-01 01:01:01'),(85,20210708143152,1,'2020-01-01 01:01:01'),(86,20210709124443,1,'2020-01-01 01:01:01'),(87,20210712155608,1,'2020-01-01 01:01:01'),(88,20210714102108,1,'2020-01-01 01:01:01'),(89,20210719153709,1,'2020-01-01 01:01:01'),(90,20210721171531,1,'2020-01-01 01:01:01'),(91,20210723135713,1,'2020-01-01 01:01:01'),(92,20210802135933,1,'2020-01-01 01:01:01'),(93,20210806112844,1,'2020-01-01 01:01:01'),(94,20210810095603,1,'2020-01-01 01:01:01'),(95,20210811150223,1,'2020-01-01 01:01:01'),(96,20210818151827,1,'2020-01-01 01:01:01'),(97,20210818151828,1,'2020-01-01 01:01:01'),(98,20210818182258,1,'2020-01-01 01:01:01'),(99,20210819131107,1,'2020-01-01 01:01:01'),(100,20210819143446,1,'2020-01-01 01:01:01'),(101,20210903132338,1,'2020-01-01 01:01:01'),(102,20210915144307,1,'2020-01-01 01:01:01'),(103,20210920155130,1,'2020-01-01 01:01:01'),(104,20210927143115,1,'2020-01-01 01:01:01'),(105,20210927143116,1,'2020-01-01 01:01:01'),(106,20211013133706,1,'2020-01-01 01:01:01'),(107,20211013133707,1,'2020-01-01 01:01:01'),(108,20211102135149,1,'2020-01-01 01:01:01'),(109,20211109121546,1,'2020-01-01 01:01:01'),(110,20211110163320,1,'2020-01-01 01:01:01'),(111,20211116184029,1,'2020-01-01 01:01:01'),(112,20211116184030,1,'2020-01-01 01:01:01'),(113,20211202092042,1,'2020-01-01 01:01:01'),(114,20211202181033,1,'2020-01-01 01:01:01'),(115,20211207161856,1,'2020-01-01 01:01:01'),(116,20211216131203,1,'2020-01-01 01:01:01'),(117,20211221110132,1,'2020-01-01 01:01:01'),(118,20220107155700,1,'2020-01-01 01:01:01'),(119,20220125105650,1,'2020-01-01 01:01:01'),(120,20220201084510,1,'2020-01-01 01:01:01'),(121,20220208144830,1,'2020-01-01 01:01:01'),(122,20220208144831,1,'2020-01-01 01:01:01'),(123,20220215152203,1,'2020-01-01 
01:01:01'),(124,20220223113157,1,'2020-01-01 01:01:01'),(125,20220307104655,1,'2020-01-01 01:01:01'),(126,20220309133956,1,'2020-01-01 01:01:01'),(127,20220316155700,1,'2020-01-01 01:01:01'),(128,20220323152301,1,'2020-01-01 01:01:01'),(129,20220330100659,1,'2020-01-01 01:01:01'),(130,20220404091216,1,'2020-01-01 01:01:01'),(131,20220419140750,1,'2020-01-01 01:01:01'),(132,20220428140039,1,'2020-01-01 01:01:01'),(133,20220503134048,1,'2020-01-01 01:01:01'),(134,20220524102918,1,'2020-01-01 01:01:01'),(135,20220526123327,1,'2020-01-01 01:01:01'),(136,20220526123328,1,'2020-01-01 01:01:01'),(137,20220526123329,1,'2020-01-01 01:01:01'),(138,20220608113128,1,'2020-01-01 01:01:01'),(139,20220627104817,1,'2020-01-01 01:01:01'),(140,20220704101843,1,'2020-01-01 01:01:01'),(141,20220708095046,1,'2020-01-01 01:01:01'),(142,20220713091130,1,'2020-01-01 01:01:01'),(143,20220802135510,1,'2020-01-01 01:01:01'),(144,20220818101352,1,'2020-01-01 01:01:01'),(145,20220822161445,1,'2020-01-01 01:01:01'),(146,20220831100036,1,'2020-01-01 01:01:01'),(147,20220831100151,1,'2020-01-01 01:01:01'),(148,20220908181826,1,'2020-01-01 01:01:01'),(149,20220914154915,1,'2020-01-01 01:01:01'),(150,20220915165115,1,'2020-01-01 01:01:01'),(151,20220915165116,1,'2020-01-01 01:01:01'),(152,20220928100158,1,'2020-01-01 01:01:01'),(153,20221014084130,1,'2020-01-01 01:01:01'),(154,20221027085019,1,'2020-01-01 01:01:01'),(155,20221101103952,1,'2020-01-01 01:01:01'),(156,20221104144401,1,'2020-01-01 01:01:01'),(157,20221109100749,1,'2020-01-01 01:01:01'),(158,20221115104546,1,'2020-01-01 01:01:01'),(159,20221130114928,1,'2020-01-01 01:01:01'),(160,20221205112142,1,'2020-01-01 01:01:01'),(161,20221216115820,1,'2020-01-01 01:01:01'),(162,20221220195934,1,'2020-01-01 01:01:01'),(163,20221220195935,1,'2020-01-01 01:01:01'),(164,20221223174807,1,'2020-01-01 01:01:01'),(165,20221227163855,1,'2020-01-01 01:01:01'),(166,20221227163856,1,'2020-01-01 01:01:01'),(167,20230202224725,1,'2020-01-01 
01:01:01'),(168,20230206163608,1,'2020-01-01 01:01:01'),(169,20230214131519,1,'2020-01-01 01:01:01'),(170,20230303135738,1,'2020-01-01 01:01:01'),(171,20230313135301,1,'2020-01-01 01:01:01'),(172,20230313141819,1,'2020-01-01 01:01:01'),(173,20230315104937,1,'2020-01-01 01:01:01'),(174,20230317173844,1,'2020-01-01 01:01:01'),(175,20230320133602,1,'2020-01-01 01:01:01'),(176,20230330100011,1,'2020-01-01 01:01:01'),(177,20230330134823,1,'2020-01-01 01:01:01'),(178,20230405232025,1,'2020-01-01 01:01:01'),(179,20230408084104,1,'2020-01-01 01:01:01'),(180,20230411102858,1,'2020-01-01 01:01:01'),(181,20230421155932,1,'2020-01-01 01:01:01'),(182,20230425082126,1,'2020-01-01 01:01:01'),(183,20230425105727,1,'2020-01-01 01:01:01'),(184,20230501154913,1,'2020-01-01 01:01:01'),(185,20230503101418,1,'2020-01-01 01:01:01'),(186,20230515144206,1,'2020-01-01 01:01:01'),(187,20230517140952,1,'2020-01-01 01:01:01'),(188,20230517152807,1,'2020-01-01 01:01:01'),(189,20230518114155,1,'2020-01-01 01:01:01'),(190,20230520153236,1,'2020-01-01 01:01:01'),(191,20230525151159,1,'2020-01-01 01:01:01'),(192,20230530122103,1,'2020-01-01 01:01:01'),(193,20230602111827,1,'2020-01-01 01:01:01'),(194,20230608103123,1,'2020-01-01 01:01:01'),(195,20230629140529,1,'2020-01-01 01:01:01'),(196,20230629140530,1,'2020-01-01 01:01:01'),(197,20230711144622,1,'2020-01-01 01:01:01'),(198,20230721135421,1,'2020-01-01 01:01:01'),(199,20230721161508,1,'2020-01-01 01:01:01'),(200,20230726115701,1,'2020-01-01 01:01:01'),(201,20230807100822,1,'2020-01-01 01:01:01'),(202,20230814150442,1,'2020-01-01 01:01:01'),(203,20230823122728,1,'2020-01-01 01:01:01'),(204,20230906152143,1,'2020-01-01 01:01:01'),(205,20230911163618,1,'2020-01-01 01:01:01'),(206,20230912101759,1,'2020-01-01 01:01:01'),(207,20230915101341,1,'2020-01-01 01:01:01'),(208,20230918132351,1,'2020-01-01 01:01:01'),(209,20231004144339,1,'2020-01-01 01:01:01'),(210,20231009094541,1,'2020-01-01 01:01:01'),(211,20231009094542,1,'2020-01-01 
01:01:01'),(212,20231009094543,1,'2020-01-01 01:01:01'),(213,20231009094544,1,'2020-01-01 01:01:01'),(214,20231016091915,1,'2020-01-01 01:01:01'),(215,20231024174135,1,'2020-01-01 01:01:01'),(216,20231025120016,1,'2020-01-01 01:01:01'),(217,20231025160156,1,'2020-01-01 01:01:01'),(218,20231031165350,1,'2020-01-01 01:01:01'),(219,20231106144110,1,'2020-01-01 01:01:01'),(220,20231107130934,1,'2020-01-01 01:01:01'),(221,20231109115838,1,'2020-01-01 01:01:01'),(222,20231121054530,1,'2020-01-01 01:01:01'),(223,20231122101320,1,'2020-01-01 01:01:01'),(224,20231130132828,1,'2020-01-01 01:01:01'),(225,20231130132931,1,'2020-01-01 01:01:01'),(226,20231204155427,1,'2020-01-01 01:01:01'),(227,20231206142340,1,'2020-01-01 01:01:01'),(228,20231207102320,1,'2020-01-01 01:01:01'),(229,20231207102321,1,'2020-01-01 01:01:01'),(230,20231207133731,1,'2020-01-01 01:01:01'),(231,20231212094238,1,'2020-01-01 01:01:01'),(232,20231212095734,1,'2020-01-01 01:01:01'),(233,20231212161121,1,'2020-01-01 01:01:01'),(234,20231215122713,1,'2020-01-01 01:01:01'),(235,20231219143041,1,'2020-01-01 01:01:01'),(236,20231224070653,1,'2020-01-01 01:01:01'),(237,20240110134315,1,'2020-01-01 01:01:01'),(238,20240119091637,1,'2020-01-01 01:01:01'),(239,20240126020642,1,'2020-01-01 01:01:01'),(240,20240126020643,1,'2020-01-01 01:01:01'),(241,20240129162819,1,'2020-01-01 01:01:01'),(242,20240130115133,1,'2020-01-01 01:01:01'),(243,20240131083822,1,'2020-01-01 01:01:01'),(244,20240205095928,1,'2020-01-01 01:01:01'),(245,20240205121956,1,'2020-01-01 01:01:01'),(246,20240209110212,1,'2020-01-01 01:01:01'),(247,20240212111533,1,'2020-01-01 01:01:01'),(248,20240221112844,1,'2020-01-01 01:01:01'),(249,20240222073518,1,'2020-01-01 01:01:01'),(250,20240222135115,1,'2020-01-01 01:01:01'),(251,20240226082255,1,'2020-01-01 01:01:01'),(252,20240228082706,1,'2020-01-01 01:01:01'),(253,20240301173035,1,'2020-01-01 01:01:01'),(254,20240302111134,1,'2020-01-01 01:01:01'),(255,20240312103753,1,'2020-01-01 
01:01:01'),(256,20240313143416,1,'2020-01-01 01:01:01'),(257,20240314085226,1,'2020-01-01 01:01:01'),(258,20240314151747,1,'2020-01-01 01:01:01'),(259,20240320145650,1,'2020-01-01 01:01:01'),(260,20240327115530,1,'2020-01-01 01:01:01'),(261,20240327115617,1,'2020-01-01 01:01:01'),(262,20240408085837,1,'2020-01-01 01:01:01'),(263,20240415104633,1,'2020-01-01 01:01:01'),(264,20240430111727,1,'2020-01-01 01:01:01'),(265,20240515200020,1,'2020-01-01 01:01:01'),(266,20240521143023,1,'2020-01-01 01:01:01'),(267,20240521143024,1,'2020-01-01 01:01:01'),(268,20240601174138,1,'2020-01-01 01:01:01'),(269,20240607133721,1,'2020-01-01 01:01:01'),(270,20240612150059,1,'2020-01-01 01:01:01'),(271,20240613162201,1,'2020-01-01 01:01:01'),(272,20240613172616,1,'2020-01-01 01:01:01'),(273,20240618142419,1,'2020-01-01 01:01:01'),(274,20240625093543,1,'2020-01-01 01:01:01'),(275,20240626195531,1,'2020-01-01 01:01:01'),(276,20240702123921,1,'2020-01-01 01:01:01'),(277,20240703154849,1,'2020-01-01 01:01:01'),(278,20240707134035,1,'2020-01-01 01:01:01'),(279,20240707134036,1,'2020-01-01 01:01:01'),(280,20240709124958,1,'2020-01-01 01:01:01'),(281,20240709132642,1,'2020-01-01 01:01:01'),(282,20240709183940,1,'2020-01-01 01:01:01'),(283,20240710155623,1,'2020-01-01 01:01:01'),(284,20240723102712,1,'2020-01-01 01:01:01'),(285,20240725152735,1,'2020-01-01 01:01:01'),(286,20240725182118,1,'2020-01-01 01:01:01'),(287,20240726100517,1,'2020-01-01 01:01:01'),(288,20240730171504,1,'2020-01-01 01:01:01'),(289,20240730174056,1,'2020-01-01 01:01:01'),(290,20240730215453,1,'2020-01-01 01:01:01'),(291,20240730374423,1,'2020-01-01 01:01:01'),(292,20240801115359,1,'2020-01-01 01:01:01'),(293,20240802101043,1,'2020-01-01 01:01:01'),(294,20240802113716,1,'2020-01-01 01:01:01'),(295,20240814135330,1,'2020-01-01 01:01:01'),(296,20240815000000,1,'2020-01-01 01:01:01'),(297,20240815000001,1,'2020-01-01 01:01:01'),(298,20240816103247,1,'2020-01-01 01:01:01'),(299,20240820091218,1,'2020-01-01 
01:01:01'),(300,20240826111228,1,'2020-01-01 01:01:01'),(301,20240826160025,1,'2020-01-01 01:01:01'),(302,20240829165448,1,'2020-01-01 01:01:01'),(303,20240829165605,1,'2020-01-01 01:01:01'),(304,20240829165715,1,'2020-01-01 01:01:01'),(305,20240829165930,1,'2020-01-01 01:01:01'),(306,20240829170023,1,'2020-01-01 01:01:01'),(307,20240829170033,1,'2020-01-01 01:01:01'),(308,20240829170044,1,'2020-01-01 01:01:01'),(309,20240905105135,1,'2020-01-01 01:01:01'),(310,20240905140514,1,'2020-01-01 01:01:01'),(311,20240905200000,1,'2020-01-01 01:01:01'),(312,20240905200001,1,'2020-01-01 01:01:01'),(313,20241002104104,1,'2020-01-01 01:01:01'),(314,20241002104105,1,'2020-01-01 01:01:01'),(315,20241002104106,1,'2020-01-01 01:01:01'),(316,20241002210000,1,'2020-01-01 01:01:01'),(317,20241003145349,1,'2020-01-01 01:01:01'),(318,20241004005000,1,'2020-01-01 01:01:01'),(319,20241008083925,1,'2020-01-01 01:01:01'),(320,20241009090010,1,'2020-01-01 01:01:01'),(321,20241017163402,1,'2020-01-01 01:01:01'),(322,20241021224359,1,'2020-01-01 01:01:01'),(323,20241022140321,1,'2020-01-01 01:01:01'),(324,20241025111236,1,'2020-01-01 01:01:01'),(325,20241025112748,1,'2020-01-01 01:01:01'),(326,20241025141855,1,'2020-01-01 01:01:01'),(327,20241110152839,1,'2020-01-01 01:01:01'),(328,20241110152840,1,'2020-01-01 01:01:01'),(329,20241110152841,1,'2020-01-01 01:01:01'),(330,20241116233322,1,'2020-01-01 01:01:01'),(331,20241122171434,1,'2020-01-01 01:01:01'),(332,20241125150614,1,'2020-01-01 01:01:01'),(333,20241203125346,1,'2020-01-01 01:01:01'),(334,20241203130032,1,'2020-01-01 01:01:01'),(335,20241205122800,1,'2020-01-01 01:01:01'),(336,20241209164540,1,'2020-01-01 01:01:01'),(337,20241210140021,1,'2020-01-01 01:01:01'),(338,20241219180042,1,'2020-01-01 01:01:01'),(339,20241220100000,1,'2020-01-01 01:01:01'),(340,20241220114903,1,'2020-01-01 01:01:01'),(341,20241220114904,1,'2020-01-01 01:01:01'),(342,20241224000000,1,'2020-01-01 01:01:01'),(343,20241230000000,1,'2020-01-01 
01:01:01'),(344,20241231112624,1,'2020-01-01 01:01:01'),(345,20250102121439,1,'2020-01-01 01:01:01'),(346,20250121094045,1,'2020-01-01 01:01:01'),(347,20250121094500,1,'2020-01-01 01:01:01'),(348,20250121094600,1,'2020-01-01 01:01:01'),(349,20250121094700,1,'2020-01-01 01:01:01'),(350,20250124194347,1,'2020-01-01 01:01:01'),(351,20250127162751,1,'2020-01-01 01:01:01'),(352,20250213104005,1,'2020-01-01 01:01:01'),(353,20250214205657,1,'2020-01-01 01:01:01'),(354,20250217093329,1,'2020-01-01 01:01:01'),(355,20250219090511,1,'2020-01-01 01:01:01'),(356,20250219100000,1,'2020-01-01 01:01:01'),(357,20250219142401,1,'2020-01-01 01:01:01'),(358,20250224184002,1,'2020-01-01 01:01:01'),(359,20250225085436,1,'2020-01-01 01:01:01'),(360,20250226000000,1,'2020-01-01 01:01:01'),(361,20250226153445,1,'2020-01-01 01:01:01'),(362,20250304162702,1,'2020-01-01 01:01:01'),(363,20250306144233,1,'2020-01-01 01:01:01'),(364,20250313163430,1,'2020-01-01 01:01:01'),(365,20250317130944,1,'2020-01-01 01:01:01'),(366,20250318165922,1,'2020-01-01 01:01:01'),(367,20250320132525,1,'2020-01-01 01:01:01'),(368,20250320200000,1,'2020-01-01 01:01:01'),(369,20250326161930,1,'2020-01-01 01:01:01'),(370,20250326161931,1,'2020-01-01 01:01:01'),(371,20250331042354,1,'2020-01-01 01:01:01'),(372,20250331154206,1,'2020-01-01 01:01:01'),(373,20250401155831,1,'2020-01-01 01:01:01'),(374,20250408133233,1,'2020-01-01 01:01:01'),(375,20250410104321,1,'2020-01-01 01:01:01'),(376,20250421085116,1,'2020-01-01 01:01:01'),(377,20250422095806,1,'2020-01-01 01:01:01'),(378,20250424153059,1,'2020-01-01 01:01:01'),(379,20250430103833,1,'2020-01-01 01:01:01'),(380,20250430112622,1,'2020-01-01 01:01:01'),(381,20250501162727,1,'2020-01-01 01:01:01'),(382,20250502154517,1,'2020-01-01 01:01:01'),(383,20250502222222,1,'2020-01-01 01:01:01'),(384,20250507170845,1,'2020-01-01 01:01:01'),(385,20250513162912,1,'2020-01-01 01:01:01'),(386,20250519161614,1,'2020-01-01 01:01:01'),(387,20250519170000,1,'2020-01-01 
01:01:01'),(388,20250520153848,1,'2020-01-01 01:01:01'),(389,20250528115932,1,'2020-01-01 01:01:01'),(390,20250529102706,1,'2020-01-01 01:01:01'),(391,20250603105558,1,'2020-01-01 01:01:01'),(392,20250609102714,1,'2020-01-01 01:01:01'),(393,20250609112613,1,'2020-01-01 01:01:01'),(394,20250613103810,1,'2020-01-01 01:01:01'),(395,20250616193950,1,'2020-01-01 01:01:01'),(396,20250624140757,1,'2020-01-01 01:01:01'),(397,20250626130239,1,'2020-01-01 01:01:01'),(398,20250629131032,1,'2020-01-01 01:01:01'),(399,20250701155654,1,'2020-01-01 01:01:01'),(400,20250707095725,1,'2020-01-01 01:01:01'),(401,20250716152435,1,'2020-01-01 01:01:01'),(402,20250718091828,1,'2020-01-01 01:01:01'),(403,20250728122229,1,'2020-01-01 01:01:01'),(404,20250731122715,1,'2020-01-01 01:01:01'),(405,20250731151000,1,'2020-01-01 01:01:01'),(406,20250803000000,1,'2020-01-01 01:01:01'),(407,20250805083116,1,'2020-01-01 01:01:01'),(408,20250807140441,1,'2020-01-01 01:01:01'),(409,20250808000000,1,'2020-01-01 01:01:01'),(410,20250811155036,1,'2020-01-01 01:01:01'),(411,20250813205039,1,'2020-01-01 01:01:01'),(412,20250814123333,1,'2020-01-01 01:01:01'),(413,20250815130115,1,'2020-01-01 01:01:01'),(414,20250816115553,1,'2020-01-01 01:01:01'),(415,20250817154557,1,'2020-01-01 01:01:01'),(416,20250825113751,1,'2020-01-01 01:01:01'),(417,20250827113140,1,'2020-01-01 01:01:01'),(418,20250828120836,1,'2020-01-01 01:01:01'),(419,20250902112642,1,'2020-01-01 01:01:01'),(420,20250904091745,1,'2020-01-01 01:01:01'),(421,20250905090000,1,'2020-01-01 01:01:01'),(422,20250922083056,1,'2020-01-01 01:01:01'),(423,20250923120000,1,'2020-01-01 01:01:01'),(424,20250926123048,1,'2020-01-01 01:01:01'),(425,20251015103505,1,'2020-01-01 01:01:01'),(426,20251015103600,1,'2020-01-01 01:01:01'),(427,20251015103700,1,'2020-01-01 01:01:01'),(428,20251015103800,1,'2020-01-01 01:01:01'),(429,20251015103900,1,'2020-01-01 01:01:01'),(430,20251028140000,1,'2020-01-01 01:01:01'),(431,20251028140100,1,'2020-01-01 
01:01:01'),(432,20251028140110,1,'2020-01-01 01:01:01'),(433,20251028140200,1,'2020-01-01 01:01:01'),(434,20251028140300,1,'2020-01-01 01:01:01'),(435,20251028140400,1,'2020-01-01 01:01:01'),(436,20251031154558,1,'2020-01-01 01:01:01'),(437,20251103160848,1,'2020-01-01 01:01:01'),(438,20251104112849,1,'2020-01-01 01:01:01'),(439,20251106000000,1,'2020-01-01 01:01:01'),(440,20251107164629,1,'2020-01-01 01:01:01'),(441,20251107170854,1,'2020-01-01 01:01:01'),(442,20251110172137,1,'2020-01-01 01:01:01'),(443,20251111153133,1,'2020-01-01 01:01:01'),(444,20251117020000,1,'2020-01-01 01:01:01'),(445,20251117020100,1,'2020-01-01 01:01:01'),(446,20251117020200,1,'2020-01-01 01:01:01'),(447,20251121100000,1,'2020-01-01 01:01:01'),(448,20251121124239,1,'2020-01-01 01:01:01'),(449,20251124090450,1,'2020-01-01 01:01:01'),(450,20251124135808,1,'2020-01-01 01:01:01'),(451,20251124140138,1,'2020-01-01 01:01:01'),(452,20251124162948,1,'2020-01-01 01:01:01'),(453,20251127113559,1,'2020-01-01 01:01:01'),(454,20251202162232,1,'2020-01-01 01:01:01'),(455,20251203170808,1,'2020-01-01 01:01:01'),(456,20251207050413,1,'2020-01-01 01:01:01'),(457,20251208215800,1,'2020-01-01 01:01:01'),(458,20251209221730,1,'2020-01-01 01:01:01'),(459,20251209221850,1,'2020-01-01 01:01:01'),(460,20251215163721,1,'2020-01-01 01:01:01'),(461,20251217000000,1,'2020-01-01 01:01:01'),(462,20251217120000,1,'2020-01-01 01:01:01'),(463,20251229000000,1,'2020-01-01 01:01:01'),(464,20251229000010,1,'2020-01-01 01:01:01'),(465,20251229000020,1,'2020-01-01 01:01:01'),(466,20260106000000,1,'2020-01-01 01:01:01'),(467,20260108200708,1,'2020-01-01 01:01:01'),(468,20260108214732,1,'2020-01-01 01:01:01'),(469,20260109231821,1,'2020-01-01 01:01:01'),(470,20260113012054,1,'2020-01-01 01:01:01'),(471,20260124200020,1,'2020-01-01 01:01:01'),(472,20260126150840,1,'2020-01-01 01:01:01'),(473,20260126210724,1,'2020-01-01 01:01:01'),(474,20260202151756,1,'2020-01-01 01:01:01'),(475,20260205184907,1,'2020-01-01 
01:01:01'),(476,20260210151544,1,'2020-01-01 01:01:01'),(477,20260210155109,1,'2020-01-01 01:01:01'),(478,20260210181120,1,'2020-01-01 01:01:01'),(479,20260211200153,1,'2020-01-01 01:01:01'),(480,20260217141240,1,'2020-01-01 01:01:01'),(481,20260217200906,1,'2020-01-01 01:01:01'),(482,20260218175704,1,'2020-01-01 01:01:01'),(483,20260314120000,1,'2020-01-01 01:01:01'),(484,20260316120000,1,'2020-01-01 01:01:01'),(485,20260316120001,1,'2020-01-01 01:01:01'),(486,20260316120002,1,'2020-01-01 01:01:01'),(487,20260316120003,1,'2020-01-01 01:01:01'),(488,20260316120004,1,'2020-01-01 01:01:01'),(489,20260316120005,1,'2020-01-01 01:01:01'),(490,20260316120006,1,'2020-01-01 01:01:01'),(491,20260316120007,1,'2020-01-01 01:01:01'),(492,20260316120008,1,'2020-01-01 01:01:01'),(493,20260316120009,1,'2020-01-01 01:01:01'),(494,20260316120010,1,'2020-01-01 01:01:01'),(495,20260317120000,1,'2020-01-01 01:01:01'),(496,20260318184559,1,'2020-01-01 01:01:01'),(497,20260319120000,1,'2020-01-01 01:01:01'),(498,20260323144117,1,'2020-01-01 01:01:01'),(499,20260324161944,1,'2020-01-01 01:01:01'),(500,20260324223334,1,'2020-01-01 01:01:01'),(501,20260326131501,1,'2020-01-01 01:01:01'),(502,20260326210603,1,'2020-01-01 01:01:01'),(503,20260331000000,1,'2020-01-01 01:01:01'),(504,20260401153000,1,'2020-01-01 01:01:01'),(505,20260401153001,1,'2020-01-01 01:01:01'),(506,20260401153503,1,'2020-01-01 01:01:01'),(507,20260403120000,1,'2020-01-01 01:01:01'),(508,20260409153713,1,'2020-01-01 01:01:01'),(509,20260409153714,1,'2020-01-01 01:01:01'),(510,20260409153715,1,'2020-01-01 01:01:01'),(511,20260409153716,1,'2020-01-01 01:01:01'),(512,20260409153717,1,'2020-01-01 01:01:01'),(513,20260409183610,1,'2020-01-01 01:01:01'),(514,20260410173222,1,'2020-01-01 01:01:01'),(515,20260422181702,1,'2020-01-01 01:01:01'),(516,20260423161823,1,'2020-01-01 01:01:01'),(517,20260423161824,1,'2020-01-01 01:01:01'),(518,20260427134220,1,'2020-01-01 01:01:01'),(519,20260428125634,1,'2020-01-01 
01:01:01'),(520,20260429180725,1,'2020-01-01 01:01:01'),(521,20260430103635,1,'2020-01-01 01:01:01'),(522,20260506132626,1,'2020-01-01 01:01:01'),(523,20260506171058,1,'2020-01-01 01:01:01'),(524,20260512143542,1,'2020-01-01 01:01:01'),(525,20260512173249,1,'2020-01-01 01:01:01'),(526,20260512173250,1,'2020-01-01 01:01:01'),(527,20260518124441,1,'2020-01-01 01:01:01'),(528,20260518150028,1,'2020-01-01 01:01:01'); +INSERT INTO `migration_status_tables` VALUES (1,0,1,'2020-01-01 01:01:01'),(2,20161118193812,1,'2020-01-01 01:01:01'),(3,20161118211713,1,'2020-01-01 01:01:01'),(4,20161118212436,1,'2020-01-01 01:01:01'),(5,20161118212515,1,'2020-01-01 01:01:01'),(6,20161118212528,1,'2020-01-01 01:01:01'),(7,20161118212538,1,'2020-01-01 01:01:01'),(8,20161118212549,1,'2020-01-01 01:01:01'),(9,20161118212557,1,'2020-01-01 01:01:01'),(10,20161118212604,1,'2020-01-01 01:01:01'),(11,20161118212613,1,'2020-01-01 01:01:01'),(12,20161118212621,1,'2020-01-01 01:01:01'),(13,20161118212630,1,'2020-01-01 01:01:01'),(14,20161118212641,1,'2020-01-01 01:01:01'),(15,20161118212649,1,'2020-01-01 01:01:01'),(16,20161118212656,1,'2020-01-01 01:01:01'),(17,20161118212758,1,'2020-01-01 01:01:01'),(18,20161128234849,1,'2020-01-01 01:01:01'),(19,20161230162221,1,'2020-01-01 01:01:01'),(20,20170104113816,1,'2020-01-01 01:01:01'),(21,20170105151732,1,'2020-01-01 01:01:01'),(22,20170108191242,1,'2020-01-01 01:01:01'),(23,20170109094020,1,'2020-01-01 01:01:01'),(24,20170109130438,1,'2020-01-01 01:01:01'),(25,20170110202752,1,'2020-01-01 01:01:01'),(26,20170111133013,1,'2020-01-01 01:01:01'),(27,20170117025759,1,'2020-01-01 01:01:01'),(28,20170118191001,1,'2020-01-01 01:01:01'),(29,20170119234632,1,'2020-01-01 01:01:01'),(30,20170124230432,1,'2020-01-01 01:01:01'),(31,20170127014618,1,'2020-01-01 01:01:01'),(32,20170131232841,1,'2020-01-01 01:01:01'),(33,20170223094154,1,'2020-01-01 01:01:01'),(34,20170306075207,1,'2020-01-01 01:01:01'),(35,20170309100733,1,'2020-01-01 
01:01:01'),(36,20170331111922,1,'2020-01-01 01:01:01'),(37,20170502143928,1,'2020-01-01 01:01:01'),(38,20170504130602,1,'2020-01-01 01:01:01'),(39,20170509132100,1,'2020-01-01 01:01:01'),(40,20170519105647,1,'2020-01-01 01:01:01'),(41,20170519105648,1,'2020-01-01 01:01:01'),(42,20170831234300,1,'2020-01-01 01:01:01'),(43,20170831234301,1,'2020-01-01 01:01:01'),(44,20170831234303,1,'2020-01-01 01:01:01'),(45,20171116163618,1,'2020-01-01 01:01:01'),(46,20171219164727,1,'2020-01-01 01:01:01'),(47,20180620164811,1,'2020-01-01 01:01:01'),(48,20180620175054,1,'2020-01-01 01:01:01'),(49,20180620175055,1,'2020-01-01 01:01:01'),(50,20191010101639,1,'2020-01-01 01:01:01'),(51,20191010155147,1,'2020-01-01 01:01:01'),(52,20191220130734,1,'2020-01-01 01:01:01'),(53,20200311140000,1,'2020-01-01 01:01:01'),(54,20200405120000,1,'2020-01-01 01:01:01'),(55,20200407120000,1,'2020-01-01 01:01:01'),(56,20200420120000,1,'2020-01-01 01:01:01'),(57,20200504120000,1,'2020-01-01 01:01:01'),(58,20200512120000,1,'2020-01-01 01:01:01'),(59,20200707120000,1,'2020-01-01 01:01:01'),(60,20201011162341,1,'2020-01-01 01:01:01'),(61,20201021104586,1,'2020-01-01 01:01:01'),(62,20201102112520,1,'2020-01-01 01:01:01'),(63,20201208121729,1,'2020-01-01 01:01:01'),(64,20201215091637,1,'2020-01-01 01:01:01'),(65,20210119174155,1,'2020-01-01 01:01:01'),(66,20210326182902,1,'2020-01-01 01:01:01'),(67,20210421112652,1,'2020-01-01 01:01:01'),(68,20210506095025,1,'2020-01-01 01:01:01'),(69,20210513115729,1,'2020-01-01 01:01:01'),(70,20210526113559,1,'2020-01-01 01:01:01'),(71,20210601000001,1,'2020-01-01 01:01:01'),(72,20210601000002,1,'2020-01-01 01:01:01'),(73,20210601000003,1,'2020-01-01 01:01:01'),(74,20210601000004,1,'2020-01-01 01:01:01'),(75,20210601000005,1,'2020-01-01 01:01:01'),(76,20210601000006,1,'2020-01-01 01:01:01'),(77,20210601000007,1,'2020-01-01 01:01:01'),(78,20210601000008,1,'2020-01-01 01:01:01'),(79,20210606151329,1,'2020-01-01 01:01:01'),(80,20210616163757,1,'2020-01-01 
01:01:01'),(81,20210617174723,1,'2020-01-01 01:01:01'),(82,20210622160235,1,'2020-01-01 01:01:01'),(83,20210623100031,1,'2020-01-01 01:01:01'),(84,20210623133615,1,'2020-01-01 01:01:01'),(85,20210708143152,1,'2020-01-01 01:01:01'),(86,20210709124443,1,'2020-01-01 01:01:01'),(87,20210712155608,1,'2020-01-01 01:01:01'),(88,20210714102108,1,'2020-01-01 01:01:01'),(89,20210719153709,1,'2020-01-01 01:01:01'),(90,20210721171531,1,'2020-01-01 01:01:01'),(91,20210723135713,1,'2020-01-01 01:01:01'),(92,20210802135933,1,'2020-01-01 01:01:01'),(93,20210806112844,1,'2020-01-01 01:01:01'),(94,20210810095603,1,'2020-01-01 01:01:01'),(95,20210811150223,1,'2020-01-01 01:01:01'),(96,20210818151827,1,'2020-01-01 01:01:01'),(97,20210818151828,1,'2020-01-01 01:01:01'),(98,20210818182258,1,'2020-01-01 01:01:01'),(99,20210819131107,1,'2020-01-01 01:01:01'),(100,20210819143446,1,'2020-01-01 01:01:01'),(101,20210903132338,1,'2020-01-01 01:01:01'),(102,20210915144307,1,'2020-01-01 01:01:01'),(103,20210920155130,1,'2020-01-01 01:01:01'),(104,20210927143115,1,'2020-01-01 01:01:01'),(105,20210927143116,1,'2020-01-01 01:01:01'),(106,20211013133706,1,'2020-01-01 01:01:01'),(107,20211013133707,1,'2020-01-01 01:01:01'),(108,20211102135149,1,'2020-01-01 01:01:01'),(109,20211109121546,1,'2020-01-01 01:01:01'),(110,20211110163320,1,'2020-01-01 01:01:01'),(111,20211116184029,1,'2020-01-01 01:01:01'),(112,20211116184030,1,'2020-01-01 01:01:01'),(113,20211202092042,1,'2020-01-01 01:01:01'),(114,20211202181033,1,'2020-01-01 01:01:01'),(115,20211207161856,1,'2020-01-01 01:01:01'),(116,20211216131203,1,'2020-01-01 01:01:01'),(117,20211221110132,1,'2020-01-01 01:01:01'),(118,20220107155700,1,'2020-01-01 01:01:01'),(119,20220125105650,1,'2020-01-01 01:01:01'),(120,20220201084510,1,'2020-01-01 01:01:01'),(121,20220208144830,1,'2020-01-01 01:01:01'),(122,20220208144831,1,'2020-01-01 01:01:01'),(123,20220215152203,1,'2020-01-01 01:01:01'),(124,20220223113157,1,'2020-01-01 
01:01:01'),(125,20220307104655,1,'2020-01-01 01:01:01'),(126,20220309133956,1,'2020-01-01 01:01:01'),(127,20220316155700,1,'2020-01-01 01:01:01'),(128,20220323152301,1,'2020-01-01 01:01:01'),(129,20220330100659,1,'2020-01-01 01:01:01'),(130,20220404091216,1,'2020-01-01 01:01:01'),(131,20220419140750,1,'2020-01-01 01:01:01'),(132,20220428140039,1,'2020-01-01 01:01:01'),(133,20220503134048,1,'2020-01-01 01:01:01'),(134,20220524102918,1,'2020-01-01 01:01:01'),(135,20220526123327,1,'2020-01-01 01:01:01'),(136,20220526123328,1,'2020-01-01 01:01:01'),(137,20220526123329,1,'2020-01-01 01:01:01'),(138,20220608113128,1,'2020-01-01 01:01:01'),(139,20220627104817,1,'2020-01-01 01:01:01'),(140,20220704101843,1,'2020-01-01 01:01:01'),(141,20220708095046,1,'2020-01-01 01:01:01'),(142,20220713091130,1,'2020-01-01 01:01:01'),(143,20220802135510,1,'2020-01-01 01:01:01'),(144,20220818101352,1,'2020-01-01 01:01:01'),(145,20220822161445,1,'2020-01-01 01:01:01'),(146,20220831100036,1,'2020-01-01 01:01:01'),(147,20220831100151,1,'2020-01-01 01:01:01'),(148,20220908181826,1,'2020-01-01 01:01:01'),(149,20220914154915,1,'2020-01-01 01:01:01'),(150,20220915165115,1,'2020-01-01 01:01:01'),(151,20220915165116,1,'2020-01-01 01:01:01'),(152,20220928100158,1,'2020-01-01 01:01:01'),(153,20221014084130,1,'2020-01-01 01:01:01'),(154,20221027085019,1,'2020-01-01 01:01:01'),(155,20221101103952,1,'2020-01-01 01:01:01'),(156,20221104144401,1,'2020-01-01 01:01:01'),(157,20221109100749,1,'2020-01-01 01:01:01'),(158,20221115104546,1,'2020-01-01 01:01:01'),(159,20221130114928,1,'2020-01-01 01:01:01'),(160,20221205112142,1,'2020-01-01 01:01:01'),(161,20221216115820,1,'2020-01-01 01:01:01'),(162,20221220195934,1,'2020-01-01 01:01:01'),(163,20221220195935,1,'2020-01-01 01:01:01'),(164,20221223174807,1,'2020-01-01 01:01:01'),(165,20221227163855,1,'2020-01-01 01:01:01'),(166,20221227163856,1,'2020-01-01 01:01:01'),(167,20230202224725,1,'2020-01-01 01:01:01'),(168,20230206163608,1,'2020-01-01 
01:01:01'),(169,20230214131519,1,'2020-01-01 01:01:01'),(170,20230303135738,1,'2020-01-01 01:01:01'),(171,20230313135301,1,'2020-01-01 01:01:01'),(172,20230313141819,1,'2020-01-01 01:01:01'),(173,20230315104937,1,'2020-01-01 01:01:01'),(174,20230317173844,1,'2020-01-01 01:01:01'),(175,20230320133602,1,'2020-01-01 01:01:01'),(176,20230330100011,1,'2020-01-01 01:01:01'),(177,20230330134823,1,'2020-01-01 01:01:01'),(178,20230405232025,1,'2020-01-01 01:01:01'),(179,20230408084104,1,'2020-01-01 01:01:01'),(180,20230411102858,1,'2020-01-01 01:01:01'),(181,20230421155932,1,'2020-01-01 01:01:01'),(182,20230425082126,1,'2020-01-01 01:01:01'),(183,20230425105727,1,'2020-01-01 01:01:01'),(184,20230501154913,1,'2020-01-01 01:01:01'),(185,20230503101418,1,'2020-01-01 01:01:01'),(186,20230515144206,1,'2020-01-01 01:01:01'),(187,20230517140952,1,'2020-01-01 01:01:01'),(188,20230517152807,1,'2020-01-01 01:01:01'),(189,20230518114155,1,'2020-01-01 01:01:01'),(190,20230520153236,1,'2020-01-01 01:01:01'),(191,20230525151159,1,'2020-01-01 01:01:01'),(192,20230530122103,1,'2020-01-01 01:01:01'),(193,20230602111827,1,'2020-01-01 01:01:01'),(194,20230608103123,1,'2020-01-01 01:01:01'),(195,20230629140529,1,'2020-01-01 01:01:01'),(196,20230629140530,1,'2020-01-01 01:01:01'),(197,20230711144622,1,'2020-01-01 01:01:01'),(198,20230721135421,1,'2020-01-01 01:01:01'),(199,20230721161508,1,'2020-01-01 01:01:01'),(200,20230726115701,1,'2020-01-01 01:01:01'),(201,20230807100822,1,'2020-01-01 01:01:01'),(202,20230814150442,1,'2020-01-01 01:01:01'),(203,20230823122728,1,'2020-01-01 01:01:01'),(204,20230906152143,1,'2020-01-01 01:01:01'),(205,20230911163618,1,'2020-01-01 01:01:01'),(206,20230912101759,1,'2020-01-01 01:01:01'),(207,20230915101341,1,'2020-01-01 01:01:01'),(208,20230918132351,1,'2020-01-01 01:01:01'),(209,20231004144339,1,'2020-01-01 01:01:01'),(210,20231009094541,1,'2020-01-01 01:01:01'),(211,20231009094542,1,'2020-01-01 01:01:01'),(212,20231009094543,1,'2020-01-01 
01:01:01'),(213,20231009094544,1,'2020-01-01 01:01:01'),(214,20231016091915,1,'2020-01-01 01:01:01'),(215,20231024174135,1,'2020-01-01 01:01:01'),(216,20231025120016,1,'2020-01-01 01:01:01'),(217,20231025160156,1,'2020-01-01 01:01:01'),(218,20231031165350,1,'2020-01-01 01:01:01'),(219,20231106144110,1,'2020-01-01 01:01:01'),(220,20231107130934,1,'2020-01-01 01:01:01'),(221,20231109115838,1,'2020-01-01 01:01:01'),(222,20231121054530,1,'2020-01-01 01:01:01'),(223,20231122101320,1,'2020-01-01 01:01:01'),(224,20231130132828,1,'2020-01-01 01:01:01'),(225,20231130132931,1,'2020-01-01 01:01:01'),(226,20231204155427,1,'2020-01-01 01:01:01'),(227,20231206142340,1,'2020-01-01 01:01:01'),(228,20231207102320,1,'2020-01-01 01:01:01'),(229,20231207102321,1,'2020-01-01 01:01:01'),(230,20231207133731,1,'2020-01-01 01:01:01'),(231,20231212094238,1,'2020-01-01 01:01:01'),(232,20231212095734,1,'2020-01-01 01:01:01'),(233,20231212161121,1,'2020-01-01 01:01:01'),(234,20231215122713,1,'2020-01-01 01:01:01'),(235,20231219143041,1,'2020-01-01 01:01:01'),(236,20231224070653,1,'2020-01-01 01:01:01'),(237,20240110134315,1,'2020-01-01 01:01:01'),(238,20240119091637,1,'2020-01-01 01:01:01'),(239,20240126020642,1,'2020-01-01 01:01:01'),(240,20240126020643,1,'2020-01-01 01:01:01'),(241,20240129162819,1,'2020-01-01 01:01:01'),(242,20240130115133,1,'2020-01-01 01:01:01'),(243,20240131083822,1,'2020-01-01 01:01:01'),(244,20240205095928,1,'2020-01-01 01:01:01'),(245,20240205121956,1,'2020-01-01 01:01:01'),(246,20240209110212,1,'2020-01-01 01:01:01'),(247,20240212111533,1,'2020-01-01 01:01:01'),(248,20240221112844,1,'2020-01-01 01:01:01'),(249,20240222073518,1,'2020-01-01 01:01:01'),(250,20240222135115,1,'2020-01-01 01:01:01'),(251,20240226082255,1,'2020-01-01 01:01:01'),(252,20240228082706,1,'2020-01-01 01:01:01'),(253,20240301173035,1,'2020-01-01 01:01:01'),(254,20240302111134,1,'2020-01-01 01:01:01'),(255,20240312103753,1,'2020-01-01 01:01:01'),(256,20240313143416,1,'2020-01-01 
01:01:01'),(257,20240314085226,1,'2020-01-01 01:01:01'),(258,20240314151747,1,'2020-01-01 01:01:01'),(259,20240320145650,1,'2020-01-01 01:01:01'),(260,20240327115530,1,'2020-01-01 01:01:01'),(261,20240327115617,1,'2020-01-01 01:01:01'),(262,20240408085837,1,'2020-01-01 01:01:01'),(263,20240415104633,1,'2020-01-01 01:01:01'),(264,20240430111727,1,'2020-01-01 01:01:01'),(265,20240515200020,1,'2020-01-01 01:01:01'),(266,20240521143023,1,'2020-01-01 01:01:01'),(267,20240521143024,1,'2020-01-01 01:01:01'),(268,20240601174138,1,'2020-01-01 01:01:01'),(269,20240607133721,1,'2020-01-01 01:01:01'),(270,20240612150059,1,'2020-01-01 01:01:01'),(271,20240613162201,1,'2020-01-01 01:01:01'),(272,20240613172616,1,'2020-01-01 01:01:01'),(273,20240618142419,1,'2020-01-01 01:01:01'),(274,20240625093543,1,'2020-01-01 01:01:01'),(275,20240626195531,1,'2020-01-01 01:01:01'),(276,20240702123921,1,'2020-01-01 01:01:01'),(277,20240703154849,1,'2020-01-01 01:01:01'),(278,20240707134035,1,'2020-01-01 01:01:01'),(279,20240707134036,1,'2020-01-01 01:01:01'),(280,20240709124958,1,'2020-01-01 01:01:01'),(281,20240709132642,1,'2020-01-01 01:01:01'),(282,20240709183940,1,'2020-01-01 01:01:01'),(283,20240710155623,1,'2020-01-01 01:01:01'),(284,20240723102712,1,'2020-01-01 01:01:01'),(285,20240725152735,1,'2020-01-01 01:01:01'),(286,20240725182118,1,'2020-01-01 01:01:01'),(287,20240726100517,1,'2020-01-01 01:01:01'),(288,20240730171504,1,'2020-01-01 01:01:01'),(289,20240730174056,1,'2020-01-01 01:01:01'),(290,20240730215453,1,'2020-01-01 01:01:01'),(291,20240730374423,1,'2020-01-01 01:01:01'),(292,20240801115359,1,'2020-01-01 01:01:01'),(293,20240802101043,1,'2020-01-01 01:01:01'),(294,20240802113716,1,'2020-01-01 01:01:01'),(295,20240814135330,1,'2020-01-01 01:01:01'),(296,20240815000000,1,'2020-01-01 01:01:01'),(297,20240815000001,1,'2020-01-01 01:01:01'),(298,20240816103247,1,'2020-01-01 01:01:01'),(299,20240820091218,1,'2020-01-01 01:01:01'),(300,20240826111228,1,'2020-01-01 
01:01:01'),(301,20240826160025,1,'2020-01-01 01:01:01'),(302,20240829165448,1,'2020-01-01 01:01:01'),(303,20240829165605,1,'2020-01-01 01:01:01'),(304,20240829165715,1,'2020-01-01 01:01:01'),(305,20240829165930,1,'2020-01-01 01:01:01'),(306,20240829170023,1,'2020-01-01 01:01:01'),(307,20240829170033,1,'2020-01-01 01:01:01'),(308,20240829170044,1,'2020-01-01 01:01:01'),(309,20240905105135,1,'2020-01-01 01:01:01'),(310,20240905140514,1,'2020-01-01 01:01:01'),(311,20240905200000,1,'2020-01-01 01:01:01'),(312,20240905200001,1,'2020-01-01 01:01:01'),(313,20241002104104,1,'2020-01-01 01:01:01'),(314,20241002104105,1,'2020-01-01 01:01:01'),(315,20241002104106,1,'2020-01-01 01:01:01'),(316,20241002210000,1,'2020-01-01 01:01:01'),(317,20241003145349,1,'2020-01-01 01:01:01'),(318,20241004005000,1,'2020-01-01 01:01:01'),(319,20241008083925,1,'2020-01-01 01:01:01'),(320,20241009090010,1,'2020-01-01 01:01:01'),(321,20241017163402,1,'2020-01-01 01:01:01'),(322,20241021224359,1,'2020-01-01 01:01:01'),(323,20241022140321,1,'2020-01-01 01:01:01'),(324,20241025111236,1,'2020-01-01 01:01:01'),(325,20241025112748,1,'2020-01-01 01:01:01'),(326,20241025141855,1,'2020-01-01 01:01:01'),(327,20241110152839,1,'2020-01-01 01:01:01'),(328,20241110152840,1,'2020-01-01 01:01:01'),(329,20241110152841,1,'2020-01-01 01:01:01'),(330,20241116233322,1,'2020-01-01 01:01:01'),(331,20241122171434,1,'2020-01-01 01:01:01'),(332,20241125150614,1,'2020-01-01 01:01:01'),(333,20241203125346,1,'2020-01-01 01:01:01'),(334,20241203130032,1,'2020-01-01 01:01:01'),(335,20241205122800,1,'2020-01-01 01:01:01'),(336,20241209164540,1,'2020-01-01 01:01:01'),(337,20241210140021,1,'2020-01-01 01:01:01'),(338,20241219180042,1,'2020-01-01 01:01:01'),(339,20241220100000,1,'2020-01-01 01:01:01'),(340,20241220114903,1,'2020-01-01 01:01:01'),(341,20241220114904,1,'2020-01-01 01:01:01'),(342,20241224000000,1,'2020-01-01 01:01:01'),(343,20241230000000,1,'2020-01-01 01:01:01'),(344,20241231112624,1,'2020-01-01 
01:01:01'),(345,20250102121439,1,'2020-01-01 01:01:01'),(346,20250121094045,1,'2020-01-01 01:01:01'),(347,20250121094500,1,'2020-01-01 01:01:01'),(348,20250121094600,1,'2020-01-01 01:01:01'),(349,20250121094700,1,'2020-01-01 01:01:01'),(350,20250124194347,1,'2020-01-01 01:01:01'),(351,20250127162751,1,'2020-01-01 01:01:01'),(352,20250213104005,1,'2020-01-01 01:01:01'),(353,20250214205657,1,'2020-01-01 01:01:01'),(354,20250217093329,1,'2020-01-01 01:01:01'),(355,20250219090511,1,'2020-01-01 01:01:01'),(356,20250219100000,1,'2020-01-01 01:01:01'),(357,20250219142401,1,'2020-01-01 01:01:01'),(358,20250224184002,1,'2020-01-01 01:01:01'),(359,20250225085436,1,'2020-01-01 01:01:01'),(360,20250226000000,1,'2020-01-01 01:01:01'),(361,20250226153445,1,'2020-01-01 01:01:01'),(362,20250304162702,1,'2020-01-01 01:01:01'),(363,20250306144233,1,'2020-01-01 01:01:01'),(364,20250313163430,1,'2020-01-01 01:01:01'),(365,20250317130944,1,'2020-01-01 01:01:01'),(366,20250318165922,1,'2020-01-01 01:01:01'),(367,20250320132525,1,'2020-01-01 01:01:01'),(368,20250320200000,1,'2020-01-01 01:01:01'),(369,20250326161930,1,'2020-01-01 01:01:01'),(370,20250326161931,1,'2020-01-01 01:01:01'),(371,20250331042354,1,'2020-01-01 01:01:01'),(372,20250331154206,1,'2020-01-01 01:01:01'),(373,20250401155831,1,'2020-01-01 01:01:01'),(374,20250408133233,1,'2020-01-01 01:01:01'),(375,20250410104321,1,'2020-01-01 01:01:01'),(376,20250421085116,1,'2020-01-01 01:01:01'),(377,20250422095806,1,'2020-01-01 01:01:01'),(378,20250424153059,1,'2020-01-01 01:01:01'),(379,20250430103833,1,'2020-01-01 01:01:01'),(380,20250430112622,1,'2020-01-01 01:01:01'),(381,20250501162727,1,'2020-01-01 01:01:01'),(382,20250502154517,1,'2020-01-01 01:01:01'),(383,20250502222222,1,'2020-01-01 01:01:01'),(384,20250507170845,1,'2020-01-01 01:01:01'),(385,20250513162912,1,'2020-01-01 01:01:01'),(386,20250519161614,1,'2020-01-01 01:01:01'),(387,20250519170000,1,'2020-01-01 01:01:01'),(388,20250520153848,1,'2020-01-01 
01:01:01'),(389,20250528115932,1,'2020-01-01 01:01:01'),(390,20250529102706,1,'2020-01-01 01:01:01'),(391,20250603105558,1,'2020-01-01 01:01:01'),(392,20250609102714,1,'2020-01-01 01:01:01'),(393,20250609112613,1,'2020-01-01 01:01:01'),(394,20250613103810,1,'2020-01-01 01:01:01'),(395,20250616193950,1,'2020-01-01 01:01:01'),(396,20250624140757,1,'2020-01-01 01:01:01'),(397,20250626130239,1,'2020-01-01 01:01:01'),(398,20250629131032,1,'2020-01-01 01:01:01'),(399,20250701155654,1,'2020-01-01 01:01:01'),(400,20250707095725,1,'2020-01-01 01:01:01'),(401,20250716152435,1,'2020-01-01 01:01:01'),(402,20250718091828,1,'2020-01-01 01:01:01'),(403,20250728122229,1,'2020-01-01 01:01:01'),(404,20250731122715,1,'2020-01-01 01:01:01'),(405,20250731151000,1,'2020-01-01 01:01:01'),(406,20250803000000,1,'2020-01-01 01:01:01'),(407,20250805083116,1,'2020-01-01 01:01:01'),(408,20250807140441,1,'2020-01-01 01:01:01'),(409,20250808000000,1,'2020-01-01 01:01:01'),(410,20250811155036,1,'2020-01-01 01:01:01'),(411,20250813205039,1,'2020-01-01 01:01:01'),(412,20250814123333,1,'2020-01-01 01:01:01'),(413,20250815130115,1,'2020-01-01 01:01:01'),(414,20250816115553,1,'2020-01-01 01:01:01'),(415,20250817154557,1,'2020-01-01 01:01:01'),(416,20250825113751,1,'2020-01-01 01:01:01'),(417,20250827113140,1,'2020-01-01 01:01:01'),(418,20250828120836,1,'2020-01-01 01:01:01'),(419,20250902112642,1,'2020-01-01 01:01:01'),(420,20250904091745,1,'2020-01-01 01:01:01'),(421,20250905090000,1,'2020-01-01 01:01:01'),(422,20250922083056,1,'2020-01-01 01:01:01'),(423,20250923120000,1,'2020-01-01 01:01:01'),(424,20250926123048,1,'2020-01-01 01:01:01'),(425,20251015103505,1,'2020-01-01 01:01:01'),(426,20251015103600,1,'2020-01-01 01:01:01'),(427,20251015103700,1,'2020-01-01 01:01:01'),(428,20251015103800,1,'2020-01-01 01:01:01'),(429,20251015103900,1,'2020-01-01 01:01:01'),(430,20251028140000,1,'2020-01-01 01:01:01'),(431,20251028140100,1,'2020-01-01 01:01:01'),(432,20251028140110,1,'2020-01-01 
01:01:01'),(433,20251028140200,1,'2020-01-01 01:01:01'),(434,20251028140300,1,'2020-01-01 01:01:01'),(435,20251028140400,1,'2020-01-01 01:01:01'),(436,20251031154558,1,'2020-01-01 01:01:01'),(437,20251103160848,1,'2020-01-01 01:01:01'),(438,20251104112849,1,'2020-01-01 01:01:01'),(439,20251106000000,1,'2020-01-01 01:01:01'),(440,20251107164629,1,'2020-01-01 01:01:01'),(441,20251107170854,1,'2020-01-01 01:01:01'),(442,20251110172137,1,'2020-01-01 01:01:01'),(443,20251111153133,1,'2020-01-01 01:01:01'),(444,20251117020000,1,'2020-01-01 01:01:01'),(445,20251117020100,1,'2020-01-01 01:01:01'),(446,20251117020200,1,'2020-01-01 01:01:01'),(447,20251121100000,1,'2020-01-01 01:01:01'),(448,20251121124239,1,'2020-01-01 01:01:01'),(449,20251124090450,1,'2020-01-01 01:01:01'),(450,20251124135808,1,'2020-01-01 01:01:01'),(451,20251124140138,1,'2020-01-01 01:01:01'),(452,20251124162948,1,'2020-01-01 01:01:01'),(453,20251127113559,1,'2020-01-01 01:01:01'),(454,20251202162232,1,'2020-01-01 01:01:01'),(455,20251203170808,1,'2020-01-01 01:01:01'),(456,20251207050413,1,'2020-01-01 01:01:01'),(457,20251208215800,1,'2020-01-01 01:01:01'),(458,20251209221730,1,'2020-01-01 01:01:01'),(459,20251209221850,1,'2020-01-01 01:01:01'),(460,20251215163721,1,'2020-01-01 01:01:01'),(461,20251217000000,1,'2020-01-01 01:01:01'),(462,20251217120000,1,'2020-01-01 01:01:01'),(463,20251229000000,1,'2020-01-01 01:01:01'),(464,20251229000010,1,'2020-01-01 01:01:01'),(465,20251229000020,1,'2020-01-01 01:01:01'),(466,20260106000000,1,'2020-01-01 01:01:01'),(467,20260108200708,1,'2020-01-01 01:01:01'),(468,20260108214732,1,'2020-01-01 01:01:01'),(469,20260109231821,1,'2020-01-01 01:01:01'),(470,20260113012054,1,'2020-01-01 01:01:01'),(471,20260124200020,1,'2020-01-01 01:01:01'),(472,20260126150840,1,'2020-01-01 01:01:01'),(473,20260126210724,1,'2020-01-01 01:01:01'),(474,20260202151756,1,'2020-01-01 01:01:01'),(475,20260205184907,1,'2020-01-01 01:01:01'),(476,20260210151544,1,'2020-01-01 
01:01:01'),(477,20260210155109,1,'2020-01-01 01:01:01'),(478,20260210181120,1,'2020-01-01 01:01:01'),(479,20260211200153,1,'2020-01-01 01:01:01'),(480,20260217141240,1,'2020-01-01 01:01:01'),(481,20260217200906,1,'2020-01-01 01:01:01'),(482,20260218175704,1,'2020-01-01 01:01:01'),(483,20260314120000,1,'2020-01-01 01:01:01'),(484,20260316120000,1,'2020-01-01 01:01:01'),(485,20260316120001,1,'2020-01-01 01:01:01'),(486,20260316120002,1,'2020-01-01 01:01:01'),(487,20260316120003,1,'2020-01-01 01:01:01'),(488,20260316120004,1,'2020-01-01 01:01:01'),(489,20260316120005,1,'2020-01-01 01:01:01'),(490,20260316120006,1,'2020-01-01 01:01:01'),(491,20260316120007,1,'2020-01-01 01:01:01'),(492,20260316120008,1,'2020-01-01 01:01:01'),(493,20260316120009,1,'2020-01-01 01:01:01'),(494,20260316120010,1,'2020-01-01 01:01:01'),(495,20260317120000,1,'2020-01-01 01:01:01'),(496,20260318184559,1,'2020-01-01 01:01:01'),(497,20260319120000,1,'2020-01-01 01:01:01'),(498,20260323144117,1,'2020-01-01 01:01:01'),(499,20260324161944,1,'2020-01-01 01:01:01'),(500,20260324223334,1,'2020-01-01 01:01:01'),(501,20260326131501,1,'2020-01-01 01:01:01'),(502,20260326210603,1,'2020-01-01 01:01:01'),(503,20260331000000,1,'2020-01-01 01:01:01'),(504,20260401153000,1,'2020-01-01 01:01:01'),(505,20260401153001,1,'2020-01-01 01:01:01'),(506,20260401153503,1,'2020-01-01 01:01:01'),(507,20260403120000,1,'2020-01-01 01:01:01'),(508,20260409153713,1,'2020-01-01 01:01:01'),(509,20260409153714,1,'2020-01-01 01:01:01'),(510,20260409153715,1,'2020-01-01 01:01:01'),(511,20260409153716,1,'2020-01-01 01:01:01'),(512,20260409153717,1,'2020-01-01 01:01:01'),(513,20260409183610,1,'2020-01-01 01:01:01'),(514,20260410173222,1,'2020-01-01 01:01:01'),(515,20260422181702,1,'2020-01-01 01:01:01'),(516,20260423161823,1,'2020-01-01 01:01:01'),(517,20260423161824,1,'2020-01-01 01:01:01'),(518,20260427134220,1,'2020-01-01 01:01:01'),(519,20260428125634,1,'2020-01-01 01:01:01'),(520,20260429180725,1,'2020-01-01 
01:01:01'),(521,20260430103635,1,'2020-01-01 01:01:01'),(522,20260506132626,1,'2020-01-01 01:01:01'),(523,20260506171058,1,'2020-01-01 01:01:01'),(524,20260512143542,1,'2020-01-01 01:01:01'),(525,20260512173249,1,'2020-01-01 01:01:01'),(526,20260512173250,1,'2020-01-01 01:01:01'),(527,20260518124441,1,'2020-01-01 01:01:01'),(528,20260518150028,1,'2020-01-01 01:01:01'),(529,20260518194422,1,'2020-01-01 01:01:01'); /*!40101 SET @saved_cs_client = @@character_set_client */; /*!50503 SET character_set_client = utf8mb4 */; CREATE TABLE `mobile_device_management_solutions` ( diff --git a/tools/charts-backfill/README.md b/tools/charts-backfill/README.md index 55530978aae..50fc527aa6c 100644 --- a/tools/charts-backfill/README.md +++ b/tools/charts-backfill/README.md @@ -1,14 +1,15 @@ # charts-backfill Generates synthetic chart data for development and testing. Writes rows to -`host_hourly_data_blobs` using `ON DUPLICATE KEY UPDATE`, so it is safe to -re-run. +`host_scd_data` using `ON DUPLICATE KEY UPDATE`, so it is safe to re-run. +All writes use the roaring bitmap encoding (`encoding_type = 1`). ## Usage ```bash go run ./tools/charts-backfill --dataset uptime --days 30 go run ./tools/charts-backfill --dataset uptime --days 7 --host-ids 1,2,3 +go run ./tools/charts-backfill --dataset cve --days 30 --use-tracked-cves go run ./tools/charts-backfill --dataset cve --days 30 --entity-ids CVE-2024-1,CVE-2024-2 go run ./tools/charts-backfill --mysql-dsn "fleet:fleet@tcp(localhost:3306)/fleet" ``` @@ -21,13 +22,28 @@ go run ./tools/charts-backfill --mysql-dsn "fleet:fleet@tcp(localhost:3306)/flee | `--days` | `30` | Number of days to backfill | | `--start-date` | `now - days` | Start date (`YYYY-MM-DD`) | | `--entity-ids` | `""` | Comma-separated entity IDs (e.g. 
CVE IDs); `""` for non-entity datasets | +| `--use-tracked-cves` | `false` | For `--dataset cve`, auto-discover entity IDs from the production tracked-CVE query (joins `software_cve` / `operating_system_vulnerabilities` against the curated software matchers; requires vulnerability data to be populated). Overrides `--entity-ids`. | | `--host-ids` | all hosts | Comma-separated host IDs to include | | `--mysql-dsn` | local dev | MySQL connection string | ## Datasets -- **Hourly blob** (default): 24 rows/day per entity, one per hour. -- **Daily blob** (`cve`): one row/day with `hour = -1` (whole-day sentinel). - -Density (fraction of hosts marked active) varies by dataset — see -`densityRange` in `main.go`. +Backfill mode matches the live collector's sample strategy for each dataset: + +- **Accumulate, hourly** (default; `uptime`, `policy`): 24 independent rows + per day per entity, each a fresh random sample. `valid_to` is set to one + hour past `valid_from`. + +- **Snapshot, state-segment** (`cve`): per-entity state-segment rows shaped + like real CVE data. Each entity gets an initial host set; for each + subsequent day, with ~5% probability the set is *churned* (~10% drop, ~10% + add). Each contiguous run of unchanged days collapses to a single row. + The final segment per entity leaves `valid_to` at the open sentinel so the + live collector compares against it on its next tick instead of inserting + over the top. Pair with `--use-tracked-cves` to mirror production CVE + selection. + +Density (fraction of hosts marked active) for hourly accumulate datasets varies +by dataset — see `densityRange` in `main.go`. Snapshot datasets instead size each +entity's initial host set via `snapshotCardinality`; the snapshot churn parameters +(`snapshotFlipsPerDayPerEntity`, `snapshotChurnFraction`) are also defined there. 
diff --git a/tools/charts-backfill/main.go b/tools/charts-backfill/main.go index 4de571d4380..91856e17cd7 100644 --- a/tools/charts-backfill/main.go +++ b/tools/charts-backfill/main.go @@ -1,37 +1,99 @@ // charts-backfill generates realistic chart data for development and testing. -// Writes rows to host_scd_data in closed form (explicit valid_to); the live -// collector can then extend from these rows via its normal write path. -// Safe to re-run — uses ON DUPLICATE KEY UPDATE to merge new data. +// Writes rows to host_scd_data. Safe to re-run — uses ON DUPLICATE KEY UPDATE +// to merge new data. +// +// Datasets are backfilled in one of two modes based on their sample strategy: +// +// - Accumulate (e.g. uptime): independent rows per hour, each a fresh +// random sample. Each row's validity is bounded to its single hour. +// - Snapshot (e.g. cve): per-entity state-segment rows. Most entities get +// a single open row spanning the entire backfill range; a small fraction +// "flip" state on day boundaries, producing additional closed segments. +// The final segment per entity has valid_to = sentinel so the live +// collector can compare against it on its next tick. This mirrors what +// real CVE data looks like (mostly stable, occasional churn). // // Usage: // // go run ./tools/charts-backfill --dataset uptime --days 30 // go run ./tools/charts-backfill --dataset uptime --days 7 --host-ids 1,2,3 +// go run ./tools/charts-backfill --dataset cve --days 30 --use-tracked-cves // go run ./tools/charts-backfill --mysql-dsn "fleet:fleet@tcp(localhost:3306)/fleet" package main import ( + "context" "database/sql" "flag" "log" + "log/slog" "math/rand/v2" "time" "github.com/fleetdm/fleet/v4/pkg/str" "github.com/fleetdm/fleet/v4/server/chart" + "github.com/fleetdm/fleet/v4/server/chart/bootstrap" _ "github.com/go-sql-driver/mysql" + "github.com/jmoiron/sqlx" ) -// dailyDatasets bucket at 24h granularity; all others are hourly. 
-var dailyDatasets = map[string]struct{}{ +// snapshotDatasets are generated with the state-segment model: mostly stable +// rows with occasional churn. Everything not listed here uses the accumulate +// hourly model. Must match the live collector's sample strategy for each +// dataset (see server/chart/datasets.go) so backfilled data is shaped like +// what production will eventually produce. +var snapshotDatasets = map[string]struct{}{ "cve": {}, } +// scdOpenSentinel mirrors the constant in server/chart/internal/mysql/data.go. +// Used as valid_to to mark rows as currently open; the live collector closes +// these on the next state change or extends them by leaving them alone. +var scdOpenSentinel = time.Date(9999, 12, 31, 0, 0, 0, 0, time.UTC) + +// snapshotFlipsPerDayPerEntity is the per-entity probability of a state +// change on any given day. ~5% means a 30-day window produces on average +// ~1.5 state changes per entity — most entities stay stable, a few have a +// handful of segments. +const snapshotFlipsPerDayPerEntity = 0.05 + +// snapshotChurnFraction is the fraction of an entity's current host set that +// turns over on a flip — some hosts drop out (patched), some new hosts get +// added (newly discovered as affected). Cardinality stays roughly stable. +const snapshotChurnFraction = 0.10 + +// snapshotCardinality picks a per-entity affected-host count from a long-tail +// distribution shaped like real-world CVE data: most CVEs touch a handful of +// hosts (specific software/version), with an occasional wide one (browser or +// kernel). A naive uniform-density model saturates at fleet size when many +// CVEs are unioned together — this distribution keeps the union meaningful +// even with hundreds of tracked entities. Return value is capped at fleetSize. 
+func snapshotCardinality(fleetSize int) int { + r := rand.Float64() //nolint:gosec // dev data generator, not crypto + var count int + switch { + case r < 0.70: // very narrow: specific software build + count = 1 + rand.IntN(5) //nolint:gosec + case r < 0.92: // narrow: software version + count = 5 + rand.IntN(20) //nolint:gosec + case r < 0.99: // moderate: popular software + count = 25 + rand.IntN(100) //nolint:gosec + default: // wide: browser/kernel-tier, up to ~10% of fleet + wideMax := max(fleetSize/10, 200) + count = 125 + rand.IntN(wideMax) //nolint:gosec + } + if count > fleetSize { + count = fleetSize + } + return count +} + func main() { dataset := flag.String("dataset", "uptime", "dataset name (e.g. uptime, policy, cve)") days := flag.Int("days", 30, "number of days to backfill") startDate := flag.String("start-date", "", "start date (YYYY-MM-DD), defaults to now - days") entityIDsStr := flag.String("entity-ids", "", "comma-separated entity IDs (default: '' for non-entity datasets)") + useTrackedCVEs := flag.Bool("use-tracked-cves", false, "for --dataset cve, auto-discover entity IDs from the production tracked-CVE query (overrides --entity-ids)") hostIDsStr := flag.String("host-ids", "", "comma-separated host IDs (default: all from hosts table)") dsn := flag.String("mysql-dsn", "fleet:fleet@tcp(localhost:3306)/fleet?parseTime=true", "MySQL connection string") flag.Parse() @@ -48,20 +110,23 @@ func main() { } start = time.Date(start.Year(), start.Month(), start.Day(), 0, 0, 0, 0, time.UTC) - db, err := sql.Open("mysql", *dsn) + rawDB, err := sql.Open("mysql", *dsn) if err != nil { log.Fatalf("failed to connect to mysql: %v", err) } - - if err := db.Ping(); err != nil { - db.Close() + if err := rawDB.Ping(); err != nil { + rawDB.Close() log.Fatalf("failed to ping mysql: %v", err) } - defer db.Close() + defer rawDB.Close() + + // sqlx wraps the raw connection so we can hand it to the chart bootstrap + // helpers (TrackedCriticalCVEs) without opening a 
second pool. + db := sqlx.NewDb(rawDB, "mysql") hostIDs := str.ParseUintList(*hostIDsStr) if len(hostIDs) == 0 { - hostIDs, err = queryHostIDs(db) + hostIDs, err = queryHostIDs(rawDB) if err != nil { log.Fatalf("failed to query host IDs: %v", err) //nolint:gocritic // dev tool, OS reclaims db handle on exit } @@ -70,8 +135,25 @@ func main() { } } - entityIDs := str.ParseStringList(*entityIDsStr) - if len(entityIDs) == 0 { + var entityIDs []string + switch { + case *useTrackedCVEs: + if *dataset != "cve" { + log.Fatalf("--use-tracked-cves only applies to --dataset cve (got %q)", *dataset) + } + ctx := context.Background() + cves, err := bootstrap.TrackedCriticalCVEs(ctx, db, slog.New(slog.DiscardHandler)) + if err != nil { + log.Fatalf("failed to query tracked CVEs: %v", err) + } + if len(cves) == 0 { + log.Fatal("tracked-CVE query returned no CVEs (vulnerability data may not be populated yet)") + } + entityIDs = cves + log.Printf("discovered %d tracked CVEs from the live database", len(entityIDs)) + case *entityIDsStr != "": + entityIDs = str.ParseStringList(*entityIDsStr) + default: entityIDs = []string{""} } @@ -79,13 +161,13 @@ func main() { *dataset, *days, start.Format("2006-01-02"), len(hostIDs), len(entityIDs)) startTime := time.Now() - totalRows := backfill(db, *dataset, *days, start, hostIDs, entityIDs) + totalRows := backfill(rawDB, *dataset, *days, start, hostIDs, entityIDs) log.Printf("done: %d SCD rows inserted/updated in %.1fs", totalRows, time.Since(startTime).Seconds()) } func backfill(db *sql.DB, dataset string, days int, start time.Time, hostIDs []uint, entityIDs []string) int { - if _, ok := dailyDatasets[dataset]; ok { - return backfillDaily(db, dataset, days, start, hostIDs, entityIDs) + if _, ok := snapshotDatasets[dataset]; ok { + return backfillSnapshot(db, dataset, days, start, hostIDs, entityIDs) } return backfillHourly(db, dataset, days, start, hostIDs, entityIDs) } @@ -107,10 +189,10 @@ func backfillHourly(db *sql.DB, dataset string, 
days int, start time.Time, hostI blob := chart.HostIDsToBlob(activeHosts) _, err := db.Exec( - `INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, valid_from, valid_to) - VALUES (?, ?, ?, ?, ?) - ON DUPLICATE KEY UPDATE host_bitmap = VALUES(host_bitmap), valid_to = VALUES(valid_to)`, - dataset, entityID, blob, validFrom, validTo) + `INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, encoding_type, valid_from, valid_to) + VALUES (?, ?, ?, ?, ?, ?) + ON DUPLICATE KEY UPDATE host_bitmap = VALUES(host_bitmap), encoding_type = VALUES(encoding_type), valid_to = VALUES(valid_to)`, + dataset, entityID, blob.Bytes, blob.Encoding, validFrom, validTo) if err != nil { log.Fatalf("insert hourly SCD row failed on %s hour %d: %v", validFrom, hour, err) } @@ -126,48 +208,134 @@ func backfillHourly(db *sql.DB, dataset string, days int, start time.Time, hostI return totalRows } -func backfillDaily(db *sql.DB, dataset string, days int, start time.Time, hostIDs []uint, entityIDs []string) int { +// backfillSnapshot models per-entity state-segment data shaped like what the +// production snapshot collector produces over time. For each entity: +// - Pick an initial host set whose size is drawn from snapshotCardinality. +// - Walk day by day; on each day, with probability snapshotFlipsPerDayPerEntity, +// churn the set (drop ~churn% / add ~churn% of new hosts). +// - Each contiguous run of unchanged days is written as a single row. +// - The final segment per entity leaves valid_to at the sentinel (open), so +// the live collector compares against it on its next tick rather than +// opening a fresh row over the top. 
+func backfillSnapshot(db *sql.DB, dataset string, days int, start time.Time, hostIDs []uint, entityIDs []string) int { totalRows := 0 - minDensity, maxDensity := densityRange(dataset) - n := len(hostIDs) - for day := range days { - date := start.AddDate(0, 0, day) + type segment struct { + validFrom time.Time + active []uint + } - for _, entityID := range entityIDs { - density := minDensity + rand.Float64()*(maxDensity-minDensity) //nolint:gosec // dev data generator, not crypto - count := int(float64(n) * density) - if count == 0 { + for entityIdx, entityID := range entityIDs { + active := randomSubset(hostIDs, snapshotCardinality(len(hostIDs))) + + segments := []segment{{validFrom: start, active: active}} + for day := 1; day < days; day++ { + if rand.Float64() >= snapshotFlipsPerDayPerEntity { //nolint:gosec // dev data generator, not crypto continue } - active := make([]uint, count) - for i, idx := range rand.Perm(n)[:count] { - active[i] = hostIDs[idx] + active = churn(active, hostIDs, snapshotChurnFraction) + segments = append(segments, segment{ + validFrom: start.AddDate(0, 0, day), + active: active, + }) + } + + for i, seg := range segments { + validTo := scdOpenSentinel + if i+1 < len(segments) { + validTo = segments[i+1].validFrom } - blob := chart.HostIDsToBlob(active) - validFrom := date - validTo := date.AddDate(0, 0, 1) + blob := chart.HostIDsToBlob(seg.active) _, err := db.Exec( - `INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, valid_from, valid_to) - VALUES (?, ?, ?, ?, ?) - ON DUPLICATE KEY UPDATE host_bitmap = VALUES(host_bitmap), valid_to = VALUES(valid_to)`, - dataset, entityID, blob, validFrom, validTo) + `INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, encoding_type, valid_from, valid_to) + VALUES (?, ?, ?, ?, ?, ?) 
+ ON DUPLICATE KEY UPDATE host_bitmap = VALUES(host_bitmap), encoding_type = VALUES(encoding_type), valid_to = VALUES(valid_to)`, + dataset, entityID, blob.Bytes, blob.Encoding, seg.validFrom, validTo) if err != nil { - log.Fatalf("insert daily SCD row failed on %s entity %q: %v", date, entityID, err) + log.Fatalf("insert snapshot SCD row failed for entity %q at %s: %v", entityID, seg.validFrom, err) } totalRows++ } - if (day+1)%5 == 0 || day == days-1 { - log.Printf(" day %d/%d (%s) — %d rows so far", - day+1, days, date.Format("2006-01-02"), totalRows) + if (entityIdx+1)%500 == 0 || entityIdx == len(entityIDs)-1 { + log.Printf(" entity %d/%d — %d rows so far", + entityIdx+1, len(entityIDs), totalRows) } } return totalRows } +// randomSubset returns a uniformly random `count`-sized subset of pool. If +// count >= len(pool), returns an order-preserving copy of the entire pool. The +// result is a fresh slice that the caller can mutate (nil when count <= 0). +func randomSubset(pool []uint, count int) []uint { + if count <= 0 { + return nil + } + if count >= len(pool) { + out := make([]uint, len(pool)) + copy(out, pool) + return out + } + out := make([]uint, count) + for i, idx := range rand.Perm(len(pool))[:count] { //nolint:gosec // dev data generator, not crypto + out[i] = pool[idx] + } + return out +} + +// churn produces a new host set from `prev` by dropping a `fraction` of its +// members and adding up to the same number of currently-unaffected pool hosts. +// Cardinality stays roughly stable; identity shifts. Models the realistic CVE +// state-change pattern (some hosts patched, some new hosts discovered as +// affected) without inventing wholly new bitmaps. 
+func churn(prev, pool []uint, fraction float64) []uint { + prevSet := make(map[uint]struct{}, len(prev)) + for _, id := range prev { + prevSet[id] = struct{}{} + } + + dropCount := int(float64(len(prev)) * fraction) + if dropCount < 1 && len(prev) > 0 { + dropCount = 1 + } + addCount := dropCount + + // Drop: keep prev members not in the random dropout sample. If dropCount + // >= len(prev) we drop everything, leaving kept empty for the add step + // below to fill. + kept := make([]uint, 0, len(prev)) + if dropCount < len(prev) { + dropIdx := make(map[int]struct{}, dropCount) + for _, idx := range rand.Perm(len(prev))[:dropCount] { //nolint:gosec // dev data generator, not crypto + dropIdx[idx] = struct{}{} + } + for i, id := range prev { + if _, drop := dropIdx[i]; drop { + continue + } + kept = append(kept, id) + } + } + + // Add: walk a shuffled pool, picking hosts that aren't already in prev. + added := 0 + for _, idx := range rand.Perm(len(pool)) { //nolint:gosec // dev data generator, not crypto + if added >= addCount { + break + } + candidate := pool[idx] + if _, exists := prevSet[candidate]; exists { + continue + } + kept = append(kept, candidate) + added++ + } + return kept +} + // generateHourlyHosts returns a map of hour -> active host IDs for a single day. 
func generateHourlyHosts(dataset string, hostIDs []uint) map[int][]uint { minDensity, maxDensity := densityRange(dataset) diff --git a/tools/charts-collect/main.go b/tools/charts-collect/main.go index 321c91f3868..629eb1f32a3 100644 --- a/tools/charts-collect/main.go +++ b/tools/charts-collect/main.go @@ -25,7 +25,6 @@ package main import ( - "bytes" "database/sql" "encoding/json" "flag" @@ -36,6 +35,7 @@ import ( "strings" "time" + "github.com/RoaringBitmap/roaring" "github.com/fleetdm/fleet/v4/pkg/fleethttp" "github.com/fleetdm/fleet/v4/server/chart" _ "github.com/go-sql-driver/mysql" @@ -188,30 +188,36 @@ func collectUptime(api *apiClient, db *sql.DB) error { now := time.Now().UTC() bucketStart := now.Truncate(time.Hour) validTo := bucketStart.Add(time.Hour) - newBlob := chart.HostIDsToBlob(hostIDs) + merged := chart.NewBitmap(hostIDs) // OR with existing in-bucket bitmap (accumulate semantic). - var existing []byte + var existingBytes []byte + var existingEncoding uint8 err = db.QueryRow( - `SELECT host_bitmap FROM host_scd_data + `SELECT host_bitmap, encoding_type FROM host_scd_data WHERE dataset = 'uptime' AND entity_id = '' AND valid_from = ?`, bucketStart, - ).Scan(&existing) + ).Scan(&existingBytes, &existingEncoding) if err == nil { - newBlob = chart.BlobOR(existing, newBlob) + existing, decErr := chart.DecodeBitmap(chart.Blob{Bytes: existingBytes, Encoding: existingEncoding}) + if decErr != nil { + return fmt.Errorf("decode existing uptime bitmap: %w", decErr) + } + merged = chart.BlobOR(merged, existing) } + blob := chart.BitmapToBlob(merged) _, err = db.Exec( - `INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, valid_from, valid_to) - VALUES ('uptime', '', ?, ?, ?) - ON DUPLICATE KEY UPDATE host_bitmap = VALUES(host_bitmap)`, - newBlob, bucketStart, validTo, + `INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, encoding_type, valid_from, valid_to) + VALUES ('uptime', '', ?, ?, ?, ?) 
+ ON DUPLICATE KEY UPDATE host_bitmap = VALUES(host_bitmap), encoding_type = VALUES(encoding_type)`, + blob.Bytes, blob.Encoding, bucketStart, validTo, ) if err != nil { return fmt.Errorf("write uptime SCD row: %w", err) } - log.Printf(" wrote uptime row: %d hosts, valid_from %s", chart.BlobPopcount(newBlob), bucketStart) + log.Printf(" wrote uptime row: %d hosts, valid_from %s", chart.BlobPopcount(merged), bucketStart) return nil } @@ -246,9 +252,9 @@ func collectCVE(api *apiClient, db *sql.DB) error { log.Printf(" %d unique CVEs found in %.1fs", len(cveHosts), time.Since(fetchStart).Seconds()) // Build the desired entity->bitmap map for the current hourly bucket. - entityBitmaps := make(map[string][]byte, len(cveHosts)) + entityBitmaps := make(map[string]*roaring.Bitmap, len(cveHosts)) for cve, hosts := range cveHosts { - entityBitmaps[cve] = chart.HostIDsToBlob(hosts) + entityBitmaps[cve] = chart.NewBitmap(hosts) } // Snapshot rows are keyed to 1h boundaries (not 24h) so that row transitions @@ -267,9 +273,9 @@ func collectCVE(api *apiClient, db *sql.DB) error { // reconcileSnapshot mirrors Datastore.recordSnapshot in // server/chart/internal/mysql/data.go. -func reconcileSnapshot(db *sql.DB, dataset string, entityBitmaps map[string][]byte, bucketStart time.Time) error { +func reconcileSnapshot(db *sql.DB, dataset string, entityBitmaps map[string]*roaring.Bitmap, bucketStart time.Time) error { rows, err := db.Query( - `SELECT entity_id, host_bitmap, valid_from + `SELECT entity_id, host_bitmap, encoding_type, valid_from FROM host_scd_data WHERE dataset = ? 
AND valid_to = ?`, dataset, scdOpenSentinel) @@ -277,42 +283,45 @@ func reconcileSnapshot(db *sql.DB, dataset string, entityBitmaps map[string][]by return fmt.Errorf("fetch open SCD rows: %w", err) } defer rows.Close() - type openRow struct { - bitmap []byte + type openEntry struct { + bitmap *roaring.Bitmap validFrom time.Time } - openByEntity := make(map[string]openRow) + openByEntity := make(map[string]openEntry) for rows.Next() { var entityID string - var bitmap []byte + var bitmapBytes []byte + var encoding uint8 var validFrom time.Time - if err := rows.Scan(&entityID, &bitmap, &validFrom); err != nil { + if err := rows.Scan(&entityID, &bitmapBytes, &encoding, &validFrom); err != nil { return fmt.Errorf("scan open SCD row: %w", err) } - openByEntity[entityID] = openRow{bitmap: bitmap, validFrom: validFrom} + rb, err := chart.DecodeBitmap(chart.Blob{Bytes: bitmapBytes, Encoding: encoding}) + if err != nil { + return fmt.Errorf("decode open bitmap for %q: %w", entityID, err) + } + openByEntity[entityID] = openEntry{bitmap: rb, validFrom: validFrom} } if err := rows.Err(); err != nil { return fmt.Errorf("iterate open SCD rows: %w", err) } var toClose []string - var toUpsert []struct { + type upsertRow struct { entityID string - bitmap []byte + blob chart.Blob } + var toUpsert []upsertRow for entityID, bitmap := range entityBitmaps { existing, hasOpen := openByEntity[entityID] - if hasOpen && bytes.Equal(existing.bitmap, bitmap) { + if hasOpen && existing.bitmap.Equals(bitmap) { continue } if hasOpen && existing.validFrom.Before(bucketStart) { toClose = append(toClose, entityID) } - toUpsert = append(toUpsert, struct { - entityID string - bitmap []byte - }{entityID, bitmap}) + toUpsert = append(toUpsert, upsertRow{entityID: entityID, blob: chart.BitmapToBlob(bitmap)}) } for entityID := range openByEntity { @@ -343,15 +352,15 @@ func reconcileSnapshot(db *sql.DB, dataset string, entityBitmaps map[string][]by batch := toUpsert[i:end] placeholders := make([]string, 
len(batch)) - args := make([]any, 0, len(batch)*4) + args := make([]any, 0, len(batch)*5) for j, r := range batch { - placeholders[j] = "(?, ?, ?, ?)" - args = append(args, dataset, r.entityID, r.bitmap, bucketStart) + placeholders[j] = "(?, ?, ?, ?, ?)" + args = append(args, dataset, r.entityID, r.blob.Bytes, r.blob.Encoding, bucketStart) } - // Concatenating hardcoded "(?,?,?,?)" placeholder strings, not user input. - stmt := `INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, valid_from) VALUES ` + //nolint:gosec // G202 + // Concatenating hardcoded "(?,?,?,?,?)" placeholder strings, not user input. + stmt := `INSERT INTO host_scd_data (dataset, entity_id, host_bitmap, encoding_type, valid_from) VALUES ` + //nolint:gosec // G202 strings.Join(placeholders, ", ") + - ` ON DUPLICATE KEY UPDATE host_bitmap = VALUES(host_bitmap)` + ` ON DUPLICATE KEY UPDATE host_bitmap = VALUES(host_bitmap), encoding_type = VALUES(encoding_type)` if _, err := db.Exec(stmt, args...); err != nil { return fmt.Errorf("upsert rows: %w", err) }