From c5cb8f2d660671680e4bbfaf944872471c0424f7 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Mon, 18 Mar 2024 19:11:58 +0800
Subject: [PATCH 01/36] feat: strings encoding

---
 pkg/phlaredb/symdb/strings_encoding.go      | 229 ++++++++++++++++++++
 pkg/phlaredb/symdb/strings_encoding_test.go |  95 ++++++++
 pkg/slices/slices.go                        |  11 +-
 3 files changed, 330 insertions(+), 5 deletions(-)
 create mode 100644 pkg/phlaredb/symdb/strings_encoding.go
 create mode 100644 pkg/phlaredb/symdb/strings_encoding_test.go

diff --git a/pkg/phlaredb/symdb/strings_encoding.go b/pkg/phlaredb/symdb/strings_encoding.go
new file mode 100644
index 0000000000..d4312f8e8a
--- /dev/null
+++ b/pkg/phlaredb/symdb/strings_encoding.go
@@ -0,0 +1,229 @@
+package symdb
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+	"unsafe"
+
+	"github.com/grafana/pyroscope/pkg/slices"
+)
+
+// Almost all strings in profiles are very short: their length fits in 8 bits.
+// Strings longer than 65535 bytes are not expected and are truncated.
+// Typically, there are only 1-10 strings longer than 256 bytes in a data set
+// consisting of a few tens of thousands of strings.
+//
+// A traditional variable-length encoding is rather wasteful in our case.
+// Instead, we split the strings into blocks and use an encoding that depends
+// on the maximum length of the strings in the block.
+//
+// The output data starts with a header: number of strings, block size,
+// number of blocks, and the block encoding map. In the map, each byte
+// specifies the number of bits needed to decode the maximum value from
+// that block, rounded up to the next power of two. Currently, the length
+// value is either 8 bits or 16.
+//
+// Blocks of data follow after the header. Each block includes two parts:
+// the string lengths array and the strings data.
+
+const maxStringLen = 1<<16 - 1
+
+type StringsEncoder struct {
+	w         io.Writer
+	blockSize int
+	blocks    []byte
+	buf       []byte
+}
+
+func NewStringsEncoder(w io.Writer) *StringsEncoder { return &StringsEncoder{w: w} }
+
+func (e *StringsEncoder) WriteStrings(strings []string) error {
+	if e.blockSize == 0 {
+		e.blockSize = 1 << 10 // 1k strings per block by default.
+	}
+	nb := (len(strings) + e.blockSize - 1) / e.blockSize
+	e.blocks = slices.GrowLen(e.blocks, nb)
+	var offset uint32
+	var bi int
+	l := uint32(len(strings))
+	for offset < l {
+		lo := offset
+		hi := offset + uint32(e.blockSize)
+		if x := uint32(len(strings)); hi > x {
+			hi = x
+		}
+		e.blocks[bi] = e.blockEncoding(strings[lo:hi])
+		offset = hi
+		bi++
+	}
+	if err := e.writeHeader(strings); err != nil {
+		return err
+	}
+	// Next we write string lengths and values in blocks.
+	e.buf = slices.GrowLen(e.buf, e.blockSize*2) // Up to 2 bytes per string.
+	for i, b := range e.blocks {
+		// e.buf = e.buf[:0]
+		lo := i * e.blockSize
+		hi := lo + e.blockSize
+		if x := len(strings); hi > x {
+			hi = x
+		}
+		bs := strings[lo:hi]
+		switch b {
+		case 8:
+			for j, s := range bs {
+				e.buf[j] = byte(len(s))
+			}
+		case 16:
+			for j, s := range bs {
+				// Equivalent to binary.LittleEndian.PutUint16.
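+				// The two stores below write the low byte first,
+				// then the high byte: e.g. a length of 300 (0x012C)
+				// is laid out as 0x2C 0x01.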
+				e.buf[j*2] = byte(len(s))
+				e.buf[j*2+1] = byte(len(s) >> 8)
+			}
+		default:
+			panic("bug: unexpected block encoding")
+		}
+		if _, err := e.w.Write(e.buf[:len(bs)*int(b)/8]); err != nil {
+			return err
+		}
+		for _, s := range bs {
+			if len(s) > maxStringLen {
+				s = s[:maxStringLen]
+			}
+			if _, err := e.w.Write(*((*[]byte)(unsafe.Pointer(&s)))); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+func (e *StringsEncoder) writeHeader(strings []string) (err error) {
+	e.buf = slices.GrowLen(e.buf, 12)
+	binary.LittleEndian.PutUint32(e.buf[0:4], uint32(len(strings)))
+	binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize))
+	binary.LittleEndian.PutUint32(e.buf[8:12], uint32(len(e.blocks)))
+	if _, err = e.w.Write(e.buf); err != nil {
+		return err
+	}
+	_, err = e.w.Write(e.blocks)
+	return err
+}
+
+func (e *StringsEncoder) blockEncoding(b []string) byte {
+	var x uint16
+	for _, s := range b {
+		x |= uint16(len(s)) >> 8
+	}
+	if x > 0 {
+		return 16
+	}
+	return 8
+}
+
+func (e *StringsEncoder) Reset() {
+	e.buf = e.buf[:0]
+	e.blocks = e.blocks[:0]
+	e.blockSize = 0
+	e.w = nil
+}
+
+type StringsDecoder struct {
+	r          io.Reader
+	stringsLen uint32
+	blocksLen  uint32
+	blockSize  uint32
+	blocks     []byte
+	buf        []byte
+}
+
+func NewStringsDecoder(r io.Reader) *StringsDecoder { return &StringsDecoder{r: r} }
+
+func (d *StringsDecoder) readHeader() (err error) {
+	d.buf = slices.GrowLen(d.buf, 12)
+	if _, err = io.ReadFull(d.r, d.buf); err != nil {
+		return err
+	}
+	d.stringsLen = binary.LittleEndian.Uint32(d.buf[0:4])
+	d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8])
+	d.blocksLen = binary.LittleEndian.Uint32(d.buf[8:12])
+	// Sanity checks are needed as we process the stream data
+	// before verifying the checksum.
+	if d.blocksLen > 1<<20 || d.stringsLen > 1<<20 || d.blockSize > 1<<20 {
+		return fmt.Errorf("malformed header")
+	}
+	d.blocks = slices.GrowLen(d.blocks, int(d.blocksLen))
+	_, err = io.ReadFull(d.r, d.blocks)
+	return err
+}
+
+func (d *StringsDecoder) StringsLen() (int, error) {
+	if err := d.readHeader(); err != nil {
+		return 0, err
+	}
+	return int(d.stringsLen), nil
+}
+
+func (d *StringsDecoder) ReadStrings(dst []string) (err error) {
+	for i := 0; i < len(d.blocks); i++ {
+		bs := d.blockSize
+		if i == len(d.blocks)-1 && d.stringsLen%d.blockSize > 0 {
+			bs = d.stringsLen % d.blockSize
+		}
+		switch d.blocks[i] {
+		case 8:
+			err = d.readStrings8(i, int(bs), dst)
+		case 16:
+			err = d.readStrings16(i, int(bs), dst)
+		default:
+			err = fmt.Errorf("unknown block encoding")
+		}
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (d *StringsDecoder) readStrings8(idx, length int, dst []string) (err error) {
+	d.buf = slices.GrowLen(d.buf, length) // 1 byte per string.
+	if _, err = io.ReadFull(d.r, d.buf); err != nil {
+		return err
+	}
+	offset := int(d.blockSize) * idx
+	for i, l := range d.buf {
+		s := make([]byte, l) // Up to 255 bytes.
+		if _, err = io.ReadFull(d.r, s); err != nil {
+			return err
+		}
+		dst[offset+i] = *(*string)(unsafe.Pointer(&s))
+	}
+	return err
+}
+
+func (d *StringsDecoder) readStrings16(idx, length int, dst []string) (err error) {
+	d.buf = slices.GrowLen(d.buf, length*2) // 2 bytes per string.
+	if _, err = io.ReadFull(d.r, d.buf); err != nil {
+		return err
+	}
+	offset := int(d.blockSize) * idx
+	for i := 0; i < len(d.buf); i += 2 {
+		l := uint16(d.buf[i]) | uint16(d.buf[i+1])<<8
+		s := make([]byte, l) // Up to 65535 bytes.
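+		// Read the payload straight into the fresh buffer; the
+		// unsafe conversion below then adopts it as the string's
+		// backing array, avoiding a second copy.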
+		if _, err = io.ReadFull(d.r, s); err != nil {
+			return err
+		}
+		dst[offset+i/2] = *(*string)(unsafe.Pointer(&s))
+	}
+	return err
+}
+
+func (d *StringsDecoder) Reset() {
+	d.buf = d.buf[:0]
+	d.blocks = d.blocks[:0]
+	d.blockSize = 0
+	d.blocksLen = 0
+	d.stringsLen = 0
+	d.r = nil
+}
diff --git a/pkg/phlaredb/symdb/strings_encoding_test.go b/pkg/phlaredb/symdb/strings_encoding_test.go
new file mode 100644
index 0000000000..519f719df8
--- /dev/null
+++ b/pkg/phlaredb/symdb/strings_encoding_test.go
@@ -0,0 +1,95 @@
+package symdb
+
+import (
+	"bufio"
+	"bytes"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func Test_StringsEncoding(t *testing.T) {
+	type testCase struct {
+		description string
+		strings     []string
+		blockSize   int
+	}
+
+	testCases := []testCase{
+		{
+			description: "empty",
+			strings:     []string{},
+		},
+		{
+			description: "less than block size",
+			strings: []string{
+				"a",
+				"b",
+			},
+			blockSize: 4,
+		},
+		{
+			description: "exact block size",
+			strings: []string{
+				"a",
+				"bc",
+				"cde",
+				"def",
+			},
+			blockSize: 4,
+		},
+		{
+			description: "greater than block size",
+			strings: []string{
+				"a",
+				"bc",
+				"cde",
+				"def",
+				"e",
+			},
+			blockSize: 4,
+		},
+		{
+			description: "mixed encoding",
+			strings: []string{
+				"a",
+				"bcd",
+				strings.Repeat("e", 256),
+			},
+			blockSize: 4,
+		},
+		{
+			description: "mixed encoding exact block",
+			strings: []string{
+				"a",
+				"b",
+				"c",
+				"d",
+				strings.Repeat("e", 256),
+				strings.Repeat("f", 256),
+				strings.Repeat("j", 256),
+				strings.Repeat("h", 256),
+			},
+			blockSize: 4,
+		},
+	}
+
+	for _, tc := range testCases {
+		tc := tc
+		t.Run(tc.description, func(t *testing.T) {
+			var output bytes.Buffer
+			e := NewStringsEncoder(&output)
+			if tc.blockSize > 0 {
+				e.blockSize = tc.blockSize
+			}
+			require.NoError(t, e.WriteStrings(tc.strings))
+			d := NewStringsDecoder(bufio.NewReader(&output))
+			n, err := d.StringsLen()
+			require.NoError(t, err)
+			out := make([]string, n)
+			require.NoError(t, d.ReadStrings(out))
+			require.Equal(t, tc.strings, out)
+		})
+	}
+}
diff --git a/pkg/slices/slices.go b/pkg/slices/slices.go
index 70de8711f4..e574ccbb02 100644
--- a/pkg/slices/slices.go
+++ b/pkg/slices/slices.go
@@ -1,5 +1,9 @@
 package slices
 
+import (
+	"golang.org/x/exp/slices"
+)
+
 // RemoveInPlace removes all elements from a slice that match the given predicate.
 // Does not allocate a new slice.
 func RemoveInPlace[T any](collection []T, predicate func(T, int) bool) []T {
@@ -27,9 +31,6 @@ func Clear[S ~[]E, E any](s S) {
 }
 
 func GrowLen[S ~[]E, E any](s S, n int) S {
-	if cap(s) < n {
-		s = make([]E, n)
-	}
-	s = s[:n]
-	return s
+	s = s[:0]
+	return slices.Grow(s, n)[:n]
 }

From 76c31b42ba88f842395fdbd283bb31dc55eef6b2 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Wed, 20 Mar 2024 13:02:36 +0800
Subject: [PATCH 02/36] simplify string encoding heuristics

---
 .../symdb/{strings_encoding.go => encoding_strings.go}          | 8 +++-----
 .../{strings_encoding_test.go => encoding_strings_test.go}      | 0
 2 files changed, 3 insertions(+), 5 deletions(-)
 rename pkg/phlaredb/symdb/{strings_encoding.go => encoding_strings.go} (98%)
 rename pkg/phlaredb/symdb/{strings_encoding_test.go => encoding_strings_test.go} (100%)

diff --git a/pkg/phlaredb/symdb/strings_encoding.go b/pkg/phlaredb/symdb/encoding_strings.go
similarity index 98%
rename from pkg/phlaredb/symdb/strings_encoding.go
rename to pkg/phlaredb/symdb/encoding_strings.go
index d4312f8e8a..ac69b33fdd 100644
--- a/pkg/phlaredb/symdb/strings_encoding.go
+++ b/pkg/phlaredb/symdb/encoding_strings.go
@@ -112,12 +112,10 @@ func (e *StringsEncoder) writeHeader(strings []string) (err error) {
 }
 
 func (e *StringsEncoder) blockEncoding(b []string) byte {
-	var x uint16
 	for _, s := range b {
-		x |= uint16(len(s)) >> 8
-	}
-	if x > 0 {
-		return 16
+		if len(s) > 255 {
+			return 16
+		}
 	}
 	return 8
 }
diff --git a/pkg/phlaredb/symdb/strings_encoding_test.go b/pkg/phlaredb/symdb/encoding_strings_test.go
similarity index 100%
rename from pkg/phlaredb/symdb/strings_encoding_test.go
rename to pkg/phlaredb/symdb/encoding_strings_test.go

From c716fa64a1dddf1284afcb387ef86002c1d0fe56 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Wed, 20 Mar 2024 20:34:21 +0800
Subject: [PATCH 03/36] locations encoding

---
 pkg/phlaredb/symdb/encoding_locations.go | 176 +++++++++++++++++++++++
 1 file changed, 176 insertions(+)
 create mode 100644 pkg/phlaredb/symdb/encoding_locations.go

diff --git a/pkg/phlaredb/symdb/encoding_locations.go b/pkg/phlaredb/symdb/encoding_locations.go
new file mode 100644
index 0000000000..bb7b6b50e5
--- /dev/null
+++ b/pkg/phlaredb/symdb/encoding_locations.go
@@ -0,0 +1,176 @@
+package symdb
+
+import (
+	"bytes"
+	"encoding/binary"
+	"io"
+	"unsafe"
+
+	"github.com/parquet-go/parquet-go/encoding/delta"
+
+	v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
+	"github.com/grafana/pyroscope/pkg/slices"
+)
+
+// https://parquet.apache.org/docs/file-format/data-pages/encodings/#delta-encoding-delta_binary_packed--5
+
+type LocationsEncoder struct {
+	w io.Writer
+}
+
+type locationsBlock struct {
+	locsLen uint32
+
+	mapping  []int32
+	function []int32
+	line     []int32
+	// Optional.
+	count  []int32
+	lines  []int32
+	addr   []int64
+	folded []bool
+
+	tmp []byte
+	buf bytes.Buffer
+
+	hasLines  bool
+	hasAddr   bool
+	hasFolded bool
+}
+
+func (lb *locationsBlock) encode(w io.Writer, locations []v1.InMemoryLocation) (int64, error) {
+	lb.reset(len(locations))
+	var addr int64
+	for i, loc := range locations {
+		lb.mapping[i] = int32(loc.MappingId)
+		lb.function[i] = int32(loc.Line[0].FunctionId)
+		lb.line[i] = loc.Line[0].Line
+		lb.count[i] = int32(len(loc.Line) - 1)
+		// Append all lines but the first one.
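+		// The first line is already captured in the function/line
+		// columns above, so a location with N lines contributes
+		// N-1 (line, function_id) pairs here.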
+		for j := 1; j < len(loc.Line); j++ {
+			line := loc.Line[j]
+			lb.lines = append(lb.lines, line.Line, int32(line.FunctionId))
+		}
+		addr |= int64(loc.Address)
+		lb.addr[i] = int64(loc.Address)
+		lb.hasFolded = lb.hasFolded || loc.IsFolded
+		lb.folded[i] = loc.IsFolded
+	}
+	lb.hasLines = len(lb.lines) > 0
+	lb.hasAddr = addr > 0
+	h := locationsBlockHeader{
+		LocationsLen: lb.locsLen,
+	}
+
+	var enc delta.BinaryPackedEncoding
+	lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.mapping)
+	h.MappingSize = uint32(len(lb.tmp))
+	lb.buf.Write(lb.tmp)
+	lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.function)
+	h.FunctionSize = uint32(len(lb.tmp))
+	lb.buf.Write(lb.tmp)
+	lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.line)
+	h.LineSize = uint32(len(lb.tmp))
+	lb.buf.Write(lb.tmp)
+	if lb.hasLines {
+		lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.count)
+		h.CountSize = uint32(len(lb.tmp))
+		lb.buf.Write(lb.tmp)
+		lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.lines)
+		h.LinesSize = uint32(len(lb.tmp))
+		lb.buf.Write(lb.tmp)
+	}
+	if lb.hasAddr {
+		lb.tmp, _ = enc.EncodeInt64(lb.tmp, lb.addr)
+		h.AddrSize = uint32(len(lb.tmp))
+		lb.buf.Write(lb.tmp)
+	}
+	if lb.hasFolded {
+		// TODO
+	}
+
+	lb.tmp = slices.GrowLen(lb.tmp, locationsBlockHeaderSize)
+	h.marshal(lb.tmp)
+	n, err := w.Write(lb.tmp)
+	if err != nil {
+		return int64(n), err
+	}
+	m, err := lb.buf.WriteTo(w)
+	return m + int64(n), err
+}
+
+func (lb *locationsBlock) reset(locations int) {
+	// Actual estimate is ~6 bytes per location.
+	// In a large data set, the most expensive member
+	// is FunctionID, and it's about 2 bytes per location.
+	lb.buf.Reset()
+	lb.buf.Grow(locations * 8)
+	*lb = locationsBlock{
+		locsLen: uint32(locations),
+
+		mapping:  slices.GrowLen(lb.mapping, locations),
+		function: slices.GrowLen(lb.function, locations),
+		line:     slices.GrowLen(lb.line, locations),
+
+		count:  slices.GrowLen(lb.count, locations),
+		lines:  lb.lines[:0], // Appended.
+		addr:   slices.GrowLen(lb.addr, locations),
+		folded: slices.GrowLen(lb.folded, locations),
+
+		buf: lb.buf,
+		tmp: slices.GrowLen(lb.tmp, 2*locations),
+	}
+}
+
+const locationsBlockHeaderSize = int(unsafe.Sizeof(locationsBlockHeader{}))
+
+type locationsBlockHeader struct {
+	LocationsLen uint32
+	MappingSize  uint32
+	FunctionSize uint32
+	LineSize     uint32
+	CountSize    uint32
+	LinesSize    uint32
+	AddrSize     uint32
+	IsFoldedSize uint32
+}
+
+func (h *locationsBlockHeader) marshal(b []byte) {
+	binary.LittleEndian.PutUint32(b[0:4], h.LocationsLen)
+	binary.LittleEndian.PutUint32(b[4:8], h.MappingSize)
+	binary.LittleEndian.PutUint32(b[8:12], h.FunctionSize)
+	binary.LittleEndian.PutUint32(b[12:16], h.LineSize)
+	binary.LittleEndian.PutUint32(b[16:20], h.CountSize)
+	binary.LittleEndian.PutUint32(b[20:24], h.LinesSize)
+	binary.LittleEndian.PutUint32(b[24:28], h.AddrSize)
+	binary.LittleEndian.PutUint32(b[28:32], h.IsFoldedSize)
+}
+
+func (lb *locationsBlock) locations() int { return int(lb.locsLen) }
+
+func (lb *locationsBlock) decode(locations []v1.InMemoryLocation) {
+	lines := make([]v1.InMemoryLine, len(lb.function)+len(lb.lines)/2)
+	var j int32 // Offset within the lines slice.
+	var o int32 // Offset within the encoded lines slice.
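+	// Each location claims count[i]+1 entries of the shared slice:
+	// entry 0 is rebuilt from the function/line columns, the rest
+	// from the flat (line, function_id) pairs.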
+	for i := 0; i < len(locations); i++ {
+		ll := lb.count[i] + 1
+		locations[i].Line = lines[j : j+ll]
+		locations[i].Line[0].Line = lb.line[i]
+		locations[i].Line[0].FunctionId = uint32(lb.function[i])
+		locations[i].MappingId = uint32(lb.mapping[i])
+		j += ll
+		for l := int32(1); l < ll; l++ {
+			locations[i].Line[l].FunctionId = uint32(lb.lines[o+1])
+			locations[i].Line[l].Line = lb.lines[o]
+			o += 2
+		}
+	}
+}
+
+func NewLocationsEncoder(w io.Writer) *LocationsEncoder {
+	return &LocationsEncoder{w: w}
+}
+
+func (e *LocationsEncoder) EncodeLocations(locations []v1.InMemoryLocation) error {
+	return nil
+}

From 42342157ce2ef9d1580814e5c929bde904680e35 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Thu, 21 Mar 2024 20:29:45 +0800
Subject: [PATCH 04/36] locations wire format

---
 pkg/phlaredb/symdb/encoding_locations.go | 458 +++++++++++++++++------
 pkg/phlaredb/symdb/encoding_strings.go   |  20 +-
 2 files changed, 355 insertions(+), 123 deletions(-)

diff --git a/pkg/phlaredb/symdb/encoding_locations.go b/pkg/phlaredb/symdb/encoding_locations.go
index bb7b6b50e5..87df17d7c2 100644
--- a/pkg/phlaredb/symdb/encoding_locations.go
+++ b/pkg/phlaredb/symdb/encoding_locations.go
@@ -1,8 +1,10 @@
 package symdb
 
 import (
+	"bufio"
 	"bytes"
 	"encoding/binary"
+	"fmt"
 	"io"
 	"unsafe"
 
@@ -10,167 +12,397 @@ import (
 
 	v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
 	"github.com/grafana/pyroscope/pkg/slices"
+	"github.com/grafana/pyroscope/pkg/util/math"
 )
 
 // https://parquet.apache.org/docs/file-format/data-pages/encodings/#delta-encoding-delta_binary_packed--5
 
 type LocationsEncoder struct {
 	w io.Writer
+	e locationsBlockEncoder
+
+	blockSize int
+	locations int
+
+	buf []byte
 }
 
+const (
+	maxLocationLines          = 255
+	defaultLocationsBlockSize = 1 << 10
+)
+
+func NewLocationsEncoder(w io.Writer) *LocationsEncoder {
+	return &LocationsEncoder{w: w}
+}
+
+func (e *LocationsEncoder) EncodeLocations(locations []v1.InMemoryLocation) error {
+	if e.blockSize == 0 {
+		e.blockSize = defaultLocationsBlockSize
+	}
+	e.locations = len(locations)
+	if err := e.writeHeader(); err != nil {
+		return err
+	}
+	for i := 0; i < len(locations); i += e.blockSize {
+		block := locations[i:math.Min(i+e.blockSize, len(locations))]
+		if _, err := e.e.encode(e.w, block); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (e *LocationsEncoder) writeHeader() (err error) {
+	e.buf = slices.GrowLen(e.buf, 8)
+	binary.LittleEndian.PutUint32(e.buf[0:4], uint32(e.locations))
+	binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize))
+	_, err = e.w.Write(e.buf)
+	return err
+}
+
+type LocationsDecoder struct {
+	r io.Reader
+	d locationsBlockDecoder
+
+	blockSize uint32
+	locations uint32
+
+	buf []byte
+}
+
+func NewLocationsDecoder(r io.Reader) *LocationsDecoder { return &LocationsDecoder{r: r} }
+
+func (d *LocationsDecoder) LocationsLen() (int, error) {
+	if err := d.readHeader(); err != nil {
+		return 0, err
+	}
+	return int(d.locations), nil
+}
+
+func (d *LocationsDecoder) readHeader() (err error) {
+	d.buf = slices.GrowLen(d.buf, 8)
+	if _, err = io.ReadFull(d.r, d.buf); err != nil {
+		return err
+	}
+	d.locations = binary.LittleEndian.Uint32(d.buf[0:4])
+	d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8])
+	// Sanity checks are needed as we process the stream data
+	// before verifying the checksum.
+	if d.locations > 1<<20 || d.blockSize > 1<<20 {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func (d *LocationsDecoder) DecodeLocations(locations []v1.InMemoryLocation) error {
+	blocks := int((d.locations + d.blockSize - 1) / d.blockSize)
+	// It's expected that the reader is already buffered.
+	r, ok := d.r.(*bufio.Reader)
+	if !ok {
+		bufSize := int(d.blockSize * 16) // 16 bytes per location.
+		r = bufio.NewReaderSize(d.r, bufSize)
+	}
+	for i := 0; i < blocks; i++ {
+		lo := i * int(d.blockSize)
+		hi := math.Min(lo+int(d.blockSize), int(d.locations))
+		block := locations[lo:hi]
+		if err := d.d.decode(r, block); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+type locationsBlockEncoder struct {
+	header locationsBlockHeader
+
+	mapping []int32
+	// Assuming there are no locations with more than 255 lines.
+	// We could even use a nibble (4 bits), but there are locations
+	// with 10 or more functions, therefore there is a chance that
+	// a capacity of 2^4 is not enough in all cases.
+	lineCount []byte
+	lines     []int32
+	// Optional.
+	addr   []int64
+	folded []bool
+
+	tmp []byte
+	buf bytes.Buffer
+
+	hasFolded bool
+}
+
+const locationsBlockHeaderSize = int(unsafe.Sizeof(locationsBlockHeader{}))
+
+type locationsBlockHeader struct {
+	LocationsLen uint32 // Number of locations
+	MappingSize  uint32 // Size of the encoded slice of mapping_ids
+	LinesLen     uint32 // Number of lines per location
+	LinesSize    uint32 // Size of the encoded lines
+	// Optional, might be empty.
+	AddrSize     uint32 // Size of the encoded slice of addresses
+	IsFoldedSize uint32 // Size of the encoded slice of is_folded
+}
+
+// isValid reports whether the header contains sane values.
+// This is important as the block might be read before the
+// checksum validation.
+func (h *locationsBlockHeader) isValid() bool {
+	return h.LocationsLen > 0 && h.LocationsLen < 1<<20 &&
+		h.MappingSize > 0 && h.MappingSize < 1<<20 &&
+		h.LinesLen > 0 && h.LinesLen < 1<<20 &&
+		h.LinesSize > 0 && h.LinesSize < 1<<20 &&
+		h.AddrSize < 1<<20 &&
+		h.IsFoldedSize < 1<<20
+}
+
+func (h *locationsBlockHeader) marshal(b []byte) {
+	binary.LittleEndian.PutUint32(b[0:4], h.LocationsLen)
+	binary.LittleEndian.PutUint32(b[4:8], h.MappingSize)
+	binary.LittleEndian.PutUint32(b[8:12], h.LinesLen)
+	binary.LittleEndian.PutUint32(b[12:16], h.LinesSize)
+	binary.LittleEndian.PutUint32(b[16:20], h.AddrSize)
+	binary.LittleEndian.PutUint32(b[20:24], h.IsFoldedSize)
+}
+
+func (h *locationsBlockHeader) unmarshal(b []byte) {
+	h.LocationsLen = binary.LittleEndian.Uint32(b[0:4])
+	h.MappingSize = binary.LittleEndian.Uint32(b[4:8])
+	h.LinesLen = binary.LittleEndian.Uint32(b[8:12])
+	h.LinesSize = binary.LittleEndian.Uint32(b[12:16])
+	h.AddrSize = binary.LittleEndian.Uint32(b[16:20])
+	h.IsFoldedSize = binary.LittleEndian.Uint32(b[20:24])
+}
+
+func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) (int64, error) {
+	e.initWrite(len(locations))
+	var addr int64
 	for i, loc := range locations {
-		lb.mapping[i] = int32(loc.MappingId)
-		lb.function[i] = int32(loc.Line[0].FunctionId)
-		lb.line[i] = loc.Line[0].Line
-		lb.count[i] = int32(len(loc.Line) - 1)
-		// Append all lines but the first one.
-		for j := 1; j < len(loc.Line); j++ {
-			line := loc.Line[j]
-			lb.lines = append(lb.lines, line.Line, int32(line.FunctionId))
+		e.mapping[i] = int32(loc.MappingId)
+		e.lineCount[i] = byte(len(loc.Line))
+		// Append all lines, up to maxLocationLines.
+		for j := 0; j < len(loc.Line) && j < maxLocationLines; j++ {
+			e.lines = append(e.lines,
+				int32(loc.Line[j].FunctionId),
+				loc.Line[j].Line)
 		}
 		addr |= int64(loc.Address)
-		lb.addr[i] = int64(loc.Address)
-		lb.hasFolded = lb.hasFolded || loc.IsFolded
-		lb.folded[i] = loc.IsFolded
-	}
-	lb.hasLines = len(lb.lines) > 0
-	lb.hasAddr = addr > 0
-	h := locationsBlockHeader{
-		LocationsLen: lb.locsLen,
-	}
+		e.addr[i] = int64(loc.Address)
+		e.hasFolded = e.hasFolded || loc.IsFolded
+		e.folded[i] = loc.IsFolded
+	}
 
+	// Mapping and line count per location.
 	var enc delta.BinaryPackedEncoding
-	lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.mapping)
-	h.MappingSize = uint32(len(lb.tmp))
-	lb.buf.Write(lb.tmp)
-	lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.function)
-	h.FunctionSize = uint32(len(lb.tmp))
-	lb.buf.Write(lb.tmp)
-	lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.line)
-	h.LineSize = uint32(len(lb.tmp))
-	lb.buf.Write(lb.tmp)
-	if lb.hasLines {
-		lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.count)
-		h.CountSize = uint32(len(lb.tmp))
-		lb.buf.Write(lb.tmp)
-		lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.lines)
-		h.LinesSize = uint32(len(lb.tmp))
-		lb.buf.Write(lb.tmp)
-	}
-	if lb.hasAddr {
-		lb.tmp, _ = enc.EncodeInt64(lb.tmp, lb.addr)
-		h.AddrSize = uint32(len(lb.tmp))
-		lb.buf.Write(lb.tmp)
-	}
-	if lb.hasFolded {
-		// TODO
-	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.mapping)
+	e.header.MappingSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+	// Line count size and length is deterministic.
+	e.buf.Write(e.lineCount) // Without any encoding.
+
+	// Lines slice size and length (in lines, not int32s).
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.lines)
+	e.header.LinesLen = uint32(len(e.lines) / 2)
+	e.header.LinesSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	if addr > 0 {
+		e.tmp, _ = enc.EncodeInt64(e.tmp, e.addr)
+		e.header.AddrSize = uint32(len(e.tmp))
+		e.buf.Write(e.tmp)
+	}
+
+	if e.hasFolded {
+		e.tmp = slices.GrowLen(e.tmp, len(e.folded)/8)
+		encodeBoolean(e.tmp, e.folded)
+		e.header.IsFoldedSize = uint32(len(e.tmp))
+		e.buf.Write(e.tmp)
+	}
 
-	lb.tmp = slices.GrowLen(lb.tmp, locationsBlockHeaderSize)
-	h.marshal(lb.tmp)
-	n, err := w.Write(lb.tmp)
+	e.tmp = slices.GrowLen(e.tmp, locationsBlockHeaderSize)
+	e.header.marshal(e.tmp)
+	n, err := w.Write(e.tmp)
 	if err != nil {
 		return int64(n), err
 	}
-	m, err := lb.buf.WriteTo(w)
+	m, err := e.buf.WriteTo(w)
 	return m + int64(n), err
 }
 
-func (lb *locationsBlock) reset(locations int) {
+func (e *locationsBlockEncoder) initWrite(locations int) {
 	// Actual estimate is ~6 bytes per location.
 	// In a large data set, the most expensive member
 	// is FunctionID, and it's about 2 bytes per location.
-	lb.buf.Reset()
-	lb.buf.Grow(locations * 8)
-	*lb = locationsBlock{
-		locsLen: uint32(locations),
-
-		mapping:  slices.GrowLen(lb.mapping, locations),
-		function: slices.GrowLen(lb.function, locations),
-		line:     slices.GrowLen(lb.line, locations),
-
-		count:  slices.GrowLen(lb.count, locations),
-		lines:  lb.lines[:0], // Appended.
-		addr:   slices.GrowLen(lb.addr, locations),
-		folded: slices.GrowLen(lb.folded, locations),
-
-		buf: lb.buf,
-		tmp: slices.GrowLen(lb.tmp, 2*locations),
-	}
-}
+	e.buf.Reset()
+	e.buf.Grow(locations * 8)
+	*e = locationsBlockEncoder{
+		header: locationsBlockHeader{LocationsLen: uint32(locations)},
+
+		mapping:   slices.GrowLen(e.mapping, locations),
+		lineCount: slices.GrowLen(e.lineCount, locations),
+		lines:     e.lines[:0], // Appendable.
+		addr:      slices.GrowLen(e.addr, locations),
+		folded:    slices.GrowLen(e.folded, locations),
+
+		buf: e.buf,
+		tmp: slices.GrowLen(e.tmp, 2*locations),
+	}
+}
+
+type locationsBlockDecoder struct {
+	header locationsBlockHeader
+
+	mappings  []int32
+	lineCount []byte
+	lines     []int32
+
+	address []int64
+	folded  []bool
+
+	tmp []byte
+}
+
+func (d *locationsBlockDecoder) readHeader(r io.Reader) error {
+	d.tmp = slices.GrowLen(d.tmp, locationsBlockHeaderSize)
+	if _, err := io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.header.unmarshal(d.tmp)
+	if !d.header.isValid() {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func (d *locationsBlockDecoder) decode(r *bufio.Reader, locations []v1.InMemoryLocation) error {
+	if err := d.readHeader(r); err != nil {
+		return err
+	}
+	if d.header.LocationsLen > uint32(len(locations)) {
+		return fmt.Errorf("locations buffer is too short")
+	}
+
+	var enc delta.BinaryPackedEncoding
+	// First we decode mapping_id and assign them to locations.
+	buf, err := r.Peek(int(d.header.MappingSize))
+	if err != nil {
+		return err
+	}
+	d.mappings = slices.GrowLen(d.mappings, int(d.header.LocationsLen))
+	d.mappings, err = enc.DecodeInt32(d.mappings, buf)
+	if err != nil {
+		return err
+	}
+	_, _ = r.Discard(len(buf))
+
+	// Line count per location.
+	// One byte per location.
+	buf, err = r.Peek(int(d.header.LocationsLen))
+	if err != nil {
+		return err
+	}
+	d.lineCount = slices.GrowLen(d.lineCount, int(d.header.LocationsLen))
+	copy(d.lineCount, buf)
+	_, _ = r.Discard(len(buf))
+
+	// Lines. A single slice backs all the location line
+	// sub-slices. But it has to be allocated as we can't
+	// reference d.lines, which is reusable.
+	lines := make([]v1.InMemoryLine, d.header.LinesLen)
+	// Unlike other members, d.header.LinesSize potentially
+	// might be too big to fit into the reader's buffer.
+	// This is not expected, but we have to handle it in
+	// a graceful way.
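+	// If the encoded lines fit into the reader's buffer, borrow
+	// the bytes with Peek and Discard them once decoded; otherwise
+	// fall back to a one-off allocation and io.ReadFull.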
+	if r.Size() > int(d.header.LinesSize) {
+		buf, err = r.Peek(int(d.header.LinesSize))
+		if err != nil {
+			return err
+		}
+	} else {
+		buf = make([]byte, int(d.header.LinesSize))
+		if _, err = io.ReadFull(r, buf); err != nil {
+			return err
+		}
+	}
+	d.lines = slices.GrowLen(d.lines, int(d.header.LinesLen))
+	d.lines, err = enc.DecodeInt32(d.lines, buf)
+	if err != nil {
+		return err
+	}
+	copy(lines, *(*[]v1.InMemoryLine)(unsafe.Pointer(&d.lines)))
+	if r.Size() > int(d.header.LinesSize) {
+		// Advance the buffer offset, if we haven't read from it.
+		// Note that this invalidates buf, therefore it can only
+		// be done after it was decoded.
+		_, _ = r.Discard(len(buf))
+	}
+
+	// In most cases we end up here.
+	if d.header.AddrSize == 0 && d.header.IsFoldedSize == 0 {
+		var o int // Offset within the lines slice.
+		for i := uint32(0); i < d.header.LocationsLen; i++ {
+			locations[i].MappingId = uint32(d.mappings[i])
+			n := o + int(d.lineCount[i])
+			locations[i].Line = lines[o:n]
+			o = n
+		}
+		return nil
+	}
+
+	// Otherwise, inspect all the optional fields.
+	if int(d.header.AddrSize) > 0 {
+		buf, err = r.Peek(int(d.header.AddrSize))
+		if err != nil {
+			return err
+		}
+		d.address = slices.GrowLen(d.address, int(d.header.LocationsLen))
+		d.address, err = enc.DecodeInt64(d.address, buf)
+		if err != nil {
+			return err
+		}
+		_, _ = r.Discard(len(buf))
+	}
+	if int(d.header.IsFoldedSize) > 0 {
+		buf, err = r.Peek(int(d.header.IsFoldedSize))
+		if err != nil {
+			return err
+		}
+		d.folded = slices.GrowLen(d.folded, int(d.header.LocationsLen))
+		decodeBoolean(d.folded, buf)
+		_, _ = r.Discard(len(buf))
+	}
+
+	var o int // Offset within the lines slice.
+	for i := uint32(0); i < d.header.LocationsLen; i++ {
+		locations[i].MappingId = uint32(d.mappings[i])
+		n := o + int(d.lineCount[i])
+		locations[i].Line = lines[o:n]
+		o = n
+		locations[i].Address = uint64(d.address[i])
+		locations[i].IsFolded = d.folded[i]
+	}
+
+	return nil
 }
 
-func NewLocationsEncoder(w io.Writer) *LocationsEncoder {
-	return &LocationsEncoder{w: w}
+func encodeBoolean(dst []byte, src []bool) {
+	for i := range dst {
+		dst[i] = 0
+	}
+	for i, b := range src {
+		if b {
+			dst[i>>3] |= 1 << (i & 7)
+		}
+	}
 }
 
-func (e *LocationsEncoder) EncodeLocations(locations []v1.InMemoryLocation) error {
-	return nil
+func decodeBoolean(dst []bool, src []byte) {
+	for i := range dst {
+		dst[i] = false
+	}
+	for i := range dst {
+		dst[i] = src[i>>3]&(1<<(i&7)) > 0
+	}
 }
diff --git a/pkg/phlaredb/symdb/encoding_strings.go b/pkg/phlaredb/symdb/encoding_strings.go
index ac69b33fdd..5ecced263f 100644
--- a/pkg/phlaredb/symdb/encoding_strings.go
+++ b/pkg/phlaredb/symdb/encoding_strings.go
 	d.stringsLen = binary.LittleEndian.Uint32(d.buf[0:4])
 	d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8])
 	d.blocksLen = binary.LittleEndian.Uint32(d.buf[8:12])
 	// Sanity checks are needed as we process the stream data
 	// before verifying the checksum.
 	if d.blocksLen > 1<<20 || d.stringsLen > 1<<20 || d.blockSize > 1<<20 {
-		return fmt.Errorf("malformed header")
+		return ErrInvalidSize
 	}
 	d.blocks = slices.GrowLen(d.blocks, int(d.blocksLen))
 	_, err = io.ReadFull(d.r, d.blocks)
 	return err
 }
 
-func (d *StringsDecoder) StringsLen() (int, error) {
-	if err := d.readHeader(); err != nil {
-		return 0, err
-	}
-	return int(d.stringsLen), nil
-}
-
-func (d *StringsDecoder) ReadStrings(dst []string) (err error) {
+func (d *StringsDecoder) DecodeStrings(dst []string) (err error) {
 	for i := 0; i < len(d.blocks); i++ {
 		bs := d.blockSize
 		if i == len(d.blocks)-1 && d.stringsLen%d.blockSize > 0 {

From 48e523c7cb5c881c2552464376c5ff0c984ca404 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Fri, 22 Mar 2024 15:22:58 +0800
Subject: [PATCH 05/36] remove bufio reader

---
 pkg/phlaredb/symdb/encoding_locations.go    | 83 ++++++++-------------
 pkg/phlaredb/symdb/encoding_strings_test.go |  4 +-
 2 files changed, 35 insertions(+), 52 deletions(-)

diff --git a/pkg/phlaredb/symdb/encoding_locations.go b/pkg/phlaredb/symdb/encoding_locations.go
index 87df17d7c2..68b7e2a10a 100644
--- a/pkg/phlaredb/symdb/encoding_locations.go
+++ b/pkg/phlaredb/symdb/encoding_locations.go
@@ -1,7 +1,6 @@
 package symdb
 
 import (
-	"bufio"
 	"bytes"
 	"encoding/binary"
 	"fmt"
@@ -61,6 +60,13 @@ func (e *LocationsEncoder) writeHeader() (err error) {
 	return err
 }
 
+func (e *LocationsEncoder) Reset(w io.Writer) {
+	e.locations = 0
+	e.blockSize = 0
+	e.buf = e.buf[:0]
+	e.w = w
+}
+
 type LocationsDecoder struct {
 	r io.Reader
 	d locationsBlockDecoder
@@ -97,23 +103,24 @@ func (d *LocationsDecoder) readHeader() (err error) {
 
 func (d *LocationsDecoder) DecodeLocations(locations []v1.InMemoryLocation) error {
 	blocks := int((d.locations + d.blockSize - 1) / d.blockSize)
-	// It's expected that the reader is already buffered.
-	r, ok := d.r.(*bufio.Reader)
-	if !ok {
-		bufSize := int(d.blockSize * 16) // 16 bytes per location.
-		r = bufio.NewReaderSize(d.r, bufSize)
-	}
 	for i := 0; i < blocks; i++ {
 		lo := i * int(d.blockSize)
 		hi := math.Min(lo+int(d.blockSize), int(d.locations))
 		block := locations[lo:hi]
-		if err := d.d.decode(r, block); err != nil {
+		if err := d.d.decode(d.r, block); err != nil {
 			return err
 		}
 	}
 	return nil
 }
 
+func (d *LocationsDecoder) Reset(r io.Reader) {
+	d.locations = 0
+	d.blockSize = 0
+	d.buf = d.buf[:0]
+	d.r = r
+}
+
 type locationsBlockEncoder struct {
 	header locationsBlockHeader
 
@@ -276,8 +283,8 @@ func (d *locationsBlockDecoder) readHeader(r io.Reader) error {
 	return nil
 }
 
-func (d *locationsBlockDecoder) decode(r *bufio.Reader, locations []v1.InMemoryLocation) error {
-	if err := d.readHeader(r); err != nil {
+func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocation) (err error) {
+	if err = d.readHeader(r); err != nil {
 		return err
 	}
 	if d.header.LocationsLen > uint32(len(locations)) {
@@ -286,63 +293,41 @@ func (d *locationsBlockDecoder) decode(r *bufio.Reader, locations []v1.InMemoryL
 
 	var enc delta.BinaryPackedEncoding
 	// First we decode mapping_id and assign them to locations.
-	buf, err := r.Peek(int(d.header.MappingSize))
-	if err != nil {
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.MappingSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
 		return err
 	}
-	d.mappings = slices.GrowLen(d.mappings, int(d.header.LocationsLen))
-	d.mappings, err = enc.DecodeInt32(d.mappings, buf)
+	d.mappings, err = enc.DecodeInt32(d.mappings, d.tmp)
 	if err != nil {
 		return err
 	}
-	_, _ = r.Discard(len(buf))
 
 	// Line count per location.
 	// One byte per location.
-	buf, err = r.Peek(int(d.header.LocationsLen))
-	if err != nil {
+	d.lineCount = slices.GrowLen(d.lineCount, int(d.header.LocationsLen))
+	if _, err = io.ReadFull(r, d.lineCount); err != nil {
 		return err
 	}
-	d.lineCount = slices.GrowLen(d.lineCount, int(d.header.LocationsLen))
-	copy(d.lineCount, buf)
-	_, _ = r.Discard(len(buf))
 
 	// Lines. A single slice backs all the location line
 	// sub-slices. But it has to be allocated as we can't
 	// reference d.lines, which is reusable.
 	lines := make([]v1.InMemoryLine, d.header.LinesLen)
-	// Unlike other members, d.header.LinesSize potentially
-	// might be too big to fit into the reader's buffer.
-	// This is not expected, but we have to handle it in
-	// a graceful way.
-	if r.Size() > int(d.header.LinesSize) {
-		buf, err = r.Peek(int(d.header.LinesSize))
-		if err != nil {
-			return err
-		}
-	} else {
-		buf = make([]byte, int(d.header.LinesSize))
-		if _, err = io.ReadFull(r, buf); err != nil {
-			return err
-		}
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.LinesSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
 	}
 	d.lines = slices.GrowLen(d.lines, int(d.header.LinesLen))
-	d.lines, err = enc.DecodeInt32(d.lines, buf)
+	d.lines, err = enc.DecodeInt32(d.lines, d.tmp)
 	if err != nil {
 		return err
 	}
 	copy(lines, *(*[]v1.InMemoryLine)(unsafe.Pointer(&d.lines)))
-	if r.Size() > int(d.header.LinesSize) {
-		// Advance the buffer offset, if we haven't read from it.
-		// Note that this invalidates buf, therefore it can only
-		// be done after it was decoded.
-		_, _ = r.Discard(len(buf))
-	}
 
 	// In most cases we end up here.
 	if d.header.AddrSize == 0 && d.header.IsFoldedSize == 0 {
 		var o int // Offset within the lines slice.
-		for i := uint32(0); i < d.header.LocationsLen; i++ {
+		for i := 0; i < len(locations); i++ {
 			locations[i].MappingId = uint32(d.mappings[i])
 			n := o + int(d.lineCount[i])
 			locations[i].Line = lines[o:n]
@@ -353,25 +338,23 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat
 
 	// Otherwise, inspect all the optional fields.
 	if int(d.header.AddrSize) > 0 {
-		buf, err = r.Peek(int(d.header.AddrSize))
-		if err != nil {
+		d.tmp = slices.GrowLen(d.tmp, int(d.header.AddrSize))
+		if _, err = io.ReadFull(r, d.tmp); err != nil {
 			return err
 		}
 		d.address = slices.GrowLen(d.address, int(d.header.LocationsLen))
-		d.address, err = enc.DecodeInt64(d.address, buf)
+		d.address, err = enc.DecodeInt64(d.address, d.tmp)
 		if err != nil {
 			return err
 		}
-		_, _ = r.Discard(len(buf))
 	}
 	if int(d.header.IsFoldedSize) > 0 {
-		buf, err = r.Peek(int(d.header.IsFoldedSize))
-		if err != nil {
+		d.tmp = slices.GrowLen(d.tmp, int(d.header.IsFoldedSize))
+		if _, err = io.ReadFull(r, d.tmp); err != nil {
 			return err
 		}
 		d.folded = slices.GrowLen(d.folded, int(d.header.LocationsLen))
-		decodeBoolean(d.folded, buf)
-		_, _ = r.Discard(len(buf))
+		decodeBoolean(d.folded, d.tmp)
 	}
 
 	var o int // Offset within the lines slice.
diff --git a/pkg/phlaredb/symdb/encoding_strings_test.go b/pkg/phlaredb/symdb/encoding_strings_test.go
index 519f719df8..1fa6191eb1 100644
--- a/pkg/phlaredb/symdb/encoding_strings_test.go
+++ b/pkg/phlaredb/symdb/encoding_strings_test.go
@@ -83,12 +83,12 @@ func Test_StringsEncoding(t *testing.T) {
 			if tc.blockSize > 0 {
 				e.blockSize = tc.blockSize
 			}
-			require.NoError(t, e.WriteStrings(tc.strings))
+			require.NoError(t, e.EncodeStrings(tc.strings))
 			d := NewStringsDecoder(bufio.NewReader(&output))
 			n, err := d.StringsLen()
 			require.NoError(t, err)
 			out := make([]string, n)
-			require.NoError(t, d.ReadStrings(out))
+			require.NoError(t, d.DecodeStrings(out))
 			require.Equal(t, tc.strings, out)
 		})
 	}

From 17d327f857380085e249d4f8d8f205bbcc7ebbf8 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Tue, 26 Mar 2024 14:38:00 +0800
Subject: [PATCH 06/36] add functions codecs

---
 pkg/phlaredb/symdb/encoding_functions.go | 295 +++++++++++++++++++++++
 1 file changed, 295 insertions(+)
 create mode 100644 pkg/phlaredb/symdb/encoding_functions.go

diff --git a/pkg/phlaredb/symdb/encoding_functions.go b/pkg/phlaredb/symdb/encoding_functions.go
new file mode 100644
index 0000000000..72cca758cc
--- /dev/null
+++ b/pkg/phlaredb/symdb/encoding_functions.go
@@ -0,0 +1,295 @@
+package symdb
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"unsafe"
+
+	"github.com/parquet-go/parquet-go/encoding/delta"
+
+	v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
+	"github.com/grafana/pyroscope/pkg/slices"
+	"github.com/grafana/pyroscope/pkg/util/math"
+)
+
+type FunctionsEncoder struct {
+	w io.Writer
+	e functionsBlockEncoder
+
+	blockSize int
+	functions int
+
+	buf []byte
+}
+
+const (
+	defaultFunctionsBlockSize = 1 << 10
+)
+
+func NewFunctionsEncoder(w io.Writer) *FunctionsEncoder {
+	return &FunctionsEncoder{w: w}
+}
+
+func (e *FunctionsEncoder) EncodeFunctions(locations []v1.InMemoryFunction) error {
+	if e.blockSize == 0 {
+		e.blockSize = defaultFunctionsBlockSize
+	}
+	e.functions = len(locations)
+	if err := e.writeHeader(); err != nil {
+		return err
+	}
+	for i := 0; i < len(locations); i += e.blockSize {
+		block := locations[i:math.Min(i+e.blockSize, len(locations))]
+		if _, err := e.e.encode(e.w, block); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (e *FunctionsEncoder) writeHeader() (err error) {
+	e.buf = slices.GrowLen(e.buf, 8)
+	binary.LittleEndian.PutUint32(e.buf[0:4], uint32(e.functions))
+	binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize))
+	_, err = e.w.Write(e.buf)
+	return err
+}
+
+func (e *FunctionsEncoder) Reset(w io.Writer) {
+	e.functions = 0
+	e.blockSize = 0
+	e.buf = e.buf[:0]
+	e.w = w
+}
+
+type FunctionsDecoder struct {
+	r io.Reader
+	d functionsBlockDecoder
+
+	blockSize uint32
+	functions uint32
+
+	buf []byte
+}
+
+func NewFunctionsDecoder(r io.Reader) *FunctionsDecoder { return &FunctionsDecoder{r: r} }
+
+func (d *FunctionsDecoder) FunctionsLen() (int, error) {
+	if err := d.readHeader(); err != nil {
+		return 0, err
+	}
+	return int(d.functions), nil
+}
+
+func (d *FunctionsDecoder) readHeader() (err error) {
+	d.buf = slices.GrowLen(d.buf, 8)
+	if _, err = io.ReadFull(d.r, d.buf); err != nil {
+		return err
+	}
+	d.functions = binary.LittleEndian.Uint32(d.buf[0:4])
+	d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8])
+	// Sanity checks are needed as we process the stream data
+	// before verifying the checksum.
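+	// As for locations, 1<<20 is an arbitrary upper bound whose
+	// only purpose is to reject obviously corrupted headers early.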
+	if d.functions > 1<<20 || d.blockSize > 1<<20 {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func (d *FunctionsDecoder) DecodeFunctions(functions []v1.InMemoryFunction) error {
+	blocks := int((d.functions + d.blockSize - 1) / d.blockSize)
+	for i := 0; i < blocks; i++ {
+		lo := i * int(d.blockSize)
+		hi := math.Min(lo+int(d.blockSize), int(d.functions))
+		block := functions[lo:hi]
+		if err := d.d.decode(d.r, block); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (d *FunctionsDecoder) Reset(r io.Reader) {
+	d.functions = 0
+	d.blockSize = 0
+	d.buf = d.buf[:0]
+	d.r = r
+}
+
+const functionsBlockHeaderSize = int(unsafe.Sizeof(functionsBlockHeader{}))
+
+type functionsBlockHeader struct {
+	FunctionsLen   uint32
+	NameSize       uint32
+	SystemNameSize uint32
+	FileNameSize   uint32
+	StartLineSize  uint32
+}
+
+func (h *functionsBlockHeader) marshal(b []byte) {
+	binary.LittleEndian.PutUint32(b[0:4], h.FunctionsLen)
+	binary.LittleEndian.PutUint32(b[4:8], h.NameSize)
+	binary.LittleEndian.PutUint32(b[8:12], h.SystemNameSize)
+	binary.LittleEndian.PutUint32(b[12:16], h.FileNameSize)
+	binary.LittleEndian.PutUint32(b[16:20], h.StartLineSize)
+}
+
+func (h *functionsBlockHeader) unmarshal(b []byte) {
+	h.FunctionsLen = binary.LittleEndian.Uint32(b[0:4])
+	h.NameSize = binary.LittleEndian.Uint32(b[4:8])
+	h.SystemNameSize = binary.LittleEndian.Uint32(b[8:12])
+	h.FileNameSize = binary.LittleEndian.Uint32(b[12:16])
+	h.StartLineSize = binary.LittleEndian.Uint32(b[16:20])
+}
+
+// isValid reports whether the header contains sane values.
+// This is important as the block might be read before the
+// checksum validation.
+func (h *functionsBlockHeader) isValid() bool {
+	return h.FunctionsLen < 1<<20
+}
+
+type functionsBlockEncoder struct {
+	header functionsBlockHeader
+
+	tmp  []byte
+	buf  bytes.Buffer
+	ints []int32
+}
+
+func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunction) (int64, error) {
+	e.initWrite(len(functions))
+	var enc delta.BinaryPackedEncoding
+
+	for i, f := range functions {
+		e.ints[i] = int32(f.Name)
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.NameSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	for i, f := range functions {
+		e.ints[i] = int32(f.SystemName)
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.SystemNameSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	for i, f := range functions {
+		e.ints[i] = int32(f.Filename)
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.FileNameSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	for i, f := range functions {
+		e.ints[i] = int32(f.StartLine)
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.StartLineSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	e.tmp = slices.GrowLen(e.tmp, functionsBlockHeaderSize)
+	e.header.marshal(e.tmp)
+	n, err := w.Write(e.tmp)
+	if err != nil {
+		return int64(n), err
+	}
+	m, err := e.buf.WriteTo(w)
+	return m + int64(n), err
+}
+
+func (e *functionsBlockEncoder) initWrite(functions int) {
+	e.buf.Reset()
+	// Actual estimate is ~7 bytes per function.
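+	// Rounding the estimate up to 8 bytes per function keeps the
+	// buffer from reallocating in the common case.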
+	e.buf.Grow(functions * 8)
+	*e = functionsBlockEncoder{
+		header: functionsBlockHeader{FunctionsLen: uint32(functions)},
+
+		tmp:  slices.GrowLen(e.tmp, functions*2),
+		ints: slices.GrowLen(e.ints, functions),
+		buf:  e.buf,
+	}
+}
+
+type functionsBlockDecoder struct {
+	header functionsBlockHeader
+
+	ints []int32
+	tmp  []byte
+}
+
+func (d *functionsBlockDecoder) readHeader(r io.Reader) error {
+	d.tmp = slices.GrowLen(d.tmp, functionsBlockHeaderSize)
+	if _, err := io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.header.unmarshal(d.tmp)
+	if !d.header.isValid() {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunction) (err error) {
+	if err = d.readHeader(r); err != nil {
+		return err
+	}
+	if d.header.FunctionsLen > uint32(len(functions)) {
+		return fmt.Errorf("functions buffer is too short")
+	}
+
+	var enc delta.BinaryPackedEncoding
+	d.ints = slices.GrowLen(d.ints, int(d.header.FunctionsLen))
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.NameSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		functions[i].Name = uint32(v)
+	}
+
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.SystemNameSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		functions[i].SystemName = uint32(v)
+	}
+
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.FileNameSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		functions[i].Filename = uint32(v)
+	}
+
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.StartLineSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		functions[i].StartLine = uint32(v)
+	}
+
+	return nil
+}

From f859abb895d428a25c1512710d5319cf038ee0b9 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Tue, 26 Mar 2024 16:03:29 +0800
Subject: [PATCH 07/36] add mapping codecs

---
 pkg/phlaredb/symdb/encoding_functions.go |   8 +-
 pkg/phlaredb/symdb/encoding_locations.go |   7 +-
 pkg/phlaredb/symdb/encoding_mappings.go  | 380 +++++++++++++++++++++++
 3 files changed, 387 insertions(+), 8 deletions(-)
 create mode 100644 pkg/phlaredb/symdb/encoding_mappings.go

diff --git a/pkg/phlaredb/symdb/encoding_functions.go b/pkg/phlaredb/symdb/encoding_functions.go
index 72cca758cc..78fa383941 100644
--- a/pkg/phlaredb/symdb/encoding_functions.go
+++ b/pkg/phlaredb/symdb/encoding_functions.go
@@ -32,16 +32,16 @@ func NewFunctionsEncoder(w io.Writer) *FunctionsEncoder {
 	return &FunctionsEncoder{w: w}
 }
 
-func (e *FunctionsEncoder) EncodeFunctions(locations []v1.InMemoryFunction) error {
+func (e *FunctionsEncoder) EncodeFunctions(functions []v1.InMemoryFunction) error {
 	if e.blockSize == 0 {
 		e.blockSize = defaultFunctionsBlockSize
 	}
-	e.functions = len(locations)
+	e.functions = len(functions)
 	if err := e.writeHeader(); err != nil {
 		return err
 	}
-	for i := 0; i < len(locations); i += e.blockSize {
-		block := locations[i:math.Min(i+e.blockSize, len(locations))]
+	for i := 0; i < len(functions); i += e.blockSize {
+		block := functions[i:math.Min(i+e.blockSize, len(functions))]
 		if _, err := e.e.encode(e.w, block); err != nil {
 			return err
 		}
 	}
 	return nil
 }
diff --git a/pkg/phlaredb/symdb/encoding_locations.go b/pkg/phlaredb/symdb/encoding_locations.go
index 68b7e2a10a..193d3233c4 100644
--- a/pkg/phlaredb/symdb/encoding_locations.go
+++ b/pkg/phlaredb/symdb/encoding_locations.go
@@ -137,8 +137,6 @@ type locationsBlockEncoder struct {
 
 	tmp []byte
 	buf bytes.Buffer
-
-	hasFolded bool
 }
 
 const locationsBlockHeaderSize = int(unsafe.Sizeof(locationsBlockHeader{}))
@@ -186,6 +184,7 @@ func (h *locationsBlockHeader) unmarshal(b []byte) {
 func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) (int64, error) {
 	e.initWrite(len(locations))
 	var addr int64
+	var folded bool
 	for i, loc := range locations {
 		e.mapping[i] = int32(loc.MappingId)
 		e.lineCount[i] = byte(len(loc.Line))
@@ -197,7 +196,7 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat
 		addr |= int64(loc.Address)
 		e.addr[i] = int64(loc.Address)
-		e.hasFolded = e.hasFolded || loc.IsFolded
+		folded = folded || loc.IsFolded
 		e.folded[i] = loc.IsFolded
 	}
 
@@ -221,7 +220,7 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat
 		e.buf.Write(e.tmp)
 	}
 
-	if e.hasFolded {
+	if folded {
 		e.tmp = slices.GrowLen(e.tmp, len(e.folded)/8)
 		encodeBoolean(e.tmp, e.folded)
 		e.header.IsFoldedSize = uint32(len(e.tmp))
 		e.buf.Write(e.tmp)
diff --git a/pkg/phlaredb/symdb/encoding_mappings.go b/pkg/phlaredb/symdb/encoding_mappings.go
new file mode 100644
index 0000000000..c1a5794bc9
--- /dev/null
+++ b/pkg/phlaredb/symdb/encoding_mappings.go
@@ -0,0 +1,380 @@
+package symdb
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"unsafe"
+
+	"github.com/parquet-go/parquet-go/encoding/delta"
+
+	v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
+	"github.com/grafana/pyroscope/pkg/slices"
+	"github.com/grafana/pyroscope/pkg/util/math"
+)
+
+type MappingsEncoder struct {
+	w io.Writer
+	e mappingsBlockEncoder
+
+	blockSize int
+	mappings  int
+
+	buf []byte
+}
+
+const (
+	defaultMappingsBlockSize = 1 << 10
+)
+
+func NewMappingsEncoder(w io.Writer) *MappingsEncoder {
+	return &MappingsEncoder{w: w}
+}
+
+func (e *MappingsEncoder) EncodeMappings(mappings []v1.InMemoryMapping) error {
+	if e.blockSize == 0 {
+		e.blockSize = defaultMappingsBlockSize
+	}
+	e.mappings = len(mappings)
+	if err := e.writeHeader(); err != nil {
+		return err
+	}
+	for i := 0; i < len(mappings); i += e.blockSize {
+		block := mappings[i:math.Min(i+e.blockSize, len(mappings))]
+		if _, err := e.e.encode(e.w, block); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (e *MappingsEncoder) writeHeader() (err error) {
+	e.buf = slices.GrowLen(e.buf, 8)
+	binary.LittleEndian.PutUint32(e.buf[0:4], uint32(e.mappings))
+	binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize))
+	_, err = e.w.Write(e.buf)
+	return err
+}
+
+func (e *MappingsEncoder) Reset(w io.Writer) {
+	e.mappings = 0
+	e.blockSize = 0
+	e.buf = e.buf[:0]
+	e.w = w
+}
+
+type MappingsDecoder struct {
+	r io.Reader
+	d mappingsBlockDecoder
+
+	blockSize uint32
+	mappings  uint32
+
+	buf []byte
+}
+
+func NewMappingsDecoder(r io.Reader) *MappingsDecoder { return &MappingsDecoder{r: r} }
+
+func (d *MappingsDecoder) MappingsLen() (int, error) {
+	if err := d.readHeader(); err != nil {
+		return 0, err
+	}
+	return int(d.mappings), nil
+}
+
+func (d *MappingsDecoder) readHeader() (err error) {
+	d.buf = slices.GrowLen(d.buf, 8)
+	if _, err = io.ReadFull(d.r, d.buf); err != nil {
+		return err
+	}
+	d.mappings = binary.LittleEndian.Uint32(d.buf[0:4])
+	d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8])
+	// Sanity checks are needed as we process the stream data
+	// before verifying the checksum.
+	if d.mappings > 1<<20 || d.blockSize > 1<<20 {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func (d *MappingsDecoder) DecodeMappings(mappings []v1.InMemoryMapping) error {
+	blocks := int((d.mappings + d.blockSize - 1) / d.blockSize)
+	for i := 0; i < blocks; i++ {
+		lo := i * int(d.blockSize)
+		hi := math.Min(lo+int(d.blockSize), int(d.mappings))
+		block := mappings[lo:hi]
+		if err := d.d.decode(d.r, block); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (d *MappingsDecoder) Reset(r io.Reader) {
+	d.mappings = 0
+	d.blockSize = 0
+	d.buf = d.buf[:0]
+	d.r = r
+}
+
+const mappingsBlockHeaderSize = int(unsafe.Sizeof(mappingsBlockHeader{}))
+
+type mappingsBlockHeader struct {
+	MappingsLen  uint32
+	FileNameSize uint32
+	BuildIDSize  uint32
+	FlagsSize    uint32
+	// Optional.
+	MemoryStartSize uint32
+	MemoryLimitSize uint32
+	FileOffsetSize  uint32
+}
+
+func (h *mappingsBlockHeader) marshal(b []byte) {
+	binary.LittleEndian.PutUint32(b[0:4], h.MappingsLen)
+	binary.LittleEndian.PutUint32(b[4:8], h.FileNameSize)
+	binary.LittleEndian.PutUint32(b[8:12], h.BuildIDSize)
+	binary.LittleEndian.PutUint32(b[12:16], h.FlagsSize)
+	binary.LittleEndian.PutUint32(b[16:20], h.MemoryStartSize)
+	binary.LittleEndian.PutUint32(b[20:24], h.MemoryLimitSize)
+	binary.LittleEndian.PutUint32(b[24:28], h.FileOffsetSize)
+}
+
+func (h *mappingsBlockHeader) unmarshal(b []byte) {
+	h.MappingsLen = binary.LittleEndian.Uint32(b[0:4])
+	h.FileNameSize = binary.LittleEndian.Uint32(b[4:8])
+	h.BuildIDSize = binary.LittleEndian.Uint32(b[8:12])
+	h.FlagsSize = binary.LittleEndian.Uint32(b[12:16])
+	h.MemoryStartSize = binary.LittleEndian.Uint32(b[16:20])
+	h.MemoryLimitSize = binary.LittleEndian.Uint32(b[20:24])
+	h.FileOffsetSize = binary.LittleEndian.Uint32(b[24:28])
+}
+
+// isValid reports whether the header contains sane values.
+// This is important as the block might be read before the
+// checksum validation.
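+// Note that, as in the functions codec, only the element count
+// is bounded here; the per-field sizes are consumed as-is.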
+func (h *mappingsBlockHeader) isValid() bool {
+	return h.MappingsLen < 1<<20
+}
+
+type mappingsBlockEncoder struct {
+	header mappingsBlockHeader
+
+	tmp    []byte
+	buf    bytes.Buffer
+	ints   []int32
+	ints64 []int64
+}
+
+func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping) (int64, error) {
+	e.initWrite(len(mappings))
+	var enc delta.BinaryPackedEncoding
+
+	for i, m := range mappings {
+		e.ints[i] = int32(m.Filename)
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.FileNameSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	for i, m := range mappings {
+		e.ints[i] = int32(m.BuildId)
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.BuildIDSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	for i, m := range mappings {
+		var v int32
+		if m.HasFunctions {
+			v |= 1 << 3
+		}
+		if m.HasFilenames {
+			v |= 1 << 2
+		}
+		if m.HasLineNumbers {
+			v |= 1 << 1
+		}
+		if m.HasInlineFrames {
+			v |= 1
+		}
+		e.ints[i] = v
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.FlagsSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	var memoryStart uint64
+	for i, m := range mappings {
+		memoryStart |= m.MemoryStart
+		e.ints64[i] = int64(m.MemoryStart)
+	}
+	if memoryStart != 0 {
+		e.tmp, _ = enc.EncodeInt64(e.tmp, e.ints64)
+		e.header.MemoryStartSize = uint32(len(e.tmp))
+		e.buf.Write(e.tmp)
+	}
+
+	var memoryLimit uint64
+	for i, m := range mappings {
+		memoryLimit |= m.MemoryLimit
+		e.ints64[i] = int64(m.MemoryLimit)
+	}
+	if memoryLimit != 0 {
+		e.tmp, _ = enc.EncodeInt64(e.tmp, e.ints64)
+		e.header.MemoryLimitSize = uint32(len(e.tmp))
+		e.buf.Write(e.tmp)
+	}
+
+	var fileOffset uint64
+	for i, m := range mappings {
+		fileOffset |= m.FileOffset
+		e.ints64[i] = int64(m.FileOffset)
+	}
+	if fileOffset != 0 {
+		e.tmp, _ = enc.EncodeInt64(e.tmp, e.ints64)
+		e.header.FileOffsetSize = uint32(len(e.tmp))
+		e.buf.Write(e.tmp)
+	}
+
+	e.tmp = slices.GrowLen(e.tmp, mappingsBlockHeaderSize)
+	e.header.marshal(e.tmp)
+	n, err := w.Write(e.tmp)
+	if err != nil {
+		return int64(n), err
+	}
+	m, err := e.buf.WriteTo(w)
+	return m + int64(n), err
+}
+
+func (e *mappingsBlockEncoder) initWrite(mappings int) {
+	e.buf.Reset()
+	// Actual estimate is ~7 bytes per mapping.
+	e.buf.Grow(mappings * 8)
+	*e = mappingsBlockEncoder{
+		header: mappingsBlockHeader{MappingsLen: uint32(mappings)},
+
+		tmp:    slices.GrowLen(e.tmp, mappings*2),
+		ints:   slices.GrowLen(e.ints, mappings),
+		ints64: slices.GrowLen(e.ints64, mappings),
+		buf:    e.buf,
+	}
+}
+
+type mappingsBlockDecoder struct {
+	header mappingsBlockHeader
+
+	ints   []int32
+	ints64 []int64
+	tmp    []byte
+}
+
+func (d *mappingsBlockDecoder) readHeader(r io.Reader) error {
+	d.tmp = slices.GrowLen(d.tmp, mappingsBlockHeaderSize)
+	if _, err := io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.header.unmarshal(d.tmp)
+	if !d.header.isValid() {
+		return ErrInvalidSize
+	}
+	// TODO: Scale tmp
+	return nil
+}
+
+func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping) (err error) {
+	if err = d.readHeader(r); err != nil {
+		return err
+	}
+	if d.header.MappingsLen > uint32(len(mappings)) {
+		return fmt.Errorf("mappings buffer is too short")
+	}
+
+	var enc delta.BinaryPackedEncoding
+	d.ints = slices.GrowLen(d.ints, int(d.header.MappingsLen))
+
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.FileNameSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		mappings[i].Filename = uint32(v)
+	}
+
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.BuildIDSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		mappings[i].BuildId = uint32(v)
+	}
+
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.FlagsSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		mappings[i].HasFunctions = v&(1<<3) > 0
+		mappings[i].HasFilenames = v&(1<<2) > 0
+		mappings[i].HasLineNumbers = v&(1<<1) > 0
+		mappings[i].HasInlineFrames = v&1 > 0
+	}
+
+	if d.header.MemoryStartSize > 0 {
+		d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen))
+		d.tmp = slices.GrowLen(d.tmp, int(d.header.MemoryStartSize))
+		if _, err = io.ReadFull(r, d.tmp); err != nil {
+			return err
+		}
+		d.ints64, err = enc.DecodeInt64(d.ints64, d.tmp)
+		if err != nil {
+			return err
+		}
+		for i, v := range d.ints64 {
+			mappings[i].MemoryStart = uint64(v)
+		}
+	}
+	if d.header.MemoryLimitSize > 0 {
+		d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen))
+		d.tmp = slices.GrowLen(d.tmp, int(d.header.MemoryLimitSize))
+		if _, err = io.ReadFull(r, d.tmp); err != nil {
+			return err
+		}
+		d.ints64, err = enc.DecodeInt64(d.ints64, d.tmp)
+		if err != nil {
+			return err
+		}
+		for i, v := range d.ints64 {
+			mappings[i].MemoryLimit = uint64(v)
+		}
+	}
+	if d.header.FileOffsetSize > 0 {
+		d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen))
+		d.tmp = slices.GrowLen(d.tmp, int(d.header.FileOffsetSize))
+		if _, err = io.ReadFull(r, d.tmp); err != nil {
+			return err
+		}
+		d.ints64, err = enc.DecodeInt64(d.ints64, d.tmp)
+		if err != nil {
+			return err
+		}
+		for i, v := range d.ints64 {
+			mappings[i].FileOffset = uint64(v)
+		}
+	}
+
+	return nil
+}

From 265ca213baecfa61031ea08a91474961cab00b0b Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Wed, 27 Mar 2024 12:12:13 +0800
Subject: [PATCH 08/36] unify interfaces

---
 pkg/phlaredb/symdb/encoding_strings.go        | 227 ------------------
 pkg/phlaredb/symdb/format.go                  | 157 ++++++++++++
functions.go} | 119 +-------- .../{encoding_locations.go => locations.go} | 162 +++---------- .../{encoding_mappings.go => mappings.go} | 119 +-------- pkg/phlaredb/symdb/strings.go | 167 +++++++++++++ ...coding_strings_test.go => strings_test.go} | 13 +- 7 files changed, 377 insertions(+), 587 deletions(-) delete mode 100644 pkg/phlaredb/symdb/encoding_strings.go rename pkg/phlaredb/symdb/{encoding_functions.go => functions.go} (64%) rename pkg/phlaredb/symdb/{encoding_locations.go => locations.go} (72%) rename pkg/phlaredb/symdb/{encoding_mappings.go => mappings.go} (73%) create mode 100644 pkg/phlaredb/symdb/strings.go rename pkg/phlaredb/symdb/{encoding_strings_test.go => strings_test.go} (83%) diff --git a/pkg/phlaredb/symdb/encoding_strings.go b/pkg/phlaredb/symdb/encoding_strings.go deleted file mode 100644 index 5ecced263f..0000000000 --- a/pkg/phlaredb/symdb/encoding_strings.go +++ /dev/null @@ -1,227 +0,0 @@ -package symdb - -import ( - "encoding/binary" - "fmt" - "io" - "unsafe" - - "github.com/grafana/pyroscope/pkg/slices" -) - -// Almost all strings in profiles are very short, their length fits 8 bits. -// Strings larger than 65536 are not expected and are getting truncated. -// Typically, there are only 1-10 strings longer than 256 in a data set -// consisting of a few dozens of thousands of strings. -// -// A traditional var length encoding is rather wasteful in our case. -// Instead, we split the strings into blocks and use encoding that depends -// on the maximum length of the strings in the block. -// -// The output data starts with a header: number of strings, block size, -// number of blocks, and the block encoding map. In the map, each byte -// specifies the number of bits needed to decode the maximum value from -// that block, rounded up to the next power of two. Currently, the length -// value is either 8 bits or 16. -// -// Blocks of data follow after the header. Each block includes two parts: -// strings lengths array and strings data. - -const maxStringLen = 1<<16 - 1 - -type StringsEncoder struct { - w io.Writer - blockSize int - blocks []byte - buf []byte -} - -func NewStringsEncoder(w io.Writer) *StringsEncoder { return &StringsEncoder{w: w} } - -func (e *StringsEncoder) EncodeStrings(strings []string) error { - if e.blockSize == 0 { - e.blockSize = 1 << 10 // 1k strings per block by default. - } - nb := (len(strings) + e.blockSize - 1) / e.blockSize - e.blocks = slices.GrowLen(e.blocks, nb) - var offset uint32 - var bi int - l := uint32(len(strings)) - for offset < l { - lo := offset - hi := offset + uint32(e.blockSize) - if x := uint32(len(strings)); hi > x { - hi = x - } - e.blocks[bi] = e.blockEncoding(strings[lo:hi]) - offset = hi - bi++ - } - if err := e.writeHeader(strings); err != nil { - return err - } - // Next we write string lengths and values in blocks. - e.buf = slices.GrowLen(e.buf, e.blockSize*2) // Up to 2 bytes per string. - for i, b := range e.blocks { - // e.buf = e.buf[:0] - lo := i * e.blockSize - hi := lo + e.blockSize - if x := len(strings); hi > x { - hi = x - } - bs := strings[lo:hi] - switch b { - case 8: - for j, s := range bs { - e.buf[j] = byte(len(s)) - } - case 16: - for j, s := range bs { - // binary.LittleEndian.PutUint16. 
- e.buf[j*2] = byte(len(s)) - e.buf[j*2+1] = byte(len(s) >> 8) - } - default: - panic("bug: unexpected block size") - } - if _, err := e.w.Write(e.buf[:len(bs)*int(b)/8]); err != nil { - return err - } - for _, s := range bs { - if len(s) > maxStringLen { - s = s[:maxStringLen] - } - if _, err := e.w.Write(*((*[]byte)(unsafe.Pointer(&s)))); err != nil { - return err - } - } - } - return nil -} - -func (e *StringsEncoder) writeHeader(strings []string) (err error) { - e.buf = slices.GrowLen(e.buf, 12) - binary.LittleEndian.PutUint32(e.buf[0:4], uint32(len(strings))) - binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize)) - binary.LittleEndian.PutUint32(e.buf[8:12], uint32(len(e.blocks))) - if _, err = e.w.Write(e.buf); err != nil { - return err - } - _, err = e.w.Write(e.blocks) - return err -} - -func (e *StringsEncoder) blockEncoding(b []string) byte { - for _, s := range b { - if len(s) > 255 { - return 16 - } - } - return 8 -} - -func (e *StringsEncoder) Reset() { - e.buf = e.buf[:0] - e.blocks = e.blocks[:0] - e.blockSize = 0 - e.w = nil -} - -type StringsDecoder struct { - r io.Reader - stringsLen uint32 - blocksLen uint32 - blockSize uint32 - blocks []byte - buf []byte -} - -func NewStringsDecoder(r io.Reader) *StringsDecoder { return &StringsDecoder{r: r} } - -func (d *StringsDecoder) StringsLen() (int, error) { - if err := d.readHeader(); err != nil { - return 0, err - } - return int(d.stringsLen), nil -} - -func (d *StringsDecoder) readHeader() (err error) { - d.buf = slices.GrowLen(d.buf, 12) - if _, err = io.ReadFull(d.r, d.buf); err != nil { - return err - } - d.stringsLen = binary.LittleEndian.Uint32(d.buf[0:4]) - d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8]) - d.blocksLen = binary.LittleEndian.Uint32(d.buf[8:12]) - // Sanity checks are needed as we process the stream data - // before verifying the check sum. - if d.blocksLen > 1<<20 || d.stringsLen > 1<<20 || d.blockSize > 1<<20 { - return ErrInvalidSize - } - d.blocks = slices.GrowLen(d.blocks, int(d.blocksLen)) - _, err = io.ReadFull(d.r, d.blocks) - return err -} - -func (d *StringsDecoder) DecodeStrings(dst []string) (err error) { - for i := 0; i < len(d.blocks); i++ { - bs := d.blockSize - if i == len(d.blocks)-1 && d.stringsLen%d.blockSize > 0 { - bs = d.stringsLen % d.blockSize - } - switch d.blocks[i] { - case 8: - err = d.readStrings8(i, int(bs), dst) - case 16: - err = d.readStrings16(i, int(bs), dst) - default: - err = fmt.Errorf("unknown block encoding") - } - if err != nil { - return err - } - } - return nil -} - -func (d *StringsDecoder) readStrings8(idx, length int, dst []string) (err error) { - d.buf = slices.GrowLen(d.buf, length) // 1 byte per string. - if _, err = io.ReadFull(d.r, d.buf); err != nil { - return err - } - offset := int(d.blockSize) * idx - for i, l := range d.buf { - s := make([]byte, l) // Up to 256 bytes. - if _, err = io.ReadFull(d.r, s); err != nil { - return err - } - dst[offset+i] = *(*string)(unsafe.Pointer(&s)) - } - return err -} - -func (d *StringsDecoder) readStrings16(idx, length int, dst []string) (err error) { - d.buf = slices.GrowLen(d.buf, length*2) // 2 bytes per string. - if _, err = io.ReadFull(d.r, d.buf); err != nil { - return err - } - offset := int(d.blockSize) * idx - for i := 0; i < len(d.buf); i += 2 { - l := uint16(d.buf[i]) | uint16(d.buf[i+1])<<8 - s := make([]byte, l) // Up to 65536 bytes. 
-		if _, err = io.ReadFull(d.r, s); err != nil {
-			return err
-		}
-		dst[offset+i/2] = *(*string)(unsafe.Pointer(&s))
-	}
-	return err
-}
-
-func (d *StringsDecoder) Reset() {
-	d.buf = d.buf[:0]
-	d.blocks = d.blocks[:0]
-	d.blockSize = 0
-	d.blocksLen = 0
-	d.stringsLen = 0
-	d.r = nil
-}
diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go
index 781ed86832..5ed441ed34 100644
--- a/pkg/phlaredb/symdb/format.go
+++ b/pkg/phlaredb/symdb/format.go
@@ -7,6 +7,9 @@ import (
 	"hash/crc32"
 	"io"
 	"unsafe"
+
+	"github.com/grafana/pyroscope/pkg/slices"
+	"github.com/grafana/pyroscope/pkg/util/math"
 )
 
 // The database is a collection of files. The only file that is guaranteed
@@ -484,3 +487,157 @@ func (f *IndexFile) WriteTo(dst io.Writer) (n int64, err error) {
 
 	return w.offset, nil
 }
+
+// Symbolic information such as locations, functions, mappings,
+// and strings is represented as Array of Structures in memory,
+// and is encoded as Structure of Arrays when written on disk.
+//
+// The common structure of the encoded symbolic data is as follows:
+//
+//	[Header]
+//	[Data encoded in blocks]
+//	[CRC32]
+//
+// Where the block format depends on the contents.
+//
+// Note that the data is decoded in a stream fashion, therefore
+// any error in the data will be detected only after all the blocks
+// are read in and decoded.
+type symbolsBlockHeader struct {
+	Magic   [4]byte
+	Version uint32
+	// Length denotes the total number of items encoded.
+	Length uint32
+	// BlockSize denotes the number of items per block.
+	BlockSize uint32
+}
+
+const (
+	defaultSymbolsBlockSize = 1 << 10
+	symbolsBlockHeaderSize  = int(unsafe.Sizeof(symbolsBlockHeader{}))
+)
+
+func newSymbolsBlockHeader(n, bs int) symbolsBlockHeader {
+	return symbolsBlockHeader{
+		Magic:     symdbMagic,
+		Version:   1,
+		Length:    uint32(n),
+		BlockSize: uint32(bs),
+	}
+}
+
+func (h *symbolsBlockHeader) marshal(b []byte) {
+	b[0], b[1], b[2], b[3] = h.Magic[0], h.Magic[1], h.Magic[2], h.Magic[3]
+	binary.BigEndian.PutUint32(b[4:8], h.Version)
+	binary.BigEndian.PutUint32(b[8:12], h.Length)
+	binary.BigEndian.PutUint32(b[12:16], h.BlockSize)
+}
+
+func (h *symbolsBlockHeader) unmarshal(b []byte) {
+	h.Magic[0], h.Magic[1], h.Magic[2], h.Magic[3] = b[0], b[1], b[2], b[3]
+	h.Version = binary.BigEndian.Uint32(b[4:8])
+	h.Length = binary.BigEndian.Uint32(b[8:12])
+	h.BlockSize = binary.BigEndian.Uint32(b[12:16])
+}
+
+func (h *symbolsBlockHeader) validate() error {
+	if h.Magic[0] != symdbMagic[0] ||
+		h.Magic[1] != symdbMagic[1] ||
+		h.Magic[2] != symdbMagic[2] ||
+		h.Magic[3] != symdbMagic[3] {
+		return ErrInvalidMagic
+	}
+	if h.Version >= 2 {
+		return ErrUnknownVersion
+	}
+	if h.Length >= 1<<20 || h.BlockSize >= 1<<20 {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func writeSymbolsBlockHeader(w io.Writer, buf []byte, h symbolsBlockHeader) ([]byte, error) {
+	if err := h.validate(); err != nil {
+		return buf, err
+	}
+	buf = slices.GrowLen(buf, symbolsBlockHeaderSize)
+	h.marshal(buf)
+	_, err := w.Write(buf)
+	return buf, err
+}
+
+func readSymbolsBlockHeader(r io.Reader, buf []byte, h *symbolsBlockHeader) ([]byte, error) {
+	buf = slices.GrowLen(buf, symbolsBlockHeaderSize)
+	if _, err := io.ReadFull(r, buf); err != nil {
+		return buf, err
+	}
+	h.unmarshal(buf)
+	return buf, h.validate()
+}
+
+type symbolsBlockEncoder[T any] interface {
+	encode(w io.Writer, block []T) error
+}
+
+type symbolsEncoder[T any] struct {
+	w   io.Writer
+	e   symbolsBlockEncoder[T]
+	bs  int
+	buf []byte
+}
+
+func newSymbolsEncoder[T any](w 
io.Writer, e symbolsBlockEncoder[T]) *symbolsEncoder[T] { + return &symbolsEncoder[T]{w: w, e: e, bs: defaultSymbolsBlockSize} +} + +func (e *symbolsEncoder[T]) Encode(items []T) (err error) { + h := newSymbolsBlockHeader(len(items), e.bs) + if e.buf, err = writeSymbolsBlockHeader(e.w, e.buf, h); err != nil { + return err + } + for i := uint32(0); i < h.Length; i += h.BlockSize { + block := items[i:math.Min(i+h.BlockSize, h.Length)] + if err = e.e.encode(e.w, block); err != nil { + return err + } + } + return nil +} + +func (e *symbolsEncoder[T]) Reset(w io.Writer) { e.w = w } + +type symbolsBlockDecoder[T any] interface { + decode(r io.Reader, block []T) error +} + +type symbolsDecoder[T any] struct { + r io.Reader + h symbolsBlockHeader + d symbolsBlockDecoder[T] + + buf []byte +} + +func newSymbolsDecoder[T any](r io.Reader, d symbolsBlockDecoder[T]) *symbolsDecoder[T] { + return &symbolsDecoder[T]{r: r, d: d} +} + +func (d *symbolsDecoder[T]) Open() (n int, err error) { + d.buf, err = readSymbolsBlockHeader(d.r, d.buf, &d.h) + return int(d.h.Length), err +} + +func (d *symbolsDecoder[T]) Decode(items []T) error { + blocks := int((d.h.Length + d.h.BlockSize - 1) / d.h.BlockSize) + for i := 0; i < blocks; i++ { + lo := i * int(d.h.BlockSize) + hi := math.Min(lo+int(d.h.BlockSize), int(d.h.Length)) + block := items[lo:hi] + if err := d.d.decode(d.r, block); err != nil { + return err + } + } + return nil +} + +func (d *symbolsDecoder[T]) Reset(r io.Reader) { d.r = r } diff --git a/pkg/phlaredb/symdb/encoding_functions.go b/pkg/phlaredb/symdb/functions.go similarity index 64% rename from pkg/phlaredb/symdb/encoding_functions.go rename to pkg/phlaredb/symdb/functions.go index 78fa383941..d4f5833f1e 100644 --- a/pkg/phlaredb/symdb/encoding_functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -11,115 +11,15 @@ import ( v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/slices" - "github.com/grafana/pyroscope/pkg/util/math" ) -type FunctionsEncoder struct { - w io.Writer - e functionsBlockEncoder - - blockSize int - functions int - - buf []byte -} +const functionsBlockHeaderSize = int(unsafe.Sizeof(functionsBlockHeader{})) -const ( - defaultFunctionsBlockSize = 1 << 10 +var ( + _ symbolsBlockEncoder[v1.InMemoryFunction] = (*functionsBlockEncoder)(nil) + _ symbolsBlockDecoder[v1.InMemoryFunction] = (*functionsBlockDecoder)(nil) ) -func NewFunctionsEncoder(w io.Writer) *FunctionsEncoder { - return &FunctionsEncoder{w: w} -} - -func (e *FunctionsEncoder) EncodeFunctions(functions []v1.InMemoryFunction) error { - if e.blockSize == 0 { - e.blockSize = defaultFunctionsBlockSize - } - e.functions = len(functions) - if err := e.writeHeader(); err != nil { - return err - } - for i := 0; i < len(functions); i += e.blockSize { - block := functions[i:math.Min(i+e.blockSize, len(functions))] - if _, err := e.e.encode(e.w, block); err != nil { - return err - } - } - return nil -} - -func (e *FunctionsEncoder) writeHeader() (err error) { - e.buf = slices.GrowLen(e.buf, 8) - binary.LittleEndian.PutUint32(e.buf[0:4], uint32(e.functions)) - binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize)) - _, err = e.w.Write(e.buf) - return err -} - -func (e *FunctionsEncoder) Reset(w io.Writer) { - e.functions = 0 - e.blockSize = 0 - e.buf = e.buf[:0] - e.w = w -} - -type FunctionsDecoder struct { - r io.Reader - d functionsBlockDecoder - - blockSize uint32 - functions uint32 - - buf []byte -} - -func NewFunctionsDecoder(r io.Reader) *FunctionsDecoder { return 
&FunctionsDecoder{r: r} } - -func (d *FunctionsDecoder) FunctionsLen() (int, error) { - if err := d.readHeader(); err != nil { - return 0, err - } - return int(d.functions), nil -} - -func (d *FunctionsDecoder) readHeader() (err error) { - d.buf = slices.GrowLen(d.buf, 8) - if _, err = io.ReadFull(d.r, d.buf); err != nil { - return err - } - d.functions = binary.LittleEndian.Uint32(d.buf[0:4]) - d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8]) - // Sanity checks are needed as we process the stream data - // before verifying the check sum. - if d.functions > 1<<20 || d.blockSize > 1<<20 { - return ErrInvalidSize - } - return nil -} - -func (d *FunctionsDecoder) DecodeFunctions(functions []v1.InMemoryFunction) error { - blocks := int((d.functions + d.blockSize - 1) / d.blockSize) - for i := 0; i < blocks; i++ { - lo := i * int(d.blockSize) - hi := math.Min(lo+int(d.blockSize), int(d.functions)) - block := functions[lo:hi] - if err := d.d.decode(d.r, block); err != nil { - return err - } - } - return nil -} - -func (d *FunctionsDecoder) Reset(r io.Reader) { - d.functions = 0 - d.blockSize = 0 - d.buf = d.buf[:0] - d.r = r -} - -const functionsBlockHeaderSize = int(unsafe.Sizeof(functionsBlockHeader{})) - type functionsBlockHeader struct { FunctionsLen uint32 NameSize uint32 @@ -159,7 +59,7 @@ type functionsBlockEncoder struct { ints []int32 } -func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunction) (int64, error) { +func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunction) error { e.initWrite(len(functions)) var enc delta.BinaryPackedEncoding @@ -193,12 +93,11 @@ func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunct e.tmp = slices.GrowLen(e.tmp, functionsBlockHeaderSize) e.header.marshal(e.tmp) - n, err := w.Write(e.tmp) - if err != nil { - return int64(n), err + if _, err := w.Write(e.tmp); err != nil { + return err } - m, err := e.buf.WriteTo(w) - return m + int64(n), err + _, err := e.buf.WriteTo(w) + return err } func (e *functionsBlockEncoder) initWrite(functions int) { diff --git a/pkg/phlaredb/symdb/encoding_locations.go b/pkg/phlaredb/symdb/locations.go similarity index 72% rename from pkg/phlaredb/symdb/encoding_locations.go rename to pkg/phlaredb/symdb/locations.go index 193d3233c4..6d5d1ab8f1 100644 --- a/pkg/phlaredb/symdb/encoding_locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -11,135 +11,17 @@ import ( v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/slices" - "github.com/grafana/pyroscope/pkg/util/math" ) -// https://parquet.apache.org/docs/file-format/data-pages/encodings/#delta-encoding-delta_binary_packed--5 - -type LocationsEncoder struct { - w io.Writer - e locationsBlockEncoder - - blockSize int - locations int - - buf []byte -} - const ( - maxLocationLines = 255 - defaultLocationsBlockSize = 1 << 10 + maxLocationLines = 255 + locationsBlockHeaderSize = int(unsafe.Sizeof(locationsBlockHeader{})) ) -func NewLocationsEncoder(w io.Writer) *LocationsEncoder { - return &LocationsEncoder{w: w} -} - -func (e *LocationsEncoder) EncodeLocations(locations []v1.InMemoryLocation) error { - if e.blockSize == 0 { - e.blockSize = defaultLocationsBlockSize - } - e.locations = len(locations) - if err := e.writeHeader(); err != nil { - return err - } - for i := 0; i < len(locations); i += e.blockSize { - block := locations[i:math.Min(i+e.blockSize, len(locations))] - if _, err := e.e.encode(e.w, block); err != nil { - return err - } - } - return nil -} - 
-func (e *LocationsEncoder) writeHeader() (err error) {
-	e.buf = slices.GrowLen(e.buf, 8)
-	binary.LittleEndian.PutUint32(e.buf[0:4], uint32(e.locations))
-	binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize))
-	_, err = e.w.Write(e.buf)
-	return err
-}
-
-func (e *LocationsEncoder) Reset(w io.Writer) {
-	e.locations = 0
-	e.blockSize = 0
-	e.buf = e.buf[:0]
-	e.w = w
-}
-
-type LocationsDecoder struct {
-	r io.Reader
-	d locationsBlockDecoder
-
-	blockSize uint32
-	locations uint32
-
-	buf []byte
-}
-
-func NewLocationsDecoder(r io.Reader) *LocationsDecoder { return &LocationsDecoder{r: r} }
-
-func (d *LocationsDecoder) LocationsLen() (int, error) {
-	if err := d.readHeader(); err != nil {
-		return 0, err
-	}
-	return int(d.locations), nil
-}
-
-func (d *LocationsDecoder) readHeader() (err error) {
-	d.buf = slices.GrowLen(d.buf, 8)
-	if _, err = io.ReadFull(d.r, d.buf); err != nil {
-		return err
-	}
-	d.locations = binary.LittleEndian.Uint32(d.buf[0:4])
-	d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8])
-	// Sanity checks are needed as we process the stream data
-	// before verifying the check sum.
-	if d.locations > 1<<20 || d.blockSize > 1<<20 {
-		return ErrInvalidSize
-	}
-	return nil
-}
-
-func (d *LocationsDecoder) DecodeLocations(locations []v1.InMemoryLocation) error {
-	blocks := int((d.locations + d.blockSize - 1) / d.blockSize)
-	for i := 0; i < blocks; i++ {
-		lo := i * int(d.blockSize)
-		hi := math.Min(lo+int(d.blockSize), int(d.locations))
-		block := locations[lo:hi]
-		if err := d.d.decode(d.r, block); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-func (d *LocationsDecoder) Reset(r io.Reader) {
-	d.locations = 0
-	d.blockSize = 0
-	d.buf = d.buf[:0]
-	d.r = r
-}
-
-type locationsBlockEncoder struct {
-	header locationsBlockHeader
-
-	mapping []int32
-	// Assuming there is no locations with more than 255 lines.
-	// We could even use a nibble (4 bits), but there are locations
-	// with 10 and more functions, therefore there is a change that
-	// capacity of 2^4 is not enough in all cases.
-	lineCount []byte
-	lines []int32
-	// Optional.
-	addr []int64
-	folded []bool
-
-	tmp []byte
-	buf bytes.Buffer
-}
-
-const locationsBlockHeaderSize = int(unsafe.Sizeof(locationsBlockHeader{}))
+var (
+	_ symbolsBlockEncoder[v1.InMemoryLocation] = (*locationsBlockEncoder)(nil)
+	_ symbolsBlockDecoder[v1.InMemoryLocation] = (*locationsBlockDecoder)(nil)
+)
 
 type locationsBlockHeader struct {
 	LocationsLen uint32 // Number of locations
@@ -151,9 +33,6 @@ type locationsBlockHeader struct {
 	IsFoldedSize uint32 // Size of the encoded slice of is_folded
 }
 
-// isValid reports whether the header contains sane values.
-// This is important as the block might be read before the
-// checksum validation.
 func (h *locationsBlockHeader) isValid() bool {
 	return h.LocationsLen > 0 && h.LocationsLen < 1<<20 &&
 		h.MappingSize > 0 && h.MappingSize < 1<<20 &&
@@ -181,7 +60,25 @@ func (h *locationsBlockHeader) unmarshal(b []byte) {
 	h.IsFoldedSize = binary.LittleEndian.Uint32(b[20:24])
 }
 
-func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) (int64, error) {
+type locationsBlockEncoder struct {
+	header locationsBlockHeader
+
+	mapping []int32
+	// Assuming there are no locations with more than 255 lines.
+	// We could even use a nibble (4 bits), but there are locations
+	// with 10 and more functions, therefore there is a chance that
+	// capacity of 2^4 is not enough in all cases.
+	lineCount []byte
+	lines     []int32
+	// Optional.
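+	// (These columns are typically written only when at least one
+	// location in the block actually sets them; the AddrSize and
+	// IsFoldedSize header fields describe what is present.)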
+ addr []int64 + folded []bool + + tmp []byte + buf bytes.Buffer +} + +func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) error { e.initWrite(len(locations)) var addr int64 var folded bool @@ -229,12 +126,11 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat e.tmp = slices.GrowLen(e.tmp, locationsBlockHeaderSize) e.header.marshal(e.tmp) - n, err := w.Write(e.tmp) - if err != nil { - return int64(n), err + if _, err := w.Write(e.tmp); err != nil { + return err } - m, err := e.buf.WriteTo(w) - return m + int64(n), err + _, err := e.buf.WriteTo(w) + return err } func (e *locationsBlockEncoder) initWrite(locations int) { diff --git a/pkg/phlaredb/symdb/encoding_mappings.go b/pkg/phlaredb/symdb/mappings.go similarity index 73% rename from pkg/phlaredb/symdb/encoding_mappings.go rename to pkg/phlaredb/symdb/mappings.go index c1a5794bc9..d4ce07c531 100644 --- a/pkg/phlaredb/symdb/encoding_mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -11,115 +11,15 @@ import ( v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/slices" - "github.com/grafana/pyroscope/pkg/util/math" ) -type MappingsEncoder struct { - w io.Writer - e mappingsBlockEncoder - - blockSize int - mappings int - - buf []byte -} +const mappingsBlockHeaderSize = int(unsafe.Sizeof(mappingsBlockHeader{})) -const ( - defaultMappingsBlockSize = 1 << 10 +var ( + _ symbolsBlockEncoder[v1.InMemoryMapping] = (*mappingsBlockEncoder)(nil) + _ symbolsBlockDecoder[v1.InMemoryMapping] = (*mappingsBlockDecoder)(nil) ) -func NewMappingsEncoder(w io.Writer) *MappingsEncoder { - return &MappingsEncoder{w: w} -} - -func (e *MappingsEncoder) EncodeMappings(mappings []v1.InMemoryMapping) error { - if e.blockSize == 0 { - e.blockSize = defaultMappingsBlockSize - } - e.mappings = len(mappings) - if err := e.writeHeader(); err != nil { - return err - } - for i := 0; i < len(mappings); i += e.blockSize { - block := mappings[i:math.Min(i+e.blockSize, len(mappings))] - if _, err := e.e.encode(e.w, block); err != nil { - return err - } - } - return nil -} - -func (e *MappingsEncoder) writeHeader() (err error) { - e.buf = slices.GrowLen(e.buf, 8) - binary.LittleEndian.PutUint32(e.buf[0:4], uint32(e.mappings)) - binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize)) - _, err = e.w.Write(e.buf) - return err -} - -func (e *MappingsEncoder) Reset(w io.Writer) { - e.mappings = 0 - e.blockSize = 0 - e.buf = e.buf[:0] - e.w = w -} - -type MappingsDecoder struct { - r io.Reader - d mappingsBlockDecoder - - blockSize uint32 - mappings uint32 - - buf []byte -} - -func NewMappingsDecoder(r io.Reader) *MappingsDecoder { return &MappingsDecoder{r: r} } - -func (d *MappingsDecoder) MappingsLen() (int, error) { - if err := d.readHeader(); err != nil { - return 0, err - } - return int(d.mappings), nil -} - -func (d *MappingsDecoder) readHeader() (err error) { - d.buf = slices.GrowLen(d.buf, 8) - if _, err = io.ReadFull(d.r, d.buf); err != nil { - return err - } - d.mappings = binary.LittleEndian.Uint32(d.buf[0:4]) - d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8]) - // Sanity checks are needed as we process the stream data - // before verifying the check sum. 
-	if d.mappings > 1<<20 || d.blockSize > 1<<20 {
-		return ErrInvalidSize
-	}
-	return nil
-}
-
-func (d *MappingsDecoder) DecodeMappings(mappings []v1.InMemoryMapping) error {
-	blocks := int((d.mappings + d.blockSize - 1) / d.blockSize)
-	for i := 0; i < blocks; i++ {
-		lo := i * int(d.blockSize)
-		hi := math.Min(lo+int(d.blockSize), int(d.mappings))
-		block := mappings[lo:hi]
-		if err := d.d.decode(d.r, block); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-func (d *MappingsDecoder) Reset(r io.Reader) {
-	d.mappings = 0
-	d.blockSize = 0
-	d.buf = d.buf[:0]
-	d.r = r
-}
-
-const mappingsBlockHeaderSize = int(unsafe.Sizeof(mappingsBlockHeader{}))
-
+const mappingsBlockHeaderSize = int(unsafe.Sizeof(mappingsBlockHeader{}))
+
+var (
+	_ symbolsBlockEncoder[v1.InMemoryMapping] = (*mappingsBlockEncoder)(nil)
+	_ symbolsBlockDecoder[v1.InMemoryMapping] = (*mappingsBlockDecoder)(nil)
+)
+
 type mappingsBlockHeader struct {
 	MappingsLen  uint32
 	FileNameSize uint32
@@ -167,7 +67,7 @@ type mappingsBlockEncoder struct {
 	ints64 []int64
 }
 
-func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping) (int64, error) {
+func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping) error {
 	e.initWrite(len(mappings))
 	var enc delta.BinaryPackedEncoding
 
@@ -240,12 +140,11 @@ func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping
 
 	e.tmp = slices.GrowLen(e.tmp, mappingsBlockHeaderSize)
 	e.header.marshal(e.tmp)
-	n, err := w.Write(e.tmp)
-	if err != nil {
-		return int64(n), err
+	if _, err := w.Write(e.tmp); err != nil {
+		return err
 	}
-	m, err := e.buf.WriteTo(w)
-	return m + int64(n), err
+	_, err := e.buf.WriteTo(w)
+	return err
 }
 
 func (e *mappingsBlockEncoder) initWrite(mappings int) {
diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go
new file mode 100644
index 0000000000..a66fc2ae83
--- /dev/null
+++ b/pkg/phlaredb/symdb/strings.go
@@ -0,0 +1,167 @@
+package symdb
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"unsafe"
+
+	"github.com/grafana/pyroscope/pkg/slices"
+)
+
+const (
+	maxStringLen           = 1<<16 - 1
+	stringsBlockHeaderSize = int(unsafe.Sizeof(stringsBlockHeader{}))
+)
+
+var (
+	_ symbolsBlockEncoder[string] = (*stringsBlockEncoder)(nil)
+	_ symbolsBlockDecoder[string] = (*stringsBlockDecoder)(nil)
+)
+
+type stringsBlockHeader struct {
+	StringsLen    uint32
+	BlockEncoding byte
+	_             [3]byte
+}
+
+func (h *stringsBlockHeader) marshal(b []byte) {
+	binary.LittleEndian.PutUint32(b[0:4], h.StringsLen)
+	b[4] = h.BlockEncoding
+}
+
+func (h *stringsBlockHeader) unmarshal(b []byte) {
+	h.StringsLen = binary.LittleEndian.Uint32(b[0:4])
+	h.BlockEncoding = b[4]
+}
+
+func (h *stringsBlockHeader) isValid() bool {
+	return h.StringsLen < 1<<20 && (h.BlockEncoding == 8 || h.BlockEncoding == 16)
+}
+
+type stringsBlockEncoder struct {
+	header stringsBlockHeader
+	buf    bytes.Buffer
+	tmp    []byte
+}
+
+func (e *stringsBlockEncoder) encode(w io.Writer, strings []string) error {
+	e.initWrite(len(strings))
+	e.header.BlockEncoding = e.blockEncoding(strings)
+	switch e.header.BlockEncoding {
+	case 8:
+		for j, s := range strings {
+			e.tmp[j] = byte(len(s))
+		}
+	case 16:
+		for j, s := range strings {
+			if len(s) > maxStringLen {
+				// Keep the recorded length consistent with
+				// the truncated data written below.
+				s = s[:maxStringLen]
+			}
+			// binary.LittleEndian.PutUint16. TODO: BigEndian.
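+			// Lengths are stored low byte first (little-endian
+			// uint16); decodeStrings16 reads them back the same way.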
+			e.tmp[j*2] = byte(len(s))
+			e.tmp[j*2+1] = byte(len(s) >> 8)
+		}
+	}
+	if _, err := e.buf.Write(e.tmp[:len(strings)*int(e.header.BlockEncoding)/8]); err != nil {
+		return err
+	}
+	for _, s := range strings {
+		if len(s) > maxStringLen {
+			s = s[:maxStringLen]
+		}
+		if _, err := e.buf.Write(*((*[]byte)(unsafe.Pointer(&s)))); err != nil {
+			return err
+		}
+	}
+	e.tmp = slices.GrowLen(e.tmp, stringsBlockHeaderSize)
+	e.header.marshal(e.tmp)
+	if _, err := w.Write(e.tmp); err != nil {
+		return err
+	}
+	_, err := e.buf.WriteTo(w)
+	return err
+}
+
+func (e *stringsBlockEncoder) blockEncoding(b []string) byte {
+	for _, s := range b {
+		if len(s) > 255 {
+			return 16
+		}
+	}
+	return 8
+}
+
+func (e *stringsBlockEncoder) initWrite(strings int) {
+	e.buf.Reset()
+	e.buf.Grow(strings * 16)
+	*e = stringsBlockEncoder{
+		header: stringsBlockHeader{StringsLen: uint32(strings)},
+		tmp:    slices.GrowLen(e.tmp, strings*2),
+		buf:    e.buf,
+	}
+}
+
+type stringsBlockDecoder struct {
+	header stringsBlockHeader
+	tmp    []byte
+}
+
+func (d *stringsBlockDecoder) readHeader(r io.Reader) error {
+	d.tmp = slices.GrowLen(d.tmp, stringsBlockHeaderSize)
+	if _, err := io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.header.unmarshal(d.tmp)
+	if !d.header.isValid() {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) {
+	if err = d.readHeader(r); err != nil {
+		return err
+	}
+	if d.header.StringsLen > uint32(len(strings)) {
+		return fmt.Errorf("strings buffer is too short")
+	}
+	switch d.header.BlockEncoding {
+	case 8:
+		return d.decodeStrings8(r, strings)
+	case 16:
+		return d.decodeStrings16(r, strings)
+	default:
+		// Header validation ensures this never happens.
+	}
+	return nil
+}
+
+func (d *stringsBlockDecoder) decodeStrings8(r io.Reader, dst []string) (err error) {
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.StringsLen)) // 1 byte per string.
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	for i := uint32(0); i < d.header.StringsLen; i++ {
+		s := make([]byte, d.tmp[i])
+		if _, err = io.ReadFull(r, s); err != nil {
+			return err
+		}
+		dst[i] = *(*string)(unsafe.Pointer(&s))
+	}
+	return err
+}
+
+func (d *stringsBlockDecoder) decodeStrings16(r io.Reader, dst []string) (err error) {
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.StringsLen)*2) // 2 bytes per string.
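+	// The block stores all lengths first, then the string data,
+	// so the whole lengths array can be fetched in a single read.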
+ if _, err = io.ReadFull(r, d.tmp); err != nil { + return err + } + for i := uint32(0); i < d.header.StringsLen; i++ { + l := uint16(d.tmp[i*2]) | uint16(d.tmp[i*2+1])<<8 + s := make([]byte, l) + if _, err = io.ReadFull(r, s); err != nil { + return err + } + dst[i] = *(*string)(unsafe.Pointer(&s)) + } + return err +} diff --git a/pkg/phlaredb/symdb/encoding_strings_test.go b/pkg/phlaredb/symdb/strings_test.go similarity index 83% rename from pkg/phlaredb/symdb/encoding_strings_test.go rename to pkg/phlaredb/symdb/strings_test.go index 1fa6191eb1..5b391957c8 100644 --- a/pkg/phlaredb/symdb/encoding_strings_test.go +++ b/pkg/phlaredb/symdb/strings_test.go @@ -1,7 +1,6 @@ package symdb import ( - "bufio" "bytes" "strings" "testing" @@ -79,16 +78,16 @@ func Test_StringsEncoding(t *testing.T) { tc := tc t.Run(tc.description, func(t *testing.T) { var output bytes.Buffer - e := NewStringsEncoder(&output) + e := newSymbolsEncoder[string](&output, new(stringsBlockEncoder)) if tc.blockSize > 0 { - e.blockSize = tc.blockSize + e.bs = tc.blockSize } - require.NoError(t, e.EncodeStrings(tc.strings)) - d := NewStringsDecoder(bufio.NewReader(&output)) - n, err := d.StringsLen() + require.NoError(t, e.Encode(tc.strings)) + d := newSymbolsDecoder[string](&output, new(stringsBlockDecoder)) + n, err := d.Open() require.NoError(t, err) out := make([]string, n) - require.NoError(t, d.DecodeStrings(out)) + require.NoError(t, d.Decode(out)) require.Equal(t, tc.strings, out) }) } From 918f4eec144f8f86822288462a954f1257bb92f6 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 2 Apr 2024 15:18:30 +0800 Subject: [PATCH 09/36] adhere to the value semantics --- pkg/ingester/pyroscope/ingest_handler_test.go | 2 +- pkg/phlaredb/block_querier_symbols.go | 8 +-- pkg/phlaredb/schemas/v1/functions.go | 17 +++--- pkg/phlaredb/schemas/v1/locations.go | 18 +++--- pkg/phlaredb/schemas/v1/mappings.go | 17 +++--- pkg/phlaredb/schemas/v1/models.go | 8 +-- pkg/phlaredb/schemas/v1/schema_test.go | 34 +++++------ pkg/phlaredb/schemas/v1/strings.go | 10 ++-- pkg/phlaredb/symdb/block_reader.go | 16 +++--- pkg/phlaredb/symdb/block_writer.go | 8 +-- pkg/phlaredb/symdb/dedup_slice.go | 56 +++++++++---------- pkg/phlaredb/symdb/partition_memory.go | 54 ++++++++++++++++-- pkg/phlaredb/symdb/rewriter.go | 40 ++++++------- pkg/phlaredb/symdb/symdb.go | 6 +- pkg/slices/slices.go | 2 +- 15 files changed, 167 insertions(+), 129 deletions(-) diff --git a/pkg/ingester/pyroscope/ingest_handler_test.go b/pkg/ingester/pyroscope/ingest_handler_test.go index c16f5f6da9..bc3a1ed14e 100644 --- a/pkg/ingester/pyroscope/ingest_handler_test.go +++ b/pkg/ingester/pyroscope/ingest_handler_test.go @@ -7,6 +7,7 @@ import ( "mime/multipart" "net/http/httptest" "os" + "slices" "sort" "testing" @@ -15,7 +16,6 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "golang.org/x/exp/slices" profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" pushv1 "github.com/grafana/pyroscope/api/gen/proto/go/push/v1" diff --git a/pkg/phlaredb/block_querier_symbols.go b/pkg/phlaredb/block_querier_symbols.go index 5b88ef4964..9124ba8927 100644 --- a/pkg/phlaredb/block_querier_symbols.go +++ b/pkg/phlaredb/block_querier_symbols.go @@ -204,10 +204,10 @@ func (p *symbolsPartition) Release() { } type inMemoryParquetTables struct { - strings inMemoryparquetReader[string, *schemav1.StringPersister] - functions inMemoryparquetReader[*schemav1.InMemoryFunction, 
*schemav1.FunctionPersister] - locations inMemoryparquetReader[*schemav1.InMemoryLocation, *schemav1.LocationPersister] - mappings inMemoryparquetReader[*schemav1.InMemoryMapping, *schemav1.MappingPersister] + strings inMemoryparquetReader[string, schemav1.StringPersister] + functions inMemoryparquetReader[schemav1.InMemoryFunction, schemav1.FunctionPersister] + locations inMemoryparquetReader[schemav1.InMemoryLocation, schemav1.LocationPersister] + mappings inMemoryparquetReader[schemav1.InMemoryMapping, schemav1.MappingPersister] } func openInMemoryParquetTables(ctx context.Context, r phlareobj.BucketReader, meta *block.Meta) (*inMemoryParquetTables, error) { diff --git a/pkg/phlaredb/schemas/v1/functions.go b/pkg/phlaredb/schemas/v1/functions.go index 62d723fc7d..bbeaf8e637 100644 --- a/pkg/phlaredb/schemas/v1/functions.go +++ b/pkg/phlaredb/schemas/v1/functions.go @@ -10,13 +10,13 @@ var functionsSchema = parquet.SchemaOf(new(profilev1.Function)) type FunctionPersister struct{} -func (*FunctionPersister) Name() string { return "functions" } +func (FunctionPersister) Name() string { return "functions" } -func (*FunctionPersister) Schema() *parquet.Schema { return functionsSchema } +func (FunctionPersister) Schema() *parquet.Schema { return functionsSchema } -func (*FunctionPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } +func (FunctionPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } -func (*FunctionPersister) Deconstruct(row parquet.Row, _ uint64, fn *InMemoryFunction) parquet.Row { +func (FunctionPersister) Deconstruct(row parquet.Row, _ uint64, fn InMemoryFunction) parquet.Row { if cap(row) < 5 { row = make(parquet.Row, 0, 5) } @@ -29,7 +29,7 @@ func (*FunctionPersister) Deconstruct(row parquet.Row, _ uint64, fn *InMemoryFun return row } -func (*FunctionPersister) Reconstruct(row parquet.Row) (uint64, *InMemoryFunction, error) { +func (FunctionPersister) Reconstruct(row parquet.Row) (uint64, InMemoryFunction, error) { loc := InMemoryFunction{ Id: row[0].Uint64(), Name: row[1].Uint32(), @@ -37,7 +37,7 @@ func (*FunctionPersister) Reconstruct(row parquet.Row) (uint64, *InMemoryFunctio Filename: row[3].Uint32(), StartLine: row[4].Uint32(), } - return 0, &loc, nil + return 0, loc, nil } type InMemoryFunction struct { @@ -54,7 +54,6 @@ type InMemoryFunction struct { StartLine uint32 } -func (f *InMemoryFunction) Clone() *InMemoryFunction { - n := *f - return &n +func (f InMemoryFunction) Clone() InMemoryFunction { + return f } diff --git a/pkg/phlaredb/schemas/v1/locations.go b/pkg/phlaredb/schemas/v1/locations.go index b9cbf91ba6..a426f0958c 100644 --- a/pkg/phlaredb/schemas/v1/locations.go +++ b/pkg/phlaredb/schemas/v1/locations.go @@ -10,13 +10,13 @@ var locationsSchema = parquet.SchemaOf(new(profilev1.Location)) type LocationPersister struct{} -func (*LocationPersister) Name() string { return "locations" } +func (LocationPersister) Name() string { return "locations" } -func (*LocationPersister) Schema() *parquet.Schema { return locationsSchema } +func (LocationPersister) Schema() *parquet.Schema { return locationsSchema } -func (*LocationPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } +func (LocationPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } -func (*LocationPersister) Deconstruct(row parquet.Row, _ uint64, loc *InMemoryLocation) parquet.Row { +func (LocationPersister) Deconstruct(row parquet.Row, _ uint64, loc InMemoryLocation) 
parquet.Row { var ( col = -1 newCol = func() int { @@ -61,7 +61,7 @@ func (*LocationPersister) Deconstruct(row parquet.Row, _ uint64, loc *InMemoryLo return row } -func (*LocationPersister) Reconstruct(row parquet.Row) (uint64, *InMemoryLocation, error) { +func (LocationPersister) Reconstruct(row parquet.Row) (uint64, InMemoryLocation, error) { loc := InMemoryLocation{ Id: row[0].Uint64(), MappingId: uint32(row[1].Uint64()), @@ -76,7 +76,7 @@ func (*LocationPersister) Reconstruct(row parquet.Row) (uint64, *InMemoryLocatio for i, v := range lines[len(lines)/2:] { loc.Line[i].Line = int32(v.Uint64()) } - return 0, &loc, nil + return 0, loc, nil } type InMemoryLocation struct { @@ -110,11 +110,11 @@ type InMemoryLocation struct { Line []InMemoryLine } -func (l *InMemoryLocation) Clone() *InMemoryLocation { - x := *l +func (l InMemoryLocation) Clone() InMemoryLocation { + x := l x.Line = make([]InMemoryLine, len(l.Line)) copy(x.Line, l.Line) - return &x + return x } type InMemoryLine struct { diff --git a/pkg/phlaredb/schemas/v1/mappings.go b/pkg/phlaredb/schemas/v1/mappings.go index 0d5503f6cb..cb53ec5d08 100644 --- a/pkg/phlaredb/schemas/v1/mappings.go +++ b/pkg/phlaredb/schemas/v1/mappings.go @@ -10,13 +10,13 @@ var mappingsSchema = parquet.SchemaOf(new(profilev1.Mapping)) type MappingPersister struct{} -func (*MappingPersister) Name() string { return "mappings" } +func (MappingPersister) Name() string { return "mappings" } -func (*MappingPersister) Schema() *parquet.Schema { return mappingsSchema } +func (MappingPersister) Schema() *parquet.Schema { return mappingsSchema } -func (*MappingPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } +func (MappingPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } -func (*MappingPersister) Deconstruct(row parquet.Row, _ uint64, m *InMemoryMapping) parquet.Row { +func (MappingPersister) Deconstruct(row parquet.Row, _ uint64, m InMemoryMapping) parquet.Row { if cap(row) < 10 { row = make(parquet.Row, 0, 10) } @@ -34,7 +34,7 @@ func (*MappingPersister) Deconstruct(row parquet.Row, _ uint64, m *InMemoryMappi return row } -func (*MappingPersister) Reconstruct(row parquet.Row) (uint64, *InMemoryMapping, error) { +func (MappingPersister) Reconstruct(row parquet.Row) (uint64, InMemoryMapping, error) { mapping := InMemoryMapping{ Id: row[0].Uint64(), MemoryStart: row[1].Uint64(), @@ -47,7 +47,7 @@ func (*MappingPersister) Reconstruct(row parquet.Row) (uint64, *InMemoryMapping, HasLineNumbers: row[8].Boolean(), HasInlineFrames: row[9].Boolean(), } - return 0, &mapping, nil + return 0, mapping, nil } type InMemoryMapping struct { @@ -74,7 +74,6 @@ type InMemoryMapping struct { HasInlineFrames bool } -func (m *InMemoryMapping) Clone() *InMemoryMapping { - n := *m - return &n +func (m InMemoryMapping) Clone() InMemoryMapping { + return m } diff --git a/pkg/phlaredb/schemas/v1/models.go b/pkg/phlaredb/schemas/v1/models.go index 84fa49956b..2fdcf812b2 100644 --- a/pkg/phlaredb/schemas/v1/models.go +++ b/pkg/phlaredb/schemas/v1/models.go @@ -3,10 +3,10 @@ package v1 import googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" type Models interface { - *Profile | *InMemoryProfile | - *googlev1.Location | *InMemoryLocation | - *googlev1.Function | *InMemoryFunction | - *googlev1.Mapping | *InMemoryMapping | + *Profile | InMemoryProfile | + *googlev1.Location | InMemoryLocation | + *googlev1.Function | InMemoryFunction | + *googlev1.Mapping | InMemoryMapping | *Stacktrace | string } diff 
--git a/pkg/phlaredb/schemas/v1/schema_test.go b/pkg/phlaredb/schemas/v1/schema_test.go index b79106c39b..da2a70df50 100644 --- a/pkg/phlaredb/schemas/v1/schema_test.go +++ b/pkg/phlaredb/schemas/v1/schema_test.go @@ -68,7 +68,7 @@ func newStrings() []string { func TestStringsRoundTrip(t *testing.T) { var ( s = newStrings() - w = &ReadWriter[string, *StringPersister]{} + w = &ReadWriter[string, StringPersister]{} buf bytes.Buffer ) @@ -200,7 +200,7 @@ func TestLocationsRoundTrip(t *testing.T) { }, } - mem := []*InMemoryLocation{ + mem := []InMemoryLocation{ { Id: 8, Address: 9, @@ -236,14 +236,14 @@ func TestLocationsRoundTrip(t *testing.T) { } var buf bytes.Buffer - require.NoError(t, new(ReadWriter[*profilev1.Location, *pprofLocationPersister]).WriteParquetFile(&buf, raw)) - actual, err := new(ReadWriter[*InMemoryLocation, *LocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) + require.NoError(t, new(ReadWriter[*profilev1.Location, pprofLocationPersister]).WriteParquetFile(&buf, raw)) + actual, err := new(ReadWriter[InMemoryLocation, LocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) require.NoError(t, err) assert.Equal(t, mem, actual) buf.Reset() - require.NoError(t, new(ReadWriter[*InMemoryLocation, *LocationPersister]).WriteParquetFile(&buf, mem)) - actual, err = new(ReadWriter[*InMemoryLocation, *LocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) + require.NoError(t, new(ReadWriter[InMemoryLocation, LocationPersister]).WriteParquetFile(&buf, mem)) + actual, err = new(ReadWriter[InMemoryLocation, LocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) require.NoError(t, err) assert.Equal(t, mem, actual) } @@ -252,20 +252,20 @@ var protoLocationsSchema = parquet.SchemaOf(&profilev1.Location{}) type pprofLocationPersister struct{} -func (*pprofLocationPersister) Name() string { return "locations" } +func (pprofLocationPersister) Name() string { return "locations" } -func (*pprofLocationPersister) Schema() *parquet.Schema { return protoLocationsSchema } +func (pprofLocationPersister) Schema() *parquet.Schema { return protoLocationsSchema } -func (*pprofLocationPersister) SortingColumns() parquet.SortingOption { +func (pprofLocationPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } -func (*pprofLocationPersister) Deconstruct(row parquet.Row, _ uint64, loc *profilev1.Location) parquet.Row { +func (pprofLocationPersister) Deconstruct(row parquet.Row, _ uint64, loc *profilev1.Location) parquet.Row { row = protoLocationsSchema.Deconstruct(row, loc) return row } -func (*pprofLocationPersister) Reconstruct(row parquet.Row) (uint64, *profilev1.Location, error) { +func (pprofLocationPersister) Reconstruct(row parquet.Row) (uint64, *profilev1.Location, error) { var loc profilev1.Location if err := protoLocationsSchema.Reconstruct(&loc, row); err != nil { return 0, nil, err @@ -291,7 +291,7 @@ func TestFunctionsRoundTrip(t *testing.T) { }, } - mem := []*InMemoryFunction{ + mem := []InMemoryFunction{ { Id: 6, Name: 7, @@ -310,13 +310,13 @@ func TestFunctionsRoundTrip(t *testing.T) { var buf bytes.Buffer require.NoError(t, new(ReadWriter[*profilev1.Function, *pprofFunctionPersister]).WriteParquetFile(&buf, raw)) - actual, err := new(ReadWriter[*InMemoryFunction, *FunctionPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) + actual, err := new(ReadWriter[InMemoryFunction, FunctionPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) require.NoError(t, err) assert.Equal(t, mem, actual) buf.Reset() - 
require.NoError(t, new(ReadWriter[*InMemoryFunction, *FunctionPersister]).WriteParquetFile(&buf, mem)) - actual, err = new(ReadWriter[*InMemoryFunction, *FunctionPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) + require.NoError(t, new(ReadWriter[InMemoryFunction, FunctionPersister]).WriteParquetFile(&buf, mem)) + actual, err = new(ReadWriter[InMemoryFunction, FunctionPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) require.NoError(t, err) assert.Equal(t, mem, actual) } @@ -374,7 +374,7 @@ func TestMappingsRoundTrip(t *testing.T) { }, } - mem := []*InMemoryMapping{ + mem := []InMemoryMapping{ { Id: 7, MemoryStart: 8, @@ -403,7 +403,7 @@ func TestMappingsRoundTrip(t *testing.T) { var buf bytes.Buffer require.NoError(t, new(ReadWriter[*profilev1.Mapping, *pprofMappingPersister]).WriteParquetFile(&buf, raw)) - actual, err := new(ReadWriter[*InMemoryMapping, *MappingPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) + actual, err := new(ReadWriter[InMemoryMapping, MappingPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) require.NoError(t, err) assert.Equal(t, mem, actual) diff --git a/pkg/phlaredb/schemas/v1/strings.go b/pkg/phlaredb/schemas/v1/strings.go index 844aa460dc..d470bb4202 100644 --- a/pkg/phlaredb/schemas/v1/strings.go +++ b/pkg/phlaredb/schemas/v1/strings.go @@ -13,13 +13,13 @@ var stringsSchema = parquet.NewSchema("String", phlareparquet.Group{ type StringPersister struct{} -func (*StringPersister) Name() string { return "strings" } +func (StringPersister) Name() string { return "strings" } -func (*StringPersister) Schema() *parquet.Schema { return stringsSchema } +func (StringPersister) Schema() *parquet.Schema { return stringsSchema } -func (*StringPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } +func (StringPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } -func (*StringPersister) Deconstruct(row parquet.Row, id uint64, s string) parquet.Row { +func (StringPersister) Deconstruct(row parquet.Row, id uint64, s string) parquet.Row { if cap(row) < 2 { row = make(parquet.Row, 0, 2) } @@ -29,6 +29,6 @@ func (*StringPersister) Deconstruct(row parquet.Row, id uint64, s string) parque return row } -func (*StringPersister) Reconstruct(row parquet.Row) (id uint64, s string, err error) { +func (StringPersister) Reconstruct(row parquet.Row) (id uint64, s string, err error) { return 0, row[1].String(), nil } diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index 95d0736c9d..a38d85687c 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -144,22 +144,22 @@ func (r *Reader) file(name string) (block.File, error) { func (r *Reader) partitionReader(h *PartitionHeader) *partition { p := &partition{ reader: r, - locations: parquetTableRange[*schemav1.InMemoryLocation, *schemav1.LocationPersister]{ + locations: parquetTableRange[schemav1.InMemoryLocation, schemav1.LocationPersister]{ bucket: r.bucket, headers: h.Locations, file: &r.locations, }, - mappings: parquetTableRange[*schemav1.InMemoryMapping, *schemav1.MappingPersister]{ + mappings: parquetTableRange[schemav1.InMemoryMapping, schemav1.MappingPersister]{ bucket: r.bucket, headers: h.Mappings, file: &r.mappings, }, - functions: parquetTableRange[*schemav1.InMemoryFunction, *schemav1.FunctionPersister]{ + functions: parquetTableRange[schemav1.InMemoryFunction, schemav1.FunctionPersister]{ bucket: r.bucket, headers: h.Functions, file: &r.functions, }, - strings: 
parquetTableRange[string, *schemav1.StringPersister]{ + strings: parquetTableRange[string, schemav1.StringPersister]{ bucket: r.bucket, headers: h.Strings, file: &r.strings, @@ -208,10 +208,10 @@ type partition struct { reader *Reader stacktraceChunks []*stacktraceChunkReader - locations parquetTableRange[*schemav1.InMemoryLocation, *schemav1.LocationPersister] - mappings parquetTableRange[*schemav1.InMemoryMapping, *schemav1.MappingPersister] - functions parquetTableRange[*schemav1.InMemoryFunction, *schemav1.FunctionPersister] - strings parquetTableRange[string, *schemav1.StringPersister] + locations parquetTableRange[schemav1.InMemoryLocation, schemav1.LocationPersister] + mappings parquetTableRange[schemav1.InMemoryMapping, schemav1.MappingPersister] + functions parquetTableRange[schemav1.InMemoryFunction, schemav1.FunctionPersister] + strings parquetTableRange[string, schemav1.StringPersister] } func (p *partition) init(ctx context.Context) (err error) { diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index 101d5e3e6b..af862c1c12 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -28,10 +28,10 @@ type writer struct { files []block.File // Parquet tables. - mappings parquetWriter[*schemav1.InMemoryMapping, *schemav1.MappingPersister] - functions parquetWriter[*schemav1.InMemoryFunction, *schemav1.FunctionPersister] - locations parquetWriter[*schemav1.InMemoryLocation, *schemav1.LocationPersister] - strings parquetWriter[string, *schemav1.StringPersister] + mappings parquetWriter[schemav1.InMemoryMapping, schemav1.MappingPersister] + functions parquetWriter[schemav1.InMemoryFunction, schemav1.FunctionPersister] + locations parquetWriter[schemav1.InMemoryLocation, schemav1.LocationPersister] + strings parquetWriter[string, schemav1.StringPersister] } func newWriter(c *Config) *writer { diff --git a/pkg/phlaredb/symdb/dedup_slice.go b/pkg/phlaredb/symdb/dedup_slice.go index faa8e9e53a..4cdef9accb 100644 --- a/pkg/phlaredb/symdb/dedup_slice.go +++ b/pkg/phlaredb/symdb/dedup_slice.go @@ -37,9 +37,9 @@ func (p *PartitionWriter) WriteProfileSymbols(profile *profilev1.Profile) []sche pprof.ZeroLabelStrings(profile) p.strings.ingest(profile.StringTable, rewrites) - mappings := make([]*schemav1.InMemoryMapping, len(profile.Mapping)) + mappings := make([]schemav1.InMemoryMapping, len(profile.Mapping)) for i, v := range profile.Mapping { - mappings[i] = &schemav1.InMemoryMapping{ + mappings[i] = schemav1.InMemoryMapping{ Id: v.Id, MemoryStart: v.MemoryStart, MemoryLimit: v.MemoryLimit, @@ -54,9 +54,9 @@ func (p *PartitionWriter) WriteProfileSymbols(profile *profilev1.Profile) []sche } p.mappings.ingest(mappings, rewrites) - funcs := make([]*schemav1.InMemoryFunction, len(profile.Function)) + funcs := make([]schemav1.InMemoryFunction, len(profile.Function)) for i, v := range profile.Function { - funcs[i] = &schemav1.InMemoryFunction{ + funcs[i] = schemav1.InMemoryFunction{ Id: v.Id, Name: uint32(v.Name), SystemName: uint32(v.SystemName), @@ -66,9 +66,9 @@ func (p *PartitionWriter) WriteProfileSymbols(profile *profilev1.Profile) []sche } p.functions.ingest(funcs, rewrites) - locs := make([]*schemav1.InMemoryLocation, len(profile.Location)) + locs := make([]schemav1.InMemoryLocation, len(profile.Location)) for i, v := range profile.Location { - x := &schemav1.InMemoryLocation{ + x := schemav1.InMemoryLocation{ Id: v.Id, Address: v.Address, MappingId: uint32(v.MappingId), @@ -214,7 +214,7 @@ type rewriter struct { type 
storeHelper[M schemav1.Models] interface { // some Models contain their own IDs within the struct, this allows to set them and keep track of the preexisting ID. It should return the oldID that is supposed to be rewritten. - setID(existingSliceID uint64, newID uint64, element M) uint64 + setID(existingSliceID uint64, newID uint64, element *M) uint64 // size returns a (rough estimation) of the size of a single element M size(M) uint64 @@ -222,7 +222,7 @@ type storeHelper[M schemav1.Models] interface { // clone copies parts that are not optimally sized from protobuf parsing clone(M) M - rewrite(*rewriter, M) error + rewrite(*rewriter, *M) error } type Helper[M schemav1.Models, K comparable] interface { @@ -261,7 +261,7 @@ func (s *deduplicatingSlice[M, K, H]) ingest(elems []M, rewriter *rewriter) { missing = missing[:0] // rewrite elements for pos := range elems { - _ = s.helper.rewrite(rewriter, elems[pos]) + _ = s.helper.rewrite(rewriter, &elems[pos]) } // try to find if element already exists in slice, when supposed to deduplicate @@ -269,7 +269,7 @@ func (s *deduplicatingSlice[M, K, H]) ingest(elems []M, rewriter *rewriter) { for pos := range elems { k := s.helper.key(elems[pos]) if posSlice, exists := s.lookup[k]; exists { - rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), elems[pos]))] = posSlice + rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), &elems[pos]))] = posSlice } else { missing = append(missing, int64(pos)) } @@ -284,14 +284,14 @@ func (s *deduplicatingSlice[M, K, H]) ingest(elems []M, rewriter *rewriter) { // check again if element exists k := s.helper.key(elems[pos]) if posSlice, exists := s.lookup[k]; exists { - rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), elems[pos]))] = posSlice + rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), &elems[pos]))] = posSlice continue } // add element to slice/map s.slice = append(s.slice, s.helper.clone(elems[pos])) s.lookup[k] = posSlice - rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), elems[pos]))] = posSlice + rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), &elems[pos]))] = posSlice posSlice++ s.size.Add(s.helper.size(elems[pos])) } @@ -381,7 +381,7 @@ func (*stringsHelper) addToRewriter(r *rewriter, m idConversionTable) { } // nolint unused -func (*stringsHelper) rewrite(*rewriter, string) error { +func (*stringsHelper) rewrite(*rewriter, *string) error { return nil } @@ -389,7 +389,7 @@ func (*stringsHelper) size(s string) uint64 { return uint64(len(s)) } -func (*stringsHelper) setID(oldID, newID uint64, s string) uint64 { +func (*stringsHelper) setID(oldID, newID uint64, s *string) uint64 { return oldID } @@ -410,7 +410,7 @@ const ( type locationsHelper struct{} -func (*locationsHelper) key(l *schemav1.InMemoryLocation) locationsKey { +func (*locationsHelper) key(l schemav1.InMemoryLocation) locationsKey { return locationsKey{ Address: l.Address, MappingId: l.MappingId, @@ -465,15 +465,15 @@ func (*locationsHelper) setID(_, newID uint64, l *schemav1.InMemoryLocation) uin return oldID } -func (*locationsHelper) size(l *schemav1.InMemoryLocation) uint64 { +func (*locationsHelper) size(l schemav1.InMemoryLocation) uint64 { return uint64(len(l.Line))*lineSize + locationSize } -func (*locationsHelper) clone(l *schemav1.InMemoryLocation) *schemav1.InMemoryLocation { - x := *l +func (*locationsHelper) clone(l schemav1.InMemoryLocation) schemav1.InMemoryLocation { + x := l x.Line = make([]schemav1.InMemoryLine, len(l.Line)) copy(x.Line, 
l.Line) - return &x + return x } type mappingsHelper struct{} @@ -492,7 +492,7 @@ type mappingsKey struct { HasInlineFrames bool } -func (*mappingsHelper) key(m *schemav1.InMemoryMapping) mappingsKey { +func (*mappingsHelper) key(m schemav1.InMemoryMapping) mappingsKey { return mappingsKey{ MemoryStart: m.MemoryStart, MemoryLimit: m.MemoryLimit, @@ -523,13 +523,12 @@ func (*mappingsHelper) setID(_, newID uint64, m *schemav1.InMemoryMapping) uint6 return oldID } -func (*mappingsHelper) size(_ *schemav1.InMemoryMapping) uint64 { +func (*mappingsHelper) size(_ schemav1.InMemoryMapping) uint64 { return mappingSize } -func (*mappingsHelper) clone(m *schemav1.InMemoryMapping) *schemav1.InMemoryMapping { - x := *m - return &x +func (*mappingsHelper) clone(m schemav1.InMemoryMapping) schemav1.InMemoryMapping { + return m } type functionsKey struct { @@ -543,7 +542,7 @@ type functionsHelper struct{} const functionSize = uint64(unsafe.Sizeof(schemav1.InMemoryFunction{})) -func (*functionsHelper) key(f *schemav1.InMemoryFunction) functionsKey { +func (*functionsHelper) key(f schemav1.InMemoryFunction) functionsKey { return functionsKey{ Name: f.Name, SystemName: f.SystemName, @@ -569,11 +568,10 @@ func (*functionsHelper) setID(_, newID uint64, f *schemav1.InMemoryFunction) uin return oldID } -func (*functionsHelper) size(_ *schemav1.InMemoryFunction) uint64 { +func (*functionsHelper) size(_ schemav1.InMemoryFunction) uint64 { return functionSize } -func (*functionsHelper) clone(f *schemav1.InMemoryFunction) *schemav1.InMemoryFunction { - x := *f - return &x +func (*functionsHelper) clone(f schemav1.InMemoryFunction) schemav1.InMemoryFunction { + return f } diff --git a/pkg/phlaredb/symdb/partition_memory.go b/pkg/phlaredb/symdb/partition_memory.go index 0a127fcff5..ab3f7781de 100644 --- a/pkg/phlaredb/symdb/partition_memory.go +++ b/pkg/phlaredb/symdb/partition_memory.go @@ -2,6 +2,8 @@ package symdb import ( "context" + "fmt" + "hash/crc32" "io" "sync" @@ -13,9 +15,9 @@ type PartitionWriter struct { stacktraces *stacktracesPartition strings deduplicatingSlice[string, string, *stringsHelper] - mappings deduplicatingSlice[*schemav1.InMemoryMapping, mappingsKey, *mappingsHelper] - functions deduplicatingSlice[*schemav1.InMemoryFunction, functionsKey, *functionsHelper] - locations deduplicatingSlice[*schemav1.InMemoryLocation, locationsKey, *locationsHelper] + mappings deduplicatingSlice[schemav1.InMemoryMapping, mappingsKey, *mappingsHelper] + functions deduplicatingSlice[schemav1.InMemoryFunction, functionsKey, *functionsHelper] + locations deduplicatingSlice[schemav1.InMemoryLocation, locationsKey, *locationsHelper] } func (p *PartitionWriter) AppendStacktraces(dst []uint32, s []*schemav1.Stacktrace) { @@ -311,15 +313,15 @@ func SplitStacktraces(s []uint32, n uint32) []StacktracesRange { return cs } -func (p *PartitionWriter) AppendLocations(dst []uint32, locations []*schemav1.InMemoryLocation) { +func (p *PartitionWriter) AppendLocations(dst []uint32, locations []schemav1.InMemoryLocation) { p.locations.append(dst, locations) } -func (p *PartitionWriter) AppendMappings(dst []uint32, mappings []*schemav1.InMemoryMapping) { +func (p *PartitionWriter) AppendMappings(dst []uint32, mappings []schemav1.InMemoryMapping) { p.mappings.append(dst, mappings) } -func (p *PartitionWriter) AppendFunctions(dst []uint32, functions []*schemav1.InMemoryFunction) { +func (p *PartitionWriter) AppendFunctions(dst []uint32, functions []schemav1.InMemoryFunction) { p.functions.append(dst, functions) } @@ -364,3 +366,43 @@ 
func (p *PartitionWriter) WriteStats(s *PartitionStats) { func (p *PartitionWriter) Release() { // Noop. Satisfies PartitionReader interface. } + +func (p *PartitionWriter) write(w *fileWriter) (err error) { + senc := newSymbolsEncoder[string](w.w, new(stringsBlockEncoder)) + senc.Encode(p.strings.slice) + + menc := newSymbolsEncoder[schemav1.InMemoryMapping](w.w, new(mappingsBlockEncoder)) + menc.Encode(p.mappings.slice) + + fenc := newSymbolsEncoder[schemav1.InMemoryFunction](w.w, new(functionsBlockEncoder)) + fenc.Encode(p.functions.slice) + + lenc := newSymbolsEncoder[schemav1.InMemoryLocation](w.w, new(locationsBlockEncoder)) + lenc.Encode(p.locations.slice) + + for ci, c := range p.stacktraces.chunks { + stacks := c.stacks + if stacks == 0 { + stacks = uint32(len(p.stacktraces.hashToIdx)) + } + h := StacktraceChunkHeader{ + Offset: w.w.offset, + Size: 0, // Set later. + Partition: p.header.Partition, + ChunkIndex: uint16(ci), + ChunkEncoding: ChunkEncodingGroupVarint, + Stacktraces: stacks, + StacktraceNodes: c.tree.len(), + StacktraceMaxDepth: 0, // TODO + StacktraceMaxNodes: c.partition.maxNodesPerChunk, + CRC: 0, // Set later. + } + crc := crc32.New(castagnoli) + if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.w)); err != nil { + return fmt.Errorf("writing stacktrace chunk data: %w", err) + } + h.CRC = crc.Sum32() + p.header.StacktraceChunks = append(p.header.StacktraceChunks, h) + } + return nil +} diff --git a/pkg/phlaredb/symdb/rewriter.go b/pkg/phlaredb/symdb/rewriter.go index d19e1e2ca1..437a45ab1c 100644 --- a/pkg/phlaredb/symdb/rewriter.go +++ b/pkg/phlaredb/symdb/rewriter.go @@ -68,9 +68,9 @@ func (r *Rewriter) getOrCreatePartition(partition uint64) (_ *partitionRewriter, pr.WriteStats(&stats) n.stacktraces = newLookupTable[[]int32](stats.MaxStacktraceID) - n.locations = newLookupTable[*schemav1.InMemoryLocation](stats.LocationsTotal) - n.mappings = newLookupTable[*schemav1.InMemoryMapping](stats.MappingsTotal) - n.functions = newLookupTable[*schemav1.InMemoryFunction](stats.FunctionsTotal) + n.locations = newLookupTable[schemav1.InMemoryLocation](stats.LocationsTotal) + n.mappings = newLookupTable[schemav1.InMemoryMapping](stats.MappingsTotal) + n.functions = newLookupTable[schemav1.InMemoryFunction](stats.FunctionsTotal) n.strings = newLookupTable[string](stats.StringsTotal) r.partitions[partition] = n @@ -84,9 +84,9 @@ type partitionRewriter struct { dst *PartitionWriter stacktraces *lookupTable[[]int32] - locations *lookupTable[*schemav1.InMemoryLocation] - mappings *lookupTable[*schemav1.InMemoryMapping] - functions *lookupTable[*schemav1.InMemoryFunction] + locations *lookupTable[schemav1.InMemoryLocation] + mappings *lookupTable[schemav1.InMemoryMapping] + functions *lookupTable[schemav1.InMemoryFunction] strings *lookupTable[string] // FIXME(kolesnikovae): schemav1.Stacktrace should be just a uint32 slice: @@ -162,25 +162,25 @@ func (p *partitionRewriter) appendRewrite(stacktraces []uint32) error { p.dst.AppendStrings(p.strings.buf, p.strings.values) p.strings.updateResolved() - for _, v := range p.functions.values { - v.Name = p.strings.lookupResolved(v.Name) - v.Filename = p.strings.lookupResolved(v.Filename) - v.SystemName = p.strings.lookupResolved(v.SystemName) + for i := range p.functions.values { + p.functions.values[i].Name = p.strings.lookupResolved(p.functions.values[i].Name) + p.functions.values[i].Filename = p.strings.lookupResolved(p.functions.values[i].Filename) + p.functions.values[i].SystemName = 
p.strings.lookupResolved(p.functions.values[i].SystemName) } p.dst.AppendFunctions(p.functions.buf, p.functions.values) p.functions.updateResolved() - for _, v := range p.mappings.values { - v.BuildId = p.strings.lookupResolved(v.BuildId) - v.Filename = p.strings.lookupResolved(v.Filename) + for i := range p.mappings.values { + p.mappings.values[i].BuildId = p.strings.lookupResolved(p.mappings.values[i].BuildId) + p.mappings.values[i].Filename = p.strings.lookupResolved(p.mappings.values[i].Filename) } p.dst.AppendMappings(p.mappings.buf, p.mappings.values) p.mappings.updateResolved() - for _, v := range p.locations.values { - v.MappingId = p.mappings.lookupResolved(v.MappingId) - for j, line := range v.Line { - v.Line[j].FunctionId = p.functions.lookupResolved(line.FunctionId) + for i := range p.locations.values { + p.locations.values[i].MappingId = p.mappings.lookupResolved(p.locations.values[i].MappingId) + for j, line := range p.locations.values[i].Line { + p.locations.values[i].Line[j].FunctionId = p.functions.lookupResolved(line.FunctionId) } } p.dst.AppendLocations(p.locations.buf, p.locations.values) @@ -247,9 +247,9 @@ func (p *partitionRewriter) InsertStacktrace(stacktrace uint32, locations []int3 func cloneSymbolsPartially(x *Symbols) *Symbols { n := Symbols{ Stacktraces: x.Stacktraces, - Locations: make([]*schemav1.InMemoryLocation, len(x.Locations)), - Mappings: make([]*schemav1.InMemoryMapping, len(x.Mappings)), - Functions: make([]*schemav1.InMemoryFunction, len(x.Functions)), + Locations: make([]schemav1.InMemoryLocation, len(x.Locations)), + Mappings: make([]schemav1.InMemoryMapping, len(x.Mappings)), + Functions: make([]schemav1.InMemoryFunction, len(x.Functions)), Strings: x.Strings, } for i, l := range x.Locations { diff --git a/pkg/phlaredb/symdb/symdb.go b/pkg/phlaredb/symdb/symdb.go index 934b0f01a3..e0dedf8356 100644 --- a/pkg/phlaredb/symdb/symdb.go +++ b/pkg/phlaredb/symdb/symdb.go @@ -26,9 +26,9 @@ type PartitionReader interface { type Symbols struct { Stacktraces StacktraceResolver - Locations []*schemav1.InMemoryLocation - Mappings []*schemav1.InMemoryMapping - Functions []*schemav1.InMemoryFunction + Locations []schemav1.InMemoryLocation + Mappings []schemav1.InMemoryMapping + Functions []schemav1.InMemoryFunction Strings []string } diff --git a/pkg/slices/slices.go b/pkg/slices/slices.go index e574ccbb02..d3245fe740 100644 --- a/pkg/slices/slices.go +++ b/pkg/slices/slices.go @@ -1,7 +1,7 @@ package slices import ( - "golang.org/x/exp/slices" + "slices" ) // RemoveInPlace removes all elements from a slice that match the given predicate. 
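A note on the helper changes above: once the deduplicating slices store values rather than pointers, clone degenerates into returning its argument, and the rewriter has to assign through the slice index, since ranging by value would only mutate a copy. A minimal sketch of the idea in isolation (the dedup type and its fields are illustrative, not the actual pyroscope types):

    package main

    import "fmt"

    // dedup is a toy value-semantics deduplicating slice: values serve as
    // their own map keys, so "cloning" an element is an ordinary copy and
    // needs no per-element heap allocation.
    type dedup[T comparable] struct {
    	index map[T]uint32
    	slice []T
    }

    func (d *dedup[T]) append(v T) uint32 {
    	if id, ok := d.index[v]; ok {
    		return id
    	}
    	id := uint32(len(d.slice))
    	d.slice = append(d.slice, v) // The value is copied into the slice.
    	d.index[v] = id
    	return id
    }

    func main() {
    	d := dedup[string]{index: make(map[string]uint32)}
    	fmt.Println(d.append("a"), d.append("b"), d.append("a")) // 0 1 0
    	// In-place rewrites must index the slice: `for _, v := range d.slice`
    	// hands out copies, which is why the rewrite loops above switched to
    	// `for i := range ...` with indexed assignment.
    }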
From 6c98c3f3dbb763275c43c1213ebfbcd7a37eb296 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Thu, 4 Apr 2024 19:16:29 +0800 Subject: [PATCH 10/36] write path --- pkg/phlaredb/symdb/block_reader.go | 14 +- pkg/phlaredb/symdb/block_writer.go | 270 +++---------------------- pkg/phlaredb/symdb/format.go | 264 +++++++++++++----------- pkg/phlaredb/symdb/partition_memory.go | 68 ++++--- 4 files changed, 223 insertions(+), 393 deletions(-) diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index a38d85687c..4ef32ae3c6 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -146,26 +146,26 @@ func (r *Reader) partitionReader(h *PartitionHeader) *partition { reader: r, locations: parquetTableRange[schemav1.InMemoryLocation, schemav1.LocationPersister]{ bucket: r.bucket, - headers: h.Locations, + headers: SymbolsBlockReferencesAsRows(h.Locations), file: &r.locations, }, mappings: parquetTableRange[schemav1.InMemoryMapping, schemav1.MappingPersister]{ bucket: r.bucket, - headers: h.Mappings, + headers: SymbolsBlockReferencesAsRows(h.Mappings), file: &r.mappings, }, functions: parquetTableRange[schemav1.InMemoryFunction, schemav1.FunctionPersister]{ bucket: r.bucket, - headers: h.Functions, + headers: SymbolsBlockReferencesAsRows(h.Functions), file: &r.functions, }, strings: parquetTableRange[string, schemav1.StringPersister]{ bucket: r.bucket, - headers: h.Strings, + headers: SymbolsBlockReferencesAsRows(h.Strings), file: &r.strings, }, } - p.setStacktracesChunks(h.StacktraceChunks) + p.setStacktracesChunks(h.Stacktraces) return p } @@ -295,7 +295,7 @@ func (p *partition) ResolveStacktraceLocations(ctx context.Context, dst Stacktra return nil } -func (p *partition) setStacktracesChunks(chunks []StacktraceChunkHeader) { +func (p *partition) setStacktracesChunks(chunks []StacktraceBlockHeader) { p.stacktraceChunks = make([]*stacktraceChunkReader, len(chunks)) for i, c := range chunks { p.stacktraceChunks[i] = &stacktraceChunkReader{ @@ -347,7 +347,7 @@ func (r *stacktracesLookup) do() error { type stacktraceChunkReader struct { reader *Reader - header StacktraceChunkHeader + header StacktraceBlockHeader r refctr.Counter t *parentPointerTree diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index af862c1c12..099f04da2f 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -2,36 +2,29 @@ package symdb import ( "bufio" - "context" "fmt" - "hash/crc32" "io" "os" "path/filepath" "github.com/grafana/dskit/multierror" - "github.com/parquet-go/parquet-go" - "golang.org/x/sync/errgroup" "github.com/grafana/pyroscope/pkg/phlaredb/block" - schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" - "github.com/grafana/pyroscope/pkg/util/build" - "github.com/grafana/pyroscope/pkg/util/math" + v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" ) type writer struct { config *Config - index IndexFile - indexWriter *fileWriter - stacktraces *fileWriter - files []block.File + index IndexFile + indexFile *fileWriter + dataFile *fileWriter + files []block.File - // Parquet tables. 
-	mappings  parquetWriter[schemav1.InMemoryMapping, schemav1.MappingPersister]
-	functions parquetWriter[schemav1.InMemoryFunction, schemav1.FunctionPersister]
-	locations parquetWriter[schemav1.InMemoryLocation, schemav1.LocationPersister]
-	strings   parquetWriter[string, schemav1.StringPersister]
+	stringsEncoder   *symbolsEncoder[string]
+	mappingsEncoder  *symbolsEncoder[v1.InMemoryMapping]
+	functionsEncoder *symbolsEncoder[v1.InMemoryFunction]
+	locationsEncoder *symbolsEncoder[v1.InMemoryLocation]
 }
 
 func newWriter(c *Config) *writer {
@@ -40,82 +33,30 @@ func newWriter(c *Config) *writer {
 		index: IndexFile{
 			Header: Header{
 				Magic:   symdbMagic,
-				Version: FormatV2,
+				Version: FormatV3,
 			},
 		},
+
+		stringsEncoder:   newSymbolsEncoder[string](nil, new(stringsBlockEncoder)),
+		mappingsEncoder:  newSymbolsEncoder[v1.InMemoryMapping](nil, new(mappingsBlockEncoder)),
+		functionsEncoder: newSymbolsEncoder[v1.InMemoryFunction](nil, new(functionsBlockEncoder)),
+		locationsEncoder: newSymbolsEncoder[v1.InMemoryLocation](nil, new(locationsBlockEncoder)),
 	}
 }
 
-func (w *writer) writePartitions(partitions []*PartitionWriter) error {
-	g, _ := errgroup.WithContext(context.Background())
-	g.Go(func() (err error) {
-		if w.stacktraces, err = w.newFile(StacktracesFileName); err != nil {
-			return err
-		}
-		for _, partition := range partitions {
-			if err = w.writeStacktraces(partition); err != nil {
-				return err
-			}
-		}
-		return w.stacktraces.Close()
-	})
-
-	g.Go(func() (err error) {
-		if err = w.strings.init(w.config.Dir, w.config.Parquet); err != nil {
-			return err
-		}
-		for _, partition := range partitions {
-			if partition.header.Strings, err = w.strings.readFrom(partition.strings.slice); err != nil {
-				return err
-			}
-		}
-		return w.strings.Close()
-	})
-
-	g.Go(func() (err error) {
-		if err = w.functions.init(w.config.Dir, w.config.Parquet); err != nil {
-			return err
-		}
-		for _, partition := range partitions {
-			if partition.header.Functions, err = w.functions.readFrom(partition.functions.slice); err != nil {
-				return err
-			}
-		}
-		return w.functions.Close()
-	})
-
-	g.Go(func() (err error) {
-		if err = w.mappings.init(w.config.Dir, w.config.Parquet); err != nil {
-			return err
-		}
-		for _, partition := range partitions {
-			if partition.header.Mappings, err = w.mappings.readFrom(partition.mappings.slice); err != nil {
-				return err
-			}
-		}
-		return w.mappings.Close()
-	})
-
-	g.Go(func() (err error) {
-		if err = w.locations.init(w.config.Dir, w.config.Parquet); err != nil {
-			return err
-		}
-		for _, partition := range partitions {
-			if partition.header.Locations, err = w.locations.readFrom(partition.locations.slice); err != nil {
-				return err
-			}
-		}
-		return w.locations.Close()
-	})
-
-	if err := g.Wait(); err != nil {
+func (w *writer) writePartitions(partitions []*PartitionWriter) (err error) {
+	if w.dataFile, err = w.newFile(DataFileName); err != nil {
 		return err
 	}
-
-	for _, partition := range partitions {
-		w.index.PartitionHeaders = append(w.index.PartitionHeaders, &partition.header)
+	defer func() {
+		err = multierror.New(err, w.dataFile.Close()).Err()
+	}()
+	for _, p := range partitions {
+		if err = p.writeTo(w); err != nil {
+			return err
+		}
+		w.index.PartitionHeaders = append(w.index.PartitionHeaders, &p.header)
 	}
-
 	return nil
 }
@@ -124,40 +65,8 @@ func (w *writer) Flush() (err error) {
 		return err
 	}
 	w.files = []block.File{
-		w.indexWriter.meta(),
-		w.stacktraces.meta(),
-		w.locations.meta(),
-		w.mappings.meta(),
-		w.functions.meta(),
-		w.strings.meta(),
-	}
-	return nil
-}
-
-func (w *writer) writeStacktraces(partition
*PartitionWriter) (err error) { - for ci, c := range partition.stacktraces.chunks { - stacks := c.stacks - if stacks == 0 { - stacks = uint32(len(partition.stacktraces.hashToIdx)) - } - h := StacktraceChunkHeader{ - Offset: w.stacktraces.w.offset, - Size: 0, // Set later. - Partition: partition.header.Partition, - ChunkIndex: uint16(ci), - ChunkEncoding: ChunkEncodingGroupVarint, - Stacktraces: stacks, - StacktraceNodes: c.tree.len(), - StacktraceMaxDepth: 0, // TODO - StacktraceMaxNodes: c.partition.maxNodesPerChunk, - CRC: 0, // Set later. - } - crc := crc32.New(castagnoli) - if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.stacktraces)); err != nil { - return fmt.Errorf("writing stacktrace chunk data: %w", err) - } - h.CRC = crc.Sum32() - partition.header.StacktraceChunks = append(partition.header.StacktraceChunks, h) + w.indexFile.meta(), + w.dataFile.meta(), } return nil } @@ -171,13 +80,13 @@ func (w *writer) createDir() error { func (w *writer) writeIndexFile() (err error) { // Write the index file only after all the files were flushed. - if w.indexWriter, err = w.newFile(IndexFileName); err != nil { + if w.indexFile, err = w.newFile(IndexFileName); err != nil { return err } defer func() { - err = multierror.New(err, w.indexWriter.Close()).Err() + err = multierror.New(err, w.indexFile.Close()).Err() }() - if _, err = w.index.WriteTo(w.indexWriter); err != nil { + if _, err = w.index.WriteTo(w.indexFile); err != nil { return fmt.Errorf("failed to write index file: %w", err) } return err @@ -265,122 +174,3 @@ func (w *writerOffset) Write(p []byte) (n int, err error) { w.offset += int64(n) return n, err } - -type parquetWriter[M schemav1.Models, P schemav1.Persister[M]] struct { - persister P - config ParquetConfig - - currentRowGroup uint32 - currentRows uint32 - rowsTotal uint64 - - buffer *parquet.Buffer - rowsBatch []parquet.Row - - writer *parquet.GenericWriter[P] - file *os.File - path string -} - -func (s *parquetWriter[M, P]) init(dir string, c ParquetConfig) (err error) { - s.config = c - s.path = filepath.Join(dir, s.persister.Name()+block.ParquetSuffix) - s.file, err = os.OpenFile(s.path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o644) - if err != nil { - return err - } - s.rowsBatch = make([]parquet.Row, 0, 128) - s.buffer = parquet.NewBuffer(s.persister.Schema(), parquet.ColumnBufferCapacity(s.config.MaxBufferRowCount)) - s.writer = parquet.NewGenericWriter[P](s.file, s.persister.Schema(), - parquet.CreatedBy("github.com/grafana/pyroscope/", build.Version, build.Revision), - parquet.PageBufferSize(3*1024*1024), - ) - return nil -} - -func (s *parquetWriter[M, P]) readFrom(values []M) (ranges []RowRangeReference, err error) { - for len(values) > 0 { - var r RowRangeReference - if r, err = s.writeRows(values); err != nil { - return nil, err - } - ranges = append(ranges, r) - values = values[r.Rows:] - } - return ranges, nil -} - -func (s *parquetWriter[M, P]) writeRows(values []M) (r RowRangeReference, err error) { - r.RowGroup = s.currentRowGroup - r.Index = s.currentRows - if len(values) == 0 { - return r, nil - } - var n int - for len(values) > 0 && int(s.currentRows) < s.config.MaxBufferRowCount { - s.fillBatch(values) - if n, err = s.buffer.WriteRows(s.rowsBatch); err != nil { - return r, err - } - s.currentRows += uint32(n) - r.Rows += uint32(n) - values = values[n:] - } - if int(s.currentRows)+cap(s.rowsBatch) >= s.config.MaxBufferRowCount { - if err = s.flushBuffer(); err != nil { - return r, err - } - } - return r, nil -} - -func (s *parquetWriter[M, P]) fillBatch(values 
[]M) int { - m := math.Min(len(values), cap(s.rowsBatch)) - s.rowsBatch = s.rowsBatch[:m] - for i := 0; i < m; i++ { - row := s.rowsBatch[i][:0] - s.rowsBatch[i] = s.persister.Deconstruct(row, 0, values[i]) - } - return m -} - -func (s *parquetWriter[M, P]) flushBuffer() error { - if _, err := s.writer.WriteRowGroup(s.buffer); err != nil { - return err - } - s.rowsTotal += uint64(s.buffer.NumRows()) - s.currentRowGroup++ - s.currentRows = 0 - s.buffer.Reset() - return nil -} - -func (s *parquetWriter[M, P]) meta() block.File { - f := block.File{ - // Note that the path is relative to the symdb root dir. - RelPath: filepath.Base(s.path), - Parquet: &block.ParquetFile{ - NumRows: s.rowsTotal, - }, - } - if f.Parquet.NumRows > 0 { - f.Parquet.NumRowGroups = uint64(s.currentRowGroup + 1) - } - if stat, err := os.Stat(s.path); err == nil { - f.SizeBytes = uint64(stat.Size()) - } - return f -} - -func (s *parquetWriter[M, P]) Close() error { - if err := s.flushBuffer(); err != nil { - return fmt.Errorf("flushing parquet buffer: %w", err) - } - if err := s.writer.Close(); err != nil { - return fmt.Errorf("closing parquet writer: %w", err) - } - if err := s.file.Close(); err != nil { - return fmt.Errorf("closing parquet file: %w", err) - } - return nil -} diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 5ed441ed34..8c8c4d0edc 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -36,16 +36,16 @@ const ( DefaultDirName = "symbols" IndexFileName = "index.symdb" - StacktracesFileName = "stacktraces.symdb" + StacktracesFileName = "stacktraces.symdb" // Used in v1 and v2. + DataFileName = "data.symdb" // Added in v3. ) -const HeaderSize = int(unsafe.Sizeof(Header{})) - const ( _ = iota FormatV1 FormatV2 + FormatV3 unknownVersion ) @@ -96,6 +96,8 @@ type Header struct { Reserved [8]byte // Reserved for future use. 
} +const HeaderSize = int(unsafe.Sizeof(Header{})) + func (h *Header) MarshalBinary() ([]byte, error) { b := make([]byte, HeaderSize) copy(b[0:4], h.Magic[:]) @@ -171,11 +173,11 @@ type PartitionHeaders []*PartitionHeader type PartitionHeader struct { Partition uint64 - StacktraceChunks []StacktraceChunkHeader - Locations []RowRangeReference - Mappings []RowRangeReference - Functions []RowRangeReference - Strings []RowRangeReference + Stacktraces []StacktraceBlockHeader + Locations []SymbolsBlockReference + Mappings []SymbolsBlockReference + Functions []SymbolsBlockReference + Strings []SymbolsBlockReference } func (h *PartitionHeaders) Size() int64 { @@ -220,13 +222,13 @@ func (h *PartitionHeaders) Unmarshal(b []byte) error { func (h *PartitionHeaders) fromChunks(b []byte) error { s := len(b) - if s%stacktraceChunkHeaderSize > 0 { + if s%stacktraceBlockHeaderSize > 0 { return ErrInvalidSize } - chunks := make([]StacktraceChunkHeader, s/stacktraceChunkHeaderSize) + chunks := make([]StacktraceBlockHeader, s/stacktraceBlockHeaderSize) for i := range chunks { - off := i * stacktraceChunkHeaderSize - chunks[i].unmarshal(b[off : off+stacktraceChunkHeaderSize]) + off := i * stacktraceBlockHeaderSize + chunks[i].unmarshal(b[off : off+stacktraceBlockHeaderSize]) } var p *PartitionHeader for _, c := range chunks { @@ -234,60 +236,60 @@ func (h *PartitionHeaders) fromChunks(b []byte) error { p = &PartitionHeader{Partition: c.Partition} *h = append(*h, p) } - p.StacktraceChunks = append(p.StacktraceChunks, c) + p.Stacktraces = append(p.Stacktraces, c) } return nil } func (h *PartitionHeader) marshal(buf []byte) { binary.BigEndian.PutUint64(buf[0:8], h.Partition) - binary.BigEndian.PutUint32(buf[8:12], uint32(len(h.StacktraceChunks))) + binary.BigEndian.PutUint32(buf[8:12], uint32(len(h.Stacktraces))) binary.BigEndian.PutUint32(buf[12:16], uint32(len(h.Locations))) binary.BigEndian.PutUint32(buf[16:20], uint32(len(h.Mappings))) binary.BigEndian.PutUint32(buf[20:24], uint32(len(h.Functions))) binary.BigEndian.PutUint32(buf[24:28], uint32(len(h.Strings))) n := 28 - for i := range h.StacktraceChunks { - h.StacktraceChunks[i].marshal(buf[n:]) - n += stacktraceChunkHeaderSize + for i := range h.Stacktraces { + h.Stacktraces[i].marshal(buf[n:]) + n += stacktraceBlockHeaderSize } - n += marshalRowRangeReferences(buf[n:], h.Locations) - n += marshalRowRangeReferences(buf[n:], h.Mappings) - n += marshalRowRangeReferences(buf[n:], h.Functions) - marshalRowRangeReferences(buf[n:], h.Strings) + n += marshalSymbolsBlockReferences(buf[n:], h.Locations) + n += marshalSymbolsBlockReferences(buf[n:], h.Mappings) + n += marshalSymbolsBlockReferences(buf[n:], h.Functions) + marshalSymbolsBlockReferences(buf[n:], h.Strings) } func (h *PartitionHeader) unmarshal(buf []byte) (err error) { h.Partition = binary.BigEndian.Uint64(buf[0:8]) - h.StacktraceChunks = make([]StacktraceChunkHeader, int(binary.BigEndian.Uint32(buf[8:12]))) - h.Locations = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[12:16]))) - h.Mappings = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[16:20]))) - h.Functions = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[20:24]))) - h.Strings = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[24:28]))) + h.Stacktraces = make([]StacktraceBlockHeader, int(binary.BigEndian.Uint32(buf[8:12]))) + h.Locations = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[12:16]))) + h.Mappings = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[16:20]))) + 
h.Functions = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[20:24]))) + h.Strings = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[24:28]))) buf = buf[28:] - stacktracesSize := len(h.StacktraceChunks) * stacktraceChunkHeaderSize + stacktracesSize := len(h.Stacktraces) * stacktraceBlockHeaderSize if err = h.unmarshalStacktraceChunks(buf[:stacktracesSize]); err != nil { return err } buf = buf[stacktracesSize:] - locationsSize := len(h.Locations) * rowRangeReferenceSize - if err = h.unmarshalRowRangeReferences(h.Locations, buf[:locationsSize]); err != nil { + locationsSize := len(h.Locations) * symbolsBlockReferenceSize + if err = h.unmarshalSymbolsBlockReferences(h.Locations, buf[:locationsSize]); err != nil { return err } buf = buf[locationsSize:] - mappingsSize := len(h.Mappings) * rowRangeReferenceSize - if err = h.unmarshalRowRangeReferences(h.Mappings, buf[:mappingsSize]); err != nil { + mappingsSize := len(h.Mappings) * symbolsBlockReferenceSize + if err = h.unmarshalSymbolsBlockReferences(h.Mappings, buf[:mappingsSize]); err != nil { return err } buf = buf[mappingsSize:] - functionsSize := len(h.Functions) * rowRangeReferenceSize - if err = h.unmarshalRowRangeReferences(h.Functions, buf[:functionsSize]); err != nil { + functionsSize := len(h.Functions) * symbolsBlockReferenceSize + if err = h.unmarshalSymbolsBlockReferences(h.Functions, buf[:functionsSize]); err != nil { return err } buf = buf[functionsSize:] - stringsSize := len(h.Strings) * rowRangeReferenceSize - if err = h.unmarshalRowRangeReferences(h.Strings, buf[:stringsSize]); err != nil { + stringsSize := len(h.Strings) * symbolsBlockReferenceSize + if err = h.unmarshalSymbolsBlockReferences(h.Strings, buf[:stringsSize]); err != nil { return err } @@ -296,120 +298,84 @@ func (h *PartitionHeader) unmarshal(buf []byte) (err error) { func (h *PartitionHeader) Size() int64 { s := 28 - s += len(h.StacktraceChunks) * stacktraceChunkHeaderSize + s += len(h.Stacktraces) * stacktraceBlockHeaderSize r := len(h.Locations) + len(h.Mappings) + len(h.Functions) + len(h.Strings) - s += r * rowRangeReferenceSize + s += r * symbolsBlockReferenceSize return int64(s) } func (h *PartitionHeader) unmarshalStacktraceChunks(b []byte) error { s := len(b) - if s%stacktraceChunkHeaderSize > 0 { + if s%stacktraceBlockHeaderSize > 0 { return ErrInvalidSize } - for i := range h.StacktraceChunks { - off := i * stacktraceChunkHeaderSize - h.StacktraceChunks[i].unmarshal(b[off : off+stacktraceChunkHeaderSize]) + for i := range h.Stacktraces { + off := i * stacktraceBlockHeaderSize + h.Stacktraces[i].unmarshal(b[off : off+stacktraceBlockHeaderSize]) } return nil } -func (h *PartitionHeader) unmarshalRowRangeReferences(refs []RowRangeReference, b []byte) error { +func (h *PartitionHeader) unmarshalSymbolsBlockReferences(refs []SymbolsBlockReference, b []byte) error { s := len(b) - if s%rowRangeReferenceSize > 0 { + if s%symbolsBlockReferenceSize > 0 { return ErrInvalidSize } for i := range refs { - off := i * rowRangeReferenceSize - refs[i].unmarshal(b[off : off+rowRangeReferenceSize]) + off := i * symbolsBlockReferenceSize + refs[i].unmarshal(b[off : off+symbolsBlockReferenceSize]) } return nil } -func marshalRowRangeReferences(b []byte, refs []RowRangeReference) int { +func marshalSymbolsBlockReferences(b []byte, refs []SymbolsBlockReference) int { var off int for i := range refs { - refs[i].marshal(b[off : off+rowRangeReferenceSize]) - off += rowRangeReferenceSize + refs[i].marshal(b[off : off+symbolsBlockReferenceSize]) + 
off += symbolsBlockReferenceSize
 	}
 	return off
 }
 
-const rowRangeReferenceSize = int(unsafe.Sizeof(RowRangeReference{}))
-
-type RowRangeReference struct {
-	RowGroup uint32
-	Index    uint32
-	Rows     uint32
+type SymbolsBlockReference struct {
+	Offset uint32
+	Size   uint32
+	CRC    uint32
 }
 
-func (r *RowRangeReference) marshal(b []byte) {
-	binary.BigEndian.PutUint32(b[0:4], r.RowGroup)
-	binary.BigEndian.PutUint32(b[4:8], r.Index)
-	binary.BigEndian.PutUint32(b[8:12], r.Rows)
-}
+const symbolsBlockReferenceSize = int(unsafe.Sizeof(SymbolsBlockReference{}))
 
-func (r *RowRangeReference) unmarshal(b []byte) {
-	r.RowGroup = binary.BigEndian.Uint32(b[0:4])
-	r.Index = binary.BigEndian.Uint32(b[4:8])
-	r.Rows = binary.BigEndian.Uint32(b[8:12])
+func (r *SymbolsBlockReference) marshal(b []byte) {
+	binary.BigEndian.PutUint32(b[0:4], r.Offset)
+	binary.BigEndian.PutUint32(b[4:8], r.Size)
+	binary.BigEndian.PutUint32(b[8:12], r.CRC)
 }
 
-const stacktraceChunkHeaderSize = int(unsafe.Sizeof(StacktraceChunkHeader{}))
-
-type StacktraceChunkHeader struct {
-	Offset int64
-	Size   int64
-
-	Partition     uint64
-	ChunkIndex    uint16
-	ChunkEncoding ChunkEncoding
-	_             [5]byte // Reserved.
-
-	Stacktraces        uint32 // Number of unique stack traces in the chunk.
-	StacktraceNodes    uint32 // Number of nodes in the stacktrace tree.
-	StacktraceMaxDepth uint32 // Max stack trace depth in the tree.
-	StacktraceMaxNodes uint32 // Max number of nodes at the time of the chunk creation.
-
-	_   [12]byte // Padding. 64 bytes per chunk header.
-	CRC uint32   // Checksum of the chunk data [Offset:Size).
+func (r *SymbolsBlockReference) unmarshal(b []byte) {
+	r.Offset = binary.BigEndian.Uint32(b[0:4])
+	r.Size = binary.BigEndian.Uint32(b[4:8])
+	r.CRC = binary.BigEndian.Uint32(b[8:12])
 }
 
-type ChunkEncoding byte
-
-const (
-	_ ChunkEncoding = iota
-	ChunkEncodingGroupVarint
-)
+func (r *SymbolsBlockReference) AsRowRange() RowRangeReference {
+	return RowRangeReference{
+		RowGroup: r.Offset,
+		Index:    r.Size,
+		Rows:     r.CRC,
+	}
+}
 
-func (h *StacktraceChunkHeader) marshal(b []byte) {
-	binary.BigEndian.PutUint64(b[0:8], uint64(h.Offset))
-	binary.BigEndian.PutUint64(b[8:16], uint64(h.Size))
-	binary.BigEndian.PutUint64(b[16:24], h.Partition)
-	binary.BigEndian.PutUint16(b[24:26], h.ChunkIndex)
-	b[27] = byte(h.ChunkEncoding)
-	// 5 bytes reserved.
-	binary.BigEndian.PutUint32(b[32:36], h.Stacktraces)
-	binary.BigEndian.PutUint32(b[36:40], h.StacktraceNodes)
-	binary.BigEndian.PutUint32(b[40:44], h.StacktraceMaxDepth)
-	binary.BigEndian.PutUint32(b[44:48], h.StacktraceMaxNodes)
-	// 12 bytes reserved.
-	binary.BigEndian.PutUint32(b[60:64], h.CRC)
+type RowRangeReference struct {
+	RowGroup uint32
+	Index    uint32
+	Rows     uint32
 }
 
-func (h *StacktraceChunkHeader) unmarshal(b []byte) {
-	h.Offset = int64(binary.BigEndian.Uint64(b[0:8]))
-	h.Size = int64(binary.BigEndian.Uint64(b[8:16]))
-	h.Partition = binary.BigEndian.Uint64(b[16:24])
-	h.ChunkIndex = binary.BigEndian.Uint16(b[24:26])
-	h.ChunkEncoding = ChunkEncoding(b[27])
-	// 5 bytes reserved.
-	h.Stacktraces = binary.BigEndian.Uint32(b[32:36])
-	h.StacktraceNodes = binary.BigEndian.Uint32(b[36:40])
-	h.StacktraceMaxDepth = binary.BigEndian.Uint32(b[40:44])
-	h.StacktraceMaxNodes = binary.BigEndian.Uint32(b[44:48])
-	// 12 bytes reserved.
-	h.CRC = binary.BigEndian.Uint32(b[60:64])
+// SymbolsBlockReferencesAsRows re-interprets SymbolsBlockReference as
+// RowRangeReference, which was used to describe parquet table row ranges (v2).
+// Both types have identical binary layouts but different semantics.
+func SymbolsBlockReferencesAsRows(s []SymbolsBlockReference) []RowRangeReference { + return *(*[]RowRangeReference)(unsafe.Pointer(&s)) } func ReadIndexFile(b []byte) (f IndexFile, err error) { @@ -441,7 +407,7 @@ func ReadIndexFile(b []byte) (f IndexFile, err error) { return f, fmt.Errorf("unmarshal stacktraces: %w", err) } - case FormatV2: + case FormatV2, FormatV3: ph := f.TOC.Entries[tocEntryPartitionHeaders] if err = f.PartitionHeaders.Unmarshal(b[ph.Offset : ph.Offset+ph.Size]); err != nil { return f, fmt.Errorf("reading partition headers: %w", err) @@ -488,6 +454,64 @@ func (f *IndexFile) WriteTo(dst io.Writer) (n int64, err error) { return w.offset, nil } +type StacktraceBlockHeader struct { + Offset int64 + Size int64 + + Partition uint64 // Used in v1. + BlockIndex uint16 // Used in v1. + + Encoding ChunkEncoding + _ [5]byte // Reserved. + + Stacktraces uint32 // Number of unique stack traces in the chunk. + StacktraceNodes uint32 // Number of nodes in the stacktrace tree. + StacktraceMaxDepth uint32 // Max stack trace depth in the tree. + StacktraceMaxNodes uint32 // Max number of nodes at the time of the chunk creation. + + _ [12]byte // Padding. 64 bytes per chunk header. + CRC uint32 // Checksum of the chunk data [Offset:Size). +} + +const stacktraceBlockHeaderSize = int(unsafe.Sizeof(StacktraceBlockHeader{})) + +type ChunkEncoding byte + +const ( + _ ChunkEncoding = iota + StacktraceEncodingGroupVarint +) + +func (h *StacktraceBlockHeader) marshal(b []byte) { + binary.BigEndian.PutUint64(b[0:8], uint64(h.Offset)) + binary.BigEndian.PutUint64(b[8:16], uint64(h.Size)) + binary.BigEndian.PutUint64(b[16:24], h.Partition) + binary.BigEndian.PutUint16(b[24:26], h.BlockIndex) + b[27] = byte(h.Encoding) + // 5 bytes reserved. + binary.BigEndian.PutUint32(b[32:36], h.Stacktraces) + binary.BigEndian.PutUint32(b[36:40], h.StacktraceNodes) + binary.BigEndian.PutUint32(b[40:44], h.StacktraceMaxDepth) + binary.BigEndian.PutUint32(b[44:48], h.StacktraceMaxNodes) + // 12 bytes reserved. + binary.BigEndian.PutUint32(b[60:64], h.CRC) +} + +func (h *StacktraceBlockHeader) unmarshal(b []byte) { + h.Offset = int64(binary.BigEndian.Uint64(b[0:8])) + h.Size = int64(binary.BigEndian.Uint64(b[8:16])) + h.Partition = binary.BigEndian.Uint64(b[16:24]) + h.BlockIndex = binary.BigEndian.Uint16(b[24:26]) + h.Encoding = ChunkEncoding(b[27]) + // 5 bytes reserved. + h.Stacktraces = binary.BigEndian.Uint32(b[32:36]) + h.StacktraceNodes = binary.BigEndian.Uint32(b[36:40]) + h.StacktraceMaxDepth = binary.BigEndian.Uint32(b[40:44]) + h.StacktraceMaxNodes = binary.BigEndian.Uint32(b[44:48]) + // 12 bytes reserved. + h.CRC = binary.BigEndian.Uint32(b[60:64]) +} + // symbolic information such as locations, functions, mappings, // and strings is represented as Array of Structures in memory, // and is encoded as Structure of Arrays when written on disk. 
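To make the Structure-of-Arrays comment above concrete, here is a stripped-down sketch of column-wise block encoding. The record type is invented, and the real encoders additionally frame every block with a header carrying length, block size, and a CRC; this only illustrates the AoS-to-SoA transposition:

    package main

    import (
    	"bytes"
    	"encoding/binary"
    	"fmt"
    )

    type rec struct {
    	MappingID uint32
    	Folded    bool
    }

    // writeBlockSoA serializes a block of records column by column: all
    // mapping IDs first, then all flags. Columns of similar values compress
    // better and can be decoded or skipped independently, unlike interleaved
    // Array-of-Structures records.
    func writeBlockSoA(w *bytes.Buffer, block []rec) {
    	for _, r := range block { // Column 1: mapping IDs.
    		w.Write(binary.LittleEndian.AppendUint32(nil, r.MappingID))
    	}
    	for _, r := range block { // Column 2: fold flags.
    		var b byte
    		if r.Folded {
    			b = 1
    		}
    		w.WriteByte(b)
    	}
    }

    func main() {
    	var buf bytes.Buffer
    	writeBlockSoA(&buf, []rec{{1, false}, {1, true}, {2, false}})
    	fmt.Printf("% x\n", buf.Bytes()) // 01 00 00 00 01 00 00 00 02 00 00 00 00 01 00
    }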
@@ -512,10 +536,7 @@ type symbolsBlockHeader struct { BlockSize uint32 } -const ( - defaultSymbolsBlockSize = 1 << 10 - symbolsBlockHeaderSize = int(unsafe.Sizeof(mappingsBlockHeader{})) -) +const symbolsBlockHeaderSize = int(unsafe.Sizeof(symbolsBlockHeader{})) func newSymbolsBlockHeader(n, bs int) symbolsBlockHeader { return symbolsBlockHeader{ @@ -541,10 +562,7 @@ func (h *symbolsBlockHeader) unmarshal(b []byte) { } func (h *symbolsBlockHeader) validate() error { - if h.Magic[0] != symdbMagic[0] || - h.Magic[1] != symdbMagic[1] || - h.Magic[2] != symdbMagic[2] || - h.Magic[3] != symdbMagic[3] { + if !bytes.Equal(h.Magic[:], symdbMagic[:]) { return ErrInvalidMagic } if h.Version >= 2 { @@ -586,6 +604,8 @@ type symbolsEncoder[T any] struct { buf []byte } +const defaultSymbolsBlockSize = 1 << 10 + func newSymbolsEncoder[T any](w io.Writer, e symbolsBlockEncoder[T]) *symbolsEncoder[T] { return &symbolsEncoder[T]{w: w, e: e, bs: defaultSymbolsBlockSize} } diff --git a/pkg/phlaredb/symdb/partition_memory.go b/pkg/phlaredb/symdb/partition_memory.go index ab3f7781de..e5a81b2740 100644 --- a/pkg/phlaredb/symdb/partition_memory.go +++ b/pkg/phlaredb/symdb/partition_memory.go @@ -56,7 +56,7 @@ type stacktracesPartition struct { m sync.RWMutex hashToIdx map[uint64]uint32 chunks []*stacktraceChunk - header []StacktraceChunkHeader + header []StacktraceBlockHeader } func newStacktracesPartition(maxNodesPerChunk uint32) *stacktracesPartition { @@ -74,7 +74,7 @@ func newStacktracesPartition(maxNodesPerChunk uint32) *stacktracesPartition { func (p *stacktracesPartition) size() uint64 { p.m.RLock() // TODO: map footprint isn't accounted - v := len(p.header) * stacktraceChunkHeaderSize + v := len(p.header) * stacktraceBlockHeaderSize for _, c := range p.chunks { v += stacktraceTreeNodeSize * cap(c.tree.nodes) } @@ -367,42 +367,62 @@ func (p *PartitionWriter) Release() { // Noop. Satisfies PartitionReader interface. } -func (p *PartitionWriter) write(w *fileWriter) (err error) { - senc := newSymbolsEncoder[string](w.w, new(stringsBlockEncoder)) - senc.Encode(p.strings.slice) - - menc := newSymbolsEncoder[schemav1.InMemoryMapping](w.w, new(mappingsBlockEncoder)) - menc.Encode(p.mappings.slice) - - fenc := newSymbolsEncoder[schemav1.InMemoryFunction](w.w, new(functionsBlockEncoder)) - fenc.Encode(p.functions.slice) - - lenc := newSymbolsEncoder[schemav1.InMemoryLocation](w.w, new(locationsBlockEncoder)) - lenc.Encode(p.locations.slice) - +func (p *PartitionWriter) writeTo(w *writer) (err error) { + if p.header.Strings, err = writeSymbolBlocks(w.dataFile, p.strings.slice, w.stringsEncoder); err != nil { + return err + } + if p.header.Mappings, err = writeSymbolBlocks(w.dataFile, p.mappings.slice, w.mappingsEncoder); err != nil { + return err + } + if p.header.Functions, err = writeSymbolBlocks(w.dataFile, p.functions.slice, w.functionsEncoder); err != nil { + return err + } + if p.header.Locations, err = writeSymbolBlocks(w.dataFile, p.locations.slice, w.locationsEncoder); err != nil { + return err + } for ci, c := range p.stacktraces.chunks { stacks := c.stacks if stacks == 0 { stacks = uint32(len(p.stacktraces.hashToIdx)) } - h := StacktraceChunkHeader{ - Offset: w.w.offset, - Size: 0, // Set later. 
+ h := StacktraceBlockHeader{ + Offset: w.dataFile.w.offset, Partition: p.header.Partition, - ChunkIndex: uint16(ci), - ChunkEncoding: ChunkEncodingGroupVarint, + BlockIndex: uint16(ci), + Encoding: StacktraceEncodingGroupVarint, Stacktraces: stacks, StacktraceNodes: c.tree.len(), - StacktraceMaxDepth: 0, // TODO StacktraceMaxNodes: c.partition.maxNodesPerChunk, - CRC: 0, // Set later. } crc := crc32.New(castagnoli) - if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.w)); err != nil { + if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.dataFile)); err != nil { return fmt.Errorf("writing stacktrace chunk data: %w", err) } h.CRC = crc.Sum32() - p.header.StacktraceChunks = append(p.header.StacktraceChunks, h) + p.header.Stacktraces = append(p.header.Stacktraces, h) } return nil } + +func writeSymbolBlocks[T any](f *fileWriter, s []T, e *symbolsEncoder[T]) ([]SymbolsBlockReference, error) { + // TODO(kolesnikovae): Split into blocks (< 1M). + h, err := writeSymbolsBlock(f, func(w io.Writer) error { + e.Reset(w) + err := e.Encode(s) + e.Reset(nil) + return err + }) + return []SymbolsBlockReference{h}, err +} + +func writeSymbolsBlock(w *fileWriter, fn func(io.Writer) error) (h SymbolsBlockReference, err error) { + h.Offset = uint32(w.w.offset) + crc := crc32.New(castagnoli) + mw := io.MultiWriter(crc, w.w) + if err = fn(mw); err != nil { + return h, err + } + h.Size = uint32(w.w.offset) - h.Offset + h.CRC = crc.Sum32() + return h, nil +} From 8c105664fa90a131a975888bc3f2dec060a1ab56 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 22 Apr 2024 19:33:04 +0800 Subject: [PATCH 11/36] improve versioning --- Makefile | 2 +- pkg/phlaredb/symdb/block_reader.go | 366 ++++++++--------- pkg/phlaredb/symdb/block_reader_parquet.go | 163 ++++++++ pkg/phlaredb/symdb/block_reader_test.go | 1 + pkg/phlaredb/symdb/block_writer.go | 9 +- pkg/phlaredb/symdb/format.go | 434 ++++++++++----------- pkg/phlaredb/symdb/partition_memory.go | 29 +- pkg/phlaredb/symdb/strings_test.go | 20 +- pkg/phlaredb/symdb/symdb.go | 2 +- 9 files changed, 564 insertions(+), 462 deletions(-) create mode 100644 pkg/phlaredb/symdb/block_reader_parquet.go diff --git a/Makefile b/Makefile index fd69e72ce2..f7e01d8727 100644 --- a/Makefile +++ b/Makefile @@ -260,7 +260,7 @@ $(BIN)/buf: Makefile $(BIN)/golangci-lint: Makefile @mkdir -p $(@D) - GOBIN=$(abspath $(@D)) $(GO) install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.54.0 + GOBIN=$(abspath $(@D)) $(GO) install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.57.2 $(BIN)/protoc-gen-go: Makefile go.mod @mkdir -p $(@D) diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index 95e66e16f3..030ecb048a 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -3,7 +3,6 @@ package symdb import ( "bufio" "context" - "errors" "fmt" "hash/crc32" "io" @@ -14,11 +13,9 @@ import ( "github.com/grafana/dskit/multierror" "github.com/opentracing/opentracing-go" otlog "github.com/opentracing/opentracing-go/log" - "github.com/parquet-go/parquet-go" "golang.org/x/sync/errgroup" "github.com/grafana/pyroscope/pkg/objstore" - parquetobj "github.com/grafana/pyroscope/pkg/objstore/parquet" "github.com/grafana/pyroscope/pkg/phlaredb/block" schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/util/refctr" @@ -29,44 +26,36 @@ type Reader struct { files map[string]block.File meta *block.Meta - chunkFetchBufferSize int + // TODO: fetch buffer pool + fetchBufferSize int index 
IndexFile partitions []*partition partitionsMap map[uint64]*partition - locations parquetobj.File - mappings parquetobj.File - functions parquetobj.File - strings parquetobj.File + parquetFiles *parquetFiles } -const defaultChunkFetchBufferSize = 4096 +const defaultFetchBufferSize = 4096 func Open(ctx context.Context, b objstore.BucketReader, m *block.Meta) (*Reader, error) { - r := Reader{ + r := &Reader{ bucket: b, meta: m, files: make(map[string]block.File), - chunkFetchBufferSize: defaultChunkFetchBufferSize, + fetchBufferSize: defaultFetchBufferSize, } - if err := r.open(ctx); err != nil { - return nil, err - } - return &r, nil -} - -func (r *Reader) open(ctx context.Context) (err error) { for _, f := range r.meta.Files { r.files[filepath.Base(f.RelPath)] = f } + var err error if err = r.openIndexFile(ctx); err != nil { - return fmt.Errorf("opening index file: %w", err) + return nil, fmt.Errorf("opening index file: %w", err) } if r.index.Header.Version == FormatV2 { - if err = r.openParquetFiles(ctx); err != nil { - return err + if err = openParquetFiles(ctx, r); err != nil { + return nil, err } } r.partitionsMap = make(map[uint64]*partition, len(r.index.PartitionHeaders)) @@ -76,6 +65,16 @@ func (r *Reader) open(ctx context.Context) (err error) { r.partitionsMap[h.Partition] = ph r.partitions[i] = ph } + return r, nil +} + +func (r *Reader) Close() error { + if r == nil { + return nil + } + if r.parquetFiles != nil { + return r.parquetFiles.Close() + } return nil } @@ -96,39 +95,6 @@ func (r *Reader) openIndexFile(ctx context.Context) error { return err } -const parquetReadBufferSize = 256 << 10 // 256KB - -func (r *Reader) openParquetFiles(ctx context.Context) error { - options := []parquet.FileOption{ - parquet.SkipBloomFilters(true), // we don't use bloom filters - parquet.FileReadMode(parquet.ReadModeAsync), - parquet.ReadBufferSize(parquetReadBufferSize), - } - - m := map[string]*parquetobj.File{ - new(schemav1.LocationPersister).Name() + block.ParquetSuffix: &r.locations, - new(schemav1.MappingPersister).Name() + block.ParquetSuffix: &r.mappings, - new(schemav1.FunctionPersister).Name() + block.ParquetSuffix: &r.functions, - new(schemav1.StringPersister).Name() + block.ParquetSuffix: &r.strings, - } - g, ctx := errgroup.WithContext(ctx) - for n, fp := range m { - n := n - fp := fp - g.Go(func() error { - fm, err := r.file(n) - if err != nil { - return err - } - if err = fp.Open(ctx, r.bucket, fm, options...); err != nil { - return fmt.Errorf("openning file %q: %w", n, err) - } - return nil - }) - } - return g.Wait() -} - func (r *Reader) file(name string) (block.File, error) { f, ok := r.files[name] if !ok { @@ -138,43 +104,15 @@ func (r *Reader) file(name string) (block.File, error) { } func (r *Reader) partitionReader(h *PartitionHeader) *partition { - p := &partition{ - reader: r, - locations: parquetTableRange[schemav1.InMemoryLocation, schemav1.LocationPersister]{ - bucket: r.bucket, - headers: SymbolsBlockReferencesAsRows(h.Locations), - file: &r.locations, - }, - mappings: parquetTableRange[schemav1.InMemoryMapping, schemav1.MappingPersister]{ - bucket: r.bucket, - headers: SymbolsBlockReferencesAsRows(h.Mappings), - file: &r.mappings, - }, - functions: parquetTableRange[schemav1.InMemoryFunction, schemav1.FunctionPersister]{ - bucket: r.bucket, - headers: SymbolsBlockReferencesAsRows(h.Functions), - file: &r.functions, - }, - strings: parquetTableRange[string, schemav1.StringPersister]{ - bucket: r.bucket, - headers: SymbolsBlockReferencesAsRows(h.Strings), - file: 
&r.strings, - }, - } - p.setStacktracesChunks(h.Stacktraces) - return p -} - -func (r *Reader) Close() error { - if r == nil { - return nil + p := &partition{reader: r} + if r.index.Header.Version == FormatV2 { + p.initParquetTables(h) + } + if r.index.Header.Version == FormatV3 { + p.initTables(h) } - return multierror.New( - r.locations.Close(), - r.mappings.Close(), - r.functions.Close(), - r.strings.Close()). - Err() + p.initStacktraces(h.Stacktraces) + return p } var ErrPartitionNotFound = fmt.Errorf("partition not found") @@ -192,7 +130,7 @@ func (r *Reader) partition(ctx context.Context, partition uint64) (*partition, e if !ok { return nil, ErrPartitionNotFound } - if err := p.init(ctx); err != nil { + if err := p.fetch(ctx); err != nil { return nil, err } return p, nil @@ -201,14 +139,19 @@ func (r *Reader) partition(ctx context.Context, partition uint64) (*partition, e type partition struct { reader *Reader - stacktraceChunks []*stacktraceChunkReader - locations parquetTableRange[schemav1.InMemoryLocation, schemav1.LocationPersister] - mappings parquetTableRange[schemav1.InMemoryMapping, schemav1.MappingPersister] - functions parquetTableRange[schemav1.InMemoryFunction, schemav1.FunctionPersister] - strings parquetTableRange[string, schemav1.StringPersister] + stacktraces []*stacktraceBlock + locations table[schemav1.InMemoryLocation] + mappings table[schemav1.InMemoryMapping] + functions table[schemav1.InMemoryFunction] + strings table[string] +} + +type table[T any] interface { + fetchable + slice() []T } -func (p *partition) init(ctx context.Context) (err error) { +func (p *partition) fetch(ctx context.Context) (err error) { return p.tx().fetch(ctx) } @@ -217,68 +160,115 @@ func (p *partition) Release() { } func (p *partition) tx() *fetchTx { - tx := make(fetchTx, 0, len(p.stacktraceChunks)+4) - for _, c := range p.stacktraceChunks { + tx := make(fetchTx, 0, len(p.stacktraces)+4) + for _, c := range p.stacktraces { tx.append(c) } if p.reader.index.Header.Version > FormatV1 { - tx.append(&p.locations) - tx.append(&p.mappings) - tx.append(&p.functions) - tx.append(&p.strings) + tx.append(p.locations) + tx.append(p.mappings) + tx.append(p.functions) + tx.append(p.strings) } return &tx } +func (p *partition) initParquetTables(h *PartitionHeader) { + p.locations = &parquetTable[schemav1.InMemoryLocation, schemav1.LocationPersister]{ + bucket: p.reader.bucket, + headers: h.V2.Locations, + file: &p.reader.parquetFiles.locations, + } + p.mappings = &parquetTable[schemav1.InMemoryMapping, schemav1.MappingPersister]{ + bucket: p.reader.bucket, + headers: h.V2.Mappings, + file: &p.reader.parquetFiles.mappings, + } + p.functions = &parquetTable[schemav1.InMemoryFunction, schemav1.FunctionPersister]{ + bucket: p.reader.bucket, + headers: h.V2.Functions, + file: &p.reader.parquetFiles.functions, + } + p.strings = &parquetTable[string, schemav1.StringPersister]{ + bucket: p.reader.bucket, + headers: h.V2.Strings, + file: &p.reader.parquetFiles.strings, + } +} + +func (p *partition) initTables(h *PartitionHeader) { + // TODO(kolesnikovae): decoder pool. 
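As an aside, the decoder-pool TODO above could plausibly be backed by a small typed wrapper over sync.Pool, assuming the decoders gain a way to be re-bound to a new block header before reuse. A hypothetical sketch, not part of the patch:

    package symdb

    import "sync"

    // pool is a minimal typed wrapper around sync.Pool; a decoder pool would
    // instantiate it once per symbol kind and reset a pooled decoder before
    // handing it out (the reset step is assumed, not present in the patch).
    type pool[T any] struct{ p sync.Pool }

    func newPool[T any](newT func() *T) *pool[T] {
    	return &pool[T]{p: sync.Pool{New: func() any { return newT() }}}
    }

    func (p *pool[T]) get() *T  { return p.p.Get().(*T) }
    func (p *pool[T]) put(t *T) { p.p.Put(t) }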
+ p.locations = &rawTable[schemav1.InMemoryLocation]{ + reader: p.reader, + header: h.V3.Locations, + dec: newSymbolsDecoder[schemav1.InMemoryLocation](h.V3.Locations, new(locationsBlockDecoder)), + } + p.mappings = &rawTable[schemav1.InMemoryMapping]{ + reader: p.reader, + header: h.V3.Mappings, + dec: newSymbolsDecoder[schemav1.InMemoryMapping](h.V3.Mappings, new(mappingsBlockDecoder)), + } + p.functions = &rawTable[schemav1.InMemoryFunction]{ + reader: p.reader, + header: h.V3.Functions, + dec: newSymbolsDecoder[schemav1.InMemoryFunction](h.V3.Functions, new(functionsBlockDecoder)), + } + p.strings = &rawTable[string]{ + reader: p.reader, + header: h.V3.Strings, + dec: newSymbolsDecoder[string](h.V3.Strings, new(stringsBlockDecoder)), + } +} + func (p *partition) Symbols() *Symbols { return &Symbols{ Stacktraces: p, - Locations: p.locations.s, - Mappings: p.mappings.s, - Functions: p.functions.s, - Strings: p.strings.s, + Locations: p.locations.slice(), + Mappings: p.mappings.slice(), + Functions: p.functions.slice(), + Strings: p.strings.slice(), } } func (p *partition) WriteStats(s *PartitionStats) { var nodes uint32 - for _, c := range p.stacktraceChunks { + for _, c := range p.stacktraces { s.StacktracesTotal += int(c.header.Stacktraces) nodes += c.header.StacktraceNodes } s.MaxStacktraceID = int(nodes) - s.LocationsTotal = len(p.locations.s) - s.MappingsTotal = len(p.mappings.s) - s.FunctionsTotal = len(p.functions.s) - s.StringsTotal = len(p.strings.s) + s.LocationsTotal = len(p.locations.slice()) + s.MappingsTotal = len(p.mappings.slice()) + s.FunctionsTotal = len(p.functions.slice()) + s.StringsTotal = len(p.strings.slice()) } var ErrInvalidStacktraceRange = fmt.Errorf("invalid range: stack traces can't be resolved") func (p *partition) LookupLocations(dst []uint64, stacktraceID uint32) []uint64 { dst = dst[:0] - if len(p.stacktraceChunks) == 0 { + if len(p.stacktraces) == 0 { return dst } - nodesPerChunk := p.stacktraceChunks[0].header.StacktraceMaxNodes + nodesPerChunk := p.stacktraces[0].header.StacktraceMaxNodes chunkID := stacktraceID / nodesPerChunk localSID := stacktraceID % nodesPerChunk - if localSID == 0 || int(chunkID) > len(p.stacktraceChunks) { + if localSID == 0 || int(chunkID) > len(p.stacktraces) { return dst } - return p.stacktraceChunks[chunkID].t.resolveUint64(dst, localSID) + return p.stacktraces[chunkID].t.resolveUint64(dst, localSID) } func (p *partition) ResolveStacktraceLocations(ctx context.Context, dst StacktraceInserter, s []uint32) (err error) { if len(s) == 0 { return nil } - if len(p.stacktraceChunks) == 0 { + if len(p.stacktraces) == 0 { return ErrInvalidStacktraceRange } // First, we determine the chunks needed for the range. // All chunks in a block must have the same StacktraceMaxNodes. 
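For reference, the ID arithmetic in LookupLocations above reduces to a division and a remainder over the fixed per-chunk node budget; the helper below is ours, not the patch's:

    package symdb

    // splitStacktraceID maps a global stacktrace ID to a chunk index and a
    // chunk-local node ID, given the per-chunk budget StacktraceMaxNodes.
    // Exact multiples of maxNodes have local ID 0, which the lookup above
    // rejects as invalid.
    func splitStacktraceID(id, maxNodes uint32) (chunk, local uint32) {
    	return id / maxNodes, id % maxNodes
    }

    // For example, with maxNodes = 1<<20, ID 2_097_153 resolves to
    // chunk 2, local node 1.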
- sr := SplitStacktraces(s, p.stacktraceChunks[0].header.StacktraceMaxNodes) + sr := SplitStacktraces(s, p.stacktraces[0].header.StacktraceMaxNodes) for _, c := range sr { if err = p.lookupStacktraces(ctx, dst, c).do(); err != nil { return err @@ -287,19 +277,19 @@ func (p *partition) ResolveStacktraceLocations(ctx context.Context, dst Stacktra return nil } -func (p *partition) setStacktracesChunks(chunks []StacktraceBlockHeader) { - p.stacktraceChunks = make([]*stacktraceChunkReader, len(chunks)) +func (p *partition) initStacktraces(chunks []StacktraceBlockHeader) { + p.stacktraces = make([]*stacktraceBlock, len(chunks)) for i, c := range chunks { - p.stacktraceChunks[i] = &stacktraceChunkReader{ + p.stacktraces[i] = &stacktraceBlock{ reader: p.reader, header: c, } } } -func (p *partition) stacktraceChunkReader(i uint32) *stacktraceChunkReader { - if int(i) < len(p.stacktraceChunks) { - return p.stacktraceChunks[i] +func (p *partition) stacktraceChunkReader(i uint32) *stacktraceBlock { + if int(i) < len(p.stacktraces) { + return p.stacktraces[i] } return nil } @@ -337,7 +327,7 @@ func (r *stacktracesLookup) do() error { return nil } -type stacktraceChunkReader struct { +type stacktraceBlock struct { reader *Reader header StacktraceBlockHeader @@ -345,8 +335,8 @@ type stacktraceChunkReader struct { t *parentPointerTree } -func (c *stacktraceChunkReader) fetch(ctx context.Context) error { - span, ctx := opentracing.StartSpanFromContext(ctx, "stacktraceChunkReader.fetch") +func (c *stacktraceBlock) fetch(ctx context.Context) error { + span, ctx := opentracing.StartSpanFromContext(ctx, "stacktraceBlock.fetch") span.LogFields( otlog.Int64("size", c.header.Size), otlog.Uint32("nodes", c.header.StacktraceNodes), @@ -354,7 +344,11 @@ func (c *stacktraceChunkReader) fetch(ctx context.Context) error { ) defer span.Finish() return c.r.Inc(func() error { - f, err := c.reader.file(StacktracesFileName) + filename := DataFileName + if c.reader.index.Header.Version < 3 { + filename = StacktracesFileName + } + f, err := c.reader.file(filename) if err != nil { return err } @@ -365,12 +359,11 @@ func (c *stacktraceChunkReader) fetch(ctx context.Context) error { defer func() { err = multierror.New(err, rc.Close()).Err() }() - // Consider pooling the buffer. - return c.readFrom(bufio.NewReaderSize(rc, c.reader.chunkFetchBufferSize)) + return c.readFrom(bufio.NewReaderSize(rc, c.reader.fetchBufferSize)) }) } -func (c *stacktraceChunkReader) readFrom(r io.Reader) error { +func (c *stacktraceBlock) readFrom(r *bufio.Reader) error { // NOTE(kolesnikovae): Pool of node chunks could reduce // the alloc size, but it may affect memory locality. 
// Although, properly aligned chunks of, say, 1-4K nodes @@ -393,94 +386,59 @@ func (c *stacktraceChunkReader) readFrom(r io.Reader) error { return nil } -func (c *stacktraceChunkReader) release() { +func (c *stacktraceBlock) release() { c.r.Dec(func() { c.t = nil }) } -type parquetTableRange[M schemav1.Models, P schemav1.Persister[M]] struct { - headers []RowRangeReference - bucket objstore.BucketReader - persister P - - file *parquetobj.File - - r refctr.Counter - s []M +type rawTable[T any] struct { + reader *Reader + header SymbolsBlockHeader + dec *symbolsDecoder[T] + r refctr.Counter + s []T } -// parquet.CopyRows uses hardcoded buffer size: -// defaultRowBufferSize = 42 -const inMemoryReaderRowsBufSize = 1 << 10 - -func (t *parquetTableRange[M, P]) fetch(ctx context.Context) (err error) { - span, _ := opentracing.StartSpanFromContext(ctx, "parquetTableRange.fetch", opentracing.Tags{ - "table_name": t.persister.Name(), - "row_groups": len(t.headers), - }) +func (t *rawTable[T]) fetch(ctx context.Context) error { + span, ctx := opentracing.StartSpanFromContext(ctx, "symbolsTable.fetch") + span.LogFields( + otlog.Uint32("size", t.header.Size), + otlog.Uint32("length", t.header.Length), + ) defer span.Finish() return t.r.Inc(func() error { - var s uint32 - for _, h := range t.headers { - s += h.Rows + f, err := t.reader.file(DataFileName) + if err != nil { + return err } - buf := make([]parquet.Row, inMemoryReaderRowsBufSize) - t.s = make([]M, s) - var offset int - // TODO(kolesnikovae): Row groups could be fetched in parallel. - rgs := t.file.RowGroups() - for _, h := range t.headers { - span.LogFields( - otlog.Uint32("row_group", h.RowGroup), - otlog.Uint32("index_row", h.Index), - otlog.Uint32("rows", h.Rows), - ) - rg := rgs[h.RowGroup] - rows := rg.Rows() - if err := rows.SeekToRow(int64(h.Index)); err != nil { - return err - } - dst := t.s[offset : offset+int(h.Rows)] - if err := t.readRows(dst, buf, rows); err != nil { - return fmt.Errorf("reading row group from parquet file %q: %w", t.file.Path(), err) - } - offset += int(h.Rows) + rc, err := t.reader.bucket.GetRange(ctx, f.RelPath, int64(t.header.Offset), int64(t.header.Size)) + if err != nil { + return err } - return nil + defer func() { + err = multierror.New(err, rc.Close()).Err() + }() + return t.readFrom(bufio.NewReaderSize(rc, t.reader.fetchBufferSize)) }) } -func (t *parquetTableRange[M, P]) readRows(dst []M, buf []parquet.Row, rows parquet.Rows) (err error) { - defer func() { - err = multierror.New(err, rows.Close()).Err() - }() - for i := 0; i < len(dst); { - n, err := rows.ReadRows(buf) - if n > 0 { - for _, row := range buf[:n] { - if i == len(dst) { - return nil - } - _, v, err := t.persister.Reconstruct(row) - if err != nil { - return err - } - dst[i] = v - i++ - } - } - if err != nil { - if errors.Is(err, io.EOF) { - return nil - } - return err - } +func (t *rawTable[T]) readFrom(r *bufio.Reader) error { + crc := crc32.New(castagnoli) + tee := io.TeeReader(r, crc) + t.s = make([]T, t.header.Length) + if err := t.dec.Decode(t.s, tee); err != nil { + return fmt.Errorf("failed to decode symbols: %w", err) + } + if t.header.CRC != crc.Sum32() { + return ErrInvalidCRC } return nil } -func (t *parquetTableRange[M, P]) release() { +func (t *rawTable[T]) slice() []T { return t.s } + +func (t *rawTable[T]) release() { t.r.Dec(func() { t.s = nil }) @@ -488,14 +446,14 @@ func (t *parquetTableRange[M, P]) release() { // fetchTx facilitates fetching multiple objects in a transactional manner: // if one of the objects has 
failed, all the remaining ones are released. -type fetchTx []fetch +type fetchTx []fetchable -type fetch interface { +type fetchable interface { fetch(context.Context) error release() } -func (tx *fetchTx) append(x fetch) { *tx = append(*tx, x) } +func (tx *fetchTx) append(x fetchable) { *tx = append(*tx, x) } func (tx *fetchTx) fetch(ctx context.Context) (err error) { defer func() { diff --git a/pkg/phlaredb/symdb/block_reader_parquet.go b/pkg/phlaredb/symdb/block_reader_parquet.go new file mode 100644 index 0000000000..1addb2764e --- /dev/null +++ b/pkg/phlaredb/symdb/block_reader_parquet.go @@ -0,0 +1,163 @@ +package symdb + +import ( + "context" + "errors" + "fmt" + "io" + + "github.com/grafana/dskit/multierror" + "github.com/opentracing/opentracing-go" + otlog "github.com/opentracing/opentracing-go/log" + "github.com/parquet-go/parquet-go" + "golang.org/x/sync/errgroup" + + "github.com/grafana/pyroscope/pkg/objstore" + parquetobj "github.com/grafana/pyroscope/pkg/objstore/parquet" + "github.com/grafana/pyroscope/pkg/phlaredb/block" + schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" + "github.com/grafana/pyroscope/pkg/util/refctr" +) + +type parquetTable[M schemav1.Models, P schemav1.Persister[M]] struct { + headers []RowRangeReference + bucket objstore.BucketReader + persister P + + file *parquetobj.File + + r refctr.Counter + s []M +} + +const ( + // parquet.CopyRows uses hardcoded buffer size: + // defaultRowBufferSize = 42 + inMemoryReaderRowsBufSize = 1 << 10 + parquetReadBufferSize = 256 << 10 // 256KB +) + +func (t *parquetTable[M, P]) fetch(ctx context.Context) (err error) { + span, _ := opentracing.StartSpanFromContext(ctx, "parquetTable.fetch", opentracing.Tags{ + "table_name": t.persister.Name(), + "row_groups": len(t.headers), + }) + defer span.Finish() + return t.r.Inc(func() error { + var s uint32 + for _, h := range t.headers { + s += h.Rows + } + buf := make([]parquet.Row, inMemoryReaderRowsBufSize) + t.s = make([]M, s) + var offset int + // TODO(kolesnikovae): Row groups could be fetched in parallel. 
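If the parallel-fetch TODO above were pursued, each header already owns a disjoint window of the destination slice, so the per-range reads could run under an errgroup without extra locking. A sketch under that assumption (the helper and its signature are hypothetical):

    package symdb

    import (
    	"context"

    	"golang.org/x/sync/errgroup"
    )

    // fetchConcurrently runs one fetch per row range. Every range writes to
    // its own dst[off:off+n] window, so the goroutines never overlap.
    func fetchConcurrently[T any](ctx context.Context, dst []T, sizes []int,
    	fetch func(ctx context.Context, i int, out []T) error,
    ) error {
    	g, ctx := errgroup.WithContext(ctx)
    	off := 0
    	for i, n := range sizes {
    		i, out := i, dst[off:off+n]
    		off += n
    		g.Go(func() error { return fetch(ctx, i, out) })
    	}
    	return g.Wait()
    }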
+		rgs := t.file.RowGroups()
+		for _, h := range t.headers {
+			span.LogFields(
+				otlog.Uint32("row_group", h.RowGroup),
+				otlog.Uint32("index_row", h.Index),
+				otlog.Uint32("rows", h.Rows),
+			)
+			rg := rgs[h.RowGroup]
+			rows := rg.Rows()
+			if err := rows.SeekToRow(int64(h.Index)); err != nil {
+				return err
+			}
+			dst := t.s[offset : offset+int(h.Rows)]
+			if err := t.readRows(dst, buf, rows); err != nil {
+				return fmt.Errorf("reading row group from parquet file %q: %w", t.file.Path(), err)
+			}
+			offset += int(h.Rows)
+		}
+		return nil
+	})
+}
+
+func (t *parquetTable[M, P]) readRows(dst []M, buf []parquet.Row, rows parquet.Rows) (err error) {
+	defer func() {
+		err = multierror.New(err, rows.Close()).Err()
+	}()
+	for i := 0; i < len(dst); {
+		n, err := rows.ReadRows(buf)
+		if n > 0 {
+			for _, row := range buf[:n] {
+				if i == len(dst) {
+					return nil
+				}
+				_, v, err := t.persister.Reconstruct(row)
+				if err != nil {
+					return err
+				}
+				dst[i] = v
+				i++
+			}
+		}
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				return nil
+			}
+			return err
+		}
+	}
+	return nil
+}
+
+func (t *parquetTable[M, P]) slice() []M { return t.s }
+
+func (t *parquetTable[M, P]) release() {
+	t.r.Dec(func() {
+		t.s = nil
+	})
+}
+
+type parquetFiles struct {
+	locations parquetobj.File
+	mappings  parquetobj.File
+	functions parquetobj.File
+	strings   parquetobj.File
+}
+
+func (f *parquetFiles) Close() error {
+	return multierror.New(
+		f.locations.Close(),
+		f.mappings.Close(),
+		f.functions.Close(),
+		f.strings.Close()).
+		Err()
+}
+
+func openParquetFiles(ctx context.Context, r *Reader) error {
+	options := []parquet.FileOption{
+		parquet.SkipBloomFilters(true),
+		parquet.FileReadMode(parquet.ReadModeAsync),
+		parquet.ReadBufferSize(parquetReadBufferSize),
+	}
+	files := new(parquetFiles)
+	m := map[string]*parquetobj.File{
+		new(schemav1.LocationPersister).Name() + block.ParquetSuffix: &files.locations,
+		new(schemav1.MappingPersister).Name() + block.ParquetSuffix:  &files.mappings,
+		new(schemav1.FunctionPersister).Name() + block.ParquetSuffix: &files.functions,
+		new(schemav1.StringPersister).Name() + block.ParquetSuffix:   &files.strings,
+	}
+	g, ctx := errgroup.WithContext(ctx)
+	for n, fp := range m {
+		n := n
+		fp := fp
+		g.Go(func() error {
+			fm, err := r.file(n)
+			if err != nil {
+				return err
+			}
+			if err = fp.Open(ctx, r.bucket, fm, options...); err != nil {
+				return fmt.Errorf("opening file %q: %w", n, err)
+			}
+			return nil
+		})
+	}
+	if err := g.Wait(); err != nil {
+		return err
+	}
+	r.parquetFiles = files
+	return nil
+}
diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go
index f4e5b2cdaf..fcc63ee9b2 100644
--- a/pkg/phlaredb/symdb/block_reader_test.go
+++ b/pkg/phlaredb/symdb/block_reader_test.go
@@ -16,6 +16,7 @@ var testBlockMeta = &block.Meta{
 	Files: []block.File{
 		{RelPath: IndexFileName},
 		{RelPath: StacktracesFileName},
+		{RelPath: DataFileName},
 		{RelPath: "locations.parquet"},
 		{RelPath: "mappings.parquet"},
 		{RelPath: "functions.parquet"},
diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go
index 099f04da2f..35c8687d2e 100644
--- a/pkg/phlaredb/symdb/block_writer.go
+++ b/pkg/phlaredb/symdb/block_writer.go
@@ -37,10 +37,11 @@ func newWriter(c *Config) *writer {
 			},
 		},
 
-		stringsEncoder:   newSymbolsEncoder[string](nil, new(stringsBlockEncoder)),
-		mappingsEncoder:  newSymbolsEncoder[v1.InMemoryMapping](nil, new(mappingsBlockEncoder)),
-		functionsEncoder: newSymbolsEncoder[v1.InMemoryFunction](nil, new(functionsBlockEncoder)),
-
locationsEncoder: newSymbolsEncoder[v1.InMemoryLocation](nil, new(locationsBlockEncoder)), + // TODO(kolesnikovae): encoder pool. + stringsEncoder: newSymbolsEncoder[string](new(stringsBlockEncoder)), + mappingsEncoder: newSymbolsEncoder[v1.InMemoryMapping](new(mappingsBlockEncoder)), + functionsEncoder: newSymbolsEncoder[v1.InMemoryFunction](new(functionsBlockEncoder)), + locationsEncoder: newSymbolsEncoder[v1.InMemoryLocation](new(locationsBlockEncoder)), } } diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 8c8c4d0edc..d71b2fa642 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -171,13 +171,10 @@ func (h *TOCEntry) unmarshal(b []byte) { type PartitionHeaders []*PartitionHeader type PartitionHeader struct { - Partition uint64 - + Partition uint64 Stacktraces []StacktraceBlockHeader - Locations []SymbolsBlockReference - Mappings []SymbolsBlockReference - Functions []SymbolsBlockReference - Strings []SymbolsBlockReference + V2 *PartitionHeaderV2 + V3 *PartitionHeaderV3 } func (h *PartitionHeaders) Size() int64 { @@ -194,33 +191,17 @@ func (h *PartitionHeaders) WriteTo(dst io.Writer) (_ int64, err error) { binary.BigEndian.PutUint32(buf, uint32(len(*h))) w.write(buf) for _, p := range *h { - s := p.Size() - if int(s) > cap(buf) { - buf = make([]byte, s) + if p.V3 == nil { + return 0, fmt.Errorf("v2 format is not supported") } - buf = buf[:s] + buf = slices.GrowLen(buf, int(p.Size())) p.marshal(buf) w.write(buf) } return w.offset, w.err } -func (h *PartitionHeaders) Unmarshal(b []byte) error { - partitions := binary.BigEndian.Uint32(b[0:4]) - b = b[4:] - *h = make(PartitionHeaders, partitions) - for i := range *h { - var p PartitionHeader - if err := p.unmarshal(b); err != nil { - return err - } - b = b[p.Size():] - (*h)[i] = &p - } - return nil -} - -func (h *PartitionHeaders) fromChunks(b []byte) error { +func (h *PartitionHeaders) UnmarshalV1(b []byte) error { s := len(b) if s%stacktraceBlockHeaderSize > 0 { return ErrInvalidSize @@ -241,70 +222,109 @@ func (h *PartitionHeaders) fromChunks(b []byte) error { return nil } +func (h *PartitionHeaders) UnmarshalV2(b []byte) error { return h.unmarshal(b, FormatV2) } + +func (h *PartitionHeaders) UnmarshalV3(b []byte) error { return h.unmarshal(b, FormatV3) } + +func (h *PartitionHeaders) unmarshal(b []byte, version int) error { + partitions := binary.BigEndian.Uint32(b[0:4]) + b = b[4:] + *h = make(PartitionHeaders, partitions) + for i := range *h { + var p PartitionHeader + if err := p.unmarshal(b, version); err != nil { + return err + } + b = b[p.Size():] + (*h)[i] = &p + } + return nil +} + func (h *PartitionHeader) marshal(buf []byte) { binary.BigEndian.PutUint64(buf[0:8], h.Partition) binary.BigEndian.PutUint32(buf[8:12], uint32(len(h.Stacktraces))) - binary.BigEndian.PutUint32(buf[12:16], uint32(len(h.Locations))) - binary.BigEndian.PutUint32(buf[16:20], uint32(len(h.Mappings))) - binary.BigEndian.PutUint32(buf[20:24], uint32(len(h.Functions))) - binary.BigEndian.PutUint32(buf[24:28], uint32(len(h.Strings))) - n := 28 + n := 12 for i := range h.Stacktraces { h.Stacktraces[i].marshal(buf[n:]) n += stacktraceBlockHeaderSize } - n += marshalSymbolsBlockReferences(buf[n:], h.Locations) - n += marshalSymbolsBlockReferences(buf[n:], h.Mappings) - n += marshalSymbolsBlockReferences(buf[n:], h.Functions) - marshalSymbolsBlockReferences(buf[n:], h.Strings) + n += marshalSymbolsBlockReferences(buf[n:], h.V3.Locations) + n += marshalSymbolsBlockReferences(buf[n:], h.V3.Mappings) + n += 
marshalSymbolsBlockReferences(buf[n:], h.V3.Functions) + marshalSymbolsBlockReferences(buf[n:], h.V3.Strings) } -func (h *PartitionHeader) unmarshal(buf []byte) (err error) { +func (h *PartitionHeader) unmarshal(buf []byte, version int) (err error) { h.Partition = binary.BigEndian.Uint64(buf[0:8]) h.Stacktraces = make([]StacktraceBlockHeader, int(binary.BigEndian.Uint32(buf[8:12]))) - h.Locations = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[12:16]))) - h.Mappings = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[16:20]))) - h.Functions = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[20:24]))) - h.Strings = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[24:28]))) - - buf = buf[28:] - stacktracesSize := len(h.Stacktraces) * stacktraceBlockHeaderSize - if err = h.unmarshalStacktraceChunks(buf[:stacktracesSize]); err != nil { - return err - } - buf = buf[stacktracesSize:] - locationsSize := len(h.Locations) * symbolsBlockReferenceSize - if err = h.unmarshalSymbolsBlockReferences(h.Locations, buf[:locationsSize]); err != nil { - return err - } - buf = buf[locationsSize:] - mappingsSize := len(h.Mappings) * symbolsBlockReferenceSize - if err = h.unmarshalSymbolsBlockReferences(h.Mappings, buf[:mappingsSize]); err != nil { - return err - } - buf = buf[mappingsSize:] - functionsSize := len(h.Functions) * symbolsBlockReferenceSize - if err = h.unmarshalSymbolsBlockReferences(h.Functions, buf[:functionsSize]); err != nil { - return err - } - buf = buf[functionsSize:] - stringsSize := len(h.Strings) * symbolsBlockReferenceSize - if err = h.unmarshalSymbolsBlockReferences(h.Strings, buf[:stringsSize]); err != nil { - return err + switch version { + case FormatV2: + h.V2 = new(PartitionHeaderV2) + h.V2.Locations = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[12:16]))) + h.V2.Mappings = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[16:20]))) + h.V2.Functions = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[20:24]))) + h.V2.Strings = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[24:28]))) + buf = buf[28:] + stacktracesSize := len(h.Stacktraces) * stacktraceBlockHeaderSize + if err = h.unmarshalStacktraceBlockHeaders(buf[:stacktracesSize]); err != nil { + return err + } + err = h.V2.unmarshal(buf[stacktracesSize:]) + case FormatV3: + buf = buf[12:] + stacktracesSize := len(h.Stacktraces) * stacktraceBlockHeaderSize + if err = h.unmarshalStacktraceBlockHeaders(buf[:stacktracesSize]); err != nil { + return err + } + h.V3 = new(PartitionHeaderV3) + err = h.V3.unmarshal(buf[stacktracesSize:]) + default: + return fmt.Errorf("bug: unsupported version: %d", version) } - - return nil + // TODO(kolesnikovae): Validate headers. + return err } func (h *PartitionHeader) Size() int64 { - s := 28 + s := 12 // Partition 8b + number of stacktrace blocks. 
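+	// The version-specific part (V2 or V3 block references) is added below.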
s += len(h.Stacktraces) * stacktraceBlockHeaderSize - r := len(h.Locations) + len(h.Mappings) + len(h.Functions) + len(h.Strings) - s += r * symbolsBlockReferenceSize + if h.V3 != nil { + s += h.V3.size() + } + if h.V2 != nil { + s += h.V2.size() + } return int64(s) } -func (h *PartitionHeader) unmarshalStacktraceChunks(b []byte) error { +type PartitionHeaderV3 struct { + Locations SymbolsBlockHeader + Mappings SymbolsBlockHeader + Functions SymbolsBlockHeader + Strings SymbolsBlockHeader +} + +const partitionHeaderV3Size = int(unsafe.Sizeof(PartitionHeaderV3{})) + +func (h *PartitionHeaderV3) size() int { return partitionHeaderV3Size } + +func (h *PartitionHeaderV3) unmarshal(buf []byte) (err error) { + s := len(buf) + if s%symbolsBlockReferenceSize > 0 { + return ErrInvalidSize + } + h.Locations.unmarshal(buf[:symbolsBlockReferenceSize]) + buf = buf[symbolsBlockReferenceSize:] + h.Mappings.unmarshal(buf[:symbolsBlockReferenceSize]) + buf = buf[symbolsBlockReferenceSize:] + h.Functions.unmarshal(buf[:symbolsBlockReferenceSize]) + buf = buf[symbolsBlockReferenceSize:] + h.Strings.unmarshal(buf[:symbolsBlockReferenceSize]) + return nil +} + +func (h *PartitionHeader) unmarshalStacktraceBlockHeaders(b []byte) error { s := len(b) if s%stacktraceBlockHeaderSize > 0 { return ErrInvalidSize @@ -316,19 +336,42 @@ func (h *PartitionHeader) unmarshalStacktraceChunks(b []byte) error { return nil } -func (h *PartitionHeader) unmarshalSymbolsBlockReferences(refs []SymbolsBlockReference, b []byte) error { - s := len(b) - if s%symbolsBlockReferenceSize > 0 { - return ErrInvalidSize - } - for i := range refs { - off := i * symbolsBlockReferenceSize - refs[i].unmarshal(b[off : off+symbolsBlockReferenceSize]) - } - return nil +// SymbolsBlockHeader describes a collection of elements encoded in a +// content-specific way: symbolic information such as locations, functions, +// mappings, and strings is represented as Array of Structures in memory, +// and is encoded as Structure of Arrays when written on disk. +type SymbolsBlockHeader struct { + // Offset in the data file. + Offset uint64 + // Size of the section. + Size uint32 + // Checksum of the section. + CRC uint32 + // Length denotes the total number of items encoded. + Length uint32 + // BlockSize denotes the number of items per block. 
+ BlockSize uint32 +} + +const symbolsBlockReferenceSize = int(unsafe.Sizeof(SymbolsBlockHeader{})) + +func (h *SymbolsBlockHeader) marshal(b []byte) { + binary.BigEndian.PutUint64(b[0:8], h.Offset) + binary.BigEndian.PutUint32(b[8:12], h.Size) + binary.BigEndian.PutUint32(b[12:16], h.CRC) + binary.BigEndian.PutUint32(b[16:20], h.Length) + binary.BigEndian.PutUint32(b[20:24], h.BlockSize) } -func marshalSymbolsBlockReferences(b []byte, refs []SymbolsBlockReference) int { +func (h *SymbolsBlockHeader) unmarshal(b []byte) { + h.Offset = binary.BigEndian.Uint64(b[0:8]) + h.Size = binary.BigEndian.Uint32(b[8:12]) + h.CRC = binary.BigEndian.Uint32(b[12:16]) + h.Length = binary.BigEndian.Uint32(b[16:20]) + h.BlockSize = binary.BigEndian.Uint32(b[20:24]) +} + +func marshalSymbolsBlockReferences(b []byte, refs ...SymbolsBlockHeader) int { var off int for i := range refs { refs[i].marshal(b[off : off+symbolsBlockReferenceSize]) @@ -337,45 +380,72 @@ func marshalSymbolsBlockReferences(b []byte, refs []SymbolsBlockReference) int { return off } -type SymbolsBlockReference struct { - Offset uint32 - Size uint32 - CRC uint32 +type PartitionHeaderV2 struct { + Locations []RowRangeReference + Mappings []RowRangeReference + Functions []RowRangeReference + Strings []RowRangeReference } -const symbolsBlockReferenceSize = int(unsafe.Sizeof(SymbolsBlockReference{})) - -func (r *SymbolsBlockReference) marshal(b []byte) { - binary.BigEndian.PutUint32(b[0:4], r.Offset) - binary.BigEndian.PutUint32(b[4:8], r.Size) - binary.BigEndian.PutUint32(b[8:12], r.CRC) +func (h *PartitionHeaderV2) size() int { + s := 16 // Length of row ranges per type. + r := len(h.Locations) + len(h.Mappings) + len(h.Functions) + len(h.Strings) + return s + rowRangeReferenceSize*r } -func (r *SymbolsBlockReference) unmarshal(b []byte) { - r.Offset = binary.BigEndian.Uint32(b[0:4]) - r.Size = binary.BigEndian.Uint32(b[4:8]) - r.CRC = binary.BigEndian.Uint32(b[8:12]) +func (h *PartitionHeaderV2) unmarshal(buf []byte) (err error) { + locationsSize := len(h.Locations) * rowRangeReferenceSize + if err = h.unmarshalRowRangeReferences(h.Locations, buf[:locationsSize]); err != nil { + return err + } + buf = buf[locationsSize:] + mappingsSize := len(h.Mappings) * rowRangeReferenceSize + if err = h.unmarshalRowRangeReferences(h.Mappings, buf[:mappingsSize]); err != nil { + return err + } + buf = buf[mappingsSize:] + functionsSize := len(h.Functions) * rowRangeReferenceSize + if err = h.unmarshalRowRangeReferences(h.Functions, buf[:functionsSize]); err != nil { + return err + } + buf = buf[functionsSize:] + stringsSize := len(h.Strings) * rowRangeReferenceSize + if err = h.unmarshalRowRangeReferences(h.Strings, buf[:stringsSize]); err != nil { + return err + } + return nil } -func (r *SymbolsBlockReference) AsRowRange() RowRangeReference { - return RowRangeReference{ - RowGroup: r.Offset, - Index: r.Size, - Rows: r.CRC, +func (h *PartitionHeaderV2) unmarshalRowRangeReferences(refs []RowRangeReference, b []byte) error { + s := len(b) + if s%rowRangeReferenceSize > 0 { + return ErrInvalidSize + } + for i := range refs { + off := i * rowRangeReferenceSize + refs[i].unmarshal(b[off : off+rowRangeReferenceSize]) } + return nil } +const rowRangeReferenceSize = int(unsafe.Sizeof(RowRangeReference{})) + type RowRangeReference struct { RowGroup uint32 Index uint32 Rows uint32 } -// SymbolsBlockReferencesAsRows re-interprets SymbolsBlockReference as -// RowRangeReference, that used to describe parquet table row ranges (v2). 
-// Both types have identical binary layouts but different semantics. -func SymbolsBlockReferencesAsRows(s []SymbolsBlockReference) []RowRangeReference { - return *(*[]RowRangeReference)(unsafe.Pointer(&s)) +func (r *RowRangeReference) marshal(b []byte) { + binary.BigEndian.PutUint32(b[0:4], r.RowGroup) + binary.BigEndian.PutUint32(b[4:8], r.Index) + binary.BigEndian.PutUint32(b[8:12], r.Rows) +} + +func (r *RowRangeReference) unmarshal(b []byte) { + r.RowGroup = binary.BigEndian.Uint32(b[0:4]) + r.Index = binary.BigEndian.Uint32(b[4:8]) + r.Rows = binary.BigEndian.Uint32(b[8:12]) } func ReadIndexFile(b []byte) (f IndexFile, err error) { @@ -394,22 +464,28 @@ func ReadIndexFile(b []byte) (f IndexFile, err error) { return f, fmt.Errorf("unmarshal table of contents: %w", err) } + // TODO: validate TOC + // Version-specific data section. switch f.Header.Version { default: - // Must never happen: the version is verified - // when the file header is read. - panic("bug: invalid version") + return f, fmt.Errorf("bug: unsupported version: %d", f.Header.Version) case FormatV1: sch := f.TOC.Entries[tocEntryStacktraceChunkHeaders] - if err = f.PartitionHeaders.fromChunks(b[sch.Offset : sch.Offset+sch.Size]); err != nil { + if err = f.PartitionHeaders.UnmarshalV1(b[sch.Offset : sch.Offset+sch.Size]); err != nil { return f, fmt.Errorf("unmarshal stacktraces: %w", err) } - case FormatV2, FormatV3: + case FormatV2: ph := f.TOC.Entries[tocEntryPartitionHeaders] - if err = f.PartitionHeaders.Unmarshal(b[ph.Offset : ph.Offset+ph.Size]); err != nil { + if err = f.PartitionHeaders.UnmarshalV2(b[ph.Offset : ph.Offset+ph.Size]); err != nil { + return f, fmt.Errorf("reading partition headers: %w", err) + } + + case FormatV3: + ph := f.TOC.Entries[tocEntryPartitionHeaders] + if err = f.PartitionHeaders.UnmarshalV3(b[ph.Offset : ph.Offset+ph.Size]); err != nil { return f, fmt.Errorf("reading partition headers: %w", err) } } @@ -512,152 +588,60 @@ func (h *StacktraceBlockHeader) unmarshal(b []byte) { h.CRC = binary.BigEndian.Uint32(b[60:64]) } -// symbolic information such as locations, functions, mappings, -// and strings is represented as Array of Structures in memory, -// and is encoded as Structure of Arrays when written on disk. -// -// The common structure of the encoded symbolic data is as follows: -// -// [Header] -// [Data encoded in blocks] -// [CRC32] -// -// Where the block format depends on the contents. -// -// Note that the data is decoded in a stream fashion, therefore -// any error in the data will be detected only after all the blocks -// are read in and decoded. -type symbolsBlockHeader struct { - Magic [4]byte - Version uint32 - // Length denotes the total number of items encoded. - Length uint32 - // BlockSize denotes the number of items per block. 
- BlockSize uint32 -} - -const symbolsBlockHeaderSize = int(unsafe.Sizeof(symbolsBlockHeader{})) - -func newSymbolsBlockHeader(n, bs int) symbolsBlockHeader { - return symbolsBlockHeader{ - Magic: symdbMagic, - Version: 1, - Length: uint32(n), - BlockSize: uint32(bs), - } -} - -func (h *symbolsBlockHeader) marshal(b []byte) { - b[0], b[1], b[2], b[3] = h.Magic[0], h.Magic[1], h.Magic[2], h.Magic[3] - binary.BigEndian.PutUint32(b[4:8], h.Version) - binary.BigEndian.PutUint32(b[8:12], h.Length) - binary.BigEndian.PutUint32(b[12:16], h.BlockSize) -} - -func (h *symbolsBlockHeader) unmarshal(b []byte) { - h.Magic[0], h.Magic[1], h.Magic[2], h.Magic[3] = b[0], b[1], b[2], b[3] - h.Version = binary.BigEndian.Uint32(b[4:8]) - h.Length = binary.BigEndian.Uint32(b[8:12]) - h.BlockSize = binary.BigEndian.Uint32(b[12:16]) -} - -func (h *symbolsBlockHeader) validate() error { - if !bytes.Equal(h.Magic[:], symdbMagic[:]) { - return ErrInvalidMagic - } - if h.Version >= 2 { - return ErrUnknownVersion - } - if h.Length >= 1<<20 && h.BlockSize >= 1<<20 { - return ErrInvalidSize - } - return nil -} - -func writeSymbolsBlockHeader(w io.Writer, buf []byte, h symbolsBlockHeader) ([]byte, error) { - if err := h.validate(); err != nil { - return buf, err - } - buf = slices.GrowLen(buf, symbolsBlockHeaderSize) - h.marshal(buf) - _, err := w.Write(buf) - return buf, err -} - -func readSymbolsBlockHeader(r io.Reader, buf []byte, h *symbolsBlockHeader) ([]byte, error) { - buf = slices.GrowLen(buf, symbolsBlockHeaderSize) - if _, err := io.ReadFull(r, buf); err != nil { - return buf, err - } - h.unmarshal(buf) - return buf, h.validate() -} - type symbolsBlockEncoder[T any] interface { encode(w io.Writer, block []T) error } type symbolsEncoder[T any] struct { - w io.Writer - e symbolsBlockEncoder[T] - bs int - buf []byte + e symbolsBlockEncoder[T] + bs int } const defaultSymbolsBlockSize = 1 << 10 -func newSymbolsEncoder[T any](w io.Writer, e symbolsBlockEncoder[T]) *symbolsEncoder[T] { - return &symbolsEncoder[T]{w: w, e: e, bs: defaultSymbolsBlockSize} +func newSymbolsEncoder[T any](e symbolsBlockEncoder[T]) *symbolsEncoder[T] { + return &symbolsEncoder[T]{e: e, bs: defaultSymbolsBlockSize} } -func (e *symbolsEncoder[T]) Encode(items []T) (err error) { - h := newSymbolsBlockHeader(len(items), e.bs) - if e.buf, err = writeSymbolsBlockHeader(e.w, e.buf, h); err != nil { - return err - } - for i := uint32(0); i < h.Length; i += h.BlockSize { - block := items[i:math.Min(i+h.BlockSize, h.Length)] - if err = e.e.encode(e.w, block); err != nil { +func (e *symbolsEncoder[T]) Encode(w io.Writer, items []T) (err error) { + l := len(items) + for i := 0; i < l; i += e.bs { + block := items[i:math.Min(i+e.bs, l)] + if err = e.e.encode(w, block); err != nil { return err } } return nil } -func (e *symbolsEncoder[T]) Reset(w io.Writer) { e.w = w } - type symbolsBlockDecoder[T any] interface { decode(r io.Reader, block []T) error } type symbolsDecoder[T any] struct { - r io.Reader - h symbolsBlockHeader + h SymbolsBlockHeader d symbolsBlockDecoder[T] - - buf []byte } -func newSymbolsDecoder[T any](r io.Reader, d symbolsBlockDecoder[T]) *symbolsDecoder[T] { - return &symbolsDecoder[T]{r: r, d: d} +func newSymbolsDecoder[T any](h SymbolsBlockHeader, d symbolsBlockDecoder[T]) *symbolsDecoder[T] { + return &symbolsDecoder[T]{h: h, d: d} } -func (d *symbolsDecoder[T]) Open() (n int, err error) { - d.buf, err = readSymbolsBlockHeader(d.r, d.buf, &d.h) - return int(d.h.Length), err -} - -func (d *symbolsDecoder[T]) Decode(items []T) error 
{ +func (d *symbolsDecoder[T]) Decode(dst []T, r io.Reader) error { + if d.h.BlockSize == 0 || d.h.Length == 0 { + return nil + } + if len(dst) < int(d.h.Length) { + return fmt.Errorf("%w: buffer too short", ErrInvalidSize) + } blocks := int((d.h.Length + d.h.BlockSize - 1) / d.h.BlockSize) for i := 0; i < blocks; i++ { lo := i * int(d.h.BlockSize) hi := math.Min(lo+int(d.h.BlockSize), int(d.h.Length)) - block := items[lo:hi] - if err := d.d.decode(d.r, block); err != nil { + block := dst[lo:hi] + if err := d.d.decode(r, block); err != nil { return err } } return nil } - -func (d *symbolsDecoder[T]) Reset(r io.Reader) { d.r = r } diff --git a/pkg/phlaredb/symdb/partition_memory.go b/pkg/phlaredb/symdb/partition_memory.go index e5a81b2740..2ba82073a0 100644 --- a/pkg/phlaredb/symdb/partition_memory.go +++ b/pkg/phlaredb/symdb/partition_memory.go @@ -368,16 +368,16 @@ func (p *PartitionWriter) Release() { } func (p *PartitionWriter) writeTo(w *writer) (err error) { - if p.header.Strings, err = writeSymbolBlocks(w.dataFile, p.strings.slice, w.stringsEncoder); err != nil { + if p.header.V3.Strings, err = writeSymbolsBlock(w.dataFile, p.strings.slice, w.stringsEncoder); err != nil { return err } - if p.header.Mappings, err = writeSymbolBlocks(w.dataFile, p.mappings.slice, w.mappingsEncoder); err != nil { + if p.header.V3.Mappings, err = writeSymbolsBlock(w.dataFile, p.mappings.slice, w.mappingsEncoder); err != nil { return err } - if p.header.Functions, err = writeSymbolBlocks(w.dataFile, p.functions.slice, w.functionsEncoder); err != nil { + if p.header.V3.Functions, err = writeSymbolsBlock(w.dataFile, p.functions.slice, w.functionsEncoder); err != nil { return err } - if p.header.Locations, err = writeSymbolBlocks(w.dataFile, p.locations.slice, w.locationsEncoder); err != nil { + if p.header.V3.Locations, err = writeSymbolsBlock(w.dataFile, p.locations.slice, w.locationsEncoder); err != nil { return err } for ci, c := range p.stacktraces.chunks { @@ -404,25 +404,16 @@ func (p *PartitionWriter) writeTo(w *writer) (err error) { return nil } -func writeSymbolBlocks[T any](f *fileWriter, s []T, e *symbolsEncoder[T]) ([]SymbolsBlockReference, error) { - // TODO(kolesnikovae): Split into blocks (< 1M). 
- h, err := writeSymbolsBlock(f, func(w io.Writer) error { - e.Reset(w) - err := e.Encode(s) - e.Reset(nil) - return err - }) - return []SymbolsBlockReference{h}, err -} - -func writeSymbolsBlock(w *fileWriter, fn func(io.Writer) error) (h SymbolsBlockReference, err error) { - h.Offset = uint32(w.w.offset) +func writeSymbolsBlock[T any](w *fileWriter, s []T, e *symbolsEncoder[T]) (h SymbolsBlockHeader, err error) { + h.Offset = uint64(w.w.offset) crc := crc32.New(castagnoli) mw := io.MultiWriter(crc, w.w) - if err = fn(mw); err != nil { + if err = e.Encode(mw, s); err != nil { return h, err } - h.Size = uint32(w.w.offset) - h.Offset + h.Size = uint32(w.w.offset) - uint32(h.Offset) h.CRC = crc.Sum32() + h.Length = uint32(len(s)) + h.BlockSize = uint32(e.bs) return h, nil } diff --git a/pkg/phlaredb/symdb/strings_test.go b/pkg/phlaredb/symdb/strings_test.go index 5b391957c8..49c306c408 100644 --- a/pkg/phlaredb/symdb/strings_test.go +++ b/pkg/phlaredb/symdb/strings_test.go @@ -77,17 +77,21 @@ func Test_StringsEncoding(t *testing.T) { for _, tc := range testCases { tc := tc t.Run(tc.description, func(t *testing.T) { - var output bytes.Buffer - e := newSymbolsEncoder[string](&output, new(stringsBlockEncoder)) + var buf bytes.Buffer + e := newSymbolsEncoder[string](new(stringsBlockEncoder)) if tc.blockSize > 0 { e.bs = tc.blockSize } - require.NoError(t, e.Encode(tc.strings)) - d := newSymbolsDecoder[string](&output, new(stringsBlockDecoder)) - n, err := d.Open() - require.NoError(t, err) - out := make([]string, n) - require.NoError(t, d.Decode(out)) + require.NoError(t, e.Encode(&buf, tc.strings)) + + h := SymbolsBlockHeader{ + Length: uint32(len(tc.strings)), + BlockSize: uint32(e.bs), + } + d := newSymbolsDecoder[string](h, new(stringsBlockDecoder)) + + out := make([]string, h.Length) + require.NoError(t, d.Decode(out, &buf)) require.Equal(t, tc.strings, out) }) } diff --git a/pkg/phlaredb/symdb/symdb.go b/pkg/phlaredb/symdb/symdb.go index 56ba25e59b..588296c5a2 100644 --- a/pkg/phlaredb/symdb/symdb.go +++ b/pkg/phlaredb/symdb/symdb.go @@ -165,7 +165,7 @@ func (s *SymDB) PartitionWriter(partition uint64) *PartitionWriter { func (s *SymDB) newPartition(partition uint64) *PartitionWriter { p := PartitionWriter{ - header: PartitionHeader{Partition: partition}, + header: PartitionHeader{Partition: partition, V3: new(PartitionHeaderV3)}, stacktraces: newStacktracesPartition(s.config.Stacktraces.MaxNodesPerChunk), } p.strings.init() From fe4b87d169ada9f6db2fc6c3b38d3b3953140fb2 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 23 Apr 2024 14:16:17 +0800 Subject: [PATCH 12/36] add header crc checks --- pkg/phlaredb/symdb/block_reader_test.go | 45 +++++++++++++++++++++++ pkg/phlaredb/symdb/format.go | 6 ++-- pkg/phlaredb/symdb/functions.go | 34 +++++++++--------- pkg/phlaredb/symdb/locations.go | 48 ++++++++++++------------- pkg/phlaredb/symdb/mappings.go | 45 +++++++++++------------ pkg/phlaredb/symdb/strings.go | 43 +++++++++------------- 6 files changed, 123 insertions(+), 98 deletions(-) diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index fcc63ee9b2..f4374489b3 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -2,6 +2,7 @@ package symdb import ( "context" + "os" "testing" "github.com/stretchr/testify/mock" @@ -24,6 +25,50 @@ var testBlockMeta = &block.Meta{ }, } +func Test_write_block_fixture(t *testing.T) { + t.Skip() + b := newBlockSuite(t, [][]string{ + {"testdata/profile.pb.gz"}, + 
{"testdata/profile.pb.gz"}, + }) + const fixtureDir = "testdata/symbols/v3" + require.NoError(t, os.RemoveAll(fixtureDir)) + require.NoError(t, os.Rename(b.config.Dir, fixtureDir)) +} + +func Test_Reader_Open_v3(t *testing.T) { + // The block contains two partitions (0 and 1), each partition + // stores symbols of the testdata/profile.pb.gz profile + b, err := filesystem.NewBucket("testdata/symbols/v3") + require.NoError(t, err) + x, err := Open(context.Background(), b, testBlockMeta) + require.NoError(t, err) + + r := NewResolver(context.Background(), x) + defer r.Release() + r.AddSamples(0, schemav1.Samples{ + StacktraceIDs: []uint32{1, 2, 3, 4, 5}, + Values: []uint64{1, 1, 1, 1, 1}, + }) + r.AddSamples(1, schemav1.Samples{ + StacktraceIDs: []uint32{1, 2, 3, 4, 5}, + Values: []uint64{1, 1, 1, 1, 1}, + }) + + resolved, err := r.Tree() + require.NoError(t, err) + expected := `. +├── github.com/pyroscope-io/pyroscope/pkg/agent.(*ProfileSession).takeSnapshots: self 2 total 8 +│ └── github.com/pyroscope-io/pyroscope/pkg/agent/gospy.(*GoSpy).Snapshot: self 2 total 6 +│ └── github.com/pyroscope-io/pyroscope/pkg/convert.ParsePprof: self 0 total 4 +│ └── io/ioutil.ReadAll: self 2 total 4 +│ └── io.ReadAll: self 2 total 2 +└── net/http.(*conn).serve: self 2 total 2 +` + + require.Equal(t, expected, resolved.String()) +} + func Test_Reader_Open_v2(t *testing.T) { // The block contains two partitions (0 and 1), each partition // stores symbols of the testdata/profile.pb.gz profile diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index d71b2fa642..0164c77ba0 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -310,8 +310,7 @@ const partitionHeaderV3Size = int(unsafe.Sizeof(PartitionHeaderV3{})) func (h *PartitionHeaderV3) size() int { return partitionHeaderV3Size } func (h *PartitionHeaderV3) unmarshal(buf []byte) (err error) { - s := len(buf) - if s%symbolsBlockReferenceSize > 0 { + if len(buf) < symbolsBlockReferenceSize { return ErrInvalidSize } h.Locations.unmarshal(buf[:symbolsBlockReferenceSize]) @@ -614,8 +613,9 @@ func (e *symbolsEncoder[T]) Encode(w io.Writer, items []T) (err error) { return nil } +// TODO: args order type symbolsBlockDecoder[T any] interface { - decode(r io.Reader, block []T) error + decode(r io.Reader, dst []T) error } type symbolsDecoder[T any] struct { diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go index d4f5833f1e..ef2de67237 100644 --- a/pkg/phlaredb/symdb/functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "fmt" + "hash/crc32" "io" "unsafe" @@ -26,29 +27,26 @@ type functionsBlockHeader struct { SystemNameSize uint32 FileNameSize uint32 StartLineSize uint32 + CRC uint32 } func (h *functionsBlockHeader) marshal(b []byte) { - binary.LittleEndian.PutUint32(b[0:4], h.FunctionsLen) - binary.LittleEndian.PutUint32(b[4:8], h.NameSize) - binary.LittleEndian.PutUint32(b[8:12], h.SystemNameSize) - binary.LittleEndian.PutUint32(b[12:16], h.FileNameSize) - binary.LittleEndian.PutUint32(b[16:20], h.StartLineSize) + binary.BigEndian.PutUint32(b[0:4], h.FunctionsLen) + binary.BigEndian.PutUint32(b[4:8], h.NameSize) + binary.BigEndian.PutUint32(b[8:12], h.SystemNameSize) + binary.BigEndian.PutUint32(b[12:16], h.FileNameSize) + binary.BigEndian.PutUint32(b[16:20], h.StartLineSize) + h.CRC = crc32.Checksum(b[0:20], castagnoli) + binary.BigEndian.PutUint32(b[20:24], h.CRC) } func (h *functionsBlockHeader) unmarshal(b []byte) { - h.FunctionsLen = 
binary.LittleEndian.Uint32(b[0:4]) - h.NameSize = binary.LittleEndian.Uint32(b[4:8]) - h.SystemNameSize = binary.LittleEndian.Uint32(b[8:12]) - h.FileNameSize = binary.LittleEndian.Uint32(b[12:16]) - h.StartLineSize = binary.LittleEndian.Uint32(b[16:20]) -} - -// isValid reports whether the header contains sane values. -// This is important as the block might be read before the -// checksum validation. -func (h *functionsBlockHeader) isValid() bool { - return h.FunctionsLen < 1<<20 + h.FunctionsLen = binary.BigEndian.Uint32(b[0:4]) + h.NameSize = binary.BigEndian.Uint32(b[4:8]) + h.SystemNameSize = binary.BigEndian.Uint32(b[8:12]) + h.FileNameSize = binary.BigEndian.Uint32(b[12:16]) + h.StartLineSize = binary.BigEndian.Uint32(b[16:20]) + h.CRC = binary.BigEndian.Uint32(b[20:24]) } type functionsBlockEncoder struct { @@ -126,7 +124,7 @@ func (d *functionsBlockDecoder) readHeader(r io.Reader) error { return nil } d.header.unmarshal(d.tmp) - if !d.header.isValid() { + if crc32.Checksum(d.tmp[:functionsBlockHeaderSize-4], castagnoli) != d.header.CRC { return ErrInvalidSize } return nil diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index 6d5d1ab8f1..f65576f4e7 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "fmt" + "hash/crc32" "io" "unsafe" @@ -31,33 +32,28 @@ type locationsBlockHeader struct { // Optional, might be empty. AddrSize uint32 // Size of the encoded slice of addresses IsFoldedSize uint32 // Size of the encoded slice of is_folded -} - -func (h *locationsBlockHeader) isValid() bool { - return h.LocationsLen > 0 && h.LocationsLen < 1<<20 && - h.MappingSize > 0 && h.MappingSize < 1<<20 && - h.LinesLen > 0 && h.LinesLen < 1<<20 && - h.LinesSize > 0 && h.LinesSize < 1<<20 && - h.AddrSize < 1<<20 && - h.IsFoldedSize < 1<<20 + CRC uint32 // Header CRC. 
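+	// The checksum covers the 24 preceding header bytes and uses the
+	// Castagnoli table; see marshal and unmarshal below.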
} func (h *locationsBlockHeader) marshal(b []byte) { - binary.LittleEndian.PutUint32(b[0:4], h.LocationsLen) - binary.LittleEndian.PutUint32(b[4:8], h.MappingSize) - binary.LittleEndian.PutUint32(b[8:12], h.LinesLen) - binary.LittleEndian.PutUint32(b[12:16], h.LinesSize) - binary.LittleEndian.PutUint32(b[16:20], h.AddrSize) - binary.LittleEndian.PutUint32(b[20:24], h.IsFoldedSize) + binary.BigEndian.PutUint32(b[0:4], h.LocationsLen) + binary.BigEndian.PutUint32(b[4:8], h.MappingSize) + binary.BigEndian.PutUint32(b[8:12], h.LinesLen) + binary.BigEndian.PutUint32(b[12:16], h.LinesSize) + binary.BigEndian.PutUint32(b[16:20], h.AddrSize) + binary.BigEndian.PutUint32(b[20:24], h.IsFoldedSize) + h.CRC = crc32.Checksum(b[0:24], castagnoli) + binary.BigEndian.PutUint32(b[24:28], h.CRC) } func (h *locationsBlockHeader) unmarshal(b []byte) { - h.LocationsLen = binary.LittleEndian.Uint32(b[0:4]) - h.MappingSize = binary.LittleEndian.Uint32(b[4:8]) - h.LinesLen = binary.LittleEndian.Uint32(b[8:12]) - h.LinesSize = binary.LittleEndian.Uint32(b[12:16]) - h.AddrSize = binary.LittleEndian.Uint32(b[16:20]) - h.IsFoldedSize = binary.LittleEndian.Uint32(b[20:24]) + h.LocationsLen = binary.BigEndian.Uint32(b[0:4]) + h.MappingSize = binary.BigEndian.Uint32(b[4:8]) + h.LinesLen = binary.BigEndian.Uint32(b[8:12]) + h.LinesSize = binary.BigEndian.Uint32(b[12:16]) + h.AddrSize = binary.BigEndian.Uint32(b[16:20]) + h.IsFoldedSize = binary.BigEndian.Uint32(b[20:24]) + h.CRC = binary.BigEndian.Uint32(b[24:28]) } type locationsBlockEncoder struct { @@ -169,11 +165,11 @@ type locationsBlockDecoder struct { func (d *locationsBlockDecoder) readHeader(r io.Reader) error { d.tmp = slices.GrowLen(d.tmp, locationsBlockHeaderSize) if _, err := io.ReadFull(r, d.tmp); err != nil { - return nil + return err } d.header.unmarshal(d.tmp) - if !d.header.isValid() { - return ErrInvalidSize + if crc32.Checksum(d.tmp[:locationsBlockHeaderSize-4], castagnoli) != d.header.CRC { + return ErrInvalidCRC } return nil } @@ -182,8 +178,8 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat if err = d.readHeader(r); err != nil { return err } - if d.header.LocationsLen > uint32(len(locations)) { - return fmt.Errorf("locations buffer is too short") + if d.header.LocationsLen != uint32(len(locations)) { + return fmt.Errorf("locations buffer: %w", ErrInvalidSize) } var enc delta.BinaryPackedEncoding diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index d4ce07c531..fbbac30f05 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "fmt" + "hash/crc32" "io" "unsafe" @@ -29,33 +30,30 @@ type mappingsBlockHeader struct { MemoryStartSize uint32 MemoryLimitSize uint32 FileOffsetSize uint32 + CRC uint32 } func (h *mappingsBlockHeader) marshal(b []byte) { - binary.LittleEndian.PutUint32(b[0:4], h.MappingsLen) - binary.LittleEndian.PutUint32(b[4:8], h.FileNameSize) - binary.LittleEndian.PutUint32(b[8:12], h.BuildIDSize) - binary.LittleEndian.PutUint32(b[12:16], h.FlagsSize) - binary.LittleEndian.PutUint32(b[16:20], h.MemoryStartSize) - binary.LittleEndian.PutUint32(b[20:24], h.MemoryLimitSize) - binary.LittleEndian.PutUint32(b[24:28], h.FileOffsetSize) + binary.BigEndian.PutUint32(b[0:4], h.MappingsLen) + binary.BigEndian.PutUint32(b[4:8], h.FileNameSize) + binary.BigEndian.PutUint32(b[8:12], h.BuildIDSize) + binary.BigEndian.PutUint32(b[12:16], h.FlagsSize) + binary.BigEndian.PutUint32(b[16:20], h.MemoryStartSize) + 
binary.BigEndian.PutUint32(b[20:24], h.MemoryLimitSize) + binary.BigEndian.PutUint32(b[24:28], h.FileOffsetSize) + h.CRC = crc32.Checksum(b[0:28], castagnoli) + binary.BigEndian.PutUint32(b[28:32], h.CRC) } func (h *mappingsBlockHeader) unmarshal(b []byte) { - h.MappingsLen = binary.LittleEndian.Uint32(b[0:4]) - h.FileNameSize = binary.LittleEndian.Uint32(b[4:8]) - h.BuildIDSize = binary.LittleEndian.Uint32(b[8:12]) - h.FlagsSize = binary.LittleEndian.Uint32(b[12:16]) - h.MemoryStartSize = binary.LittleEndian.Uint32(b[16:20]) - h.MemoryLimitSize = binary.LittleEndian.Uint32(b[20:24]) - h.FileOffsetSize = binary.LittleEndian.Uint32(b[24:28]) -} - -// isValid reports whether the header contains sane values. -// This is important as the block might be read before the -// checksum validation. -func (h *mappingsBlockHeader) isValid() bool { - return h.MappingsLen < 1<<20 + h.MappingsLen = binary.BigEndian.Uint32(b[0:4]) + h.FileNameSize = binary.BigEndian.Uint32(b[4:8]) + h.BuildIDSize = binary.BigEndian.Uint32(b[8:12]) + h.FlagsSize = binary.BigEndian.Uint32(b[12:16]) + h.MemoryStartSize = binary.BigEndian.Uint32(b[16:20]) + h.MemoryLimitSize = binary.BigEndian.Uint32(b[20:24]) + h.FileOffsetSize = binary.BigEndian.Uint32(b[24:28]) + h.CRC = binary.BigEndian.Uint32(b[28:32]) } type mappingsBlockEncoder struct { @@ -175,10 +173,9 @@ func (d *mappingsBlockDecoder) readHeader(r io.Reader) error { return nil } d.header.unmarshal(d.tmp) - if !d.header.isValid() { - return ErrInvalidSize + if crc32.Checksum(d.tmp[:mappingsBlockHeaderSize-4], castagnoli) != d.header.CRC { + return ErrInvalidCRC } - // TODO: Scale tmp return nil } diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go index a66fc2ae83..b488284646 100644 --- a/pkg/phlaredb/symdb/strings.go +++ b/pkg/phlaredb/symdb/strings.go @@ -27,19 +27,15 @@ type stringsBlockHeader struct { } func (h *stringsBlockHeader) marshal(b []byte) { - binary.LittleEndian.PutUint32(b[0:4], h.StringsLen) + binary.BigEndian.PutUint32(b[0:4], h.StringsLen) b[5] = h.BlockEncoding } func (h *stringsBlockHeader) unmarshal(b []byte) { - h.StringsLen = binary.LittleEndian.Uint32(b[0:4]) + h.StringsLen = binary.BigEndian.Uint32(b[0:4]) h.BlockEncoding = b[5] } -func (h *stringsBlockHeader) isValid() bool { - return h.StringsLen < 1<<20 && h.BlockEncoding == 8 || h.BlockEncoding == 16 -} - type stringsBlockEncoder struct { header stringsBlockHeader buf bytes.Buffer @@ -56,9 +52,7 @@ func (e *stringsBlockEncoder) encode(w io.Writer, strings []string) error { } case 16: for j, s := range strings { - // binary.LittleEndian.PutUint16. 
// TODO: BigEndian - e.tmp[j*2] = byte(len(s)) - e.tmp[j*2+1] = byte(len(s) >> 8) + binary.BigEndian.PutUint16(e.tmp[j*2:], uint16(len(s))) } } if _, err := e.buf.Write(e.tmp[:len(strings)*int(e.header.BlockEncoding)/8]); err != nil { @@ -108,39 +102,34 @@ type stringsBlockDecoder struct { func (d *stringsBlockDecoder) readHeader(r io.Reader) error { d.tmp = slices.GrowLen(d.tmp, stringsBlockHeaderSize) if _, err := io.ReadFull(r, d.tmp); err != nil { - return nil + return err } d.header.unmarshal(d.tmp) - if !d.header.isValid() { - return ErrInvalidSize + if d.header.BlockEncoding == 8 || d.header.BlockEncoding == 16 { + return nil } - return nil + return fmt.Errorf("invalid string block encoding: %d", d.header.BlockEncoding) } func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) { if err = d.readHeader(r); err != nil { return err } - if d.header.StringsLen < uint32(len(strings)) { - return fmt.Errorf("strings buffer is too short") + if d.header.StringsLen != uint32(len(strings)) { + return fmt.Errorf("invalid string buffer size") } - switch d.header.BlockEncoding { - case 8: + if d.header.BlockEncoding == 8 { return d.decodeStrings8(r, strings) - case 16: - return d.decodeStrings16(r, strings) - default: - // Header validation ensures this never happens. } - return nil + return d.decodeStrings16(r, strings) } func (d *stringsBlockDecoder) decodeStrings8(r io.Reader, dst []string) (err error) { - d.tmp = slices.GrowLen(d.tmp, int(d.header.StringsLen)) // 1 byte per string. + d.tmp = slices.GrowLen(d.tmp, len(dst)) // 1 byte per string. if _, err = io.ReadFull(r, d.tmp); err != nil { return err } - for i := uint32(0); i < d.header.StringsLen; i++ { + for i := 0; i < len(dst); i++ { s := make([]byte, d.tmp[i]) if _, err = io.ReadFull(r, s); err != nil { return err @@ -151,12 +140,12 @@ func (d *stringsBlockDecoder) decodeStrings8(r io.Reader, dst []string) (err err } func (d *stringsBlockDecoder) decodeStrings16(r io.Reader, dst []string) (err error) { - d.tmp = slices.GrowLen(d.tmp, int(d.header.StringsLen)*2) // 2 bytes per string. + d.tmp = slices.GrowLen(d.tmp, len(dst)*2) // 2 bytes per string. 
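+	// Two-pass decode: the packed big-endian uint16 length array is read
+	// first, then each string body follows back-to-back in block order.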
if _, err = io.ReadFull(r, d.tmp); err != nil { return err } - for i := uint32(0); i < d.header.StringsLen; i++ { - l := uint16(d.tmp[i*2]) | uint16(d.tmp[i*2+1])<<8 + for i := 0; i < len(dst); i++ { + l := binary.BigEndian.Uint16(d.tmp[i*2:]) s := make([]byte, l) if _, err = io.ReadFull(r, s); err != nil { return err From 59cee243b889e88259ae2971e05143533cae2f64 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 23 Apr 2024 14:48:47 +0800 Subject: [PATCH 13/36] add ReadIndexFile fuzzy test --- pkg/phlaredb/symdb/block_reader_test.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index f4374489b3..32ae659019 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -36,6 +36,22 @@ func Test_write_block_fixture(t *testing.T) { require.NoError(t, os.Rename(b.config.Dir, fixtureDir)) } +func Fuzz_ReadIndexFile(f *testing.F) { + files := []string{ + "testdata/symbols/v3/index.symdb", + "testdata/symbols/v2/index.symdb", + "testdata/symbols/v1/index.symdb", + } + for _, path := range files { + data, err := os.ReadFile(path) + require.NoError(f, err) + f.Add(data) + } + f.Fuzz(func(_ *testing.T, b []byte) { + _, _ = ReadIndexFile(b) + }) +} + func Test_Reader_Open_v3(t *testing.T) { // The block contains two partitions (0 and 1), each partition // stores symbols of the testdata/profile.pb.gz profile From b48faa75db1ac357e648b0f88272e7a8ca00d94f Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 23 Apr 2024 15:18:57 +0800 Subject: [PATCH 14/36] add fetch buffer pool --- pkg/phlaredb/symdb/block_reader.go | 34 +++++++++++++++++------ pkg/phlaredb/symdb/block_reader_test.go | 16 +++++++++++ pkg/phlaredb/symdb/resolver_pprof_test.go | 4 +-- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index 030ecb048a..4de209f969 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -26,9 +26,6 @@ type Reader struct { files map[string]block.File meta *block.Meta - // TODO: fetch buffer pool - fetchBufferSize int - index IndexFile partitions []*partition partitionsMap map[uint64]*partition @@ -36,15 +33,11 @@ type Reader struct { parquetFiles *parquetFiles } -const defaultFetchBufferSize = 4096 - func Open(ctx context.Context, b objstore.BucketReader, m *block.Meta) (*Reader, error) { r := &Reader{ bucket: b, meta: m, files: make(map[string]block.File), - - fetchBufferSize: defaultFetchBufferSize, } for _, f := range r.meta.Files { r.files[filepath.Base(f.RelPath)] = f @@ -356,10 +349,12 @@ func (c *stacktraceBlock) fetch(ctx context.Context) error { if err != nil { return err } + r := getFetchBufReader(rc) defer func() { + putFetchBufReader(r) err = multierror.New(err, rc.Close()).Err() }() - return c.readFrom(bufio.NewReaderSize(rc, c.reader.fetchBufferSize)) + return c.readFrom(r) }) } @@ -416,10 +411,12 @@ func (t *rawTable[T]) fetch(ctx context.Context) error { if err != nil { return err } + r := getFetchBufReader(rc) defer func() { + putFetchBufReader(r) err = multierror.New(err, rc.Close()).Err() }() - return t.readFrom(bufio.NewReaderSize(rc, t.reader.fetchBufferSize)) + return t.readFrom(r) }) } @@ -490,3 +487,22 @@ func (tx *fetchTx) release() { } wg.Wait() } + +const defaultFetchBufferSize = 64 << 10 + +var fetchBufReaderPool = sync.Pool{ + New: func() any { + return bufio.NewReaderSize(nil, defaultFetchBufferSize) + }, 
+} + +func getFetchBufReader(r io.Reader) *bufio.Reader { + b := fetchBufReaderPool.Get().(*bufio.Reader) + b.Reset(r) + return b +} + +func putFetchBufReader(b *bufio.Reader) { + b.Reset(nil) + fetchBufReaderPool.Put(b) +} diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index 32ae659019..623cd985ff 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -141,3 +141,19 @@ type mockStacktraceInserter struct{ mock.Mock } func (m *mockStacktraceInserter) InsertStacktrace(stacktraceID uint32, locations []int32) { m.Called(stacktraceID, locations) } + +func Benchmark_Reader_ResolvePprof(b *testing.B) { + ctx := context.Background() + s := newBlockSuite(b, [][]string{ + {"testdata/big-profile.pb.gz"}, + }) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + r := NewResolver(ctx, s.reader) + r.AddSamples(0, s.indexed[0][0].Samples) + _, err := r.Pprof() + require.NoError(b, err) + r.Release() + } +} diff --git a/pkg/phlaredb/symdb/resolver_pprof_test.go b/pkg/phlaredb/symdb/resolver_pprof_test.go index 207cf92ea3..3a1363b70b 100644 --- a/pkg/phlaredb/symdb/resolver_pprof_test.go +++ b/pkg/phlaredb/symdb/resolver_pprof_test.go @@ -53,9 +53,7 @@ func Benchmark_Resolver_ResolvePprof_Small(b *testing.B) { } func Benchmark_Resolver_ResolvePprof_Big(b *testing.B) { - s := memSuite{t: b, files: [][]string{{"testdata/big-profile.pb.gz"}}} - s.config = DefaultConfig().WithDirectory(b.TempDir()) - s.init() + s := newMemSuite(b, [][]string{{"testdata/big-profile.pb.gz"}}) samples := s.indexed[0][0].Samples b.Run("0", benchmarkResolverResolvePprof(s.db, samples, 0)) b.Run("8K", benchmarkResolverResolvePprof(s.db, samples, 8<<10)) From 40955ca72699ed820386f3643fee33593035ad00 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 23 Apr 2024 15:50:42 +0800 Subject: [PATCH 15/36] improve benchmarks --- pkg/phlaredb/symdb/block_reader_test.go | 14 +++++++++---- pkg/phlaredb/symdb/resolver.go | 2 +- pkg/phlaredb/symdb/resolver_pprof.go | 2 ++ pkg/phlaredb/symdb/resolver_pprof_test.go | 4 +++- pkg/phlaredb/symdb/resolver_pprof_truncate.go | 2 ++ pkg/phlaredb/symdb/symdb_test.go | 21 ++++++++++++++++++- 6 files changed, 38 insertions(+), 7 deletions(-) diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index 623cd985ff..98167a022b 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -144,16 +144,22 @@ func (m *mockStacktraceInserter) InsertStacktrace(stacktraceID uint32, locations func Benchmark_Reader_ResolvePprof(b *testing.B) { ctx := context.Background() - s := newBlockSuite(b, [][]string{ - {"testdata/big-profile.pb.gz"}, - }) + s := memSuite{t: b, files: [][]string{{"testdata/big-profile.pb.gz"}}} + s.config = DefaultConfig().WithDirectory(b.TempDir()) + s.init() + bs := blockSuite{memSuite: &s} + bs.flush() + b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { - r := NewResolver(ctx, s.reader) + r := NewResolver(ctx, bs.reader) r.AddSamples(0, s.indexed[0][0].Samples) _, err := r.Pprof() require.NoError(b, err) r.Release() } + + b.ReportMetric(float64(bs.testBucket.getRangeCount.Load())/float64(b.N), "get_range_calls/op") + b.ReportMetric(float64(bs.testBucket.getRangeSize.Load())/float64(b.N), "get_range_bytes/op") } diff --git a/pkg/phlaredb/symdb/resolver.go b/pkg/phlaredb/symdb/resolver.go index 8705ae31cd..2ba116dcdb 100644 --- a/pkg/phlaredb/symdb/resolver.go +++ 
b/pkg/phlaredb/symdb/resolver.go
@@ -247,7 +247,7 @@ func (r *Resolver) Pprof() (*googlev1.Profile, error) {
 		}
 		lock.Lock()
 		defer lock.Unlock()
-		return p.Merge(resolved)
+		return p.MergeNoClone(resolved)
 	})
 	if err != nil {
 		return nil, err
diff --git a/pkg/phlaredb/symdb/resolver_pprof.go b/pkg/phlaredb/symdb/resolver_pprof.go
index f6541c4d77..42784ffb72 100644
--- a/pkg/phlaredb/symdb/resolver_pprof.go
+++ b/pkg/phlaredb/symdb/resolver_pprof.go
@@ -22,6 +22,8 @@ func (r *pprofProtoSymbols) init(symbols *Symbols, samples schemav1.Samples) {
 
 func (r *pprofProtoSymbols) InsertStacktrace(_ uint32, locations []int32) {
 	s := &googlev1.Sample{
+		// TODO(kolesnikovae): Do not allocate new slices per sample.
+		// Instead, pre-allocate slabs and reference samples from them.
 		LocationId: make([]uint64, len(locations)),
 		Value:      []int64{int64(r.samples.Values[r.cur])},
 	}
diff --git a/pkg/phlaredb/symdb/resolver_pprof_test.go b/pkg/phlaredb/symdb/resolver_pprof_test.go
index 3a1363b70b..207cf92ea3 100644
--- a/pkg/phlaredb/symdb/resolver_pprof_test.go
+++ b/pkg/phlaredb/symdb/resolver_pprof_test.go
@@ -53,7 +53,9 @@ func Benchmark_Resolver_ResolvePprof_Small(b *testing.B) {
 }
 
 func Benchmark_Resolver_ResolvePprof_Big(b *testing.B) {
-	s := newMemSuite(b, [][]string{{"testdata/big-profile.pb.gz"}})
+	s := memSuite{t: b, files: [][]string{{"testdata/big-profile.pb.gz"}}}
+	s.config = DefaultConfig().WithDirectory(b.TempDir())
+	s.init()
 	samples := s.indexed[0][0].Samples
 	b.Run("0", benchmarkResolverResolvePprof(s.db, samples, 0))
 	b.Run("8K", benchmarkResolverResolvePprof(s.db, samples, 8<<10))
diff --git a/pkg/phlaredb/symdb/resolver_pprof_truncate.go b/pkg/phlaredb/symdb/resolver_pprof_truncate.go
index 0bda35c0c2..fa36a19a45 100644
--- a/pkg/phlaredb/symdb/resolver_pprof_truncate.go
+++ b/pkg/phlaredb/symdb/resolver_pprof_truncate.go
@@ -174,6 +174,8 @@ func (r *pprofProtoTruncatedSymbols) addSample(n truncatedStacktraceSample) {
 	}
 	// If this is a new stack trace, copy locations, create
 	// the sample, and add the stack trace to the map.
+	// TODO(kolesnikovae): Do not allocate new slices per sample.
+	// Instead, pre-allocate slabs and reference samples from them.
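+	// The idea: carve LocationId slices out of large pre-allocated []uint64
+	// chunks, starting a new chunk when the current one fills up. Growing a
+	// single slab with append would strand earlier sub-slices on old backing
+	// arrays and keep those arrays alive, defeating the optimization.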
locationsCopy := make([]uint64, len(r.locationsBuf)) copy(locationsCopy, r.locationsBuf) s := &googlev1.Sample{LocationId: locationsCopy, Value: []int64{n.value}} diff --git a/pkg/phlaredb/symdb/symdb_test.go b/pkg/phlaredb/symdb/symdb_test.go index d304e30459..dccb731650 100644 --- a/pkg/phlaredb/symdb/symdb_test.go +++ b/pkg/phlaredb/symdb/symdb_test.go @@ -2,11 +2,14 @@ package symdb import ( "context" + "io" "sort" + "sync/atomic" "testing" "github.com/cespare/xxhash/v2" "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" phlaremodel "github.com/grafana/pyroscope/pkg/model" @@ -28,6 +31,7 @@ type memSuite struct { type blockSuite struct { *memSuite reader *Reader + testBucket } func newMemSuite(t testing.TB, files [][]string) *memSuite { @@ -77,7 +81,10 @@ func (s *memSuite) writeProfileFromFile(p uint64, f string) { func (s *blockSuite) flush() { require.NoError(s.t, s.db.Flush()) - b, err := filesystem.NewBucket(s.config.Dir) + b, err := filesystem.NewBucket(s.config.Dir, func(x objstore.Bucket) (objstore.Bucket, error) { + s.testBucket.Bucket = x + return &s.testBucket, nil + }) require.NoError(s.t, err) s.reader, err = Open(context.Background(), b, testBlockMeta) require.NoError(s.t, err) @@ -87,6 +94,18 @@ func (s *blockSuite) teardown() { require.NoError(s.t, s.reader.Close()) } +type testBucket struct { + getRangeCount atomic.Int64 + getRangeSize atomic.Int64 + objstore.Bucket +} + +func (b *testBucket) GetRange(ctx context.Context, name string, off, length int64) (io.ReadCloser, error) { + b.getRangeCount.Add(1) + b.getRangeSize.Add(length) + return b.Bucket.GetRange(ctx, name, off, length) +} + //nolint:unparam func pprofFingerprint(p *googlev1.Profile, typ int) [][2]uint64 { m := make(map[uint64]uint64, len(p.Sample)) From 3f7838813ab1ef43a762ba195294f2baf9cc679b Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 23 Apr 2024 17:53:25 +0800 Subject: [PATCH 16/36] fix tests --- pkg/phlaredb/compact_test.go | 8 ++----- pkg/phlaredb/head_test.go | 34 +++--------------------------- pkg/phlaredb/symdb/block_reader.go | 27 +++++++++++++++++++++--- pkg/phlaredb/symdb/locations.go | 2 +- pkg/phlaredb/symdb/resolver.go | 3 ++- 5 files changed, 32 insertions(+), 42 deletions(-) diff --git a/pkg/phlaredb/compact_test.go b/pkg/phlaredb/compact_test.go index e7890ce3bb..7fdca44d86 100644 --- a/pkg/phlaredb/compact_test.go +++ b/pkg/phlaredb/compact_test.go @@ -628,15 +628,11 @@ func TestFlushMeta(t *testing.T) { require.Equal(t, uint64(3), b.Meta().Stats.NumSeries) require.Equal(t, uint64(3), b.Meta().Stats.NumSamples) require.Equal(t, uint64(3), b.Meta().Stats.NumProfiles) - require.Len(t, b.Meta().Files, 8) + require.Len(t, b.Meta().Files, 4) require.Equal(t, "index.tsdb", b.Meta().Files[0].RelPath) require.Equal(t, "profiles.parquet", b.Meta().Files[1].RelPath) - require.Equal(t, "symbols/functions.parquet", b.Meta().Files[2].RelPath) + require.Equal(t, "symbols/data.symdb", b.Meta().Files[2].RelPath) require.Equal(t, "symbols/index.symdb", b.Meta().Files[3].RelPath) - require.Equal(t, "symbols/locations.parquet", b.Meta().Files[4].RelPath) - require.Equal(t, "symbols/mappings.parquet", b.Meta().Files[5].RelPath) - require.Equal(t, "symbols/stacktraces.symdb", b.Meta().Files[6].RelPath) - require.Equal(t, "symbols/strings.parquet", b.Meta().Files[7].RelPath) } func newBlock(t testing.TB, generator func() []*testhelper.ProfileBuilder) *singleBlockQuerier { diff --git a/pkg/phlaredb/head_test.go 
b/pkg/phlaredb/head_test.go index 7f2c5cbd5b..c83cd0b883 100644 --- a/pkg/phlaredb/head_test.go +++ b/pkg/phlaredb/head_test.go @@ -370,40 +370,12 @@ func TestHeadFlush(t *testing.T) { }, }, { - RelPath: "symbols/functions.parquet", - Parquet: &block.ParquetFile{ - NumRowGroups: 2, - NumRows: 1423, - }, + RelPath: "symbols/data.symdb", + SizeBytes: 159203, }, { RelPath: "symbols/index.symdb", - SizeBytes: 308, - }, - { - RelPath: "symbols/locations.parquet", - Parquet: &block.ParquetFile{ - NumRowGroups: 2, - NumRows: 2469, - }, - }, - { - RelPath: "symbols/mappings.parquet", - Parquet: &block.ParquetFile{ - NumRowGroups: 2, - NumRows: 3, - }, - }, - { - RelPath: "symbols/stacktraces.symdb", - SizeBytes: 60366, - }, - { - RelPath: "symbols/strings.parquet", - Parquet: &block.ParquetFile{ - NumRowGroups: 2, - NumRows: 1722, - }, + SizeBytes: 384, }, }, Compaction: block.BlockMetaCompaction{ diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index 4de209f969..30fdd70bac 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -98,10 +98,12 @@ func (r *Reader) file(name string) (block.File, error) { func (r *Reader) partitionReader(h *PartitionHeader) *partition { p := &partition{reader: r} - if r.index.Header.Version == FormatV2 { + switch r.index.Header.Version { + case FormatV1: + p.initEmptyTables(h) + case FormatV2: p.initParquetTables(h) - } - if r.index.Header.Version == FormatV3 { + case FormatV3: p.initTables(h) } p.initStacktraces(h.Stacktraces) @@ -166,6 +168,15 @@ func (p *partition) tx() *fetchTx { return &tx } +// Format V1. +func (p *partition) initEmptyTables(*PartitionHeader) { + p.locations = emptyTable[schemav1.InMemoryLocation]{} + p.mappings = emptyTable[schemav1.InMemoryMapping]{} + p.functions = emptyTable[schemav1.InMemoryFunction]{} + p.strings = emptyTable[string]{} +} + +// Format V2. func (p *partition) initParquetTables(h *PartitionHeader) { p.locations = &parquetTable[schemav1.InMemoryLocation, schemav1.LocationPersister]{ bucket: p.reader.bucket, @@ -189,6 +200,7 @@ func (p *partition) initParquetTables(h *PartitionHeader) { } } +// Format V3. func (p *partition) initTables(h *PartitionHeader) { // TODO(kolesnikovae): decoder pool. p.locations = &rawTable[schemav1.InMemoryLocation]{ @@ -441,6 +453,15 @@ func (t *rawTable[T]) release() { }) } +// This is a stub for versions without tables in the block (format v1). +type emptyTable[T any] struct{} + +func (emptyTable[T]) fetch(context.Context) error { return nil } + +func (emptyTable[T]) release() {} + +func (emptyTable[T]) slice() []T { return nil } + // fetchTx facilitates fetching multiple objects in a transactional manner: // if one of the objects has failed, all the remaining ones are released. 
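+// Members are fetched in the order they were appended; it is safe to
+// release a partially fetched transaction.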
type fetchTx []fetchable diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index f65576f4e7..d0a1665f34 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -114,7 +114,7 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat } if folded { - e.tmp = slices.GrowLen(e.tmp, len(e.folded)/8) + e.tmp = slices.GrowLen(e.tmp, len(e.folded)/8+1) encodeBoolean(e.tmp, e.folded) e.header.IsFoldedSize = uint32(len(e.tmp)) e.buf.Write(e.tmp) diff --git a/pkg/phlaredb/symdb/resolver.go b/pkg/phlaredb/symdb/resolver.go index 2ba116dcdb..af56812d9e 100644 --- a/pkg/phlaredb/symdb/resolver.go +++ b/pkg/phlaredb/symdb/resolver.go @@ -247,7 +247,8 @@ func (r *Resolver) Pprof() (*googlev1.Profile, error) { } lock.Lock() defer lock.Unlock() - return p.MergeNoClone(resolved) + // TODO(kolesnikovae): Use MergeNoClone. + return p.Merge(resolved) }) if err != nil { return nil, err From 711eee6b51514c1a0f44412993965b28a42df834 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 23 Apr 2024 18:45:51 +0800 Subject: [PATCH 17/36] fix tests --- pkg/phlaredb/block/block_test.go | 10 +++++----- .../symdb/testdata/symbols/v3/data.symdb | Bin 0 -> 88564 bytes .../symdb/testdata/symbols/v3/index.symdb | Bin 0 -> 640 bytes 3 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 pkg/phlaredb/symdb/testdata/symbols/v3/data.symdb create mode 100644 pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb diff --git a/pkg/phlaredb/block/block_test.go b/pkg/phlaredb/block/block_test.go index 058c9a31c2..c93da7e2b6 100644 --- a/pkg/phlaredb/block/block_test.go +++ b/pkg/phlaredb/block/block_test.go @@ -96,7 +96,7 @@ func TestDelete(t *testing.T) { }) require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(dir, meta.ULID.String()))) - require.Equal(t, 9, len(objects(t, bkt, meta.ULID))) + require.Equal(t, 5, len(objects(t, bkt, meta.ULID))) markedForDeletion := promauto.With(prometheus.NewRegistry()).NewCounter(prometheus.CounterOpts{Name: "test"}) require.NoError(t, block.MarkForDeletion(ctx, log.NewNopLogger(), bkt, meta.ULID, "", false, markedForDeletion)) @@ -116,7 +116,7 @@ func TestDelete(t *testing.T) { } }) require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b2.ULID.String()))) - require.Equal(t, 9, len(objects(t, bkt, b2.ULID))) + require.Equal(t, 5, len(objects(t, bkt, b2.ULID))) // Remove meta.json and check if delete can delete it. 
 	require.NoError(t, bkt.Delete(ctx, path.Join(b2.ULID.String(), block.MetaFilename)))
 
@@ -196,7 +196,7 @@ func TestUpload(t *testing.T) {
 	t.Run("full block", func(t *testing.T) {
 		require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b1.ULID.String())))
-		require.Equal(t, 9, len(bkt.Objects()))
+		require.Equal(t, 5, len(bkt.Objects()))
 		objs := bkt.Objects()
 		require.Contains(t, objs, path.Join(b1.ULID.String(), block.MetaFilename))
 		require.Contains(t, objs, path.Join(b1.ULID.String(), block.IndexFilename))
@@ -205,7 +205,7 @@ func TestUpload(t *testing.T) {
 
 	t.Run("upload is idempotent", func(t *testing.T) {
 		require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b1.ULID.String())))
-		require.Equal(t, 9, len(bkt.Objects()))
+		require.Equal(t, 5, len(bkt.Objects()))
 		objs := bkt.Objects()
 		require.Contains(t, objs, path.Join(b1.ULID.String(), block.MetaFilename))
 		require.Contains(t, objs, path.Join(b1.ULID.String(), block.IndexFilename))
@@ -355,7 +355,7 @@ func TestUploadCleanup(t *testing.T) {
 	require.ErrorIs(t, uploadErr, errUploadFailed)
 
 	// If upload of meta.json fails, nothing is cleaned up.
-	require.Equal(t, 9, len(bkt.Objects()))
+	require.Equal(t, 5, len(bkt.Objects()))
 	require.Greater(t, len(bkt.Objects()[path.Join(b1.String(), block.IndexFilename)]), 0)
 	require.Greater(t, len(bkt.Objects()[path.Join(b1.String(), block.MetaFilename)]), 0)
 }
diff --git a/pkg/phlaredb/symdb/testdata/symbols/v3/data.symdb b/pkg/phlaredb/symdb/testdata/symbols/v3/data.symdb
new file mode 100644
index 0000000000000000000000000000000000000000..486c57d5984b5e3ec14a3e9793b63813aa780b7e
GIT binary patch
literal 88564
[88564 bytes of base85-encoded binary test data omitted]

diff --git a/pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb b/pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb
new file mode 100644
index 0000000000000000000000000000000000000000..da922afdaaf82367ac39784e397de8ab2c557851
GIT binary patch
literal 640
[640 bytes of base85-encoded binary test data omitted]

From: Anton Kolesnikov
Date: Wed, 24 Apr 2024 19:34:42 +0800
Subject: [PATCH 18/36] granular versioning

---
 pkg/phlaredb/symdb/block_reader.go     | 54 ++++++++++++++++++--------
 pkg/phlaredb/symdb/block_writer.go     |  9 ++---
 pkg/phlaredb/symdb/format.go           | 36 ++++++++++++-----
 pkg/phlaredb/symdb/functions.go        | 15 +++++++
 pkg/phlaredb/symdb/locations.go        | 15 +++++++
 pkg/phlaredb/symdb/mappings.go         | 15 +++++++
 pkg/phlaredb/symdb/partition_memory.go |  5 ++-
 pkg/phlaredb/symdb/strings.go          | 15 +++++++
 pkg/phlaredb/symdb/strings_test.go     |  8 ++--
 9 files changed, 135 insertions(+), 37 deletions(-)

diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go
index 30fdd70bac..a204dcc7c1 100644
--- a/pkg/phlaredb/symdb/block_reader.go
+++ b/pkg/phlaredb/symdb/block_reader.go
@@ -54,9 +54,12 @@ func Open(ctx context.Context, b objstore.BucketReader, m *block.Meta) (*Reader,
 	r.partitionsMap = make(map[uint64]*partition, len(r.index.PartitionHeaders))
 	r.partitions = make([]*partition, len(r.index.PartitionHeaders))
 	for i, h := range r.index.PartitionHeaders {
-		ph := r.partitionReader(h)
-		r.partitionsMap[h.Partition] = ph
-		r.partitions[i] = ph
+		var p *partition
+		if p, err = r.partitionReader(h); err != nil {
+			return nil, err
+		}
+		r.partitionsMap[h.Partition] = p
+		r.partitions[i] = p
 	}
 	return r, nil
 }
@@ -96,7 +99,7 @@ func (r *Reader) file(name string) (block.File, error) {
 	return f, nil
 }
 
-func (r *Reader) partitionReader(h *PartitionHeader) *partition {
+func (r *Reader) partitionReader(h *PartitionHeader) (*partition, error) {
 	p := &partition{reader: r}
 	switch r.index.Header.Version {
 	case FormatV1:
@@ -104,10 +107,12 @@ func (r *Reader) partitionReader(h *PartitionHeader) *partition {
 	case FormatV2:
 		p.initParquetTables(h)
 	case FormatV3:
-		p.initTables(h)
+		if err := p.initTables(h); err != nil {
+			return nil, err
+		}
 	}
 	p.initStacktraces(h.Stacktraces)
-	return p
+	return p, nil
 }
 
 var ErrPartitionNotFound = fmt.Errorf("partition not found")
@@ -201,28 +206,43 @@ func (p *partition) initParquetTables(h *PartitionHeader) {
 }
 
 // Format V3.
-func (p *partition) initTables(h *PartitionHeader) {
-	// TODO(kolesnikovae): decoder pool.
- p.locations = &rawTable[schemav1.InMemoryLocation]{ +func (p *partition) initTables(h *PartitionHeader) (err error) { + locations := &rawTable[schemav1.InMemoryLocation]{ reader: p.reader, header: h.V3.Locations, - dec: newSymbolsDecoder[schemav1.InMemoryLocation](h.V3.Locations, new(locationsBlockDecoder)), } - p.mappings = &rawTable[schemav1.InMemoryMapping]{ + if locations.dec, err = locationsDecoder(h.V3.Locations); err != nil { + return err + } + p.locations = locations + + mappings := &rawTable[schemav1.InMemoryMapping]{ reader: p.reader, header: h.V3.Mappings, - dec: newSymbolsDecoder[schemav1.InMemoryMapping](h.V3.Mappings, new(mappingsBlockDecoder)), } - p.functions = &rawTable[schemav1.InMemoryFunction]{ + if mappings.dec, err = mappingsDecoder(h.V3.Mappings); err != nil { + return err + } + p.mappings = mappings + + functions := &rawTable[schemav1.InMemoryFunction]{ reader: p.reader, header: h.V3.Functions, - dec: newSymbolsDecoder[schemav1.InMemoryFunction](h.V3.Functions, new(functionsBlockDecoder)), } - p.strings = &rawTable[string]{ + if functions.dec, err = functionsDecoder(h.V3.Functions); err != nil { + return err + } + p.functions = functions + + strings := &rawTable[string]{ reader: p.reader, header: h.V3.Strings, - dec: newSymbolsDecoder[string](h.V3.Strings, new(stringsBlockDecoder)), } + if strings.dec, err = stringsDecoder(h.V3.Strings); err != nil { + return err + } + p.strings = strings + return nil } func (p *partition) Symbols() *Symbols { @@ -436,7 +456,7 @@ func (t *rawTable[T]) readFrom(r *bufio.Reader) error { crc := crc32.New(castagnoli) tee := io.TeeReader(r, crc) t.s = make([]T, t.header.Length) - if err := t.dec.Decode(t.s, tee); err != nil { + if err := t.dec.decode(t.s, tee); err != nil { return fmt.Errorf("failed to decode symbols: %w", err) } if t.header.CRC != crc.Sum32() { diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index 35c8687d2e..19be26e071 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -37,11 +37,10 @@ func newWriter(c *Config) *writer { }, }, - // TODO(kolesnikovae): encoder pool. - stringsEncoder: newSymbolsEncoder[string](new(stringsBlockEncoder)), - mappingsEncoder: newSymbolsEncoder[v1.InMemoryMapping](new(mappingsBlockEncoder)), - functionsEncoder: newSymbolsEncoder[v1.InMemoryFunction](new(functionsBlockEncoder)), - locationsEncoder: newSymbolsEncoder[v1.InMemoryLocation](new(locationsBlockEncoder)), + stringsEncoder: newStringsEncoder(), + mappingsEncoder: newMappingsEncoder(), + functionsEncoder: newFunctionsEncoder(), + locationsEncoder: newLocationsEncoder(), } } diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 0164c77ba0..81bda15889 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -350,8 +350,20 @@ type SymbolsBlockHeader struct { Length uint32 // BlockSize denotes the number of items per block. BlockSize uint32 + // Format of the encoded data. 
+ Format SymbolsBlockFormat } +type SymbolsBlockFormat uint32 + +const ( + _ SymbolsBlockFormat = iota + BlockLocationsV1 + BlockFunctionsV1 + BlockMappingsV1 + BlockStringsV1 +) + const symbolsBlockReferenceSize = int(unsafe.Sizeof(SymbolsBlockHeader{})) func (h *SymbolsBlockHeader) marshal(b []byte) { @@ -360,6 +372,7 @@ func (h *SymbolsBlockHeader) marshal(b []byte) { binary.BigEndian.PutUint32(b[12:16], h.CRC) binary.BigEndian.PutUint32(b[16:20], h.Length) binary.BigEndian.PutUint32(b[20:24], h.BlockSize) + binary.BigEndian.PutUint32(b[24:28], uint32(h.Format)) } func (h *SymbolsBlockHeader) unmarshal(b []byte) { @@ -368,6 +381,7 @@ func (h *SymbolsBlockHeader) unmarshal(b []byte) { h.CRC = binary.BigEndian.Uint32(b[12:16]) h.Length = binary.BigEndian.Uint32(b[16:20]) h.BlockSize = binary.BigEndian.Uint32(b[20:24]) + h.Format = SymbolsBlockFormat(binary.BigEndian.Uint32(b[24:28])) } func marshalSymbolsBlockReferences(b []byte, refs ...SymbolsBlockHeader) int { @@ -589,31 +603,35 @@ func (h *StacktraceBlockHeader) unmarshal(b []byte) { type symbolsBlockEncoder[T any] interface { encode(w io.Writer, block []T) error + format() SymbolsBlockFormat } type symbolsEncoder[T any] struct { - e symbolsBlockEncoder[T] - bs int + blockEncoder symbolsBlockEncoder[T] + blockSize int } const defaultSymbolsBlockSize = 1 << 10 func newSymbolsEncoder[T any](e symbolsBlockEncoder[T]) *symbolsEncoder[T] { - return &symbolsEncoder[T]{e: e, bs: defaultSymbolsBlockSize} + return &symbolsEncoder[T]{blockEncoder: e, blockSize: defaultSymbolsBlockSize} } -func (e *symbolsEncoder[T]) Encode(w io.Writer, items []T) (err error) { +func (e *symbolsEncoder[T]) encode(w io.Writer, items []T) (err error) { l := len(items) - for i := 0; i < l; i += e.bs { - block := items[i:math.Min(i+e.bs, l)] - if err = e.e.encode(w, block); err != nil { + for i := 0; i < l; i += e.blockSize { + block := items[i:math.Min(i+e.blockSize, l)] + if err = e.blockEncoder.encode(w, block); err != nil { return err } } return nil } -// TODO: args order +func (e *symbolsEncoder[T]) format() SymbolsBlockFormat { + return e.blockEncoder.format() +} + type symbolsBlockDecoder[T any] interface { decode(r io.Reader, dst []T) error } @@ -627,7 +645,7 @@ func newSymbolsDecoder[T any](h SymbolsBlockHeader, d symbolsBlockDecoder[T]) *s return &symbolsDecoder[T]{h: h, d: d} } -func (d *symbolsDecoder[T]) Decode(dst []T, r io.Reader) error { +func (d *symbolsDecoder[T]) decode(dst []T, r io.Reader) error { if d.h.BlockSize == 0 || d.h.Length == 0 { return nil } diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go index ef2de67237..14c09ee4b6 100644 --- a/pkg/phlaredb/symdb/functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -57,6 +57,14 @@ type functionsBlockEncoder struct { ints []int32 } +func newFunctionsEncoder() *symbolsEncoder[v1.InMemoryFunction] { + return newSymbolsEncoder[v1.InMemoryFunction](new(functionsBlockEncoder)) +} + +func (e *functionsBlockEncoder) format() SymbolsBlockFormat { + return BlockFunctionsV1 +} + func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunction) error { e.initWrite(len(functions)) var enc delta.BinaryPackedEncoding @@ -118,6 +126,13 @@ type functionsBlockDecoder struct { tmp []byte } +func functionsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryFunction], error) { + if h.Format == BlockFunctionsV1 { + return newSymbolsDecoder[v1.InMemoryFunction](h, new(functionsBlockDecoder)), nil + } + return nil, fmt.Errorf("%w: unknown functions format: %d", 
ErrUnknownVersion, h.Format) +} + func (d *functionsBlockDecoder) readHeader(r io.Reader) error { d.tmp = slices.GrowLen(d.tmp, functionsBlockHeaderSize) if _, err := io.ReadFull(r, d.tmp); err != nil { diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index d0a1665f34..8d93aac522 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -74,6 +74,14 @@ type locationsBlockEncoder struct { buf bytes.Buffer } +func newLocationsEncoder() *symbolsEncoder[v1.InMemoryLocation] { + return newSymbolsEncoder[v1.InMemoryLocation](new(locationsBlockEncoder)) +} + +func (e *locationsBlockEncoder) format() SymbolsBlockFormat { + return BlockLocationsV1 +} + func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) error { e.initWrite(len(locations)) var addr int64 @@ -162,6 +170,13 @@ type locationsBlockDecoder struct { tmp []byte } +func locationsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryLocation], error) { + if h.Format == BlockLocationsV1 { + return newSymbolsDecoder[v1.InMemoryLocation](h, new(locationsBlockDecoder)), nil + } + return nil, fmt.Errorf("%w: unknown locations format: %d", ErrUnknownVersion, h.Format) +} + func (d *locationsBlockDecoder) readHeader(r io.Reader) error { d.tmp = slices.GrowLen(d.tmp, locationsBlockHeaderSize) if _, err := io.ReadFull(r, d.tmp); err != nil { diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index fbbac30f05..c77ad8bd88 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -65,6 +65,14 @@ type mappingsBlockEncoder struct { ints64 []int64 } +func newMappingsEncoder() *symbolsEncoder[v1.InMemoryMapping] { + return newSymbolsEncoder[v1.InMemoryMapping](new(mappingsBlockEncoder)) +} + +func (e *mappingsBlockEncoder) format() SymbolsBlockFormat { + return BlockMappingsV1 +} + func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping) error { e.initWrite(len(mappings)) var enc delta.BinaryPackedEncoding @@ -167,6 +175,13 @@ type mappingsBlockDecoder struct { tmp []byte } +func mappingsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryMapping], error) { + if h.Format == BlockMappingsV1 { + return newSymbolsDecoder[v1.InMemoryMapping](h, new(mappingsBlockDecoder)), nil + } + return nil, fmt.Errorf("%w: unknown mappings format: %d", ErrUnknownVersion, h.Format) +} + func (d *mappingsBlockDecoder) readHeader(r io.Reader) error { d.tmp = slices.GrowLen(d.tmp, mappingsBlockHeaderSize) if _, err := io.ReadFull(r, d.tmp); err != nil { diff --git a/pkg/phlaredb/symdb/partition_memory.go b/pkg/phlaredb/symdb/partition_memory.go index 2ba82073a0..ab44852dc0 100644 --- a/pkg/phlaredb/symdb/partition_memory.go +++ b/pkg/phlaredb/symdb/partition_memory.go @@ -408,12 +408,13 @@ func writeSymbolsBlock[T any](w *fileWriter, s []T, e *symbolsEncoder[T]) (h Sym h.Offset = uint64(w.w.offset) crc := crc32.New(castagnoli) mw := io.MultiWriter(crc, w.w) - if err = e.Encode(mw, s); err != nil { + if err = e.encode(mw, s); err != nil { return h, err } h.Size = uint32(w.w.offset) - uint32(h.Offset) h.CRC = crc.Sum32() h.Length = uint32(len(s)) - h.BlockSize = uint32(e.bs) + h.BlockSize = uint32(e.blockSize) + h.Format = e.format() return h, nil } diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go index b488284646..bc27701c47 100644 --- a/pkg/phlaredb/symdb/strings.go +++ b/pkg/phlaredb/symdb/strings.go @@ -42,6 +42,14 @@ type stringsBlockEncoder struct { tmp []byte } +func 
newStringsEncoder() *symbolsEncoder[string] { + return newSymbolsEncoder[string](new(stringsBlockEncoder)) +} + +func (e *stringsBlockEncoder) format() SymbolsBlockFormat { + return BlockStringsV1 +} + func (e *stringsBlockEncoder) encode(w io.Writer, strings []string) error { e.initWrite(len(strings)) e.header.BlockEncoding = e.blockEncoding(strings) @@ -99,6 +107,13 @@ type stringsBlockDecoder struct { tmp []byte } +func stringsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[string], error) { + if h.Format == BlockStringsV1 { + return newSymbolsDecoder[string](h, new(stringsBlockDecoder)), nil + } + return nil, fmt.Errorf("%w: unknown strings format: %d", ErrUnknownVersion, h.Format) +} + func (d *stringsBlockDecoder) readHeader(r io.Reader) error { d.tmp = slices.GrowLen(d.tmp, stringsBlockHeaderSize) if _, err := io.ReadFull(r, d.tmp); err != nil { diff --git a/pkg/phlaredb/symdb/strings_test.go b/pkg/phlaredb/symdb/strings_test.go index 49c306c408..f462886043 100644 --- a/pkg/phlaredb/symdb/strings_test.go +++ b/pkg/phlaredb/symdb/strings_test.go @@ -80,18 +80,18 @@ func Test_StringsEncoding(t *testing.T) { var buf bytes.Buffer e := newSymbolsEncoder[string](new(stringsBlockEncoder)) if tc.blockSize > 0 { - e.bs = tc.blockSize + e.blockSize = tc.blockSize } - require.NoError(t, e.Encode(&buf, tc.strings)) + require.NoError(t, e.encode(&buf, tc.strings)) h := SymbolsBlockHeader{ Length: uint32(len(tc.strings)), - BlockSize: uint32(e.bs), + BlockSize: uint32(e.blockSize), } d := newSymbolsDecoder[string](h, new(stringsBlockDecoder)) out := make([]string, h.Length) - require.NoError(t, d.Decode(out, &buf)) + require.NoError(t, d.decode(out, &buf)) require.Equal(t, tc.strings, out) }) } From 3df24259a1bff49e09b598f8624668a8e7c0ced4 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Wed, 24 Apr 2024 20:04:06 +0800 Subject: [PATCH 19/36] small improvements --- pkg/phlaredb/symdb/functions.go | 37 ++++++------- pkg/phlaredb/symdb/locations.go | 37 ++++++------- pkg/phlaredb/symdb/mappings.go | 49 +++++++++--------- pkg/phlaredb/symdb/strings.go | 39 ++++++++------ .../symdb/testdata/symbols/v3/index.symdb | Bin 640 -> 704 bytes 5 files changed, 87 insertions(+), 75 deletions(-) diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go index 14c09ee4b6..71696339c2 100644 --- a/pkg/phlaredb/symdb/functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -120,26 +120,27 @@ func (e *functionsBlockEncoder) initWrite(functions int) { } type functionsBlockDecoder struct { + format SymbolsBlockFormat header functionsBlockHeader ints []int32 - tmp []byte + buf []byte } func functionsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryFunction], error) { if h.Format == BlockFunctionsV1 { - return newSymbolsDecoder[v1.InMemoryFunction](h, new(functionsBlockDecoder)), nil + return newSymbolsDecoder[v1.InMemoryFunction](h, &functionsBlockDecoder{format: h.Format}), nil } return nil, fmt.Errorf("%w: unknown functions format: %d", ErrUnknownVersion, h.Format) } func (d *functionsBlockDecoder) readHeader(r io.Reader) error { - d.tmp = slices.GrowLen(d.tmp, functionsBlockHeaderSize) - if _, err := io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, functionsBlockHeaderSize) + if _, err := io.ReadFull(r, d.buf); err != nil { return nil } - d.header.unmarshal(d.tmp) - if crc32.Checksum(d.tmp[:functionsBlockHeaderSize-4], castagnoli) != d.header.CRC { + d.header.unmarshal(d.buf) + if crc32.Checksum(d.buf[:functionsBlockHeaderSize-4], castagnoli) != d.header.CRC { return 
ErrInvalidSize } return nil @@ -155,11 +156,11 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct var enc delta.BinaryPackedEncoding d.ints = slices.GrowLen(d.ints, int(d.header.FunctionsLen)) - d.tmp = slices.GrowLen(d.tmp, int(d.header.NameSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.NameSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } @@ -167,11 +168,11 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct functions[i].Name = uint32(v) } - d.tmp = slices.GrowLen(d.tmp, int(d.header.SystemNameSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.SystemNameSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } @@ -179,11 +180,11 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct functions[i].SystemName = uint32(v) } - d.tmp = slices.GrowLen(d.tmp, int(d.header.FileNameSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.FileNameSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } @@ -191,11 +192,11 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct functions[i].Filename = uint32(v) } - d.tmp = slices.GrowLen(d.tmp, int(d.header.StartLineSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.StartLineSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index 8d93aac522..653fbae4a8 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -158,6 +158,7 @@ func (e *locationsBlockEncoder) initWrite(locations int) { } type locationsBlockDecoder struct { + format SymbolsBlockFormat header locationsBlockHeader mappings []int32 @@ -167,23 +168,23 @@ type locationsBlockDecoder struct { address []int64 folded []bool - tmp []byte + buf []byte } func locationsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryLocation], error) { if h.Format == BlockLocationsV1 { - return newSymbolsDecoder[v1.InMemoryLocation](h, new(locationsBlockDecoder)), nil + return newSymbolsDecoder[v1.InMemoryLocation](h, &locationsBlockDecoder{format: h.Format}), nil } return nil, fmt.Errorf("%w: unknown locations format: %d", ErrUnknownVersion, h.Format) } func (d *locationsBlockDecoder) readHeader(r io.Reader) error { - d.tmp = slices.GrowLen(d.tmp, locationsBlockHeaderSize) - if _, err := io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, locationsBlockHeaderSize) + if _, err := io.ReadFull(r, d.buf); err != nil { return err } - d.header.unmarshal(d.tmp) - if crc32.Checksum(d.tmp[:locationsBlockHeaderSize-4], castagnoli) != d.header.CRC { + d.header.unmarshal(d.buf) + if crc32.Checksum(d.buf[:locationsBlockHeaderSize-4], castagnoli) != d.header.CRC { return ErrInvalidCRC } return nil @@ -199,11 +200,11 @@ func (d 
*locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat var enc delta.BinaryPackedEncoding // First we decode mapping_id and assign them to locations. - d.tmp = slices.GrowLen(d.tmp, int(d.header.MappingSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.MappingSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.mappings, err = enc.DecodeInt32(d.mappings, d.tmp) + d.mappings, err = enc.DecodeInt32(d.mappings, d.buf) if err != nil { return err } @@ -219,12 +220,12 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat // sub-slices. But it has to be allocated as we can't // reference d.lines, which is reusable. lines := make([]v1.InMemoryLine, d.header.LinesLen) - d.tmp = slices.GrowLen(d.tmp, int(d.header.LinesSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.LinesSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } d.lines = slices.GrowLen(d.lines, int(d.header.LinesLen)) - d.lines, err = enc.DecodeInt32(d.lines, d.tmp) + d.lines, err = enc.DecodeInt32(d.lines, d.buf) if err != nil { return err } @@ -244,23 +245,23 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat // Otherwise, inspect all the optional fields. if int(d.header.AddrSize) > 0 { - d.tmp = slices.GrowLen(d.tmp, int(d.header.AddrSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.AddrSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } d.address = slices.GrowLen(d.address, int(d.header.LocationsLen)) - d.address, err = enc.DecodeInt64(d.address, d.tmp) + d.address, err = enc.DecodeInt64(d.address, d.buf) if err != nil { return err } } if int(d.header.IsFoldedSize) > 0 { - d.tmp = slices.GrowLen(d.tmp, int(d.header.IsFoldedSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.IsFoldedSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } d.folded = slices.GrowLen(d.folded, int(d.header.LocationsLen)) - decodeBoolean(d.folded, d.tmp) + decodeBoolean(d.folded, d.buf) } var o int // Offset within the lines slice. 
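The IsFolded data handled above is bit-packed, one bit per location. That is also why the locations.go change at the top of this series grows the encoder buffer to len(e.folded)/8+1 bytes: the byte count must round up whenever the location count is not a multiple of eight (n/8+1 is a safe upper bound, (n+7)/8 is the exact ceiling). The encodeBoolean/decodeBoolean bodies are not shown in this series, so the round-trip sketch below is an assumption; in particular, the LSB-first bit order is a guess:

package main

import "fmt"

// encodeBoolean packs one bool per bit into dst, which must
// hold at least (len(src)+7)/8 bytes. Bit order here is an
// assumption: least significant bit first within each byte.
func encodeBoolean(dst []byte, src []bool) {
	for i, v := range src {
		if v {
			dst[i/8] |= 1 << (i % 8)
		}
	}
}

// decodeBoolean expands the bits back into dst, reading only
// as many bits as there are output values.
func decodeBoolean(dst []bool, src []byte) {
	for i := range dst {
		dst[i] = src[i/8]&(1<<(i%8)) != 0
	}
}

func main() {
	in := []bool{true, false, true, true, false, false, false, false, true}
	buf := make([]byte, (len(in)+7)/8) // 2 bytes for 9 values
	encodeBoolean(buf, in)
	out := make([]bool, len(in))
	decodeBoolean(out, buf)
	fmt.Println(out) // [true false true true false false false false true]
}

Because the decoder reads exactly len(dst) bits, a trailing padding byte produced by the encoder's over-allocation is simply ignored on the read path shown above.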
diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index c77ad8bd88..7eede61b4f 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -168,27 +168,28 @@ func (e *mappingsBlockEncoder) initWrite(mappings int) { } type mappingsBlockDecoder struct { + format SymbolsBlockFormat header mappingsBlockHeader ints []int32 ints64 []int64 - tmp []byte + buf []byte } func mappingsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryMapping], error) { if h.Format == BlockMappingsV1 { - return newSymbolsDecoder[v1.InMemoryMapping](h, new(mappingsBlockDecoder)), nil + return newSymbolsDecoder[v1.InMemoryMapping](h, &mappingsBlockDecoder{format: h.Format}), nil } return nil, fmt.Errorf("%w: unknown mappings format: %d", ErrUnknownVersion, h.Format) } func (d *mappingsBlockDecoder) readHeader(r io.Reader) error { - d.tmp = slices.GrowLen(d.tmp, mappingsBlockHeaderSize) - if _, err := io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, mappingsBlockHeaderSize) + if _, err := io.ReadFull(r, d.buf); err != nil { return nil } - d.header.unmarshal(d.tmp) - if crc32.Checksum(d.tmp[:mappingsBlockHeaderSize-4], castagnoli) != d.header.CRC { + d.header.unmarshal(d.buf) + if crc32.Checksum(d.buf[:mappingsBlockHeaderSize-4], castagnoli) != d.header.CRC { return ErrInvalidCRC } return nil @@ -205,11 +206,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping var enc delta.BinaryPackedEncoding d.ints = slices.GrowLen(d.ints, int(d.header.MappingsLen)) - d.tmp = slices.GrowLen(d.tmp, int(d.header.FileNameSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.FileNameSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } @@ -217,11 +218,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping mappings[i].Filename = uint32(v) } - d.tmp = slices.GrowLen(d.tmp, int(d.header.BuildIDSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.BuildIDSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } @@ -229,11 +230,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping mappings[i].BuildId = uint32(v) } - d.tmp = slices.GrowLen(d.tmp, int(d.header.FlagsSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.FlagsSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } @@ -246,11 +247,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping if d.header.MemoryStartSize > 0 { d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen)) - d.tmp = slices.GrowLen(d.tmp, int(d.header.MemoryStartSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.MemoryStartSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints64, err = enc.DecodeInt64(d.ints64, d.tmp) + d.ints64, err = enc.DecodeInt64(d.ints64, d.buf) if err != nil { return err } @@ -260,11 +261,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings 
[]v1.InMemoryMapping } if d.header.MemoryLimitSize > 0 { d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen)) - d.tmp = slices.GrowLen(d.tmp, int(d.header.MemoryLimitSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.MemoryLimitSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints64, err = enc.DecodeInt64(d.ints64, d.tmp) + d.ints64, err = enc.DecodeInt64(d.ints64, d.buf) if err != nil { return err } @@ -274,11 +275,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping } if d.header.FileOffsetSize > 0 { d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen)) - d.tmp = slices.GrowLen(d.tmp, int(d.header.FileOffsetSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.FileOffsetSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints64, err = enc.DecodeInt64(d.ints64, d.tmp) + d.ints64, err = enc.DecodeInt64(d.ints64, d.buf) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go index bc27701c47..4400218f61 100644 --- a/pkg/phlaredb/symdb/strings.go +++ b/pkg/phlaredb/symdb/strings.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "fmt" + "hash/crc32" "io" "unsafe" @@ -24,16 +25,20 @@ type stringsBlockHeader struct { StringsLen uint32 BlockEncoding byte _ [3]byte + CRC uint32 } func (h *stringsBlockHeader) marshal(b []byte) { binary.BigEndian.PutUint32(b[0:4], h.StringsLen) - b[5] = h.BlockEncoding + b[5], b[6], b[7], b[8] = h.BlockEncoding, 0, 0, 0 + h.CRC = crc32.Checksum(b[0:8], castagnoli) + binary.BigEndian.PutUint32(b[8:12], h.CRC) } func (h *stringsBlockHeader) unmarshal(b []byte) { h.StringsLen = binary.BigEndian.Uint32(b[0:4]) h.BlockEncoding = b[5] + h.CRC = binary.BigEndian.Uint32(b[8:12]) } type stringsBlockEncoder struct { @@ -103,27 +108,31 @@ func (e *stringsBlockEncoder) initWrite(strings int) { } type stringsBlockDecoder struct { + format SymbolsBlockFormat header stringsBlockHeader - tmp []byte + buf []byte } func stringsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[string], error) { if h.Format == BlockStringsV1 { - return newSymbolsDecoder[string](h, new(stringsBlockDecoder)), nil + return newSymbolsDecoder[string](h, &stringsBlockDecoder{format: h.Format}), nil } return nil, fmt.Errorf("%w: unknown strings format: %d", ErrUnknownVersion, h.Format) } func (d *stringsBlockDecoder) readHeader(r io.Reader) error { - d.tmp = slices.GrowLen(d.tmp, stringsBlockHeaderSize) - if _, err := io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, stringsBlockHeaderSize) + if _, err := io.ReadFull(r, d.buf); err != nil { return err } - d.header.unmarshal(d.tmp) - if d.header.BlockEncoding == 8 || d.header.BlockEncoding == 16 { - return nil + d.header.unmarshal(d.buf) + if crc32.Checksum(d.buf[:stringsBlockHeaderSize-4], castagnoli) != d.header.CRC { + return ErrInvalidCRC + } + if d.header.BlockEncoding != 8 && d.header.BlockEncoding != 16 { + return fmt.Errorf("invalid string block encoding: %d", d.header.BlockEncoding) } - return fmt.Errorf("invalid string block encoding: %d", d.header.BlockEncoding) + return nil } func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) { @@ -140,12 +149,12 @@ func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) } func (d *stringsBlockDecoder) decodeStrings8(r io.Reader, dst []string) (err error) { - d.tmp = slices.GrowLen(d.tmp, len(dst)) // 
1 byte per string.
-	if _, err = io.ReadFull(r, d.tmp); err != nil {
+	d.buf = slices.GrowLen(d.buf, len(dst)) // 1 byte per string.
+	if _, err = io.ReadFull(r, d.buf); err != nil {
 		return err
 	}
 	for i := 0; i < len(dst); i++ {
-		s := make([]byte, d.tmp[i])
+		s := make([]byte, d.buf[i])
 		if _, err = io.ReadFull(r, s); err != nil {
 			return err
 		}
@@ -155,12 +164,12 @@ func (d *stringsBlockDecoder) decodeStrings8(r io.Reader, dst []string) (err err
 }
 
 func (d *stringsBlockDecoder) decodeStrings16(r io.Reader, dst []string) (err error) {
-	d.tmp = slices.GrowLen(d.tmp, len(dst)*2) // 2 bytes per string.
-	if _, err = io.ReadFull(r, d.tmp); err != nil {
+	d.buf = slices.GrowLen(d.buf, len(dst)*2) // 2 bytes per string.
+	if _, err = io.ReadFull(r, d.buf); err != nil {
 		return err
 	}
 	for i := 0; i < len(dst); i++ {
-		l := binary.BigEndian.Uint16(d.tmp[i*2:])
+		l := binary.BigEndian.Uint16(d.buf[i*2:])
 		s := make([]byte, l)
 		if _, err = io.ReadFull(r, s); err != nil {
 			return err
diff --git a/pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb b/pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb
index da922afdaaf82367ac39784e397de8ab2c557851..7eb2bf119673f22c8a86ea5921c1272fec00df68 100644
GIT binary patch
literal 704
[704 bytes of base85-encoded binary test data omitted]

literal 640
[640 bytes of base85-encoded binary test data omitted]

From: Anton Kolesnikov
Date: Wed, 24 Apr 2024 20:04:51 +0800
Subject: [PATCH 20/36] small improvements

---
 .../symdb/testdata/symbols/v3/data.symdb | Bin 88564 -> 88572 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/pkg/phlaredb/symdb/testdata/symbols/v3/data.symdb b/pkg/phlaredb/symdb/testdata/symbols/v3/data.symdb
index 486c57d5984b5e3ec14a3e9793b63813aa780b7e..bc22ef6213a164690280fbc5bcf7be432f446bc5 100644
GIT binary patch
delta 34
kcmeyenf1?RRvrchrrq2e3=BQ*Yu;_-__2l!#{9Gf0P5`y!2kdN

delta 26
gcmeyfnf1$NRt^RRrrq2e+|C<$eym{vQJ>ZT0DssClK=n!
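Patch 19 above also gives the strings block header the guard the other tables already have: a Castagnoli CRC over the header bytes, verified before any payload is trusted. A compact, self-contained illustration of that pattern with a simplified 12-byte layout (the real stringsBlockHeader differs in field placement):

package main

import (
	"encoding/binary"
	"errors"
	"fmt"
	"hash/crc32"
)

var (
	castagnoli    = crc32.MakeTable(crc32.Castagnoli)
	errInvalidCRC = errors.New("invalid CRC")
)

// marshalHeader writes the string count and block encoding,
// then seals bytes 0-7 with a CRC32 stored in bytes 8-11.
func marshalHeader(b []byte, stringsLen uint32, blockEncoding byte) {
	binary.BigEndian.PutUint32(b[0:4], stringsLen)
	b[4] = blockEncoding
	binary.BigEndian.PutUint32(b[8:12], crc32.Checksum(b[0:8], castagnoli))
}

// unmarshalHeader rejects a torn or corrupted header before
// the caller reads the block payload that follows it.
func unmarshalHeader(b []byte) (stringsLen uint32, blockEncoding byte, err error) {
	if crc32.Checksum(b[0:8], castagnoli) != binary.BigEndian.Uint32(b[8:12]) {
		return 0, 0, errInvalidCRC
	}
	return binary.BigEndian.Uint32(b[0:4]), b[4], nil
}

func main() {
	b := make([]byte, 12)
	marshalHeader(b, 1024, 8)
	b[0] ^= 0xff // simulate corruption in flight
	if _, _, err := unmarshalHeader(b); err != nil {
		fmt.Println(err) // invalid CRC
	}
}

Verifying the header before decoding means a bit flip is reported as an invalid CRC up front instead of surfacing later as a bogus length driving an oversized allocation.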
From 4c621abd48fe1893a3ea922d600260ff1c8f105e Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Fri, 26 Apr 2024 09:53:41 +0800 Subject: [PATCH 21/36] fix test --- pkg/phlaredb/head_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/phlaredb/head_test.go b/pkg/phlaredb/head_test.go index c83cd0b883..7d3da2d11d 100644 --- a/pkg/phlaredb/head_test.go +++ b/pkg/phlaredb/head_test.go @@ -371,11 +371,11 @@ func TestHeadFlush(t *testing.T) { }, { RelPath: "symbols/data.symdb", - SizeBytes: 159203, + SizeBytes: 159215, }, { RelPath: "symbols/index.symdb", - SizeBytes: 384, + SizeBytes: 448, }, }, Compaction: block.BlockMetaCompaction{ From df69a7dd8edaf3e3a9f06298f958ea467d257b32 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Fri, 26 Apr 2024 16:43:37 +0800 Subject: [PATCH 22/36] add integration tests --- pkg/phlaredb/symdb/block_reader_test.go | 2 +- pkg/pprof/pprof.go | 19 ++++++---- pkg/pprof/pprof_test.go | 33 +++++++++++++++++ pkg/test/integration/microservices_test.go | 42 ++++++++++++++++++++++ 4 files changed, 88 insertions(+), 8 deletions(-) diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index 98167a022b..40a451669f 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -154,7 +154,7 @@ func Benchmark_Reader_ResolvePprof(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { r := NewResolver(ctx, bs.reader) - r.AddSamples(0, s.indexed[0][0].Samples) + r.AddSamples(0, schemav1.Samples{}) _, err := r.Pprof() require.NoError(b, err) r.Release() diff --git a/pkg/pprof/pprof.go b/pkg/pprof/pprof.go index ecf66cba3d..d155b2fbab 100644 --- a/pkg/pprof/pprof.go +++ b/pkg/pprof/pprof.go @@ -1089,18 +1089,23 @@ func GetLanguage(profile *Profile, logger log.Logger) string { // SetProfileMetadata sets the metadata on the profile. 
func SetProfileMetadata(p *profilev1.Profile, ty *typesv1.ProfileType, timeNanos int64, period int64) { m := map[string]int64{ - ty.SampleUnit: 0, - ty.SampleType: 0, - ty.PeriodType: 0, - ty.PeriodUnit: 0, + ty.SampleUnit: -1, + ty.SampleType: -1, + ty.PeriodType: -1, + ty.PeriodUnit: -1, } for i, s := range p.StringTable { - if _, ok := m[s]; !ok { + if _, ok := m[s]; ok { m[s] = int64(i) } } - for k, v := range m { - if v == 0 { + for _, k := range []string{ + ty.SampleUnit, + ty.SampleType, + ty.PeriodType, + ty.PeriodUnit, + } { + if m[k] == -1 { i := int64(len(p.StringTable)) p.StringTable = append(p.StringTable, k) m[k] = i diff --git a/pkg/pprof/pprof_test.go b/pkg/pprof/pprof_test.go index c906600dc6..c22263a9f6 100644 --- a/pkg/pprof/pprof_test.go +++ b/pkg/pprof/pprof_test.go @@ -15,6 +15,7 @@ import ( "google.golang.org/protobuf/proto" profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" + typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" "github.com/grafana/pyroscope/pkg/pprof/testhelper" ) @@ -1432,3 +1433,35 @@ func Benchmark_GetProfileLanguage(b *testing.B) { }) } } + +func Test_SetProfileMetadata(t *testing.T) { + p := &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{}}, + StringTable: []string{"", "qux"}, + PeriodType: &profilev1.ValueType{}, + } + pt := &typesv1.ProfileType{ + ID: "alfa", + Name: "bravo", + SampleType: "foo", + SampleUnit: "bar", + PeriodType: "baz", + PeriodUnit: "qux", + } + SetProfileMetadata(p, pt, 1, 2) + expected := &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{ + Type: 3, // foo + Unit: 2, // bar + }}, + StringTable: []string{"", "qux", "bar", "foo", "baz"}, + PeriodType: &profilev1.ValueType{ + Type: 4, // baz + Unit: 1, // qux + }, + TimeNanos: 1, + Period: 1, + DefaultSampleType: 3, // foo + } + require.Equal(t, expected.String(), p.String()) +} diff --git a/pkg/test/integration/microservices_test.go b/pkg/test/integration/microservices_test.go index 9cc6d9a597..5a13d50c87 100644 --- a/pkg/test/integration/microservices_test.go +++ b/pkg/test/integration/microservices_test.go @@ -12,6 +12,7 @@ import ( "github.com/stretchr/testify/require" "golang.org/x/sync/errgroup" + profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" pushv1 "github.com/grafana/pyroscope/api/gen/proto/go/push/v1" "github.com/grafana/pyroscope/api/gen/proto/go/push/v1/pushv1connect" querierv1 "github.com/grafana/pyroscope/api/gen/proto/go/querier/v1" @@ -181,4 +182,45 @@ func (tc *testCtx) runQueryTest(ctx context.Context, t *testing.T) { sort.Strings(expectedValues) assert.Equal(t, expectedValues, resp.Msg.Names) }) + + t.Run("QuerySelectMergeProfile", func(t *testing.T) { + req := &querierv1.SelectMergeProfileRequest{ + ProfileTypeID: "process_cpu:cpu:nanoseconds:cpu:nanoseconds", + LabelSelector: "{}", + Start: tc.now.Add(-time.Hour).UnixMilli(), + End: tc.now.Add(time.Hour).UnixMilli(), + } + resp, err := tc.querier.SelectMergeProfile(ctx, connect.NewRequest(req)) + require.NoError(t, err) + + expected := &profilev1.Profile{ + SampleType: []*profilev1.ValueType{ + {Type: 6, Unit: 5}, + }, + Sample: []*profilev1.Sample{ + {LocationId: []uint64{1, 2, 3}, Value: []int64{100}}, + {LocationId: []uint64{1, 2, 4}, Value: []int64{501}}, + }, + Mapping: []*profilev1.Mapping{{Id: 1, HasFunctions: true}}, + Location: []*profilev1.Location{ + {Id: 1, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 1}}}, + {Id: 2, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 2}}}, + {Id: 3, MappingId: 1, Line: 
[]*profilev1.Line{{FunctionId: 3}}}, + {Id: 4, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 4}}}, + }, + Function: []*profilev1.Function{ + {Id: 1, Name: 1}, + {Id: 2, Name: 2}, + {Id: 3, Name: 3}, + {Id: 4, Name: 4}, + }, + StringTable: []string{"", "foo", "bar", "baz", "boz", "nanoseconds", "cpu"}, + TimeNanos: req.End * 1e6, + DurationNanos: 7200000000000, + PeriodType: &profilev1.ValueType{Type: 6, Unit: 5}, + Period: 1000000000, + DefaultSampleType: 6, + } + require.Equal(t, expected.String(), resp.Msg.String()) + }) } From 83d2100954a0031b07513aba0e879d7172fafe04 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Sat, 27 Apr 2024 17:42:50 +0800 Subject: [PATCH 23/36] merge index and data files --- pkg/phlaredb/block/block_test.go | 10 +- pkg/phlaredb/compact.go | 15 +- pkg/phlaredb/compact_test.go | 9 +- pkg/phlaredb/head.go | 9 +- pkg/phlaredb/head_test.go | 8 +- pkg/phlaredb/symdb/block_reader.go | 188 +++++++++++++---- pkg/phlaredb/symdb/block_reader_parquet.go | 4 +- pkg/phlaredb/symdb/block_reader_test.go | 90 ++++++-- pkg/phlaredb/symdb/block_writer.go | 56 ++--- pkg/phlaredb/symdb/format.go | 196 +++++++++++++----- pkg/phlaredb/symdb/functions.go | 9 +- pkg/phlaredb/symdb/locations.go | 16 +- pkg/phlaredb/symdb/mappings.go | 16 +- pkg/phlaredb/symdb/stacktrace_tree.go | 16 +- pkg/phlaredb/symdb/stacktrace_tree_test.go | 1 - pkg/phlaredb/symdb/symdb.go | 23 +- pkg/phlaredb/symdb/symdb_test.go | 6 - .../symdb/testdata/symbols/v3/index.symdb | Bin 704 -> 0 bytes .../symbols/v3/{data.symdb => symbols.symdb} | Bin 88572 -> 89300 bytes 19 files changed, 441 insertions(+), 231 deletions(-) delete mode 100644 pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb rename pkg/phlaredb/symdb/testdata/symbols/v3/{data.symdb => symbols.symdb} (99%) diff --git a/pkg/phlaredb/block/block_test.go b/pkg/phlaredb/block/block_test.go index c93da7e2b6..0fa9979e69 100644 --- a/pkg/phlaredb/block/block_test.go +++ b/pkg/phlaredb/block/block_test.go @@ -96,7 +96,7 @@ func TestDelete(t *testing.T) { }) require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(dir, meta.ULID.String()))) - require.Equal(t, 5, len(objects(t, bkt, meta.ULID))) + require.Equal(t, 4, len(objects(t, bkt, meta.ULID))) markedForDeletion := promauto.With(prometheus.NewRegistry()).NewCounter(prometheus.CounterOpts{Name: "test"}) require.NoError(t, block.MarkForDeletion(ctx, log.NewNopLogger(), bkt, meta.ULID, "", false, markedForDeletion)) @@ -116,7 +116,7 @@ func TestDelete(t *testing.T) { } }) require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b2.ULID.String()))) - require.Equal(t, 5, len(objects(t, bkt, b2.ULID))) + require.Equal(t, 4, len(objects(t, bkt, b2.ULID))) // Remove meta.json and check if delete can delete it. 
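	// (Four objects per block with the v3 layout: index.tsdb,
	// profiles.parquet, symbols.symdb, and meta.json; the fifth object
	// was the separate symbols index, now merged into symbols.symdb.)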
require.NoError(t, bkt.Delete(ctx, path.Join(b2.ULID.String(), block.MetaFilename))) @@ -196,7 +196,7 @@ func TestUpload(t *testing.T) { t.Run("full block", func(t *testing.T) { require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b1.ULID.String()))) - require.Equal(t, 5, len(bkt.Objects())) + require.Equal(t, 4, len(bkt.Objects())) objs := bkt.Objects() require.Contains(t, objs, path.Join(b1.ULID.String(), block.MetaFilename)) require.Contains(t, objs, path.Join(b1.ULID.String(), block.IndexFilename)) @@ -205,7 +205,7 @@ func TestUpload(t *testing.T) { t.Run("upload is idempotent", func(t *testing.T) { require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b1.ULID.String()))) - require.Equal(t, 5, len(bkt.Objects())) + require.Equal(t, 4, len(bkt.Objects())) objs := bkt.Objects() require.Contains(t, objs, path.Join(b1.ULID.String(), block.MetaFilename)) require.Contains(t, objs, path.Join(b1.ULID.String(), block.IndexFilename)) @@ -355,7 +355,7 @@ func TestUploadCleanup(t *testing.T) { require.ErrorIs(t, uploadErr, errUploadFailed) // If upload of meta.json fails, nothing is cleaned up. - require.Equal(t, 5, len(bkt.Objects())) + require.Equal(t, 4, len(bkt.Objects())) require.Greater(t, len(bkt.Objects()[path.Join(b1.String(), block.IndexFilename)]), 0) require.Greater(t, len(bkt.Objects()[path.Join(b1.String(), block.MetaFilename)]), 0) } diff --git a/pkg/phlaredb/compact.go b/pkg/phlaredb/compact.go index f34edb30bb..548b6e9197 100644 --- a/pkg/phlaredb/compact.go +++ b/pkg/phlaredb/compact.go @@ -734,14 +734,9 @@ type symbolsCompactor struct { } func newSymbolsCompactor(path string) *symbolsCompactor { - dst := filepath.Join(path, symdb.DefaultDirName) return &symbolsCompactor{ - w: symdb.NewSymDB(symdb.DefaultConfig(). - WithDirectory(dst). 
- WithParquetConfig(symdb.ParquetConfig{ - MaxBufferRowCount: defaultParquetConfig.MaxBufferRowCount, - })), - dst: dst, + w: symdb.NewSymDB(symdb.DefaultConfig().WithDirectory(path)), + dst: path, rewriters: make(map[BlockReader]*symdb.Rewriter), } } @@ -772,7 +767,9 @@ func (s *symbolsRewriter) Close() (uint64, error) { if err := s.symbolsCompactor.Flush(); err != nil { return 0, err } - return s.numSamples, util.CopyDir(s.symbolsCompactor.dst, filepath.Join(s.dst, symdb.DefaultDirName)) + dst := filepath.Join(s.dst, symdb.DefaultFileName) + src := filepath.Join(s.symbolsCompactor.dst, symdb.DefaultFileName) + return s.numSamples, util.CopyFile(src, dst) } func (s *symbolsCompactor) ReWriteRow(profile profileRow) (uint64, error) { @@ -814,7 +811,7 @@ func (s *symbolsCompactor) Flush() error { } func (s *symbolsCompactor) Close() error { - return os.RemoveAll(s.dst) + return os.RemoveAll(filepath.Join(s.dst, symdb.DefaultFileName)) } func (s *symbolsCompactor) loadStacktracesID(values []parquet.Value) { diff --git a/pkg/phlaredb/compact_test.go b/pkg/phlaredb/compact_test.go index 7fdca44d86..31a20b4707 100644 --- a/pkg/phlaredb/compact_test.go +++ b/pkg/phlaredb/compact_test.go @@ -27,7 +27,6 @@ import ( phlarecontext "github.com/grafana/pyroscope/pkg/phlare/context" "github.com/grafana/pyroscope/pkg/phlaredb/block" "github.com/grafana/pyroscope/pkg/phlaredb/sharding" - "github.com/grafana/pyroscope/pkg/phlaredb/symdb" "github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index" "github.com/grafana/pyroscope/pkg/pprof/testhelper" ) @@ -198,7 +197,7 @@ func TestCompactWithSplitting(t *testing.T) { }) require.NoError(t, err) - require.NoDirExists(t, filepath.Join(dst, symdb.DefaultDirName)) + require.NoFileExists(t, dst) // 4 shards one per series. require.Equal(t, 4, len(compacted)) @@ -628,11 +627,10 @@ func TestFlushMeta(t *testing.T) { require.Equal(t, uint64(3), b.Meta().Stats.NumSeries) require.Equal(t, uint64(3), b.Meta().Stats.NumSamples) require.Equal(t, uint64(3), b.Meta().Stats.NumProfiles) - require.Len(t, b.Meta().Files, 4) + require.Len(t, b.Meta().Files, 3) require.Equal(t, "index.tsdb", b.Meta().Files[0].RelPath) require.Equal(t, "profiles.parquet", b.Meta().Files[1].RelPath) - require.Equal(t, "symbols/data.symdb", b.Meta().Files[2].RelPath) - require.Equal(t, "symbols/index.symdb", b.Meta().Files[3].RelPath) + require.Equal(t, "symbols.symdb", b.Meta().Files[2].RelPath) } func newBlock(t testing.TB, generator func() []*testhelper.ProfileBuilder) *singleBlockQuerier { @@ -693,7 +691,6 @@ func blockQuerierFromMeta(t *testing.T, dir string, m block.Meta) *singleBlockQu require.NoError(t, err) blk := NewSingleBlockQuerierFromMeta(ctx, bkt, &m) require.NoError(t, blk.Open(ctx)) - // require.NoError(t, blk.symbols.Load(ctx)) return blk } diff --git a/pkg/phlaredb/head.go b/pkg/phlaredb/head.go index 74ecf28d92..7bda7b917e 100644 --- a/pkg/phlaredb/head.go +++ b/pkg/phlaredb/head.go @@ -124,12 +124,7 @@ func NewHead(phlarectx context.Context, cfg Config, limiter TenantLimiter) (*Hea } } - h.symdb = symdb.NewSymDB(symdb.DefaultConfig(). - WithDirectory(filepath.Join(h.headPath, symdb.DefaultDirName)). 
- WithParquetConfig(symdb.ParquetConfig{ - MaxBufferRowCount: h.parquetConfig.MaxBufferRowCount, - })) - + h.symdb = symdb.NewSymDB(symdb.DefaultConfig().WithDirectory(h.headPath)) h.wg.Add(1) go h.loop() @@ -562,8 +557,6 @@ func (h *Head) flush(ctx context.Context) error { return errors.Wrap(err, "flushing symdb") } for _, file := range h.symdb.Files() { - // Files' path is relative to the symdb dir. - file.RelPath = filepath.Join(symdb.DefaultDirName, file.RelPath) files = append(files, file) blockSize += file.SizeBytes h.metrics.flushedFileSizeBytes.WithLabelValues(file.RelPath).Observe(float64(file.SizeBytes)) diff --git a/pkg/phlaredb/head_test.go b/pkg/phlaredb/head_test.go index 7d3da2d11d..2d36950588 100644 --- a/pkg/phlaredb/head_test.go +++ b/pkg/phlaredb/head_test.go @@ -370,12 +370,8 @@ func TestHeadFlush(t *testing.T) { }, }, { - RelPath: "symbols/data.symdb", - SizeBytes: 159215, - }, - { - RelPath: "symbols/index.symdb", - SizeBytes: 448, + RelPath: "symbols.symdb", + SizeBytes: 159687, }, }, Compaction: block.BlockMetaCompaction{ diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index a204dcc7c1..79a46f3b76 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -23,13 +23,16 @@ import ( type Reader struct { bucket objstore.BucketReader - files map[string]block.File - meta *block.Meta + file block.File + index IndexFile + footer Footer - index IndexFile partitions []*partition partitionsMap map[uint64]*partition + // Not used in v3; left for compatibility. + meta *block.Meta + files map[string]block.File parquetFiles *parquetFiles } @@ -38,44 +41,145 @@ func Open(ctx context.Context, b objstore.BucketReader, m *block.Meta) (*Reader, bucket: b, meta: m, files: make(map[string]block.File), + file: block.File{RelPath: DefaultFileName}, } for _, f := range r.meta.Files { r.files[filepath.Base(f.RelPath)] = f } - var err error - if err = r.openIndexFile(ctx); err != nil { - return nil, fmt.Errorf("opening index file: %w", err) + if err := r.open(ctx); err != nil { + return nil, err + } + if err := r.buildPartitions(); err != nil { + return nil, err + } + return r, nil +} + +func (r *Reader) open(ctx context.Context) (err error) { + if r.file, err = r.lookupFile(r.file.RelPath); err == nil { + if err = r.openIndex(ctx); err != nil { + return fmt.Errorf("opening index section: %w", err) + } + return nil + } + if err = r.openIndexV12(ctx); err != nil { + return fmt.Errorf("opening index file: %w", err) } if r.index.Header.Version == FormatV2 { if err = openParquetFiles(ctx, r); err != nil { - return nil, err + return fmt.Errorf("opening parquet files: %w", err) } } + return nil +} + +func (r *Reader) buildPartitions() (err error) { r.partitionsMap = make(map[uint64]*partition, len(r.index.PartitionHeaders)) r.partitions = make([]*partition, len(r.index.PartitionHeaders)) for i, h := range r.index.PartitionHeaders { var p *partition if p, err = r.partitionReader(h); err != nil { - return nil, err + return err } r.partitionsMap[h.Partition] = p r.partitions[i] = p } - return r, nil + // Cleanup the index to not retain unused objects. 
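+	// (After buildPartitions the per-partition readers own the headers;
+	// only the format version is still consulted later, e.g. when
+	// resolving which file holds the stacktraces.)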
+ r.index = IndexFile{ + Header: IndexHeader{ + Version: r.index.Header.Version, + }, + } + return nil } -func (r *Reader) Close() error { - if r == nil { - return nil +func (r *Reader) partitionReader(h *PartitionHeader) (*partition, error) { + p := &partition{reader: r} + switch r.index.Header.Version { + case FormatV1: + p.initEmptyTables(h) + case FormatV2: + p.initParquetTables(h) + case FormatV3: + if err := p.initTables(h); err != nil { + return nil, err + } } - if r.parquetFiles != nil { - return r.parquetFiles.Close() + p.initStacktraces(h.Stacktraces) + return p, nil +} + +// openIndex locates footer and loads the index section from +// the file into the memory. +// +// NOTE(kolesnikovae): Pre-fetch: we could speculatively fetch +// the footer and the index section into a larger buffer rather +// than retrieving them synchronously. +// +// NOTE(kolesnikovae): It is possible to skip the footer, if it +// was cached, and the index section offset and size are known. +func (r *Reader) openIndex(ctx context.Context) error { + if r.file.SizeBytes == 0 { + attrs, err := r.bucket.Attributes(ctx, r.file.RelPath) + if err != nil { + return fmt.Errorf("fetching file attributes: %w", err) + } + r.file.SizeBytes = uint64(attrs.Size) + } + // Read footer. + offset := int64(r.file.SizeBytes) - int64(FooterSize) + if offset < int64(IndexHeaderSize) { + return fmt.Errorf("%w: footer offset: %d", ErrInvalidSize, offset) + } + if err := r.readFooter(ctx, offset, int64(FooterSize)); err != nil { + return err + } + indexSize := offset - int64(r.footer.IndexOffset) + if indexSize < int64(IndexHeaderSize) { + return fmt.Errorf("%w: index section size: %d", ErrInvalidSize, indexSize) + } + return r.readIndexSection(ctx, int64(r.footer.IndexOffset), indexSize) +} + +func (r *Reader) readFooter(ctx context.Context, offset, size int64) error { + o, err := r.bucket.GetRange(ctx, r.file.RelPath, offset, size) + if err != nil { + return fmt.Errorf("fetching footer: %w", err) + } + defer func() { + _ = o.Close() + }() + buf := make([]byte, size) + if _, err = io.ReadFull(o, buf); err != nil { + return fmt.Errorf("reading footer: %w", err) + } + if err = r.footer.UnmarshalBinary(buf); err != nil { + return fmt.Errorf("unmarshaling footer: %w", err) } return nil } -func (r *Reader) openIndexFile(ctx context.Context) error { - f, err := r.file(IndexFileName) +func (r *Reader) readIndexSection(ctx context.Context, offset, size int64) error { + o, err := r.bucket.GetRange(ctx, r.file.RelPath, offset, size) + if err != nil { + return fmt.Errorf("fetching index: %w", err) + } + defer func() { + _ = o.Close() + }() + buf := make([]byte, int(size)) + if _, err = io.ReadFull(o, buf); err != nil { + return fmt.Errorf("reading index: %w", err) + } + r.index, err = OpenIndex(buf) + if err != nil { + return fmt.Errorf("openning index: %w", err) + } + return nil +} + +func (r *Reader) openIndexV12(ctx context.Context) error { + f, err := r.lookupFile(IndexFileName) if err != nil { return err } @@ -87,11 +191,11 @@ func (r *Reader) openIndexFile(ctx context.Context) error { if err != nil { return err } - r.index, err = ReadIndexFile(b) + r.index, err = OpenIndex(b) return err } -func (r *Reader) file(name string) (block.File, error) { +func (r *Reader) lookupFile(name string) (block.File, error) { f, ok := r.files[name] if !ok { return block.File{}, fmt.Errorf("%q: %w", name, os.ErrNotExist) @@ -99,20 +203,14 @@ func (r *Reader) file(name string) (block.File, error) { return f, nil } -func (r *Reader) partitionReader(h 
*PartitionHeader) (*partition, error) { - p := &partition{reader: r} - switch r.index.Header.Version { - case FormatV1: - p.initEmptyTables(h) - case FormatV2: - p.initParquetTables(h) - case FormatV3: - if err := p.initTables(h); err != nil { - return nil, err - } +func (r *Reader) Close() error { + if r == nil { + return nil } - p.initStacktraces(h.Stacktraces) - return p, nil + if r.parquetFiles != nil { + return r.parquetFiles.Close() + } + return nil } var ErrPartitionNotFound = fmt.Errorf("partition not found") @@ -369,15 +467,11 @@ func (c *stacktraceBlock) fetch(ctx context.Context) error { ) defer span.Finish() return c.r.Inc(func() error { - filename := DataFileName - if c.reader.index.Header.Version < 3 { - filename = StacktracesFileName - } - f, err := c.reader.file(filename) + path, err := c.stacktracesFile() if err != nil { return err } - rc, err := c.reader.bucket.GetRange(ctx, f.RelPath, c.header.Offset, c.header.Size) + rc, err := c.reader.bucket.GetRange(ctx, path, c.header.Offset, c.header.Size) if err != nil { return err } @@ -390,6 +484,17 @@ func (c *stacktraceBlock) fetch(ctx context.Context) error { }) } +func (c *stacktraceBlock) stacktracesFile() (string, error) { + f := c.reader.file + if c.reader.index.Header.Version < 3 { + var err error + if f, err = c.reader.lookupFile(StacktracesFileName); err != nil { + return "", err + } + } + return f.RelPath, nil +} + func (c *stacktraceBlock) readFrom(r *bufio.Reader) error { // NOTE(kolesnikovae): Pool of node chunks could reduce // the alloc size, but it may affect memory locality. @@ -435,11 +540,10 @@ func (t *rawTable[T]) fetch(ctx context.Context) error { ) defer span.Finish() return t.r.Inc(func() error { - f, err := t.reader.file(DataFileName) - if err != nil { - return err - } - rc, err := t.reader.bucket.GetRange(ctx, f.RelPath, int64(t.header.Offset), int64(t.header.Size)) + rc, err := t.reader.bucket.GetRange(ctx, + t.reader.file.RelPath, + int64(t.header.Offset), + int64(t.header.Size)) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/block_reader_parquet.go b/pkg/phlaredb/symdb/block_reader_parquet.go index 1addb2764e..364a678344 100644 --- a/pkg/phlaredb/symdb/block_reader_parquet.go +++ b/pkg/phlaredb/symdb/block_reader_parquet.go @@ -19,6 +19,8 @@ import ( "github.com/grafana/pyroscope/pkg/util/refctr" ) +// Used in v2. Left for compatibility. 
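+// The v3 read path never opens these tables: locations, mappings,
+// functions, and strings are decoded directly from symbols.symdb.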
+ type parquetTable[M schemav1.Models, P schemav1.Persister[M]] struct { headers []RowRangeReference bucket objstore.BucketReader @@ -145,7 +147,7 @@ func openParquetFiles(ctx context.Context, r *Reader) error { n := n fp := fp g.Go(func() error { - fm, err := r.file(n) + fm, err := r.lookupFile(n) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index 40a451669f..67521b2350 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -1,29 +1,46 @@ package symdb import ( + "bytes" "context" "os" "testing" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + pystore "github.com/grafana/pyroscope/pkg/objstore" "github.com/grafana/pyroscope/pkg/objstore/providers/filesystem" "github.com/grafana/pyroscope/pkg/phlaredb/block" schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" ) -var testBlockMeta = &block.Meta{ - Files: []block.File{ - {RelPath: IndexFileName}, - {RelPath: StacktracesFileName}, - {RelPath: DataFileName}, - {RelPath: "locations.parquet"}, - {RelPath: "mappings.parquet"}, - {RelPath: "functions.parquet"}, - {RelPath: "strings.parquet"}, - }, -} +var ( + testBlockMeta = &block.Meta{ + Files: []block.File{ + {RelPath: DefaultFileName}, + }, + } + + testBlockMetaV1 = &block.Meta{ + Files: []block.File{ + {RelPath: IndexFileName}, + {RelPath: StacktracesFileName}, + }, + } + + testBlockMetaV2 = &block.Meta{ + Files: []block.File{ + {RelPath: IndexFileName}, + {RelPath: StacktracesFileName}, + {RelPath: "locations.parquet"}, + {RelPath: "mappings.parquet"}, + {RelPath: "functions.parquet"}, + {RelPath: "strings.parquet"}, + }, + } +) func Test_write_block_fixture(t *testing.T) { t.Skip() @@ -36,9 +53,8 @@ func Test_write_block_fixture(t *testing.T) { require.NoError(t, os.Rename(b.config.Dir, fixtureDir)) } -func Fuzz_ReadIndexFile(f *testing.F) { +func Fuzz_ReadIndexFile_v12(f *testing.F) { files := []string{ - "testdata/symbols/v3/index.symdb", "testdata/symbols/v2/index.symdb", "testdata/symbols/v1/index.symdb", } @@ -48,7 +64,7 @@ func Fuzz_ReadIndexFile(f *testing.F) { f.Add(data) } f.Fuzz(func(_ *testing.T, b []byte) { - _, _ = ReadIndexFile(b) + _, _ = OpenIndex(b) }) } @@ -85,12 +101,54 @@ func Test_Reader_Open_v3(t *testing.T) { require.Equal(t, expected, resolved.String()) } +func Test_Reader_Open_v3_fuzz(t *testing.T) { + // Make sure the test is valid. 
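+	// (This mirrors the fuzz body below against the unmodified corpus,
+	// so a failure here points at the harness, not at a mutated input.)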
+ corpus, err := os.ReadFile("testdata/symbols/v3/symbols.symdb") + require.NoError(t, err) + ctx := context.Background() + + bucket := pystore.NewBucket(objstore.NewInMemBucket()) + require.NoError(t, bucket.Upload(ctx, DefaultFileName, bytes.NewReader(corpus))) + b, err := Open(ctx, bucket, testBlockMeta) + require.NoError(t, err) + + r := NewResolver(context.Background(), b) + defer r.Release() + r.AddSamples(0, schemav1.Samples{}) + r.AddSamples(1, schemav1.Samples{}) + _, err = r.Pprof() + require.NoError(t, err) +} + +func Fuzz_Reader_Open_v3(f *testing.F) { + corpus, err := os.ReadFile("testdata/symbols/v3/symbols.symdb") + require.NoError(f, err) + ctx := context.Background() + + f.Add(corpus) + f.Fuzz(func(t *testing.T, data []byte) { + bucket := pystore.NewBucket(objstore.NewInMemBucket()) + require.NoError(t, bucket.Upload(ctx, DefaultFileName, bytes.NewReader(data))) + + b, err := Open(context.Background(), bucket, testBlockMeta) + if err != nil { + return + } + + r := NewResolver(context.Background(), b) + defer r.Release() + r.AddSamples(0, schemav1.Samples{}) + r.AddSamples(1, schemav1.Samples{}) + _, _ = r.Pprof() + }) +} + func Test_Reader_Open_v2(t *testing.T) { // The block contains two partitions (0 and 1), each partition // stores symbols of the testdata/profile.pb.gz profile b, err := filesystem.NewBucket("testdata/symbols/v2") require.NoError(t, err) - x, err := Open(context.Background(), b, testBlockMeta) + x, err := Open(context.Background(), b, testBlockMetaV2) require.NoError(t, err) r := NewResolver(context.Background(), x) @@ -120,7 +178,7 @@ func Test_Reader_Open_v2(t *testing.T) { func Test_Reader_Open_v1(t *testing.T) { b, err := filesystem.NewBucket("testdata/symbols/v1") require.NoError(t, err) - x, err := Open(context.Background(), b, testBlockMeta) + x, err := Open(context.Background(), b, testBlockMetaV1) require.NoError(t, err) r, err := x.partition(context.Background(), 1) require.NoError(t, err) diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index 19be26e071..585b498f2d 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -7,8 +7,6 @@ import ( "os" "path/filepath" - "github.com/grafana/dskit/multierror" - "github.com/grafana/pyroscope/pkg/phlaredb/block" v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" ) @@ -20,6 +18,7 @@ type writer struct { indexFile *fileWriter dataFile *fileWriter files []block.File + footer Footer stringsEncoder *symbolsEncoder[string] mappingsEncoder *symbolsEncoder[v1.InMemoryMapping] @@ -31,11 +30,15 @@ func newWriter(c *Config) *writer { return &writer{ config: c, index: IndexFile{ - Header: Header{ + Header: IndexHeader{ Magic: symdbMagic, Version: FormatV3, }, }, + footer: Footer{ + Magic: symdbMagic, + Version: FormatV3, + }, stringsEncoder: newStringsEncoder(), mappingsEncoder: newMappingsEncoder(), @@ -45,53 +48,32 @@ func newWriter(c *Config) *writer { } func (w *writer) writePartitions(partitions []*PartitionWriter) (err error) { - if w.dataFile, err = w.newFile(DataFileName); err != nil { + if err = os.MkdirAll(w.config.Dir, 0o755); err != nil { + return fmt.Errorf("failed to create directory %q: %w", w.config.Dir, err) + } + if w.dataFile, err = w.newFile(DefaultFileName); err != nil { return err } defer func() { err = w.dataFile.Close() + w.files = []block.File{w.dataFile.meta()} }() for _, p := range partitions { if err = p.writeTo(w); err != nil { - return err + return fmt.Errorf("failed to write partition: %w", err) } 
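		// Headers carry the section offsets recorded during writeTo, so
		// they are collected only after the partition body is written.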
w.index.PartitionHeaders = append(w.index.PartitionHeaders, &p.header) } - return nil -} - -func (w *writer) Flush() (err error) { - if err = w.writeIndexFile(); err != nil { - return err - } - w.files = []block.File{ - w.indexFile.meta(), - w.dataFile.meta(), + w.footer.IndexOffset = uint64(w.dataFile.w.offset) + if _, err = w.index.WriteTo(w.dataFile); err != nil { + return fmt.Errorf("failed to write index: %w", err) } - return nil -} - -func (w *writer) createDir() error { - if err := os.MkdirAll(w.config.Dir, 0o755); err != nil { - return fmt.Errorf("failed to create directory %q: %w", w.config.Dir, err) + if _, err = w.dataFile.Write(w.footer.MarshalBinary()); err != nil { + return fmt.Errorf("failed to write footer: %w", err) } return nil } -func (w *writer) writeIndexFile() (err error) { - // Write the index file only after all the files were flushed. - if w.indexFile, err = w.newFile(IndexFileName); err != nil { - return err - } - defer func() { - err = multierror.New(err, w.indexFile.Close()).Err() - }() - if _, err = w.index.WriteTo(w.indexFile); err != nil { - return fmt.Errorf("failed to write index file: %w", err) - } - return err -} - func (w *writer) newFile(path string) (f *fileWriter, err error) { path = filepath.Join(w.config.Dir, path) if f, err = newFileWriter(path); err != nil { @@ -129,7 +111,7 @@ func (f *fileWriter) Write(p []byte) (n int, err error) { return f.w.Write(p) } -func (f *fileWriter) sync() (err error) { +func (f *fileWriter) Flush() (err error) { if err = f.buf.Flush(); err != nil { return err } @@ -137,7 +119,7 @@ func (f *fileWriter) sync() (err error) { } func (f *fileWriter) Close() (err error) { - if err = f.sync(); err != nil { + if err = f.Flush(); err != nil { return err } return f.f.Close() diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 81bda15889..780b965494 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -8,40 +8,65 @@ import ( "io" "unsafe" + "github.com/parquet-go/parquet-go/encoding/delta" + "github.com/grafana/pyroscope/pkg/slices" "github.com/grafana/pyroscope/pkg/util/math" ) +// V1 and V2: +// // The database is a collection of files. The only file that is guaranteed // to be present is the index file: it indicates the version of the format, // and the structure of the database contents. The file is supposed to be -// read into memory entirely and opened with a ReadIndexFile call. -// -// Big endian order is used unless otherwise noted. +// read into memory entirely and opened with an OpenIndex call. + +// V3: // -// Layout of the index file (single-pass write): +// The database is a single file. The file consists of the following sections: +// [Data ] +// [Index ] +// [Footer] // -// [Header] Header defines the format version and denotes the content type. +// The file is supposed to be open with Open call: it reads the footer, locates +// index section, and fetches it into memory. // -// [TOC] Table of contents. Its entries refer to the Data section. -// It is of a fixed size for a given version (number of entries). +// Data section is version specific. +// v3: Partitions. // -// [Data] Data is an arbitrary structured section. The exact structure is -// defined by the TOC and Header (version, flags, etc). +// Index section is structured in the following way: // -// [CRC32] Checksum. +// [IndexHeader] Header defines the format version and denotes the content type. +// [TOC ] Table of contents. Its entries refer to the Data section. 
+// It is of a fixed size for a given version (number of entries). +// [Data ] Data is an arbitrary structured section. The exact structure is +// defined by the TOC and Header (version, flags, etc). +// v1: StacktraceChunkHeaders. +// v2: PartitionHeadersV2. +// v3: PartitionHeadersV3. +// [CRC32 ] Checksum. // +// Footer section is version agnostic and is only needed to locate +// the index offset within the file. + +// In all version big endian order is used unless otherwise noted. const ( - DefaultDirName = "symbols" + DefaultFileName = "symbols.symdb" // Added in v3. + // Pre-v3 assets. Left for compatibility reasons. + + DefaultDirName = "symbols" IndexFileName = "index.symdb" - StacktracesFileName = "stacktraces.symdb" // Used in v1 and v2. - DataFileName = "data.symdb" // Added in v3. + StacktracesFileName = "stacktraces.symdb" ) +type FormatVersion uint32 + const ( - _ = iota + // Within a database, the same format version + // must be used in all places. + _ FormatVersion = iota FormatV1 FormatV2 @@ -52,9 +77,14 @@ const ( const ( // TOC entries are version-specific. + // The constants point to the entry index in the TOC. tocEntryStacktraceChunkHeaders = 0 tocEntryPartitionHeaders = 0 - tocEntries = 1 + + // Total number of entries in the current version. + // TODO(kolesnikovae): TOC size is version specific, + // but at the moment, all versions have the same size: 1. + tocEntriesTotal = 1 ) // https://en.wikipedia.org/wiki/List_of_file_signatures @@ -81,42 +111,83 @@ func (e *FormatError) Error() string { } type IndexFile struct { - Header Header + Header IndexHeader TOC TOC - // Version-specific parts. + // Version-specific. PartitionHeaders PartitionHeaders - CRC uint32 + CRC uint32 // Checksum of the index. } -type Header struct { - Magic [4]byte - Version uint32 - Reserved [8]byte // Reserved for future use. +// NOTE(kolesnikovae): IndexHeader is rudimentary and is left for compatibility. + +type IndexHeader struct { + Magic [4]byte + Version FormatVersion + _ [4]byte // Reserved for future use. + _ [4]byte // Reserved for future use. } -const HeaderSize = int(unsafe.Sizeof(Header{})) +const IndexHeaderSize = int(unsafe.Sizeof(IndexHeader{})) -func (h *Header) MarshalBinary() ([]byte, error) { - b := make([]byte, HeaderSize) +func (h *IndexHeader) MarshalBinary() []byte { + b := make([]byte, IndexHeaderSize) copy(b[0:4], h.Magic[:]) - binary.BigEndian.PutUint32(b[4:8], h.Version) - binary.BigEndian.PutUint32(b[HeaderSize-4:], crc32.Checksum(b[:HeaderSize-4], castagnoli)) - return b, nil + binary.BigEndian.PutUint32(b[4:8], uint32(h.Version)) + return b } -func (h *Header) UnmarshalBinary(b []byte) error { - if len(b) != HeaderSize { +func (h *IndexHeader) UnmarshalBinary(b []byte) error { + if len(b) != IndexHeaderSize { return ErrInvalidSize } if copy(h.Magic[:], b[0:4]); !bytes.Equal(h.Magic[:], symdbMagic[:]) { return ErrInvalidMagic } - // Reserved space may change from version to version. - if h.Version = binary.BigEndian.Uint32(b[4:8]); h.Version >= unknownVersion { + h.Version = FormatVersion(binary.BigEndian.Uint32(b[4:8])) + if h.Version >= unknownVersion { + return ErrUnknownVersion + } + return nil +} + +type Footer struct { + Magic [4]byte + Version FormatVersion + IndexOffset uint64 // Index header offset in the file. + _ [4]byte // Reserved for future use. + CRC uint32 // CRC of the footer. 
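+
+	// Wire layout, 24 bytes total, big endian:
+	//   [0:4]   magic
+	//   [4:8]   version
+	//   [8:16]  index offset
+	//   [16:20] reserved (zero)
+	//   [20:24] CRC32 (Castagnoli) of bytes [0:20]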
+} + +const FooterSize = int(unsafe.Sizeof(Footer{})) + +func (f *Footer) MarshalBinary() []byte { + b := make([]byte, FooterSize) + copy(b[0:4], f.Magic[:]) + binary.BigEndian.PutUint32(b[4:8], uint32(f.Version)) + binary.BigEndian.PutUint64(b[8:16], f.IndexOffset) + binary.BigEndian.PutUint32(b[16:20], 0) + binary.BigEndian.PutUint32(b[20:24], crc32.Checksum(b[0:20], castagnoli)) + return b +} + +func (f *Footer) UnmarshalBinary(b []byte) error { + if len(b) != FooterSize { + return ErrInvalidSize + } + if copy(f.Magic[:], b[0:4]); !bytes.Equal(f.Magic[:], symdbMagic[:]) { + return ErrInvalidMagic + } + f.Version = FormatVersion(binary.BigEndian.Uint32(b[4:8])) + if f.Version >= unknownVersion { return ErrUnknownVersion } + f.IndexOffset = binary.BigEndian.Uint64(b[8:16]) + f.CRC = binary.BigEndian.Uint32(b[20:24]) + if crc32.Checksum(b[0:20], castagnoli) != f.CRC { + return ErrInvalidCRC + } return nil } @@ -128,13 +199,15 @@ type TOC struct { Entries []TOCEntry } +// TOCEntry refers to a section within the index. +// Offset is relative to the header offset. type TOCEntry struct { Offset int64 Size int64 } func (toc *TOC) Size() int { - return tocEntrySize * tocEntries + return tocEntrySize * tocEntriesTotal } func (toc *TOC) MarshalBinary() ([]byte, error) { @@ -171,7 +244,8 @@ func (h *TOCEntry) unmarshal(b []byte) { type PartitionHeaders []*PartitionHeader type PartitionHeader struct { - Partition uint64 + Partition uint64 + // TODO(kolesnikovae): Switch to SymbolsBlock encoding. Stacktraces []StacktraceBlockHeader V2 *PartitionHeaderV2 V3 *PartitionHeaderV3 @@ -192,7 +266,7 @@ func (h *PartitionHeaders) WriteTo(dst io.Writer) (_ int64, err error) { w.write(buf) for _, p := range *h { if p.V3 == nil { - return 0, fmt.Errorf("v2 format is not supported") + return 0, fmt.Errorf("only v3 format is supported") } buf = slices.GrowLen(buf, int(p.Size())) p.marshal(buf) @@ -226,7 +300,7 @@ func (h *PartitionHeaders) UnmarshalV2(b []byte) error { return h.unmarshal(b, F func (h *PartitionHeaders) UnmarshalV3(b []byte) error { return h.unmarshal(b, FormatV3) } -func (h *PartitionHeaders) unmarshal(b []byte, version int) error { +func (h *PartitionHeaders) unmarshal(b []byte, version FormatVersion) error { partitions := binary.BigEndian.Uint32(b[0:4]) b = b[4:] *h = make(PartitionHeaders, partitions) @@ -255,7 +329,7 @@ func (h *PartitionHeader) marshal(buf []byte) { marshalSymbolsBlockReferences(buf[n:], h.V3.Strings) } -func (h *PartitionHeader) unmarshal(buf []byte, version int) (err error) { +func (h *PartitionHeader) unmarshal(buf []byte, version FormatVersion) (err error) { h.Partition = binary.BigEndian.Uint64(buf[0:8]) h.Stacktraces = make([]StacktraceBlockHeader, int(binary.BigEndian.Uint32(buf[8:12]))) switch version { @@ -461,7 +535,7 @@ func (r *RowRangeReference) unmarshal(b []byte) { r.Rows = binary.BigEndian.Uint32(b[8:12]) } -func ReadIndexFile(b []byte) (f IndexFile, err error) { +func OpenIndex(b []byte) (f IndexFile, err error) { s := len(b) if !f.assertSizeIsValid(b) { return f, ErrInvalidSize @@ -470,10 +544,10 @@ func ReadIndexFile(b []byte) (f IndexFile, err error) { if f.CRC != crc32.Checksum(b[:s+indexChecksumOffset], castagnoli) { return f, ErrInvalidCRC } - if err = f.Header.UnmarshalBinary(b[:HeaderSize]); err != nil { + if err = f.Header.UnmarshalBinary(b[:IndexHeaderSize]); err != nil { return f, fmt.Errorf("unmarshal header: %w", err) } - if err = f.TOC.UnmarshalBinary(b[HeaderSize:f.dataOffset()]); err != nil { + if err = 
f.TOC.UnmarshalBinary(b[IndexHeaderSize:f.dataOffset()]); err != nil { return f, fmt.Errorf("unmarshal table of contents: %w", err) } @@ -507,22 +581,21 @@ func ReadIndexFile(b []byte) (f IndexFile, err error) { } func (f *IndexFile) assertSizeIsValid(b []byte) bool { - return len(b) >= HeaderSize+f.TOC.Size()+checksumSize + return len(b) >= IndexHeaderSize+f.TOC.Size()+checksumSize } func (f *IndexFile) dataOffset() int { - return HeaderSize + f.TOC.Size() + return IndexHeaderSize + f.TOC.Size() } func (f *IndexFile) WriteTo(dst io.Writer) (n int64, err error) { checksum := crc32.New(castagnoli) w := withWriterOffset(io.MultiWriter(dst, checksum), 0) - headerBytes, _ := f.Header.MarshalBinary() - if _, err = w.Write(headerBytes); err != nil { + if _, err = w.Write(f.Header.MarshalBinary()); err != nil { return w.offset, fmt.Errorf("header write: %w", err) } - toc := TOC{Entries: make([]TOCEntry, tocEntries)} + toc := TOC{Entries: make([]TOCEntry, tocEntriesTotal)} toc.Entries[tocEntryPartitionHeaders] = TOCEntry{ Offset: int64(f.dataOffset()), Size: f.PartitionHeaders.Size(), @@ -650,7 +723,7 @@ func (d *symbolsDecoder[T]) decode(dst []T, r io.Reader) error { return nil } if len(dst) < int(d.h.Length) { - return fmt.Errorf("%w: buffer too short", ErrInvalidSize) + return fmt.Errorf("decoder buffer too short (format %d)", d.h.Format) } blocks := int((d.h.Length + d.h.BlockSize - 1) / d.h.BlockSize) for i := 0; i < blocks; i++ { @@ -658,8 +731,37 @@ func (d *symbolsDecoder[T]) decode(dst []T, r io.Reader) error { hi := math.Min(lo+int(d.h.BlockSize), int(d.h.Length)) block := dst[lo:hi] if err := d.d.decode(r, block); err != nil { - return err + return fmt.Errorf("malformed block (format %d): %w", d.h.Format, err) } } return nil } + +// NOTE(kolesnikovae): delta.BinaryPackedEncoding may +// silently fail on malformed data, producing empty slice. 
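+// The wrappers below therefore verify the decoded length themselves and
+// report ErrInvalidSize on a mismatch rather than trusting the decoder's
+// error value alone.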
+ +func decodeBinaryPackedInt32(dst []int32, data []byte, length int) ([]int32, error) { + var enc delta.BinaryPackedEncoding + var err error + dst, err = enc.DecodeInt32(dst, data) + if err != nil { + return dst, err + } + if len(dst) != length { + return dst, fmt.Errorf("%w: binary packed: expected %d, got %d", ErrInvalidSize, length, len(dst)) + } + return dst, nil +} + +func decodeBinaryPackedInt64(dst []int64, data []byte, length int) ([]int64, error) { + var enc delta.BinaryPackedEncoding + var err error + dst, err = enc.DecodeInt64(dst, data) + if err != nil { + return dst, err + } + if len(dst) != length { + return dst, fmt.Errorf("%w: binary packed: expected %d, got %d", ErrInvalidSize, length, len(dst)) + } + return dst, nil +} diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go index 71696339c2..9fbbbd8c74 100644 --- a/pkg/phlaredb/symdb/functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -154,13 +154,12 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct return fmt.Errorf("functions buffer is too short") } - var enc delta.BinaryPackedEncoding d.ints = slices.GrowLen(d.ints, int(d.header.FunctionsLen)) d.buf = slices.GrowLen(d.buf, int(d.header.NameSize)) if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.FunctionsLen)) if err != nil { return err } @@ -172,7 +171,7 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.FunctionsLen)) if err != nil { return err } @@ -184,7 +183,7 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.FunctionsLen)) if err != nil { return err } @@ -196,7 +195,7 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.FunctionsLen)) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index 653fbae4a8..74b7a4216b 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -198,13 +198,12 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat return fmt.Errorf("locations buffer: %w", ErrInvalidSize) } - var enc delta.BinaryPackedEncoding // First we decode mapping_id and assign them to locations. d.buf = slices.GrowLen(d.buf, int(d.header.MappingSize)) if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.mappings, err = enc.DecodeInt32(d.mappings, d.buf) + d.mappings, err = decodeBinaryPackedInt32(d.mappings, d.buf, int(d.header.LocationsLen)) if err != nil { return err } @@ -224,8 +223,8 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.lines = slices.GrowLen(d.lines, int(d.header.LinesLen)) - d.lines, err = enc.DecodeInt32(d.lines, d.buf) + // Lines are encoded as pairs of uint32 (function_id and line number). 
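+	// LinesLen counts line entries, and each entry decodes to two int32
+	// values (function_id, line), hence the expected length of
+	// LinesLen*2 below.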
+ d.lines, err = decodeBinaryPackedInt32(d.lines, d.buf, int(d.header.LinesLen)*2) if err != nil { return err } @@ -234,9 +233,15 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat // In most cases we end up here. if d.header.AddrSize == 0 && d.header.IsFoldedSize == 0 { var o int // Offset within the lines slice. + // In case if the block is malformed, an invalid + // line count may cause an out-of-bounds panic. + maxLines := len(lines) for i := 0; i < len(locations); i++ { locations[i].MappingId = uint32(d.mappings[i]) n := o + int(d.lineCount[i]) + if n > maxLines { + return fmt.Errorf("%w: location lines out of bounds", ErrInvalidSize) + } locations[i].Line = lines[o:n] o = n } @@ -249,8 +254,7 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.address = slices.GrowLen(d.address, int(d.header.LocationsLen)) - d.address, err = enc.DecodeInt64(d.address, d.buf) + d.address, err = decodeBinaryPackedInt64(d.address, d.buf, int(d.header.LocationsLen)) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index 7eede61b4f..55eb7beefb 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -203,14 +203,13 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping return fmt.Errorf("mappings buffer is too short") } - var enc delta.BinaryPackedEncoding d.ints = slices.GrowLen(d.ints, int(d.header.MappingsLen)) d.buf = slices.GrowLen(d.buf, int(d.header.FileNameSize)) if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.MappingsLen)) if err != nil { return err } @@ -222,7 +221,7 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.MappingsLen)) if err != nil { return err } @@ -234,7 +233,7 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.MappingsLen)) if err != nil { return err } @@ -246,12 +245,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping } if d.header.MemoryStartSize > 0 { - d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen)) d.buf = slices.GrowLen(d.buf, int(d.header.MemoryStartSize)) if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints64, err = enc.DecodeInt64(d.ints64, d.buf) + d.ints64, err = decodeBinaryPackedInt64(d.ints64, d.buf, int(d.header.MappingsLen)) if err != nil { return err } @@ -260,12 +258,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping } } if d.header.MemoryLimitSize > 0 { - d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen)) d.buf = slices.GrowLen(d.buf, int(d.header.MemoryLimitSize)) if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints64, err = enc.DecodeInt64(d.ints64, d.buf) + d.ints64, err = decodeBinaryPackedInt64(d.ints64, d.buf, int(d.header.MappingsLen)) if err != nil { return err } @@ -274,12 +271,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings 
[]v1.InMemoryMapping } } if d.header.FileOffsetSize > 0 { - d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen)) d.buf = slices.GrowLen(d.buf, int(d.header.FileOffsetSize)) if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints64, err = enc.DecodeInt64(d.ints64, d.buf) + d.ints64, err = decodeBinaryPackedInt64(d.ints64, d.buf, int(d.header.MappingsLen)) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/stacktrace_tree.go b/pkg/phlaredb/symdb/stacktrace_tree.go index d4cc9ef749..de726917ec 100644 --- a/pkg/phlaredb/symdb/stacktrace_tree.go +++ b/pkg/phlaredb/symdb/stacktrace_tree.go @@ -260,6 +260,9 @@ func (d *treeDecoder) unmarshal(t *parentPointerTree, r io.Reader) error { } eof = true } + // len(b) is always >= b.Buffered(), + // therefore Discard does not invalidate + // the buffer. if _, err = buf.Discard(len(b)); err != nil { return err } @@ -272,12 +275,14 @@ func (d *treeDecoder) unmarshal(t *parentPointerTree, r io.Reader) error { xn := len(t.nodes) - np // remaining nodes // Note that g should always be a multiple of 4. g = g[:math.Min((xn+xn%2)*2, d.groupBuffer)] - var gp int - + if len(g)%4 != 0 { + return io.ErrUnexpectedEOF + } // Check if there is a remainder. If this is the case, // decode the group and advance gp. + var gp int if len(rb) > 0 { - // It's expected that r contains a single complete group. + // It's expected that rb contains a single complete group. m := groupvarint.BytesUsed[rb[0]] - len(rb) if m >= (len(b) + len(rb)) { return io.ErrUnexpectedEOF @@ -295,12 +300,15 @@ func (d *treeDecoder) unmarshal(t *parentPointerTree, r io.Reader) error { // Re-fill g. gi, n, rn := decodeU32Groups(g[gp:], b[read:]) gp += gi - read += n + rn // Mark remainder bytes as read, we copy them. + read += n + rn // Mark the remaining bytes as read; we copy them. if rn > 0 { // If there is a remainder, it is copied and decoded on // the next Peek. This should not be possible with eof. rb = append(rb, b[len(b)-rn:]...) } + if len(g) == 0 && len(rb) == 0 { + break + } // g is full, or no more data in buf. for i := 0; i < len(g[:gp])-1; i += 2 { diff --git a/pkg/phlaredb/symdb/stacktrace_tree_test.go b/pkg/phlaredb/symdb/stacktrace_tree_test.go index 83b2dd09f2..55eef46a2f 100644 --- a/pkg/phlaredb/symdb/stacktrace_tree_test.go +++ b/pkg/phlaredb/symdb/stacktrace_tree_test.go @@ -88,7 +88,6 @@ func Test_stacktrace_tree_encoding_group(t *testing.T) { } func Test_stacktrace_tree_encoding_rand(t *testing.T) { - // TODO: Fuzzing. nodes := make([]node, 1<<20) for i := range nodes { nodes[i] = node{ diff --git a/pkg/phlaredb/symdb/symdb.go b/pkg/phlaredb/symdb/symdb.go index 588296c5a2..c78b71fa09 100644 --- a/pkg/phlaredb/symdb/symdb.go +++ b/pkg/phlaredb/symdb/symdb.go @@ -2,7 +2,6 @@ package symdb import ( "context" - "fmt" "sort" "sync" "time" @@ -78,17 +77,12 @@ type SymDB struct { type Config struct { Dir string Stacktraces StacktracesConfig - Parquet ParquetConfig } type StacktracesConfig struct { MaxNodesPerChunk uint32 } -type ParquetConfig struct { - MaxBufferRowCount int -} - type MemoryStats struct { StacktracesSize uint64 LocationsSize uint64 @@ -109,16 +103,12 @@ const statsUpdateInterval = 5 * time.Second func DefaultConfig() *Config { return &Config{ - Dir: DefaultDirName, Stacktraces: StacktracesConfig{ // At the moment chunks are loaded in memory at once. // Due to the fact that chunking causes some duplication, // it's better to keep them large. 
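			// (4 << 20 is 4,194,304 nodes per chunk.)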
MaxNodesPerChunk: 4 << 20, }, - Parquet: ParquetConfig{ - MaxBufferRowCount: 100 << 10, - }, } } @@ -127,11 +117,6 @@ func (c *Config) WithDirectory(dir string) *Config { return c } -func (c *Config) WithParquetConfig(pc ParquetConfig) *Config { - c.Parquet = pc - return c -} - func NewSymDB(c *Config) *SymDB { if c == nil { c = DefaultConfig() @@ -262,13 +247,7 @@ func (s *SymDB) Flush() error { sort.Slice(partitions, func(i, j int) bool { return partitions[i].header.Partition < partitions[j].header.Partition }) - if err := s.writer.createDir(); err != nil { - return err - } - if err := s.writer.writePartitions(partitions); err != nil { - return fmt.Errorf("writing partitions: %w", err) - } - return s.writer.Flush() + return s.writer.writePartitions(partitions) } func (s *SymDB) Files() []block.File { diff --git a/pkg/phlaredb/symdb/symdb_test.go b/pkg/phlaredb/symdb/symdb_test.go index dccb731650..c5bb7d9576 100644 --- a/pkg/phlaredb/symdb/symdb_test.go +++ b/pkg/phlaredb/symdb/symdb_test.go @@ -53,9 +53,6 @@ func (s *memSuite) init() { Stacktraces: StacktracesConfig{ MaxNodesPerChunk: 1 << 10, }, - Parquet: ParquetConfig{ - MaxBufferRowCount: 512, - }, } } if s.db == nil { @@ -155,9 +152,6 @@ func Test_Stats(t *testing.T) { Stacktraces: StacktracesConfig{ MaxNodesPerChunk: 4 << 20, }, - Parquet: ParquetConfig{ - MaxBufferRowCount: 100 << 10, - }, }, } diff --git a/pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb b/pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb deleted file mode 100644 index 7eb2bf119673f22c8a86ea5921c1272fec00df68..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 704 zcmXS}%r#_SU|Uefy&-M z*epoW3=HhyvwlMa=G=zRJk4+gj0{ly41a+7UqZxD^|#3=PlbrDgxb%%0Ir`2rk@Gu zMn)@a`ZrGq?uCdqtz}@~-PpWD>JZR-=NK5k_Q321DyadQ!6XsSt@;JX0kXmJKpF<> zq=90S#Lk&20PXq(l}8RIpu!sF#g?wX@MPKzl}Gjr$S#O^jJ7clQ{iC=l7qU1h_Gbz zyb0Cc455K~NeN5FFsS|Tuw(?rIMjY3!je&cHQumf)C8J^CoCDYCGm&lnt!;$QcU%* FHvkTYIhg0G=xbao?G<`kOO3c z<$*K|)JX%yCW)OhRRCJ{3o4HsQb2_@%!@5uZGqC>qvuVqb_Ski2o2OrWN3ozXJ8D2IshJ=jKEljI)I4aWYk{`2~HfL$*2i5 n3u|b`LLAJfEr~xk*Zjj3oQ_A{$f6`-#z Date: Sat, 27 Apr 2024 17:43:19 +0800 Subject: [PATCH 24/36] go mod --- examples/golang-pgo/go.work.sum | 105 +------------------------------- 1 file changed, 2 insertions(+), 103 deletions(-) diff --git a/examples/golang-pgo/go.work.sum b/examples/golang-pgo/go.work.sum index d6b3c94a30..17fedccdc6 100644 --- a/examples/golang-pgo/go.work.sum +++ b/examples/golang-pgo/go.work.sum @@ -1,132 +1,31 @@ cloud.google.com/go/compute v1.23.0 h1:tP41Zoavr8ptEqaW6j+LQOnyBBhO7OkOMAGrgLopTwY= -cloud.google.com/go/compute v1.23.0/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM= cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= -cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= -github.com/agoda-com/opentelemetry-logs-go v0.4.1 h1:PWGqIxkEEg4HIjnHsHmNa+yGu0lhxHz4XPGKeT4o6T0= -github.com/agoda-com/opentelemetry-logs-go v0.4.1/go.mod h1:CeDuVaK9yCWN+8UjOW8AciYJE0rl7K/mw4ejBntGYkc= github.com/antihax/optional v1.0.0 h1:xK2lYat7ZLaVVcIuj82J8kIro4V6kDe0AUDFboUCwcg= -github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= -github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= 
-github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe h1:QQ3GSy+MqSHxm/d8nCtnAiZdYFd45cYZPs8vOOIYKfk= -github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k= -github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= -github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/envoyproxy/go-control-plane v0.11.1 h1:wSUXTlLfiAQRWs2F+p+EKOY9rUyis1MyGqJ2DIk5HpM= -github.com/envoyproxy/go-control-plane v0.11.1/go.mod h1:uhMcXKCQMEJHiAb0w+YGefQLaTEw+YhGluxZkrTmD0g= github.com/envoyproxy/protoc-gen-validate v1.0.2 h1:QkIBuU5k+x7/QXPvPPnWXWlCdaBFApVqftFV6k087DA= -github.com/envoyproxy/protoc-gen-validate v1.0.2/go.mod h1:GpiZQP3dDbg4JouG/NNS7QWXpgx6x8QiMKdmN72jogE= -github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= -github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= -github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= -github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= -github.com/golang/glog v1.1.2/go.mod h1:zR+okUeTbrL6EL3xHUDxZuEtGv04p5shwip1+mL/rLQ= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= -github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/grafana/otel-profiling-go v0.5.1 h1:stVPKAFZSa7eGiqbYuG25VcqYksR6iWvF3YH66t4qL8= -github.com/grafana/otel-profiling-go v0.5.1/go.mod h1:ftN/t5A/4gQI19/8MoWurBEtC6gFw8Dns1sJZ9W4Tls= -github.com/grafana/pyroscope-go v1.1.1 h1:PQoUU9oWtO3ve/fgIiklYuGilvsm8qaGhlY4Vw6MAcQ= -github.com/grafana/pyroscope-go v1.1.1/go.mod h1:Mw26jU7jsL/KStNSGGuuVYdUq7Qghem5P8aXYXSXG88= -github.com/grafana/pyroscope-go/godeltaprof v0.1.6 h1:nEdZ8louGAplSvIJi1HVp7kWvFvdiiYg3COLlTwJiFo= -github.com/grafana/pyroscope-go/godeltaprof v0.1.6/go.mod 
h1:Tk376Nbldo4Cha9RgiU7ik8WKFkNpfds98aUzS8omLE= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1 h1:6UKoz5ujsI55KNpsJH3UwCq3T8kKbZwNZBNPuTTje8U= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1/go.mod h1:YvJ2f6MplWDhfxiUC3KpyTy76kYUZA4W3pTv/wdKQ9Y= -github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4slzwA= -github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= -github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= -github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= -github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/fastuuid v1.2.0 h1:Ppwyp6VYCF1nvBTXL3trRso7mXMlRrw9ooo375wvi2s= -github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= -github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= -github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.42.0 h1:pginetY7+onl4qN1vl0xW/V/v6OBZ0vVdH+esuJgvmM= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.42.0/go.mod h1:XiYsayHc36K3EByOO6nbAXnAWbrUxdjUROCEeeROOH8= -go.opentelemetry.io/otel v1.21.0 h1:hzLeKBZEL7Okw2mGzZ0cc4k/A7Fta0uoPgaJCr8fsFc= -go.opentelemetry.io/otel v1.21.0/go.mod h1:QZzNPQPm1zLX4gZK4cMi+71eaorMSGT3A4znnUvNNEo= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0 h1:DeFD0VgTZ+Cj6hxravYYZE2W4GlneVH81iAOPjZkzk8= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0/go.mod h1:GijYcYmNpX1KazD5JmWGsi4P7dDTTTnfv1UbGn84MnU= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.20.0 h1:CsBiKCiQPdSjS+MlRiqeTI9JDDpSuk0Hb6QTRfwer8k= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.20.0/go.mod h1:CMJYNAfooOwSZSAmAeMUV1M+TXld3BiK++z9fqIm2xk= -go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.20.0 h1:4s9HxB4azeeQkhY0GE5wZlMj4/pz8tE5gx2OQpGUw58= -go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.20.0/go.mod h1:djVA3TUJ2fSdMX0JE5XxFBOaZzprElJoP7fD4vnV2SU= -go.opentelemetry.io/otel/metric v1.21.0 h1:tlYWfeo+Bocx5kLEloTjbcDwBuELRrIFxwdQ36PlJu4= -go.opentelemetry.io/otel/metric v1.21.0/go.mod h1:o1p3CA8nNHW8j5yuQLdc1eeqEaPfzug24uvsyIEJRWM= -go.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZXQ8= 
-go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E= -go.opentelemetry.io/otel/trace v1.21.0 h1:WD9i5gzvoUPuXIXH24ZNBudiarZDKuekPqi/E8fpfLc= -go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ= -go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= -go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= -go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= -go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4= golang.org/x/crypto v0.15.0 h1:frVn1TEaCEaZcn3Tmd7Y2b5KKPaZ+I32Q2OA3kYp5TA= golang.org/x/crypto v0.15.0/go.mod h1:4ChreQoLWfG3xLDer1WdlH5NdlQ3+mwnQq1YTKY+72g= +golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.18.0 h1:mIYleuAkSbHh0tCv7RvjL3F6ZVbLjq4+R7zbOn3Kokg= golang.org/x/net v0.18.0/go.mod h1:/czyP5RqHAH4odGYxBJ1qz0+CE5WZ+2j1YgoEo8F2jQ= golang.org/x/oauth2 v0.13.0 h1:jDDenyj+WgFtmV3zYVoi8aE2BwtXFLWOA67ZfNWftiY= -golang.org/x/oauth2 v0.13.0/go.mod h1:/JMhi4ZRXAf4HG9LiNmxvk+45+96RUlVThiH8FzNBn0= golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= -golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.14.0 h1:LGK9IlZ8T9jvdy6cTdfKUCltatMFOehAQo9SRC46UQ8= golang.org/x/term v0.14.0/go.mod h1:TySc+nGkYR6qt8km8wUhuFRTVSMIX3XPR58y2lC8vww= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/genproto v0.0.0-20231030173426-d783a09b4405 h1:I6WNifs6pF9tNdSob2W24JtyxIYjzFB9qDlpUC76q+U= -google.golang.org/genproto v0.0.0-20231030173426-d783a09b4405/go.mod h1:3WDQMjmJk36UQhjQ89emUzb1mdaHcPeeAh4SCBKznB4= -google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17 h1:JpwMPBpFN3uKhdaekDpiNlImDdkUAyiJ6ez/uxGaUSo= -google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17/go.mod h1:0xJLfVdJqpAPl8tDg1ujOCGzx6LFLttXT5NhllGOXY4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 h1:Jyp0Hsi0bmHXG6k9eATXoYtjd6e2UzZ1SCn/wIupY14= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17/go.mod h1:oQ5rr10WTTMvP4A36n8JpR1OrO1BEiV4f78CneXZxkA= -google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= -google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= -google.golang.org/protobuf v1.26.0-rc.1/go.mod 
h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
-google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
 google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
-google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
-gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
-gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
-gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

From b6b25c343dddf3c3920d38551ca0eadf72ab30bc Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Sat, 27 Apr 2024 17:50:37 +0800
Subject: [PATCH 25/36] fix comments

---
 pkg/phlaredb/symdb/locations.go | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go
index 74b7a4216b..07e79e01b6 100644
--- a/pkg/phlaredb/symdb/locations.go
+++ b/pkg/phlaredb/symdb/locations.go
@@ -60,7 +60,7 @@ type locationsBlockEncoder struct {
 	header locationsBlockHeader
 
 	mapping []int32
-	// Assuming there is no locations with more than 255 lines.
+	// Assuming there are no locations with more than 255 lines.
 	// We could even use a nibble (4 bits), but there are locations
 	// with 10 or more functions, therefore there is a chance that
 	// the capacity of 2^4 is not enough in all cases.
@@ -89,7 +89,6 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat
 	for i, loc := range locations {
 		e.mapping[i] = int32(loc.MappingId)
 		e.lineCount[i] = byte(len(loc.Line))
-		// Append lines but the first one.
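 	// The number of lines is stored in a single byte, and the loop
 	// below appends at most maxLocationLines lines per location.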
for j := 0; j < len(loc.Line) && j < maxLocationLines; j++ { e.lines = append(e.lines, int32(loc.Line[j].FunctionId), From d491c4249e38904d2eaa7abb7138c6a970d8319d Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Sun, 28 Apr 2024 13:04:19 +0800 Subject: [PATCH 26/36] add unit tests --- pkg/phlaredb/symdb/block_reader.go | 8 +- pkg/phlaredb/symdb/block_reader_test.go | 30 +++---- pkg/phlaredb/symdb/functions.go | 2 +- pkg/phlaredb/symdb/functions_test.go | 62 +++++++++++++ pkg/phlaredb/symdb/locations.go | 9 +- pkg/phlaredb/symdb/locations_test.go | 111 ++++++++++++++++++++++++ pkg/phlaredb/symdb/mappings.go | 2 +- pkg/phlaredb/symdb/mappings_test.go | 109 +++++++++++++++++++++++ pkg/phlaredb/symdb/strings.go | 2 +- pkg/phlaredb/symdb/strings_test.go | 24 ++--- pkg/phlaredb/symdb/symdb_test.go | 4 + 11 files changed, 320 insertions(+), 43 deletions(-) create mode 100644 pkg/phlaredb/symdb/functions_test.go create mode 100644 pkg/phlaredb/symdb/locations_test.go create mode 100644 pkg/phlaredb/symdb/mappings_test.go diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index 79a46f3b76..e7eab9d19f 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -309,7 +309,7 @@ func (p *partition) initTables(h *PartitionHeader) (err error) { reader: p.reader, header: h.V3.Locations, } - if locations.dec, err = locationsDecoder(h.V3.Locations); err != nil { + if locations.dec, err = newLocationsDecoder(h.V3.Locations); err != nil { return err } p.locations = locations @@ -318,7 +318,7 @@ func (p *partition) initTables(h *PartitionHeader) (err error) { reader: p.reader, header: h.V3.Mappings, } - if mappings.dec, err = mappingsDecoder(h.V3.Mappings); err != nil { + if mappings.dec, err = newMappingsDecoder(h.V3.Mappings); err != nil { return err } p.mappings = mappings @@ -327,7 +327,7 @@ func (p *partition) initTables(h *PartitionHeader) (err error) { reader: p.reader, header: h.V3.Functions, } - if functions.dec, err = functionsDecoder(h.V3.Functions); err != nil { + if functions.dec, err = newFunctionsDecoder(h.V3.Functions); err != nil { return err } p.functions = functions @@ -336,7 +336,7 @@ func (p *partition) initTables(h *PartitionHeader) (err error) { reader: p.reader, header: h.V3.Strings, } - if strings.dec, err = stringsDecoder(h.V3.Strings); err != nil { + if strings.dec, err = newStringsDecoder(h.V3.Strings); err != nil { return err } p.strings = strings diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index 67521b2350..b037108cff 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -53,21 +53,6 @@ func Test_write_block_fixture(t *testing.T) { require.NoError(t, os.Rename(b.config.Dir, fixtureDir)) } -func Fuzz_ReadIndexFile_v12(f *testing.F) { - files := []string{ - "testdata/symbols/v2/index.symdb", - "testdata/symbols/v1/index.symdb", - } - for _, path := range files { - data, err := os.ReadFile(path) - require.NoError(f, err) - f.Add(data) - } - f.Fuzz(func(_ *testing.T, b []byte) { - _, _ = OpenIndex(b) - }) -} - func Test_Reader_Open_v3(t *testing.T) { // The block contains two partitions (0 and 1), each partition // stores symbols of the testdata/profile.pb.gz profile @@ -194,6 +179,21 @@ func Test_Reader_Open_v1(t *testing.T) { require.NoError(t, err) } +func Fuzz_ReadIndexFile_v12(f *testing.F) { + files := []string{ + "testdata/symbols/v2/index.symdb", + "testdata/symbols/v1/index.symdb", + } + for _, path := 
range files { + data, err := os.ReadFile(path) + require.NoError(f, err) + f.Add(data) + } + f.Fuzz(func(_ *testing.T, b []byte) { + _, _ = OpenIndex(b) + }) +} + type mockStacktraceInserter struct{ mock.Mock } func (m *mockStacktraceInserter) InsertStacktrace(stacktraceID uint32, locations []int32) { diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go index 9fbbbd8c74..ef485d0e2e 100644 --- a/pkg/phlaredb/symdb/functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -127,7 +127,7 @@ type functionsBlockDecoder struct { buf []byte } -func functionsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryFunction], error) { +func newFunctionsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryFunction], error) { if h.Format == BlockFunctionsV1 { return newSymbolsDecoder[v1.InMemoryFunction](h, &functionsBlockDecoder{format: h.Format}), nil } diff --git a/pkg/phlaredb/symdb/functions_test.go b/pkg/phlaredb/symdb/functions_test.go new file mode 100644 index 0000000000..190e42ef56 --- /dev/null +++ b/pkg/phlaredb/symdb/functions_test.go @@ -0,0 +1,62 @@ +package symdb + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/require" + + v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" +) + +func Test_FunctionsEncoding(t *testing.T) { + type testCase struct { + description string + funcs []v1.InMemoryFunction + } + + testCases := []testCase{ + { + description: "empty", + funcs: []v1.InMemoryFunction{}, + }, + { + description: "zero", + funcs: []v1.InMemoryFunction{{}}, + }, + { + description: "single function", + funcs: []v1.InMemoryFunction{ + {Name: 1, SystemName: 2, Filename: 3, StartLine: 4}, + }, + }, + { + description: "multiline blocks", + funcs: []v1.InMemoryFunction{ + {Name: 1, SystemName: 2, Filename: 3, StartLine: 4}, + {Name: 5, SystemName: 6, Filename: 7, StartLine: 8}, + {Name: 9, SystemName: 10, Filename: 11}, + {}, + {Name: 13, SystemName: 14, Filename: 15, StartLine: 16}, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.description, func(t *testing.T) { + var buf bytes.Buffer + w := newTestFileWriter(&buf) + e := newFunctionsEncoder() + e.blockSize = 3 + h, err := writeSymbolsBlock(w, tc.funcs, e) + require.NoError(t, err) + + d, err := newFunctionsDecoder(h) + require.NoError(t, err) + out := make([]v1.InMemoryFunction, h.Length) + require.NoError(t, d.decode(out, &buf)) + require.Equal(t, tc.funcs, out) + }) + } +} diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index 07e79e01b6..5da7ae0f9e 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -84,7 +84,7 @@ func (e *locationsBlockEncoder) format() SymbolsBlockFormat { func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) error { e.initWrite(len(locations)) - var addr int64 + var addr uint64 var folded bool for i, loc := range locations { e.mapping[i] = int32(loc.MappingId) @@ -94,7 +94,7 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat int32(loc.Line[j].FunctionId), loc.Line[j].Line) } - addr |= int64(loc.Address) + addr |= loc.Address e.addr[i] = int64(loc.Address) folded = folded || loc.IsFolded e.folded[i] = loc.IsFolded @@ -170,7 +170,7 @@ type locationsBlockDecoder struct { buf []byte } -func locationsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryLocation], error) { +func newLocationsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryLocation], error) { if h.Format == BlockLocationsV1 { return 
newSymbolsDecoder[v1.InMemoryLocation](h, &locationsBlockDecoder{format: h.Format}), nil } @@ -248,6 +248,8 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat } // Otherwise, inspect all the optional fields. + d.address = slices.GrowLen(d.address, int(d.header.LocationsLen)) + d.folded = slices.GrowLen(d.folded, int(d.header.LocationsLen)) if int(d.header.AddrSize) > 0 { d.buf = slices.GrowLen(d.buf, int(d.header.AddrSize)) if _, err = io.ReadFull(r, d.buf); err != nil { @@ -263,7 +265,6 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.folded = slices.GrowLen(d.folded, int(d.header.LocationsLen)) decodeBoolean(d.folded, d.buf) } diff --git a/pkg/phlaredb/symdb/locations_test.go b/pkg/phlaredb/symdb/locations_test.go new file mode 100644 index 0000000000..729fe59500 --- /dev/null +++ b/pkg/phlaredb/symdb/locations_test.go @@ -0,0 +1,111 @@ +package symdb + +import ( + "bytes" + "math" + "testing" + + "github.com/stretchr/testify/require" + + v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" +) + +func Test_LocationsEncoding(t *testing.T) { + type testCase struct { + description string + locs []v1.InMemoryLocation + } + + testCases := []testCase{ + { + description: "empty", + locs: []v1.InMemoryLocation{}, + }, + { + description: "zero", + locs: []v1.InMemoryLocation{{Line: []v1.InMemoryLine{}}}, + }, + { + description: "single location", + locs: []v1.InMemoryLocation{ + { + Address: math.MaxUint64, + MappingId: 1, + IsFolded: false, + Line: []v1.InMemoryLine{ + {FunctionId: 1, Line: 1}, + }, + }, + }, + }, + { + description: "multiline locations", + locs: []v1.InMemoryLocation{ + { + Line: []v1.InMemoryLine{ + {FunctionId: 1, Line: 1}, + }, + }, + { + Line: []v1.InMemoryLine{ + {FunctionId: 1, Line: 1}, + {FunctionId: 2, Line: 1}, + }, + }, + { + Line: []v1.InMemoryLine{ + {FunctionId: 1, Line: 1}, + {FunctionId: 2, Line: 1}, + {FunctionId: 3, Line: 1}, + }, + }, + }, + }, + { + description: "optional fields mix", + locs: []v1.InMemoryLocation{ + {Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}}, + {Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}}, + { + Address: math.MaxUint64, + MappingId: 1, + IsFolded: true, + Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}, + }, + {Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}}, + }, + }, + { + description: "optional fields mix split", + locs: []v1.InMemoryLocation{ + {Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}}, + {Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}}, + {Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}}, + { + Address: math.MaxUint64, + MappingId: 1, + IsFolded: true, + Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}, + }, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.description, func(t *testing.T) { + var buf bytes.Buffer + w := newTestFileWriter(&buf) + e := newLocationsEncoder() + e.blockSize = 3 + h, err := writeSymbolsBlock(w, tc.locs, e) + require.NoError(t, err) + + d, err := newLocationsDecoder(h) + require.NoError(t, err) + out := make([]v1.InMemoryLocation, h.Length) + require.NoError(t, d.decode(out, &buf)) + require.Equal(t, tc.locs, out) + }) + } +} diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index 55eb7beefb..f2885c6ef0 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -176,7 +176,7 @@ type mappingsBlockDecoder struct { buf []byte } -func mappingsDecoder(h 
SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryMapping], error) { +func newMappingsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryMapping], error) { if h.Format == BlockMappingsV1 { return newSymbolsDecoder[v1.InMemoryMapping](h, &mappingsBlockDecoder{format: h.Format}), nil } diff --git a/pkg/phlaredb/symdb/mappings_test.go b/pkg/phlaredb/symdb/mappings_test.go new file mode 100644 index 0000000000..406acea458 --- /dev/null +++ b/pkg/phlaredb/symdb/mappings_test.go @@ -0,0 +1,109 @@ +package symdb + +import ( + "bytes" + "math" + "testing" + + "github.com/stretchr/testify/require" + + v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" +) + +func Test_MappingsEncoding(t *testing.T) { + type testCase struct { + description string + mappings []v1.InMemoryMapping + } + + testCases := []testCase{ + { + description: "empty", + mappings: []v1.InMemoryMapping{}, + }, + { + description: "zero", + mappings: []v1.InMemoryMapping{{}}, + }, + { + description: "single mapping", + mappings: []v1.InMemoryMapping{ + { + MemoryStart: math.MaxUint64, + MemoryLimit: math.MaxUint64, + FileOffset: math.MaxUint64, + Filename: 1, + BuildId: 2, + HasFunctions: true, + HasFilenames: false, + HasLineNumbers: false, + HasInlineFrames: false, + }, + }, + }, + { + description: "optional fields mix", + mappings: []v1.InMemoryMapping{ + // Block size == 3 + {MemoryStart: math.MaxUint64}, + {}, + {}, + + {}, + {MemoryLimit: math.MaxUint64}, + {}, + + {}, + {}, + {FileOffset: math.MaxUint64}, + + {MemoryStart: math.MaxUint64}, + {MemoryLimit: math.MaxUint64}, + {FileOffset: math.MaxUint64}, + + {}, + {}, + {}, + }, + }, + { + description: "flag combinations", + mappings: []v1.InMemoryMapping{ + {HasFunctions: false, HasFilenames: false, HasLineNumbers: false, HasInlineFrames: false}, + {HasFunctions: false, HasFilenames: false, HasLineNumbers: false, HasInlineFrames: true}, + {HasFunctions: false, HasFilenames: false, HasLineNumbers: true, HasInlineFrames: false}, + {HasFunctions: false, HasFilenames: false, HasLineNumbers: true, HasInlineFrames: true}, + {HasFunctions: false, HasFilenames: true, HasLineNumbers: false, HasInlineFrames: false}, + {HasFunctions: false, HasFilenames: true, HasLineNumbers: false, HasInlineFrames: true}, + {HasFunctions: false, HasFilenames: true, HasLineNumbers: true, HasInlineFrames: false}, + {HasFunctions: false, HasFilenames: true, HasLineNumbers: true, HasInlineFrames: true}, + {HasFunctions: true, HasFilenames: false, HasLineNumbers: false, HasInlineFrames: false}, + {HasFunctions: true, HasFilenames: false, HasLineNumbers: false, HasInlineFrames: true}, + {HasFunctions: true, HasFilenames: false, HasLineNumbers: true, HasInlineFrames: false}, + {HasFunctions: true, HasFilenames: false, HasLineNumbers: true, HasInlineFrames: true}, + {HasFunctions: true, HasFilenames: true, HasLineNumbers: false, HasInlineFrames: false}, + {HasFunctions: true, HasFilenames: true, HasLineNumbers: false, HasInlineFrames: true}, + {HasFunctions: true, HasFilenames: true, HasLineNumbers: true, HasInlineFrames: false}, + {HasFunctions: true, HasFilenames: true, HasLineNumbers: true, HasInlineFrames: true}, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.description, func(t *testing.T) { + var buf bytes.Buffer + w := newTestFileWriter(&buf) + e := newMappingsEncoder() + e.blockSize = 3 + h, err := writeSymbolsBlock(w, tc.mappings, e) + require.NoError(t, err) + + d, err := newMappingsDecoder(h) + require.NoError(t, err) + out := make([]v1.InMemoryMapping, h.Length) + 
require.NoError(t, d.decode(out, &buf)) + require.Equal(t, tc.mappings, out) + }) + } +} diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go index 4400218f61..596f313957 100644 --- a/pkg/phlaredb/symdb/strings.go +++ b/pkg/phlaredb/symdb/strings.go @@ -113,7 +113,7 @@ type stringsBlockDecoder struct { buf []byte } -func stringsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[string], error) { +func newStringsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[string], error) { if h.Format == BlockStringsV1 { return newSymbolsDecoder[string](h, &stringsBlockDecoder{format: h.Format}), nil } diff --git a/pkg/phlaredb/symdb/strings_test.go b/pkg/phlaredb/symdb/strings_test.go index f462886043..ca95b03afa 100644 --- a/pkg/phlaredb/symdb/strings_test.go +++ b/pkg/phlaredb/symdb/strings_test.go @@ -12,7 +12,6 @@ func Test_StringsEncoding(t *testing.T) { type testCase struct { description string strings []string - blockSize int } testCases := []testCase{ @@ -26,7 +25,6 @@ func Test_StringsEncoding(t *testing.T) { "a", "b", }, - blockSize: 4, }, { description: "exact block size", @@ -36,7 +34,6 @@ func Test_StringsEncoding(t *testing.T) { "cde", "def", }, - blockSize: 4, }, { description: "greater than block size", @@ -47,7 +44,6 @@ func Test_StringsEncoding(t *testing.T) { "def", "e", }, - blockSize: 4, }, { description: "mixed encoding", @@ -56,7 +52,6 @@ func Test_StringsEncoding(t *testing.T) { "bcd", strings.Repeat("e", 256), }, - blockSize: 4, }, { description: "mixed encoding exact block", @@ -70,7 +65,6 @@ func Test_StringsEncoding(t *testing.T) { strings.Repeat("j", 256), strings.Repeat("h", 256), }, - blockSize: 4, }, } @@ -78,18 +72,14 @@ func Test_StringsEncoding(t *testing.T) { tc := tc t.Run(tc.description, func(t *testing.T) { var buf bytes.Buffer - e := newSymbolsEncoder[string](new(stringsBlockEncoder)) - if tc.blockSize > 0 { - e.blockSize = tc.blockSize - } - require.NoError(t, e.encode(&buf, tc.strings)) - - h := SymbolsBlockHeader{ - Length: uint32(len(tc.strings)), - BlockSize: uint32(e.blockSize), - } - d := newSymbolsDecoder[string](h, new(stringsBlockDecoder)) + w := newTestFileWriter(&buf) + e := newStringsEncoder() + e.blockSize = 4 + h, err := writeSymbolsBlock(w, tc.strings, e) + require.NoError(t, err) + d, err := newStringsDecoder(h) + require.NoError(t, err) out := make([]string, h.Length) require.NoError(t, d.decode(out, &buf)) require.Equal(t, tc.strings, out) diff --git a/pkg/phlaredb/symdb/symdb_test.go b/pkg/phlaredb/symdb/symdb_test.go index c5bb7d9576..a1bab91bc5 100644 --- a/pkg/phlaredb/symdb/symdb_test.go +++ b/pkg/phlaredb/symdb/symdb_test.go @@ -103,6 +103,10 @@ func (b *testBucket) GetRange(ctx context.Context, name string, off, length int6 return b.Bucket.GetRange(ctx, name, off, length) } +func newTestFileWriter(w io.Writer) *fileWriter { + return &fileWriter{w: &writerOffset{Writer: w}} +} + //nolint:unparam func pprofFingerprint(p *googlev1.Profile, typ int) [][2]uint64 { m := make(map[uint64]uint64, len(p.Sample)) From ea5f4a8b29746390ac7e6a75f9a7a74a1467e680 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Sun, 28 Apr 2024 13:19:58 +0800 Subject: [PATCH 27/36] ignore "unused" false-positive --- pkg/phlaredb/symdb/block_reader.go | 1 + pkg/phlaredb/symdb/block_reader_parquet.go | 1 + pkg/phlaredb/symdb/block_writer.go | 9 ++++----- pkg/phlaredb/symdb/format.go | 7 +------ pkg/phlaredb/symdb/functions.go | 1 + pkg/phlaredb/symdb/locations.go | 1 + pkg/phlaredb/symdb/mappings.go | 1 + pkg/phlaredb/symdb/strings.go | 1 + 8 files 
changed, 11 insertions(+), 11 deletions(-) diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index e7eab9d19f..34f0a5eb0e 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( diff --git a/pkg/phlaredb/symdb/block_reader_parquet.go b/pkg/phlaredb/symdb/block_reader_parquet.go index 364a678344..90ba014b85 100644 --- a/pkg/phlaredb/symdb/block_reader_parquet.go +++ b/pkg/phlaredb/symdb/block_reader_parquet.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index 585b498f2d..d489905279 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -14,11 +14,10 @@ import ( type writer struct { config *Config - index IndexFile - indexFile *fileWriter - dataFile *fileWriter - files []block.File - footer Footer + index IndexFile + dataFile *fileWriter + files []block.File + footer Footer stringsEncoder *symbolsEncoder[string] mappingsEncoder *symbolsEncoder[v1.InMemoryMapping] diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 780b965494..562519aaf9 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( @@ -523,12 +524,6 @@ type RowRangeReference struct { Rows uint32 } -func (r *RowRangeReference) marshal(b []byte) { - binary.BigEndian.PutUint32(b[0:4], r.RowGroup) - binary.BigEndian.PutUint32(b[4:8], r.Index) - binary.BigEndian.PutUint32(b[8:12], r.Rows) -} - func (r *RowRangeReference) unmarshal(b []byte) { r.RowGroup = binary.BigEndian.Uint32(b[0:4]) r.Index = binary.BigEndian.Uint32(b[4:8]) diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go index ef485d0e2e..66963dfbab 100644 --- a/pkg/phlaredb/symdb/functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index 5da7ae0f9e..78ac4cd988 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index f2885c6ef0..3094b74c44 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go index 596f313957..1992e85c1b 100644 --- a/pkg/phlaredb/symdb/strings.go +++ b/pkg/phlaredb/symdb/strings.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( From 6abb099eb0a993aecab9c49f71aefdea128fb92d Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 29 Apr 2024 10:15:05 +0800 Subject: [PATCH 28/36] update docs --- .../block-format/_index.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docs/sources/reference-pyroscope-architecture/block-format/_index.md b/docs/sources/reference-pyroscope-architecture/block-format/_index.md index 915c6b1b9e..d45a85480f 100644 --- a/docs/sources/reference-pyroscope-architecture/block-format/_index.md +++ b/docs/sources/reference-pyroscope-architecture/block-format/_index.md @@ -23,12 +23,7 @@ the block there are multiple files: * `profiles.parquet` [parquet] table that contains profiles. 
-* `symbols` sub-directory contains profiling symbols that provide a link between
-  the compiled or interpreted binary code and the original source code:
-  - A `index.symdb` file with meta information, which helps to find symbols for a specific profile.
-  - A `stacktraces.symdb` file contains stack traces compacted in the [parent pointer tree].
-  - Parquet tables for models referenced by stack traces:
-    `locations.parquet`, `functions.parquet`, `mappings.parquet`, `strings.parquet`.
+* `symbols.symdb` file that contains symbolic information for the profiles stored in the block.
 
 ## Data model

From 9ad268decf747b0ef3ef99e2766f685f2c1820ef Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Mon, 13 May 2024 14:09:32 +0800
Subject: [PATCH 29/36] add support for forward compatibility

---
 pkg/phlaredb/symdb/format.go                |  37 ++++++++++---
 pkg/phlaredb/symdb/functions.go             |  42 +++++++--------
 pkg/phlaredb/symdb/locations.go             |  43 +++++++--------
 pkg/phlaredb/symdb/mappings.go              |  40 +++++++-------
 pkg/phlaredb/symdb/partition_memory.go      |   3 +-
 pkg/phlaredb/symdb/strings.go               |  49 ++++++++----------
 .../symdb/testdata/symbols/v3/symbols.symdb | Bin 89300 -> 89300 bytes
 7 files changed, 114 insertions(+), 100 deletions(-)

diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go
index 562519aaf9..0dbbcae529 100644
--- a/pkg/phlaredb/symdb/format.go
+++ b/pkg/phlaredb/symdb/format.go
@@ -425,11 +425,19 @@ type SymbolsBlockHeader struct {
 	Length uint32
 	// BlockSize denotes the number of items per block.
 	BlockSize uint32
+	// BlockHeaderSize denotes the encoder block header size in bytes.
+	// This enables forward compatibility within the same format version:
+	// as long as fields are not removed or reordered, and the encoding
+	// scheme does not change, the format can be extended with no change
+	// of the format version. The decoder is able to read the whole
+	// header and skip unknown fields.
+	BlockHeaderSize uint16
 	// Format of the encoded data.
+	// Change of the format _version_ may break forward compatibility.
 	Format SymbolsBlockFormat
 }
 
-type SymbolsBlockFormat uint32
+type SymbolsBlockFormat uint16
 
 const (
 	_ SymbolsBlockFormat = iota
@@ -439,6 +447,22 @@ const (
 	BlockStringsV1
 )
 
+type headerUnmarshaler interface {
+	unmarshal([]byte)
+	checksum() uint32
+}
+
+func readSymbolsBlockHeader(buf []byte, r io.Reader, v headerUnmarshaler) error {
+	if _, err := io.ReadFull(r, buf); err != nil {
+		return err
+	}
+	v.unmarshal(buf)
+	if crc32.Checksum(buf[:len(buf)-checksumSize], castagnoli) != v.checksum() {
+		return ErrInvalidCRC
+	}
+	return nil
+}
+
 const symbolsBlockReferenceSize = int(unsafe.Sizeof(SymbolsBlockHeader{}))
 
 func (h *SymbolsBlockHeader) marshal(b []byte) {
@@ -447,7 +471,8 @@ func (h *SymbolsBlockHeader) marshal(b []byte) {
 	binary.BigEndian.PutUint32(b[12:16], h.CRC)
 	binary.BigEndian.PutUint32(b[16:20], h.Length)
 	binary.BigEndian.PutUint32(b[20:24], h.BlockSize)
-	binary.BigEndian.PutUint32(b[24:28], uint32(h.Format))
+	binary.BigEndian.PutUint16(b[24:26], h.BlockHeaderSize)
+	binary.BigEndian.PutUint16(b[26:28], uint16(h.Format))
 }
 
 func (h *SymbolsBlockHeader) unmarshal(b []byte) {
@@ -456,7 +481,8 @@ func (h *SymbolsBlockHeader) unmarshal(b []byte) {
 	h.CRC = binary.BigEndian.Uint32(b[12:16])
 	h.Length = binary.BigEndian.Uint32(b[16:20])
 	h.BlockSize = binary.BigEndian.Uint32(b[20:24])
-	h.Format = SymbolsBlockFormat(binary.BigEndian.Uint32(b[24:28]))
+	h.BlockHeaderSize = binary.BigEndian.Uint16(b[24:26])
+	h.Format = SymbolsBlockFormat(binary.BigEndian.Uint16(b[26:28]))
 }
 
 func marshalSymbolsBlockReferences(b []byte, refs ...SymbolsBlockHeader) int {
@@ -672,6 +698,7 @@ func (h *StacktraceBlockHeader) unmarshal(b []byte) {
 type symbolsBlockEncoder[T any] interface {
 	encode(w io.Writer, block []T) error
 	format() SymbolsBlockFormat
+	headerSize() uintptr
 }
 
 type symbolsEncoder[T any] struct {
@@ -696,10 +723,6 @@ func (e *symbolsEncoder[T]) encode(w io.Writer, items []T) (err error) {
 	return nil
 }
 
-func (e *symbolsEncoder[T]) format() SymbolsBlockFormat {
-	return e.blockEncoder.format()
-}
-
 type symbolsBlockDecoder[T any] interface {
 	decode(r io.Reader, dst []T) error
 }
diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go
index 66963dfbab..6d7a53240f 100644
--- a/pkg/phlaredb/symdb/functions.go
+++ b/pkg/phlaredb/symdb/functions.go
@@ -13,10 +13,9 @@ import (
 
 	v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
 	"github.com/grafana/pyroscope/pkg/slices"
+	"github.com/grafana/pyroscope/pkg/util/math"
 )
 
-const functionsBlockHeaderSize = int(unsafe.Sizeof(functionsBlockHeader{}))
-
 var (
 	_ symbolsBlockEncoder[v1.InMemoryFunction] = (*functionsBlockEncoder)(nil)
 	_ symbolsBlockDecoder[v1.InMemoryFunction] = (*functionsBlockDecoder)(nil)
@@ -37,6 +36,8 @@ func (h *functionsBlockHeader) marshal(b []byte) {
 	binary.BigEndian.PutUint32(b[8:12], h.SystemNameSize)
 	binary.BigEndian.PutUint32(b[12:16], h.FileNameSize)
 	binary.BigEndian.PutUint32(b[16:20], h.StartLineSize)
+	// Fields can be added here in the future.
+	// CRC must be the last four bytes.
 	h.CRC = crc32.Checksum(b[0:20], castagnoli)
 	binary.BigEndian.PutUint32(b[20:24], h.CRC)
 }
@@ -47,9 +48,13 @@ func (h *functionsBlockHeader) unmarshal(b []byte) {
 	h.SystemNameSize = binary.BigEndian.Uint32(b[8:12])
 	h.FileNameSize = binary.BigEndian.Uint32(b[12:16])
 	h.StartLineSize = binary.BigEndian.Uint32(b[16:20])
-	h.CRC = binary.BigEndian.Uint32(b[20:24])
+	// In future versions, new fields are decoded here;
+	// if pos < len(b)-checksumSize, then there are more fields.
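+	// For example, a hypothetical field appended by a later revision
+	// would be decoded here as follows (illustrative sketch, not part
+	// of this change):
+	//
+	//	if pos := 20; pos+4 <= len(b)-checksumSize {
+	//		h.NewField = binary.BigEndian.Uint32(b[pos : pos+4])
+	//	}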
+ h.CRC = binary.BigEndian.Uint32(b[len(b)-checksumSize:]) } +func (h *functionsBlockHeader) checksum() uint32 { return h.CRC } + type functionsBlockEncoder struct { header functionsBlockHeader @@ -62,9 +67,9 @@ func newFunctionsEncoder() *symbolsEncoder[v1.InMemoryFunction] { return newSymbolsEncoder[v1.InMemoryFunction](new(functionsBlockEncoder)) } -func (e *functionsBlockEncoder) format() SymbolsBlockFormat { - return BlockFunctionsV1 -} +func (e *functionsBlockEncoder) format() SymbolsBlockFormat { return BlockFunctionsV1 } + +func (e *functionsBlockEncoder) headerSize() uintptr { return unsafe.Sizeof(functionsBlockHeader{}) } func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunction) error { e.initWrite(len(functions)) @@ -98,7 +103,7 @@ func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunct e.header.StartLineSize = uint32(len(e.tmp)) e.buf.Write(e.tmp) - e.tmp = slices.GrowLen(e.tmp, functionsBlockHeaderSize) + e.tmp = slices.GrowLen(e.tmp, int(e.headerSize())) e.header.marshal(e.tmp) if _, err := w.Write(e.tmp); err != nil { return err @@ -121,8 +126,8 @@ func (e *functionsBlockEncoder) initWrite(functions int) { } type functionsBlockDecoder struct { - format SymbolsBlockFormat - header functionsBlockHeader + headerSize uint16 + header functionsBlockHeader ints []int32 buf []byte @@ -130,25 +135,18 @@ type functionsBlockDecoder struct { func newFunctionsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryFunction], error) { if h.Format == BlockFunctionsV1 { - return newSymbolsDecoder[v1.InMemoryFunction](h, &functionsBlockDecoder{format: h.Format}), nil + headerSize := math.Max(functionsBlockHeaderMinSize, h.BlockHeaderSize) + return newSymbolsDecoder[v1.InMemoryFunction](h, &functionsBlockDecoder{headerSize: headerSize}), nil } return nil, fmt.Errorf("%w: unknown functions format: %d", ErrUnknownVersion, h.Format) } -func (d *functionsBlockDecoder) readHeader(r io.Reader) error { - d.buf = slices.GrowLen(d.buf, functionsBlockHeaderSize) - if _, err := io.ReadFull(r, d.buf); err != nil { - return nil - } - d.header.unmarshal(d.buf) - if crc32.Checksum(d.buf[:functionsBlockHeaderSize-4], castagnoli) != d.header.CRC { - return ErrInvalidSize - } - return nil -} +// In early versions, block header size is not specified. Must not change. 
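+// (functionsBlockHeader is six uint32 fields ending with the CRC:
+// 6*4 = 24 bytes.)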
+const functionsBlockHeaderMinSize = 24 func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunction) (err error) { - if err = d.readHeader(r); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.headerSize)) + if err = readSymbolsBlockHeader(d.buf, r, &d.header); err != nil { return err } if d.header.FunctionsLen > uint32(len(functions)) { diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index 78ac4cd988..6d75311ad7 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -13,12 +13,10 @@ import ( v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/slices" + "github.com/grafana/pyroscope/pkg/util/math" ) -const ( - maxLocationLines = 255 - locationsBlockHeaderSize = int(unsafe.Sizeof(locationsBlockHeader{})) -) +const maxLocationLines = 255 var ( _ symbolsBlockEncoder[v1.InMemoryLocation] = (*locationsBlockEncoder)(nil) @@ -43,6 +41,8 @@ func (h *locationsBlockHeader) marshal(b []byte) { binary.BigEndian.PutUint32(b[12:16], h.LinesSize) binary.BigEndian.PutUint32(b[16:20], h.AddrSize) binary.BigEndian.PutUint32(b[20:24], h.IsFoldedSize) + // Fields can be added here in the future. + // CRC must be the last four bytes. h.CRC = crc32.Checksum(b[0:24], castagnoli) binary.BigEndian.PutUint32(b[24:28], h.CRC) } @@ -54,9 +54,13 @@ func (h *locationsBlockHeader) unmarshal(b []byte) { h.LinesSize = binary.BigEndian.Uint32(b[12:16]) h.AddrSize = binary.BigEndian.Uint32(b[16:20]) h.IsFoldedSize = binary.BigEndian.Uint32(b[20:24]) + // In future versions, new fields are decoded here; + // if pos < len(b)-checksumSize, then there are more fields. h.CRC = binary.BigEndian.Uint32(b[24:28]) } +func (h *locationsBlockHeader) checksum() uint32 { return h.CRC } + type locationsBlockEncoder struct { header locationsBlockHeader @@ -79,9 +83,9 @@ func newLocationsEncoder() *symbolsEncoder[v1.InMemoryLocation] { return newSymbolsEncoder[v1.InMemoryLocation](new(locationsBlockEncoder)) } -func (e *locationsBlockEncoder) format() SymbolsBlockFormat { - return BlockLocationsV1 -} +func (e *locationsBlockEncoder) format() SymbolsBlockFormat { return BlockLocationsV1 } + +func (e *locationsBlockEncoder) headerSize() uintptr { return unsafe.Sizeof(locationsBlockHeader{}) } func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) error { e.initWrite(len(locations)) @@ -128,7 +132,7 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat e.buf.Write(e.tmp) } - e.tmp = slices.GrowLen(e.tmp, locationsBlockHeaderSize) + e.tmp = slices.GrowLen(e.tmp, int(e.headerSize())) e.header.marshal(e.tmp) if _, err := w.Write(e.tmp); err != nil { return err @@ -158,8 +162,8 @@ func (e *locationsBlockEncoder) initWrite(locations int) { } type locationsBlockDecoder struct { - format SymbolsBlockFormat - header locationsBlockHeader + headerSize uint16 + header locationsBlockHeader mappings []int32 lineCount []byte @@ -173,25 +177,18 @@ type locationsBlockDecoder struct { func newLocationsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryLocation], error) { if h.Format == BlockLocationsV1 { - return newSymbolsDecoder[v1.InMemoryLocation](h, &locationsBlockDecoder{format: h.Format}), nil + headerSize := math.Max(locationsBlockHeaderMinSize, h.BlockHeaderSize) + return newSymbolsDecoder[v1.InMemoryLocation](h, &locationsBlockDecoder{headerSize: headerSize}), nil } return nil, fmt.Errorf("%w: unknown locations format: %d", ErrUnknownVersion, h.Format) } 
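+// An illustrative sketch of the fallback above (the helper is
+// hypothetical, not part of this change): a zero h.BlockHeaderSize,
+// written by blocks that predate the field, falls back to the known
+// minimum, while larger values are honored so that trailing unknown
+// header fields are read, checksummed, and skipped.
+//
+//	func headerSizeFor(minSize, fromIndex uint16) uint16 {
+//		if fromIndex < minSize { // includes the legacy zero value
+//			return minSize
+//		}
+//		return fromIndex
+//	}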
-func (d *locationsBlockDecoder) readHeader(r io.Reader) error { - d.buf = slices.GrowLen(d.buf, locationsBlockHeaderSize) - if _, err := io.ReadFull(r, d.buf); err != nil { - return err - } - d.header.unmarshal(d.buf) - if crc32.Checksum(d.buf[:locationsBlockHeaderSize-4], castagnoli) != d.header.CRC { - return ErrInvalidCRC - } - return nil -} +// In early versions, block header size is not specified. Must not change. +const locationsBlockHeaderMinSize = 28 func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocation) (err error) { - if err = d.readHeader(r); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.headerSize)) + if err = readSymbolsBlockHeader(d.buf, r, &d.header); err != nil { return err } if d.header.LocationsLen != uint32(len(locations)) { diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index 3094b74c44..371e774068 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -13,10 +13,9 @@ import ( v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/slices" + "github.com/grafana/pyroscope/pkg/util/math" ) -const mappingsBlockHeaderSize = int(unsafe.Sizeof(mappingsBlockHeader{})) - var ( _ symbolsBlockEncoder[v1.InMemoryMapping] = (*mappingsBlockEncoder)(nil) _ symbolsBlockDecoder[v1.InMemoryMapping] = (*mappingsBlockDecoder)(nil) @@ -42,6 +41,8 @@ func (h *mappingsBlockHeader) marshal(b []byte) { binary.BigEndian.PutUint32(b[16:20], h.MemoryStartSize) binary.BigEndian.PutUint32(b[20:24], h.MemoryLimitSize) binary.BigEndian.PutUint32(b[24:28], h.FileOffsetSize) + // Fields can be added here in the future. + // CRC must be the last four bytes. h.CRC = crc32.Checksum(b[0:28], castagnoli) binary.BigEndian.PutUint32(b[28:32], h.CRC) } @@ -54,9 +55,13 @@ func (h *mappingsBlockHeader) unmarshal(b []byte) { h.MemoryStartSize = binary.BigEndian.Uint32(b[16:20]) h.MemoryLimitSize = binary.BigEndian.Uint32(b[20:24]) h.FileOffsetSize = binary.BigEndian.Uint32(b[24:28]) + // In future versions, new fields are decoded here; + // if pos < len(b)-checksumSize, then there are more fields. 
h.CRC = binary.BigEndian.Uint32(b[28:32]) } +func (h *mappingsBlockHeader) checksum() uint32 { return h.CRC } + type mappingsBlockEncoder struct { header mappingsBlockHeader @@ -70,9 +75,9 @@ func newMappingsEncoder() *symbolsEncoder[v1.InMemoryMapping] { return newSymbolsEncoder[v1.InMemoryMapping](new(mappingsBlockEncoder)) } -func (e *mappingsBlockEncoder) format() SymbolsBlockFormat { - return BlockMappingsV1 -} +func (e *mappingsBlockEncoder) format() SymbolsBlockFormat { return BlockMappingsV1 } + +func (e *mappingsBlockEncoder) headerSize() uintptr { return unsafe.Sizeof(mappingsBlockHeader{}) } func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping) error { e.initWrite(len(mappings)) @@ -145,7 +150,7 @@ func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping e.buf.Write(e.tmp) } - e.tmp = slices.GrowLen(e.tmp, mappingsBlockHeaderSize) + e.tmp = slices.GrowLen(e.tmp, int(e.headerSize())) e.header.marshal(e.tmp) if _, err := w.Write(e.tmp); err != nil { return err @@ -169,8 +174,8 @@ func (e *mappingsBlockEncoder) initWrite(mappings int) { } type mappingsBlockDecoder struct { - format SymbolsBlockFormat - header mappingsBlockHeader + headerSize uint16 + header mappingsBlockHeader ints []int32 ints64 []int64 @@ -179,25 +184,18 @@ type mappingsBlockDecoder struct { func newMappingsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryMapping], error) { if h.Format == BlockMappingsV1 { - return newSymbolsDecoder[v1.InMemoryMapping](h, &mappingsBlockDecoder{format: h.Format}), nil + headerSize := math.Max(mappingsBlockHeaderMinSize, h.BlockHeaderSize) + return newSymbolsDecoder[v1.InMemoryMapping](h, &mappingsBlockDecoder{headerSize: headerSize}), nil } return nil, fmt.Errorf("%w: unknown mappings format: %d", ErrUnknownVersion, h.Format) } -func (d *mappingsBlockDecoder) readHeader(r io.Reader) error { - d.buf = slices.GrowLen(d.buf, mappingsBlockHeaderSize) - if _, err := io.ReadFull(r, d.buf); err != nil { - return nil - } - d.header.unmarshal(d.buf) - if crc32.Checksum(d.buf[:mappingsBlockHeaderSize-4], castagnoli) != d.header.CRC { - return ErrInvalidCRC - } - return nil -} +// In early versions, block header size is not specified. Must not change. 
+const mappingsBlockHeaderMinSize = 32 func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping) (err error) { - if err = d.readHeader(r); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.headerSize)) + if err = readSymbolsBlockHeader(d.buf, r, &d.header); err != nil { return err } if d.header.MappingsLen > uint32(len(mappings)) { diff --git a/pkg/phlaredb/symdb/partition_memory.go b/pkg/phlaredb/symdb/partition_memory.go index ab44852dc0..b6c8339789 100644 --- a/pkg/phlaredb/symdb/partition_memory.go +++ b/pkg/phlaredb/symdb/partition_memory.go @@ -415,6 +415,7 @@ func writeSymbolsBlock[T any](w *fileWriter, s []T, e *symbolsEncoder[T]) (h Sym h.CRC = crc.Sum32() h.Length = uint32(len(s)) h.BlockSize = uint32(e.blockSize) - h.Format = e.format() + h.BlockHeaderSize = uint16(e.blockEncoder.headerSize()) + h.Format = e.blockEncoder.format() return h, nil } diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go index 1992e85c1b..a228f0b69c 100644 --- a/pkg/phlaredb/symdb/strings.go +++ b/pkg/phlaredb/symdb/strings.go @@ -10,12 +10,10 @@ import ( "unsafe" "github.com/grafana/pyroscope/pkg/slices" + "github.com/grafana/pyroscope/pkg/util/math" ) -const ( - maxStringLen = 1<<16 - 1 - stringsBlockHeaderSize = int(unsafe.Sizeof(stringsBlockHeader{})) -) +const maxStringLen = 1<<16 - 1 var ( _ symbolsBlockEncoder[string] = (*stringsBlockEncoder)(nil) @@ -32,6 +30,8 @@ type stringsBlockHeader struct { func (h *stringsBlockHeader) marshal(b []byte) { binary.BigEndian.PutUint32(b[0:4], h.StringsLen) b[5], b[6], b[7], b[8] = h.BlockEncoding, 0, 0, 0 + // Fields can be added here in the future. + // CRC must be the last four bytes. h.CRC = crc32.Checksum(b[0:8], castagnoli) binary.BigEndian.PutUint32(b[8:12], h.CRC) } @@ -39,9 +39,13 @@ func (h *stringsBlockHeader) marshal(b []byte) { func (h *stringsBlockHeader) unmarshal(b []byte) { h.StringsLen = binary.BigEndian.Uint32(b[0:4]) h.BlockEncoding = b[5] + // In future versions, new fields are decoded here; + // if pos < len(b)-checksumSize, then there are more fields. 
h.CRC = binary.BigEndian.Uint32(b[8:12]) } +func (h *stringsBlockHeader) checksum() uint32 { return h.CRC } + type stringsBlockEncoder struct { header stringsBlockHeader buf bytes.Buffer @@ -52,9 +56,9 @@ func newStringsEncoder() *symbolsEncoder[string] { return newSymbolsEncoder[string](new(stringsBlockEncoder)) } -func (e *stringsBlockEncoder) format() SymbolsBlockFormat { - return BlockStringsV1 -} +func (e *stringsBlockEncoder) format() SymbolsBlockFormat { return BlockStringsV1 } + +func (e *stringsBlockEncoder) headerSize() uintptr { return unsafe.Sizeof(stringsBlockHeader{}) } func (e *stringsBlockEncoder) encode(w io.Writer, strings []string) error { e.initWrite(len(strings)) @@ -80,7 +84,7 @@ func (e *stringsBlockEncoder) encode(w io.Writer, strings []string) error { return err } } - e.tmp = slices.GrowLen(e.tmp, stringsBlockHeaderSize) + e.tmp = slices.GrowLen(e.tmp, int(e.headerSize())) e.header.marshal(e.tmp) if _, err := w.Write(e.tmp); err != nil { return err @@ -109,37 +113,30 @@ func (e *stringsBlockEncoder) initWrite(strings int) { } type stringsBlockDecoder struct { - format SymbolsBlockFormat - header stringsBlockHeader - buf []byte + headerSize uint16 + header stringsBlockHeader + buf []byte } func newStringsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[string], error) { if h.Format == BlockStringsV1 { - return newSymbolsDecoder[string](h, &stringsBlockDecoder{format: h.Format}), nil + headerSize := math.Max(stringsBlockHeaderMinSize, h.BlockHeaderSize) + return newSymbolsDecoder[string](h, &stringsBlockDecoder{headerSize: headerSize}), nil } return nil, fmt.Errorf("%w: unknown strings format: %d", ErrUnknownVersion, h.Format) } -func (d *stringsBlockDecoder) readHeader(r io.Reader) error { - d.buf = slices.GrowLen(d.buf, stringsBlockHeaderSize) - if _, err := io.ReadFull(r, d.buf); err != nil { +// In early versions, block header size is not specified. Must not change. 
+const stringsBlockHeaderMinSize = 12 + +func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) { + d.buf = slices.GrowLen(d.buf, int(d.headerSize)) + if err = readSymbolsBlockHeader(d.buf, r, &d.header); err != nil { return err } - d.header.unmarshal(d.buf) - if crc32.Checksum(d.buf[:stringsBlockHeaderSize-4], castagnoli) != d.header.CRC { - return ErrInvalidCRC - } if d.header.BlockEncoding != 8 && d.header.BlockEncoding != 16 { return fmt.Errorf("invalid string block encoding: %d", d.header.BlockEncoding) } - return nil -} - -func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) { - if err = d.readHeader(r); err != nil { - return err - } if d.header.StringsLen != uint32(len(strings)) { return fmt.Errorf("invalid string buffer size") } diff --git a/pkg/phlaredb/symdb/testdata/symbols/v3/symbols.symdb b/pkg/phlaredb/symdb/testdata/symbols/v3/symbols.symdb index af10787561f7fe081804ad6e8aebb49e9fe54269..a58190c627f6abf39d2456eba86190ac8b70fc72 100644 GIT binary patch delta 172 zcmcbzll96@)`l&NtkH}z)7hdK24s3XL?#RC#Y7#I{lG!!suO9I6viJdc5U|?YU1(ug!V1h|AFs%8lqjr8DM}>6Ub(gi04-Q0u*9q zU|<2s0BI;-)RqK_O%gk2ssPmX3o6e9lV)I8^N)d{hIz52t1Zx+-B5Y3A&y7h$QD=T U8Uocq%wvrF1JXDBLJT7d0KjG&761SM From 9cfe9aea60a43582c49ac5f5fb15109e26517f31 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 13 May 2024 16:19:07 +0800 Subject: [PATCH 30/36] use v2 by default --- pkg/phlaredb/symdb/block_writer.go | 73 +----- pkg/phlaredb/symdb/block_writer_v2.go | 316 +++++++++++++++++++++++++ pkg/phlaredb/symdb/block_writer_v3.go | 137 +++++++++++ pkg/phlaredb/symdb/format.go | 68 +++++- pkg/phlaredb/symdb/partition_memory.go | 55 ----- pkg/phlaredb/symdb/resolver.go | 1 - pkg/phlaredb/symdb/symdb.go | 35 ++- pkg/phlaredb/symdb/symdb_test.go | 9 +- 8 files changed, 556 insertions(+), 138 deletions(-) create mode 100644 pkg/phlaredb/symdb/block_writer_v2.go create mode 100644 pkg/phlaredb/symdb/block_writer_v3.go diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index d489905279..bf31dd9216 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -2,83 +2,16 @@ package symdb import ( "bufio" - "fmt" "io" "os" "path/filepath" "github.com/grafana/pyroscope/pkg/phlaredb/block" - v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" ) -type writer struct { - config *Config - - index IndexFile - dataFile *fileWriter - files []block.File - footer Footer - - stringsEncoder *symbolsEncoder[string] - mappingsEncoder *symbolsEncoder[v1.InMemoryMapping] - functionsEncoder *symbolsEncoder[v1.InMemoryFunction] - locationsEncoder *symbolsEncoder[v1.InMemoryLocation] -} - -func newWriter(c *Config) *writer { - return &writer{ - config: c, - index: IndexFile{ - Header: IndexHeader{ - Magic: symdbMagic, - Version: FormatV3, - }, - }, - footer: Footer{ - Magic: symdbMagic, - Version: FormatV3, - }, - - stringsEncoder: newStringsEncoder(), - mappingsEncoder: newMappingsEncoder(), - functionsEncoder: newFunctionsEncoder(), - locationsEncoder: newLocationsEncoder(), - } -} - -func (w *writer) writePartitions(partitions []*PartitionWriter) (err error) { - if err = os.MkdirAll(w.config.Dir, 0o755); err != nil { - return fmt.Errorf("failed to create directory %q: %w", w.config.Dir, err) - } - if w.dataFile, err = w.newFile(DefaultFileName); err != nil { - return err - } - defer func() { - err = w.dataFile.Close() - w.files = []block.File{w.dataFile.meta()} - }() - for _, p := range partitions { - if err = p.writeTo(w); 
err != nil { - return fmt.Errorf("failed to write partition: %w", err) - } - w.index.PartitionHeaders = append(w.index.PartitionHeaders, &p.header) - } - w.footer.IndexOffset = uint64(w.dataFile.w.offset) - if _, err = w.index.WriteTo(w.dataFile); err != nil { - return fmt.Errorf("failed to write index: %w", err) - } - if _, err = w.dataFile.Write(w.footer.MarshalBinary()); err != nil { - return fmt.Errorf("failed to write footer: %w", err) - } - return nil -} - -func (w *writer) newFile(path string) (f *fileWriter, err error) { - path = filepath.Join(w.config.Dir, path) - if f, err = newFileWriter(path); err != nil { - return nil, fmt.Errorf("failed to create %q: %w", path, err) - } - return f, err +type blockWriter interface { + writePartitions(partitions []*PartitionWriter) error + meta() []block.File } type fileWriter struct { diff --git a/pkg/phlaredb/symdb/block_writer_v2.go b/pkg/phlaredb/symdb/block_writer_v2.go new file mode 100644 index 0000000000..40dd149dbf --- /dev/null +++ b/pkg/phlaredb/symdb/block_writer_v2.go @@ -0,0 +1,316 @@ +package symdb + +import ( + "context" + "fmt" + "hash/crc32" + "io" + "os" + "path/filepath" + + "github.com/grafana/dskit/multierror" + "github.com/parquet-go/parquet-go" + "golang.org/x/sync/errgroup" + + "github.com/grafana/pyroscope/pkg/phlaredb/block" + schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" + "github.com/grafana/pyroscope/pkg/util/build" + "github.com/grafana/pyroscope/pkg/util/math" +) + +type writerV2 struct { + config *Config + + index IndexFile + indexWriter *fileWriter + stacktraces *fileWriter + files []block.File + + // Parquet tables. + mappings parquetWriter[schemav1.InMemoryMapping, schemav1.MappingPersister] + functions parquetWriter[schemav1.InMemoryFunction, schemav1.FunctionPersister] + locations parquetWriter[schemav1.InMemoryLocation, schemav1.LocationPersister] + strings parquetWriter[string, schemav1.StringPersister] +} + +func newWriterV2(c *Config) *writerV2 { + return &writerV2{ + config: c, + index: IndexFile{ + Header: IndexHeader{ + Magic: symdbMagic, + Version: FormatV2, + }, + }, + } +} + +func (w *writerV2) writePartitions(partitions []*PartitionWriter) error { + if err := w.createDir(); err != nil { + return err + } + + g, _ := errgroup.WithContext(context.Background()) + g.Go(func() (err error) { + if w.stacktraces, err = w.newFile(StacktracesFileName); err != nil { + return err + } + for _, partition := range partitions { + if err = w.writeStacktraces(partition); err != nil { + return err + } + } + return w.stacktraces.Close() + }) + + g.Go(func() (err error) { + if err = w.strings.init(w.config.Dir, w.config.Parquet); err != nil { + return err + } + for _, partition := range partitions { + if partition.header.V2.Strings, err = w.strings.readFrom(partition.strings.slice); err != nil { + return err + } + } + return w.strings.Close() + }) + + g.Go(func() (err error) { + if err = w.functions.init(w.config.Dir, w.config.Parquet); err != nil { + return err + } + for _, partition := range partitions { + if partition.header.V2.Functions, err = w.functions.readFrom(partition.functions.slice); err != nil { + return err + } + } + return w.functions.Close() + }) + + g.Go(func() (err error) { + if err = w.mappings.init(w.config.Dir, w.config.Parquet); err != nil { + return err + } + for _, partition := range partitions { + if partition.header.V2.Mappings, err = w.mappings.readFrom(partition.mappings.slice); err != nil { + return err + } + } + return w.mappings.Close() + }) + + g.Go(func() (err error) 
{ + if err = w.locations.init(w.config.Dir, w.config.Parquet); err != nil { + return err + } + for _, partition := range partitions { + if partition.header.V2.Locations, err = w.locations.readFrom(partition.locations.slice); err != nil { + return err + } + } + return w.locations.Close() + }) + + if err := g.Wait(); err != nil { + return err + } + + for _, partition := range partitions { + w.index.PartitionHeaders = append(w.index.PartitionHeaders, &partition.header) + } + + return w.Flush() +} + +func (w *writerV2) Flush() (err error) { + if err = w.writeIndexFile(); err != nil { + return err + } + w.files = []block.File{ + w.indexWriter.meta(), + w.stacktraces.meta(), + w.locations.meta(), + w.mappings.meta(), + w.functions.meta(), + w.strings.meta(), + } + return nil +} + +func (w *writerV2) writeStacktraces(partition *PartitionWriter) (err error) { + for ci, c := range partition.stacktraces.chunks { + stacks := c.stacks + if stacks == 0 { + stacks = uint32(len(partition.stacktraces.hashToIdx)) + } + h := StacktraceBlockHeader{ + Offset: w.stacktraces.w.offset, + Size: 0, // Set later. + Partition: partition.header.Partition, + BlockIndex: uint16(ci), + Encoding: StacktraceEncodingGroupVarint, + Stacktraces: stacks, + StacktraceNodes: c.tree.len(), + StacktraceMaxDepth: 0, // TODO + StacktraceMaxNodes: c.partition.maxNodesPerChunk, + CRC: 0, // Set later. + } + crc := crc32.New(castagnoli) + if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.stacktraces)); err != nil { + return fmt.Errorf("writing stacktrace chunk data: %w", err) + } + h.CRC = crc.Sum32() + partition.header.Stacktraces = append(partition.header.Stacktraces, h) + } + return nil +} + +func (w *writerV2) createDir() error { + if err := os.MkdirAll(w.config.Dir, 0o755); err != nil { + return fmt.Errorf("failed to create directory %q: %w", w.config.Dir, err) + } + return nil +} + +func (w *writerV2) writeIndexFile() (err error) { + // Write the index file only after all the files were flushed. 
+ if w.indexWriter, err = w.newFile(IndexFileName); err != nil { + return err + } + defer func() { + err = multierror.New(err, w.indexWriter.Close()).Err() + }() + if _, err = w.index.WriteTo(w.indexWriter); err != nil { + return fmt.Errorf("failed to write index file: %w", err) + } + return err +} + +func (w *writerV2) newFile(path string) (f *fileWriter, err error) { + path = filepath.Join(w.config.Dir, path) + if f, err = newFileWriter(path); err != nil { + return nil, fmt.Errorf("failed to create %q: %w", path, err) + } + return f, err +} + +func (w *writerV2) meta() []block.File { return w.files } + +type parquetWriter[M schemav1.Models, P schemav1.Persister[M]] struct { + persister P + config ParquetConfig + + currentRowGroup uint32 + currentRows uint32 + rowsTotal uint64 + + buffer *parquet.Buffer + rowsBatch []parquet.Row + + writer *parquet.GenericWriter[P] + file *os.File + path string +} + +func (s *parquetWriter[M, P]) init(dir string, c ParquetConfig) (err error) { + s.config = c + s.path = filepath.Join(dir, s.persister.Name()+block.ParquetSuffix) + s.file, err = os.OpenFile(s.path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o644) + if err != nil { + return err + } + s.rowsBatch = make([]parquet.Row, 0, 128) + s.buffer = parquet.NewBuffer(s.persister.Schema(), parquet.ColumnBufferCapacity(s.config.MaxBufferRowCount)) + s.writer = parquet.NewGenericWriter[P](s.file, s.persister.Schema(), + parquet.CreatedBy("github.com/grafana/pyroscope/", build.Version, build.Revision), + parquet.PageBufferSize(3*1024*1024), + ) + return nil +} + +func (s *parquetWriter[M, P]) readFrom(values []M) (ranges []RowRangeReference, err error) { + for len(values) > 0 { + var r RowRangeReference + if r, err = s.writeRows(values); err != nil { + return nil, err + } + ranges = append(ranges, r) + values = values[r.Rows:] + } + return ranges, nil +} + +func (s *parquetWriter[M, P]) writeRows(values []M) (r RowRangeReference, err error) { + r.RowGroup = s.currentRowGroup + r.Index = s.currentRows + if len(values) == 0 { + return r, nil + } + var n int + for len(values) > 0 && int(s.currentRows) < s.config.MaxBufferRowCount { + s.fillBatch(values) + if n, err = s.buffer.WriteRows(s.rowsBatch); err != nil { + return r, err + } + s.currentRows += uint32(n) + r.Rows += uint32(n) + values = values[n:] + } + if int(s.currentRows)+cap(s.rowsBatch) >= s.config.MaxBufferRowCount { + if err = s.flushBuffer(); err != nil { + return r, err + } + } + return r, nil +} + +func (s *parquetWriter[M, P]) fillBatch(values []M) int { + m := math.Min(len(values), cap(s.rowsBatch)) + s.rowsBatch = s.rowsBatch[:m] + for i := 0; i < m; i++ { + row := s.rowsBatch[i][:0] + s.rowsBatch[i] = s.persister.Deconstruct(row, 0, values[i]) + } + return m +} + +func (s *parquetWriter[M, P]) flushBuffer() error { + if _, err := s.writer.WriteRowGroup(s.buffer); err != nil { + return err + } + s.rowsTotal += uint64(s.buffer.NumRows()) + s.currentRowGroup++ + s.currentRows = 0 + s.buffer.Reset() + return nil +} + +func (s *parquetWriter[M, P]) meta() block.File { + f := block.File{ + // Note that the path is relative to the symdb root dir. 
+ RelPath: filepath.Base(s.path),
+ Parquet: &block.ParquetFile{
+ NumRows: s.rowsTotal,
+ },
+ }
+ if f.Parquet.NumRows > 0 {
+ f.Parquet.NumRowGroups = uint64(s.currentRowGroup + 1)
+ }
+ if stat, err := os.Stat(s.path); err == nil {
+ f.SizeBytes = uint64(stat.Size())
+ }
+ return f
+}
+
+func (s *parquetWriter[M, P]) Close() error {
+ if err := s.flushBuffer(); err != nil {
+ return fmt.Errorf("flushing parquet buffer: %w", err)
+ }
+ if err := s.writer.Close(); err != nil {
+ return fmt.Errorf("closing parquet writer: %w", err)
+ }
+ if err := s.file.Close(); err != nil {
+ return fmt.Errorf("closing parquet file: %w", err)
+ }
+ return nil
+}
diff --git a/pkg/phlaredb/symdb/block_writer_v3.go b/pkg/phlaredb/symdb/block_writer_v3.go
new file mode 100644
index 0000000000..842ce6d12d
--- /dev/null
+++ b/pkg/phlaredb/symdb/block_writer_v3.go
@@ -0,0 +1,140 @@
+package symdb
+
+import (
+ "fmt"
+ "hash/crc32"
+ "io"
+ "os"
+ "path/filepath"
+
+ "github.com/grafana/pyroscope/pkg/phlaredb/block"
+ v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
+)
+
+type writerV3 struct {
+ config *Config
+
+ index IndexFile
+ dataFile *fileWriter
+ files []block.File
+ footer Footer
+
+ stringsEncoder *symbolsEncoder[string]
+ mappingsEncoder *symbolsEncoder[v1.InMemoryMapping]
+ functionsEncoder *symbolsEncoder[v1.InMemoryFunction]
+ locationsEncoder *symbolsEncoder[v1.InMemoryLocation]
+}
+
+func newWriterV3(c *Config) *writerV3 {
+ return &writerV3{
+ config: c,
+ index: IndexFile{
+ Header: IndexHeader{
+ Magic: symdbMagic,
+ Version: FormatV3,
+ },
+ },
+ footer: Footer{
+ Magic: symdbMagic,
+ Version: FormatV3,
+ },
+
+ stringsEncoder: newStringsEncoder(),
+ mappingsEncoder: newMappingsEncoder(),
+ functionsEncoder: newFunctionsEncoder(),
+ locationsEncoder: newLocationsEncoder(),
+ }
+}
+
+func (w *writerV3) writePartitions(partitions []*PartitionWriter) (err error) {
+ if err = os.MkdirAll(w.config.Dir, 0o755); err != nil {
+ return fmt.Errorf("failed to create directory %q: %w", w.config.Dir, err)
+ }
+ if w.dataFile, err = w.newFile(DefaultFileName); err != nil {
+ return err
+ }
+ defer func() {
+ // Do not let Close mask an earlier write error.
+ if cerr := w.dataFile.Close(); err == nil {
+ err = cerr
+ }
+ w.files = []block.File{w.dataFile.meta()}
+ }()
+ for _, p := range partitions {
+ if err = writePartitionV3(w, p); err != nil {
+ return fmt.Errorf("failed to write partition: %w", err)
+ }
+ w.index.PartitionHeaders = append(w.index.PartitionHeaders, &p.header)
+ }
+ w.footer.IndexOffset = uint64(w.dataFile.w.offset)
+ if _, err = w.index.WriteTo(w.dataFile); err != nil {
+ return fmt.Errorf("failed to write index: %w", err)
+ }
+ if _, err = w.dataFile.Write(w.footer.MarshalBinary()); err != nil {
+ return fmt.Errorf("failed to write footer: %w", err)
+ }
+ return nil
+}
+
+func (w *writerV3) meta() []block.File { return w.files }
+
+func (w *writerV3) newFile(path string) (f *fileWriter, err error) {
+ path = filepath.Join(w.config.Dir, path)
+ if f, err = newFileWriter(path); err != nil {
+ return nil, fmt.Errorf("failed to create %q: %w", path, err)
+ }
+ return f, err
+}
+
+func writePartitionV3(w *writerV3, p *PartitionWriter) (err error) {
+ if p.header.V3.Strings, err = writeSymbolsBlock(w.dataFile, p.strings.slice, w.stringsEncoder); err != nil {
+ return err
+ }
+ if p.header.V3.Mappings, err = writeSymbolsBlock(w.dataFile, p.mappings.slice, w.mappingsEncoder); err != nil {
+ return err
+ }
+ if p.header.V3.Functions, err = writeSymbolsBlock(w.dataFile, p.functions.slice, w.functionsEncoder); err != nil {
+ return err
+ }
+ if p.header.V3.Locations, err
= writeSymbolsBlock(w.dataFile, p.locations.slice, w.locationsEncoder); err != nil { + return err + } + for ci, c := range p.stacktraces.chunks { + stacks := c.stacks + if stacks == 0 { + stacks = uint32(len(p.stacktraces.hashToIdx)) + } + h := StacktraceBlockHeader{ + Offset: w.dataFile.w.offset, + Partition: p.header.Partition, + BlockIndex: uint16(ci), + Encoding: StacktraceEncodingGroupVarint, + Stacktraces: stacks, + StacktraceNodes: c.tree.len(), + StacktraceMaxNodes: c.partition.maxNodesPerChunk, + } + crc := crc32.New(castagnoli) + if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.dataFile)); err != nil { + return fmt.Errorf("writing stacktrace chunk data: %w", err) + } + h.CRC = crc.Sum32() + p.header.Stacktraces = append(p.header.Stacktraces, h) + } + return nil +} + +func writeSymbolsBlock[T any](w *fileWriter, s []T, e *symbolsEncoder[T]) (h SymbolsBlockHeader, err error) { + h.Offset = uint64(w.w.offset) + crc := crc32.New(castagnoli) + mw := io.MultiWriter(crc, w.w) + if err = e.encode(mw, s); err != nil { + return h, err + } + h.Size = uint32(w.w.offset) - uint32(h.Offset) + h.CRC = crc.Sum32() + h.Length = uint32(len(s)) + h.BlockSize = uint32(e.blockSize) + h.BlockHeaderSize = uint16(e.blockEncoder.headerSize()) + h.Format = e.blockEncoder.format() + return h, nil +} diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 0dbbcae529..7afb72bcac 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -260,17 +260,31 @@ func (h *PartitionHeaders) Size() int64 { return s } -func (h *PartitionHeaders) WriteTo(dst io.Writer) (_ int64, err error) { +func (h *PartitionHeaders) MarshalV3To(dst io.Writer) (_ int64, err error) { w := withWriterOffset(dst, 0) buf := make([]byte, 4, 128) binary.BigEndian.PutUint32(buf, uint32(len(*h))) w.write(buf) for _, p := range *h { - if p.V3 == nil { - return 0, fmt.Errorf("only v3 format is supported") - } buf = slices.GrowLen(buf, int(p.Size())) - p.marshal(buf) + p.marshalV3(buf) + w.write(buf) + } + return w.offset, w.err +} + +func (h *PartitionHeaders) MarshalV2To(dst io.Writer) (_ int64, err error) { + w := withWriterOffset(dst, 0) + buf := make([]byte, 4, 128) + binary.BigEndian.PutUint32(buf, uint32(len(*h))) + w.write(buf) + for _, p := range *h { + s := p.Size() + if int(s) > cap(buf) { + buf = make([]byte, s) + } + buf = buf[:s] + p.marshalV2(buf) w.write(buf) } return w.offset, w.err @@ -316,7 +330,25 @@ func (h *PartitionHeaders) unmarshal(b []byte, version FormatVersion) error { return nil } -func (h *PartitionHeader) marshal(buf []byte) { +func (h *PartitionHeader) marshalV2(buf []byte) { + binary.BigEndian.PutUint64(buf[0:8], h.Partition) + binary.BigEndian.PutUint32(buf[8:12], uint32(len(h.Stacktraces))) + binary.BigEndian.PutUint32(buf[12:16], uint32(len(h.V2.Locations))) + binary.BigEndian.PutUint32(buf[16:20], uint32(len(h.V2.Mappings))) + binary.BigEndian.PutUint32(buf[20:24], uint32(len(h.V2.Functions))) + binary.BigEndian.PutUint32(buf[24:28], uint32(len(h.V2.Strings))) + n := 28 + for i := range h.Stacktraces { + h.Stacktraces[i].marshal(buf[n:]) + n += stacktraceBlockHeaderSize + } + n += marshalRowRangeReferences(buf[n:], h.V2.Locations) + n += marshalRowRangeReferences(buf[n:], h.V2.Mappings) + n += marshalRowRangeReferences(buf[n:], h.V2.Functions) + marshalRowRangeReferences(buf[n:], h.V2.Strings) +} + +func (h *PartitionHeader) marshalV3(buf []byte) { binary.BigEndian.PutUint64(buf[0:8], h.Partition) binary.BigEndian.PutUint32(buf[8:12], uint32(len(h.Stacktraces))) n := 
12 @@ -542,6 +574,15 @@ func (h *PartitionHeaderV2) unmarshalRowRangeReferences(refs []RowRangeReference return nil } +func marshalRowRangeReferences(b []byte, refs []RowRangeReference) int { + var off int + for i := range refs { + refs[i].marshal(b[off : off+rowRangeReferenceSize]) + off += rowRangeReferenceSize + } + return off +} + const rowRangeReferenceSize = int(unsafe.Sizeof(RowRangeReference{})) type RowRangeReference struct { @@ -550,6 +591,12 @@ type RowRangeReference struct { Rows uint32 } +func (r *RowRangeReference) marshal(b []byte) { + binary.BigEndian.PutUint32(b[0:4], r.RowGroup) + binary.BigEndian.PutUint32(b[4:8], r.Index) + binary.BigEndian.PutUint32(b[8:12], r.Rows) +} + func (r *RowRangeReference) unmarshal(b []byte) { r.RowGroup = binary.BigEndian.Uint32(b[0:4]) r.Index = binary.BigEndian.Uint32(b[4:8]) @@ -625,7 +672,14 @@ func (f *IndexFile) WriteTo(dst io.Writer) (n int64, err error) { if _, err = w.Write(tocBytes); err != nil { return w.offset, fmt.Errorf("toc write: %w", err) } - if _, err = f.PartitionHeaders.WriteTo(w); err != nil { + + switch f.Header.Version { + case FormatV3: + _, err = f.PartitionHeaders.MarshalV3To(w) + default: + _, err = f.PartitionHeaders.MarshalV2To(w) + } + if err != nil { return w.offset, fmt.Errorf("partitions headers: %w", err) } diff --git a/pkg/phlaredb/symdb/partition_memory.go b/pkg/phlaredb/symdb/partition_memory.go index b6c8339789..d410aee449 100644 --- a/pkg/phlaredb/symdb/partition_memory.go +++ b/pkg/phlaredb/symdb/partition_memory.go @@ -2,8 +2,6 @@ package symdb import ( "context" - "fmt" - "hash/crc32" "io" "sync" @@ -366,56 +364,3 @@ func (p *PartitionWriter) WriteStats(s *PartitionStats) { func (p *PartitionWriter) Release() { // Noop. Satisfies PartitionReader interface. } - -func (p *PartitionWriter) writeTo(w *writer) (err error) { - if p.header.V3.Strings, err = writeSymbolsBlock(w.dataFile, p.strings.slice, w.stringsEncoder); err != nil { - return err - } - if p.header.V3.Mappings, err = writeSymbolsBlock(w.dataFile, p.mappings.slice, w.mappingsEncoder); err != nil { - return err - } - if p.header.V3.Functions, err = writeSymbolsBlock(w.dataFile, p.functions.slice, w.functionsEncoder); err != nil { - return err - } - if p.header.V3.Locations, err = writeSymbolsBlock(w.dataFile, p.locations.slice, w.locationsEncoder); err != nil { - return err - } - for ci, c := range p.stacktraces.chunks { - stacks := c.stacks - if stacks == 0 { - stacks = uint32(len(p.stacktraces.hashToIdx)) - } - h := StacktraceBlockHeader{ - Offset: w.dataFile.w.offset, - Partition: p.header.Partition, - BlockIndex: uint16(ci), - Encoding: StacktraceEncodingGroupVarint, - Stacktraces: stacks, - StacktraceNodes: c.tree.len(), - StacktraceMaxNodes: c.partition.maxNodesPerChunk, - } - crc := crc32.New(castagnoli) - if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.dataFile)); err != nil { - return fmt.Errorf("writing stacktrace chunk data: %w", err) - } - h.CRC = crc.Sum32() - p.header.Stacktraces = append(p.header.Stacktraces, h) - } - return nil -} - -func writeSymbolsBlock[T any](w *fileWriter, s []T, e *symbolsEncoder[T]) (h SymbolsBlockHeader, err error) { - h.Offset = uint64(w.w.offset) - crc := crc32.New(castagnoli) - mw := io.MultiWriter(crc, w.w) - if err = e.encode(mw, s); err != nil { - return h, err - } - h.Size = uint32(w.w.offset) - uint32(h.Offset) - h.CRC = crc.Sum32() - h.Length = uint32(len(s)) - h.BlockSize = uint32(e.blockSize) - h.BlockHeaderSize = uint16(e.blockEncoder.headerSize()) - h.Format = e.blockEncoder.format() - 
return h, nil -} diff --git a/pkg/phlaredb/symdb/resolver.go b/pkg/phlaredb/symdb/resolver.go index af56812d9e..8705ae31cd 100644 --- a/pkg/phlaredb/symdb/resolver.go +++ b/pkg/phlaredb/symdb/resolver.go @@ -247,7 +247,6 @@ func (r *Resolver) Pprof() (*googlev1.Profile, error) { } lock.Lock() defer lock.Unlock() - // TODO(kolesnikovae): Use MergeNoClone. return p.Merge(resolved) }) if err != nil { diff --git a/pkg/phlaredb/symdb/symdb.go b/pkg/phlaredb/symdb/symdb.go index c78b71fa09..c5df560473 100644 --- a/pkg/phlaredb/symdb/symdb.go +++ b/pkg/phlaredb/symdb/symdb.go @@ -64,7 +64,7 @@ type StacktraceInserter interface { type SymDB struct { config *Config - writer *writer + writer blockWriter stats MemoryStats m sync.RWMutex @@ -76,13 +76,19 @@ type SymDB struct { type Config struct { Dir string + Version FormatVersion Stacktraces StacktracesConfig + Parquet ParquetConfig } type StacktracesConfig struct { MaxNodesPerChunk uint32 } +type ParquetConfig struct { + MaxBufferRowCount int +} + type MemoryStats struct { StacktracesSize uint64 LocationsSize uint64 @@ -103,12 +109,16 @@ const statsUpdateInterval = 5 * time.Second func DefaultConfig() *Config { return &Config{ + Version: FormatV2, Stacktraces: StacktracesConfig{ // At the moment chunks are loaded in memory at once. // Due to the fact that chunking causes some duplication, // it's better to keep them large. MaxNodesPerChunk: 4 << 20, }, + Parquet: ParquetConfig{ + MaxBufferRowCount: 100 << 10, + }, } } @@ -117,16 +127,27 @@ func (c *Config) WithDirectory(dir string) *Config { return c } +func (c *Config) WithParquetConfig(pc ParquetConfig) *Config { + c.Parquet = pc + return c +} + func NewSymDB(c *Config) *SymDB { if c == nil { c = DefaultConfig() } db := &SymDB{ config: c, - writer: newWriter(c), partitions: make(map[uint64]*PartitionWriter), stop: make(chan struct{}), } + switch c.Version { + case FormatV3: + db.writer = newWriterV3(c) + default: + c.Version = FormatV2 + db.writer = newWriterV2(c) + } db.wg.Add(1) go db.updateStatsLoop() return db @@ -150,9 +171,15 @@ func (s *SymDB) PartitionWriter(partition uint64) *PartitionWriter { func (s *SymDB) newPartition(partition uint64) *PartitionWriter { p := PartitionWriter{ - header: PartitionHeader{Partition: partition, V3: new(PartitionHeaderV3)}, + header: PartitionHeader{Partition: partition}, stacktraces: newStacktracesPartition(s.config.Stacktraces.MaxNodesPerChunk), } + switch s.config.Version { + case FormatV2: + p.header.V2 = new(PartitionHeaderV2) + case FormatV3: + p.header.V3 = new(PartitionHeaderV3) + } p.strings.init() p.mappings.init() p.functions.init() @@ -251,5 +278,5 @@ func (s *SymDB) Flush() error { } func (s *SymDB) Files() []block.File { - return s.writer.files + return s.writer.meta() } diff --git a/pkg/phlaredb/symdb/symdb_test.go b/pkg/phlaredb/symdb/symdb_test.go index a1bab91bc5..b641e826a6 100644 --- a/pkg/phlaredb/symdb/symdb_test.go +++ b/pkg/phlaredb/symdb/symdb_test.go @@ -14,6 +14,7 @@ import ( googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" phlaremodel "github.com/grafana/pyroscope/pkg/model" "github.com/grafana/pyroscope/pkg/objstore/providers/filesystem" + "github.com/grafana/pyroscope/pkg/phlaredb/block" v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/pprof" ) @@ -53,6 +54,9 @@ func (s *memSuite) init() { Stacktraces: StacktracesConfig{ MaxNodesPerChunk: 1 << 10, }, + Parquet: ParquetConfig{ + MaxBufferRowCount: 512, + }, } } if s.db == nil { @@ -83,7 +87,7 @@ func (s *blockSuite) 
flush() { return &s.testBucket, nil }) require.NoError(s.t, err) - s.reader, err = Open(context.Background(), b, testBlockMeta) + s.reader, err = Open(context.Background(), b, &block.Meta{Files: s.db.Files()}) require.NoError(s.t, err) } @@ -156,6 +160,9 @@ func Test_Stats(t *testing.T) { Stacktraces: StacktracesConfig{ MaxNodesPerChunk: 4 << 20, }, + Parquet: ParquetConfig{ + MaxBufferRowCount: 100 << 10, + }, }, } From 355de0e9afecf3812a29c2d7b579163305c0c607 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 13 May 2024 16:21:19 +0800 Subject: [PATCH 31/36] fix ConvertToBlockStats --- pkg/phlaredb/block/metadata.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/phlaredb/block/metadata.go b/pkg/phlaredb/block/metadata.go index 3d9add4c16..0c2cc0c854 100644 --- a/pkg/phlaredb/block/metadata.go +++ b/pkg/phlaredb/block/metadata.go @@ -368,7 +368,7 @@ func (stats MetaStats) ConvertToBlockStats() *ingestv1.BlockStats { indexBytes = f.SizeBytes } else if f.RelPath == "profiles.parquet" { profileBytes += f.SizeBytes - } else if strings.HasPrefix(f.RelPath, "symbols") { + } else if strings.HasPrefix(f.RelPath, "symbols") || filepath.Ext(f.RelPath) == ".symdb" { symbolBytes += f.SizeBytes } } From af2dae04eb8b5af36e3262886f00f8b7f0fec673 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 13 May 2024 17:00:13 +0800 Subject: [PATCH 32/36] add explicit format config option --- pkg/phlaredb/block/block_test.go | 10 ++++---- pkg/phlaredb/compact.go | 41 +++++++++++++++++++++++++------- pkg/phlaredb/compact_test.go | 12 +++++++--- pkg/phlaredb/head.go | 8 ++++++- pkg/phlaredb/head_test.go | 36 ++++++++++++++++++++++++++-- pkg/phlaredb/symdb/symdb.go | 5 ++++ 6 files changed, 93 insertions(+), 19 deletions(-) diff --git a/pkg/phlaredb/block/block_test.go b/pkg/phlaredb/block/block_test.go index 0fa9979e69..058c9a31c2 100644 --- a/pkg/phlaredb/block/block_test.go +++ b/pkg/phlaredb/block/block_test.go @@ -96,7 +96,7 @@ func TestDelete(t *testing.T) { }) require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(dir, meta.ULID.String()))) - require.Equal(t, 4, len(objects(t, bkt, meta.ULID))) + require.Equal(t, 9, len(objects(t, bkt, meta.ULID))) markedForDeletion := promauto.With(prometheus.NewRegistry()).NewCounter(prometheus.CounterOpts{Name: "test"}) require.NoError(t, block.MarkForDeletion(ctx, log.NewNopLogger(), bkt, meta.ULID, "", false, markedForDeletion)) @@ -116,7 +116,7 @@ func TestDelete(t *testing.T) { } }) require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b2.ULID.String()))) - require.Equal(t, 4, len(objects(t, bkt, b2.ULID))) + require.Equal(t, 9, len(objects(t, bkt, b2.ULID))) // Remove meta.json and check if delete can delete it. 
require.NoError(t, bkt.Delete(ctx, path.Join(b2.ULID.String(), block.MetaFilename))) @@ -196,7 +196,7 @@ func TestUpload(t *testing.T) { t.Run("full block", func(t *testing.T) { require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b1.ULID.String()))) - require.Equal(t, 4, len(bkt.Objects())) + require.Equal(t, 9, len(bkt.Objects())) objs := bkt.Objects() require.Contains(t, objs, path.Join(b1.ULID.String(), block.MetaFilename)) require.Contains(t, objs, path.Join(b1.ULID.String(), block.IndexFilename)) @@ -205,7 +205,7 @@ func TestUpload(t *testing.T) { t.Run("upload is idempotent", func(t *testing.T) { require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b1.ULID.String()))) - require.Equal(t, 4, len(bkt.Objects())) + require.Equal(t, 9, len(bkt.Objects())) objs := bkt.Objects() require.Contains(t, objs, path.Join(b1.ULID.String(), block.MetaFilename)) require.Contains(t, objs, path.Join(b1.ULID.String(), block.IndexFilename)) @@ -355,7 +355,7 @@ func TestUploadCleanup(t *testing.T) { require.ErrorIs(t, uploadErr, errUploadFailed) // If upload of meta.json fails, nothing is cleaned up. - require.Equal(t, 4, len(bkt.Objects())) + require.Equal(t, 9, len(bkt.Objects())) require.Greater(t, len(bkt.Objects()[path.Join(b1.String(), block.IndexFilename)]), 0) require.Greater(t, len(bkt.Objects()[path.Join(b1.String(), block.MetaFilename)]), 0) } diff --git a/pkg/phlaredb/compact.go b/pkg/phlaredb/compact.go index 548b6e9197..844eedd14f 100644 --- a/pkg/phlaredb/compact.go +++ b/pkg/phlaredb/compact.go @@ -100,7 +100,7 @@ func CompactWithSplitting(ctx context.Context, opts CompactWithSplittingOpts) ( srcMetas[i] = b.Meta() } - symbolsCompactor := newSymbolsCompactor(opts.Dst) + symbolsCompactor := newSymbolsCompactor(opts.Dst, symdb.FormatV2) defer runutil.CloseWithLogOnErr(util.Logger, symbolsCompactor, "close symbols compactor") outMeta := compactMetas(srcMetas...) @@ -725,6 +725,7 @@ func (it *dedupeProfileRowIterator) Next() bool { } type symbolsCompactor struct { + version symdb.FormatVersion rewriters map[BlockReader]*symdb.Rewriter w *symdb.SymDB stacktraces []uint32 @@ -733,10 +734,27 @@ type symbolsCompactor struct { flushed bool } -func newSymbolsCompactor(path string) *symbolsCompactor { +func newSymbolsCompactor(path string, version symdb.FormatVersion) *symbolsCompactor { + if version == symdb.FormatV3 { + return &symbolsCompactor{ + version: version, + w: symdb.NewSymDB(symdb.DefaultConfig(). + WithVersion(symdb.FormatV3). + WithDirectory(path)), + dst: path, + rewriters: make(map[BlockReader]*symdb.Rewriter), + } + } + dst := filepath.Join(path, symdb.DefaultDirName) return &symbolsCompactor{ - w: symdb.NewSymDB(symdb.DefaultConfig().WithDirectory(path)), - dst: path, + version: symdb.FormatV2, + w: symdb.NewSymDB(symdb.DefaultConfig(). + WithVersion(symdb.FormatV2). + WithDirectory(dst). 
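+ // v2 output is a directory of parquet and symdb files
+ // under <path>/symbols (symdb.DefaultDirName).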
+ WithParquetConfig(symdb.ParquetConfig{ + MaxBufferRowCount: defaultParquetConfig.MaxBufferRowCount, + })), + dst: dst, rewriters: make(map[BlockReader]*symdb.Rewriter), } } @@ -767,9 +785,13 @@ func (s *symbolsRewriter) Close() (uint64, error) { if err := s.symbolsCompactor.Flush(); err != nil { return 0, err } - dst := filepath.Join(s.dst, symdb.DefaultFileName) - src := filepath.Join(s.symbolsCompactor.dst, symdb.DefaultFileName) - return s.numSamples, util.CopyFile(src, dst) + if s.version == symdb.FormatV3 { + dst := filepath.Join(s.dst, symdb.DefaultFileName) + src := filepath.Join(s.symbolsCompactor.dst, symdb.DefaultFileName) + return s.numSamples, util.CopyFile(src, dst) + } else { + return s.numSamples, util.CopyDir(s.symbolsCompactor.dst, filepath.Join(s.dst, symdb.DefaultDirName)) + } } func (s *symbolsCompactor) ReWriteRow(profile profileRow) (uint64, error) { @@ -811,7 +833,10 @@ func (s *symbolsCompactor) Flush() error { } func (s *symbolsCompactor) Close() error { - return os.RemoveAll(filepath.Join(s.dst, symdb.DefaultFileName)) + if s.version == symdb.FormatV3 { + return os.RemoveAll(filepath.Join(s.dst, symdb.DefaultFileName)) + } + return os.RemoveAll(s.dst) } func (s *symbolsCompactor) loadStacktracesID(values []parquet.Value) { diff --git a/pkg/phlaredb/compact_test.go b/pkg/phlaredb/compact_test.go index 31a20b4707..5912506255 100644 --- a/pkg/phlaredb/compact_test.go +++ b/pkg/phlaredb/compact_test.go @@ -27,6 +27,7 @@ import ( phlarecontext "github.com/grafana/pyroscope/pkg/phlare/context" "github.com/grafana/pyroscope/pkg/phlaredb/block" "github.com/grafana/pyroscope/pkg/phlaredb/sharding" + "github.com/grafana/pyroscope/pkg/phlaredb/symdb" "github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index" "github.com/grafana/pyroscope/pkg/pprof/testhelper" ) @@ -197,7 +198,7 @@ func TestCompactWithSplitting(t *testing.T) { }) require.NoError(t, err) - require.NoFileExists(t, dst) + require.NoDirExists(t, filepath.Join(dst, symdb.DefaultDirName)) // 4 shards one per series. require.Equal(t, 4, len(compacted)) @@ -627,10 +628,15 @@ func TestFlushMeta(t *testing.T) { require.Equal(t, uint64(3), b.Meta().Stats.NumSeries) require.Equal(t, uint64(3), b.Meta().Stats.NumSamples) require.Equal(t, uint64(3), b.Meta().Stats.NumProfiles) - require.Len(t, b.Meta().Files, 3) + require.Len(t, b.Meta().Files, 8) require.Equal(t, "index.tsdb", b.Meta().Files[0].RelPath) require.Equal(t, "profiles.parquet", b.Meta().Files[1].RelPath) - require.Equal(t, "symbols.symdb", b.Meta().Files[2].RelPath) + require.Equal(t, "symbols/functions.parquet", b.Meta().Files[2].RelPath) + require.Equal(t, "symbols/index.symdb", b.Meta().Files[3].RelPath) + require.Equal(t, "symbols/locations.parquet", b.Meta().Files[4].RelPath) + require.Equal(t, "symbols/mappings.parquet", b.Meta().Files[5].RelPath) + require.Equal(t, "symbols/stacktraces.symdb", b.Meta().Files[6].RelPath) + require.Equal(t, "symbols/strings.parquet", b.Meta().Files[7].RelPath) } func newBlock(t testing.TB, generator func() []*testhelper.ProfileBuilder) *singleBlockQuerier { diff --git a/pkg/phlaredb/head.go b/pkg/phlaredb/head.go index 76107eab9b..9db3440d5b 100644 --- a/pkg/phlaredb/head.go +++ b/pkg/phlaredb/head.go @@ -124,7 +124,13 @@ func NewHead(phlarectx context.Context, cfg Config, limiter TenantLimiter) (*Hea } } - h.symdb = symdb.NewSymDB(symdb.DefaultConfig().WithDirectory(h.headPath)) + h.symdb = symdb.NewSymDB(symdb.DefaultConfig(). + WithVersion(symdb.FormatV2). + WithDirectory(filepath.Join(h.headPath, symdb.DefaultDirName)). 
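+ // v2 keeps its file set under <head>/symbols (symdb.DefaultDirName).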
+ WithParquetConfig(symdb.ParquetConfig{ + MaxBufferRowCount: h.parquetConfig.MaxBufferRowCount, + })) + h.wg.Add(1) go h.loop() diff --git a/pkg/phlaredb/head_test.go b/pkg/phlaredb/head_test.go index 2d36950588..7f2c5cbd5b 100644 --- a/pkg/phlaredb/head_test.go +++ b/pkg/phlaredb/head_test.go @@ -370,8 +370,40 @@ func TestHeadFlush(t *testing.T) { }, }, { - RelPath: "symbols.symdb", - SizeBytes: 159687, + RelPath: "symbols/functions.parquet", + Parquet: &block.ParquetFile{ + NumRowGroups: 2, + NumRows: 1423, + }, + }, + { + RelPath: "symbols/index.symdb", + SizeBytes: 308, + }, + { + RelPath: "symbols/locations.parquet", + Parquet: &block.ParquetFile{ + NumRowGroups: 2, + NumRows: 2469, + }, + }, + { + RelPath: "symbols/mappings.parquet", + Parquet: &block.ParquetFile{ + NumRowGroups: 2, + NumRows: 3, + }, + }, + { + RelPath: "symbols/stacktraces.symdb", + SizeBytes: 60366, + }, + { + RelPath: "symbols/strings.parquet", + Parquet: &block.ParquetFile{ + NumRowGroups: 2, + NumRows: 1722, + }, }, }, Compaction: block.BlockMetaCompaction{ diff --git a/pkg/phlaredb/symdb/symdb.go b/pkg/phlaredb/symdb/symdb.go index c5df560473..b6103da5d3 100644 --- a/pkg/phlaredb/symdb/symdb.go +++ b/pkg/phlaredb/symdb/symdb.go @@ -132,6 +132,11 @@ func (c *Config) WithParquetConfig(pc ParquetConfig) *Config { return c } +func (c *Config) WithVersion(v FormatVersion) *Config { + c.Version = v + return c +} + func NewSymDB(c *Config) *SymDB { if c == nil { c = DefaultConfig() From e8e7fc17959ccd2f4d1e4fcfcd5f5eb14e4553c8 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 13 May 2024 17:38:42 +0800 Subject: [PATCH 33/36] fix metadata --- pkg/phlaredb/head.go | 21 ++++++++++++++++----- pkg/phlaredb/phlaredb.go | 6 +++++- pkg/phlaredb/symdb/symdb.go | 10 +++++++--- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/pkg/phlaredb/head.go b/pkg/phlaredb/head.go index 9db3440d5b..748246dc78 100644 --- a/pkg/phlaredb/head.go +++ b/pkg/phlaredb/head.go @@ -124,12 +124,19 @@ func NewHead(phlarectx context.Context, cfg Config, limiter TenantLimiter) (*Hea } } - h.symdb = symdb.NewSymDB(symdb.DefaultConfig(). - WithVersion(symdb.FormatV2). - WithDirectory(filepath.Join(h.headPath, symdb.DefaultDirName)). - WithParquetConfig(symdb.ParquetConfig{ + symdbConfig := symdb.DefaultConfig() + if cfg.SymDBFormat == symdb.FormatV3 { + symdbConfig.Version = symdb.FormatV3 + symdbConfig.Dir = h.headPath + } else { + symdbConfig.Version = symdb.FormatV2 + symdbConfig.Dir = filepath.Join(h.headPath, symdb.DefaultDirName) + symdbConfig.Parquet = symdb.ParquetConfig{ MaxBufferRowCount: h.parquetConfig.MaxBufferRowCount, - })) + } + } + + h.symdb = symdb.NewSymDB(symdbConfig) h.wg.Add(1) go h.loop() @@ -563,6 +570,10 @@ func (h *Head) flush(ctx context.Context) error { return errors.Wrap(err, "flushing symdb") } for _, file := range h.symdb.Files() { + // Files' path is relative to the symdb dir. 
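+ // v2 keeps its files in a subdirectory, so the relative path must be
+ // re-anchored at the block root before it is recorded in the block
+ // metadata; v3 writes directly into the block directory.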
+ if h.symdb.FormatVersion() == symdb.FormatV2 { + file.RelPath = filepath.Join(symdb.DefaultDirName, file.RelPath) + } files = append(files, file) blockSize += file.SizeBytes h.metrics.flushedFileSizeBytes.WithLabelValues(file.RelPath).Observe(float64(file.SizeBytes)) diff --git a/pkg/phlaredb/phlaredb.go b/pkg/phlaredb/phlaredb.go index a503c7769e..a204b4335b 100644 --- a/pkg/phlaredb/phlaredb.go +++ b/pkg/phlaredb/phlaredb.go @@ -31,6 +31,7 @@ import ( phlareobj "github.com/grafana/pyroscope/pkg/objstore" phlarecontext "github.com/grafana/pyroscope/pkg/phlare/context" "github.com/grafana/pyroscope/pkg/phlaredb/block" + "github.com/grafana/pyroscope/pkg/phlaredb/symdb" "github.com/grafana/pyroscope/pkg/util" ) @@ -49,7 +50,10 @@ type Config struct { // TODO: docs RowGroupTargetSize uint64 `yaml:"row_group_target_size"` - Parquet *ParquetConfig `yaml:"-"` // Those configs should not be exposed to the user, rather they should be determined by pyroscope itself. Currently, they are solely used for test cases. + // Those configs should not be exposed to the user, rather they should be determined by pyroscope itself. + // Currently, they are solely used for test cases. + Parquet *ParquetConfig `yaml:"-"` + SymDBFormat symdb.FormatVersion `yaml:"-"` MinFreeDisk uint64 `yaml:"min_free_disk_gb"` MinDiskAvailablePercentage float64 `yaml:"min_disk_available_percentage"` diff --git a/pkg/phlaredb/symdb/symdb.go b/pkg/phlaredb/symdb/symdb.go index b6103da5d3..def5882753 100644 --- a/pkg/phlaredb/symdb/symdb.go +++ b/pkg/phlaredb/symdb/symdb.go @@ -63,7 +63,7 @@ type StacktraceInserter interface { } type SymDB struct { - config *Config + config Config writer blockWriter stats MemoryStats @@ -142,7 +142,7 @@ func NewSymDB(c *Config) *SymDB { c = DefaultConfig() } db := &SymDB{ - config: c, + config: *c, partitions: make(map[uint64]*PartitionWriter), stop: make(chan struct{}), } @@ -150,7 +150,7 @@ func NewSymDB(c *Config) *SymDB { case FormatV3: db.writer = newWriterV3(c) default: - c.Version = FormatV2 + db.config.Version = FormatV2 db.writer = newWriterV2(c) } db.wg.Add(1) @@ -285,3 +285,7 @@ func (s *SymDB) Flush() error { func (s *SymDB) Files() []block.File { return s.writer.meta() } + +func (s *SymDB) FormatVersion() FormatVersion { + return s.config.Version +} From 1a10f6fc94ec97e96af1e4b2c1f5b6cb606a8e56 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 13 May 2024 17:41:05 +0800 Subject: [PATCH 34/36] remove file writer base parameter --- pkg/phlaredb/symdb/block_writer.go | 6 +++--- pkg/phlaredb/symdb/format.go | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index bf31dd9216..28f7a3d44c 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -29,7 +29,7 @@ func newFileWriter(path string) (*fileWriter, error) { // There is no particular reason to use // a buffer larger than the default 4K. 
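// Note that bufio writes larger than the buffer bypass it: the buffer
// is flushed and the oversized payload goes straight to the file.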
b := bufio.NewWriterSize(f, 4096) - w := withWriterOffset(b, 0) + w := withWriterOffset(b) fw := fileWriter{ path: path, buf: b, @@ -71,8 +71,8 @@ type writerOffset struct { err error } -func withWriterOffset(w io.Writer, base int64) *writerOffset { - return &writerOffset{Writer: w, offset: base} +func withWriterOffset(w io.Writer) *writerOffset { + return &writerOffset{Writer: w} } func (w *writerOffset) write(p []byte) { diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 7afb72bcac..43a23196c2 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -261,7 +261,7 @@ func (h *PartitionHeaders) Size() int64 { } func (h *PartitionHeaders) MarshalV3To(dst io.Writer) (_ int64, err error) { - w := withWriterOffset(dst, 0) + w := withWriterOffset(dst) buf := make([]byte, 4, 128) binary.BigEndian.PutUint32(buf, uint32(len(*h))) w.write(buf) @@ -274,7 +274,7 @@ func (h *PartitionHeaders) MarshalV3To(dst io.Writer) (_ int64, err error) { } func (h *PartitionHeaders) MarshalV2To(dst io.Writer) (_ int64, err error) { - w := withWriterOffset(dst, 0) + w := withWriterOffset(dst) buf := make([]byte, 4, 128) binary.BigEndian.PutUint32(buf, uint32(len(*h))) w.write(buf) @@ -658,7 +658,7 @@ func (f *IndexFile) dataOffset() int { func (f *IndexFile) WriteTo(dst io.Writer) (n int64, err error) { checksum := crc32.New(castagnoli) - w := withWriterOffset(io.MultiWriter(dst, checksum), 0) + w := withWriterOffset(io.MultiWriter(dst, checksum)) if _, err = w.Write(f.Header.MarshalBinary()); err != nil { return w.offset, fmt.Errorf("header write: %w", err) } From 309866ec5b7a57157ca3c8be94971d81b28415ef Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 13 May 2024 20:49:17 +0800 Subject: [PATCH 35/36] fixes --- pkg/phlaredb/symdb/format.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 43a23196c2..d7be2ef672 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -457,11 +457,11 @@ type SymbolsBlockHeader struct { Length uint32 // BlockSize denotes the number of items per block. BlockSize uint32 - // BlockSize denotes the encoder block header size in bytes. + // BlockHeaderSize denotes the encoder block header size in bytes. // This enables forward compatibility within the same format version: - // as long as fields are not removed, or reordered, and the encoding - // scheme does not change, the format can be extended with no change - // of the format version. Decoder is able to read the whole header and + // as long as fields are not removed or reordered, and the encoding + // scheme does not change, the format can be extended without updating + // the format version. Decoder is able to read the whole header and // skip unknown fields. BlockHeaderSize uint16 // Format of the encoded data. 
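
The header-size field makes the scheme concrete: a reader always consumes
BlockHeaderSize bytes, decodes the prefix it understands, and ignores the
rest. A minimal sketch of that idea, assuming hypothetical v1HeaderSize and
decodePrefix helpers that are not part of this series:

	buf := make([]byte, h.BlockHeaderSize)
	if _, err := io.ReadFull(r, buf); err != nil {
		return err
	}
	known := len(buf)
	if known > v1HeaderSize {
		known = v1HeaderSize // fields past this point were added later
	}
	decodePrefix(buf[:known]) // trailing bytes are consumed but ignored
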
@@ -490,7 +490,7 @@ func readSymbolsBlockHeader(buf []byte, r io.Reader, v headerUnmarshaler) error } v.unmarshal(buf) if crc32.Checksum(buf[:len(buf)-checksumSize], castagnoli) != v.checksum() { - return ErrInvalidSize + return ErrInvalidCRC } return nil } From 22c896aaf8bf9f9af42ac9a94afcae65f02e88bb Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Wed, 26 Jun 2024 11:49:40 +0800 Subject: [PATCH 36/36] resolve post-merge conflicts --- pkg/phlaredb/schemas/v1/functions.go | 2 +- pkg/phlaredb/schemas/v1/locations.go | 4 ++-- pkg/phlaredb/schemas/v1/mappings.go | 6 +++--- pkg/phlaredb/schemas/v1/schema_test.go | 2 +- pkg/phlaredb/symdb/block_reader.go | 6 +++--- pkg/phlaredb/symdb/block_reader_parquet.go | 2 +- pkg/phlaredb/symdb/block_writer_v2.go | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/phlaredb/schemas/v1/functions.go b/pkg/phlaredb/schemas/v1/functions.go index 6bd6e7e3e3..bdaac2f22e 100644 --- a/pkg/phlaredb/schemas/v1/functions.go +++ b/pkg/phlaredb/schemas/v1/functions.go @@ -14,7 +14,7 @@ func (FunctionPersister) Name() string { return "functions" } func (FunctionPersister) Schema() *parquet.Schema { return functionsSchema } -func (FunctionPersister) Deconstruct(row parquet.Row, _ uint64, fn InMemoryFunction) parquet.Row { +func (FunctionPersister) Deconstruct(row parquet.Row, fn InMemoryFunction) parquet.Row { if cap(row) < 5 { row = make(parquet.Row, 0, 5) } diff --git a/pkg/phlaredb/schemas/v1/locations.go b/pkg/phlaredb/schemas/v1/locations.go index 748d82389c..f04056fc44 100644 --- a/pkg/phlaredb/schemas/v1/locations.go +++ b/pkg/phlaredb/schemas/v1/locations.go @@ -59,7 +59,7 @@ func (LocationPersister) Deconstruct(row parquet.Row, loc InMemoryLocation) parq return row } -func (LocationPersister) Reconstruct(row parquet.Row) (*InMemoryLocation, error) { +func (LocationPersister) Reconstruct(row parquet.Row) (InMemoryLocation, error) { loc := InMemoryLocation{ Id: row[0].Uint64(), MappingId: uint32(row[1].Uint64()), @@ -74,7 +74,7 @@ func (LocationPersister) Reconstruct(row parquet.Row) (*InMemoryLocation, error) for i, v := range lines[len(lines)/2:] { loc.Line[i].Line = int32(v.Uint64()) } - return &loc, nil + return loc, nil } type InMemoryLocation struct { diff --git a/pkg/phlaredb/schemas/v1/mappings.go b/pkg/phlaredb/schemas/v1/mappings.go index 8c06c61dbf..c8220dcbc6 100644 --- a/pkg/phlaredb/schemas/v1/mappings.go +++ b/pkg/phlaredb/schemas/v1/mappings.go @@ -14,7 +14,7 @@ func (MappingPersister) Name() string { return "mappings" } func (MappingPersister) Schema() *parquet.Schema { return mappingsSchema } -func (MappingPersister) Deconstruct(row parquet.Row, m *InMemoryMapping) parquet.Row { +func (MappingPersister) Deconstruct(row parquet.Row, m InMemoryMapping) parquet.Row { if cap(row) < 10 { row = make(parquet.Row, 0, 10) } @@ -32,7 +32,7 @@ func (MappingPersister) Deconstruct(row parquet.Row, m *InMemoryMapping) parquet return row } -func (MappingPersister) Reconstruct(row parquet.Row) (*InMemoryMapping, error) { +func (MappingPersister) Reconstruct(row parquet.Row) (InMemoryMapping, error) { mapping := InMemoryMapping{ Id: row[0].Uint64(), MemoryStart: row[1].Uint64(), @@ -45,7 +45,7 @@ func (MappingPersister) Reconstruct(row parquet.Row) (*InMemoryMapping, error) { HasLineNumbers: row[8].Boolean(), HasInlineFrames: row[9].Boolean(), } - return &mapping, nil + return mapping, nil } type InMemoryMapping struct { diff --git a/pkg/phlaredb/schemas/v1/schema_test.go b/pkg/phlaredb/schemas/v1/schema_test.go index 
de63dc3fe5..8573471c6e 100644 --- a/pkg/phlaredb/schemas/v1/schema_test.go +++ b/pkg/phlaredb/schemas/v1/schema_test.go @@ -428,7 +428,7 @@ func (*pprofMappingPersister) Reconstruct(row parquet.Row) (*profilev1.Mapping, return &m, nil } -type ReadWriter[T any, P Persister[T]] struct {} +type ReadWriter[T any, P Persister[T]] struct{} func (r *ReadWriter[T, P]) WriteParquetFile(file io.Writer, elements []T) error { var ( diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index cd962b3a06..8561b71c07 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -403,14 +403,14 @@ func (p *partition) ResolveStacktraceLocations(ctx context.Context, dst Stacktra } func (p *partition) SplitStacktraceIDRanges(appender *SampleAppender) iter.Iterator[*StacktraceIDRange] { - if len(p.stacktraceChunks) == 0 { + if len(p.stacktraces) == 0 { return iter.NewEmptyIterator[*StacktraceIDRange]() } var n int samples := appender.Samples() - ranges := SplitStacktraces(samples.StacktraceIDs, p.stacktraceChunks[0].header.StacktraceMaxNodes) + ranges := SplitStacktraces(samples.StacktraceIDs, p.stacktraces[0].header.StacktraceMaxNodes) for _, sr := range ranges { - c := p.stacktraceChunks[sr.chunk] + c := p.stacktraces[sr.chunk] sr.ParentPointerTree = c.t sr.Samples = samples.Range(n, n+len(sr.IDs)) n += len(sr.IDs) diff --git a/pkg/phlaredb/symdb/block_reader_parquet.go b/pkg/phlaredb/symdb/block_reader_parquet.go index 90ba014b85..ec97c3f9c1 100644 --- a/pkg/phlaredb/symdb/block_reader_parquet.go +++ b/pkg/phlaredb/symdb/block_reader_parquet.go @@ -88,7 +88,7 @@ func (t *parquetTable[M, P]) readRows(dst []M, buf []parquet.Row, rows parquet.R if i == len(dst) { return nil } - _, v, err := t.persister.Reconstruct(row) + v, err := t.persister.Reconstruct(row) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/block_writer_v2.go b/pkg/phlaredb/symdb/block_writer_v2.go index 40dd149dbf..6d675fe8d4 100644 --- a/pkg/phlaredb/symdb/block_writer_v2.go +++ b/pkg/phlaredb/symdb/block_writer_v2.go @@ -269,7 +269,7 @@ func (s *parquetWriter[M, P]) fillBatch(values []M) int { s.rowsBatch = s.rowsBatch[:m] for i := 0; i < m; i++ { row := s.rowsBatch[i][:0] - s.rowsBatch[i] = s.persister.Deconstruct(row, 0, values[i]) + s.rowsBatch[i] = s.persister.Deconstruct(row, values[i]) } return m }
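
Taken together, the series makes the on-disk symbols layout a pure
configuration choice. A minimal usage sketch, assuming only the options
introduced above (the block path is made up):

	package main

	import (
		"path/filepath"

		"github.com/grafana/pyroscope/pkg/phlaredb/symdb"
	)

	func main() {
		// v3: a single symbols.symdb data file in the block directory.
		v3 := symdb.NewSymDB(symdb.DefaultConfig().
			WithVersion(symdb.FormatV3).
			WithDirectory("/tmp/block"))

		// v2: parquet tables plus index and stacktrace files under
		// <block>/symbols, with relative paths fixed up by the caller.
		v2 := symdb.NewSymDB(symdb.DefaultConfig().
			WithVersion(symdb.FormatV2).
			WithDirectory(filepath.Join("/tmp/block", symdb.DefaultDirName)).
			WithParquetConfig(symdb.ParquetConfig{MaxBufferRowCount: 100 << 10}))
		_, _ = v3, v2
	}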