From c5cb8f2d660671680e4bbfaf944872471c0424f7 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Mon, 18 Mar 2024 19:11:58 +0800
Subject: [PATCH 01/36] feat: strings encoding

---
 pkg/phlaredb/symdb/strings_encoding.go      | 229 ++++++++++++++++++++
 pkg/phlaredb/symdb/strings_encoding_test.go |  95 ++++++++
 pkg/slices/slices.go                        |  11 +-
 3 files changed, 330 insertions(+), 5 deletions(-)
 create mode 100644 pkg/phlaredb/symdb/strings_encoding.go
 create mode 100644 pkg/phlaredb/symdb/strings_encoding_test.go

diff --git a/pkg/phlaredb/symdb/strings_encoding.go b/pkg/phlaredb/symdb/strings_encoding.go
new file mode 100644
index 0000000000..d4312f8e8a
--- /dev/null
+++ b/pkg/phlaredb/symdb/strings_encoding.go
@@ -0,0 +1,229 @@
+package symdb
+
+import (
+	"encoding/binary"
+	"fmt"
+	"io"
+	"unsafe"
+
+	"github.com/grafana/pyroscope/pkg/slices"
+)
+
+// Almost all strings in profiles are very short: their length fits in 8 bits.
+// Strings longer than 65535 bytes are not expected and are truncated.
+// Typically, there are only 1-10 strings longer than 256 bytes in a data set
+// consisting of a few tens of thousands of strings.
+//
+// A traditional variable-length encoding is rather wasteful in our case.
+// Instead, we split the strings into blocks and use an encoding that depends
+// on the maximum length of the strings in the block.
+//
+// The output data starts with a header: number of strings, block size,
+// number of blocks, and the block encoding map. In the map, each byte
+// specifies the number of bits needed to decode the maximum value from
+// that block, rounded up to the next power of two. Currently, the length
+// value is either 8 bits or 16.
+//
+// Blocks of data follow after the header. Each block includes two parts:
+// the string lengths array and the strings data.
+
+const maxStringLen = 1<<16 - 1
+
+type StringsEncoder struct {
+	w         io.Writer
+	blockSize int
+	blocks    []byte
+	buf       []byte
+}
+
+func NewStringsEncoder(w io.Writer) *StringsEncoder { return &StringsEncoder{w: w} }
+
+func (e *StringsEncoder) WriteStrings(strings []string) error {
+	if e.blockSize == 0 {
+		e.blockSize = 1 << 10 // 1k strings per block by default.
+	}
+	nb := (len(strings) + e.blockSize - 1) / e.blockSize
+	e.blocks = slices.GrowLen(e.blocks, nb)
+	var offset uint32
+	var bi int
+	l := uint32(len(strings))
+	for offset < l {
+		lo := offset
+		hi := offset + uint32(e.blockSize)
+		if x := uint32(len(strings)); hi > x {
+			hi = x
+		}
+		e.blocks[bi] = e.blockEncoding(strings[lo:hi])
+		offset = hi
+		bi++
+	}
+	if err := e.writeHeader(strings); err != nil {
+		return err
+	}
+	// Next we write string lengths and values in blocks.
+	e.buf = slices.GrowLen(e.buf, e.blockSize*2) // Up to 2 bytes per string.
+	for i, b := range e.blocks {
+		// e.buf = e.buf[:0]
+		lo := i * e.blockSize
+		hi := lo + e.blockSize
+		if x := len(strings); hi > x {
+			hi = x
+		}
+		bs := strings[lo:hi]
+		switch b {
+		case 8:
+			for j, s := range bs {
+				e.buf[j] = byte(len(s))
+			}
+		case 16:
+			for j, s := range bs {
+				// Equivalent to binary.LittleEndian.PutUint16.
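+				// The two stores below write the low byte first,
+				// then the high byte: e.g. a length of 300 (0x012C)
+				// is laid out as 0x2C 0x01.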
+				e.buf[j*2] = byte(len(s))
+				e.buf[j*2+1] = byte(len(s) >> 8)
+			}
+		default:
+			panic("bug: unexpected block encoding")
+		}
+		if _, err := e.w.Write(e.buf[:len(bs)*int(b)/8]); err != nil {
+			return err
+		}
+		for _, s := range bs {
+			if len(s) > maxStringLen {
+				s = s[:maxStringLen]
+			}
+			if _, err := e.w.Write(*((*[]byte)(unsafe.Pointer(&s)))); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+func (e *StringsEncoder) writeHeader(strings []string) (err error) {
+	e.buf = slices.GrowLen(e.buf, 12)
+	binary.LittleEndian.PutUint32(e.buf[0:4], uint32(len(strings)))
+	binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize))
+	binary.LittleEndian.PutUint32(e.buf[8:12], uint32(len(e.blocks)))
+	if _, err = e.w.Write(e.buf); err != nil {
+		return err
+	}
+	_, err = e.w.Write(e.blocks)
+	return err
+}
+
+func (e *StringsEncoder) blockEncoding(b []string) byte {
+	var x uint16
+	for _, s := range b {
+		x |= uint16(len(s)) >> 8
+	}
+	if x > 0 {
+		return 16
+	}
+	return 8
+}
+
+func (e *StringsEncoder) Reset() {
+	e.buf = e.buf[:0]
+	e.blocks = e.blocks[:0]
+	e.blockSize = 0
+	e.w = nil
+}
+
+type StringsDecoder struct {
+	r          io.Reader
+	stringsLen uint32
+	blocksLen  uint32
+	blockSize  uint32
+	blocks     []byte
+	buf        []byte
+}
+
+func NewStringsDecoder(r io.Reader) *StringsDecoder { return &StringsDecoder{r: r} }
+
+func (d *StringsDecoder) readHeader() (err error) {
+	d.buf = slices.GrowLen(d.buf, 12)
+	if _, err = io.ReadFull(d.r, d.buf); err != nil {
+		return err
+	}
+	d.stringsLen = binary.LittleEndian.Uint32(d.buf[0:4])
+	d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8])
+	d.blocksLen = binary.LittleEndian.Uint32(d.buf[8:12])
+	// Sanity checks are needed as we process the stream data
+	// before verifying the checksum.
+	if d.blocksLen > 1<<20 || d.stringsLen > 1<<20 || d.blockSize > 1<<20 {
+		return fmt.Errorf("malformed header")
+	}
+	d.blocks = slices.GrowLen(d.blocks, int(d.blocksLen))
+	_, err = io.ReadFull(d.r, d.blocks)
+	return err
+}
+
+func (d *StringsDecoder) StringsLen() (int, error) {
+	if err := d.readHeader(); err != nil {
+		return 0, err
+	}
+	return int(d.stringsLen), nil
+}
+
+func (d *StringsDecoder) ReadStrings(dst []string) (err error) {
+	for i := 0; i < len(d.blocks); i++ {
+		bs := d.blockSize
+		if i == len(d.blocks)-1 && d.stringsLen%d.blockSize > 0 {
+			bs = d.stringsLen % d.blockSize
+		}
+		switch d.blocks[i] {
+		case 8:
+			err = d.readStrings8(i, int(bs), dst)
+		case 16:
+			err = d.readStrings16(i, int(bs), dst)
+		default:
+			err = fmt.Errorf("unknown block encoding")
+		}
+		if err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (d *StringsDecoder) readStrings8(idx, length int, dst []string) (err error) {
+	d.buf = slices.GrowLen(d.buf, length) // 1 byte per string.
+	if _, err = io.ReadFull(d.r, d.buf); err != nil {
+		return err
+	}
+	offset := int(d.blockSize) * idx
+	for i, l := range d.buf {
+		s := make([]byte, l) // Up to 255 bytes.
+		if _, err = io.ReadFull(d.r, s); err != nil {
+			return err
+		}
+		dst[offset+i] = *(*string)(unsafe.Pointer(&s))
+	}
+	return err
+}
+
+func (d *StringsDecoder) readStrings16(idx, length int, dst []string) (err error) {
+	d.buf = slices.GrowLen(d.buf, length*2) // 2 bytes per string.
+	if _, err = io.ReadFull(d.r, d.buf); err != nil {
+		return err
+	}
+	offset := int(d.blockSize) * idx
+	for i := 0; i < len(d.buf); i += 2 {
+		l := uint16(d.buf[i]) | uint16(d.buf[i+1])<<8
+		s := make([]byte, l) // Up to 65535 bytes.
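+		// Read the payload straight into the fresh buffer; the
+		// unsafe conversion below then adopts it as the string's
+		// backing array, avoiding a second copy.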
+		if _, err = io.ReadFull(d.r, s); err != nil {
+			return err
+		}
+		dst[offset+i/2] = *(*string)(unsafe.Pointer(&s))
+	}
+	return err
+}
+
+func (d *StringsDecoder) Reset() {
+	d.buf = d.buf[:0]
+	d.blocks = d.blocks[:0]
+	d.blockSize = 0
+	d.blocksLen = 0
+	d.stringsLen = 0
+	d.r = nil
+}
diff --git a/pkg/phlaredb/symdb/strings_encoding_test.go b/pkg/phlaredb/symdb/strings_encoding_test.go
new file mode 100644
index 0000000000..519f719df8
--- /dev/null
+++ b/pkg/phlaredb/symdb/strings_encoding_test.go
@@ -0,0 +1,95 @@
+package symdb
+
+import (
+	"bufio"
+	"bytes"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func Test_StringsEncoding(t *testing.T) {
+	type testCase struct {
+		description string
+		strings     []string
+		blockSize   int
+	}
+
+	testCases := []testCase{
+		{
+			description: "empty",
+			strings:     []string{},
+		},
+		{
+			description: "less than block size",
+			strings: []string{
+				"a",
+				"b",
+			},
+			blockSize: 4,
+		},
+		{
+			description: "exact block size",
+			strings: []string{
+				"a",
+				"bc",
+				"cde",
+				"def",
+			},
+			blockSize: 4,
+		},
+		{
+			description: "greater than block size",
+			strings: []string{
+				"a",
+				"bc",
+				"cde",
+				"def",
+				"e",
+			},
+			blockSize: 4,
+		},
+		{
+			description: "mixed encoding",
+			strings: []string{
+				"a",
+				"bcd",
+				strings.Repeat("e", 256),
+			},
+			blockSize: 4,
+		},
+		{
+			description: "mixed encoding exact block",
+			strings: []string{
+				"a",
+				"b",
+				"c",
+				"d",
+				strings.Repeat("e", 256),
+				strings.Repeat("f", 256),
+				strings.Repeat("j", 256),
+				strings.Repeat("h", 256),
+			},
+			blockSize: 4,
+		},
+	}
+
+	for _, tc := range testCases {
+		tc := tc
+		t.Run(tc.description, func(t *testing.T) {
+			var output bytes.Buffer
+			e := NewStringsEncoder(&output)
+			if tc.blockSize > 0 {
+				e.blockSize = tc.blockSize
+			}
+			require.NoError(t, e.WriteStrings(tc.strings))
+			d := NewStringsDecoder(bufio.NewReader(&output))
+			n, err := d.StringsLen()
+			require.NoError(t, err)
+			out := make([]string, n)
+			require.NoError(t, d.ReadStrings(out))
+			require.Equal(t, tc.strings, out)
+		})
+	}
+}
diff --git a/pkg/slices/slices.go b/pkg/slices/slices.go
index 70de8711f4..e574ccbb02 100644
--- a/pkg/slices/slices.go
+++ b/pkg/slices/slices.go
@@ -1,5 +1,9 @@
 package slices
 
+import (
+	"golang.org/x/exp/slices"
+)
+
 // RemoveInPlace removes all elements from a slice that match the given predicate.
 // Does not allocate a new slice.
 func RemoveInPlace[T any](collection []T, predicate func(T, int) bool) []T {
@@ -27,9 +31,6 @@ func Clear[S ~[]E, E any](s S) {
 }
 
 func GrowLen[S ~[]E, E any](s S, n int) S {
-	if cap(s) < n {
-		s = make([]E, n)
-	}
-	s = s[:n]
-	return s
+	s = s[:0]
+	return slices.Grow(s, n)[:n]
 }

From 76c31b42ba88f842395fdbd283bb31dc55eef6b2 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Wed, 20 Mar 2024 13:02:36 +0800
Subject: [PATCH 02/36] simplify string encoding heuristics

---
 .../symdb/{strings_encoding.go => encoding_strings.go}          | 8 +++-----
 .../{strings_encoding_test.go => encoding_strings_test.go}      | 0
 2 files changed, 3 insertions(+), 5 deletions(-)
 rename pkg/phlaredb/symdb/{strings_encoding.go => encoding_strings.go} (98%)
 rename pkg/phlaredb/symdb/{strings_encoding_test.go => encoding_strings_test.go} (100%)

diff --git a/pkg/phlaredb/symdb/strings_encoding.go b/pkg/phlaredb/symdb/encoding_strings.go
similarity index 98%
rename from pkg/phlaredb/symdb/strings_encoding.go
rename to pkg/phlaredb/symdb/encoding_strings.go
index d4312f8e8a..ac69b33fdd 100644
--- a/pkg/phlaredb/symdb/strings_encoding.go
+++ b/pkg/phlaredb/symdb/encoding_strings.go
@@ -112,12 +112,10 @@ func (e *StringsEncoder) writeHeader(strings []string) (err error) {
 }
 
 func (e *StringsEncoder) blockEncoding(b []string) byte {
-	var x uint16
 	for _, s := range b {
-		x |= uint16(len(s)) >> 8
-	}
-	if x > 0 {
-		return 16
+		if len(s) > 255 {
+			return 16
+		}
 	}
 	return 8
 }
diff --git a/pkg/phlaredb/symdb/strings_encoding_test.go b/pkg/phlaredb/symdb/encoding_strings_test.go
similarity index 100%
rename from pkg/phlaredb/symdb/strings_encoding_test.go
rename to pkg/phlaredb/symdb/encoding_strings_test.go

From c716fa64a1dddf1284afcb387ef86002c1d0fe56 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Wed, 20 Mar 2024 20:34:21 +0800
Subject: [PATCH 03/36] locations encoding

---
 pkg/phlaredb/symdb/encoding_locations.go | 176 +++++++++++++++++++++++
 1 file changed, 176 insertions(+)
 create mode 100644 pkg/phlaredb/symdb/encoding_locations.go

diff --git a/pkg/phlaredb/symdb/encoding_locations.go b/pkg/phlaredb/symdb/encoding_locations.go
new file mode 100644
index 0000000000..bb7b6b50e5
--- /dev/null
+++ b/pkg/phlaredb/symdb/encoding_locations.go
@@ -0,0 +1,176 @@
+package symdb
+
+import (
+	"bytes"
+	"encoding/binary"
+	"io"
+	"unsafe"
+
+	"github.com/parquet-go/parquet-go/encoding/delta"
+
+	v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
+	"github.com/grafana/pyroscope/pkg/slices"
+)
+
+// https://parquet.apache.org/docs/file-format/data-pages/encodings/#delta-encoding-delta_binary_packed--5
+
+type LocationsEncoder struct {
+	w io.Writer
+}
+
+type locationsBlock struct {
+	locsLen uint32
+
+	mapping  []int32
+	function []int32
+	line     []int32
+	// Optional.
+	count  []int32
+	lines  []int32
+	addr   []int64
+	folded []bool
+
+	tmp []byte
+	buf bytes.Buffer
+
+	hasLines  bool
+	hasAddr   bool
+	hasFolded bool
+}
+
+func (lb *locationsBlock) encode(w io.Writer, locations []v1.InMemoryLocation) (int64, error) {
+	lb.reset(len(locations))
+	var addr int64
+	for i, loc := range locations {
+		lb.mapping[i] = int32(loc.MappingId)
+		lb.function[i] = int32(loc.Line[0].FunctionId)
+		lb.line[i] = loc.Line[0].Line
+		lb.count[i] = int32(len(loc.Line) - 1)
+		// Append all lines but the first one.
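+		// The first line is already captured in the function/line
+		// columns above, so a location with N lines contributes
+		// N-1 (line, function_id) pairs here.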
+		for j := 1; j < len(loc.Line); j++ {
+			line := loc.Line[j]
+			lb.lines = append(lb.lines, line.Line, int32(line.FunctionId))
+		}
+		addr |= int64(loc.Address)
+		lb.addr[i] = int64(loc.Address)
+		lb.hasFolded = lb.hasFolded || loc.IsFolded
+		lb.folded[i] = loc.IsFolded
+	}
+	lb.hasLines = len(lb.lines) > 0
+	lb.hasAddr = addr > 0
+	h := locationsBlockHeader{
+		LocationsLen: lb.locsLen,
+	}
+
+	var enc delta.BinaryPackedEncoding
+	lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.mapping)
+	h.MappingSize = uint32(len(lb.tmp))
+	lb.buf.Write(lb.tmp)
+	lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.function)
+	h.FunctionSize = uint32(len(lb.tmp))
+	lb.buf.Write(lb.tmp)
+	lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.line)
+	h.LineSize = uint32(len(lb.tmp))
+	lb.buf.Write(lb.tmp)
+	if lb.hasLines {
+		lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.count)
+		h.CountSize = uint32(len(lb.tmp))
+		lb.buf.Write(lb.tmp)
+		lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.lines)
+		h.LinesSize = uint32(len(lb.tmp))
+		lb.buf.Write(lb.tmp)
+	}
+	if lb.hasAddr {
+		lb.tmp, _ = enc.EncodeInt64(lb.tmp, lb.addr)
+		h.AddrSize = uint32(len(lb.tmp))
+		lb.buf.Write(lb.tmp)
+	}
+	if lb.hasFolded {
+		// TODO
+	}
+
+	lb.tmp = slices.GrowLen(lb.tmp, locationsBlockHeaderSize)
+	h.marshal(lb.tmp)
+	n, err := w.Write(lb.tmp)
+	if err != nil {
+		return int64(n), err
+	}
+	m, err := lb.buf.WriteTo(w)
+	return m + int64(n), err
+}
+
+func (lb *locationsBlock) reset(locations int) {
+	// Actual estimate is ~6 bytes per location.
+	// In a large data set, the most expensive member
+	// is FunctionID, and it's about 2 bytes per location.
+	lb.buf.Reset()
+	lb.buf.Grow(locations * 8)
+	*lb = locationsBlock{
+		locsLen: uint32(locations),
+
+		mapping:  slices.GrowLen(lb.mapping, locations),
+		function: slices.GrowLen(lb.function, locations),
+		line:     slices.GrowLen(lb.line, locations),
+
+		count:  slices.GrowLen(lb.count, locations),
+		lines:  lb.lines[:0], // Appended.
+		addr:   slices.GrowLen(lb.addr, locations),
+		folded: slices.GrowLen(lb.folded, locations),
+
+		buf: lb.buf,
+		tmp: slices.GrowLen(lb.tmp, 2*locations),
+	}
+}
+
+const locationsBlockHeaderSize = int(unsafe.Sizeof(locationsBlockHeader{}))
+
+type locationsBlockHeader struct {
+	LocationsLen uint32
+	MappingSize  uint32
+	FunctionSize uint32
+	LineSize     uint32
+	CountSize    uint32
+	LinesSize    uint32
+	AddrSize     uint32
+	IsFoldedSize uint32
+}
+
+func (h *locationsBlockHeader) marshal(b []byte) {
+	binary.LittleEndian.PutUint32(b[0:4], h.LocationsLen)
+	binary.LittleEndian.PutUint32(b[4:8], h.MappingSize)
+	binary.LittleEndian.PutUint32(b[8:12], h.FunctionSize)
+	binary.LittleEndian.PutUint32(b[12:16], h.LineSize)
+	binary.LittleEndian.PutUint32(b[16:20], h.CountSize)
+	binary.LittleEndian.PutUint32(b[20:24], h.LinesSize)
+	binary.LittleEndian.PutUint32(b[24:28], h.AddrSize)
+	binary.LittleEndian.PutUint32(b[28:32], h.IsFoldedSize)
+}
+
+func (lb *locationsBlock) locations() int { return int(lb.locsLen) }
+
+func (lb *locationsBlock) decode(locations []v1.InMemoryLocation) {
+	lines := make([]v1.InMemoryLine, len(lb.function)+len(lb.lines)/2)
+	var j int32 // Offset within the lines slice.
+	var o int32 // Offset within the encoded lines slice.
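+	// Each location claims count[i]+1 entries of the shared slice:
+	// entry 0 is rebuilt from the function/line columns, the rest
+	// from the flat (line, function_id) pairs.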
+	for i := 0; i < len(locations); i++ {
+		ll := lb.count[i] + 1
+		locations[i].Line = lines[j : j+ll]
+		locations[i].Line[0].Line = lb.line[i]
+		locations[i].Line[0].FunctionId = uint32(lb.function[i])
+		locations[i].MappingId = uint32(lb.mapping[i])
+		j += ll
+		for l := int32(1); l < ll; l++ {
+			locations[i].Line[l].FunctionId = uint32(lb.lines[o+1])
+			locations[i].Line[l].Line = lb.lines[o]
+			o += 2
+		}
+	}
+}
+
+func NewLocationsEncoder(w io.Writer) *LocationsEncoder {
+	return &LocationsEncoder{w: w}
+}
+
+func (e *LocationsEncoder) EncodeLocations(locations []v1.InMemoryLocation) error {
+	return nil
+}

From 42342157ce2ef9d1580814e5c929bde904680e35 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Thu, 21 Mar 2024 20:29:45 +0800
Subject: [PATCH 04/36] locations wire format

---
 pkg/phlaredb/symdb/encoding_locations.go | 458 +++++++++++++++++------
 pkg/phlaredb/symdb/encoding_strings.go   |  20 +-
 2 files changed, 355 insertions(+), 123 deletions(-)

diff --git a/pkg/phlaredb/symdb/encoding_locations.go b/pkg/phlaredb/symdb/encoding_locations.go
index bb7b6b50e5..87df17d7c2 100644
--- a/pkg/phlaredb/symdb/encoding_locations.go
+++ b/pkg/phlaredb/symdb/encoding_locations.go
@@ -1,8 +1,10 @@
 package symdb
 
 import (
+	"bufio"
 	"bytes"
 	"encoding/binary"
+	"fmt"
 	"io"
 	"unsafe"
 
@@ -10,167 +12,397 @@ import (
 
 	v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
 	"github.com/grafana/pyroscope/pkg/slices"
+	"github.com/grafana/pyroscope/pkg/util/math"
 )
 
 // https://parquet.apache.org/docs/file-format/data-pages/encodings/#delta-encoding-delta_binary_packed--5
 
 type LocationsEncoder struct {
 	w io.Writer
+	e locationsBlockEncoder
+
+	blockSize int
+	locations int
+
+	buf []byte
 }
 
+const (
+	maxLocationLines          = 255
+	defaultLocationsBlockSize = 1 << 10
+)
+
+func NewLocationsEncoder(w io.Writer) *LocationsEncoder {
+	return &LocationsEncoder{w: w}
+}
+
+func (e *LocationsEncoder) EncodeLocations(locations []v1.InMemoryLocation) error {
+	if e.blockSize == 0 {
+		e.blockSize = defaultLocationsBlockSize
+	}
+	e.locations = len(locations)
+	if err := e.writeHeader(); err != nil {
+		return err
+	}
+	for i := 0; i < len(locations); i += e.blockSize {
+		block := locations[i:math.Min(i+e.blockSize, len(locations))]
+		if _, err := e.e.encode(e.w, block); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (e *LocationsEncoder) writeHeader() (err error) {
+	e.buf = slices.GrowLen(e.buf, 8)
+	binary.LittleEndian.PutUint32(e.buf[0:4], uint32(e.locations))
+	binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize))
+	_, err = e.w.Write(e.buf)
+	return err
+}
+
+type LocationsDecoder struct {
+	r io.Reader
+	d locationsBlockDecoder
+
+	blockSize uint32
+	locations uint32
+
+	buf []byte
+}
+
+func NewLocationsDecoder(r io.Reader) *LocationsDecoder { return &LocationsDecoder{r: r} }
+
+func (d *LocationsDecoder) LocationsLen() (int, error) {
+	if err := d.readHeader(); err != nil {
+		return 0, err
+	}
+	return int(d.locations), nil
+}
+
+func (d *LocationsDecoder) readHeader() (err error) {
+	d.buf = slices.GrowLen(d.buf, 8)
+	if _, err = io.ReadFull(d.r, d.buf); err != nil {
+		return err
+	}
+	d.locations = binary.LittleEndian.Uint32(d.buf[0:4])
+	d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8])
+	// Sanity checks are needed as we process the stream data
+	// before verifying the checksum.
+	if d.locations > 1<<20 || d.blockSize > 1<<20 {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func (d *LocationsDecoder) DecodeLocations(locations []v1.InMemoryLocation) error {
+	blocks := int((d.locations + d.blockSize - 1) / d.blockSize)
+	// It's expected that the reader is already buffered.
+	r, ok := d.r.(*bufio.Reader)
+	if !ok {
+		bufSize := int(d.blockSize * 16) // 16 bytes per location.
+		r = bufio.NewReaderSize(d.r, bufSize)
+	}
+	for i := 0; i < blocks; i++ {
+		lo := i * int(d.blockSize)
+		hi := math.Min(lo+int(d.blockSize), int(d.locations))
+		block := locations[lo:hi]
+		if err := d.d.decode(r, block); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+type locationsBlockEncoder struct {
+	header locationsBlockHeader
+
+	mapping []int32
+	// Assuming there are no locations with more than 255 lines.
+	// We could even use a nibble (4 bits), but there are locations
+	// with 10 or more functions, therefore there is a chance that
+	// a capacity of 2^4 is not enough in all cases.
+	lineCount []byte
+	lines     []int32
+	// Optional.
+	addr   []int64
+	folded []bool
+
+	tmp []byte
+	buf bytes.Buffer
+
+	hasFolded bool
+}
+
+const locationsBlockHeaderSize = int(unsafe.Sizeof(locationsBlockHeader{}))
+
+type locationsBlockHeader struct {
+	LocationsLen uint32 // Number of locations
+	MappingSize  uint32 // Size of the encoded slice of mapping_ids
+	LinesLen     uint32 // Number of lines per location
+	LinesSize    uint32 // Size of the encoded lines
+	// Optional, might be empty.
+	AddrSize     uint32 // Size of the encoded slice of addresses
+	IsFoldedSize uint32 // Size of the encoded slice of is_folded
+}
+
+// isValid reports whether the header contains sane values.
+// This is important as the block might be read before the
+// checksum validation.
+func (h *locationsBlockHeader) isValid() bool {
+	return h.LocationsLen > 0 && h.LocationsLen < 1<<20 &&
+		h.MappingSize > 0 && h.MappingSize < 1<<20 &&
+		h.LinesLen > 0 && h.LinesLen < 1<<20 &&
+		h.LinesSize > 0 && h.LinesSize < 1<<20 &&
+		h.AddrSize < 1<<20 &&
+		h.IsFoldedSize < 1<<20
+}
+
+func (h *locationsBlockHeader) marshal(b []byte) {
+	binary.LittleEndian.PutUint32(b[0:4], h.LocationsLen)
+	binary.LittleEndian.PutUint32(b[4:8], h.MappingSize)
+	binary.LittleEndian.PutUint32(b[8:12], h.LinesLen)
+	binary.LittleEndian.PutUint32(b[12:16], h.LinesSize)
+	binary.LittleEndian.PutUint32(b[16:20], h.AddrSize)
+	binary.LittleEndian.PutUint32(b[20:24], h.IsFoldedSize)
+}
+
+func (h *locationsBlockHeader) unmarshal(b []byte) {
+	h.LocationsLen = binary.LittleEndian.Uint32(b[0:4])
+	h.MappingSize = binary.LittleEndian.Uint32(b[4:8])
+	h.LinesLen = binary.LittleEndian.Uint32(b[8:12])
+	h.LinesSize = binary.LittleEndian.Uint32(b[12:16])
+	h.AddrSize = binary.LittleEndian.Uint32(b[16:20])
+	h.IsFoldedSize = binary.LittleEndian.Uint32(b[20:24])
+}
+
+func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) (int64, error) {
+	e.initWrite(len(locations))
+	var addr int64
 	for i, loc := range locations {
-		lb.mapping[i] = int32(loc.MappingId)
-		lb.function[i] = int32(loc.Line[0].FunctionId)
-		lb.line[i] = loc.Line[0].Line
-		lb.count[i] = int32(len(loc.Line) - 1)
-		// Append all lines but the first one.
-		for j := 1; j < len(loc.Line); j++ {
-			line := loc.Line[j]
-			lb.lines = append(lb.lines, line.Line, int32(line.FunctionId))
+		e.mapping[i] = int32(loc.MappingId)
+		e.lineCount[i] = byte(len(loc.Line))
+		// Append all lines, up to maxLocationLines.
+		for j := 0; j < len(loc.Line) && j < maxLocationLines; j++ {
+			e.lines = append(e.lines,
+				int32(loc.Line[j].FunctionId),
+				loc.Line[j].Line)
 		}
 		addr |= int64(loc.Address)
-		lb.addr[i] = int64(loc.Address)
-		lb.hasFolded = lb.hasFolded || loc.IsFolded
-		lb.folded[i] = loc.IsFolded
-	}
-	lb.hasLines = len(lb.lines) > 0
-	lb.hasAddr = addr > 0
-	h := locationsBlockHeader{
-		LocationsLen: lb.locsLen,
-	}
+		e.addr[i] = int64(loc.Address)
+		e.hasFolded = e.hasFolded || loc.IsFolded
+		e.folded[i] = loc.IsFolded
+	}
 
+	// Mapping and line count per location.
 	var enc delta.BinaryPackedEncoding
-	lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.mapping)
-	h.MappingSize = uint32(len(lb.tmp))
-	lb.buf.Write(lb.tmp)
-	lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.function)
-	h.FunctionSize = uint32(len(lb.tmp))
-	lb.buf.Write(lb.tmp)
-	lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.line)
-	h.LineSize = uint32(len(lb.tmp))
-	lb.buf.Write(lb.tmp)
-	if lb.hasLines {
-		lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.count)
-		h.CountSize = uint32(len(lb.tmp))
-		lb.buf.Write(lb.tmp)
-		lb.tmp, _ = enc.EncodeInt32(lb.tmp, lb.lines)
-		h.LinesSize = uint32(len(lb.tmp))
-		lb.buf.Write(lb.tmp)
-	}
-	if lb.hasAddr {
-		lb.tmp, _ = enc.EncodeInt64(lb.tmp, lb.addr)
-		h.AddrSize = uint32(len(lb.tmp))
-		lb.buf.Write(lb.tmp)
-	}
-	if lb.hasFolded {
-		// TODO
-	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.mapping)
+	e.header.MappingSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+	// Line count size and length is deterministic.
+	e.buf.Write(e.lineCount) // Without any encoding.
+
+	// Lines slice size and length (in lines, not int32s).
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.lines)
+	e.header.LinesLen = uint32(len(e.lines) / 2)
+	e.header.LinesSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	if addr > 0 {
+		e.tmp, _ = enc.EncodeInt64(e.tmp, e.addr)
+		e.header.AddrSize = uint32(len(e.tmp))
+		e.buf.Write(e.tmp)
+	}
+
+	if e.hasFolded {
+		e.tmp = slices.GrowLen(e.tmp, len(e.folded)/8)
+		encodeBoolean(e.tmp, e.folded)
+		e.header.IsFoldedSize = uint32(len(e.tmp))
+		e.buf.Write(e.tmp)
+	}
 
-	lb.tmp = slices.GrowLen(lb.tmp, locationsBlockHeaderSize)
-	h.marshal(lb.tmp)
-	n, err := w.Write(lb.tmp)
+	e.tmp = slices.GrowLen(e.tmp, locationsBlockHeaderSize)
+	e.header.marshal(e.tmp)
+	n, err := w.Write(e.tmp)
 	if err != nil {
 		return int64(n), err
 	}
-	m, err := lb.buf.WriteTo(w)
+	m, err := e.buf.WriteTo(w)
 	return m + int64(n), err
 }
 
-func (lb *locationsBlock) reset(locations int) {
+func (e *locationsBlockEncoder) initWrite(locations int) {
 	// Actual estimate is ~6 bytes per location.
 	// In a large data set, the most expensive member
 	// is FunctionID, and it's about 2 bytes per location.
-	lb.buf.Reset()
-	lb.buf.Grow(locations * 8)
-	*lb = locationsBlock{
-		locsLen: uint32(locations),
-
-		mapping:  slices.GrowLen(lb.mapping, locations),
-		function: slices.GrowLen(lb.function, locations),
-		line:     slices.GrowLen(lb.line, locations),
-
-		count:  slices.GrowLen(lb.count, locations),
-		lines:  lb.lines[:0], // Appended.
-		addr:   slices.GrowLen(lb.addr, locations),
-		folded: slices.GrowLen(lb.folded, locations),
-
-		buf: lb.buf,
-		tmp: slices.GrowLen(lb.tmp, 2*locations),
-	}
-}
+	e.buf.Reset()
+	e.buf.Grow(locations * 8)
+	*e = locationsBlockEncoder{
+		header: locationsBlockHeader{LocationsLen: uint32(locations)},
+
+		mapping:   slices.GrowLen(e.mapping, locations),
+		lineCount: slices.GrowLen(e.lineCount, locations),
+		lines:     e.lines[:0], // Appendable.
+		addr:      slices.GrowLen(e.addr, locations),
+		folded:    slices.GrowLen(e.folded, locations),
+
+		buf: e.buf,
+		tmp: slices.GrowLen(e.tmp, 2*locations),
+	}
+}
+
+type locationsBlockDecoder struct {
+	header locationsBlockHeader
+
+	mappings  []int32
+	lineCount []byte
+	lines     []int32
+
+	address []int64
+	folded  []bool
+
+	tmp []byte
+}
+
+func (d *locationsBlockDecoder) readHeader(r io.Reader) error {
+	d.tmp = slices.GrowLen(d.tmp, locationsBlockHeaderSize)
+	if _, err := io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.header.unmarshal(d.tmp)
+	if !d.header.isValid() {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func (d *locationsBlockDecoder) decode(r *bufio.Reader, locations []v1.InMemoryLocation) error {
+	if err := d.readHeader(r); err != nil {
+		return err
+	}
+	if d.header.LocationsLen > uint32(len(locations)) {
+		return fmt.Errorf("locations buffer is too short")
+	}
+
+	var enc delta.BinaryPackedEncoding
+	// First we decode mapping_id and assign them to locations.
+	buf, err := r.Peek(int(d.header.MappingSize))
+	if err != nil {
+		return err
+	}
+	d.mappings = slices.GrowLen(d.mappings, int(d.header.LocationsLen))
+	d.mappings, err = enc.DecodeInt32(d.mappings, buf)
+	if err != nil {
+		return err
+	}
+	_, _ = r.Discard(len(buf))
+
+	// Line count per location.
+	// One byte per location.
+	buf, err = r.Peek(int(d.header.LocationsLen))
+	if err != nil {
+		return err
+	}
+	d.lineCount = slices.GrowLen(d.lineCount, int(d.header.LocationsLen))
+	copy(d.lineCount, buf)
+	_, _ = r.Discard(len(buf))
+
+	// Lines. A single slice backs all the location line
+	// sub-slices. But it has to be allocated as we can't
+	// reference d.lines, which is reusable.
+	lines := make([]v1.InMemoryLine, d.header.LinesLen)
+	// Unlike other members, d.header.LinesSize potentially
+	// might be too big to fit into the reader's buffer.
+	// This is not expected, but we have to handle it in
+	// a graceful way.
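+	// If the encoded lines fit into the reader's buffer, borrow
+	// the bytes with Peek and Discard them once decoded; otherwise
+	// fall back to a one-off allocation and io.ReadFull.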
+	if r.Size() > int(d.header.LinesSize) {
+		buf, err = r.Peek(int(d.header.LinesSize))
+		if err != nil {
+			return err
+		}
+	} else {
+		buf = make([]byte, int(d.header.LinesSize))
+		if _, err = io.ReadFull(r, buf); err != nil {
+			return err
+		}
+	}
+	d.lines = slices.GrowLen(d.lines, int(d.header.LinesLen))
+	d.lines, err = enc.DecodeInt32(d.lines, buf)
+	if err != nil {
+		return err
+	}
+	copy(lines, *(*[]v1.InMemoryLine)(unsafe.Pointer(&d.lines)))
+	if r.Size() > int(d.header.LinesSize) {
+		// Advance the buffer offset, if we haven't read from it.
+		// Note that this invalidates buf, therefore it can only
+		// be done after it was decoded.
+		_, _ = r.Discard(len(buf))
+	}
+
+	// In most cases we end up here.
+	if d.header.AddrSize == 0 && d.header.IsFoldedSize == 0 {
+		var o int // Offset within the lines slice.
+		for i := uint32(0); i < d.header.LocationsLen; i++ {
+			locations[i].MappingId = uint32(d.mappings[i])
+			n := o + int(d.lineCount[i])
+			locations[i].Line = lines[o:n]
+			o = n
+		}
+		return nil
+	}
+
+	// Otherwise, inspect all the optional fields.
+	if int(d.header.AddrSize) > 0 {
+		buf, err = r.Peek(int(d.header.AddrSize))
+		if err != nil {
+			return err
+		}
+		d.address = slices.GrowLen(d.address, int(d.header.LocationsLen))
+		d.address, err = enc.DecodeInt64(d.address, buf)
+		if err != nil {
+			return err
+		}
+		_, _ = r.Discard(len(buf))
+	}
+	if int(d.header.IsFoldedSize) > 0 {
+		buf, err = r.Peek(int(d.header.IsFoldedSize))
+		if err != nil {
+			return err
+		}
+		d.folded = slices.GrowLen(d.folded, int(d.header.LocationsLen))
+		decodeBoolean(d.folded, buf)
+		_, _ = r.Discard(len(buf))
+	}
+
+	var o int // Offset within the lines slice.
+	for i := uint32(0); i < d.header.LocationsLen; i++ {
+		locations[i].MappingId = uint32(d.mappings[i])
+		n := o + int(d.lineCount[i])
+		locations[i].Line = lines[o:n]
+		o = n
+		locations[i].Address = uint64(d.address[i])
+		locations[i].IsFolded = d.folded[i]
+	}
+
+	return nil
 }
 
-func NewLocationsEncoder(w io.Writer) *LocationsEncoder {
-	return &LocationsEncoder{w: w}
+func encodeBoolean(dst []byte, src []bool) {
+	for i := range dst {
+		dst[i] = 0
+	}
+	for i, b := range src {
+		if b {
+			dst[i>>3] |= 1 << (i & 7)
+		}
+	}
 }
 
-func (e *LocationsEncoder) EncodeLocations(locations []v1.InMemoryLocation) error {
-	return nil
+func decodeBoolean(dst []bool, src []byte) {
+	for i := range dst {
+		dst[i] = false
+	}
+	for i := range dst {
+		dst[i] = src[i>>3]&(1<<(i&7)) > 0
+	}
 }
diff --git a/pkg/phlaredb/symdb/encoding_strings.go b/pkg/phlaredb/symdb/encoding_strings.go
index ac69b33fdd..5ecced263f 100644
--- a/pkg/phlaredb/symdb/encoding_strings.go
+++ b/pkg/phlaredb/symdb/encoding_strings.go
 	d.stringsLen = binary.LittleEndian.Uint32(d.buf[0:4])
 	d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8])
 	d.blocksLen = binary.LittleEndian.Uint32(d.buf[8:12])
 	// Sanity checks are needed as we process the stream data
 	// before verifying the checksum.
 	if d.blocksLen > 1<<20 || d.stringsLen > 1<<20 || d.blockSize > 1<<20 {
-		return fmt.Errorf("malformed header")
+		return ErrInvalidSize
 	}
 	d.blocks = slices.GrowLen(d.blocks, int(d.blocksLen))
 	_, err = io.ReadFull(d.r, d.blocks)
 	return err
 }
 
-func (d *StringsDecoder) StringsLen() (int, error) {
-	if err := d.readHeader(); err != nil {
-		return 0, err
-	}
-	return int(d.stringsLen), nil
-}
-
-func (d *StringsDecoder) ReadStrings(dst []string) (err error) {
+func (d *StringsDecoder) DecodeStrings(dst []string) (err error) {
 	for i := 0; i < len(d.blocks); i++ {
 		bs := d.blockSize
 		if i == len(d.blocks)-1 && d.stringsLen%d.blockSize > 0 {

From 48e523c7cb5c881c2552464376c5ff0c984ca404 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Fri, 22 Mar 2024 15:22:58 +0800
Subject: [PATCH 05/36] remove bufio reader

---
 pkg/phlaredb/symdb/encoding_locations.go    | 83 ++++++++-------------
 pkg/phlaredb/symdb/encoding_strings_test.go |  4 +-
 2 files changed, 35 insertions(+), 52 deletions(-)

diff --git a/pkg/phlaredb/symdb/encoding_locations.go b/pkg/phlaredb/symdb/encoding_locations.go
index 87df17d7c2..68b7e2a10a 100644
--- a/pkg/phlaredb/symdb/encoding_locations.go
+++ b/pkg/phlaredb/symdb/encoding_locations.go
@@ -1,7 +1,6 @@
 package symdb
 
 import (
-	"bufio"
 	"bytes"
 	"encoding/binary"
 	"fmt"
@@ -61,6 +60,13 @@ func (e *LocationsEncoder) writeHeader() (err error) {
 	return err
 }
 
+func (e *LocationsEncoder) Reset(w io.Writer) {
+	e.locations = 0
+	e.blockSize = 0
+	e.buf = e.buf[:0]
+	e.w = w
+}
+
 type LocationsDecoder struct {
 	r io.Reader
 	d locationsBlockDecoder
@@ -97,23 +103,24 @@ func (d *LocationsDecoder) readHeader() (err error) {
 
 func (d *LocationsDecoder) DecodeLocations(locations []v1.InMemoryLocation) error {
 	blocks := int((d.locations + d.blockSize - 1) / d.blockSize)
-	// It's expected that the reader is already buffered.
-	r, ok := d.r.(*bufio.Reader)
-	if !ok {
-		bufSize := int(d.blockSize * 16) // 16 bytes per location.
-		r = bufio.NewReaderSize(d.r, bufSize)
-	}
 	for i := 0; i < blocks; i++ {
 		lo := i * int(d.blockSize)
 		hi := math.Min(lo+int(d.blockSize), int(d.locations))
 		block := locations[lo:hi]
-		if err := d.d.decode(r, block); err != nil {
+		if err := d.d.decode(d.r, block); err != nil {
 			return err
 		}
 	}
 	return nil
 }
 
+func (d *LocationsDecoder) Reset(r io.Reader) {
+	d.locations = 0
+	d.blockSize = 0
+	d.buf = d.buf[:0]
+	d.r = r
+}
+
 type locationsBlockEncoder struct {
 	header locationsBlockHeader
 
@@ -276,8 +283,8 @@ func (d *locationsBlockDecoder) readHeader(r io.Reader) error {
 	return nil
 }
 
-func (d *locationsBlockDecoder) decode(r *bufio.Reader, locations []v1.InMemoryLocation) error {
-	if err := d.readHeader(r); err != nil {
+func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocation) (err error) {
+	if err = d.readHeader(r); err != nil {
 		return err
 	}
 	if d.header.LocationsLen > uint32(len(locations)) {
@@ -286,63 +293,41 @@ func (d *locationsBlockDecoder) decode(r *bufio.Reader, locations []v1.InMemoryL
 
 	var enc delta.BinaryPackedEncoding
 	// First we decode mapping_id and assign them to locations.
-	buf, err := r.Peek(int(d.header.MappingSize))
-	if err != nil {
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.MappingSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
 		return err
 	}
-	d.mappings = slices.GrowLen(d.mappings, int(d.header.LocationsLen))
-	d.mappings, err = enc.DecodeInt32(d.mappings, buf)
+	d.mappings, err = enc.DecodeInt32(d.mappings, d.tmp)
 	if err != nil {
 		return err
 	}
-	_, _ = r.Discard(len(buf))
 
 	// Line count per location.
 	// One byte per location.
-	buf, err = r.Peek(int(d.header.LocationsLen))
-	if err != nil {
+	d.lineCount = slices.GrowLen(d.lineCount, int(d.header.LocationsLen))
+	if _, err = io.ReadFull(r, d.lineCount); err != nil {
 		return err
 	}
-	d.lineCount = slices.GrowLen(d.lineCount, int(d.header.LocationsLen))
-	copy(d.lineCount, buf)
-	_, _ = r.Discard(len(buf))
 
 	// Lines. A single slice backs all the location line
 	// sub-slices. But it has to be allocated as we can't
 	// reference d.lines, which is reusable.
 	lines := make([]v1.InMemoryLine, d.header.LinesLen)
-	// Unlike other members, d.header.LinesSize potentially
-	// might be too big to fit into the reader's buffer.
-	// This is not expected, but we have to handle it in
-	// a graceful way.
-	if r.Size() > int(d.header.LinesSize) {
-		buf, err = r.Peek(int(d.header.LinesSize))
-		if err != nil {
-			return err
-		}
-	} else {
-		buf = make([]byte, int(d.header.LinesSize))
-		if _, err = io.ReadFull(r, buf); err != nil {
-			return err
-		}
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.LinesSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
 	}
 	d.lines = slices.GrowLen(d.lines, int(d.header.LinesLen))
-	d.lines, err = enc.DecodeInt32(d.lines, buf)
+	d.lines, err = enc.DecodeInt32(d.lines, d.tmp)
 	if err != nil {
 		return err
 	}
 	copy(lines, *(*[]v1.InMemoryLine)(unsafe.Pointer(&d.lines)))
-	if r.Size() > int(d.header.LinesSize) {
-		// Advance the buffer offset, if we haven't read from it.
-		// Note that this invalidates buf, therefore it can only
-		// be done after it was decoded.
-		_, _ = r.Discard(len(buf))
-	}
 
 	// In most cases we end up here.
 	if d.header.AddrSize == 0 && d.header.IsFoldedSize == 0 {
 		var o int // Offset within the lines slice.
-		for i := uint32(0); i < d.header.LocationsLen; i++ {
+		for i := 0; i < len(locations); i++ {
 			locations[i].MappingId = uint32(d.mappings[i])
 			n := o + int(d.lineCount[i])
 			locations[i].Line = lines[o:n]
@@ -353,25 +338,23 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat
 
 	// Otherwise, inspect all the optional fields.
 	if int(d.header.AddrSize) > 0 {
-		buf, err = r.Peek(int(d.header.AddrSize))
-		if err != nil {
+		d.tmp = slices.GrowLen(d.tmp, int(d.header.AddrSize))
+		if _, err = io.ReadFull(r, d.tmp); err != nil {
 			return err
 		}
 		d.address = slices.GrowLen(d.address, int(d.header.LocationsLen))
-		d.address, err = enc.DecodeInt64(d.address, buf)
+		d.address, err = enc.DecodeInt64(d.address, d.tmp)
 		if err != nil {
 			return err
 		}
-		_, _ = r.Discard(len(buf))
 	}
 	if int(d.header.IsFoldedSize) > 0 {
-		buf, err = r.Peek(int(d.header.IsFoldedSize))
-		if err != nil {
+		d.tmp = slices.GrowLen(d.tmp, int(d.header.IsFoldedSize))
+		if _, err = io.ReadFull(r, d.tmp); err != nil {
 			return err
 		}
 		d.folded = slices.GrowLen(d.folded, int(d.header.LocationsLen))
-		decodeBoolean(d.folded, buf)
-		_, _ = r.Discard(len(buf))
+		decodeBoolean(d.folded, d.tmp)
 	}
 
 	var o int // Offset within the lines slice.
diff --git a/pkg/phlaredb/symdb/encoding_strings_test.go b/pkg/phlaredb/symdb/encoding_strings_test.go
index 519f719df8..1fa6191eb1 100644
--- a/pkg/phlaredb/symdb/encoding_strings_test.go
+++ b/pkg/phlaredb/symdb/encoding_strings_test.go
@@ -83,12 +83,12 @@ func Test_StringsEncoding(t *testing.T) {
 			if tc.blockSize > 0 {
 				e.blockSize = tc.blockSize
 			}
-			require.NoError(t, e.WriteStrings(tc.strings))
+			require.NoError(t, e.EncodeStrings(tc.strings))
 			d := NewStringsDecoder(bufio.NewReader(&output))
 			n, err := d.StringsLen()
 			require.NoError(t, err)
 			out := make([]string, n)
-			require.NoError(t, d.ReadStrings(out))
+			require.NoError(t, d.DecodeStrings(out))
 			require.Equal(t, tc.strings, out)
 		})
 	}

From 17d327f857380085e249d4f8d8f205bbcc7ebbf8 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Tue, 26 Mar 2024 14:38:00 +0800
Subject: [PATCH 06/36] add functions codecs

---
 pkg/phlaredb/symdb/encoding_functions.go | 295 +++++++++++++++++++++++
 1 file changed, 295 insertions(+)
 create mode 100644 pkg/phlaredb/symdb/encoding_functions.go

diff --git a/pkg/phlaredb/symdb/encoding_functions.go b/pkg/phlaredb/symdb/encoding_functions.go
new file mode 100644
index 0000000000..72cca758cc
--- /dev/null
+++ b/pkg/phlaredb/symdb/encoding_functions.go
@@ -0,0 +1,295 @@
+package symdb
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"unsafe"
+
+	"github.com/parquet-go/parquet-go/encoding/delta"
+
+	v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
+	"github.com/grafana/pyroscope/pkg/slices"
+	"github.com/grafana/pyroscope/pkg/util/math"
+)
+
+type FunctionsEncoder struct {
+	w io.Writer
+	e functionsBlockEncoder
+
+	blockSize int
+	functions int
+
+	buf []byte
+}
+
+const (
+	defaultFunctionsBlockSize = 1 << 10
+)
+
+func NewFunctionsEncoder(w io.Writer) *FunctionsEncoder {
+	return &FunctionsEncoder{w: w}
+}
+
+func (e *FunctionsEncoder) EncodeFunctions(locations []v1.InMemoryFunction) error {
+	if e.blockSize == 0 {
+		e.blockSize = defaultFunctionsBlockSize
+	}
+	e.functions = len(locations)
+	if err := e.writeHeader(); err != nil {
+		return err
+	}
+	for i := 0; i < len(locations); i += e.blockSize {
+		block := locations[i:math.Min(i+e.blockSize, len(locations))]
+		if _, err := e.e.encode(e.w, block); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (e *FunctionsEncoder) writeHeader() (err error) {
+	e.buf = slices.GrowLen(e.buf, 8)
+	binary.LittleEndian.PutUint32(e.buf[0:4], uint32(e.functions))
+	binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize))
+	_, err = e.w.Write(e.buf)
+	return err
+}
+
+func (e *FunctionsEncoder) Reset(w io.Writer) {
+	e.functions = 0
+	e.blockSize = 0
+	e.buf = e.buf[:0]
+	e.w = w
+}
+
+type FunctionsDecoder struct {
+	r io.Reader
+	d functionsBlockDecoder
+
+	blockSize uint32
+	functions uint32
+
+	buf []byte
+}
+
+func NewFunctionsDecoder(r io.Reader) *FunctionsDecoder { return &FunctionsDecoder{r: r} }
+
+func (d *FunctionsDecoder) FunctionsLen() (int, error) {
+	if err := d.readHeader(); err != nil {
+		return 0, err
+	}
+	return int(d.functions), nil
+}
+
+func (d *FunctionsDecoder) readHeader() (err error) {
+	d.buf = slices.GrowLen(d.buf, 8)
+	if _, err = io.ReadFull(d.r, d.buf); err != nil {
+		return err
+	}
+	d.functions = binary.LittleEndian.Uint32(d.buf[0:4])
+	d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8])
+	// Sanity checks are needed as we process the stream data
+	// before verifying the checksum.
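+	// As for locations, 1<<20 is an arbitrary upper bound whose
+	// only purpose is to reject obviously corrupted headers early.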
+	if d.functions > 1<<20 || d.blockSize > 1<<20 {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func (d *FunctionsDecoder) DecodeFunctions(functions []v1.InMemoryFunction) error {
+	blocks := int((d.functions + d.blockSize - 1) / d.blockSize)
+	for i := 0; i < blocks; i++ {
+		lo := i * int(d.blockSize)
+		hi := math.Min(lo+int(d.blockSize), int(d.functions))
+		block := functions[lo:hi]
+		if err := d.d.decode(d.r, block); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (d *FunctionsDecoder) Reset(r io.Reader) {
+	d.functions = 0
+	d.blockSize = 0
+	d.buf = d.buf[:0]
+	d.r = r
+}
+
+const functionsBlockHeaderSize = int(unsafe.Sizeof(functionsBlockHeader{}))
+
+type functionsBlockHeader struct {
+	FunctionsLen   uint32
+	NameSize       uint32
+	SystemNameSize uint32
+	FileNameSize   uint32
+	StartLineSize  uint32
+}
+
+func (h *functionsBlockHeader) marshal(b []byte) {
+	binary.LittleEndian.PutUint32(b[0:4], h.FunctionsLen)
+	binary.LittleEndian.PutUint32(b[4:8], h.NameSize)
+	binary.LittleEndian.PutUint32(b[8:12], h.SystemNameSize)
+	binary.LittleEndian.PutUint32(b[12:16], h.FileNameSize)
+	binary.LittleEndian.PutUint32(b[16:20], h.StartLineSize)
+}
+
+func (h *functionsBlockHeader) unmarshal(b []byte) {
+	h.FunctionsLen = binary.LittleEndian.Uint32(b[0:4])
+	h.NameSize = binary.LittleEndian.Uint32(b[4:8])
+	h.SystemNameSize = binary.LittleEndian.Uint32(b[8:12])
+	h.FileNameSize = binary.LittleEndian.Uint32(b[12:16])
+	h.StartLineSize = binary.LittleEndian.Uint32(b[16:20])
+}
+
+// isValid reports whether the header contains sane values.
+// This is important as the block might be read before the
+// checksum validation.
+func (h *functionsBlockHeader) isValid() bool {
+	return h.FunctionsLen < 1<<20
+}
+
+type functionsBlockEncoder struct {
+	header functionsBlockHeader
+
+	tmp  []byte
+	buf  bytes.Buffer
+	ints []int32
+}
+
+func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunction) (int64, error) {
+	e.initWrite(len(functions))
+	var enc delta.BinaryPackedEncoding
+
+	for i, f := range functions {
+		e.ints[i] = int32(f.Name)
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.NameSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	for i, f := range functions {
+		e.ints[i] = int32(f.SystemName)
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.SystemNameSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	for i, f := range functions {
+		e.ints[i] = int32(f.Filename)
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.FileNameSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	for i, f := range functions {
+		e.ints[i] = int32(f.StartLine)
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.StartLineSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	e.tmp = slices.GrowLen(e.tmp, functionsBlockHeaderSize)
+	e.header.marshal(e.tmp)
+	n, err := w.Write(e.tmp)
+	if err != nil {
+		return int64(n), err
+	}
+	m, err := e.buf.WriteTo(w)
+	return m + int64(n), err
+}
+
+func (e *functionsBlockEncoder) initWrite(functions int) {
+	e.buf.Reset()
+	// Actual estimate is ~7 bytes per function.
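+	// Rounding the estimate up to 8 bytes per function keeps the
+	// buffer from reallocating in the common case.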
+	e.buf.Grow(functions * 8)
+	*e = functionsBlockEncoder{
+		header: functionsBlockHeader{FunctionsLen: uint32(functions)},
+
+		tmp:  slices.GrowLen(e.tmp, functions*2),
+		ints: slices.GrowLen(e.ints, functions),
+		buf:  e.buf,
+	}
+}
+
+type functionsBlockDecoder struct {
+	header functionsBlockHeader
+
+	ints []int32
+	tmp  []byte
+}
+
+func (d *functionsBlockDecoder) readHeader(r io.Reader) error {
+	d.tmp = slices.GrowLen(d.tmp, functionsBlockHeaderSize)
+	if _, err := io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.header.unmarshal(d.tmp)
+	if !d.header.isValid() {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunction) (err error) {
+	if err = d.readHeader(r); err != nil {
+		return err
+	}
+	if d.header.FunctionsLen > uint32(len(functions)) {
+		return fmt.Errorf("functions buffer is too short")
+	}
+
+	var enc delta.BinaryPackedEncoding
+	d.ints = slices.GrowLen(d.ints, int(d.header.FunctionsLen))
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.NameSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		functions[i].Name = uint32(v)
+	}
+
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.SystemNameSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		functions[i].SystemName = uint32(v)
+	}
+
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.FileNameSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		functions[i].Filename = uint32(v)
+	}
+
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.StartLineSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		functions[i].StartLine = uint32(v)
+	}
+
+	return nil
+}

From f859abb895d428a25c1512710d5319cf038ee0b9 Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Tue, 26 Mar 2024 16:03:29 +0800
Subject: [PATCH 07/36] add mapping codecs

---
 pkg/phlaredb/symdb/encoding_functions.go |   8 +-
 pkg/phlaredb/symdb/encoding_locations.go |   7 +-
 pkg/phlaredb/symdb/encoding_mappings.go  | 380 +++++++++++++++++++++++
 3 files changed, 387 insertions(+), 8 deletions(-)
 create mode 100644 pkg/phlaredb/symdb/encoding_mappings.go

diff --git a/pkg/phlaredb/symdb/encoding_functions.go b/pkg/phlaredb/symdb/encoding_functions.go
index 72cca758cc..78fa383941 100644
--- a/pkg/phlaredb/symdb/encoding_functions.go
+++ b/pkg/phlaredb/symdb/encoding_functions.go
@@ -32,16 +32,16 @@ func NewFunctionsEncoder(w io.Writer) *FunctionsEncoder {
 	return &FunctionsEncoder{w: w}
 }
 
-func (e *FunctionsEncoder) EncodeFunctions(locations []v1.InMemoryFunction) error {
+func (e *FunctionsEncoder) EncodeFunctions(functions []v1.InMemoryFunction) error {
 	if e.blockSize == 0 {
 		e.blockSize = defaultFunctionsBlockSize
 	}
-	e.functions = len(locations)
+	e.functions = len(functions)
 	if err := e.writeHeader(); err != nil {
 		return err
 	}
-	for i := 0; i < len(locations); i += e.blockSize {
-		block := locations[i:math.Min(i+e.blockSize, len(locations))]
+	for i := 0; i < len(functions); i += e.blockSize {
+		block := functions[i:math.Min(i+e.blockSize, len(functions))]
 		if _, err := e.e.encode(e.w, block); err != nil {
 			return err
 		}
 	}
 	return nil
 }
diff --git a/pkg/phlaredb/symdb/encoding_locations.go b/pkg/phlaredb/symdb/encoding_locations.go
index 68b7e2a10a..193d3233c4 100644
--- a/pkg/phlaredb/symdb/encoding_locations.go
+++ b/pkg/phlaredb/symdb/encoding_locations.go
@@ -137,8 +137,6 @@ type locationsBlockEncoder struct {
 
 	tmp []byte
 	buf bytes.Buffer
-
-	hasFolded bool
 }
 
 const locationsBlockHeaderSize = int(unsafe.Sizeof(locationsBlockHeader{}))
@@ -186,6 +184,7 @@ func (h *locationsBlockHeader) unmarshal(b []byte) {
 func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) (int64, error) {
 	e.initWrite(len(locations))
 	var addr int64
+	var folded bool
 	for i, loc := range locations {
 		e.mapping[i] = int32(loc.MappingId)
 		e.lineCount[i] = byte(len(loc.Line))
@@ -197,7 +196,7 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat
 		addr |= int64(loc.Address)
 		e.addr[i] = int64(loc.Address)
-		e.hasFolded = e.hasFolded || loc.IsFolded
+		folded = folded || loc.IsFolded
 		e.folded[i] = loc.IsFolded
 	}
 
@@ -221,7 +220,7 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat
 		e.buf.Write(e.tmp)
 	}
 
-	if e.hasFolded {
+	if folded {
 		e.tmp = slices.GrowLen(e.tmp, len(e.folded)/8)
 		encodeBoolean(e.tmp, e.folded)
 		e.header.IsFoldedSize = uint32(len(e.tmp))
 		e.buf.Write(e.tmp)
diff --git a/pkg/phlaredb/symdb/encoding_mappings.go b/pkg/phlaredb/symdb/encoding_mappings.go
new file mode 100644
index 0000000000..c1a5794bc9
--- /dev/null
+++ b/pkg/phlaredb/symdb/encoding_mappings.go
@@ -0,0 +1,380 @@
+package symdb
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"unsafe"
+
+	"github.com/parquet-go/parquet-go/encoding/delta"
+
+	v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
+	"github.com/grafana/pyroscope/pkg/slices"
+	"github.com/grafana/pyroscope/pkg/util/math"
+)
+
+type MappingsEncoder struct {
+	w io.Writer
+	e mappingsBlockEncoder
+
+	blockSize int
+	mappings  int
+
+	buf []byte
+}
+
+const (
+	defaultMappingsBlockSize = 1 << 10
+)
+
+func NewMappingsEncoder(w io.Writer) *MappingsEncoder {
+	return &MappingsEncoder{w: w}
+}
+
+func (e *MappingsEncoder) EncodeMappings(mappings []v1.InMemoryMapping) error {
+	if e.blockSize == 0 {
+		e.blockSize = defaultMappingsBlockSize
+	}
+	e.mappings = len(mappings)
+	if err := e.writeHeader(); err != nil {
+		return err
+	}
+	for i := 0; i < len(mappings); i += e.blockSize {
+		block := mappings[i:math.Min(i+e.blockSize, len(mappings))]
+		if _, err := e.e.encode(e.w, block); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (e *MappingsEncoder) writeHeader() (err error) {
+	e.buf = slices.GrowLen(e.buf, 8)
+	binary.LittleEndian.PutUint32(e.buf[0:4], uint32(e.mappings))
+	binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize))
+	_, err = e.w.Write(e.buf)
+	return err
+}
+
+func (e *MappingsEncoder) Reset(w io.Writer) {
+	e.mappings = 0
+	e.blockSize = 0
+	e.buf = e.buf[:0]
+	e.w = w
+}
+
+type MappingsDecoder struct {
+	r io.Reader
+	d mappingsBlockDecoder
+
+	blockSize uint32
+	mappings  uint32
+
+	buf []byte
+}
+
+func NewMappingsDecoder(r io.Reader) *MappingsDecoder { return &MappingsDecoder{r: r} }
+
+func (d *MappingsDecoder) MappingsLen() (int, error) {
+	if err := d.readHeader(); err != nil {
+		return 0, err
+	}
+	return int(d.mappings), nil
+}
+
+func (d *MappingsDecoder) readHeader() (err error) {
+	d.buf = slices.GrowLen(d.buf, 8)
+	if _, err = io.ReadFull(d.r, d.buf); err != nil {
+		return err
+	}
+	d.mappings = binary.LittleEndian.Uint32(d.buf[0:4])
+	d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8])
+	// Sanity checks are needed as we process the stream data
+	// before verifying the checksum.
+	if d.mappings > 1<<20 || d.blockSize > 1<<20 {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func (d *MappingsDecoder) DecodeMappings(mappings []v1.InMemoryMapping) error {
+	blocks := int((d.mappings + d.blockSize - 1) / d.blockSize)
+	for i := 0; i < blocks; i++ {
+		lo := i * int(d.blockSize)
+		hi := math.Min(lo+int(d.blockSize), int(d.mappings))
+		block := mappings[lo:hi]
+		if err := d.d.decode(d.r, block); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func (d *MappingsDecoder) Reset(r io.Reader) {
+	d.mappings = 0
+	d.blockSize = 0
+	d.buf = d.buf[:0]
+	d.r = r
+}
+
+const mappingsBlockHeaderSize = int(unsafe.Sizeof(mappingsBlockHeader{}))
+
+type mappingsBlockHeader struct {
+	MappingsLen  uint32
+	FileNameSize uint32
+	BuildIDSize  uint32
+	FlagsSize    uint32
+	// Optional.
+	MemoryStartSize uint32
+	MemoryLimitSize uint32
+	FileOffsetSize  uint32
+}
+
+func (h *mappingsBlockHeader) marshal(b []byte) {
+	binary.LittleEndian.PutUint32(b[0:4], h.MappingsLen)
+	binary.LittleEndian.PutUint32(b[4:8], h.FileNameSize)
+	binary.LittleEndian.PutUint32(b[8:12], h.BuildIDSize)
+	binary.LittleEndian.PutUint32(b[12:16], h.FlagsSize)
+	binary.LittleEndian.PutUint32(b[16:20], h.MemoryStartSize)
+	binary.LittleEndian.PutUint32(b[20:24], h.MemoryLimitSize)
+	binary.LittleEndian.PutUint32(b[24:28], h.FileOffsetSize)
+}
+
+func (h *mappingsBlockHeader) unmarshal(b []byte) {
+	h.MappingsLen = binary.LittleEndian.Uint32(b[0:4])
+	h.FileNameSize = binary.LittleEndian.Uint32(b[4:8])
+	h.BuildIDSize = binary.LittleEndian.Uint32(b[8:12])
+	h.FlagsSize = binary.LittleEndian.Uint32(b[12:16])
+	h.MemoryStartSize = binary.LittleEndian.Uint32(b[16:20])
+	h.MemoryLimitSize = binary.LittleEndian.Uint32(b[20:24])
+	h.FileOffsetSize = binary.LittleEndian.Uint32(b[24:28])
+}
+
+// isValid reports whether the header contains sane values.
+// This is important as the block might be read before the
+// checksum validation.
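+// Note that, as in the functions codec, only the element count
+// is bounded here; the per-field sizes are consumed as-is.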
+func (h *mappingsBlockHeader) isValid() bool {
+	return h.MappingsLen < 1<<20
+}
+
+type mappingsBlockEncoder struct {
+	header mappingsBlockHeader
+
+	tmp    []byte
+	buf    bytes.Buffer
+	ints   []int32
+	ints64 []int64
+}
+
+func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping) (int64, error) {
+	e.initWrite(len(mappings))
+	var enc delta.BinaryPackedEncoding
+
+	for i, m := range mappings {
+		e.ints[i] = int32(m.Filename)
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.FileNameSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	for i, m := range mappings {
+		e.ints[i] = int32(m.BuildId)
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.BuildIDSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	for i, m := range mappings {
+		var v int32
+		if m.HasFunctions {
+			v |= 1 << 3
+		}
+		if m.HasFilenames {
+			v |= 1 << 2
+		}
+		if m.HasLineNumbers {
+			v |= 1 << 1
+		}
+		if m.HasInlineFrames {
+			v |= 1
+		}
+		e.ints[i] = v
+	}
+	e.tmp, _ = enc.EncodeInt32(e.tmp, e.ints)
+	e.header.FlagsSize = uint32(len(e.tmp))
+	e.buf.Write(e.tmp)
+
+	var memoryStart uint64
+	for i, m := range mappings {
+		memoryStart |= m.MemoryStart
+		e.ints64[i] = int64(m.MemoryStart)
+	}
+	if memoryStart != 0 {
+		e.tmp, _ = enc.EncodeInt64(e.tmp, e.ints64)
+		e.header.MemoryStartSize = uint32(len(e.tmp))
+		e.buf.Write(e.tmp)
+	}
+
+	var memoryLimit uint64
+	for i, m := range mappings {
+		memoryLimit |= m.MemoryLimit
+		e.ints64[i] = int64(m.MemoryLimit)
+	}
+	if memoryLimit != 0 {
+		e.tmp, _ = enc.EncodeInt64(e.tmp, e.ints64)
+		e.header.MemoryLimitSize = uint32(len(e.tmp))
+		e.buf.Write(e.tmp)
+	}
+
+	var fileOffset uint64
+	for i, m := range mappings {
+		fileOffset |= m.FileOffset
+		e.ints64[i] = int64(m.FileOffset)
+	}
+	if fileOffset != 0 {
+		e.tmp, _ = enc.EncodeInt64(e.tmp, e.ints64)
+		e.header.FileOffsetSize = uint32(len(e.tmp))
+		e.buf.Write(e.tmp)
+	}
+
+	e.tmp = slices.GrowLen(e.tmp, mappingsBlockHeaderSize)
+	e.header.marshal(e.tmp)
+	n, err := w.Write(e.tmp)
+	if err != nil {
+		return int64(n), err
+	}
+	m, err := e.buf.WriteTo(w)
+	return m + int64(n), err
+}
+
+func (e *mappingsBlockEncoder) initWrite(mappings int) {
+	e.buf.Reset()
+	// Actual estimate is ~7 bytes per mapping.
+	e.buf.Grow(mappings * 8)
+	*e = mappingsBlockEncoder{
+		header: mappingsBlockHeader{MappingsLen: uint32(mappings)},
+
+		tmp:    slices.GrowLen(e.tmp, mappings*2),
+		ints:   slices.GrowLen(e.ints, mappings),
+		ints64: slices.GrowLen(e.ints64, mappings),
+		buf:    e.buf,
+	}
+}
+
+type mappingsBlockDecoder struct {
+	header mappingsBlockHeader
+
+	ints   []int32
+	ints64 []int64
+	tmp    []byte
+}
+
+func (d *mappingsBlockDecoder) readHeader(r io.Reader) error {
+	d.tmp = slices.GrowLen(d.tmp, mappingsBlockHeaderSize)
+	if _, err := io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.header.unmarshal(d.tmp)
+	if !d.header.isValid() {
+		return ErrInvalidSize
+	}
+	// TODO: Scale tmp
+	return nil
+}
+
+func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping) (err error) {
+	if err = d.readHeader(r); err != nil {
+		return err
+	}
+	if d.header.MappingsLen > uint32(len(mappings)) {
+		return fmt.Errorf("mappings buffer is too short")
+	}
+
+	var enc delta.BinaryPackedEncoding
+	d.ints = slices.GrowLen(d.ints, int(d.header.MappingsLen))
+
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.FileNameSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		mappings[i].Filename = uint32(v)
+	}
+
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.BuildIDSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		mappings[i].BuildId = uint32(v)
+	}
+
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.FlagsSize))
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.ints, err = enc.DecodeInt32(d.ints, d.tmp)
+	if err != nil {
+		return err
+	}
+	for i, v := range d.ints {
+		mappings[i].HasFunctions = v&(1<<3) > 0
+		mappings[i].HasFilenames = v&(1<<2) > 0
+		mappings[i].HasLineNumbers = v&(1<<1) > 0
+		mappings[i].HasInlineFrames = v&1 > 0
+	}
+
+	if d.header.MemoryStartSize > 0 {
+		d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen))
+		d.tmp = slices.GrowLen(d.tmp, int(d.header.MemoryStartSize))
+		if _, err = io.ReadFull(r, d.tmp); err != nil {
+			return err
+		}
+		d.ints64, err = enc.DecodeInt64(d.ints64, d.tmp)
+		if err != nil {
+			return err
+		}
+		for i, v := range d.ints64 {
+			mappings[i].MemoryStart = uint64(v)
+		}
+	}
+	if d.header.MemoryLimitSize > 0 {
+		d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen))
+		d.tmp = slices.GrowLen(d.tmp, int(d.header.MemoryLimitSize))
+		if _, err = io.ReadFull(r, d.tmp); err != nil {
+			return err
+		}
+		d.ints64, err = enc.DecodeInt64(d.ints64, d.tmp)
+		if err != nil {
+			return err
+		}
+		for i, v := range d.ints64 {
+			mappings[i].MemoryLimit = uint64(v)
+		}
+	}
+	if d.header.FileOffsetSize > 0 {
+		d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen))
+		d.tmp = slices.GrowLen(d.tmp, int(d.header.FileOffsetSize))
+		if _, err = io.ReadFull(r, d.tmp); err != nil {
+			return err
+		}
+		d.ints64, err = enc.DecodeInt64(d.ints64, d.tmp)
+		if err != nil {
+			return err
+		}
+		for i, v := range d.ints64 {
+			mappings[i].FileOffset = uint64(v)
+		}
+	}
+
+	return nil
+}

From 265ca213baecfa61031ea08a91474961cab00b0b Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Wed, 27 Mar 2024 12:12:13 +0800
Subject: [PATCH 08/36] unify interfaces

---
 pkg/phlaredb/symdb/encoding_strings.go        | 227 ------------------
 pkg/phlaredb/symdb/format.go                  | 157 ++++++++++++
functions.go} | 119 +-------- .../{encoding_locations.go => locations.go} | 162 +++---------- .../{encoding_mappings.go => mappings.go} | 119 +-------- pkg/phlaredb/symdb/strings.go | 167 +++++++++++++ ...coding_strings_test.go => strings_test.go} | 13 +- 7 files changed, 377 insertions(+), 587 deletions(-) delete mode 100644 pkg/phlaredb/symdb/encoding_strings.go rename pkg/phlaredb/symdb/{encoding_functions.go => functions.go} (64%) rename pkg/phlaredb/symdb/{encoding_locations.go => locations.go} (72%) rename pkg/phlaredb/symdb/{encoding_mappings.go => mappings.go} (73%) create mode 100644 pkg/phlaredb/symdb/strings.go rename pkg/phlaredb/symdb/{encoding_strings_test.go => strings_test.go} (83%) diff --git a/pkg/phlaredb/symdb/encoding_strings.go b/pkg/phlaredb/symdb/encoding_strings.go deleted file mode 100644 index 5ecced263f..0000000000 --- a/pkg/phlaredb/symdb/encoding_strings.go +++ /dev/null @@ -1,227 +0,0 @@ -package symdb - -import ( - "encoding/binary" - "fmt" - "io" - "unsafe" - - "github.com/grafana/pyroscope/pkg/slices" -) - -// Almost all strings in profiles are very short, their length fits 8 bits. -// Strings larger than 65536 are not expected and are getting truncated. -// Typically, there are only 1-10 strings longer than 256 in a data set -// consisting of a few dozens of thousands of strings. -// -// A traditional var length encoding is rather wasteful in our case. -// Instead, we split the strings into blocks and use encoding that depends -// on the maximum length of the strings in the block. -// -// The output data starts with a header: number of strings, block size, -// number of blocks, and the block encoding map. In the map, each byte -// specifies the number of bits needed to decode the maximum value from -// that block, rounded up to the next power of two. Currently, the length -// value is either 8 bits or 16. -// -// Blocks of data follow after the header. Each block includes two parts: -// strings lengths array and strings data. - -const maxStringLen = 1<<16 - 1 - -type StringsEncoder struct { - w io.Writer - blockSize int - blocks []byte - buf []byte -} - -func NewStringsEncoder(w io.Writer) *StringsEncoder { return &StringsEncoder{w: w} } - -func (e *StringsEncoder) EncodeStrings(strings []string) error { - if e.blockSize == 0 { - e.blockSize = 1 << 10 // 1k strings per block by default. - } - nb := (len(strings) + e.blockSize - 1) / e.blockSize - e.blocks = slices.GrowLen(e.blocks, nb) - var offset uint32 - var bi int - l := uint32(len(strings)) - for offset < l { - lo := offset - hi := offset + uint32(e.blockSize) - if x := uint32(len(strings)); hi > x { - hi = x - } - e.blocks[bi] = e.blockEncoding(strings[lo:hi]) - offset = hi - bi++ - } - if err := e.writeHeader(strings); err != nil { - return err - } - // Next we write string lengths and values in blocks. - e.buf = slices.GrowLen(e.buf, e.blockSize*2) // Up to 2 bytes per string. - for i, b := range e.blocks { - // e.buf = e.buf[:0] - lo := i * e.blockSize - hi := lo + e.blockSize - if x := len(strings); hi > x { - hi = x - } - bs := strings[lo:hi] - switch b { - case 8: - for j, s := range bs { - e.buf[j] = byte(len(s)) - } - case 16: - for j, s := range bs { - // binary.LittleEndian.PutUint16. 
- e.buf[j*2] = byte(len(s)) - e.buf[j*2+1] = byte(len(s) >> 8) - } - default: - panic("bug: unexpected block size") - } - if _, err := e.w.Write(e.buf[:len(bs)*int(b)/8]); err != nil { - return err - } - for _, s := range bs { - if len(s) > maxStringLen { - s = s[:maxStringLen] - } - if _, err := e.w.Write(*((*[]byte)(unsafe.Pointer(&s)))); err != nil { - return err - } - } - } - return nil -} - -func (e *StringsEncoder) writeHeader(strings []string) (err error) { - e.buf = slices.GrowLen(e.buf, 12) - binary.LittleEndian.PutUint32(e.buf[0:4], uint32(len(strings))) - binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize)) - binary.LittleEndian.PutUint32(e.buf[8:12], uint32(len(e.blocks))) - if _, err = e.w.Write(e.buf); err != nil { - return err - } - _, err = e.w.Write(e.blocks) - return err -} - -func (e *StringsEncoder) blockEncoding(b []string) byte { - for _, s := range b { - if len(s) > 255 { - return 16 - } - } - return 8 -} - -func (e *StringsEncoder) Reset() { - e.buf = e.buf[:0] - e.blocks = e.blocks[:0] - e.blockSize = 0 - e.w = nil -} - -type StringsDecoder struct { - r io.Reader - stringsLen uint32 - blocksLen uint32 - blockSize uint32 - blocks []byte - buf []byte -} - -func NewStringsDecoder(r io.Reader) *StringsDecoder { return &StringsDecoder{r: r} } - -func (d *StringsDecoder) StringsLen() (int, error) { - if err := d.readHeader(); err != nil { - return 0, err - } - return int(d.stringsLen), nil -} - -func (d *StringsDecoder) readHeader() (err error) { - d.buf = slices.GrowLen(d.buf, 12) - if _, err = io.ReadFull(d.r, d.buf); err != nil { - return err - } - d.stringsLen = binary.LittleEndian.Uint32(d.buf[0:4]) - d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8]) - d.blocksLen = binary.LittleEndian.Uint32(d.buf[8:12]) - // Sanity checks are needed as we process the stream data - // before verifying the check sum. - if d.blocksLen > 1<<20 || d.stringsLen > 1<<20 || d.blockSize > 1<<20 { - return ErrInvalidSize - } - d.blocks = slices.GrowLen(d.blocks, int(d.blocksLen)) - _, err = io.ReadFull(d.r, d.blocks) - return err -} - -func (d *StringsDecoder) DecodeStrings(dst []string) (err error) { - for i := 0; i < len(d.blocks); i++ { - bs := d.blockSize - if i == len(d.blocks)-1 && d.stringsLen%d.blockSize > 0 { - bs = d.stringsLen % d.blockSize - } - switch d.blocks[i] { - case 8: - err = d.readStrings8(i, int(bs), dst) - case 16: - err = d.readStrings16(i, int(bs), dst) - default: - err = fmt.Errorf("unknown block encoding") - } - if err != nil { - return err - } - } - return nil -} - -func (d *StringsDecoder) readStrings8(idx, length int, dst []string) (err error) { - d.buf = slices.GrowLen(d.buf, length) // 1 byte per string. - if _, err = io.ReadFull(d.r, d.buf); err != nil { - return err - } - offset := int(d.blockSize) * idx - for i, l := range d.buf { - s := make([]byte, l) // Up to 256 bytes. - if _, err = io.ReadFull(d.r, s); err != nil { - return err - } - dst[offset+i] = *(*string)(unsafe.Pointer(&s)) - } - return err -} - -func (d *StringsDecoder) readStrings16(idx, length int, dst []string) (err error) { - d.buf = slices.GrowLen(d.buf, length*2) // 2 bytes per string. - if _, err = io.ReadFull(d.r, d.buf); err != nil { - return err - } - offset := int(d.blockSize) * idx - for i := 0; i < len(d.buf); i += 2 { - l := uint16(d.buf[i]) | uint16(d.buf[i+1])<<8 - s := make([]byte, l) // Up to 65536 bytes. 
-		if _, err = io.ReadFull(d.r, s); err != nil {
-			return err
-		}
-		dst[offset+i/2] = *(*string)(unsafe.Pointer(&s))
-	}
-	return err
-}
-
-func (d *StringsDecoder) Reset() {
-	d.buf = d.buf[:0]
-	d.blocks = d.blocks[:0]
-	d.blockSize = 0
-	d.blocksLen = 0
-	d.stringsLen = 0
-	d.r = nil
-}
diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go
index 781ed86832..5ed441ed34 100644
--- a/pkg/phlaredb/symdb/format.go
+++ b/pkg/phlaredb/symdb/format.go
@@ -7,6 +7,9 @@ import (
 	"hash/crc32"
 	"io"
 	"unsafe"
+
+	"github.com/grafana/pyroscope/pkg/slices"
+	"github.com/grafana/pyroscope/pkg/util/math"
 )
 
 // The database is a collection of files. The only file that is guaranteed
@@ -484,3 +487,157 @@ func (f *IndexFile) WriteTo(dst io.Writer) (n int64, err error) {
 
 	return w.offset, nil
 }
+
+// Symbolic information such as locations, functions, mappings,
+// and strings is represented as Array of Structures in memory,
+// and is encoded as Structure of Arrays when written on disk.
+//
+// The common structure of the encoded symbolic data is as follows:
+//
+//	[Header]
+//	[Data encoded in blocks]
+//	[CRC32]
+//
+// Where the block format depends on the contents.
+//
+// Note that the data is decoded in a stream fashion, therefore
+// any error in the data will be detected only after all the blocks
+// are read in and decoded.
+type symbolsBlockHeader struct {
+	Magic   [4]byte
+	Version uint32
+	// Length denotes the total number of items encoded.
+	Length uint32
+	// BlockSize denotes the number of items per block.
+	BlockSize uint32
+}
+
+const (
+	defaultSymbolsBlockSize = 1 << 10
+	symbolsBlockHeaderSize  = int(unsafe.Sizeof(symbolsBlockHeader{}))
+)
+
+func newSymbolsBlockHeader(n, bs int) symbolsBlockHeader {
+	return symbolsBlockHeader{
+		Magic:     symdbMagic,
+		Version:   1,
+		Length:    uint32(n),
+		BlockSize: uint32(bs),
+	}
+}
+
+func (h *symbolsBlockHeader) marshal(b []byte) {
+	b[0], b[1], b[2], b[3] = h.Magic[0], h.Magic[1], h.Magic[2], h.Magic[3]
+	binary.BigEndian.PutUint32(b[4:8], h.Version)
+	binary.BigEndian.PutUint32(b[8:12], h.Length)
+	binary.BigEndian.PutUint32(b[12:16], h.BlockSize)
+}
+
+func (h *symbolsBlockHeader) unmarshal(b []byte) {
+	h.Magic[0], h.Magic[1], h.Magic[2], h.Magic[3] = b[0], b[1], b[2], b[3]
+	h.Version = binary.BigEndian.Uint32(b[4:8])
+	h.Length = binary.BigEndian.Uint32(b[8:12])
+	h.BlockSize = binary.BigEndian.Uint32(b[12:16])
+}
+
+func (h *symbolsBlockHeader) validate() error {
+	if h.Magic[0] != symdbMagic[0] ||
+		h.Magic[1] != symdbMagic[1] ||
+		h.Magic[2] != symdbMagic[2] ||
+		h.Magic[3] != symdbMagic[3] {
+		return ErrInvalidMagic
+	}
+	if h.Version >= 2 {
+		return ErrUnknownVersion
+	}
+	if h.Length >= 1<<20 || h.BlockSize >= 1<<20 {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func writeSymbolsBlockHeader(w io.Writer, buf []byte, h symbolsBlockHeader) ([]byte, error) {
+	if err := h.validate(); err != nil {
+		return buf, err
+	}
+	buf = slices.GrowLen(buf, symbolsBlockHeaderSize)
+	h.marshal(buf)
+	_, err := w.Write(buf)
+	return buf, err
+}
+
+func readSymbolsBlockHeader(r io.Reader, buf []byte, h *symbolsBlockHeader) ([]byte, error) {
+	buf = slices.GrowLen(buf, symbolsBlockHeaderSize)
+	if _, err := io.ReadFull(r, buf); err != nil {
+		return buf, err
+	}
+	h.unmarshal(buf)
+	return buf, h.validate()
+}
+
+type symbolsBlockEncoder[T any] interface {
+	encode(w io.Writer, block []T) error
+}
+
+type symbolsEncoder[T any] struct {
+	w   io.Writer
+	e   symbolsBlockEncoder[T]
+	bs  int
+	buf []byte
+}
+
+func newSymbolsEncoder[T any](w 
io.Writer, e symbolsBlockEncoder[T]) *symbolsEncoder[T] { + return &symbolsEncoder[T]{w: w, e: e, bs: defaultSymbolsBlockSize} +} + +func (e *symbolsEncoder[T]) Encode(items []T) (err error) { + h := newSymbolsBlockHeader(len(items), e.bs) + if e.buf, err = writeSymbolsBlockHeader(e.w, e.buf, h); err != nil { + return err + } + for i := uint32(0); i < h.Length; i += h.BlockSize { + block := items[i:math.Min(i+h.BlockSize, h.Length)] + if err = e.e.encode(e.w, block); err != nil { + return err + } + } + return nil +} + +func (e *symbolsEncoder[T]) Reset(w io.Writer) { e.w = w } + +type symbolsBlockDecoder[T any] interface { + decode(r io.Reader, block []T) error +} + +type symbolsDecoder[T any] struct { + r io.Reader + h symbolsBlockHeader + d symbolsBlockDecoder[T] + + buf []byte +} + +func newSymbolsDecoder[T any](r io.Reader, d symbolsBlockDecoder[T]) *symbolsDecoder[T] { + return &symbolsDecoder[T]{r: r, d: d} +} + +func (d *symbolsDecoder[T]) Open() (n int, err error) { + d.buf, err = readSymbolsBlockHeader(d.r, d.buf, &d.h) + return int(d.h.Length), err +} + +func (d *symbolsDecoder[T]) Decode(items []T) error { + blocks := int((d.h.Length + d.h.BlockSize - 1) / d.h.BlockSize) + for i := 0; i < blocks; i++ { + lo := i * int(d.h.BlockSize) + hi := math.Min(lo+int(d.h.BlockSize), int(d.h.Length)) + block := items[lo:hi] + if err := d.d.decode(d.r, block); err != nil { + return err + } + } + return nil +} + +func (d *symbolsDecoder[T]) Reset(r io.Reader) { d.r = r } diff --git a/pkg/phlaredb/symdb/encoding_functions.go b/pkg/phlaredb/symdb/functions.go similarity index 64% rename from pkg/phlaredb/symdb/encoding_functions.go rename to pkg/phlaredb/symdb/functions.go index 78fa383941..d4f5833f1e 100644 --- a/pkg/phlaredb/symdb/encoding_functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -11,115 +11,15 @@ import ( v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/slices" - "github.com/grafana/pyroscope/pkg/util/math" ) -type FunctionsEncoder struct { - w io.Writer - e functionsBlockEncoder - - blockSize int - functions int - - buf []byte -} +const functionsBlockHeaderSize = int(unsafe.Sizeof(functionsBlockHeader{})) -const ( - defaultFunctionsBlockSize = 1 << 10 +var ( + _ symbolsBlockEncoder[v1.InMemoryFunction] = (*functionsBlockEncoder)(nil) + _ symbolsBlockDecoder[v1.InMemoryFunction] = (*functionsBlockDecoder)(nil) ) -func NewFunctionsEncoder(w io.Writer) *FunctionsEncoder { - return &FunctionsEncoder{w: w} -} - -func (e *FunctionsEncoder) EncodeFunctions(functions []v1.InMemoryFunction) error { - if e.blockSize == 0 { - e.blockSize = defaultFunctionsBlockSize - } - e.functions = len(functions) - if err := e.writeHeader(); err != nil { - return err - } - for i := 0; i < len(functions); i += e.blockSize { - block := functions[i:math.Min(i+e.blockSize, len(functions))] - if _, err := e.e.encode(e.w, block); err != nil { - return err - } - } - return nil -} - -func (e *FunctionsEncoder) writeHeader() (err error) { - e.buf = slices.GrowLen(e.buf, 8) - binary.LittleEndian.PutUint32(e.buf[0:4], uint32(e.functions)) - binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize)) - _, err = e.w.Write(e.buf) - return err -} - -func (e *FunctionsEncoder) Reset(w io.Writer) { - e.functions = 0 - e.blockSize = 0 - e.buf = e.buf[:0] - e.w = w -} - -type FunctionsDecoder struct { - r io.Reader - d functionsBlockDecoder - - blockSize uint32 - functions uint32 - - buf []byte -} - -func NewFunctionsDecoder(r io.Reader) *FunctionsDecoder { return 
&FunctionsDecoder{r: r} } - -func (d *FunctionsDecoder) FunctionsLen() (int, error) { - if err := d.readHeader(); err != nil { - return 0, err - } - return int(d.functions), nil -} - -func (d *FunctionsDecoder) readHeader() (err error) { - d.buf = slices.GrowLen(d.buf, 8) - if _, err = io.ReadFull(d.r, d.buf); err != nil { - return err - } - d.functions = binary.LittleEndian.Uint32(d.buf[0:4]) - d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8]) - // Sanity checks are needed as we process the stream data - // before verifying the check sum. - if d.functions > 1<<20 || d.blockSize > 1<<20 { - return ErrInvalidSize - } - return nil -} - -func (d *FunctionsDecoder) DecodeFunctions(functions []v1.InMemoryFunction) error { - blocks := int((d.functions + d.blockSize - 1) / d.blockSize) - for i := 0; i < blocks; i++ { - lo := i * int(d.blockSize) - hi := math.Min(lo+int(d.blockSize), int(d.functions)) - block := functions[lo:hi] - if err := d.d.decode(d.r, block); err != nil { - return err - } - } - return nil -} - -func (d *FunctionsDecoder) Reset(r io.Reader) { - d.functions = 0 - d.blockSize = 0 - d.buf = d.buf[:0] - d.r = r -} - -const functionsBlockHeaderSize = int(unsafe.Sizeof(functionsBlockHeader{})) - type functionsBlockHeader struct { FunctionsLen uint32 NameSize uint32 @@ -159,7 +59,7 @@ type functionsBlockEncoder struct { ints []int32 } -func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunction) (int64, error) { +func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunction) error { e.initWrite(len(functions)) var enc delta.BinaryPackedEncoding @@ -193,12 +93,11 @@ func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunct e.tmp = slices.GrowLen(e.tmp, functionsBlockHeaderSize) e.header.marshal(e.tmp) - n, err := w.Write(e.tmp) - if err != nil { - return int64(n), err + if _, err := w.Write(e.tmp); err != nil { + return err } - m, err := e.buf.WriteTo(w) - return m + int64(n), err + _, err := e.buf.WriteTo(w) + return err } func (e *functionsBlockEncoder) initWrite(functions int) { diff --git a/pkg/phlaredb/symdb/encoding_locations.go b/pkg/phlaredb/symdb/locations.go similarity index 72% rename from pkg/phlaredb/symdb/encoding_locations.go rename to pkg/phlaredb/symdb/locations.go index 193d3233c4..6d5d1ab8f1 100644 --- a/pkg/phlaredb/symdb/encoding_locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -11,135 +11,17 @@ import ( v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/slices" - "github.com/grafana/pyroscope/pkg/util/math" ) -// https://parquet.apache.org/docs/file-format/data-pages/encodings/#delta-encoding-delta_binary_packed--5 - -type LocationsEncoder struct { - w io.Writer - e locationsBlockEncoder - - blockSize int - locations int - - buf []byte -} - const ( - maxLocationLines = 255 - defaultLocationsBlockSize = 1 << 10 + maxLocationLines = 255 + locationsBlockHeaderSize = int(unsafe.Sizeof(locationsBlockHeader{})) ) -func NewLocationsEncoder(w io.Writer) *LocationsEncoder { - return &LocationsEncoder{w: w} -} - -func (e *LocationsEncoder) EncodeLocations(locations []v1.InMemoryLocation) error { - if e.blockSize == 0 { - e.blockSize = defaultLocationsBlockSize - } - e.locations = len(locations) - if err := e.writeHeader(); err != nil { - return err - } - for i := 0; i < len(locations); i += e.blockSize { - block := locations[i:math.Min(i+e.blockSize, len(locations))] - if _, err := e.e.encode(e.w, block); err != nil { - return err - } - } - return nil -} - 
-func (e *LocationsEncoder) writeHeader() (err error) {
-	e.buf = slices.GrowLen(e.buf, 8)
-	binary.LittleEndian.PutUint32(e.buf[0:4], uint32(e.locations))
-	binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize))
-	_, err = e.w.Write(e.buf)
-	return err
-}
-
-func (e *LocationsEncoder) Reset(w io.Writer) {
-	e.locations = 0
-	e.blockSize = 0
-	e.buf = e.buf[:0]
-	e.w = w
-}
-
-type LocationsDecoder struct {
-	r io.Reader
-	d locationsBlockDecoder
-
-	blockSize uint32
-	locations uint32
-
-	buf []byte
-}
-
-func NewLocationsDecoder(r io.Reader) *LocationsDecoder { return &LocationsDecoder{r: r} }
-
-func (d *LocationsDecoder) LocationsLen() (int, error) {
-	if err := d.readHeader(); err != nil {
-		return 0, err
-	}
-	return int(d.locations), nil
-}
-
-func (d *LocationsDecoder) readHeader() (err error) {
-	d.buf = slices.GrowLen(d.buf, 8)
-	if _, err = io.ReadFull(d.r, d.buf); err != nil {
-		return err
-	}
-	d.locations = binary.LittleEndian.Uint32(d.buf[0:4])
-	d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8])
-	// Sanity checks are needed as we process the stream data
-	// before verifying the check sum.
-	if d.locations > 1<<20 || d.blockSize > 1<<20 {
-		return ErrInvalidSize
-	}
-	return nil
-}
-
-func (d *LocationsDecoder) DecodeLocations(locations []v1.InMemoryLocation) error {
-	blocks := int((d.locations + d.blockSize - 1) / d.blockSize)
-	for i := 0; i < blocks; i++ {
-		lo := i * int(d.blockSize)
-		hi := math.Min(lo+int(d.blockSize), int(d.locations))
-		block := locations[lo:hi]
-		if err := d.d.decode(d.r, block); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-func (d *LocationsDecoder) Reset(r io.Reader) {
-	d.locations = 0
-	d.blockSize = 0
-	d.buf = d.buf[:0]
-	d.r = r
-}
-
-type locationsBlockEncoder struct {
-	header locationsBlockHeader
-
-	mapping []int32
-	// Assuming there is no locations with more than 255 lines.
-	// We could even use a nibble (4 bits), but there are locations
-	// with 10 and more functions, therefore there is a change that
-	// capacity of 2^4 is not enough in all cases.
-	lineCount []byte
-	lines []int32
-	// Optional.
-	addr []int64
-	folded []bool
-
-	tmp []byte
-	buf bytes.Buffer
-}
-
-const locationsBlockHeaderSize = int(unsafe.Sizeof(locationsBlockHeader{}))
+var (
+	_ symbolsBlockEncoder[v1.InMemoryLocation] = (*locationsBlockEncoder)(nil)
+	_ symbolsBlockDecoder[v1.InMemoryLocation] = (*locationsBlockDecoder)(nil)
+)
 
 type locationsBlockHeader struct {
 	LocationsLen uint32 // Number of locations
@@ -151,9 +33,6 @@ type locationsBlockHeader struct {
 	IsFoldedSize uint32 // Size of the encoded slice of is_folded
 }
 
-// isValid reports whether the header contains sane values.
-// This is important as the block might be read before the
-// checksum validation.
 func (h *locationsBlockHeader) isValid() bool {
 	return h.LocationsLen > 0 && h.LocationsLen < 1<<20 &&
 		h.MappingSize > 0 && h.MappingSize < 1<<20 &&
@@ -181,7 +60,25 @@ func (h *locationsBlockHeader) unmarshal(b []byte) {
 	h.IsFoldedSize = binary.LittleEndian.Uint32(b[20:24])
 }
 
-func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) (int64, error) {
+type locationsBlockEncoder struct {
+	header locationsBlockHeader
+
+	mapping []int32
+	// Assuming there are no locations with more than 255 lines.
+	// We could even use a nibble (4 bits), but there are locations
+	// with 10 and more functions, therefore there is a chance that
+	// capacity of 2^4 is not enough in all cases.
+	lineCount []byte
+	lines     []int32
+	// Optional.
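+	// (These columns are typically written only when at least one
+	// location in the block actually sets them; the AddrSize and
+	// IsFoldedSize header fields describe what is present.)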
+ addr []int64 + folded []bool + + tmp []byte + buf bytes.Buffer +} + +func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) error { e.initWrite(len(locations)) var addr int64 var folded bool @@ -229,12 +126,11 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat e.tmp = slices.GrowLen(e.tmp, locationsBlockHeaderSize) e.header.marshal(e.tmp) - n, err := w.Write(e.tmp) - if err != nil { - return int64(n), err + if _, err := w.Write(e.tmp); err != nil { + return err } - m, err := e.buf.WriteTo(w) - return m + int64(n), err + _, err := e.buf.WriteTo(w) + return err } func (e *locationsBlockEncoder) initWrite(locations int) { diff --git a/pkg/phlaredb/symdb/encoding_mappings.go b/pkg/phlaredb/symdb/mappings.go similarity index 73% rename from pkg/phlaredb/symdb/encoding_mappings.go rename to pkg/phlaredb/symdb/mappings.go index c1a5794bc9..d4ce07c531 100644 --- a/pkg/phlaredb/symdb/encoding_mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -11,115 +11,15 @@ import ( v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/slices" - "github.com/grafana/pyroscope/pkg/util/math" ) -type MappingsEncoder struct { - w io.Writer - e mappingsBlockEncoder - - blockSize int - mappings int - - buf []byte -} +const mappingsBlockHeaderSize = int(unsafe.Sizeof(mappingsBlockHeader{})) -const ( - defaultMappingsBlockSize = 1 << 10 +var ( + _ symbolsBlockEncoder[v1.InMemoryMapping] = (*mappingsBlockEncoder)(nil) + _ symbolsBlockDecoder[v1.InMemoryMapping] = (*mappingsBlockDecoder)(nil) ) -func NewMappingsEncoder(w io.Writer) *MappingsEncoder { - return &MappingsEncoder{w: w} -} - -func (e *MappingsEncoder) EncodeMappings(mappings []v1.InMemoryMapping) error { - if e.blockSize == 0 { - e.blockSize = defaultMappingsBlockSize - } - e.mappings = len(mappings) - if err := e.writeHeader(); err != nil { - return err - } - for i := 0; i < len(mappings); i += e.blockSize { - block := mappings[i:math.Min(i+e.blockSize, len(mappings))] - if _, err := e.e.encode(e.w, block); err != nil { - return err - } - } - return nil -} - -func (e *MappingsEncoder) writeHeader() (err error) { - e.buf = slices.GrowLen(e.buf, 8) - binary.LittleEndian.PutUint32(e.buf[0:4], uint32(e.mappings)) - binary.LittleEndian.PutUint32(e.buf[4:8], uint32(e.blockSize)) - _, err = e.w.Write(e.buf) - return err -} - -func (e *MappingsEncoder) Reset(w io.Writer) { - e.mappings = 0 - e.blockSize = 0 - e.buf = e.buf[:0] - e.w = w -} - -type MappingsDecoder struct { - r io.Reader - d mappingsBlockDecoder - - blockSize uint32 - mappings uint32 - - buf []byte -} - -func NewMappingsDecoder(r io.Reader) *MappingsDecoder { return &MappingsDecoder{r: r} } - -func (d *MappingsDecoder) MappingsLen() (int, error) { - if err := d.readHeader(); err != nil { - return 0, err - } - return int(d.mappings), nil -} - -func (d *MappingsDecoder) readHeader() (err error) { - d.buf = slices.GrowLen(d.buf, 8) - if _, err = io.ReadFull(d.r, d.buf); err != nil { - return err - } - d.mappings = binary.LittleEndian.Uint32(d.buf[0:4]) - d.blockSize = binary.LittleEndian.Uint32(d.buf[4:8]) - // Sanity checks are needed as we process the stream data - // before verifying the check sum. 
-	if d.mappings > 1<<20 || d.blockSize > 1<<20 {
-		return ErrInvalidSize
-	}
-	return nil
-}
-
-func (d *MappingsDecoder) DecodeMappings(mappings []v1.InMemoryMapping) error {
-	blocks := int((d.mappings + d.blockSize - 1) / d.blockSize)
-	for i := 0; i < blocks; i++ {
-		lo := i * int(d.blockSize)
-		hi := math.Min(lo+int(d.blockSize), int(d.mappings))
-		block := mappings[lo:hi]
-		if err := d.d.decode(d.r, block); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-func (d *MappingsDecoder) Reset(r io.Reader) {
-	d.mappings = 0
-	d.blockSize = 0
-	d.buf = d.buf[:0]
-	d.r = r
-}
-
-const mappingsBlockHeaderSize = int(unsafe.Sizeof(mappingsBlockHeader{}))
-
+const mappingsBlockHeaderSize = int(unsafe.Sizeof(mappingsBlockHeader{}))
+
+var (
+	_ symbolsBlockEncoder[v1.InMemoryMapping] = (*mappingsBlockEncoder)(nil)
+	_ symbolsBlockDecoder[v1.InMemoryMapping] = (*mappingsBlockDecoder)(nil)
+)
+
 type mappingsBlockHeader struct {
 	MappingsLen  uint32
 	FileNameSize uint32
@@ -167,7 +67,7 @@ type mappingsBlockEncoder struct {
 	ints64 []int64
 }
 
-func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping) (int64, error) {
+func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping) error {
 	e.initWrite(len(mappings))
 	var enc delta.BinaryPackedEncoding
 
@@ -240,12 +140,11 @@ func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping
 
 	e.tmp = slices.GrowLen(e.tmp, mappingsBlockHeaderSize)
 	e.header.marshal(e.tmp)
-	n, err := w.Write(e.tmp)
-	if err != nil {
-		return int64(n), err
+	if _, err := w.Write(e.tmp); err != nil {
+		return err
 	}
-	m, err := e.buf.WriteTo(w)
-	return m + int64(n), err
+	_, err := e.buf.WriteTo(w)
+	return err
 }
 
 func (e *mappingsBlockEncoder) initWrite(mappings int) {
diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go
new file mode 100644
index 0000000000..a66fc2ae83
--- /dev/null
+++ b/pkg/phlaredb/symdb/strings.go
@@ -0,0 +1,167 @@
+package symdb
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"io"
+	"unsafe"
+
+	"github.com/grafana/pyroscope/pkg/slices"
+)
+
+const (
+	maxStringLen           = 1<<16 - 1
+	stringsBlockHeaderSize = int(unsafe.Sizeof(stringsBlockHeader{}))
+)
+
+var (
+	_ symbolsBlockEncoder[string] = (*stringsBlockEncoder)(nil)
+	_ symbolsBlockDecoder[string] = (*stringsBlockDecoder)(nil)
+)
+
+type stringsBlockHeader struct {
+	StringsLen    uint32
+	BlockEncoding byte
+	_             [3]byte
+}
+
+func (h *stringsBlockHeader) marshal(b []byte) {
+	binary.LittleEndian.PutUint32(b[0:4], h.StringsLen)
+	b[4] = h.BlockEncoding
+}
+
+func (h *stringsBlockHeader) unmarshal(b []byte) {
+	h.StringsLen = binary.LittleEndian.Uint32(b[0:4])
+	h.BlockEncoding = b[4]
+}
+
+func (h *stringsBlockHeader) isValid() bool {
+	return h.StringsLen < 1<<20 && (h.BlockEncoding == 8 || h.BlockEncoding == 16)
+}
+
+type stringsBlockEncoder struct {
+	header stringsBlockHeader
+	buf    bytes.Buffer
+	tmp    []byte
+}
+
+func (e *stringsBlockEncoder) encode(w io.Writer, strings []string) error {
+	e.initWrite(len(strings))
+	e.header.BlockEncoding = e.blockEncoding(strings)
+	switch e.header.BlockEncoding {
+	case 8:
+		for j, s := range strings {
+			e.tmp[j] = byte(len(s))
+		}
+	case 16:
+		for j, s := range strings {
+			if len(s) > maxStringLen {
+				// Keep the recorded length consistent with
+				// the truncated data written below.
+				s = s[:maxStringLen]
+			}
+			// binary.LittleEndian.PutUint16. TODO: BigEndian.
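+			// Lengths are stored low byte first (little-endian
+			// uint16); decodeStrings16 reads them back the same way.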
+			e.tmp[j*2] = byte(len(s))
+			e.tmp[j*2+1] = byte(len(s) >> 8)
+		}
+	}
+	if _, err := e.buf.Write(e.tmp[:len(strings)*int(e.header.BlockEncoding)/8]); err != nil {
+		return err
+	}
+	for _, s := range strings {
+		if len(s) > maxStringLen {
+			s = s[:maxStringLen]
+		}
+		if _, err := e.buf.Write(*((*[]byte)(unsafe.Pointer(&s)))); err != nil {
+			return err
+		}
+	}
+	e.tmp = slices.GrowLen(e.tmp, stringsBlockHeaderSize)
+	e.header.marshal(e.tmp)
+	if _, err := w.Write(e.tmp); err != nil {
+		return err
+	}
+	_, err := e.buf.WriteTo(w)
+	return err
+}
+
+func (e *stringsBlockEncoder) blockEncoding(b []string) byte {
+	for _, s := range b {
+		if len(s) > 255 {
+			return 16
+		}
+	}
+	return 8
+}
+
+func (e *stringsBlockEncoder) initWrite(strings int) {
+	e.buf.Reset()
+	e.buf.Grow(strings * 16)
+	*e = stringsBlockEncoder{
+		header: stringsBlockHeader{StringsLen: uint32(strings)},
+		tmp:    slices.GrowLen(e.tmp, strings*2),
+		buf:    e.buf,
+	}
+}
+
+type stringsBlockDecoder struct {
+	header stringsBlockHeader
+	tmp    []byte
+}
+
+func (d *stringsBlockDecoder) readHeader(r io.Reader) error {
+	d.tmp = slices.GrowLen(d.tmp, stringsBlockHeaderSize)
+	if _, err := io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	d.header.unmarshal(d.tmp)
+	if !d.header.isValid() {
+		return ErrInvalidSize
+	}
+	return nil
+}
+
+func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) {
+	if err = d.readHeader(r); err != nil {
+		return err
+	}
+	if d.header.StringsLen > uint32(len(strings)) {
+		return fmt.Errorf("strings buffer is too short")
+	}
+	switch d.header.BlockEncoding {
+	case 8:
+		return d.decodeStrings8(r, strings)
+	case 16:
+		return d.decodeStrings16(r, strings)
+	default:
+		// Header validation ensures this never happens.
+	}
+	return nil
+}
+
+func (d *stringsBlockDecoder) decodeStrings8(r io.Reader, dst []string) (err error) {
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.StringsLen)) // 1 byte per string.
+	if _, err = io.ReadFull(r, d.tmp); err != nil {
+		return err
+	}
+	for i := uint32(0); i < d.header.StringsLen; i++ {
+		s := make([]byte, d.tmp[i])
+		if _, err = io.ReadFull(r, s); err != nil {
+			return err
+		}
+		dst[i] = *(*string)(unsafe.Pointer(&s))
+	}
+	return err
+}
+
+func (d *stringsBlockDecoder) decodeStrings16(r io.Reader, dst []string) (err error) {
+	d.tmp = slices.GrowLen(d.tmp, int(d.header.StringsLen)*2) // 2 bytes per string.
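+	// The block stores all lengths first, then the string data,
+	// so the whole lengths array can be fetched in a single read.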
+ if _, err = io.ReadFull(r, d.tmp); err != nil { + return err + } + for i := uint32(0); i < d.header.StringsLen; i++ { + l := uint16(d.tmp[i*2]) | uint16(d.tmp[i*2+1])<<8 + s := make([]byte, l) + if _, err = io.ReadFull(r, s); err != nil { + return err + } + dst[i] = *(*string)(unsafe.Pointer(&s)) + } + return err +} diff --git a/pkg/phlaredb/symdb/encoding_strings_test.go b/pkg/phlaredb/symdb/strings_test.go similarity index 83% rename from pkg/phlaredb/symdb/encoding_strings_test.go rename to pkg/phlaredb/symdb/strings_test.go index 1fa6191eb1..5b391957c8 100644 --- a/pkg/phlaredb/symdb/encoding_strings_test.go +++ b/pkg/phlaredb/symdb/strings_test.go @@ -1,7 +1,6 @@ package symdb import ( - "bufio" "bytes" "strings" "testing" @@ -79,16 +78,16 @@ func Test_StringsEncoding(t *testing.T) { tc := tc t.Run(tc.description, func(t *testing.T) { var output bytes.Buffer - e := NewStringsEncoder(&output) + e := newSymbolsEncoder[string](&output, new(stringsBlockEncoder)) if tc.blockSize > 0 { - e.blockSize = tc.blockSize + e.bs = tc.blockSize } - require.NoError(t, e.EncodeStrings(tc.strings)) - d := NewStringsDecoder(bufio.NewReader(&output)) - n, err := d.StringsLen() + require.NoError(t, e.Encode(tc.strings)) + d := newSymbolsDecoder[string](&output, new(stringsBlockDecoder)) + n, err := d.Open() require.NoError(t, err) out := make([]string, n) - require.NoError(t, d.DecodeStrings(out)) + require.NoError(t, d.Decode(out)) require.Equal(t, tc.strings, out) }) } From 918f4eec144f8f86822288462a954f1257bb92f6 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 2 Apr 2024 15:18:30 +0800 Subject: [PATCH 09/36] adhere to the value semantics --- pkg/ingester/pyroscope/ingest_handler_test.go | 2 +- pkg/phlaredb/block_querier_symbols.go | 8 +-- pkg/phlaredb/schemas/v1/functions.go | 17 +++--- pkg/phlaredb/schemas/v1/locations.go | 18 +++--- pkg/phlaredb/schemas/v1/mappings.go | 17 +++--- pkg/phlaredb/schemas/v1/models.go | 8 +-- pkg/phlaredb/schemas/v1/schema_test.go | 34 +++++------ pkg/phlaredb/schemas/v1/strings.go | 10 ++-- pkg/phlaredb/symdb/block_reader.go | 16 +++--- pkg/phlaredb/symdb/block_writer.go | 8 +-- pkg/phlaredb/symdb/dedup_slice.go | 56 +++++++++---------- pkg/phlaredb/symdb/partition_memory.go | 54 ++++++++++++++++-- pkg/phlaredb/symdb/rewriter.go | 40 ++++++------- pkg/phlaredb/symdb/symdb.go | 6 +- pkg/slices/slices.go | 2 +- 15 files changed, 167 insertions(+), 129 deletions(-) diff --git a/pkg/ingester/pyroscope/ingest_handler_test.go b/pkg/ingester/pyroscope/ingest_handler_test.go index c16f5f6da9..bc3a1ed14e 100644 --- a/pkg/ingester/pyroscope/ingest_handler_test.go +++ b/pkg/ingester/pyroscope/ingest_handler_test.go @@ -7,6 +7,7 @@ import ( "mime/multipart" "net/http/httptest" "os" + "slices" "sort" "testing" @@ -15,7 +16,6 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "golang.org/x/exp/slices" profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" pushv1 "github.com/grafana/pyroscope/api/gen/proto/go/push/v1" diff --git a/pkg/phlaredb/block_querier_symbols.go b/pkg/phlaredb/block_querier_symbols.go index 5b88ef4964..9124ba8927 100644 --- a/pkg/phlaredb/block_querier_symbols.go +++ b/pkg/phlaredb/block_querier_symbols.go @@ -204,10 +204,10 @@ func (p *symbolsPartition) Release() { } type inMemoryParquetTables struct { - strings inMemoryparquetReader[string, *schemav1.StringPersister] - functions inMemoryparquetReader[*schemav1.InMemoryFunction, 
*schemav1.FunctionPersister] - locations inMemoryparquetReader[*schemav1.InMemoryLocation, *schemav1.LocationPersister] - mappings inMemoryparquetReader[*schemav1.InMemoryMapping, *schemav1.MappingPersister] + strings inMemoryparquetReader[string, schemav1.StringPersister] + functions inMemoryparquetReader[schemav1.InMemoryFunction, schemav1.FunctionPersister] + locations inMemoryparquetReader[schemav1.InMemoryLocation, schemav1.LocationPersister] + mappings inMemoryparquetReader[schemav1.InMemoryMapping, schemav1.MappingPersister] } func openInMemoryParquetTables(ctx context.Context, r phlareobj.BucketReader, meta *block.Meta) (*inMemoryParquetTables, error) { diff --git a/pkg/phlaredb/schemas/v1/functions.go b/pkg/phlaredb/schemas/v1/functions.go index 62d723fc7d..bbeaf8e637 100644 --- a/pkg/phlaredb/schemas/v1/functions.go +++ b/pkg/phlaredb/schemas/v1/functions.go @@ -10,13 +10,13 @@ var functionsSchema = parquet.SchemaOf(new(profilev1.Function)) type FunctionPersister struct{} -func (*FunctionPersister) Name() string { return "functions" } +func (FunctionPersister) Name() string { return "functions" } -func (*FunctionPersister) Schema() *parquet.Schema { return functionsSchema } +func (FunctionPersister) Schema() *parquet.Schema { return functionsSchema } -func (*FunctionPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } +func (FunctionPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } -func (*FunctionPersister) Deconstruct(row parquet.Row, _ uint64, fn *InMemoryFunction) parquet.Row { +func (FunctionPersister) Deconstruct(row parquet.Row, _ uint64, fn InMemoryFunction) parquet.Row { if cap(row) < 5 { row = make(parquet.Row, 0, 5) } @@ -29,7 +29,7 @@ func (*FunctionPersister) Deconstruct(row parquet.Row, _ uint64, fn *InMemoryFun return row } -func (*FunctionPersister) Reconstruct(row parquet.Row) (uint64, *InMemoryFunction, error) { +func (FunctionPersister) Reconstruct(row parquet.Row) (uint64, InMemoryFunction, error) { loc := InMemoryFunction{ Id: row[0].Uint64(), Name: row[1].Uint32(), @@ -37,7 +37,7 @@ func (*FunctionPersister) Reconstruct(row parquet.Row) (uint64, *InMemoryFunctio Filename: row[3].Uint32(), StartLine: row[4].Uint32(), } - return 0, &loc, nil + return 0, loc, nil } type InMemoryFunction struct { @@ -54,7 +54,6 @@ type InMemoryFunction struct { StartLine uint32 } -func (f *InMemoryFunction) Clone() *InMemoryFunction { - n := *f - return &n +func (f InMemoryFunction) Clone() InMemoryFunction { + return f } diff --git a/pkg/phlaredb/schemas/v1/locations.go b/pkg/phlaredb/schemas/v1/locations.go index b9cbf91ba6..a426f0958c 100644 --- a/pkg/phlaredb/schemas/v1/locations.go +++ b/pkg/phlaredb/schemas/v1/locations.go @@ -10,13 +10,13 @@ var locationsSchema = parquet.SchemaOf(new(profilev1.Location)) type LocationPersister struct{} -func (*LocationPersister) Name() string { return "locations" } +func (LocationPersister) Name() string { return "locations" } -func (*LocationPersister) Schema() *parquet.Schema { return locationsSchema } +func (LocationPersister) Schema() *parquet.Schema { return locationsSchema } -func (*LocationPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } +func (LocationPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } -func (*LocationPersister) Deconstruct(row parquet.Row, _ uint64, loc *InMemoryLocation) parquet.Row { +func (LocationPersister) Deconstruct(row parquet.Row, _ uint64, loc InMemoryLocation) 
parquet.Row { var ( col = -1 newCol = func() int { @@ -61,7 +61,7 @@ func (*LocationPersister) Deconstruct(row parquet.Row, _ uint64, loc *InMemoryLo return row } -func (*LocationPersister) Reconstruct(row parquet.Row) (uint64, *InMemoryLocation, error) { +func (LocationPersister) Reconstruct(row parquet.Row) (uint64, InMemoryLocation, error) { loc := InMemoryLocation{ Id: row[0].Uint64(), MappingId: uint32(row[1].Uint64()), @@ -76,7 +76,7 @@ func (*LocationPersister) Reconstruct(row parquet.Row) (uint64, *InMemoryLocatio for i, v := range lines[len(lines)/2:] { loc.Line[i].Line = int32(v.Uint64()) } - return 0, &loc, nil + return 0, loc, nil } type InMemoryLocation struct { @@ -110,11 +110,11 @@ type InMemoryLocation struct { Line []InMemoryLine } -func (l *InMemoryLocation) Clone() *InMemoryLocation { - x := *l +func (l InMemoryLocation) Clone() InMemoryLocation { + x := l x.Line = make([]InMemoryLine, len(l.Line)) copy(x.Line, l.Line) - return &x + return x } type InMemoryLine struct { diff --git a/pkg/phlaredb/schemas/v1/mappings.go b/pkg/phlaredb/schemas/v1/mappings.go index 0d5503f6cb..cb53ec5d08 100644 --- a/pkg/phlaredb/schemas/v1/mappings.go +++ b/pkg/phlaredb/schemas/v1/mappings.go @@ -10,13 +10,13 @@ var mappingsSchema = parquet.SchemaOf(new(profilev1.Mapping)) type MappingPersister struct{} -func (*MappingPersister) Name() string { return "mappings" } +func (MappingPersister) Name() string { return "mappings" } -func (*MappingPersister) Schema() *parquet.Schema { return mappingsSchema } +func (MappingPersister) Schema() *parquet.Schema { return mappingsSchema } -func (*MappingPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } +func (MappingPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } -func (*MappingPersister) Deconstruct(row parquet.Row, _ uint64, m *InMemoryMapping) parquet.Row { +func (MappingPersister) Deconstruct(row parquet.Row, _ uint64, m InMemoryMapping) parquet.Row { if cap(row) < 10 { row = make(parquet.Row, 0, 10) } @@ -34,7 +34,7 @@ func (*MappingPersister) Deconstruct(row parquet.Row, _ uint64, m *InMemoryMappi return row } -func (*MappingPersister) Reconstruct(row parquet.Row) (uint64, *InMemoryMapping, error) { +func (MappingPersister) Reconstruct(row parquet.Row) (uint64, InMemoryMapping, error) { mapping := InMemoryMapping{ Id: row[0].Uint64(), MemoryStart: row[1].Uint64(), @@ -47,7 +47,7 @@ func (*MappingPersister) Reconstruct(row parquet.Row) (uint64, *InMemoryMapping, HasLineNumbers: row[8].Boolean(), HasInlineFrames: row[9].Boolean(), } - return 0, &mapping, nil + return 0, mapping, nil } type InMemoryMapping struct { @@ -74,7 +74,6 @@ type InMemoryMapping struct { HasInlineFrames bool } -func (m *InMemoryMapping) Clone() *InMemoryMapping { - n := *m - return &n +func (m InMemoryMapping) Clone() InMemoryMapping { + return m } diff --git a/pkg/phlaredb/schemas/v1/models.go b/pkg/phlaredb/schemas/v1/models.go index 84fa49956b..2fdcf812b2 100644 --- a/pkg/phlaredb/schemas/v1/models.go +++ b/pkg/phlaredb/schemas/v1/models.go @@ -3,10 +3,10 @@ package v1 import googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" type Models interface { - *Profile | *InMemoryProfile | - *googlev1.Location | *InMemoryLocation | - *googlev1.Function | *InMemoryFunction | - *googlev1.Mapping | *InMemoryMapping | + *Profile | InMemoryProfile | + *googlev1.Location | InMemoryLocation | + *googlev1.Function | InMemoryFunction | + *googlev1.Mapping | InMemoryMapping | *Stacktrace | string } diff 
--git a/pkg/phlaredb/schemas/v1/schema_test.go b/pkg/phlaredb/schemas/v1/schema_test.go index b79106c39b..da2a70df50 100644 --- a/pkg/phlaredb/schemas/v1/schema_test.go +++ b/pkg/phlaredb/schemas/v1/schema_test.go @@ -68,7 +68,7 @@ func newStrings() []string { func TestStringsRoundTrip(t *testing.T) { var ( s = newStrings() - w = &ReadWriter[string, *StringPersister]{} + w = &ReadWriter[string, StringPersister]{} buf bytes.Buffer ) @@ -200,7 +200,7 @@ func TestLocationsRoundTrip(t *testing.T) { }, } - mem := []*InMemoryLocation{ + mem := []InMemoryLocation{ { Id: 8, Address: 9, @@ -236,14 +236,14 @@ func TestLocationsRoundTrip(t *testing.T) { } var buf bytes.Buffer - require.NoError(t, new(ReadWriter[*profilev1.Location, *pprofLocationPersister]).WriteParquetFile(&buf, raw)) - actual, err := new(ReadWriter[*InMemoryLocation, *LocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) + require.NoError(t, new(ReadWriter[*profilev1.Location, pprofLocationPersister]).WriteParquetFile(&buf, raw)) + actual, err := new(ReadWriter[InMemoryLocation, LocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) require.NoError(t, err) assert.Equal(t, mem, actual) buf.Reset() - require.NoError(t, new(ReadWriter[*InMemoryLocation, *LocationPersister]).WriteParquetFile(&buf, mem)) - actual, err = new(ReadWriter[*InMemoryLocation, *LocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) + require.NoError(t, new(ReadWriter[InMemoryLocation, LocationPersister]).WriteParquetFile(&buf, mem)) + actual, err = new(ReadWriter[InMemoryLocation, LocationPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) require.NoError(t, err) assert.Equal(t, mem, actual) } @@ -252,20 +252,20 @@ var protoLocationsSchema = parquet.SchemaOf(&profilev1.Location{}) type pprofLocationPersister struct{} -func (*pprofLocationPersister) Name() string { return "locations" } +func (pprofLocationPersister) Name() string { return "locations" } -func (*pprofLocationPersister) Schema() *parquet.Schema { return protoLocationsSchema } +func (pprofLocationPersister) Schema() *parquet.Schema { return protoLocationsSchema } -func (*pprofLocationPersister) SortingColumns() parquet.SortingOption { +func (pprofLocationPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } -func (*pprofLocationPersister) Deconstruct(row parquet.Row, _ uint64, loc *profilev1.Location) parquet.Row { +func (pprofLocationPersister) Deconstruct(row parquet.Row, _ uint64, loc *profilev1.Location) parquet.Row { row = protoLocationsSchema.Deconstruct(row, loc) return row } -func (*pprofLocationPersister) Reconstruct(row parquet.Row) (uint64, *profilev1.Location, error) { +func (pprofLocationPersister) Reconstruct(row parquet.Row) (uint64, *profilev1.Location, error) { var loc profilev1.Location if err := protoLocationsSchema.Reconstruct(&loc, row); err != nil { return 0, nil, err @@ -291,7 +291,7 @@ func TestFunctionsRoundTrip(t *testing.T) { }, } - mem := []*InMemoryFunction{ + mem := []InMemoryFunction{ { Id: 6, Name: 7, @@ -310,13 +310,13 @@ func TestFunctionsRoundTrip(t *testing.T) { var buf bytes.Buffer require.NoError(t, new(ReadWriter[*profilev1.Function, *pprofFunctionPersister]).WriteParquetFile(&buf, raw)) - actual, err := new(ReadWriter[*InMemoryFunction, *FunctionPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) + actual, err := new(ReadWriter[InMemoryFunction, FunctionPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) require.NoError(t, err) assert.Equal(t, mem, actual) buf.Reset() - 
require.NoError(t, new(ReadWriter[*InMemoryFunction, *FunctionPersister]).WriteParquetFile(&buf, mem)) - actual, err = new(ReadWriter[*InMemoryFunction, *FunctionPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) + require.NoError(t, new(ReadWriter[InMemoryFunction, FunctionPersister]).WriteParquetFile(&buf, mem)) + actual, err = new(ReadWriter[InMemoryFunction, FunctionPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) require.NoError(t, err) assert.Equal(t, mem, actual) } @@ -374,7 +374,7 @@ func TestMappingsRoundTrip(t *testing.T) { }, } - mem := []*InMemoryMapping{ + mem := []InMemoryMapping{ { Id: 7, MemoryStart: 8, @@ -403,7 +403,7 @@ func TestMappingsRoundTrip(t *testing.T) { var buf bytes.Buffer require.NoError(t, new(ReadWriter[*profilev1.Mapping, *pprofMappingPersister]).WriteParquetFile(&buf, raw)) - actual, err := new(ReadWriter[*InMemoryMapping, *MappingPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) + actual, err := new(ReadWriter[InMemoryMapping, MappingPersister]).ReadParquetFile(bytes.NewReader(buf.Bytes())) require.NoError(t, err) assert.Equal(t, mem, actual) diff --git a/pkg/phlaredb/schemas/v1/strings.go b/pkg/phlaredb/schemas/v1/strings.go index 844aa460dc..d470bb4202 100644 --- a/pkg/phlaredb/schemas/v1/strings.go +++ b/pkg/phlaredb/schemas/v1/strings.go @@ -13,13 +13,13 @@ var stringsSchema = parquet.NewSchema("String", phlareparquet.Group{ type StringPersister struct{} -func (*StringPersister) Name() string { return "strings" } +func (StringPersister) Name() string { return "strings" } -func (*StringPersister) Schema() *parquet.Schema { return stringsSchema } +func (StringPersister) Schema() *parquet.Schema { return stringsSchema } -func (*StringPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } +func (StringPersister) SortingColumns() parquet.SortingOption { return parquet.SortingColumns() } -func (*StringPersister) Deconstruct(row parquet.Row, id uint64, s string) parquet.Row { +func (StringPersister) Deconstruct(row parquet.Row, id uint64, s string) parquet.Row { if cap(row) < 2 { row = make(parquet.Row, 0, 2) } @@ -29,6 +29,6 @@ func (*StringPersister) Deconstruct(row parquet.Row, id uint64, s string) parque return row } -func (*StringPersister) Reconstruct(row parquet.Row) (id uint64, s string, err error) { +func (StringPersister) Reconstruct(row parquet.Row) (id uint64, s string, err error) { return 0, row[1].String(), nil } diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index 95d0736c9d..a38d85687c 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -144,22 +144,22 @@ func (r *Reader) file(name string) (block.File, error) { func (r *Reader) partitionReader(h *PartitionHeader) *partition { p := &partition{ reader: r, - locations: parquetTableRange[*schemav1.InMemoryLocation, *schemav1.LocationPersister]{ + locations: parquetTableRange[schemav1.InMemoryLocation, schemav1.LocationPersister]{ bucket: r.bucket, headers: h.Locations, file: &r.locations, }, - mappings: parquetTableRange[*schemav1.InMemoryMapping, *schemav1.MappingPersister]{ + mappings: parquetTableRange[schemav1.InMemoryMapping, schemav1.MappingPersister]{ bucket: r.bucket, headers: h.Mappings, file: &r.mappings, }, - functions: parquetTableRange[*schemav1.InMemoryFunction, *schemav1.FunctionPersister]{ + functions: parquetTableRange[schemav1.InMemoryFunction, schemav1.FunctionPersister]{ bucket: r.bucket, headers: h.Functions, file: &r.functions, }, - strings: 
parquetTableRange[string, *schemav1.StringPersister]{ + strings: parquetTableRange[string, schemav1.StringPersister]{ bucket: r.bucket, headers: h.Strings, file: &r.strings, @@ -208,10 +208,10 @@ type partition struct { reader *Reader stacktraceChunks []*stacktraceChunkReader - locations parquetTableRange[*schemav1.InMemoryLocation, *schemav1.LocationPersister] - mappings parquetTableRange[*schemav1.InMemoryMapping, *schemav1.MappingPersister] - functions parquetTableRange[*schemav1.InMemoryFunction, *schemav1.FunctionPersister] - strings parquetTableRange[string, *schemav1.StringPersister] + locations parquetTableRange[schemav1.InMemoryLocation, schemav1.LocationPersister] + mappings parquetTableRange[schemav1.InMemoryMapping, schemav1.MappingPersister] + functions parquetTableRange[schemav1.InMemoryFunction, schemav1.FunctionPersister] + strings parquetTableRange[string, schemav1.StringPersister] } func (p *partition) init(ctx context.Context) (err error) { diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index 101d5e3e6b..af862c1c12 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -28,10 +28,10 @@ type writer struct { files []block.File // Parquet tables. - mappings parquetWriter[*schemav1.InMemoryMapping, *schemav1.MappingPersister] - functions parquetWriter[*schemav1.InMemoryFunction, *schemav1.FunctionPersister] - locations parquetWriter[*schemav1.InMemoryLocation, *schemav1.LocationPersister] - strings parquetWriter[string, *schemav1.StringPersister] + mappings parquetWriter[schemav1.InMemoryMapping, schemav1.MappingPersister] + functions parquetWriter[schemav1.InMemoryFunction, schemav1.FunctionPersister] + locations parquetWriter[schemav1.InMemoryLocation, schemav1.LocationPersister] + strings parquetWriter[string, schemav1.StringPersister] } func newWriter(c *Config) *writer { diff --git a/pkg/phlaredb/symdb/dedup_slice.go b/pkg/phlaredb/symdb/dedup_slice.go index faa8e9e53a..4cdef9accb 100644 --- a/pkg/phlaredb/symdb/dedup_slice.go +++ b/pkg/phlaredb/symdb/dedup_slice.go @@ -37,9 +37,9 @@ func (p *PartitionWriter) WriteProfileSymbols(profile *profilev1.Profile) []sche pprof.ZeroLabelStrings(profile) p.strings.ingest(profile.StringTable, rewrites) - mappings := make([]*schemav1.InMemoryMapping, len(profile.Mapping)) + mappings := make([]schemav1.InMemoryMapping, len(profile.Mapping)) for i, v := range profile.Mapping { - mappings[i] = &schemav1.InMemoryMapping{ + mappings[i] = schemav1.InMemoryMapping{ Id: v.Id, MemoryStart: v.MemoryStart, MemoryLimit: v.MemoryLimit, @@ -54,9 +54,9 @@ func (p *PartitionWriter) WriteProfileSymbols(profile *profilev1.Profile) []sche } p.mappings.ingest(mappings, rewrites) - funcs := make([]*schemav1.InMemoryFunction, len(profile.Function)) + funcs := make([]schemav1.InMemoryFunction, len(profile.Function)) for i, v := range profile.Function { - funcs[i] = &schemav1.InMemoryFunction{ + funcs[i] = schemav1.InMemoryFunction{ Id: v.Id, Name: uint32(v.Name), SystemName: uint32(v.SystemName), @@ -66,9 +66,9 @@ func (p *PartitionWriter) WriteProfileSymbols(profile *profilev1.Profile) []sche } p.functions.ingest(funcs, rewrites) - locs := make([]*schemav1.InMemoryLocation, len(profile.Location)) + locs := make([]schemav1.InMemoryLocation, len(profile.Location)) for i, v := range profile.Location { - x := &schemav1.InMemoryLocation{ + x := schemav1.InMemoryLocation{ Id: v.Id, Address: v.Address, MappingId: uint32(v.MappingId), @@ -214,7 +214,7 @@ type rewriter struct { type 
storeHelper[M schemav1.Models] interface { // some Models contain their own IDs within the struct, this allows to set them and keep track of the preexisting ID. It should return the oldID that is supposed to be rewritten. - setID(existingSliceID uint64, newID uint64, element M) uint64 + setID(existingSliceID uint64, newID uint64, element *M) uint64 // size returns a (rough estimation) of the size of a single element M size(M) uint64 @@ -222,7 +222,7 @@ type storeHelper[M schemav1.Models] interface { // clone copies parts that are not optimally sized from protobuf parsing clone(M) M - rewrite(*rewriter, M) error + rewrite(*rewriter, *M) error } type Helper[M schemav1.Models, K comparable] interface { @@ -261,7 +261,7 @@ func (s *deduplicatingSlice[M, K, H]) ingest(elems []M, rewriter *rewriter) { missing = missing[:0] // rewrite elements for pos := range elems { - _ = s.helper.rewrite(rewriter, elems[pos]) + _ = s.helper.rewrite(rewriter, &elems[pos]) } // try to find if element already exists in slice, when supposed to deduplicate @@ -269,7 +269,7 @@ func (s *deduplicatingSlice[M, K, H]) ingest(elems []M, rewriter *rewriter) { for pos := range elems { k := s.helper.key(elems[pos]) if posSlice, exists := s.lookup[k]; exists { - rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), elems[pos]))] = posSlice + rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), &elems[pos]))] = posSlice } else { missing = append(missing, int64(pos)) } @@ -284,14 +284,14 @@ func (s *deduplicatingSlice[M, K, H]) ingest(elems []M, rewriter *rewriter) { // check again if element exists k := s.helper.key(elems[pos]) if posSlice, exists := s.lookup[k]; exists { - rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), elems[pos]))] = posSlice + rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), &elems[pos]))] = posSlice continue } // add element to slice/map s.slice = append(s.slice, s.helper.clone(elems[pos])) s.lookup[k] = posSlice - rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), elems[pos]))] = posSlice + rewritingMap[int64(s.helper.setID(uint64(pos), uint64(posSlice), &elems[pos]))] = posSlice posSlice++ s.size.Add(s.helper.size(elems[pos])) } @@ -381,7 +381,7 @@ func (*stringsHelper) addToRewriter(r *rewriter, m idConversionTable) { } // nolint unused -func (*stringsHelper) rewrite(*rewriter, string) error { +func (*stringsHelper) rewrite(*rewriter, *string) error { return nil } @@ -389,7 +389,7 @@ func (*stringsHelper) size(s string) uint64 { return uint64(len(s)) } -func (*stringsHelper) setID(oldID, newID uint64, s string) uint64 { +func (*stringsHelper) setID(oldID, newID uint64, s *string) uint64 { return oldID } @@ -410,7 +410,7 @@ const ( type locationsHelper struct{} -func (*locationsHelper) key(l *schemav1.InMemoryLocation) locationsKey { +func (*locationsHelper) key(l schemav1.InMemoryLocation) locationsKey { return locationsKey{ Address: l.Address, MappingId: l.MappingId, @@ -465,15 +465,15 @@ func (*locationsHelper) setID(_, newID uint64, l *schemav1.InMemoryLocation) uin return oldID } -func (*locationsHelper) size(l *schemav1.InMemoryLocation) uint64 { +func (*locationsHelper) size(l schemav1.InMemoryLocation) uint64 { return uint64(len(l.Line))*lineSize + locationSize } -func (*locationsHelper) clone(l *schemav1.InMemoryLocation) *schemav1.InMemoryLocation { - x := *l +func (*locationsHelper) clone(l schemav1.InMemoryLocation) schemav1.InMemoryLocation { + x := l x.Line = make([]schemav1.InMemoryLine, len(l.Line)) copy(x.Line, 
l.Line) - return &x + return x } type mappingsHelper struct{} @@ -492,7 +492,7 @@ type mappingsKey struct { HasInlineFrames bool } -func (*mappingsHelper) key(m *schemav1.InMemoryMapping) mappingsKey { +func (*mappingsHelper) key(m schemav1.InMemoryMapping) mappingsKey { return mappingsKey{ MemoryStart: m.MemoryStart, MemoryLimit: m.MemoryLimit, @@ -523,13 +523,12 @@ func (*mappingsHelper) setID(_, newID uint64, m *schemav1.InMemoryMapping) uint6 return oldID } -func (*mappingsHelper) size(_ *schemav1.InMemoryMapping) uint64 { +func (*mappingsHelper) size(_ schemav1.InMemoryMapping) uint64 { return mappingSize } -func (*mappingsHelper) clone(m *schemav1.InMemoryMapping) *schemav1.InMemoryMapping { - x := *m - return &x +func (*mappingsHelper) clone(m schemav1.InMemoryMapping) schemav1.InMemoryMapping { + return m } type functionsKey struct { @@ -543,7 +542,7 @@ type functionsHelper struct{} const functionSize = uint64(unsafe.Sizeof(schemav1.InMemoryFunction{})) -func (*functionsHelper) key(f *schemav1.InMemoryFunction) functionsKey { +func (*functionsHelper) key(f schemav1.InMemoryFunction) functionsKey { return functionsKey{ Name: f.Name, SystemName: f.SystemName, @@ -569,11 +568,10 @@ func (*functionsHelper) setID(_, newID uint64, f *schemav1.InMemoryFunction) uin return oldID } -func (*functionsHelper) size(_ *schemav1.InMemoryFunction) uint64 { +func (*functionsHelper) size(_ schemav1.InMemoryFunction) uint64 { return functionSize } -func (*functionsHelper) clone(f *schemav1.InMemoryFunction) *schemav1.InMemoryFunction { - x := *f - return &x +func (*functionsHelper) clone(f schemav1.InMemoryFunction) schemav1.InMemoryFunction { + return f } diff --git a/pkg/phlaredb/symdb/partition_memory.go b/pkg/phlaredb/symdb/partition_memory.go index 0a127fcff5..ab3f7781de 100644 --- a/pkg/phlaredb/symdb/partition_memory.go +++ b/pkg/phlaredb/symdb/partition_memory.go @@ -2,6 +2,8 @@ package symdb import ( "context" + "fmt" + "hash/crc32" "io" "sync" @@ -13,9 +15,9 @@ type PartitionWriter struct { stacktraces *stacktracesPartition strings deduplicatingSlice[string, string, *stringsHelper] - mappings deduplicatingSlice[*schemav1.InMemoryMapping, mappingsKey, *mappingsHelper] - functions deduplicatingSlice[*schemav1.InMemoryFunction, functionsKey, *functionsHelper] - locations deduplicatingSlice[*schemav1.InMemoryLocation, locationsKey, *locationsHelper] + mappings deduplicatingSlice[schemav1.InMemoryMapping, mappingsKey, *mappingsHelper] + functions deduplicatingSlice[schemav1.InMemoryFunction, functionsKey, *functionsHelper] + locations deduplicatingSlice[schemav1.InMemoryLocation, locationsKey, *locationsHelper] } func (p *PartitionWriter) AppendStacktraces(dst []uint32, s []*schemav1.Stacktrace) { @@ -311,15 +313,15 @@ func SplitStacktraces(s []uint32, n uint32) []StacktracesRange { return cs } -func (p *PartitionWriter) AppendLocations(dst []uint32, locations []*schemav1.InMemoryLocation) { +func (p *PartitionWriter) AppendLocations(dst []uint32, locations []schemav1.InMemoryLocation) { p.locations.append(dst, locations) } -func (p *PartitionWriter) AppendMappings(dst []uint32, mappings []*schemav1.InMemoryMapping) { +func (p *PartitionWriter) AppendMappings(dst []uint32, mappings []schemav1.InMemoryMapping) { p.mappings.append(dst, mappings) } -func (p *PartitionWriter) AppendFunctions(dst []uint32, functions []*schemav1.InMemoryFunction) { +func (p *PartitionWriter) AppendFunctions(dst []uint32, functions []schemav1.InMemoryFunction) { p.functions.append(dst, functions) } @@ -364,3 +366,43 @@ 
func (p *PartitionWriter) WriteStats(s *PartitionStats) { func (p *PartitionWriter) Release() { // Noop. Satisfies PartitionReader interface. } + +func (p *PartitionWriter) write(w *fileWriter) (err error) { + senc := newSymbolsEncoder[string](w.w, new(stringsBlockEncoder)) + senc.Encode(p.strings.slice) + + menc := newSymbolsEncoder[schemav1.InMemoryMapping](w.w, new(mappingsBlockEncoder)) + menc.Encode(p.mappings.slice) + + fenc := newSymbolsEncoder[schemav1.InMemoryFunction](w.w, new(functionsBlockEncoder)) + fenc.Encode(p.functions.slice) + + lenc := newSymbolsEncoder[schemav1.InMemoryLocation](w.w, new(locationsBlockEncoder)) + lenc.Encode(p.locations.slice) + + for ci, c := range p.stacktraces.chunks { + stacks := c.stacks + if stacks == 0 { + stacks = uint32(len(p.stacktraces.hashToIdx)) + } + h := StacktraceChunkHeader{ + Offset: w.w.offset, + Size: 0, // Set later. + Partition: p.header.Partition, + ChunkIndex: uint16(ci), + ChunkEncoding: ChunkEncodingGroupVarint, + Stacktraces: stacks, + StacktraceNodes: c.tree.len(), + StacktraceMaxDepth: 0, // TODO + StacktraceMaxNodes: c.partition.maxNodesPerChunk, + CRC: 0, // Set later. + } + crc := crc32.New(castagnoli) + if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.w)); err != nil { + return fmt.Errorf("writing stacktrace chunk data: %w", err) + } + h.CRC = crc.Sum32() + p.header.StacktraceChunks = append(p.header.StacktraceChunks, h) + } + return nil +} diff --git a/pkg/phlaredb/symdb/rewriter.go b/pkg/phlaredb/symdb/rewriter.go index d19e1e2ca1..437a45ab1c 100644 --- a/pkg/phlaredb/symdb/rewriter.go +++ b/pkg/phlaredb/symdb/rewriter.go @@ -68,9 +68,9 @@ func (r *Rewriter) getOrCreatePartition(partition uint64) (_ *partitionRewriter, pr.WriteStats(&stats) n.stacktraces = newLookupTable[[]int32](stats.MaxStacktraceID) - n.locations = newLookupTable[*schemav1.InMemoryLocation](stats.LocationsTotal) - n.mappings = newLookupTable[*schemav1.InMemoryMapping](stats.MappingsTotal) - n.functions = newLookupTable[*schemav1.InMemoryFunction](stats.FunctionsTotal) + n.locations = newLookupTable[schemav1.InMemoryLocation](stats.LocationsTotal) + n.mappings = newLookupTable[schemav1.InMemoryMapping](stats.MappingsTotal) + n.functions = newLookupTable[schemav1.InMemoryFunction](stats.FunctionsTotal) n.strings = newLookupTable[string](stats.StringsTotal) r.partitions[partition] = n @@ -84,9 +84,9 @@ type partitionRewriter struct { dst *PartitionWriter stacktraces *lookupTable[[]int32] - locations *lookupTable[*schemav1.InMemoryLocation] - mappings *lookupTable[*schemav1.InMemoryMapping] - functions *lookupTable[*schemav1.InMemoryFunction] + locations *lookupTable[schemav1.InMemoryLocation] + mappings *lookupTable[schemav1.InMemoryMapping] + functions *lookupTable[schemav1.InMemoryFunction] strings *lookupTable[string] // FIXME(kolesnikovae): schemav1.Stacktrace should be just a uint32 slice: @@ -162,25 +162,25 @@ func (p *partitionRewriter) appendRewrite(stacktraces []uint32) error { p.dst.AppendStrings(p.strings.buf, p.strings.values) p.strings.updateResolved() - for _, v := range p.functions.values { - v.Name = p.strings.lookupResolved(v.Name) - v.Filename = p.strings.lookupResolved(v.Filename) - v.SystemName = p.strings.lookupResolved(v.SystemName) + for i := range p.functions.values { + p.functions.values[i].Name = p.strings.lookupResolved(p.functions.values[i].Name) + p.functions.values[i].Filename = p.strings.lookupResolved(p.functions.values[i].Filename) + p.functions.values[i].SystemName = 
p.strings.lookupResolved(p.functions.values[i].SystemName) } p.dst.AppendFunctions(p.functions.buf, p.functions.values) p.functions.updateResolved() - for _, v := range p.mappings.values { - v.BuildId = p.strings.lookupResolved(v.BuildId) - v.Filename = p.strings.lookupResolved(v.Filename) + for i := range p.mappings.values { + p.mappings.values[i].BuildId = p.strings.lookupResolved(p.mappings.values[i].BuildId) + p.mappings.values[i].Filename = p.strings.lookupResolved(p.mappings.values[i].Filename) } p.dst.AppendMappings(p.mappings.buf, p.mappings.values) p.mappings.updateResolved() - for _, v := range p.locations.values { - v.MappingId = p.mappings.lookupResolved(v.MappingId) - for j, line := range v.Line { - v.Line[j].FunctionId = p.functions.lookupResolved(line.FunctionId) + for i := range p.locations.values { + p.locations.values[i].MappingId = p.mappings.lookupResolved(p.locations.values[i].MappingId) + for j, line := range p.locations.values[i].Line { + p.locations.values[i].Line[j].FunctionId = p.functions.lookupResolved(line.FunctionId) } } p.dst.AppendLocations(p.locations.buf, p.locations.values) @@ -247,9 +247,9 @@ func (p *partitionRewriter) InsertStacktrace(stacktrace uint32, locations []int3 func cloneSymbolsPartially(x *Symbols) *Symbols { n := Symbols{ Stacktraces: x.Stacktraces, - Locations: make([]*schemav1.InMemoryLocation, len(x.Locations)), - Mappings: make([]*schemav1.InMemoryMapping, len(x.Mappings)), - Functions: make([]*schemav1.InMemoryFunction, len(x.Functions)), + Locations: make([]schemav1.InMemoryLocation, len(x.Locations)), + Mappings: make([]schemav1.InMemoryMapping, len(x.Mappings)), + Functions: make([]schemav1.InMemoryFunction, len(x.Functions)), Strings: x.Strings, } for i, l := range x.Locations { diff --git a/pkg/phlaredb/symdb/symdb.go b/pkg/phlaredb/symdb/symdb.go index 934b0f01a3..e0dedf8356 100644 --- a/pkg/phlaredb/symdb/symdb.go +++ b/pkg/phlaredb/symdb/symdb.go @@ -26,9 +26,9 @@ type PartitionReader interface { type Symbols struct { Stacktraces StacktraceResolver - Locations []*schemav1.InMemoryLocation - Mappings []*schemav1.InMemoryMapping - Functions []*schemav1.InMemoryFunction + Locations []schemav1.InMemoryLocation + Mappings []schemav1.InMemoryMapping + Functions []schemav1.InMemoryFunction Strings []string } diff --git a/pkg/slices/slices.go b/pkg/slices/slices.go index e574ccbb02..d3245fe740 100644 --- a/pkg/slices/slices.go +++ b/pkg/slices/slices.go @@ -1,7 +1,7 @@ package slices import ( - "golang.org/x/exp/slices" + "slices" ) // RemoveInPlace removes all elements from a slice that match the given predicate. 
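A note on the helper changes above: once the deduplicating slices store values rather than pointers, clone degenerates into returning its argument, and the rewriter has to assign through the slice index, since ranging by value would only mutate a copy. A minimal sketch of the idea in isolation (the dedup type and its fields are illustrative, not the actual pyroscope types):

    package main

    import "fmt"

    // dedup is a toy value-semantics deduplicating slice: values serve as
    // their own map keys, so "cloning" an element is an ordinary copy and
    // needs no per-element heap allocation.
    type dedup[T comparable] struct {
    	index map[T]uint32
    	slice []T
    }

    func (d *dedup[T]) append(v T) uint32 {
    	if id, ok := d.index[v]; ok {
    		return id
    	}
    	id := uint32(len(d.slice))
    	d.slice = append(d.slice, v) // The value is copied into the slice.
    	d.index[v] = id
    	return id
    }

    func main() {
    	d := dedup[string]{index: make(map[string]uint32)}
    	fmt.Println(d.append("a"), d.append("b"), d.append("a")) // 0 1 0
    	// In-place rewrites must index the slice: `for _, v := range d.slice`
    	// hands out copies, which is why the rewrite loops above switched to
    	// `for i := range ...` with indexed assignment.
    }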
From 6c98c3f3dbb763275c43c1213ebfbcd7a37eb296 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Thu, 4 Apr 2024 19:16:29 +0800 Subject: [PATCH 10/36] write path --- pkg/phlaredb/symdb/block_reader.go | 14 +- pkg/phlaredb/symdb/block_writer.go | 270 +++---------------------- pkg/phlaredb/symdb/format.go | 264 +++++++++++++----------- pkg/phlaredb/symdb/partition_memory.go | 68 ++++--- 4 files changed, 223 insertions(+), 393 deletions(-) diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index a38d85687c..4ef32ae3c6 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -146,26 +146,26 @@ func (r *Reader) partitionReader(h *PartitionHeader) *partition { reader: r, locations: parquetTableRange[schemav1.InMemoryLocation, schemav1.LocationPersister]{ bucket: r.bucket, - headers: h.Locations, + headers: SymbolsBlockReferencesAsRows(h.Locations), file: &r.locations, }, mappings: parquetTableRange[schemav1.InMemoryMapping, schemav1.MappingPersister]{ bucket: r.bucket, - headers: h.Mappings, + headers: SymbolsBlockReferencesAsRows(h.Mappings), file: &r.mappings, }, functions: parquetTableRange[schemav1.InMemoryFunction, schemav1.FunctionPersister]{ bucket: r.bucket, - headers: h.Functions, + headers: SymbolsBlockReferencesAsRows(h.Functions), file: &r.functions, }, strings: parquetTableRange[string, schemav1.StringPersister]{ bucket: r.bucket, - headers: h.Strings, + headers: SymbolsBlockReferencesAsRows(h.Strings), file: &r.strings, }, } - p.setStacktracesChunks(h.StacktraceChunks) + p.setStacktracesChunks(h.Stacktraces) return p } @@ -295,7 +295,7 @@ func (p *partition) ResolveStacktraceLocations(ctx context.Context, dst Stacktra return nil } -func (p *partition) setStacktracesChunks(chunks []StacktraceChunkHeader) { +func (p *partition) setStacktracesChunks(chunks []StacktraceBlockHeader) { p.stacktraceChunks = make([]*stacktraceChunkReader, len(chunks)) for i, c := range chunks { p.stacktraceChunks[i] = &stacktraceChunkReader{ @@ -347,7 +347,7 @@ func (r *stacktracesLookup) do() error { type stacktraceChunkReader struct { reader *Reader - header StacktraceChunkHeader + header StacktraceBlockHeader r refctr.Counter t *parentPointerTree diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index af862c1c12..099f04da2f 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -2,36 +2,29 @@ package symdb import ( "bufio" - "context" "fmt" - "hash/crc32" "io" "os" "path/filepath" "github.com/grafana/dskit/multierror" - "github.com/parquet-go/parquet-go" - "golang.org/x/sync/errgroup" "github.com/grafana/pyroscope/pkg/phlaredb/block" - schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" - "github.com/grafana/pyroscope/pkg/util/build" - "github.com/grafana/pyroscope/pkg/util/math" + v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" ) type writer struct { config *Config - index IndexFile - indexWriter *fileWriter - stacktraces *fileWriter - files []block.File + index IndexFile + indexFile *fileWriter + dataFile *fileWriter + files []block.File - // Parquet tables. 
-	mappings  parquetWriter[schemav1.InMemoryMapping, schemav1.MappingPersister]
-	functions parquetWriter[schemav1.InMemoryFunction, schemav1.FunctionPersister]
-	locations parquetWriter[schemav1.InMemoryLocation, schemav1.LocationPersister]
-	strings   parquetWriter[string, schemav1.StringPersister]
+	stringsEncoder   *symbolsEncoder[string]
+	mappingsEncoder  *symbolsEncoder[v1.InMemoryMapping]
+	functionsEncoder *symbolsEncoder[v1.InMemoryFunction]
+	locationsEncoder *symbolsEncoder[v1.InMemoryLocation]
 }
 
 func newWriter(c *Config) *writer {
@@ -40,82 +33,30 @@ func newWriter(c *Config) *writer {
 		index: IndexFile{
 			Header: Header{
 				Magic:   symdbMagic,
-				Version: FormatV2,
+				Version: FormatV3,
 			},
 		},
+
+		stringsEncoder:   newSymbolsEncoder[string](nil, new(stringsBlockEncoder)),
+		mappingsEncoder:  newSymbolsEncoder[v1.InMemoryMapping](nil, new(mappingsBlockEncoder)),
+		functionsEncoder: newSymbolsEncoder[v1.InMemoryFunction](nil, new(functionsBlockEncoder)),
+		locationsEncoder: newSymbolsEncoder[v1.InMemoryLocation](nil, new(locationsBlockEncoder)),
 	}
 }
 
-func (w *writer) writePartitions(partitions []*PartitionWriter) error {
-	g, _ := errgroup.WithContext(context.Background())
-	g.Go(func() (err error) {
-		if w.stacktraces, err = w.newFile(StacktracesFileName); err != nil {
-			return err
-		}
-		for _, partition := range partitions {
-			if err = w.writeStacktraces(partition); err != nil {
-				return err
-			}
-		}
-		return w.stacktraces.Close()
-	})
-
-	g.Go(func() (err error) {
-		if err = w.strings.init(w.config.Dir, w.config.Parquet); err != nil {
-			return err
-		}
-		for _, partition := range partitions {
-			if partition.header.Strings, err = w.strings.readFrom(partition.strings.slice); err != nil {
-				return err
-			}
-		}
-		return w.strings.Close()
-	})
-
-	g.Go(func() (err error) {
-		if err = w.functions.init(w.config.Dir, w.config.Parquet); err != nil {
-			return err
-		}
-		for _, partition := range partitions {
-			if partition.header.Functions, err = w.functions.readFrom(partition.functions.slice); err != nil {
-				return err
-			}
-		}
-		return w.functions.Close()
-	})
-
-	g.Go(func() (err error) {
-		if err = w.mappings.init(w.config.Dir, w.config.Parquet); err != nil {
-			return err
-		}
-		for _, partition := range partitions {
-			if partition.header.Mappings, err = w.mappings.readFrom(partition.mappings.slice); err != nil {
-				return err
-			}
-		}
-		return w.mappings.Close()
-	})
-
-	g.Go(func() (err error) {
-		if err = w.locations.init(w.config.Dir, w.config.Parquet); err != nil {
-			return err
-		}
-		for _, partition := range partitions {
-			if partition.header.Locations, err = w.locations.readFrom(partition.locations.slice); err != nil {
-				return err
-			}
-		}
-		return w.locations.Close()
-	})
-
-	if err := g.Wait(); err != nil {
+func (w *writer) writePartitions(partitions []*PartitionWriter) (err error) {
+	if w.dataFile, err = w.newFile(DataFileName); err != nil {
 		return err
 	}
-
-	for _, partition := range partitions {
-		w.index.PartitionHeaders = append(w.index.PartitionHeaders, &partition.header)
+	defer func() {
+		err = multierror.New(err, w.dataFile.Close()).Err()
+	}()
+	for _, p := range partitions {
+		if err = p.writeTo(w); err != nil {
+			return err
+		}
+		w.index.PartitionHeaders = append(w.index.PartitionHeaders, &p.header)
 	}
-
 	return nil
 }
@@ -124,40 +65,8 @@ func (w *writer) Flush() (err error) {
 		return err
 	}
 	w.files = []block.File{
-		w.indexWriter.meta(),
-		w.stacktraces.meta(),
-		w.locations.meta(),
-		w.mappings.meta(),
-		w.functions.meta(),
-		w.strings.meta(),
-	}
-	return nil
-}
-
-func (w *writer) writeStacktraces(partition
*PartitionWriter) (err error) { - for ci, c := range partition.stacktraces.chunks { - stacks := c.stacks - if stacks == 0 { - stacks = uint32(len(partition.stacktraces.hashToIdx)) - } - h := StacktraceChunkHeader{ - Offset: w.stacktraces.w.offset, - Size: 0, // Set later. - Partition: partition.header.Partition, - ChunkIndex: uint16(ci), - ChunkEncoding: ChunkEncodingGroupVarint, - Stacktraces: stacks, - StacktraceNodes: c.tree.len(), - StacktraceMaxDepth: 0, // TODO - StacktraceMaxNodes: c.partition.maxNodesPerChunk, - CRC: 0, // Set later. - } - crc := crc32.New(castagnoli) - if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.stacktraces)); err != nil { - return fmt.Errorf("writing stacktrace chunk data: %w", err) - } - h.CRC = crc.Sum32() - partition.header.StacktraceChunks = append(partition.header.StacktraceChunks, h) + w.indexFile.meta(), + w.dataFile.meta(), } return nil } @@ -171,13 +80,13 @@ func (w *writer) createDir() error { func (w *writer) writeIndexFile() (err error) { // Write the index file only after all the files were flushed. - if w.indexWriter, err = w.newFile(IndexFileName); err != nil { + if w.indexFile, err = w.newFile(IndexFileName); err != nil { return err } defer func() { - err = multierror.New(err, w.indexWriter.Close()).Err() + err = multierror.New(err, w.indexFile.Close()).Err() }() - if _, err = w.index.WriteTo(w.indexWriter); err != nil { + if _, err = w.index.WriteTo(w.indexFile); err != nil { return fmt.Errorf("failed to write index file: %w", err) } return err @@ -265,122 +174,3 @@ func (w *writerOffset) Write(p []byte) (n int, err error) { w.offset += int64(n) return n, err } - -type parquetWriter[M schemav1.Models, P schemav1.Persister[M]] struct { - persister P - config ParquetConfig - - currentRowGroup uint32 - currentRows uint32 - rowsTotal uint64 - - buffer *parquet.Buffer - rowsBatch []parquet.Row - - writer *parquet.GenericWriter[P] - file *os.File - path string -} - -func (s *parquetWriter[M, P]) init(dir string, c ParquetConfig) (err error) { - s.config = c - s.path = filepath.Join(dir, s.persister.Name()+block.ParquetSuffix) - s.file, err = os.OpenFile(s.path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o644) - if err != nil { - return err - } - s.rowsBatch = make([]parquet.Row, 0, 128) - s.buffer = parquet.NewBuffer(s.persister.Schema(), parquet.ColumnBufferCapacity(s.config.MaxBufferRowCount)) - s.writer = parquet.NewGenericWriter[P](s.file, s.persister.Schema(), - parquet.CreatedBy("github.com/grafana/pyroscope/", build.Version, build.Revision), - parquet.PageBufferSize(3*1024*1024), - ) - return nil -} - -func (s *parquetWriter[M, P]) readFrom(values []M) (ranges []RowRangeReference, err error) { - for len(values) > 0 { - var r RowRangeReference - if r, err = s.writeRows(values); err != nil { - return nil, err - } - ranges = append(ranges, r) - values = values[r.Rows:] - } - return ranges, nil -} - -func (s *parquetWriter[M, P]) writeRows(values []M) (r RowRangeReference, err error) { - r.RowGroup = s.currentRowGroup - r.Index = s.currentRows - if len(values) == 0 { - return r, nil - } - var n int - for len(values) > 0 && int(s.currentRows) < s.config.MaxBufferRowCount { - s.fillBatch(values) - if n, err = s.buffer.WriteRows(s.rowsBatch); err != nil { - return r, err - } - s.currentRows += uint32(n) - r.Rows += uint32(n) - values = values[n:] - } - if int(s.currentRows)+cap(s.rowsBatch) >= s.config.MaxBufferRowCount { - if err = s.flushBuffer(); err != nil { - return r, err - } - } - return r, nil -} - -func (s *parquetWriter[M, P]) fillBatch(values 
[]M) int { - m := math.Min(len(values), cap(s.rowsBatch)) - s.rowsBatch = s.rowsBatch[:m] - for i := 0; i < m; i++ { - row := s.rowsBatch[i][:0] - s.rowsBatch[i] = s.persister.Deconstruct(row, 0, values[i]) - } - return m -} - -func (s *parquetWriter[M, P]) flushBuffer() error { - if _, err := s.writer.WriteRowGroup(s.buffer); err != nil { - return err - } - s.rowsTotal += uint64(s.buffer.NumRows()) - s.currentRowGroup++ - s.currentRows = 0 - s.buffer.Reset() - return nil -} - -func (s *parquetWriter[M, P]) meta() block.File { - f := block.File{ - // Note that the path is relative to the symdb root dir. - RelPath: filepath.Base(s.path), - Parquet: &block.ParquetFile{ - NumRows: s.rowsTotal, - }, - } - if f.Parquet.NumRows > 0 { - f.Parquet.NumRowGroups = uint64(s.currentRowGroup + 1) - } - if stat, err := os.Stat(s.path); err == nil { - f.SizeBytes = uint64(stat.Size()) - } - return f -} - -func (s *parquetWriter[M, P]) Close() error { - if err := s.flushBuffer(); err != nil { - return fmt.Errorf("flushing parquet buffer: %w", err) - } - if err := s.writer.Close(); err != nil { - return fmt.Errorf("closing parquet writer: %w", err) - } - if err := s.file.Close(); err != nil { - return fmt.Errorf("closing parquet file: %w", err) - } - return nil -} diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 5ed441ed34..8c8c4d0edc 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -36,16 +36,16 @@ const ( DefaultDirName = "symbols" IndexFileName = "index.symdb" - StacktracesFileName = "stacktraces.symdb" + StacktracesFileName = "stacktraces.symdb" // Used in v1 and v2. + DataFileName = "data.symdb" // Added in v3. ) -const HeaderSize = int(unsafe.Sizeof(Header{})) - const ( _ = iota FormatV1 FormatV2 + FormatV3 unknownVersion ) @@ -96,6 +96,8 @@ type Header struct { Reserved [8]byte // Reserved for future use. 
} +const HeaderSize = int(unsafe.Sizeof(Header{})) + func (h *Header) MarshalBinary() ([]byte, error) { b := make([]byte, HeaderSize) copy(b[0:4], h.Magic[:]) @@ -171,11 +173,11 @@ type PartitionHeaders []*PartitionHeader type PartitionHeader struct { Partition uint64 - StacktraceChunks []StacktraceChunkHeader - Locations []RowRangeReference - Mappings []RowRangeReference - Functions []RowRangeReference - Strings []RowRangeReference + Stacktraces []StacktraceBlockHeader + Locations []SymbolsBlockReference + Mappings []SymbolsBlockReference + Functions []SymbolsBlockReference + Strings []SymbolsBlockReference } func (h *PartitionHeaders) Size() int64 { @@ -220,13 +222,13 @@ func (h *PartitionHeaders) Unmarshal(b []byte) error { func (h *PartitionHeaders) fromChunks(b []byte) error { s := len(b) - if s%stacktraceChunkHeaderSize > 0 { + if s%stacktraceBlockHeaderSize > 0 { return ErrInvalidSize } - chunks := make([]StacktraceChunkHeader, s/stacktraceChunkHeaderSize) + chunks := make([]StacktraceBlockHeader, s/stacktraceBlockHeaderSize) for i := range chunks { - off := i * stacktraceChunkHeaderSize - chunks[i].unmarshal(b[off : off+stacktraceChunkHeaderSize]) + off := i * stacktraceBlockHeaderSize + chunks[i].unmarshal(b[off : off+stacktraceBlockHeaderSize]) } var p *PartitionHeader for _, c := range chunks { @@ -234,60 +236,60 @@ func (h *PartitionHeaders) fromChunks(b []byte) error { p = &PartitionHeader{Partition: c.Partition} *h = append(*h, p) } - p.StacktraceChunks = append(p.StacktraceChunks, c) + p.Stacktraces = append(p.Stacktraces, c) } return nil } func (h *PartitionHeader) marshal(buf []byte) { binary.BigEndian.PutUint64(buf[0:8], h.Partition) - binary.BigEndian.PutUint32(buf[8:12], uint32(len(h.StacktraceChunks))) + binary.BigEndian.PutUint32(buf[8:12], uint32(len(h.Stacktraces))) binary.BigEndian.PutUint32(buf[12:16], uint32(len(h.Locations))) binary.BigEndian.PutUint32(buf[16:20], uint32(len(h.Mappings))) binary.BigEndian.PutUint32(buf[20:24], uint32(len(h.Functions))) binary.BigEndian.PutUint32(buf[24:28], uint32(len(h.Strings))) n := 28 - for i := range h.StacktraceChunks { - h.StacktraceChunks[i].marshal(buf[n:]) - n += stacktraceChunkHeaderSize + for i := range h.Stacktraces { + h.Stacktraces[i].marshal(buf[n:]) + n += stacktraceBlockHeaderSize } - n += marshalRowRangeReferences(buf[n:], h.Locations) - n += marshalRowRangeReferences(buf[n:], h.Mappings) - n += marshalRowRangeReferences(buf[n:], h.Functions) - marshalRowRangeReferences(buf[n:], h.Strings) + n += marshalSymbolsBlockReferences(buf[n:], h.Locations) + n += marshalSymbolsBlockReferences(buf[n:], h.Mappings) + n += marshalSymbolsBlockReferences(buf[n:], h.Functions) + marshalSymbolsBlockReferences(buf[n:], h.Strings) } func (h *PartitionHeader) unmarshal(buf []byte) (err error) { h.Partition = binary.BigEndian.Uint64(buf[0:8]) - h.StacktraceChunks = make([]StacktraceChunkHeader, int(binary.BigEndian.Uint32(buf[8:12]))) - h.Locations = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[12:16]))) - h.Mappings = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[16:20]))) - h.Functions = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[20:24]))) - h.Strings = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[24:28]))) + h.Stacktraces = make([]StacktraceBlockHeader, int(binary.BigEndian.Uint32(buf[8:12]))) + h.Locations = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[12:16]))) + h.Mappings = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[16:20]))) + 
h.Functions = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[20:24]))) + h.Strings = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[24:28]))) buf = buf[28:] - stacktracesSize := len(h.StacktraceChunks) * stacktraceChunkHeaderSize + stacktracesSize := len(h.Stacktraces) * stacktraceBlockHeaderSize if err = h.unmarshalStacktraceChunks(buf[:stacktracesSize]); err != nil { return err } buf = buf[stacktracesSize:] - locationsSize := len(h.Locations) * rowRangeReferenceSize - if err = h.unmarshalRowRangeReferences(h.Locations, buf[:locationsSize]); err != nil { + locationsSize := len(h.Locations) * symbolsBlockReferenceSize + if err = h.unmarshalSymbolsBlockReferences(h.Locations, buf[:locationsSize]); err != nil { return err } buf = buf[locationsSize:] - mappingsSize := len(h.Mappings) * rowRangeReferenceSize - if err = h.unmarshalRowRangeReferences(h.Mappings, buf[:mappingsSize]); err != nil { + mappingsSize := len(h.Mappings) * symbolsBlockReferenceSize + if err = h.unmarshalSymbolsBlockReferences(h.Mappings, buf[:mappingsSize]); err != nil { return err } buf = buf[mappingsSize:] - functionsSize := len(h.Functions) * rowRangeReferenceSize - if err = h.unmarshalRowRangeReferences(h.Functions, buf[:functionsSize]); err != nil { + functionsSize := len(h.Functions) * symbolsBlockReferenceSize + if err = h.unmarshalSymbolsBlockReferences(h.Functions, buf[:functionsSize]); err != nil { return err } buf = buf[functionsSize:] - stringsSize := len(h.Strings) * rowRangeReferenceSize - if err = h.unmarshalRowRangeReferences(h.Strings, buf[:stringsSize]); err != nil { + stringsSize := len(h.Strings) * symbolsBlockReferenceSize + if err = h.unmarshalSymbolsBlockReferences(h.Strings, buf[:stringsSize]); err != nil { return err } @@ -296,120 +298,84 @@ func (h *PartitionHeader) unmarshal(buf []byte) (err error) { func (h *PartitionHeader) Size() int64 { s := 28 - s += len(h.StacktraceChunks) * stacktraceChunkHeaderSize + s += len(h.Stacktraces) * stacktraceBlockHeaderSize r := len(h.Locations) + len(h.Mappings) + len(h.Functions) + len(h.Strings) - s += r * rowRangeReferenceSize + s += r * symbolsBlockReferenceSize return int64(s) } func (h *PartitionHeader) unmarshalStacktraceChunks(b []byte) error { s := len(b) - if s%stacktraceChunkHeaderSize > 0 { + if s%stacktraceBlockHeaderSize > 0 { return ErrInvalidSize } - for i := range h.StacktraceChunks { - off := i * stacktraceChunkHeaderSize - h.StacktraceChunks[i].unmarshal(b[off : off+stacktraceChunkHeaderSize]) + for i := range h.Stacktraces { + off := i * stacktraceBlockHeaderSize + h.Stacktraces[i].unmarshal(b[off : off+stacktraceBlockHeaderSize]) } return nil } -func (h *PartitionHeader) unmarshalRowRangeReferences(refs []RowRangeReference, b []byte) error { +func (h *PartitionHeader) unmarshalSymbolsBlockReferences(refs []SymbolsBlockReference, b []byte) error { s := len(b) - if s%rowRangeReferenceSize > 0 { + if s%symbolsBlockReferenceSize > 0 { return ErrInvalidSize } for i := range refs { - off := i * rowRangeReferenceSize - refs[i].unmarshal(b[off : off+rowRangeReferenceSize]) + off := i * symbolsBlockReferenceSize + refs[i].unmarshal(b[off : off+symbolsBlockReferenceSize]) } return nil } -func marshalRowRangeReferences(b []byte, refs []RowRangeReference) int { +func marshalSymbolsBlockReferences(b []byte, refs []SymbolsBlockReference) int { var off int for i := range refs { - refs[i].marshal(b[off : off+rowRangeReferenceSize]) - off += rowRangeReferenceSize + refs[i].marshal(b[off : off+symbolsBlockReferenceSize]) + 
off += symbolsBlockReferenceSize
 	}
 	return off
 }
 
-const rowRangeReferenceSize = int(unsafe.Sizeof(RowRangeReference{}))
-
-type RowRangeReference struct {
-	RowGroup uint32
-	Index    uint32
-	Rows     uint32
+type SymbolsBlockReference struct {
+	Offset uint32
+	Size   uint32
+	CRC    uint32
 }
 
-func (r *RowRangeReference) marshal(b []byte) {
-	binary.BigEndian.PutUint32(b[0:4], r.RowGroup)
-	binary.BigEndian.PutUint32(b[4:8], r.Index)
-	binary.BigEndian.PutUint32(b[8:12], r.Rows)
-}
+const symbolsBlockReferenceSize = int(unsafe.Sizeof(SymbolsBlockReference{}))
 
-func (r *RowRangeReference) unmarshal(b []byte) {
-	r.RowGroup = binary.BigEndian.Uint32(b[0:4])
-	r.Index = binary.BigEndian.Uint32(b[4:8])
-	r.Rows = binary.BigEndian.Uint32(b[8:12])
+func (r *SymbolsBlockReference) marshal(b []byte) {
+	binary.BigEndian.PutUint32(b[0:4], r.Offset)
+	binary.BigEndian.PutUint32(b[4:8], r.Size)
+	binary.BigEndian.PutUint32(b[8:12], r.CRC)
 }
 
-const stacktraceChunkHeaderSize = int(unsafe.Sizeof(StacktraceChunkHeader{}))
-
-type StacktraceChunkHeader struct {
-	Offset int64
-	Size   int64
-
-	Partition     uint64
-	ChunkIndex    uint16
-	ChunkEncoding ChunkEncoding
-	_             [5]byte // Reserved.
-
-	Stacktraces        uint32 // Number of unique stack traces in the chunk.
-	StacktraceNodes    uint32 // Number of nodes in the stacktrace tree.
-	StacktraceMaxDepth uint32 // Max stack trace depth in the tree.
-	StacktraceMaxNodes uint32 // Max number of nodes at the time of the chunk creation.
-
-	_   [12]byte // Padding. 64 bytes per chunk header.
-	CRC uint32   // Checksum of the chunk data [Offset:Size).
+func (r *SymbolsBlockReference) unmarshal(b []byte) {
+	r.Offset = binary.BigEndian.Uint32(b[0:4])
+	r.Size = binary.BigEndian.Uint32(b[4:8])
+	r.CRC = binary.BigEndian.Uint32(b[8:12])
 }
 
-type ChunkEncoding byte
-
-const (
-	_ ChunkEncoding = iota
-	ChunkEncodingGroupVarint
-)
+func (r *SymbolsBlockReference) AsRowRange() RowRangeReference {
+	return RowRangeReference{
+		RowGroup: r.Offset,
+		Index:    r.Size,
+		Rows:     r.CRC,
+	}
+}
 
-func (h *StacktraceChunkHeader) marshal(b []byte) {
-	binary.BigEndian.PutUint64(b[0:8], uint64(h.Offset))
-	binary.BigEndian.PutUint64(b[8:16], uint64(h.Size))
-	binary.BigEndian.PutUint64(b[16:24], h.Partition)
-	binary.BigEndian.PutUint16(b[24:26], h.ChunkIndex)
-	b[27] = byte(h.ChunkEncoding)
-	// 5 bytes reserved.
-	binary.BigEndian.PutUint32(b[32:36], h.Stacktraces)
-	binary.BigEndian.PutUint32(b[36:40], h.StacktraceNodes)
-	binary.BigEndian.PutUint32(b[40:44], h.StacktraceMaxDepth)
-	binary.BigEndian.PutUint32(b[44:48], h.StacktraceMaxNodes)
-	// 12 bytes reserved.
-	binary.BigEndian.PutUint32(b[60:64], h.CRC)
+type RowRangeReference struct {
+	RowGroup uint32
+	Index    uint32
+	Rows     uint32
 }
 
-func (h *StacktraceChunkHeader) unmarshal(b []byte) {
-	h.Offset = int64(binary.BigEndian.Uint64(b[0:8]))
-	h.Size = int64(binary.BigEndian.Uint64(b[8:16]))
-	h.Partition = binary.BigEndian.Uint64(b[16:24])
-	h.ChunkIndex = binary.BigEndian.Uint16(b[24:26])
-	h.ChunkEncoding = ChunkEncoding(b[27])
-	// 5 bytes reserved.
-	h.Stacktraces = binary.BigEndian.Uint32(b[32:36])
-	h.StacktraceNodes = binary.BigEndian.Uint32(b[36:40])
-	h.StacktraceMaxDepth = binary.BigEndian.Uint32(b[40:44])
-	h.StacktraceMaxNodes = binary.BigEndian.Uint32(b[44:48])
-	// 12 bytes reserved.
-	h.CRC = binary.BigEndian.Uint32(b[60:64])
+// SymbolsBlockReferencesAsRows re-interprets SymbolsBlockReference as
+// RowRangeReference, which was used to describe parquet table row ranges (v2).
+// Both types have identical binary layouts but different semantics.
+func SymbolsBlockReferencesAsRows(s []SymbolsBlockReference) []RowRangeReference { + return *(*[]RowRangeReference)(unsafe.Pointer(&s)) } func ReadIndexFile(b []byte) (f IndexFile, err error) { @@ -441,7 +407,7 @@ func ReadIndexFile(b []byte) (f IndexFile, err error) { return f, fmt.Errorf("unmarshal stacktraces: %w", err) } - case FormatV2: + case FormatV2, FormatV3: ph := f.TOC.Entries[tocEntryPartitionHeaders] if err = f.PartitionHeaders.Unmarshal(b[ph.Offset : ph.Offset+ph.Size]); err != nil { return f, fmt.Errorf("reading partition headers: %w", err) @@ -488,6 +454,64 @@ func (f *IndexFile) WriteTo(dst io.Writer) (n int64, err error) { return w.offset, nil } +type StacktraceBlockHeader struct { + Offset int64 + Size int64 + + Partition uint64 // Used in v1. + BlockIndex uint16 // Used in v1. + + Encoding ChunkEncoding + _ [5]byte // Reserved. + + Stacktraces uint32 // Number of unique stack traces in the chunk. + StacktraceNodes uint32 // Number of nodes in the stacktrace tree. + StacktraceMaxDepth uint32 // Max stack trace depth in the tree. + StacktraceMaxNodes uint32 // Max number of nodes at the time of the chunk creation. + + _ [12]byte // Padding. 64 bytes per chunk header. + CRC uint32 // Checksum of the chunk data [Offset:Size). +} + +const stacktraceBlockHeaderSize = int(unsafe.Sizeof(StacktraceBlockHeader{})) + +type ChunkEncoding byte + +const ( + _ ChunkEncoding = iota + StacktraceEncodingGroupVarint +) + +func (h *StacktraceBlockHeader) marshal(b []byte) { + binary.BigEndian.PutUint64(b[0:8], uint64(h.Offset)) + binary.BigEndian.PutUint64(b[8:16], uint64(h.Size)) + binary.BigEndian.PutUint64(b[16:24], h.Partition) + binary.BigEndian.PutUint16(b[24:26], h.BlockIndex) + b[27] = byte(h.Encoding) + // 5 bytes reserved. + binary.BigEndian.PutUint32(b[32:36], h.Stacktraces) + binary.BigEndian.PutUint32(b[36:40], h.StacktraceNodes) + binary.BigEndian.PutUint32(b[40:44], h.StacktraceMaxDepth) + binary.BigEndian.PutUint32(b[44:48], h.StacktraceMaxNodes) + // 12 bytes reserved. + binary.BigEndian.PutUint32(b[60:64], h.CRC) +} + +func (h *StacktraceBlockHeader) unmarshal(b []byte) { + h.Offset = int64(binary.BigEndian.Uint64(b[0:8])) + h.Size = int64(binary.BigEndian.Uint64(b[8:16])) + h.Partition = binary.BigEndian.Uint64(b[16:24]) + h.BlockIndex = binary.BigEndian.Uint16(b[24:26]) + h.Encoding = ChunkEncoding(b[27]) + // 5 bytes reserved. + h.Stacktraces = binary.BigEndian.Uint32(b[32:36]) + h.StacktraceNodes = binary.BigEndian.Uint32(b[36:40]) + h.StacktraceMaxDepth = binary.BigEndian.Uint32(b[40:44]) + h.StacktraceMaxNodes = binary.BigEndian.Uint32(b[44:48]) + // 12 bytes reserved. + h.CRC = binary.BigEndian.Uint32(b[60:64]) +} + // symbolic information such as locations, functions, mappings, // and strings is represented as Array of Structures in memory, // and is encoded as Structure of Arrays when written on disk. 
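To make the Structure-of-Arrays comment above concrete, here is a stripped-down sketch of column-wise block encoding. The record type is invented, and the real encoders additionally frame every block with a header carrying length, block size, and a CRC; this only illustrates the AoS-to-SoA transposition:

    package main

    import (
    	"bytes"
    	"encoding/binary"
    	"fmt"
    )

    type rec struct {
    	MappingID uint32
    	Folded    bool
    }

    // writeBlockSoA serializes a block of records column by column: all
    // mapping IDs first, then all flags. Columns of similar values compress
    // better and can be decoded or skipped independently, unlike interleaved
    // Array-of-Structures records.
    func writeBlockSoA(w *bytes.Buffer, block []rec) {
    	for _, r := range block { // Column 1: mapping IDs.
    		w.Write(binary.LittleEndian.AppendUint32(nil, r.MappingID))
    	}
    	for _, r := range block { // Column 2: fold flags.
    		var b byte
    		if r.Folded {
    			b = 1
    		}
    		w.WriteByte(b)
    	}
    }

    func main() {
    	var buf bytes.Buffer
    	writeBlockSoA(&buf, []rec{{1, false}, {1, true}, {2, false}})
    	fmt.Printf("% x\n", buf.Bytes()) // 01 00 00 00 01 00 00 00 02 00 00 00 00 01 00
    }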
@@ -512,10 +536,7 @@ type symbolsBlockHeader struct { BlockSize uint32 } -const ( - defaultSymbolsBlockSize = 1 << 10 - symbolsBlockHeaderSize = int(unsafe.Sizeof(mappingsBlockHeader{})) -) +const symbolsBlockHeaderSize = int(unsafe.Sizeof(symbolsBlockHeader{})) func newSymbolsBlockHeader(n, bs int) symbolsBlockHeader { return symbolsBlockHeader{ @@ -541,10 +562,7 @@ func (h *symbolsBlockHeader) unmarshal(b []byte) { } func (h *symbolsBlockHeader) validate() error { - if h.Magic[0] != symdbMagic[0] || - h.Magic[1] != symdbMagic[1] || - h.Magic[2] != symdbMagic[2] || - h.Magic[3] != symdbMagic[3] { + if !bytes.Equal(h.Magic[:], symdbMagic[:]) { return ErrInvalidMagic } if h.Version >= 2 { @@ -586,6 +604,8 @@ type symbolsEncoder[T any] struct { buf []byte } +const defaultSymbolsBlockSize = 1 << 10 + func newSymbolsEncoder[T any](w io.Writer, e symbolsBlockEncoder[T]) *symbolsEncoder[T] { return &symbolsEncoder[T]{w: w, e: e, bs: defaultSymbolsBlockSize} } diff --git a/pkg/phlaredb/symdb/partition_memory.go b/pkg/phlaredb/symdb/partition_memory.go index ab3f7781de..e5a81b2740 100644 --- a/pkg/phlaredb/symdb/partition_memory.go +++ b/pkg/phlaredb/symdb/partition_memory.go @@ -56,7 +56,7 @@ type stacktracesPartition struct { m sync.RWMutex hashToIdx map[uint64]uint32 chunks []*stacktraceChunk - header []StacktraceChunkHeader + header []StacktraceBlockHeader } func newStacktracesPartition(maxNodesPerChunk uint32) *stacktracesPartition { @@ -74,7 +74,7 @@ func newStacktracesPartition(maxNodesPerChunk uint32) *stacktracesPartition { func (p *stacktracesPartition) size() uint64 { p.m.RLock() // TODO: map footprint isn't accounted - v := len(p.header) * stacktraceChunkHeaderSize + v := len(p.header) * stacktraceBlockHeaderSize for _, c := range p.chunks { v += stacktraceTreeNodeSize * cap(c.tree.nodes) } @@ -367,42 +367,62 @@ func (p *PartitionWriter) Release() { // Noop. Satisfies PartitionReader interface. } -func (p *PartitionWriter) write(w *fileWriter) (err error) { - senc := newSymbolsEncoder[string](w.w, new(stringsBlockEncoder)) - senc.Encode(p.strings.slice) - - menc := newSymbolsEncoder[schemav1.InMemoryMapping](w.w, new(mappingsBlockEncoder)) - menc.Encode(p.mappings.slice) - - fenc := newSymbolsEncoder[schemav1.InMemoryFunction](w.w, new(functionsBlockEncoder)) - fenc.Encode(p.functions.slice) - - lenc := newSymbolsEncoder[schemav1.InMemoryLocation](w.w, new(locationsBlockEncoder)) - lenc.Encode(p.locations.slice) - +func (p *PartitionWriter) writeTo(w *writer) (err error) { + if p.header.Strings, err = writeSymbolBlocks(w.dataFile, p.strings.slice, w.stringsEncoder); err != nil { + return err + } + if p.header.Mappings, err = writeSymbolBlocks(w.dataFile, p.mappings.slice, w.mappingsEncoder); err != nil { + return err + } + if p.header.Functions, err = writeSymbolBlocks(w.dataFile, p.functions.slice, w.functionsEncoder); err != nil { + return err + } + if p.header.Locations, err = writeSymbolBlocks(w.dataFile, p.locations.slice, w.locationsEncoder); err != nil { + return err + } for ci, c := range p.stacktraces.chunks { stacks := c.stacks if stacks == 0 { stacks = uint32(len(p.stacktraces.hashToIdx)) } - h := StacktraceChunkHeader{ - Offset: w.w.offset, - Size: 0, // Set later. 
+ h := StacktraceBlockHeader{ + Offset: w.dataFile.w.offset, Partition: p.header.Partition, - ChunkIndex: uint16(ci), - ChunkEncoding: ChunkEncodingGroupVarint, + BlockIndex: uint16(ci), + Encoding: StacktraceEncodingGroupVarint, Stacktraces: stacks, StacktraceNodes: c.tree.len(), - StacktraceMaxDepth: 0, // TODO StacktraceMaxNodes: c.partition.maxNodesPerChunk, - CRC: 0, // Set later. } crc := crc32.New(castagnoli) - if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.w)); err != nil { + if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.dataFile)); err != nil { return fmt.Errorf("writing stacktrace chunk data: %w", err) } h.CRC = crc.Sum32() - p.header.StacktraceChunks = append(p.header.StacktraceChunks, h) + p.header.Stacktraces = append(p.header.Stacktraces, h) } return nil } + +func writeSymbolBlocks[T any](f *fileWriter, s []T, e *symbolsEncoder[T]) ([]SymbolsBlockReference, error) { + // TODO(kolesnikovae): Split into blocks (< 1M). + h, err := writeSymbolsBlock(f, func(w io.Writer) error { + e.Reset(w) + err := e.Encode(s) + e.Reset(nil) + return err + }) + return []SymbolsBlockReference{h}, err +} + +func writeSymbolsBlock(w *fileWriter, fn func(io.Writer) error) (h SymbolsBlockReference, err error) { + h.Offset = uint32(w.w.offset) + crc := crc32.New(castagnoli) + mw := io.MultiWriter(crc, w.w) + if err = fn(mw); err != nil { + return h, err + } + h.Size = uint32(w.w.offset) - h.Offset + h.CRC = crc.Sum32() + return h, nil +} From 8c105664fa90a131a975888bc3f2dec060a1ab56 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 22 Apr 2024 19:33:04 +0800 Subject: [PATCH 11/36] improve versioning --- Makefile | 2 +- pkg/phlaredb/symdb/block_reader.go | 366 ++++++++--------- pkg/phlaredb/symdb/block_reader_parquet.go | 163 ++++++++ pkg/phlaredb/symdb/block_reader_test.go | 1 + pkg/phlaredb/symdb/block_writer.go | 9 +- pkg/phlaredb/symdb/format.go | 434 ++++++++++----------- pkg/phlaredb/symdb/partition_memory.go | 29 +- pkg/phlaredb/symdb/strings_test.go | 20 +- pkg/phlaredb/symdb/symdb.go | 2 +- 9 files changed, 564 insertions(+), 462 deletions(-) create mode 100644 pkg/phlaredb/symdb/block_reader_parquet.go diff --git a/Makefile b/Makefile index fd69e72ce2..f7e01d8727 100644 --- a/Makefile +++ b/Makefile @@ -260,7 +260,7 @@ $(BIN)/buf: Makefile $(BIN)/golangci-lint: Makefile @mkdir -p $(@D) - GOBIN=$(abspath $(@D)) $(GO) install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.54.0 + GOBIN=$(abspath $(@D)) $(GO) install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.57.2 $(BIN)/protoc-gen-go: Makefile go.mod @mkdir -p $(@D) diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index 95e66e16f3..030ecb048a 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -3,7 +3,6 @@ package symdb import ( "bufio" "context" - "errors" "fmt" "hash/crc32" "io" @@ -14,11 +13,9 @@ import ( "github.com/grafana/dskit/multierror" "github.com/opentracing/opentracing-go" otlog "github.com/opentracing/opentracing-go/log" - "github.com/parquet-go/parquet-go" "golang.org/x/sync/errgroup" "github.com/grafana/pyroscope/pkg/objstore" - parquetobj "github.com/grafana/pyroscope/pkg/objstore/parquet" "github.com/grafana/pyroscope/pkg/phlaredb/block" schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/util/refctr" @@ -29,44 +26,36 @@ type Reader struct { files map[string]block.File meta *block.Meta - chunkFetchBufferSize int + // TODO: fetch buffer pool + fetchBufferSize int index 
IndexFile partitions []*partition partitionsMap map[uint64]*partition - locations parquetobj.File - mappings parquetobj.File - functions parquetobj.File - strings parquetobj.File + parquetFiles *parquetFiles } -const defaultChunkFetchBufferSize = 4096 +const defaultFetchBufferSize = 4096 func Open(ctx context.Context, b objstore.BucketReader, m *block.Meta) (*Reader, error) { - r := Reader{ + r := &Reader{ bucket: b, meta: m, files: make(map[string]block.File), - chunkFetchBufferSize: defaultChunkFetchBufferSize, + fetchBufferSize: defaultFetchBufferSize, } - if err := r.open(ctx); err != nil { - return nil, err - } - return &r, nil -} - -func (r *Reader) open(ctx context.Context) (err error) { for _, f := range r.meta.Files { r.files[filepath.Base(f.RelPath)] = f } + var err error if err = r.openIndexFile(ctx); err != nil { - return fmt.Errorf("opening index file: %w", err) + return nil, fmt.Errorf("opening index file: %w", err) } if r.index.Header.Version == FormatV2 { - if err = r.openParquetFiles(ctx); err != nil { - return err + if err = openParquetFiles(ctx, r); err != nil { + return nil, err } } r.partitionsMap = make(map[uint64]*partition, len(r.index.PartitionHeaders)) @@ -76,6 +65,16 @@ func (r *Reader) open(ctx context.Context) (err error) { r.partitionsMap[h.Partition] = ph r.partitions[i] = ph } + return r, nil +} + +func (r *Reader) Close() error { + if r == nil { + return nil + } + if r.parquetFiles != nil { + return r.parquetFiles.Close() + } return nil } @@ -96,39 +95,6 @@ func (r *Reader) openIndexFile(ctx context.Context) error { return err } -const parquetReadBufferSize = 256 << 10 // 256KB - -func (r *Reader) openParquetFiles(ctx context.Context) error { - options := []parquet.FileOption{ - parquet.SkipBloomFilters(true), // we don't use bloom filters - parquet.FileReadMode(parquet.ReadModeAsync), - parquet.ReadBufferSize(parquetReadBufferSize), - } - - m := map[string]*parquetobj.File{ - new(schemav1.LocationPersister).Name() + block.ParquetSuffix: &r.locations, - new(schemav1.MappingPersister).Name() + block.ParquetSuffix: &r.mappings, - new(schemav1.FunctionPersister).Name() + block.ParquetSuffix: &r.functions, - new(schemav1.StringPersister).Name() + block.ParquetSuffix: &r.strings, - } - g, ctx := errgroup.WithContext(ctx) - for n, fp := range m { - n := n - fp := fp - g.Go(func() error { - fm, err := r.file(n) - if err != nil { - return err - } - if err = fp.Open(ctx, r.bucket, fm, options...); err != nil { - return fmt.Errorf("openning file %q: %w", n, err) - } - return nil - }) - } - return g.Wait() -} - func (r *Reader) file(name string) (block.File, error) { f, ok := r.files[name] if !ok { @@ -138,43 +104,15 @@ func (r *Reader) file(name string) (block.File, error) { } func (r *Reader) partitionReader(h *PartitionHeader) *partition { - p := &partition{ - reader: r, - locations: parquetTableRange[schemav1.InMemoryLocation, schemav1.LocationPersister]{ - bucket: r.bucket, - headers: SymbolsBlockReferencesAsRows(h.Locations), - file: &r.locations, - }, - mappings: parquetTableRange[schemav1.InMemoryMapping, schemav1.MappingPersister]{ - bucket: r.bucket, - headers: SymbolsBlockReferencesAsRows(h.Mappings), - file: &r.mappings, - }, - functions: parquetTableRange[schemav1.InMemoryFunction, schemav1.FunctionPersister]{ - bucket: r.bucket, - headers: SymbolsBlockReferencesAsRows(h.Functions), - file: &r.functions, - }, - strings: parquetTableRange[string, schemav1.StringPersister]{ - bucket: r.bucket, - headers: SymbolsBlockReferencesAsRows(h.Strings), - file: 
&r.strings, - }, - } - p.setStacktracesChunks(h.Stacktraces) - return p -} - -func (r *Reader) Close() error { - if r == nil { - return nil + p := &partition{reader: r} + if r.index.Header.Version == FormatV2 { + p.initParquetTables(h) + } + if r.index.Header.Version == FormatV3 { + p.initTables(h) } - return multierror.New( - r.locations.Close(), - r.mappings.Close(), - r.functions.Close(), - r.strings.Close()). - Err() + p.initStacktraces(h.Stacktraces) + return p } var ErrPartitionNotFound = fmt.Errorf("partition not found") @@ -192,7 +130,7 @@ func (r *Reader) partition(ctx context.Context, partition uint64) (*partition, e if !ok { return nil, ErrPartitionNotFound } - if err := p.init(ctx); err != nil { + if err := p.fetch(ctx); err != nil { return nil, err } return p, nil @@ -201,14 +139,19 @@ func (r *Reader) partition(ctx context.Context, partition uint64) (*partition, e type partition struct { reader *Reader - stacktraceChunks []*stacktraceChunkReader - locations parquetTableRange[schemav1.InMemoryLocation, schemav1.LocationPersister] - mappings parquetTableRange[schemav1.InMemoryMapping, schemav1.MappingPersister] - functions parquetTableRange[schemav1.InMemoryFunction, schemav1.FunctionPersister] - strings parquetTableRange[string, schemav1.StringPersister] + stacktraces []*stacktraceBlock + locations table[schemav1.InMemoryLocation] + mappings table[schemav1.InMemoryMapping] + functions table[schemav1.InMemoryFunction] + strings table[string] +} + +type table[T any] interface { + fetchable + slice() []T } -func (p *partition) init(ctx context.Context) (err error) { +func (p *partition) fetch(ctx context.Context) (err error) { return p.tx().fetch(ctx) } @@ -217,68 +160,115 @@ func (p *partition) Release() { } func (p *partition) tx() *fetchTx { - tx := make(fetchTx, 0, len(p.stacktraceChunks)+4) - for _, c := range p.stacktraceChunks { + tx := make(fetchTx, 0, len(p.stacktraces)+4) + for _, c := range p.stacktraces { tx.append(c) } if p.reader.index.Header.Version > FormatV1 { - tx.append(&p.locations) - tx.append(&p.mappings) - tx.append(&p.functions) - tx.append(&p.strings) + tx.append(p.locations) + tx.append(p.mappings) + tx.append(p.functions) + tx.append(p.strings) } return &tx } +func (p *partition) initParquetTables(h *PartitionHeader) { + p.locations = &parquetTable[schemav1.InMemoryLocation, schemav1.LocationPersister]{ + bucket: p.reader.bucket, + headers: h.V2.Locations, + file: &p.reader.parquetFiles.locations, + } + p.mappings = &parquetTable[schemav1.InMemoryMapping, schemav1.MappingPersister]{ + bucket: p.reader.bucket, + headers: h.V2.Mappings, + file: &p.reader.parquetFiles.mappings, + } + p.functions = &parquetTable[schemav1.InMemoryFunction, schemav1.FunctionPersister]{ + bucket: p.reader.bucket, + headers: h.V2.Functions, + file: &p.reader.parquetFiles.functions, + } + p.strings = &parquetTable[string, schemav1.StringPersister]{ + bucket: p.reader.bucket, + headers: h.V2.Strings, + file: &p.reader.parquetFiles.strings, + } +} + +func (p *partition) initTables(h *PartitionHeader) { + // TODO(kolesnikovae): decoder pool. 
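As an aside, the decoder-pool TODO above could plausibly be backed by a small typed wrapper over sync.Pool, assuming the decoders gain a way to be re-bound to a new block header before reuse. A hypothetical sketch, not part of the patch:

    package symdb

    import "sync"

    // pool is a minimal typed wrapper around sync.Pool; a decoder pool would
    // instantiate it once per symbol kind and reset a pooled decoder before
    // handing it out (the reset step is assumed, not present in the patch).
    type pool[T any] struct{ p sync.Pool }

    func newPool[T any](newT func() *T) *pool[T] {
    	return &pool[T]{p: sync.Pool{New: func() any { return newT() }}}
    }

    func (p *pool[T]) get() *T  { return p.p.Get().(*T) }
    func (p *pool[T]) put(t *T) { p.p.Put(t) }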
+ p.locations = &rawTable[schemav1.InMemoryLocation]{ + reader: p.reader, + header: h.V3.Locations, + dec: newSymbolsDecoder[schemav1.InMemoryLocation](h.V3.Locations, new(locationsBlockDecoder)), + } + p.mappings = &rawTable[schemav1.InMemoryMapping]{ + reader: p.reader, + header: h.V3.Mappings, + dec: newSymbolsDecoder[schemav1.InMemoryMapping](h.V3.Mappings, new(mappingsBlockDecoder)), + } + p.functions = &rawTable[schemav1.InMemoryFunction]{ + reader: p.reader, + header: h.V3.Functions, + dec: newSymbolsDecoder[schemav1.InMemoryFunction](h.V3.Functions, new(functionsBlockDecoder)), + } + p.strings = &rawTable[string]{ + reader: p.reader, + header: h.V3.Strings, + dec: newSymbolsDecoder[string](h.V3.Strings, new(stringsBlockDecoder)), + } +} + func (p *partition) Symbols() *Symbols { return &Symbols{ Stacktraces: p, - Locations: p.locations.s, - Mappings: p.mappings.s, - Functions: p.functions.s, - Strings: p.strings.s, + Locations: p.locations.slice(), + Mappings: p.mappings.slice(), + Functions: p.functions.slice(), + Strings: p.strings.slice(), } } func (p *partition) WriteStats(s *PartitionStats) { var nodes uint32 - for _, c := range p.stacktraceChunks { + for _, c := range p.stacktraces { s.StacktracesTotal += int(c.header.Stacktraces) nodes += c.header.StacktraceNodes } s.MaxStacktraceID = int(nodes) - s.LocationsTotal = len(p.locations.s) - s.MappingsTotal = len(p.mappings.s) - s.FunctionsTotal = len(p.functions.s) - s.StringsTotal = len(p.strings.s) + s.LocationsTotal = len(p.locations.slice()) + s.MappingsTotal = len(p.mappings.slice()) + s.FunctionsTotal = len(p.functions.slice()) + s.StringsTotal = len(p.strings.slice()) } var ErrInvalidStacktraceRange = fmt.Errorf("invalid range: stack traces can't be resolved") func (p *partition) LookupLocations(dst []uint64, stacktraceID uint32) []uint64 { dst = dst[:0] - if len(p.stacktraceChunks) == 0 { + if len(p.stacktraces) == 0 { return dst } - nodesPerChunk := p.stacktraceChunks[0].header.StacktraceMaxNodes + nodesPerChunk := p.stacktraces[0].header.StacktraceMaxNodes chunkID := stacktraceID / nodesPerChunk localSID := stacktraceID % nodesPerChunk - if localSID == 0 || int(chunkID) > len(p.stacktraceChunks) { + if localSID == 0 || int(chunkID) > len(p.stacktraces) { return dst } - return p.stacktraceChunks[chunkID].t.resolveUint64(dst, localSID) + return p.stacktraces[chunkID].t.resolveUint64(dst, localSID) } func (p *partition) ResolveStacktraceLocations(ctx context.Context, dst StacktraceInserter, s []uint32) (err error) { if len(s) == 0 { return nil } - if len(p.stacktraceChunks) == 0 { + if len(p.stacktraces) == 0 { return ErrInvalidStacktraceRange } // First, we determine the chunks needed for the range. // All chunks in a block must have the same StacktraceMaxNodes. 
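For reference, the ID arithmetic in LookupLocations above reduces to a division and a remainder over the fixed per-chunk node budget; the helper below is ours, not the patch's:

    package symdb

    // splitStacktraceID maps a global stacktrace ID to a chunk index and a
    // chunk-local node ID, given the per-chunk budget StacktraceMaxNodes.
    // Exact multiples of maxNodes have local ID 0, which the lookup above
    // rejects as invalid.
    func splitStacktraceID(id, maxNodes uint32) (chunk, local uint32) {
    	return id / maxNodes, id % maxNodes
    }

    // For example, with maxNodes = 1<<20, ID 2_097_153 resolves to
    // chunk 2, local node 1.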
- sr := SplitStacktraces(s, p.stacktraceChunks[0].header.StacktraceMaxNodes) + sr := SplitStacktraces(s, p.stacktraces[0].header.StacktraceMaxNodes) for _, c := range sr { if err = p.lookupStacktraces(ctx, dst, c).do(); err != nil { return err @@ -287,19 +277,19 @@ func (p *partition) ResolveStacktraceLocations(ctx context.Context, dst Stacktra return nil } -func (p *partition) setStacktracesChunks(chunks []StacktraceBlockHeader) { - p.stacktraceChunks = make([]*stacktraceChunkReader, len(chunks)) +func (p *partition) initStacktraces(chunks []StacktraceBlockHeader) { + p.stacktraces = make([]*stacktraceBlock, len(chunks)) for i, c := range chunks { - p.stacktraceChunks[i] = &stacktraceChunkReader{ + p.stacktraces[i] = &stacktraceBlock{ reader: p.reader, header: c, } } } -func (p *partition) stacktraceChunkReader(i uint32) *stacktraceChunkReader { - if int(i) < len(p.stacktraceChunks) { - return p.stacktraceChunks[i] +func (p *partition) stacktraceChunkReader(i uint32) *stacktraceBlock { + if int(i) < len(p.stacktraces) { + return p.stacktraces[i] } return nil } @@ -337,7 +327,7 @@ func (r *stacktracesLookup) do() error { return nil } -type stacktraceChunkReader struct { +type stacktraceBlock struct { reader *Reader header StacktraceBlockHeader @@ -345,8 +335,8 @@ type stacktraceChunkReader struct { t *parentPointerTree } -func (c *stacktraceChunkReader) fetch(ctx context.Context) error { - span, ctx := opentracing.StartSpanFromContext(ctx, "stacktraceChunkReader.fetch") +func (c *stacktraceBlock) fetch(ctx context.Context) error { + span, ctx := opentracing.StartSpanFromContext(ctx, "stacktraceBlock.fetch") span.LogFields( otlog.Int64("size", c.header.Size), otlog.Uint32("nodes", c.header.StacktraceNodes), @@ -354,7 +344,11 @@ func (c *stacktraceChunkReader) fetch(ctx context.Context) error { ) defer span.Finish() return c.r.Inc(func() error { - f, err := c.reader.file(StacktracesFileName) + filename := DataFileName + if c.reader.index.Header.Version < 3 { + filename = StacktracesFileName + } + f, err := c.reader.file(filename) if err != nil { return err } @@ -365,12 +359,11 @@ func (c *stacktraceChunkReader) fetch(ctx context.Context) error { defer func() { err = multierror.New(err, rc.Close()).Err() }() - // Consider pooling the buffer. - return c.readFrom(bufio.NewReaderSize(rc, c.reader.chunkFetchBufferSize)) + return c.readFrom(bufio.NewReaderSize(rc, c.reader.fetchBufferSize)) }) } -func (c *stacktraceChunkReader) readFrom(r io.Reader) error { +func (c *stacktraceBlock) readFrom(r *bufio.Reader) error { // NOTE(kolesnikovae): Pool of node chunks could reduce // the alloc size, but it may affect memory locality. 
// Although, properly aligned chunks of, say, 1-4K nodes @@ -393,94 +386,59 @@ func (c *stacktraceChunkReader) readFrom(r io.Reader) error { return nil } -func (c *stacktraceChunkReader) release() { +func (c *stacktraceBlock) release() { c.r.Dec(func() { c.t = nil }) } -type parquetTableRange[M schemav1.Models, P schemav1.Persister[M]] struct { - headers []RowRangeReference - bucket objstore.BucketReader - persister P - - file *parquetobj.File - - r refctr.Counter - s []M +type rawTable[T any] struct { + reader *Reader + header SymbolsBlockHeader + dec *symbolsDecoder[T] + r refctr.Counter + s []T } -// parquet.CopyRows uses hardcoded buffer size: -// defaultRowBufferSize = 42 -const inMemoryReaderRowsBufSize = 1 << 10 - -func (t *parquetTableRange[M, P]) fetch(ctx context.Context) (err error) { - span, _ := opentracing.StartSpanFromContext(ctx, "parquetTableRange.fetch", opentracing.Tags{ - "table_name": t.persister.Name(), - "row_groups": len(t.headers), - }) +func (t *rawTable[T]) fetch(ctx context.Context) error { + span, ctx := opentracing.StartSpanFromContext(ctx, "symbolsTable.fetch") + span.LogFields( + otlog.Uint32("size", t.header.Size), + otlog.Uint32("length", t.header.Length), + ) defer span.Finish() return t.r.Inc(func() error { - var s uint32 - for _, h := range t.headers { - s += h.Rows + f, err := t.reader.file(DataFileName) + if err != nil { + return err } - buf := make([]parquet.Row, inMemoryReaderRowsBufSize) - t.s = make([]M, s) - var offset int - // TODO(kolesnikovae): Row groups could be fetched in parallel. - rgs := t.file.RowGroups() - for _, h := range t.headers { - span.LogFields( - otlog.Uint32("row_group", h.RowGroup), - otlog.Uint32("index_row", h.Index), - otlog.Uint32("rows", h.Rows), - ) - rg := rgs[h.RowGroup] - rows := rg.Rows() - if err := rows.SeekToRow(int64(h.Index)); err != nil { - return err - } - dst := t.s[offset : offset+int(h.Rows)] - if err := t.readRows(dst, buf, rows); err != nil { - return fmt.Errorf("reading row group from parquet file %q: %w", t.file.Path(), err) - } - offset += int(h.Rows) + rc, err := t.reader.bucket.GetRange(ctx, f.RelPath, int64(t.header.Offset), int64(t.header.Size)) + if err != nil { + return err } - return nil + defer func() { + err = multierror.New(err, rc.Close()).Err() + }() + return t.readFrom(bufio.NewReaderSize(rc, t.reader.fetchBufferSize)) }) } -func (t *parquetTableRange[M, P]) readRows(dst []M, buf []parquet.Row, rows parquet.Rows) (err error) { - defer func() { - err = multierror.New(err, rows.Close()).Err() - }() - for i := 0; i < len(dst); { - n, err := rows.ReadRows(buf) - if n > 0 { - for _, row := range buf[:n] { - if i == len(dst) { - return nil - } - _, v, err := t.persister.Reconstruct(row) - if err != nil { - return err - } - dst[i] = v - i++ - } - } - if err != nil { - if errors.Is(err, io.EOF) { - return nil - } - return err - } +func (t *rawTable[T]) readFrom(r *bufio.Reader) error { + crc := crc32.New(castagnoli) + tee := io.TeeReader(r, crc) + t.s = make([]T, t.header.Length) + if err := t.dec.Decode(t.s, tee); err != nil { + return fmt.Errorf("failed to decode symbols: %w", err) + } + if t.header.CRC != crc.Sum32() { + return ErrInvalidCRC } return nil } -func (t *parquetTableRange[M, P]) release() { +func (t *rawTable[T]) slice() []T { return t.s } + +func (t *rawTable[T]) release() { t.r.Dec(func() { t.s = nil }) @@ -488,14 +446,14 @@ func (t *parquetTableRange[M, P]) release() { // fetchTx facilitates fetching multiple objects in a transactional manner: // if one of the objects has 
failed, all the remaining ones are released. -type fetchTx []fetch +type fetchTx []fetchable -type fetch interface { +type fetchable interface { fetch(context.Context) error release() } -func (tx *fetchTx) append(x fetch) { *tx = append(*tx, x) } +func (tx *fetchTx) append(x fetchable) { *tx = append(*tx, x) } func (tx *fetchTx) fetch(ctx context.Context) (err error) { defer func() { diff --git a/pkg/phlaredb/symdb/block_reader_parquet.go b/pkg/phlaredb/symdb/block_reader_parquet.go new file mode 100644 index 0000000000..1addb2764e --- /dev/null +++ b/pkg/phlaredb/symdb/block_reader_parquet.go @@ -0,0 +1,163 @@ +package symdb + +import ( + "context" + "errors" + "fmt" + "io" + + "github.com/grafana/dskit/multierror" + "github.com/opentracing/opentracing-go" + otlog "github.com/opentracing/opentracing-go/log" + "github.com/parquet-go/parquet-go" + "golang.org/x/sync/errgroup" + + "github.com/grafana/pyroscope/pkg/objstore" + parquetobj "github.com/grafana/pyroscope/pkg/objstore/parquet" + "github.com/grafana/pyroscope/pkg/phlaredb/block" + schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" + "github.com/grafana/pyroscope/pkg/util/refctr" +) + +type parquetTable[M schemav1.Models, P schemav1.Persister[M]] struct { + headers []RowRangeReference + bucket objstore.BucketReader + persister P + + file *parquetobj.File + + r refctr.Counter + s []M +} + +const ( + // parquet.CopyRows uses hardcoded buffer size: + // defaultRowBufferSize = 42 + inMemoryReaderRowsBufSize = 1 << 10 + parquetReadBufferSize = 256 << 10 // 256KB +) + +func (t *parquetTable[M, P]) fetch(ctx context.Context) (err error) { + span, _ := opentracing.StartSpanFromContext(ctx, "parquetTable.fetch", opentracing.Tags{ + "table_name": t.persister.Name(), + "row_groups": len(t.headers), + }) + defer span.Finish() + return t.r.Inc(func() error { + var s uint32 + for _, h := range t.headers { + s += h.Rows + } + buf := make([]parquet.Row, inMemoryReaderRowsBufSize) + t.s = make([]M, s) + var offset int + // TODO(kolesnikovae): Row groups could be fetched in parallel. 
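If the parallel-fetch TODO above were pursued, each header already owns a disjoint window of the destination slice, so the per-range reads could run under an errgroup without extra locking. A sketch under that assumption (the helper and its signature are hypothetical):

    package symdb

    import (
    	"context"

    	"golang.org/x/sync/errgroup"
    )

    // fetchConcurrently runs one fetch per row range. Every range writes to
    // its own dst[off:off+n] window, so the goroutines never overlap.
    func fetchConcurrently[T any](ctx context.Context, dst []T, sizes []int,
    	fetch func(ctx context.Context, i int, out []T) error,
    ) error {
    	g, ctx := errgroup.WithContext(ctx)
    	off := 0
    	for i, n := range sizes {
    		i, out := i, dst[off:off+n]
    		off += n
    		g.Go(func() error { return fetch(ctx, i, out) })
    	}
    	return g.Wait()
    }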
+		rgs := t.file.RowGroups()
+		for _, h := range t.headers {
+			span.LogFields(
+				otlog.Uint32("row_group", h.RowGroup),
+				otlog.Uint32("index_row", h.Index),
+				otlog.Uint32("rows", h.Rows),
+			)
+			rg := rgs[h.RowGroup]
+			rows := rg.Rows()
+			if err := rows.SeekToRow(int64(h.Index)); err != nil {
+				return err
+			}
+			dst := t.s[offset : offset+int(h.Rows)]
+			if err := t.readRows(dst, buf, rows); err != nil {
+				return fmt.Errorf("reading row group from parquet file %q: %w", t.file.Path(), err)
+			}
+			offset += int(h.Rows)
+		}
+		return nil
+	})
+}
+
+func (t *parquetTable[M, P]) readRows(dst []M, buf []parquet.Row, rows parquet.Rows) (err error) {
+	defer func() {
+		err = multierror.New(err, rows.Close()).Err()
+	}()
+	for i := 0; i < len(dst); {
+		n, err := rows.ReadRows(buf)
+		if n > 0 {
+			for _, row := range buf[:n] {
+				if i == len(dst) {
+					return nil
+				}
+				_, v, err := t.persister.Reconstruct(row)
+				if err != nil {
+					return err
+				}
+				dst[i] = v
+				i++
+			}
+		}
+		if err != nil {
+			if errors.Is(err, io.EOF) {
+				return nil
+			}
+			return err
+		}
+	}
+	return nil
+}
+
+func (t *parquetTable[M, P]) slice() []M { return t.s }
+
+func (t *parquetTable[M, P]) release() {
+	t.r.Dec(func() {
+		t.s = nil
+	})
+}
+
+type parquetFiles struct {
+	locations parquetobj.File
+	mappings  parquetobj.File
+	functions parquetobj.File
+	strings   parquetobj.File
+}
+
+func (f *parquetFiles) Close() error {
+	return multierror.New(
+		f.locations.Close(),
+		f.mappings.Close(),
+		f.functions.Close(),
+		f.strings.Close()).
+		Err()
+}
+
+func openParquetFiles(ctx context.Context, r *Reader) error {
+	options := []parquet.FileOption{
+		parquet.SkipBloomFilters(true),
+		parquet.FileReadMode(parquet.ReadModeAsync),
+		parquet.ReadBufferSize(parquetReadBufferSize),
+	}
+	files := new(parquetFiles)
+	m := map[string]*parquetobj.File{
+		new(schemav1.LocationPersister).Name() + block.ParquetSuffix: &files.locations,
+		new(schemav1.MappingPersister).Name() + block.ParquetSuffix:  &files.mappings,
+		new(schemav1.FunctionPersister).Name() + block.ParquetSuffix: &files.functions,
+		new(schemav1.StringPersister).Name() + block.ParquetSuffix:   &files.strings,
+	}
+	g, ctx := errgroup.WithContext(ctx)
+	for n, fp := range m {
+		n := n
+		fp := fp
+		g.Go(func() error {
+			fm, err := r.file(n)
+			if err != nil {
+				return err
+			}
+			if err = fp.Open(ctx, r.bucket, fm, options...); err != nil {
+				return fmt.Errorf("opening file %q: %w", n, err)
+			}
+			return nil
+		})
+	}
+	if err := g.Wait(); err != nil {
+		return err
+	}
+	r.parquetFiles = files
+	return nil
+}
diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go
index f4e5b2cdaf..fcc63ee9b2 100644
--- a/pkg/phlaredb/symdb/block_reader_test.go
+++ b/pkg/phlaredb/symdb/block_reader_test.go
@@ -16,6 +16,7 @@ var testBlockMeta = &block.Meta{
 	Files: []block.File{
 		{RelPath: IndexFileName},
 		{RelPath: StacktracesFileName},
+		{RelPath: DataFileName},
 		{RelPath: "locations.parquet"},
 		{RelPath: "mappings.parquet"},
 		{RelPath: "functions.parquet"},
diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go
index 099f04da2f..35c8687d2e 100644
--- a/pkg/phlaredb/symdb/block_writer.go
+++ b/pkg/phlaredb/symdb/block_writer.go
@@ -37,10 +37,11 @@ func newWriter(c *Config) *writer {
 			},
 		},
 
-		stringsEncoder:   newSymbolsEncoder[string](nil, new(stringsBlockEncoder)),
-		mappingsEncoder:  newSymbolsEncoder[v1.InMemoryMapping](nil, new(mappingsBlockEncoder)),
-		functionsEncoder: newSymbolsEncoder[v1.InMemoryFunction](nil, new(functionsBlockEncoder)),
-
locationsEncoder: newSymbolsEncoder[v1.InMemoryLocation](nil, new(locationsBlockEncoder)), + // TODO(kolesnikovae): encoder pool. + stringsEncoder: newSymbolsEncoder[string](new(stringsBlockEncoder)), + mappingsEncoder: newSymbolsEncoder[v1.InMemoryMapping](new(mappingsBlockEncoder)), + functionsEncoder: newSymbolsEncoder[v1.InMemoryFunction](new(functionsBlockEncoder)), + locationsEncoder: newSymbolsEncoder[v1.InMemoryLocation](new(locationsBlockEncoder)), } } diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 8c8c4d0edc..d71b2fa642 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -171,13 +171,10 @@ func (h *TOCEntry) unmarshal(b []byte) { type PartitionHeaders []*PartitionHeader type PartitionHeader struct { - Partition uint64 - + Partition uint64 Stacktraces []StacktraceBlockHeader - Locations []SymbolsBlockReference - Mappings []SymbolsBlockReference - Functions []SymbolsBlockReference - Strings []SymbolsBlockReference + V2 *PartitionHeaderV2 + V3 *PartitionHeaderV3 } func (h *PartitionHeaders) Size() int64 { @@ -194,33 +191,17 @@ func (h *PartitionHeaders) WriteTo(dst io.Writer) (_ int64, err error) { binary.BigEndian.PutUint32(buf, uint32(len(*h))) w.write(buf) for _, p := range *h { - s := p.Size() - if int(s) > cap(buf) { - buf = make([]byte, s) + if p.V3 == nil { + return 0, fmt.Errorf("v2 format is not supported") } - buf = buf[:s] + buf = slices.GrowLen(buf, int(p.Size())) p.marshal(buf) w.write(buf) } return w.offset, w.err } -func (h *PartitionHeaders) Unmarshal(b []byte) error { - partitions := binary.BigEndian.Uint32(b[0:4]) - b = b[4:] - *h = make(PartitionHeaders, partitions) - for i := range *h { - var p PartitionHeader - if err := p.unmarshal(b); err != nil { - return err - } - b = b[p.Size():] - (*h)[i] = &p - } - return nil -} - -func (h *PartitionHeaders) fromChunks(b []byte) error { +func (h *PartitionHeaders) UnmarshalV1(b []byte) error { s := len(b) if s%stacktraceBlockHeaderSize > 0 { return ErrInvalidSize @@ -241,70 +222,109 @@ func (h *PartitionHeaders) fromChunks(b []byte) error { return nil } +func (h *PartitionHeaders) UnmarshalV2(b []byte) error { return h.unmarshal(b, FormatV2) } + +func (h *PartitionHeaders) UnmarshalV3(b []byte) error { return h.unmarshal(b, FormatV3) } + +func (h *PartitionHeaders) unmarshal(b []byte, version int) error { + partitions := binary.BigEndian.Uint32(b[0:4]) + b = b[4:] + *h = make(PartitionHeaders, partitions) + for i := range *h { + var p PartitionHeader + if err := p.unmarshal(b, version); err != nil { + return err + } + b = b[p.Size():] + (*h)[i] = &p + } + return nil +} + func (h *PartitionHeader) marshal(buf []byte) { binary.BigEndian.PutUint64(buf[0:8], h.Partition) binary.BigEndian.PutUint32(buf[8:12], uint32(len(h.Stacktraces))) - binary.BigEndian.PutUint32(buf[12:16], uint32(len(h.Locations))) - binary.BigEndian.PutUint32(buf[16:20], uint32(len(h.Mappings))) - binary.BigEndian.PutUint32(buf[20:24], uint32(len(h.Functions))) - binary.BigEndian.PutUint32(buf[24:28], uint32(len(h.Strings))) - n := 28 + n := 12 for i := range h.Stacktraces { h.Stacktraces[i].marshal(buf[n:]) n += stacktraceBlockHeaderSize } - n += marshalSymbolsBlockReferences(buf[n:], h.Locations) - n += marshalSymbolsBlockReferences(buf[n:], h.Mappings) - n += marshalSymbolsBlockReferences(buf[n:], h.Functions) - marshalSymbolsBlockReferences(buf[n:], h.Strings) + n += marshalSymbolsBlockReferences(buf[n:], h.V3.Locations) + n += marshalSymbolsBlockReferences(buf[n:], h.V3.Mappings) + n += 
marshalSymbolsBlockReferences(buf[n:], h.V3.Functions) + marshalSymbolsBlockReferences(buf[n:], h.V3.Strings) } -func (h *PartitionHeader) unmarshal(buf []byte) (err error) { +func (h *PartitionHeader) unmarshal(buf []byte, version int) (err error) { h.Partition = binary.BigEndian.Uint64(buf[0:8]) h.Stacktraces = make([]StacktraceBlockHeader, int(binary.BigEndian.Uint32(buf[8:12]))) - h.Locations = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[12:16]))) - h.Mappings = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[16:20]))) - h.Functions = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[20:24]))) - h.Strings = make([]SymbolsBlockReference, int(binary.BigEndian.Uint32(buf[24:28]))) - - buf = buf[28:] - stacktracesSize := len(h.Stacktraces) * stacktraceBlockHeaderSize - if err = h.unmarshalStacktraceChunks(buf[:stacktracesSize]); err != nil { - return err - } - buf = buf[stacktracesSize:] - locationsSize := len(h.Locations) * symbolsBlockReferenceSize - if err = h.unmarshalSymbolsBlockReferences(h.Locations, buf[:locationsSize]); err != nil { - return err - } - buf = buf[locationsSize:] - mappingsSize := len(h.Mappings) * symbolsBlockReferenceSize - if err = h.unmarshalSymbolsBlockReferences(h.Mappings, buf[:mappingsSize]); err != nil { - return err - } - buf = buf[mappingsSize:] - functionsSize := len(h.Functions) * symbolsBlockReferenceSize - if err = h.unmarshalSymbolsBlockReferences(h.Functions, buf[:functionsSize]); err != nil { - return err - } - buf = buf[functionsSize:] - stringsSize := len(h.Strings) * symbolsBlockReferenceSize - if err = h.unmarshalSymbolsBlockReferences(h.Strings, buf[:stringsSize]); err != nil { - return err + switch version { + case FormatV2: + h.V2 = new(PartitionHeaderV2) + h.V2.Locations = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[12:16]))) + h.V2.Mappings = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[16:20]))) + h.V2.Functions = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[20:24]))) + h.V2.Strings = make([]RowRangeReference, int(binary.BigEndian.Uint32(buf[24:28]))) + buf = buf[28:] + stacktracesSize := len(h.Stacktraces) * stacktraceBlockHeaderSize + if err = h.unmarshalStacktraceBlockHeaders(buf[:stacktracesSize]); err != nil { + return err + } + err = h.V2.unmarshal(buf[stacktracesSize:]) + case FormatV3: + buf = buf[12:] + stacktracesSize := len(h.Stacktraces) * stacktraceBlockHeaderSize + if err = h.unmarshalStacktraceBlockHeaders(buf[:stacktracesSize]); err != nil { + return err + } + h.V3 = new(PartitionHeaderV3) + err = h.V3.unmarshal(buf[stacktracesSize:]) + default: + return fmt.Errorf("bug: unsupported version: %d", version) } - - return nil + // TODO(kolesnikovae): Validate headers. + return err } func (h *PartitionHeader) Size() int64 { - s := 28 + s := 12 // Partition 8b + number of stacktrace blocks. 
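+	// The version-specific part (V2 or V3 block references) is added below.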
s += len(h.Stacktraces) * stacktraceBlockHeaderSize - r := len(h.Locations) + len(h.Mappings) + len(h.Functions) + len(h.Strings) - s += r * symbolsBlockReferenceSize + if h.V3 != nil { + s += h.V3.size() + } + if h.V2 != nil { + s += h.V2.size() + } return int64(s) } -func (h *PartitionHeader) unmarshalStacktraceChunks(b []byte) error { +type PartitionHeaderV3 struct { + Locations SymbolsBlockHeader + Mappings SymbolsBlockHeader + Functions SymbolsBlockHeader + Strings SymbolsBlockHeader +} + +const partitionHeaderV3Size = int(unsafe.Sizeof(PartitionHeaderV3{})) + +func (h *PartitionHeaderV3) size() int { return partitionHeaderV3Size } + +func (h *PartitionHeaderV3) unmarshal(buf []byte) (err error) { + s := len(buf) + if s%symbolsBlockReferenceSize > 0 { + return ErrInvalidSize + } + h.Locations.unmarshal(buf[:symbolsBlockReferenceSize]) + buf = buf[symbolsBlockReferenceSize:] + h.Mappings.unmarshal(buf[:symbolsBlockReferenceSize]) + buf = buf[symbolsBlockReferenceSize:] + h.Functions.unmarshal(buf[:symbolsBlockReferenceSize]) + buf = buf[symbolsBlockReferenceSize:] + h.Strings.unmarshal(buf[:symbolsBlockReferenceSize]) + return nil +} + +func (h *PartitionHeader) unmarshalStacktraceBlockHeaders(b []byte) error { s := len(b) if s%stacktraceBlockHeaderSize > 0 { return ErrInvalidSize @@ -316,19 +336,42 @@ func (h *PartitionHeader) unmarshalStacktraceChunks(b []byte) error { return nil } -func (h *PartitionHeader) unmarshalSymbolsBlockReferences(refs []SymbolsBlockReference, b []byte) error { - s := len(b) - if s%symbolsBlockReferenceSize > 0 { - return ErrInvalidSize - } - for i := range refs { - off := i * symbolsBlockReferenceSize - refs[i].unmarshal(b[off : off+symbolsBlockReferenceSize]) - } - return nil +// SymbolsBlockHeader describes a collection of elements encoded in a +// content-specific way: symbolic information such as locations, functions, +// mappings, and strings is represented as Array of Structures in memory, +// and is encoded as Structure of Arrays when written on disk. +type SymbolsBlockHeader struct { + // Offset in the data file. + Offset uint64 + // Size of the section. + Size uint32 + // Checksum of the section. + CRC uint32 + // Length denotes the total number of items encoded. + Length uint32 + // BlockSize denotes the number of items per block. 
+ BlockSize uint32 +} + +const symbolsBlockReferenceSize = int(unsafe.Sizeof(SymbolsBlockHeader{})) + +func (h *SymbolsBlockHeader) marshal(b []byte) { + binary.BigEndian.PutUint64(b[0:8], h.Offset) + binary.BigEndian.PutUint32(b[8:12], h.Size) + binary.BigEndian.PutUint32(b[12:16], h.CRC) + binary.BigEndian.PutUint32(b[16:20], h.Length) + binary.BigEndian.PutUint32(b[20:24], h.BlockSize) } -func marshalSymbolsBlockReferences(b []byte, refs []SymbolsBlockReference) int { +func (h *SymbolsBlockHeader) unmarshal(b []byte) { + h.Offset = binary.BigEndian.Uint64(b[0:8]) + h.Size = binary.BigEndian.Uint32(b[8:12]) + h.CRC = binary.BigEndian.Uint32(b[12:16]) + h.Length = binary.BigEndian.Uint32(b[16:20]) + h.BlockSize = binary.BigEndian.Uint32(b[20:24]) +} + +func marshalSymbolsBlockReferences(b []byte, refs ...SymbolsBlockHeader) int { var off int for i := range refs { refs[i].marshal(b[off : off+symbolsBlockReferenceSize]) @@ -337,45 +380,72 @@ func marshalSymbolsBlockReferences(b []byte, refs []SymbolsBlockReference) int { return off } -type SymbolsBlockReference struct { - Offset uint32 - Size uint32 - CRC uint32 +type PartitionHeaderV2 struct { + Locations []RowRangeReference + Mappings []RowRangeReference + Functions []RowRangeReference + Strings []RowRangeReference } -const symbolsBlockReferenceSize = int(unsafe.Sizeof(SymbolsBlockReference{})) - -func (r *SymbolsBlockReference) marshal(b []byte) { - binary.BigEndian.PutUint32(b[0:4], r.Offset) - binary.BigEndian.PutUint32(b[4:8], r.Size) - binary.BigEndian.PutUint32(b[8:12], r.CRC) +func (h *PartitionHeaderV2) size() int { + s := 16 // Length of row ranges per type. + r := len(h.Locations) + len(h.Mappings) + len(h.Functions) + len(h.Strings) + return s + rowRangeReferenceSize*r } -func (r *SymbolsBlockReference) unmarshal(b []byte) { - r.Offset = binary.BigEndian.Uint32(b[0:4]) - r.Size = binary.BigEndian.Uint32(b[4:8]) - r.CRC = binary.BigEndian.Uint32(b[8:12]) +func (h *PartitionHeaderV2) unmarshal(buf []byte) (err error) { + locationsSize := len(h.Locations) * rowRangeReferenceSize + if err = h.unmarshalRowRangeReferences(h.Locations, buf[:locationsSize]); err != nil { + return err + } + buf = buf[locationsSize:] + mappingsSize := len(h.Mappings) * rowRangeReferenceSize + if err = h.unmarshalRowRangeReferences(h.Mappings, buf[:mappingsSize]); err != nil { + return err + } + buf = buf[mappingsSize:] + functionsSize := len(h.Functions) * rowRangeReferenceSize + if err = h.unmarshalRowRangeReferences(h.Functions, buf[:functionsSize]); err != nil { + return err + } + buf = buf[functionsSize:] + stringsSize := len(h.Strings) * rowRangeReferenceSize + if err = h.unmarshalRowRangeReferences(h.Strings, buf[:stringsSize]); err != nil { + return err + } + return nil } -func (r *SymbolsBlockReference) AsRowRange() RowRangeReference { - return RowRangeReference{ - RowGroup: r.Offset, - Index: r.Size, - Rows: r.CRC, +func (h *PartitionHeaderV2) unmarshalRowRangeReferences(refs []RowRangeReference, b []byte) error { + s := len(b) + if s%rowRangeReferenceSize > 0 { + return ErrInvalidSize + } + for i := range refs { + off := i * rowRangeReferenceSize + refs[i].unmarshal(b[off : off+rowRangeReferenceSize]) } + return nil } +const rowRangeReferenceSize = int(unsafe.Sizeof(RowRangeReference{})) + type RowRangeReference struct { RowGroup uint32 Index uint32 Rows uint32 } -// SymbolsBlockReferencesAsRows re-interprets SymbolsBlockReference as -// RowRangeReference, that used to describe parquet table row ranges (v2). 
-// Both types have identical binary layouts but different semantics. -func SymbolsBlockReferencesAsRows(s []SymbolsBlockReference) []RowRangeReference { - return *(*[]RowRangeReference)(unsafe.Pointer(&s)) +func (r *RowRangeReference) marshal(b []byte) { + binary.BigEndian.PutUint32(b[0:4], r.RowGroup) + binary.BigEndian.PutUint32(b[4:8], r.Index) + binary.BigEndian.PutUint32(b[8:12], r.Rows) +} + +func (r *RowRangeReference) unmarshal(b []byte) { + r.RowGroup = binary.BigEndian.Uint32(b[0:4]) + r.Index = binary.BigEndian.Uint32(b[4:8]) + r.Rows = binary.BigEndian.Uint32(b[8:12]) } func ReadIndexFile(b []byte) (f IndexFile, err error) { @@ -394,22 +464,28 @@ func ReadIndexFile(b []byte) (f IndexFile, err error) { return f, fmt.Errorf("unmarshal table of contents: %w", err) } + // TODO: validate TOC + // Version-specific data section. switch f.Header.Version { default: - // Must never happen: the version is verified - // when the file header is read. - panic("bug: invalid version") + return f, fmt.Errorf("bug: unsupported version: %d", f.Header.Version) case FormatV1: sch := f.TOC.Entries[tocEntryStacktraceChunkHeaders] - if err = f.PartitionHeaders.fromChunks(b[sch.Offset : sch.Offset+sch.Size]); err != nil { + if err = f.PartitionHeaders.UnmarshalV1(b[sch.Offset : sch.Offset+sch.Size]); err != nil { return f, fmt.Errorf("unmarshal stacktraces: %w", err) } - case FormatV2, FormatV3: + case FormatV2: ph := f.TOC.Entries[tocEntryPartitionHeaders] - if err = f.PartitionHeaders.Unmarshal(b[ph.Offset : ph.Offset+ph.Size]); err != nil { + if err = f.PartitionHeaders.UnmarshalV2(b[ph.Offset : ph.Offset+ph.Size]); err != nil { + return f, fmt.Errorf("reading partition headers: %w", err) + } + + case FormatV3: + ph := f.TOC.Entries[tocEntryPartitionHeaders] + if err = f.PartitionHeaders.UnmarshalV3(b[ph.Offset : ph.Offset+ph.Size]); err != nil { return f, fmt.Errorf("reading partition headers: %w", err) } } @@ -512,152 +588,60 @@ func (h *StacktraceBlockHeader) unmarshal(b []byte) { h.CRC = binary.BigEndian.Uint32(b[60:64]) } -// symbolic information such as locations, functions, mappings, -// and strings is represented as Array of Structures in memory, -// and is encoded as Structure of Arrays when written on disk. -// -// The common structure of the encoded symbolic data is as follows: -// -// [Header] -// [Data encoded in blocks] -// [CRC32] -// -// Where the block format depends on the contents. -// -// Note that the data is decoded in a stream fashion, therefore -// any error in the data will be detected only after all the blocks -// are read in and decoded. -type symbolsBlockHeader struct { - Magic [4]byte - Version uint32 - // Length denotes the total number of items encoded. - Length uint32 - // BlockSize denotes the number of items per block. 
- BlockSize uint32 -} - -const symbolsBlockHeaderSize = int(unsafe.Sizeof(symbolsBlockHeader{})) - -func newSymbolsBlockHeader(n, bs int) symbolsBlockHeader { - return symbolsBlockHeader{ - Magic: symdbMagic, - Version: 1, - Length: uint32(n), - BlockSize: uint32(bs), - } -} - -func (h *symbolsBlockHeader) marshal(b []byte) { - b[0], b[1], b[2], b[3] = h.Magic[0], h.Magic[1], h.Magic[2], h.Magic[3] - binary.BigEndian.PutUint32(b[4:8], h.Version) - binary.BigEndian.PutUint32(b[8:12], h.Length) - binary.BigEndian.PutUint32(b[12:16], h.BlockSize) -} - -func (h *symbolsBlockHeader) unmarshal(b []byte) { - h.Magic[0], h.Magic[1], h.Magic[2], h.Magic[3] = b[0], b[1], b[2], b[3] - h.Version = binary.BigEndian.Uint32(b[4:8]) - h.Length = binary.BigEndian.Uint32(b[8:12]) - h.BlockSize = binary.BigEndian.Uint32(b[12:16]) -} - -func (h *symbolsBlockHeader) validate() error { - if !bytes.Equal(h.Magic[:], symdbMagic[:]) { - return ErrInvalidMagic - } - if h.Version >= 2 { - return ErrUnknownVersion - } - if h.Length >= 1<<20 && h.BlockSize >= 1<<20 { - return ErrInvalidSize - } - return nil -} - -func writeSymbolsBlockHeader(w io.Writer, buf []byte, h symbolsBlockHeader) ([]byte, error) { - if err := h.validate(); err != nil { - return buf, err - } - buf = slices.GrowLen(buf, symbolsBlockHeaderSize) - h.marshal(buf) - _, err := w.Write(buf) - return buf, err -} - -func readSymbolsBlockHeader(r io.Reader, buf []byte, h *symbolsBlockHeader) ([]byte, error) { - buf = slices.GrowLen(buf, symbolsBlockHeaderSize) - if _, err := io.ReadFull(r, buf); err != nil { - return buf, err - } - h.unmarshal(buf) - return buf, h.validate() -} - type symbolsBlockEncoder[T any] interface { encode(w io.Writer, block []T) error } type symbolsEncoder[T any] struct { - w io.Writer - e symbolsBlockEncoder[T] - bs int - buf []byte + e symbolsBlockEncoder[T] + bs int } const defaultSymbolsBlockSize = 1 << 10 -func newSymbolsEncoder[T any](w io.Writer, e symbolsBlockEncoder[T]) *symbolsEncoder[T] { - return &symbolsEncoder[T]{w: w, e: e, bs: defaultSymbolsBlockSize} +func newSymbolsEncoder[T any](e symbolsBlockEncoder[T]) *symbolsEncoder[T] { + return &symbolsEncoder[T]{e: e, bs: defaultSymbolsBlockSize} } -func (e *symbolsEncoder[T]) Encode(items []T) (err error) { - h := newSymbolsBlockHeader(len(items), e.bs) - if e.buf, err = writeSymbolsBlockHeader(e.w, e.buf, h); err != nil { - return err - } - for i := uint32(0); i < h.Length; i += h.BlockSize { - block := items[i:math.Min(i+h.BlockSize, h.Length)] - if err = e.e.encode(e.w, block); err != nil { +func (e *symbolsEncoder[T]) Encode(w io.Writer, items []T) (err error) { + l := len(items) + for i := 0; i < l; i += e.bs { + block := items[i:math.Min(i+e.bs, l)] + if err = e.e.encode(w, block); err != nil { return err } } return nil } -func (e *symbolsEncoder[T]) Reset(w io.Writer) { e.w = w } - type symbolsBlockDecoder[T any] interface { decode(r io.Reader, block []T) error } type symbolsDecoder[T any] struct { - r io.Reader - h symbolsBlockHeader + h SymbolsBlockHeader d symbolsBlockDecoder[T] - - buf []byte } -func newSymbolsDecoder[T any](r io.Reader, d symbolsBlockDecoder[T]) *symbolsDecoder[T] { - return &symbolsDecoder[T]{r: r, d: d} +func newSymbolsDecoder[T any](h SymbolsBlockHeader, d symbolsBlockDecoder[T]) *symbolsDecoder[T] { + return &symbolsDecoder[T]{h: h, d: d} } -func (d *symbolsDecoder[T]) Open() (n int, err error) { - d.buf, err = readSymbolsBlockHeader(d.r, d.buf, &d.h) - return int(d.h.Length), err -} - -func (d *symbolsDecoder[T]) Decode(items []T) error 
{ +func (d *symbolsDecoder[T]) Decode(dst []T, r io.Reader) error { + if d.h.BlockSize == 0 || d.h.Length == 0 { + return nil + } + if len(dst) < int(d.h.Length) { + return fmt.Errorf("%w: buffer too short", ErrInvalidSize) + } blocks := int((d.h.Length + d.h.BlockSize - 1) / d.h.BlockSize) for i := 0; i < blocks; i++ { lo := i * int(d.h.BlockSize) hi := math.Min(lo+int(d.h.BlockSize), int(d.h.Length)) - block := items[lo:hi] - if err := d.d.decode(d.r, block); err != nil { + block := dst[lo:hi] + if err := d.d.decode(r, block); err != nil { return err } } return nil } - -func (d *symbolsDecoder[T]) Reset(r io.Reader) { d.r = r } diff --git a/pkg/phlaredb/symdb/partition_memory.go b/pkg/phlaredb/symdb/partition_memory.go index e5a81b2740..2ba82073a0 100644 --- a/pkg/phlaredb/symdb/partition_memory.go +++ b/pkg/phlaredb/symdb/partition_memory.go @@ -368,16 +368,16 @@ func (p *PartitionWriter) Release() { } func (p *PartitionWriter) writeTo(w *writer) (err error) { - if p.header.Strings, err = writeSymbolBlocks(w.dataFile, p.strings.slice, w.stringsEncoder); err != nil { + if p.header.V3.Strings, err = writeSymbolsBlock(w.dataFile, p.strings.slice, w.stringsEncoder); err != nil { return err } - if p.header.Mappings, err = writeSymbolBlocks(w.dataFile, p.mappings.slice, w.mappingsEncoder); err != nil { + if p.header.V3.Mappings, err = writeSymbolsBlock(w.dataFile, p.mappings.slice, w.mappingsEncoder); err != nil { return err } - if p.header.Functions, err = writeSymbolBlocks(w.dataFile, p.functions.slice, w.functionsEncoder); err != nil { + if p.header.V3.Functions, err = writeSymbolsBlock(w.dataFile, p.functions.slice, w.functionsEncoder); err != nil { return err } - if p.header.Locations, err = writeSymbolBlocks(w.dataFile, p.locations.slice, w.locationsEncoder); err != nil { + if p.header.V3.Locations, err = writeSymbolsBlock(w.dataFile, p.locations.slice, w.locationsEncoder); err != nil { return err } for ci, c := range p.stacktraces.chunks { @@ -404,25 +404,16 @@ func (p *PartitionWriter) writeTo(w *writer) (err error) { return nil } -func writeSymbolBlocks[T any](f *fileWriter, s []T, e *symbolsEncoder[T]) ([]SymbolsBlockReference, error) { - // TODO(kolesnikovae): Split into blocks (< 1M). 
- h, err := writeSymbolsBlock(f, func(w io.Writer) error { - e.Reset(w) - err := e.Encode(s) - e.Reset(nil) - return err - }) - return []SymbolsBlockReference{h}, err -} - -func writeSymbolsBlock(w *fileWriter, fn func(io.Writer) error) (h SymbolsBlockReference, err error) { - h.Offset = uint32(w.w.offset) +func writeSymbolsBlock[T any](w *fileWriter, s []T, e *symbolsEncoder[T]) (h SymbolsBlockHeader, err error) { + h.Offset = uint64(w.w.offset) crc := crc32.New(castagnoli) mw := io.MultiWriter(crc, w.w) - if err = fn(mw); err != nil { + if err = e.Encode(mw, s); err != nil { return h, err } - h.Size = uint32(w.w.offset) - h.Offset + h.Size = uint32(w.w.offset) - uint32(h.Offset) h.CRC = crc.Sum32() + h.Length = uint32(len(s)) + h.BlockSize = uint32(e.bs) return h, nil } diff --git a/pkg/phlaredb/symdb/strings_test.go b/pkg/phlaredb/symdb/strings_test.go index 5b391957c8..49c306c408 100644 --- a/pkg/phlaredb/symdb/strings_test.go +++ b/pkg/phlaredb/symdb/strings_test.go @@ -77,17 +77,21 @@ func Test_StringsEncoding(t *testing.T) { for _, tc := range testCases { tc := tc t.Run(tc.description, func(t *testing.T) { - var output bytes.Buffer - e := newSymbolsEncoder[string](&output, new(stringsBlockEncoder)) + var buf bytes.Buffer + e := newSymbolsEncoder[string](new(stringsBlockEncoder)) if tc.blockSize > 0 { e.bs = tc.blockSize } - require.NoError(t, e.Encode(tc.strings)) - d := newSymbolsDecoder[string](&output, new(stringsBlockDecoder)) - n, err := d.Open() - require.NoError(t, err) - out := make([]string, n) - require.NoError(t, d.Decode(out)) + require.NoError(t, e.Encode(&buf, tc.strings)) + + h := SymbolsBlockHeader{ + Length: uint32(len(tc.strings)), + BlockSize: uint32(e.bs), + } + d := newSymbolsDecoder[string](h, new(stringsBlockDecoder)) + + out := make([]string, h.Length) + require.NoError(t, d.Decode(out, &buf)) require.Equal(t, tc.strings, out) }) } diff --git a/pkg/phlaredb/symdb/symdb.go b/pkg/phlaredb/symdb/symdb.go index 56ba25e59b..588296c5a2 100644 --- a/pkg/phlaredb/symdb/symdb.go +++ b/pkg/phlaredb/symdb/symdb.go @@ -165,7 +165,7 @@ func (s *SymDB) PartitionWriter(partition uint64) *PartitionWriter { func (s *SymDB) newPartition(partition uint64) *PartitionWriter { p := PartitionWriter{ - header: PartitionHeader{Partition: partition}, + header: PartitionHeader{Partition: partition, V3: new(PartitionHeaderV3)}, stacktraces: newStacktracesPartition(s.config.Stacktraces.MaxNodesPerChunk), } p.strings.init() From fe4b87d169ada9f6db2fc6c3b38d3b3953140fb2 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 23 Apr 2024 14:16:17 +0800 Subject: [PATCH 12/36] add header crc checks --- pkg/phlaredb/symdb/block_reader_test.go | 45 +++++++++++++++++++++++ pkg/phlaredb/symdb/format.go | 6 ++-- pkg/phlaredb/symdb/functions.go | 34 +++++++++--------- pkg/phlaredb/symdb/locations.go | 48 ++++++++++++------------- pkg/phlaredb/symdb/mappings.go | 45 +++++++++++------------ pkg/phlaredb/symdb/strings.go | 43 +++++++++------------- 6 files changed, 123 insertions(+), 98 deletions(-) diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index fcc63ee9b2..f4374489b3 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -2,6 +2,7 @@ package symdb import ( "context" + "os" "testing" "github.com/stretchr/testify/mock" @@ -24,6 +25,50 @@ var testBlockMeta = &block.Meta{ }, } +func Test_write_block_fixture(t *testing.T) { + t.Skip() + b := newBlockSuite(t, [][]string{ + {"testdata/profile.pb.gz"}, + 
{"testdata/profile.pb.gz"}, + }) + const fixtureDir = "testdata/symbols/v3" + require.NoError(t, os.RemoveAll(fixtureDir)) + require.NoError(t, os.Rename(b.config.Dir, fixtureDir)) +} + +func Test_Reader_Open_v3(t *testing.T) { + // The block contains two partitions (0 and 1), each partition + // stores symbols of the testdata/profile.pb.gz profile + b, err := filesystem.NewBucket("testdata/symbols/v3") + require.NoError(t, err) + x, err := Open(context.Background(), b, testBlockMeta) + require.NoError(t, err) + + r := NewResolver(context.Background(), x) + defer r.Release() + r.AddSamples(0, schemav1.Samples{ + StacktraceIDs: []uint32{1, 2, 3, 4, 5}, + Values: []uint64{1, 1, 1, 1, 1}, + }) + r.AddSamples(1, schemav1.Samples{ + StacktraceIDs: []uint32{1, 2, 3, 4, 5}, + Values: []uint64{1, 1, 1, 1, 1}, + }) + + resolved, err := r.Tree() + require.NoError(t, err) + expected := `. +├── github.com/pyroscope-io/pyroscope/pkg/agent.(*ProfileSession).takeSnapshots: self 2 total 8 +│ └── github.com/pyroscope-io/pyroscope/pkg/agent/gospy.(*GoSpy).Snapshot: self 2 total 6 +│ └── github.com/pyroscope-io/pyroscope/pkg/convert.ParsePprof: self 0 total 4 +│ └── io/ioutil.ReadAll: self 2 total 4 +│ └── io.ReadAll: self 2 total 2 +└── net/http.(*conn).serve: self 2 total 2 +` + + require.Equal(t, expected, resolved.String()) +} + func Test_Reader_Open_v2(t *testing.T) { // The block contains two partitions (0 and 1), each partition // stores symbols of the testdata/profile.pb.gz profile diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index d71b2fa642..0164c77ba0 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -310,8 +310,7 @@ const partitionHeaderV3Size = int(unsafe.Sizeof(PartitionHeaderV3{})) func (h *PartitionHeaderV3) size() int { return partitionHeaderV3Size } func (h *PartitionHeaderV3) unmarshal(buf []byte) (err error) { - s := len(buf) - if s%symbolsBlockReferenceSize > 0 { + if len(buf) < symbolsBlockReferenceSize { return ErrInvalidSize } h.Locations.unmarshal(buf[:symbolsBlockReferenceSize]) @@ -614,8 +613,9 @@ func (e *symbolsEncoder[T]) Encode(w io.Writer, items []T) (err error) { return nil } +// TODO: args order type symbolsBlockDecoder[T any] interface { - decode(r io.Reader, block []T) error + decode(r io.Reader, dst []T) error } type symbolsDecoder[T any] struct { diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go index d4f5833f1e..ef2de67237 100644 --- a/pkg/phlaredb/symdb/functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "fmt" + "hash/crc32" "io" "unsafe" @@ -26,29 +27,26 @@ type functionsBlockHeader struct { SystemNameSize uint32 FileNameSize uint32 StartLineSize uint32 + CRC uint32 } func (h *functionsBlockHeader) marshal(b []byte) { - binary.LittleEndian.PutUint32(b[0:4], h.FunctionsLen) - binary.LittleEndian.PutUint32(b[4:8], h.NameSize) - binary.LittleEndian.PutUint32(b[8:12], h.SystemNameSize) - binary.LittleEndian.PutUint32(b[12:16], h.FileNameSize) - binary.LittleEndian.PutUint32(b[16:20], h.StartLineSize) + binary.BigEndian.PutUint32(b[0:4], h.FunctionsLen) + binary.BigEndian.PutUint32(b[4:8], h.NameSize) + binary.BigEndian.PutUint32(b[8:12], h.SystemNameSize) + binary.BigEndian.PutUint32(b[12:16], h.FileNameSize) + binary.BigEndian.PutUint32(b[16:20], h.StartLineSize) + h.CRC = crc32.Checksum(b[0:20], castagnoli) + binary.BigEndian.PutUint32(b[20:24], h.CRC) } func (h *functionsBlockHeader) unmarshal(b []byte) { - h.FunctionsLen = 
binary.LittleEndian.Uint32(b[0:4]) - h.NameSize = binary.LittleEndian.Uint32(b[4:8]) - h.SystemNameSize = binary.LittleEndian.Uint32(b[8:12]) - h.FileNameSize = binary.LittleEndian.Uint32(b[12:16]) - h.StartLineSize = binary.LittleEndian.Uint32(b[16:20]) -} - -// isValid reports whether the header contains sane values. -// This is important as the block might be read before the -// checksum validation. -func (h *functionsBlockHeader) isValid() bool { - return h.FunctionsLen < 1<<20 + h.FunctionsLen = binary.BigEndian.Uint32(b[0:4]) + h.NameSize = binary.BigEndian.Uint32(b[4:8]) + h.SystemNameSize = binary.BigEndian.Uint32(b[8:12]) + h.FileNameSize = binary.BigEndian.Uint32(b[12:16]) + h.StartLineSize = binary.BigEndian.Uint32(b[16:20]) + h.CRC = binary.BigEndian.Uint32(b[20:24]) } type functionsBlockEncoder struct { @@ -126,7 +124,7 @@ func (d *functionsBlockDecoder) readHeader(r io.Reader) error { return nil } d.header.unmarshal(d.tmp) - if !d.header.isValid() { + if crc32.Checksum(d.tmp[:functionsBlockHeaderSize-4], castagnoli) != d.header.CRC { return ErrInvalidSize } return nil diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index 6d5d1ab8f1..f65576f4e7 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "fmt" + "hash/crc32" "io" "unsafe" @@ -31,33 +32,28 @@ type locationsBlockHeader struct { // Optional, might be empty. AddrSize uint32 // Size of the encoded slice of addresses IsFoldedSize uint32 // Size of the encoded slice of is_folded -} - -func (h *locationsBlockHeader) isValid() bool { - return h.LocationsLen > 0 && h.LocationsLen < 1<<20 && - h.MappingSize > 0 && h.MappingSize < 1<<20 && - h.LinesLen > 0 && h.LinesLen < 1<<20 && - h.LinesSize > 0 && h.LinesSize < 1<<20 && - h.AddrSize < 1<<20 && - h.IsFoldedSize < 1<<20 + CRC uint32 // Header CRC. 
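+	// The checksum covers the 24 preceding header bytes and uses the
+	// Castagnoli table; see marshal and unmarshal below.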
} func (h *locationsBlockHeader) marshal(b []byte) { - binary.LittleEndian.PutUint32(b[0:4], h.LocationsLen) - binary.LittleEndian.PutUint32(b[4:8], h.MappingSize) - binary.LittleEndian.PutUint32(b[8:12], h.LinesLen) - binary.LittleEndian.PutUint32(b[12:16], h.LinesSize) - binary.LittleEndian.PutUint32(b[16:20], h.AddrSize) - binary.LittleEndian.PutUint32(b[20:24], h.IsFoldedSize) + binary.BigEndian.PutUint32(b[0:4], h.LocationsLen) + binary.BigEndian.PutUint32(b[4:8], h.MappingSize) + binary.BigEndian.PutUint32(b[8:12], h.LinesLen) + binary.BigEndian.PutUint32(b[12:16], h.LinesSize) + binary.BigEndian.PutUint32(b[16:20], h.AddrSize) + binary.BigEndian.PutUint32(b[20:24], h.IsFoldedSize) + h.CRC = crc32.Checksum(b[0:24], castagnoli) + binary.BigEndian.PutUint32(b[24:28], h.CRC) } func (h *locationsBlockHeader) unmarshal(b []byte) { - h.LocationsLen = binary.LittleEndian.Uint32(b[0:4]) - h.MappingSize = binary.LittleEndian.Uint32(b[4:8]) - h.LinesLen = binary.LittleEndian.Uint32(b[8:12]) - h.LinesSize = binary.LittleEndian.Uint32(b[12:16]) - h.AddrSize = binary.LittleEndian.Uint32(b[16:20]) - h.IsFoldedSize = binary.LittleEndian.Uint32(b[20:24]) + h.LocationsLen = binary.BigEndian.Uint32(b[0:4]) + h.MappingSize = binary.BigEndian.Uint32(b[4:8]) + h.LinesLen = binary.BigEndian.Uint32(b[8:12]) + h.LinesSize = binary.BigEndian.Uint32(b[12:16]) + h.AddrSize = binary.BigEndian.Uint32(b[16:20]) + h.IsFoldedSize = binary.BigEndian.Uint32(b[20:24]) + h.CRC = binary.BigEndian.Uint32(b[24:28]) } type locationsBlockEncoder struct { @@ -169,11 +165,11 @@ type locationsBlockDecoder struct { func (d *locationsBlockDecoder) readHeader(r io.Reader) error { d.tmp = slices.GrowLen(d.tmp, locationsBlockHeaderSize) if _, err := io.ReadFull(r, d.tmp); err != nil { - return nil + return err } d.header.unmarshal(d.tmp) - if !d.header.isValid() { - return ErrInvalidSize + if crc32.Checksum(d.tmp[:locationsBlockHeaderSize-4], castagnoli) != d.header.CRC { + return ErrInvalidCRC } return nil } @@ -182,8 +178,8 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat if err = d.readHeader(r); err != nil { return err } - if d.header.LocationsLen > uint32(len(locations)) { - return fmt.Errorf("locations buffer is too short") + if d.header.LocationsLen != uint32(len(locations)) { + return fmt.Errorf("locations buffer: %w", ErrInvalidSize) } var enc delta.BinaryPackedEncoding diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index d4ce07c531..fbbac30f05 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "fmt" + "hash/crc32" "io" "unsafe" @@ -29,33 +30,30 @@ type mappingsBlockHeader struct { MemoryStartSize uint32 MemoryLimitSize uint32 FileOffsetSize uint32 + CRC uint32 } func (h *mappingsBlockHeader) marshal(b []byte) { - binary.LittleEndian.PutUint32(b[0:4], h.MappingsLen) - binary.LittleEndian.PutUint32(b[4:8], h.FileNameSize) - binary.LittleEndian.PutUint32(b[8:12], h.BuildIDSize) - binary.LittleEndian.PutUint32(b[12:16], h.FlagsSize) - binary.LittleEndian.PutUint32(b[16:20], h.MemoryStartSize) - binary.LittleEndian.PutUint32(b[20:24], h.MemoryLimitSize) - binary.LittleEndian.PutUint32(b[24:28], h.FileOffsetSize) + binary.BigEndian.PutUint32(b[0:4], h.MappingsLen) + binary.BigEndian.PutUint32(b[4:8], h.FileNameSize) + binary.BigEndian.PutUint32(b[8:12], h.BuildIDSize) + binary.BigEndian.PutUint32(b[12:16], h.FlagsSize) + binary.BigEndian.PutUint32(b[16:20], h.MemoryStartSize) + 
binary.BigEndian.PutUint32(b[20:24], h.MemoryLimitSize) + binary.BigEndian.PutUint32(b[24:28], h.FileOffsetSize) + h.CRC = crc32.Checksum(b[0:28], castagnoli) + binary.BigEndian.PutUint32(b[28:32], h.CRC) } func (h *mappingsBlockHeader) unmarshal(b []byte) { - h.MappingsLen = binary.LittleEndian.Uint32(b[0:4]) - h.FileNameSize = binary.LittleEndian.Uint32(b[4:8]) - h.BuildIDSize = binary.LittleEndian.Uint32(b[8:12]) - h.FlagsSize = binary.LittleEndian.Uint32(b[12:16]) - h.MemoryStartSize = binary.LittleEndian.Uint32(b[16:20]) - h.MemoryLimitSize = binary.LittleEndian.Uint32(b[20:24]) - h.FileOffsetSize = binary.LittleEndian.Uint32(b[24:28]) -} - -// isValid reports whether the header contains sane values. -// This is important as the block might be read before the -// checksum validation. -func (h *mappingsBlockHeader) isValid() bool { - return h.MappingsLen < 1<<20 + h.MappingsLen = binary.BigEndian.Uint32(b[0:4]) + h.FileNameSize = binary.BigEndian.Uint32(b[4:8]) + h.BuildIDSize = binary.BigEndian.Uint32(b[8:12]) + h.FlagsSize = binary.BigEndian.Uint32(b[12:16]) + h.MemoryStartSize = binary.BigEndian.Uint32(b[16:20]) + h.MemoryLimitSize = binary.BigEndian.Uint32(b[20:24]) + h.FileOffsetSize = binary.BigEndian.Uint32(b[24:28]) + h.CRC = binary.BigEndian.Uint32(b[28:32]) } type mappingsBlockEncoder struct { @@ -175,10 +173,9 @@ func (d *mappingsBlockDecoder) readHeader(r io.Reader) error { return nil } d.header.unmarshal(d.tmp) - if !d.header.isValid() { - return ErrInvalidSize + if crc32.Checksum(d.tmp[:mappingsBlockHeaderSize-4], castagnoli) != d.header.CRC { + return ErrInvalidCRC } - // TODO: Scale tmp return nil } diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go index a66fc2ae83..b488284646 100644 --- a/pkg/phlaredb/symdb/strings.go +++ b/pkg/phlaredb/symdb/strings.go @@ -27,19 +27,15 @@ type stringsBlockHeader struct { } func (h *stringsBlockHeader) marshal(b []byte) { - binary.LittleEndian.PutUint32(b[0:4], h.StringsLen) + binary.BigEndian.PutUint32(b[0:4], h.StringsLen) b[5] = h.BlockEncoding } func (h *stringsBlockHeader) unmarshal(b []byte) { - h.StringsLen = binary.LittleEndian.Uint32(b[0:4]) + h.StringsLen = binary.BigEndian.Uint32(b[0:4]) h.BlockEncoding = b[5] } -func (h *stringsBlockHeader) isValid() bool { - return h.StringsLen < 1<<20 && h.BlockEncoding == 8 || h.BlockEncoding == 16 -} - type stringsBlockEncoder struct { header stringsBlockHeader buf bytes.Buffer @@ -56,9 +52,7 @@ func (e *stringsBlockEncoder) encode(w io.Writer, strings []string) error { } case 16: for j, s := range strings { - // binary.LittleEndian.PutUint16. 
// TODO: BigEndian - e.tmp[j*2] = byte(len(s)) - e.tmp[j*2+1] = byte(len(s) >> 8) + binary.BigEndian.PutUint16(e.tmp[j*2:], uint16(len(s))) } } if _, err := e.buf.Write(e.tmp[:len(strings)*int(e.header.BlockEncoding)/8]); err != nil { @@ -108,39 +102,34 @@ type stringsBlockDecoder struct { func (d *stringsBlockDecoder) readHeader(r io.Reader) error { d.tmp = slices.GrowLen(d.tmp, stringsBlockHeaderSize) if _, err := io.ReadFull(r, d.tmp); err != nil { - return nil + return err } d.header.unmarshal(d.tmp) - if !d.header.isValid() { - return ErrInvalidSize + if d.header.BlockEncoding == 8 || d.header.BlockEncoding == 16 { + return nil } - return nil + return fmt.Errorf("invalid string block encoding: %d", d.header.BlockEncoding) } func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) { if err = d.readHeader(r); err != nil { return err } - if d.header.StringsLen < uint32(len(strings)) { - return fmt.Errorf("strings buffer is too short") + if d.header.StringsLen != uint32(len(strings)) { + return fmt.Errorf("invalid string buffer size") } - switch d.header.BlockEncoding { - case 8: + if d.header.BlockEncoding == 8 { return d.decodeStrings8(r, strings) - case 16: - return d.decodeStrings16(r, strings) - default: - // Header validation ensures this never happens. } - return nil + return d.decodeStrings16(r, strings) } func (d *stringsBlockDecoder) decodeStrings8(r io.Reader, dst []string) (err error) { - d.tmp = slices.GrowLen(d.tmp, int(d.header.StringsLen)) // 1 byte per string. + d.tmp = slices.GrowLen(d.tmp, len(dst)) // 1 byte per string. if _, err = io.ReadFull(r, d.tmp); err != nil { return err } - for i := uint32(0); i < d.header.StringsLen; i++ { + for i := 0; i < len(dst); i++ { s := make([]byte, d.tmp[i]) if _, err = io.ReadFull(r, s); err != nil { return err @@ -151,12 +140,12 @@ func (d *stringsBlockDecoder) decodeStrings8(r io.Reader, dst []string) (err err } func (d *stringsBlockDecoder) decodeStrings16(r io.Reader, dst []string) (err error) { - d.tmp = slices.GrowLen(d.tmp, int(d.header.StringsLen)*2) // 2 bytes per string. + d.tmp = slices.GrowLen(d.tmp, len(dst)*2) // 2 bytes per string. 
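+	// Two-pass decode: the packed big-endian uint16 length array is read
+	// first, then each string body follows back-to-back in block order.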
if _, err = io.ReadFull(r, d.tmp); err != nil { return err } - for i := uint32(0); i < d.header.StringsLen; i++ { - l := uint16(d.tmp[i*2]) | uint16(d.tmp[i*2+1])<<8 + for i := 0; i < len(dst); i++ { + l := binary.BigEndian.Uint16(d.tmp[i*2:]) s := make([]byte, l) if _, err = io.ReadFull(r, s); err != nil { return err From 59cee243b889e88259ae2971e05143533cae2f64 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 23 Apr 2024 14:48:47 +0800 Subject: [PATCH 13/36] add ReadIndexFile fuzzy test --- pkg/phlaredb/symdb/block_reader_test.go | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index f4374489b3..32ae659019 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -36,6 +36,22 @@ func Test_write_block_fixture(t *testing.T) { require.NoError(t, os.Rename(b.config.Dir, fixtureDir)) } +func Fuzz_ReadIndexFile(f *testing.F) { + files := []string{ + "testdata/symbols/v3/index.symdb", + "testdata/symbols/v2/index.symdb", + "testdata/symbols/v1/index.symdb", + } + for _, path := range files { + data, err := os.ReadFile(path) + require.NoError(f, err) + f.Add(data) + } + f.Fuzz(func(_ *testing.T, b []byte) { + _, _ = ReadIndexFile(b) + }) +} + func Test_Reader_Open_v3(t *testing.T) { // The block contains two partitions (0 and 1), each partition // stores symbols of the testdata/profile.pb.gz profile From b48faa75db1ac357e648b0f88272e7a8ca00d94f Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 23 Apr 2024 15:18:57 +0800 Subject: [PATCH 14/36] add fetch buffer pool --- pkg/phlaredb/symdb/block_reader.go | 34 +++++++++++++++++------ pkg/phlaredb/symdb/block_reader_test.go | 16 +++++++++++ pkg/phlaredb/symdb/resolver_pprof_test.go | 4 +-- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index 030ecb048a..4de209f969 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -26,9 +26,6 @@ type Reader struct { files map[string]block.File meta *block.Meta - // TODO: fetch buffer pool - fetchBufferSize int - index IndexFile partitions []*partition partitionsMap map[uint64]*partition @@ -36,15 +33,11 @@ type Reader struct { parquetFiles *parquetFiles } -const defaultFetchBufferSize = 4096 - func Open(ctx context.Context, b objstore.BucketReader, m *block.Meta) (*Reader, error) { r := &Reader{ bucket: b, meta: m, files: make(map[string]block.File), - - fetchBufferSize: defaultFetchBufferSize, } for _, f := range r.meta.Files { r.files[filepath.Base(f.RelPath)] = f @@ -356,10 +349,12 @@ func (c *stacktraceBlock) fetch(ctx context.Context) error { if err != nil { return err } + r := getFetchBufReader(rc) defer func() { + putFetchBufReader(r) err = multierror.New(err, rc.Close()).Err() }() - return c.readFrom(bufio.NewReaderSize(rc, c.reader.fetchBufferSize)) + return c.readFrom(r) }) } @@ -416,10 +411,12 @@ func (t *rawTable[T]) fetch(ctx context.Context) error { if err != nil { return err } + r := getFetchBufReader(rc) defer func() { + putFetchBufReader(r) err = multierror.New(err, rc.Close()).Err() }() - return t.readFrom(bufio.NewReaderSize(rc, t.reader.fetchBufferSize)) + return t.readFrom(r) }) } @@ -490,3 +487,22 @@ func (tx *fetchTx) release() { } wg.Wait() } + +const defaultFetchBufferSize = 64 << 10 + +var fetchBufReaderPool = sync.Pool{ + New: func() any { + return bufio.NewReaderSize(nil, defaultFetchBufferSize) + }, 
+} + +func getFetchBufReader(r io.Reader) *bufio.Reader { + b := fetchBufReaderPool.Get().(*bufio.Reader) + b.Reset(r) + return b +} + +func putFetchBufReader(b *bufio.Reader) { + b.Reset(nil) + fetchBufReaderPool.Put(b) +} diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index 32ae659019..623cd985ff 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -141,3 +141,19 @@ type mockStacktraceInserter struct{ mock.Mock } func (m *mockStacktraceInserter) InsertStacktrace(stacktraceID uint32, locations []int32) { m.Called(stacktraceID, locations) } + +func Benchmark_Reader_ResolvePprof(b *testing.B) { + ctx := context.Background() + s := newBlockSuite(b, [][]string{ + {"testdata/big-profile.pb.gz"}, + }) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + r := NewResolver(ctx, s.reader) + r.AddSamples(0, s.indexed[0][0].Samples) + _, err := r.Pprof() + require.NoError(b, err) + r.Release() + } +} diff --git a/pkg/phlaredb/symdb/resolver_pprof_test.go b/pkg/phlaredb/symdb/resolver_pprof_test.go index 207cf92ea3..3a1363b70b 100644 --- a/pkg/phlaredb/symdb/resolver_pprof_test.go +++ b/pkg/phlaredb/symdb/resolver_pprof_test.go @@ -53,9 +53,7 @@ func Benchmark_Resolver_ResolvePprof_Small(b *testing.B) { } func Benchmark_Resolver_ResolvePprof_Big(b *testing.B) { - s := memSuite{t: b, files: [][]string{{"testdata/big-profile.pb.gz"}}} - s.config = DefaultConfig().WithDirectory(b.TempDir()) - s.init() + s := newMemSuite(b, [][]string{{"testdata/big-profile.pb.gz"}}) samples := s.indexed[0][0].Samples b.Run("0", benchmarkResolverResolvePprof(s.db, samples, 0)) b.Run("8K", benchmarkResolverResolvePprof(s.db, samples, 8<<10)) From 40955ca72699ed820386f3643fee33593035ad00 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 23 Apr 2024 15:50:42 +0800 Subject: [PATCH 15/36] improve benchmarks --- pkg/phlaredb/symdb/block_reader_test.go | 14 +++++++++---- pkg/phlaredb/symdb/resolver.go | 2 +- pkg/phlaredb/symdb/resolver_pprof.go | 2 ++ pkg/phlaredb/symdb/resolver_pprof_test.go | 4 +++- pkg/phlaredb/symdb/resolver_pprof_truncate.go | 2 ++ pkg/phlaredb/symdb/symdb_test.go | 21 ++++++++++++++++++- 6 files changed, 38 insertions(+), 7 deletions(-) diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index 623cd985ff..98167a022b 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -144,16 +144,22 @@ func (m *mockStacktraceInserter) InsertStacktrace(stacktraceID uint32, locations func Benchmark_Reader_ResolvePprof(b *testing.B) { ctx := context.Background() - s := newBlockSuite(b, [][]string{ - {"testdata/big-profile.pb.gz"}, - }) + s := memSuite{t: b, files: [][]string{{"testdata/big-profile.pb.gz"}}} + s.config = DefaultConfig().WithDirectory(b.TempDir()) + s.init() + bs := blockSuite{memSuite: &s} + bs.flush() + b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { - r := NewResolver(ctx, s.reader) + r := NewResolver(ctx, bs.reader) r.AddSamples(0, s.indexed[0][0].Samples) _, err := r.Pprof() require.NoError(b, err) r.Release() } + + b.ReportMetric(float64(bs.testBucket.getRangeCount.Load())/float64(b.N), "get_range_calls/op") + b.ReportMetric(float64(bs.testBucket.getRangeSize.Load())/float64(b.N), "get_range_bytes/op") } diff --git a/pkg/phlaredb/symdb/resolver.go b/pkg/phlaredb/symdb/resolver.go index 8705ae31cd..2ba116dcdb 100644 --- a/pkg/phlaredb/symdb/resolver.go +++ 
b/pkg/phlaredb/symdb/resolver.go
@@ -247,7 +247,7 @@ func (r *Resolver) Pprof() (*googlev1.Profile, error) {
 		}
 		lock.Lock()
 		defer lock.Unlock()
-		return p.Merge(resolved)
+		return p.MergeNoClone(resolved)
 	})
 	if err != nil {
 		return nil, err
diff --git a/pkg/phlaredb/symdb/resolver_pprof.go b/pkg/phlaredb/symdb/resolver_pprof.go
index f6541c4d77..42784ffb72 100644
--- a/pkg/phlaredb/symdb/resolver_pprof.go
+++ b/pkg/phlaredb/symdb/resolver_pprof.go
@@ -22,6 +22,8 @@ func (r *pprofProtoSymbols) init(symbols *Symbols, samples schemav1.Samples) {
 
 func (r *pprofProtoSymbols) InsertStacktrace(_ uint32, locations []int32) {
 	s := &googlev1.Sample{
+		// TODO(kolesnikovae): Do not allocate new slices per sample.
+		// Instead, pre-allocate slabs and reference samples from them.
 		LocationId: make([]uint64, len(locations)),
 		Value:      []int64{int64(r.samples.Values[r.cur])},
 	}
diff --git a/pkg/phlaredb/symdb/resolver_pprof_test.go b/pkg/phlaredb/symdb/resolver_pprof_test.go
index 3a1363b70b..207cf92ea3 100644
--- a/pkg/phlaredb/symdb/resolver_pprof_test.go
+++ b/pkg/phlaredb/symdb/resolver_pprof_test.go
@@ -53,7 +53,9 @@ func Benchmark_Resolver_ResolvePprof_Small(b *testing.B) {
 }
 
 func Benchmark_Resolver_ResolvePprof_Big(b *testing.B) {
-	s := newMemSuite(b, [][]string{{"testdata/big-profile.pb.gz"}})
+	s := memSuite{t: b, files: [][]string{{"testdata/big-profile.pb.gz"}}}
+	s.config = DefaultConfig().WithDirectory(b.TempDir())
+	s.init()
 	samples := s.indexed[0][0].Samples
 	b.Run("0", benchmarkResolverResolvePprof(s.db, samples, 0))
 	b.Run("8K", benchmarkResolverResolvePprof(s.db, samples, 8<<10))
diff --git a/pkg/phlaredb/symdb/resolver_pprof_truncate.go b/pkg/phlaredb/symdb/resolver_pprof_truncate.go
index 0bda35c0c2..fa36a19a45 100644
--- a/pkg/phlaredb/symdb/resolver_pprof_truncate.go
+++ b/pkg/phlaredb/symdb/resolver_pprof_truncate.go
@@ -174,6 +174,8 @@ func (r *pprofProtoTruncatedSymbols) addSample(n truncatedStacktraceSample) {
 	}
 	// If this is a new stack trace, copy locations, create
 	// the sample, and add the stack trace to the map.
+	// TODO(kolesnikovae): Do not allocate new slices per sample.
+	// Instead, pre-allocate slabs and reference samples from them.
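+	// The idea: carve LocationId slices out of large pre-allocated []uint64
+	// chunks, starting a new chunk when the current one fills up. Growing a
+	// single slab with append would strand earlier sub-slices on old backing
+	// arrays and keep those arrays alive, defeating the optimization.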
locationsCopy := make([]uint64, len(r.locationsBuf)) copy(locationsCopy, r.locationsBuf) s := &googlev1.Sample{LocationId: locationsCopy, Value: []int64{n.value}} diff --git a/pkg/phlaredb/symdb/symdb_test.go b/pkg/phlaredb/symdb/symdb_test.go index d304e30459..dccb731650 100644 --- a/pkg/phlaredb/symdb/symdb_test.go +++ b/pkg/phlaredb/symdb/symdb_test.go @@ -2,11 +2,14 @@ package symdb import ( "context" + "io" "sort" + "sync/atomic" "testing" "github.com/cespare/xxhash/v2" "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" phlaremodel "github.com/grafana/pyroscope/pkg/model" @@ -28,6 +31,7 @@ type memSuite struct { type blockSuite struct { *memSuite reader *Reader + testBucket } func newMemSuite(t testing.TB, files [][]string) *memSuite { @@ -77,7 +81,10 @@ func (s *memSuite) writeProfileFromFile(p uint64, f string) { func (s *blockSuite) flush() { require.NoError(s.t, s.db.Flush()) - b, err := filesystem.NewBucket(s.config.Dir) + b, err := filesystem.NewBucket(s.config.Dir, func(x objstore.Bucket) (objstore.Bucket, error) { + s.testBucket.Bucket = x + return &s.testBucket, nil + }) require.NoError(s.t, err) s.reader, err = Open(context.Background(), b, testBlockMeta) require.NoError(s.t, err) @@ -87,6 +94,18 @@ func (s *blockSuite) teardown() { require.NoError(s.t, s.reader.Close()) } +type testBucket struct { + getRangeCount atomic.Int64 + getRangeSize atomic.Int64 + objstore.Bucket +} + +func (b *testBucket) GetRange(ctx context.Context, name string, off, length int64) (io.ReadCloser, error) { + b.getRangeCount.Add(1) + b.getRangeSize.Add(length) + return b.Bucket.GetRange(ctx, name, off, length) +} + //nolint:unparam func pprofFingerprint(p *googlev1.Profile, typ int) [][2]uint64 { m := make(map[uint64]uint64, len(p.Sample)) From 3f7838813ab1ef43a762ba195294f2baf9cc679b Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 23 Apr 2024 17:53:25 +0800 Subject: [PATCH 16/36] fix tests --- pkg/phlaredb/compact_test.go | 8 ++----- pkg/phlaredb/head_test.go | 34 +++--------------------------- pkg/phlaredb/symdb/block_reader.go | 27 +++++++++++++++++++++--- pkg/phlaredb/symdb/locations.go | 2 +- pkg/phlaredb/symdb/resolver.go | 3 ++- 5 files changed, 32 insertions(+), 42 deletions(-) diff --git a/pkg/phlaredb/compact_test.go b/pkg/phlaredb/compact_test.go index e7890ce3bb..7fdca44d86 100644 --- a/pkg/phlaredb/compact_test.go +++ b/pkg/phlaredb/compact_test.go @@ -628,15 +628,11 @@ func TestFlushMeta(t *testing.T) { require.Equal(t, uint64(3), b.Meta().Stats.NumSeries) require.Equal(t, uint64(3), b.Meta().Stats.NumSamples) require.Equal(t, uint64(3), b.Meta().Stats.NumProfiles) - require.Len(t, b.Meta().Files, 8) + require.Len(t, b.Meta().Files, 4) require.Equal(t, "index.tsdb", b.Meta().Files[0].RelPath) require.Equal(t, "profiles.parquet", b.Meta().Files[1].RelPath) - require.Equal(t, "symbols/functions.parquet", b.Meta().Files[2].RelPath) + require.Equal(t, "symbols/data.symdb", b.Meta().Files[2].RelPath) require.Equal(t, "symbols/index.symdb", b.Meta().Files[3].RelPath) - require.Equal(t, "symbols/locations.parquet", b.Meta().Files[4].RelPath) - require.Equal(t, "symbols/mappings.parquet", b.Meta().Files[5].RelPath) - require.Equal(t, "symbols/stacktraces.symdb", b.Meta().Files[6].RelPath) - require.Equal(t, "symbols/strings.parquet", b.Meta().Files[7].RelPath) } func newBlock(t testing.TB, generator func() []*testhelper.ProfileBuilder) *singleBlockQuerier { diff --git a/pkg/phlaredb/head_test.go 
b/pkg/phlaredb/head_test.go index 7f2c5cbd5b..c83cd0b883 100644 --- a/pkg/phlaredb/head_test.go +++ b/pkg/phlaredb/head_test.go @@ -370,40 +370,12 @@ func TestHeadFlush(t *testing.T) { }, }, { - RelPath: "symbols/functions.parquet", - Parquet: &block.ParquetFile{ - NumRowGroups: 2, - NumRows: 1423, - }, + RelPath: "symbols/data.symdb", + SizeBytes: 159203, }, { RelPath: "symbols/index.symdb", - SizeBytes: 308, - }, - { - RelPath: "symbols/locations.parquet", - Parquet: &block.ParquetFile{ - NumRowGroups: 2, - NumRows: 2469, - }, - }, - { - RelPath: "symbols/mappings.parquet", - Parquet: &block.ParquetFile{ - NumRowGroups: 2, - NumRows: 3, - }, - }, - { - RelPath: "symbols/stacktraces.symdb", - SizeBytes: 60366, - }, - { - RelPath: "symbols/strings.parquet", - Parquet: &block.ParquetFile{ - NumRowGroups: 2, - NumRows: 1722, - }, + SizeBytes: 384, }, }, Compaction: block.BlockMetaCompaction{ diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index 4de209f969..30fdd70bac 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -98,10 +98,12 @@ func (r *Reader) file(name string) (block.File, error) { func (r *Reader) partitionReader(h *PartitionHeader) *partition { p := &partition{reader: r} - if r.index.Header.Version == FormatV2 { + switch r.index.Header.Version { + case FormatV1: + p.initEmptyTables(h) + case FormatV2: p.initParquetTables(h) - } - if r.index.Header.Version == FormatV3 { + case FormatV3: p.initTables(h) } p.initStacktraces(h.Stacktraces) @@ -166,6 +168,15 @@ func (p *partition) tx() *fetchTx { return &tx } +// Format V1. +func (p *partition) initEmptyTables(*PartitionHeader) { + p.locations = emptyTable[schemav1.InMemoryLocation]{} + p.mappings = emptyTable[schemav1.InMemoryMapping]{} + p.functions = emptyTable[schemav1.InMemoryFunction]{} + p.strings = emptyTable[string]{} +} + +// Format V2. func (p *partition) initParquetTables(h *PartitionHeader) { p.locations = &parquetTable[schemav1.InMemoryLocation, schemav1.LocationPersister]{ bucket: p.reader.bucket, @@ -189,6 +200,7 @@ func (p *partition) initParquetTables(h *PartitionHeader) { } } +// Format V3. func (p *partition) initTables(h *PartitionHeader) { // TODO(kolesnikovae): decoder pool. p.locations = &rawTable[schemav1.InMemoryLocation]{ @@ -441,6 +453,15 @@ func (t *rawTable[T]) release() { }) } +// This is a stub for versions without tables in the block (format v1). +type emptyTable[T any] struct{} + +func (emptyTable[T]) fetch(context.Context) error { return nil } + +func (emptyTable[T]) release() {} + +func (emptyTable[T]) slice() []T { return nil } + // fetchTx facilitates fetching multiple objects in a transactional manner: // if one of the objects has failed, all the remaining ones are released. 
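+// Members are fetched in the order they were appended; it is safe to
+// release a partially fetched transaction.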
type fetchTx []fetchable diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index f65576f4e7..d0a1665f34 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -114,7 +114,7 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat } if folded { - e.tmp = slices.GrowLen(e.tmp, len(e.folded)/8) + e.tmp = slices.GrowLen(e.tmp, len(e.folded)/8+1) encodeBoolean(e.tmp, e.folded) e.header.IsFoldedSize = uint32(len(e.tmp)) e.buf.Write(e.tmp) diff --git a/pkg/phlaredb/symdb/resolver.go b/pkg/phlaredb/symdb/resolver.go index 2ba116dcdb..af56812d9e 100644 --- a/pkg/phlaredb/symdb/resolver.go +++ b/pkg/phlaredb/symdb/resolver.go @@ -247,7 +247,8 @@ func (r *Resolver) Pprof() (*googlev1.Profile, error) { } lock.Lock() defer lock.Unlock() - return p.MergeNoClone(resolved) + // TODO(kolesnikovae): Use MergeNoClone. + return p.Merge(resolved) }) if err != nil { return nil, err From 711eee6b51514c1a0f44412993965b28a42df834 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Tue, 23 Apr 2024 18:45:51 +0800 Subject: [PATCH 17/36] fix tests --- pkg/phlaredb/block/block_test.go | 10 +++++----- .../symdb/testdata/symbols/v3/data.symdb | Bin 0 -> 88564 bytes .../symdb/testdata/symbols/v3/index.symdb | Bin 0 -> 640 bytes 3 files changed, 5 insertions(+), 5 deletions(-) create mode 100644 pkg/phlaredb/symdb/testdata/symbols/v3/data.symdb create mode 100644 pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb diff --git a/pkg/phlaredb/block/block_test.go b/pkg/phlaredb/block/block_test.go index 058c9a31c2..c93da7e2b6 100644 --- a/pkg/phlaredb/block/block_test.go +++ b/pkg/phlaredb/block/block_test.go @@ -96,7 +96,7 @@ func TestDelete(t *testing.T) { }) require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(dir, meta.ULID.String()))) - require.Equal(t, 9, len(objects(t, bkt, meta.ULID))) + require.Equal(t, 5, len(objects(t, bkt, meta.ULID))) markedForDeletion := promauto.With(prometheus.NewRegistry()).NewCounter(prometheus.CounterOpts{Name: "test"}) require.NoError(t, block.MarkForDeletion(ctx, log.NewNopLogger(), bkt, meta.ULID, "", false, markedForDeletion)) @@ -116,7 +116,7 @@ func TestDelete(t *testing.T) { } }) require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b2.ULID.String()))) - require.Equal(t, 9, len(objects(t, bkt, b2.ULID))) + require.Equal(t, 5, len(objects(t, bkt, b2.ULID))) // Remove meta.json and check if delete can delete it. 
 	require.NoError(t, bkt.Delete(ctx, path.Join(b2.ULID.String(), block.MetaFilename)))
 
@@ -196,7 +196,7 @@ func TestUpload(t *testing.T) {
 	t.Run("full block", func(t *testing.T) {
 		require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b1.ULID.String())))
-		require.Equal(t, 9, len(bkt.Objects()))
+		require.Equal(t, 5, len(bkt.Objects()))
 		objs := bkt.Objects()
 		require.Contains(t, objs, path.Join(b1.ULID.String(), block.MetaFilename))
 		require.Contains(t, objs, path.Join(b1.ULID.String(), block.IndexFilename))
@@ -205,7 +205,7 @@ func TestUpload(t *testing.T) {
 
 	t.Run("upload is idempotent", func(t *testing.T) {
 		require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b1.ULID.String())))
-		require.Equal(t, 9, len(bkt.Objects()))
+		require.Equal(t, 5, len(bkt.Objects()))
 		objs := bkt.Objects()
 		require.Contains(t, objs, path.Join(b1.ULID.String(), block.MetaFilename))
 		require.Contains(t, objs, path.Join(b1.ULID.String(), block.IndexFilename))
@@ -355,7 +355,7 @@ func TestUploadCleanup(t *testing.T) {
 	require.ErrorIs(t, uploadErr, errUploadFailed)
 
 	// If upload of meta.json fails, nothing is cleaned up.
-	require.Equal(t, 9, len(bkt.Objects()))
+	require.Equal(t, 5, len(bkt.Objects()))
 	require.Greater(t, len(bkt.Objects()[path.Join(b1.String(), block.IndexFilename)]), 0)
 	require.Greater(t, len(bkt.Objects()[path.Join(b1.String(), block.MetaFilename)]), 0)
 }
diff --git a/pkg/phlaredb/symdb/testdata/symbols/v3/data.symdb b/pkg/phlaredb/symdb/testdata/symbols/v3/data.symdb
new file mode 100644
index 0000000000000000000000000000000000000000..486c57d5984b5e3ec14a3e9793b63813aa780b7e
GIT binary patch
literal 88564
[88564 bytes of base85-encoded binary test data omitted]

diff --git a/pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb b/pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb
new file mode 100644
index 0000000000000000000000000000000000000000..da922afdaaf82367ac39784e397de8ab2c557851
GIT binary patch
literal 640
[640 bytes of base85-encoded binary test data omitted]

From: Anton Kolesnikov
Date: Wed, 24 Apr 2024 19:34:42 +0800
Subject: [PATCH 18/36] granular versioning

---
 pkg/phlaredb/symdb/block_reader.go     | 54 ++++++++++++++++++--------
 pkg/phlaredb/symdb/block_writer.go     |  9 ++---
 pkg/phlaredb/symdb/format.go           | 36 ++++++++++++-----
 pkg/phlaredb/symdb/functions.go        | 15 +++++++
 pkg/phlaredb/symdb/locations.go        | 15 +++++++
 pkg/phlaredb/symdb/mappings.go         | 15 +++++++
 pkg/phlaredb/symdb/partition_memory.go |  5 ++-
 pkg/phlaredb/symdb/strings.go          | 15 +++++++
 pkg/phlaredb/symdb/strings_test.go     |  8 ++--
 9 files changed, 135 insertions(+), 37 deletions(-)

diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go
index 30fdd70bac..a204dcc7c1 100644
--- a/pkg/phlaredb/symdb/block_reader.go
+++ b/pkg/phlaredb/symdb/block_reader.go
@@ -54,9 +54,12 @@ func Open(ctx context.Context, b objstore.BucketReader, m *block.Meta) (*Reader,
 	r.partitionsMap = make(map[uint64]*partition, len(r.index.PartitionHeaders))
 	r.partitions = make([]*partition, len(r.index.PartitionHeaders))
 	for i, h := range r.index.PartitionHeaders {
-		ph := r.partitionReader(h)
-		r.partitionsMap[h.Partition] = ph
-		r.partitions[i] = ph
+		var p *partition
+		if p, err = r.partitionReader(h); err != nil {
+			return nil, err
+		}
+		r.partitionsMap[h.Partition] = p
+		r.partitions[i] = p
 	}
 	return r, nil
 }
@@ -96,7 +99,7 @@ func (r *Reader) file(name string) (block.File, error) {
 	return f, nil
 }
 
-func (r *Reader) partitionReader(h *PartitionHeader) *partition {
+func (r *Reader) partitionReader(h *PartitionHeader) (*partition, error) {
 	p := &partition{reader: r}
 	switch r.index.Header.Version {
 	case FormatV1:
@@ -104,10 +107,12 @@ func (r *Reader) partitionReader(h *PartitionHeader) *partition {
 	case FormatV2:
 		p.initParquetTables(h)
 	case FormatV3:
-		p.initTables(h)
+		if err := p.initTables(h); err != nil {
+			return nil, err
+		}
 	}
 	p.initStacktraces(h.Stacktraces)
-	return p
+	return p, nil
 }
 
 var ErrPartitionNotFound = fmt.Errorf("partition not found")
@@ -201,28 +206,43 @@ func (p *partition) initParquetTables(h *PartitionHeader) {
 }
 
 // Format V3.
-func (p *partition) initTables(h *PartitionHeader) {
-	// TODO(kolesnikovae): decoder pool.
- p.locations = &rawTable[schemav1.InMemoryLocation]{ +func (p *partition) initTables(h *PartitionHeader) (err error) { + locations := &rawTable[schemav1.InMemoryLocation]{ reader: p.reader, header: h.V3.Locations, - dec: newSymbolsDecoder[schemav1.InMemoryLocation](h.V3.Locations, new(locationsBlockDecoder)), } - p.mappings = &rawTable[schemav1.InMemoryMapping]{ + if locations.dec, err = locationsDecoder(h.V3.Locations); err != nil { + return err + } + p.locations = locations + + mappings := &rawTable[schemav1.InMemoryMapping]{ reader: p.reader, header: h.V3.Mappings, - dec: newSymbolsDecoder[schemav1.InMemoryMapping](h.V3.Mappings, new(mappingsBlockDecoder)), } - p.functions = &rawTable[schemav1.InMemoryFunction]{ + if mappings.dec, err = mappingsDecoder(h.V3.Mappings); err != nil { + return err + } + p.mappings = mappings + + functions := &rawTable[schemav1.InMemoryFunction]{ reader: p.reader, header: h.V3.Functions, - dec: newSymbolsDecoder[schemav1.InMemoryFunction](h.V3.Functions, new(functionsBlockDecoder)), } - p.strings = &rawTable[string]{ + if functions.dec, err = functionsDecoder(h.V3.Functions); err != nil { + return err + } + p.functions = functions + + strings := &rawTable[string]{ reader: p.reader, header: h.V3.Strings, - dec: newSymbolsDecoder[string](h.V3.Strings, new(stringsBlockDecoder)), } + if strings.dec, err = stringsDecoder(h.V3.Strings); err != nil { + return err + } + p.strings = strings + return nil } func (p *partition) Symbols() *Symbols { @@ -436,7 +456,7 @@ func (t *rawTable[T]) readFrom(r *bufio.Reader) error { crc := crc32.New(castagnoli) tee := io.TeeReader(r, crc) t.s = make([]T, t.header.Length) - if err := t.dec.Decode(t.s, tee); err != nil { + if err := t.dec.decode(t.s, tee); err != nil { return fmt.Errorf("failed to decode symbols: %w", err) } if t.header.CRC != crc.Sum32() { diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index 35c8687d2e..19be26e071 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -37,11 +37,10 @@ func newWriter(c *Config) *writer { }, }, - // TODO(kolesnikovae): encoder pool. - stringsEncoder: newSymbolsEncoder[string](new(stringsBlockEncoder)), - mappingsEncoder: newSymbolsEncoder[v1.InMemoryMapping](new(mappingsBlockEncoder)), - functionsEncoder: newSymbolsEncoder[v1.InMemoryFunction](new(functionsBlockEncoder)), - locationsEncoder: newSymbolsEncoder[v1.InMemoryLocation](new(locationsBlockEncoder)), + stringsEncoder: newStringsEncoder(), + mappingsEncoder: newMappingsEncoder(), + functionsEncoder: newFunctionsEncoder(), + locationsEncoder: newLocationsEncoder(), } } diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 0164c77ba0..81bda15889 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -350,8 +350,20 @@ type SymbolsBlockHeader struct { Length uint32 // BlockSize denotes the number of items per block. BlockSize uint32 + // Format of the encoded data. 
+ Format SymbolsBlockFormat } +type SymbolsBlockFormat uint32 + +const ( + _ SymbolsBlockFormat = iota + BlockLocationsV1 + BlockFunctionsV1 + BlockMappingsV1 + BlockStringsV1 +) + const symbolsBlockReferenceSize = int(unsafe.Sizeof(SymbolsBlockHeader{})) func (h *SymbolsBlockHeader) marshal(b []byte) { @@ -360,6 +372,7 @@ func (h *SymbolsBlockHeader) marshal(b []byte) { binary.BigEndian.PutUint32(b[12:16], h.CRC) binary.BigEndian.PutUint32(b[16:20], h.Length) binary.BigEndian.PutUint32(b[20:24], h.BlockSize) + binary.BigEndian.PutUint32(b[24:28], uint32(h.Format)) } func (h *SymbolsBlockHeader) unmarshal(b []byte) { @@ -368,6 +381,7 @@ func (h *SymbolsBlockHeader) unmarshal(b []byte) { h.CRC = binary.BigEndian.Uint32(b[12:16]) h.Length = binary.BigEndian.Uint32(b[16:20]) h.BlockSize = binary.BigEndian.Uint32(b[20:24]) + h.Format = SymbolsBlockFormat(binary.BigEndian.Uint32(b[24:28])) } func marshalSymbolsBlockReferences(b []byte, refs ...SymbolsBlockHeader) int { @@ -589,31 +603,35 @@ func (h *StacktraceBlockHeader) unmarshal(b []byte) { type symbolsBlockEncoder[T any] interface { encode(w io.Writer, block []T) error + format() SymbolsBlockFormat } type symbolsEncoder[T any] struct { - e symbolsBlockEncoder[T] - bs int + blockEncoder symbolsBlockEncoder[T] + blockSize int } const defaultSymbolsBlockSize = 1 << 10 func newSymbolsEncoder[T any](e symbolsBlockEncoder[T]) *symbolsEncoder[T] { - return &symbolsEncoder[T]{e: e, bs: defaultSymbolsBlockSize} + return &symbolsEncoder[T]{blockEncoder: e, blockSize: defaultSymbolsBlockSize} } -func (e *symbolsEncoder[T]) Encode(w io.Writer, items []T) (err error) { +func (e *symbolsEncoder[T]) encode(w io.Writer, items []T) (err error) { l := len(items) - for i := 0; i < l; i += e.bs { - block := items[i:math.Min(i+e.bs, l)] - if err = e.e.encode(w, block); err != nil { + for i := 0; i < l; i += e.blockSize { + block := items[i:math.Min(i+e.blockSize, l)] + if err = e.blockEncoder.encode(w, block); err != nil { return err } } return nil } -// TODO: args order +func (e *symbolsEncoder[T]) format() SymbolsBlockFormat { + return e.blockEncoder.format() +} + type symbolsBlockDecoder[T any] interface { decode(r io.Reader, dst []T) error } @@ -627,7 +645,7 @@ func newSymbolsDecoder[T any](h SymbolsBlockHeader, d symbolsBlockDecoder[T]) *s return &symbolsDecoder[T]{h: h, d: d} } -func (d *symbolsDecoder[T]) Decode(dst []T, r io.Reader) error { +func (d *symbolsDecoder[T]) decode(dst []T, r io.Reader) error { if d.h.BlockSize == 0 || d.h.Length == 0 { return nil } diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go index ef2de67237..14c09ee4b6 100644 --- a/pkg/phlaredb/symdb/functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -57,6 +57,14 @@ type functionsBlockEncoder struct { ints []int32 } +func newFunctionsEncoder() *symbolsEncoder[v1.InMemoryFunction] { + return newSymbolsEncoder[v1.InMemoryFunction](new(functionsBlockEncoder)) +} + +func (e *functionsBlockEncoder) format() SymbolsBlockFormat { + return BlockFunctionsV1 +} + func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunction) error { e.initWrite(len(functions)) var enc delta.BinaryPackedEncoding @@ -118,6 +126,13 @@ type functionsBlockDecoder struct { tmp []byte } +func functionsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryFunction], error) { + if h.Format == BlockFunctionsV1 { + return newSymbolsDecoder[v1.InMemoryFunction](h, new(functionsBlockDecoder)), nil + } + return nil, fmt.Errorf("%w: unknown functions format: %d", 
ErrUnknownVersion, h.Format) +} + func (d *functionsBlockDecoder) readHeader(r io.Reader) error { d.tmp = slices.GrowLen(d.tmp, functionsBlockHeaderSize) if _, err := io.ReadFull(r, d.tmp); err != nil { diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index d0a1665f34..8d93aac522 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -74,6 +74,14 @@ type locationsBlockEncoder struct { buf bytes.Buffer } +func newLocationsEncoder() *symbolsEncoder[v1.InMemoryLocation] { + return newSymbolsEncoder[v1.InMemoryLocation](new(locationsBlockEncoder)) +} + +func (e *locationsBlockEncoder) format() SymbolsBlockFormat { + return BlockLocationsV1 +} + func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) error { e.initWrite(len(locations)) var addr int64 @@ -162,6 +170,13 @@ type locationsBlockDecoder struct { tmp []byte } +func locationsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryLocation], error) { + if h.Format == BlockLocationsV1 { + return newSymbolsDecoder[v1.InMemoryLocation](h, new(locationsBlockDecoder)), nil + } + return nil, fmt.Errorf("%w: unknown locations format: %d", ErrUnknownVersion, h.Format) +} + func (d *locationsBlockDecoder) readHeader(r io.Reader) error { d.tmp = slices.GrowLen(d.tmp, locationsBlockHeaderSize) if _, err := io.ReadFull(r, d.tmp); err != nil { diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index fbbac30f05..c77ad8bd88 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -65,6 +65,14 @@ type mappingsBlockEncoder struct { ints64 []int64 } +func newMappingsEncoder() *symbolsEncoder[v1.InMemoryMapping] { + return newSymbolsEncoder[v1.InMemoryMapping](new(mappingsBlockEncoder)) +} + +func (e *mappingsBlockEncoder) format() SymbolsBlockFormat { + return BlockMappingsV1 +} + func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping) error { e.initWrite(len(mappings)) var enc delta.BinaryPackedEncoding @@ -167,6 +175,13 @@ type mappingsBlockDecoder struct { tmp []byte } +func mappingsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryMapping], error) { + if h.Format == BlockMappingsV1 { + return newSymbolsDecoder[v1.InMemoryMapping](h, new(mappingsBlockDecoder)), nil + } + return nil, fmt.Errorf("%w: unknown mappings format: %d", ErrUnknownVersion, h.Format) +} + func (d *mappingsBlockDecoder) readHeader(r io.Reader) error { d.tmp = slices.GrowLen(d.tmp, mappingsBlockHeaderSize) if _, err := io.ReadFull(r, d.tmp); err != nil { diff --git a/pkg/phlaredb/symdb/partition_memory.go b/pkg/phlaredb/symdb/partition_memory.go index 2ba82073a0..ab44852dc0 100644 --- a/pkg/phlaredb/symdb/partition_memory.go +++ b/pkg/phlaredb/symdb/partition_memory.go @@ -408,12 +408,13 @@ func writeSymbolsBlock[T any](w *fileWriter, s []T, e *symbolsEncoder[T]) (h Sym h.Offset = uint64(w.w.offset) crc := crc32.New(castagnoli) mw := io.MultiWriter(crc, w.w) - if err = e.Encode(mw, s); err != nil { + if err = e.encode(mw, s); err != nil { return h, err } h.Size = uint32(w.w.offset) - uint32(h.Offset) h.CRC = crc.Sum32() h.Length = uint32(len(s)) - h.BlockSize = uint32(e.bs) + h.BlockSize = uint32(e.blockSize) + h.Format = e.format() return h, nil } diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go index b488284646..bc27701c47 100644 --- a/pkg/phlaredb/symdb/strings.go +++ b/pkg/phlaredb/symdb/strings.go @@ -42,6 +42,14 @@ type stringsBlockEncoder struct { tmp []byte } +func 
newStringsEncoder() *symbolsEncoder[string] { + return newSymbolsEncoder[string](new(stringsBlockEncoder)) +} + +func (e *stringsBlockEncoder) format() SymbolsBlockFormat { + return BlockStringsV1 +} + func (e *stringsBlockEncoder) encode(w io.Writer, strings []string) error { e.initWrite(len(strings)) e.header.BlockEncoding = e.blockEncoding(strings) @@ -99,6 +107,13 @@ type stringsBlockDecoder struct { tmp []byte } +func stringsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[string], error) { + if h.Format == BlockStringsV1 { + return newSymbolsDecoder[string](h, new(stringsBlockDecoder)), nil + } + return nil, fmt.Errorf("%w: unknown strings format: %d", ErrUnknownVersion, h.Format) +} + func (d *stringsBlockDecoder) readHeader(r io.Reader) error { d.tmp = slices.GrowLen(d.tmp, stringsBlockHeaderSize) if _, err := io.ReadFull(r, d.tmp); err != nil { diff --git a/pkg/phlaredb/symdb/strings_test.go b/pkg/phlaredb/symdb/strings_test.go index 49c306c408..f462886043 100644 --- a/pkg/phlaredb/symdb/strings_test.go +++ b/pkg/phlaredb/symdb/strings_test.go @@ -80,18 +80,18 @@ func Test_StringsEncoding(t *testing.T) { var buf bytes.Buffer e := newSymbolsEncoder[string](new(stringsBlockEncoder)) if tc.blockSize > 0 { - e.bs = tc.blockSize + e.blockSize = tc.blockSize } - require.NoError(t, e.Encode(&buf, tc.strings)) + require.NoError(t, e.encode(&buf, tc.strings)) h := SymbolsBlockHeader{ Length: uint32(len(tc.strings)), - BlockSize: uint32(e.bs), + BlockSize: uint32(e.blockSize), } d := newSymbolsDecoder[string](h, new(stringsBlockDecoder)) out := make([]string, h.Length) - require.NoError(t, d.Decode(out, &buf)) + require.NoError(t, d.decode(out, &buf)) require.Equal(t, tc.strings, out) }) } From 3df24259a1bff49e09b598f8624668a8e7c0ced4 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Wed, 24 Apr 2024 20:04:06 +0800 Subject: [PATCH 19/36] small improvements --- pkg/phlaredb/symdb/functions.go | 37 ++++++------- pkg/phlaredb/symdb/locations.go | 37 ++++++------- pkg/phlaredb/symdb/mappings.go | 49 +++++++++--------- pkg/phlaredb/symdb/strings.go | 39 ++++++++------ .../symdb/testdata/symbols/v3/index.symdb | Bin 640 -> 704 bytes 5 files changed, 87 insertions(+), 75 deletions(-) diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go index 14c09ee4b6..71696339c2 100644 --- a/pkg/phlaredb/symdb/functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -120,26 +120,27 @@ func (e *functionsBlockEncoder) initWrite(functions int) { } type functionsBlockDecoder struct { + format SymbolsBlockFormat header functionsBlockHeader ints []int32 - tmp []byte + buf []byte } func functionsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryFunction], error) { if h.Format == BlockFunctionsV1 { - return newSymbolsDecoder[v1.InMemoryFunction](h, new(functionsBlockDecoder)), nil + return newSymbolsDecoder[v1.InMemoryFunction](h, &functionsBlockDecoder{format: h.Format}), nil } return nil, fmt.Errorf("%w: unknown functions format: %d", ErrUnknownVersion, h.Format) } func (d *functionsBlockDecoder) readHeader(r io.Reader) error { - d.tmp = slices.GrowLen(d.tmp, functionsBlockHeaderSize) - if _, err := io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, functionsBlockHeaderSize) + if _, err := io.ReadFull(r, d.buf); err != nil { return nil } - d.header.unmarshal(d.tmp) - if crc32.Checksum(d.tmp[:functionsBlockHeaderSize-4], castagnoli) != d.header.CRC { + d.header.unmarshal(d.buf) + if crc32.Checksum(d.buf[:functionsBlockHeaderSize-4], castagnoli) != d.header.CRC { return 
ErrInvalidSize } return nil @@ -155,11 +156,11 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct var enc delta.BinaryPackedEncoding d.ints = slices.GrowLen(d.ints, int(d.header.FunctionsLen)) - d.tmp = slices.GrowLen(d.tmp, int(d.header.NameSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.NameSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } @@ -167,11 +168,11 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct functions[i].Name = uint32(v) } - d.tmp = slices.GrowLen(d.tmp, int(d.header.SystemNameSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.SystemNameSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } @@ -179,11 +180,11 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct functions[i].SystemName = uint32(v) } - d.tmp = slices.GrowLen(d.tmp, int(d.header.FileNameSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.FileNameSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } @@ -191,11 +192,11 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct functions[i].Filename = uint32(v) } - d.tmp = slices.GrowLen(d.tmp, int(d.header.StartLineSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.StartLineSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index 8d93aac522..653fbae4a8 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -158,6 +158,7 @@ func (e *locationsBlockEncoder) initWrite(locations int) { } type locationsBlockDecoder struct { + format SymbolsBlockFormat header locationsBlockHeader mappings []int32 @@ -167,23 +168,23 @@ type locationsBlockDecoder struct { address []int64 folded []bool - tmp []byte + buf []byte } func locationsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryLocation], error) { if h.Format == BlockLocationsV1 { - return newSymbolsDecoder[v1.InMemoryLocation](h, new(locationsBlockDecoder)), nil + return newSymbolsDecoder[v1.InMemoryLocation](h, &locationsBlockDecoder{format: h.Format}), nil } return nil, fmt.Errorf("%w: unknown locations format: %d", ErrUnknownVersion, h.Format) } func (d *locationsBlockDecoder) readHeader(r io.Reader) error { - d.tmp = slices.GrowLen(d.tmp, locationsBlockHeaderSize) - if _, err := io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, locationsBlockHeaderSize) + if _, err := io.ReadFull(r, d.buf); err != nil { return err } - d.header.unmarshal(d.tmp) - if crc32.Checksum(d.tmp[:locationsBlockHeaderSize-4], castagnoli) != d.header.CRC { + d.header.unmarshal(d.buf) + if crc32.Checksum(d.buf[:locationsBlockHeaderSize-4], castagnoli) != d.header.CRC { return ErrInvalidCRC } return nil @@ -199,11 +200,11 @@ func (d 
*locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat var enc delta.BinaryPackedEncoding // First we decode mapping_id and assign them to locations. - d.tmp = slices.GrowLen(d.tmp, int(d.header.MappingSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.MappingSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.mappings, err = enc.DecodeInt32(d.mappings, d.tmp) + d.mappings, err = enc.DecodeInt32(d.mappings, d.buf) if err != nil { return err } @@ -219,12 +220,12 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat // sub-slices. But it has to be allocated as we can't // reference d.lines, which is reusable. lines := make([]v1.InMemoryLine, d.header.LinesLen) - d.tmp = slices.GrowLen(d.tmp, int(d.header.LinesSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.LinesSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } d.lines = slices.GrowLen(d.lines, int(d.header.LinesLen)) - d.lines, err = enc.DecodeInt32(d.lines, d.tmp) + d.lines, err = enc.DecodeInt32(d.lines, d.buf) if err != nil { return err } @@ -244,23 +245,23 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat // Otherwise, inspect all the optional fields. if int(d.header.AddrSize) > 0 { - d.tmp = slices.GrowLen(d.tmp, int(d.header.AddrSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.AddrSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } d.address = slices.GrowLen(d.address, int(d.header.LocationsLen)) - d.address, err = enc.DecodeInt64(d.address, d.tmp) + d.address, err = enc.DecodeInt64(d.address, d.buf) if err != nil { return err } } if int(d.header.IsFoldedSize) > 0 { - d.tmp = slices.GrowLen(d.tmp, int(d.header.IsFoldedSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.IsFoldedSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } d.folded = slices.GrowLen(d.folded, int(d.header.LocationsLen)) - decodeBoolean(d.folded, d.tmp) + decodeBoolean(d.folded, d.buf) } var o int // Offset within the lines slice. 
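The IsFolded data handled above is bit-packed, one bit per location. That is also why the locations.go change at the top of this series grows the encoder buffer to len(e.folded)/8+1 bytes: the byte count must round up whenever the location count is not a multiple of eight (n/8+1 is a safe upper bound, (n+7)/8 is the exact ceiling). The encodeBoolean/decodeBoolean bodies are not shown in this series, so the round-trip sketch below is an assumption; in particular, the LSB-first bit order is a guess:

package main

import "fmt"

// encodeBoolean packs one bool per bit into dst, which must
// hold at least (len(src)+7)/8 bytes. Bit order here is an
// assumption: least significant bit first within each byte.
func encodeBoolean(dst []byte, src []bool) {
	for i, v := range src {
		if v {
			dst[i/8] |= 1 << (i % 8)
		}
	}
}

// decodeBoolean expands the bits back into dst, reading only
// as many bits as there are output values.
func decodeBoolean(dst []bool, src []byte) {
	for i := range dst {
		dst[i] = src[i/8]&(1<<(i%8)) != 0
	}
}

func main() {
	in := []bool{true, false, true, true, false, false, false, false, true}
	buf := make([]byte, (len(in)+7)/8) // 2 bytes for 9 values
	encodeBoolean(buf, in)
	out := make([]bool, len(in))
	decodeBoolean(out, buf)
	fmt.Println(out) // [true false true true false false false false true]
}

Because the decoder reads exactly len(dst) bits, a trailing padding byte produced by the encoder's over-allocation is simply ignored on the read path shown above.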
diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index c77ad8bd88..7eede61b4f 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -168,27 +168,28 @@ func (e *mappingsBlockEncoder) initWrite(mappings int) { } type mappingsBlockDecoder struct { + format SymbolsBlockFormat header mappingsBlockHeader ints []int32 ints64 []int64 - tmp []byte + buf []byte } func mappingsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryMapping], error) { if h.Format == BlockMappingsV1 { - return newSymbolsDecoder[v1.InMemoryMapping](h, new(mappingsBlockDecoder)), nil + return newSymbolsDecoder[v1.InMemoryMapping](h, &mappingsBlockDecoder{format: h.Format}), nil } return nil, fmt.Errorf("%w: unknown mappings format: %d", ErrUnknownVersion, h.Format) } func (d *mappingsBlockDecoder) readHeader(r io.Reader) error { - d.tmp = slices.GrowLen(d.tmp, mappingsBlockHeaderSize) - if _, err := io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, mappingsBlockHeaderSize) + if _, err := io.ReadFull(r, d.buf); err != nil { return nil } - d.header.unmarshal(d.tmp) - if crc32.Checksum(d.tmp[:mappingsBlockHeaderSize-4], castagnoli) != d.header.CRC { + d.header.unmarshal(d.buf) + if crc32.Checksum(d.buf[:mappingsBlockHeaderSize-4], castagnoli) != d.header.CRC { return ErrInvalidCRC } return nil @@ -205,11 +206,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping var enc delta.BinaryPackedEncoding d.ints = slices.GrowLen(d.ints, int(d.header.MappingsLen)) - d.tmp = slices.GrowLen(d.tmp, int(d.header.FileNameSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.FileNameSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } @@ -217,11 +218,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping mappings[i].Filename = uint32(v) } - d.tmp = slices.GrowLen(d.tmp, int(d.header.BuildIDSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.BuildIDSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } @@ -229,11 +230,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping mappings[i].BuildId = uint32(v) } - d.tmp = slices.GrowLen(d.tmp, int(d.header.FlagsSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.FlagsSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.tmp) + d.ints, err = enc.DecodeInt32(d.ints, d.buf) if err != nil { return err } @@ -246,11 +247,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping if d.header.MemoryStartSize > 0 { d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen)) - d.tmp = slices.GrowLen(d.tmp, int(d.header.MemoryStartSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.MemoryStartSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints64, err = enc.DecodeInt64(d.ints64, d.tmp) + d.ints64, err = enc.DecodeInt64(d.ints64, d.buf) if err != nil { return err } @@ -260,11 +261,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings 
[]v1.InMemoryMapping } if d.header.MemoryLimitSize > 0 { d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen)) - d.tmp = slices.GrowLen(d.tmp, int(d.header.MemoryLimitSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.MemoryLimitSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints64, err = enc.DecodeInt64(d.ints64, d.tmp) + d.ints64, err = enc.DecodeInt64(d.ints64, d.buf) if err != nil { return err } @@ -274,11 +275,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping } if d.header.FileOffsetSize > 0 { d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen)) - d.tmp = slices.GrowLen(d.tmp, int(d.header.FileOffsetSize)) - if _, err = io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.header.FileOffsetSize)) + if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints64, err = enc.DecodeInt64(d.ints64, d.tmp) + d.ints64, err = enc.DecodeInt64(d.ints64, d.buf) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go index bc27701c47..4400218f61 100644 --- a/pkg/phlaredb/symdb/strings.go +++ b/pkg/phlaredb/symdb/strings.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/binary" "fmt" + "hash/crc32" "io" "unsafe" @@ -24,16 +25,20 @@ type stringsBlockHeader struct { StringsLen uint32 BlockEncoding byte _ [3]byte + CRC uint32 } func (h *stringsBlockHeader) marshal(b []byte) { binary.BigEndian.PutUint32(b[0:4], h.StringsLen) - b[5] = h.BlockEncoding + b[5], b[6], b[7], b[8] = h.BlockEncoding, 0, 0, 0 + h.CRC = crc32.Checksum(b[0:8], castagnoli) + binary.BigEndian.PutUint32(b[8:12], h.CRC) } func (h *stringsBlockHeader) unmarshal(b []byte) { h.StringsLen = binary.BigEndian.Uint32(b[0:4]) h.BlockEncoding = b[5] + h.CRC = binary.BigEndian.Uint32(b[8:12]) } type stringsBlockEncoder struct { @@ -103,27 +108,31 @@ func (e *stringsBlockEncoder) initWrite(strings int) { } type stringsBlockDecoder struct { + format SymbolsBlockFormat header stringsBlockHeader - tmp []byte + buf []byte } func stringsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[string], error) { if h.Format == BlockStringsV1 { - return newSymbolsDecoder[string](h, new(stringsBlockDecoder)), nil + return newSymbolsDecoder[string](h, &stringsBlockDecoder{format: h.Format}), nil } return nil, fmt.Errorf("%w: unknown strings format: %d", ErrUnknownVersion, h.Format) } func (d *stringsBlockDecoder) readHeader(r io.Reader) error { - d.tmp = slices.GrowLen(d.tmp, stringsBlockHeaderSize) - if _, err := io.ReadFull(r, d.tmp); err != nil { + d.buf = slices.GrowLen(d.buf, stringsBlockHeaderSize) + if _, err := io.ReadFull(r, d.buf); err != nil { return err } - d.header.unmarshal(d.tmp) - if d.header.BlockEncoding == 8 || d.header.BlockEncoding == 16 { - return nil + d.header.unmarshal(d.buf) + if crc32.Checksum(d.buf[:stringsBlockHeaderSize-4], castagnoli) != d.header.CRC { + return ErrInvalidCRC + } + if d.header.BlockEncoding != 8 && d.header.BlockEncoding != 16 { + return fmt.Errorf("invalid string block encoding: %d", d.header.BlockEncoding) } - return fmt.Errorf("invalid string block encoding: %d", d.header.BlockEncoding) + return nil } func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) { @@ -140,12 +149,12 @@ func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) } func (d *stringsBlockDecoder) decodeStrings8(r io.Reader, dst []string) (err error) { - d.tmp = slices.GrowLen(d.tmp, len(dst)) // 
1 byte per string.
-	if _, err = io.ReadFull(r, d.tmp); err != nil {
+	d.buf = slices.GrowLen(d.buf, len(dst)) // 1 byte per string.
+	if _, err = io.ReadFull(r, d.buf); err != nil {
 		return err
 	}
 	for i := 0; i < len(dst); i++ {
-		s := make([]byte, d.tmp[i])
+		s := make([]byte, d.buf[i])
 		if _, err = io.ReadFull(r, s); err != nil {
 			return err
 		}
@@ -155,12 +164,12 @@ func (d *stringsBlockDecoder) decodeStrings8(r io.Reader, dst []string) (err err
 }
 
 func (d *stringsBlockDecoder) decodeStrings16(r io.Reader, dst []string) (err error) {
-	d.tmp = slices.GrowLen(d.tmp, len(dst)*2) // 2 bytes per string.
-	if _, err = io.ReadFull(r, d.tmp); err != nil {
+	d.buf = slices.GrowLen(d.buf, len(dst)*2) // 2 bytes per string.
+	if _, err = io.ReadFull(r, d.buf); err != nil {
 		return err
 	}
 	for i := 0; i < len(dst); i++ {
-		l := binary.BigEndian.Uint16(d.tmp[i*2:])
+		l := binary.BigEndian.Uint16(d.buf[i*2:])
 		s := make([]byte, l)
 		if _, err = io.ReadFull(r, s); err != nil {
 			return err
diff --git a/pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb b/pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb
index da922afdaaf82367ac39784e397de8ab2c557851..7eb2bf119673f22c8a86ea5921c1272fec00df68 100644
GIT binary patch
literal 704
[704 bytes of base85-encoded binary test data omitted]

literal 640
[640 bytes of base85-encoded binary test data omitted]

From: Anton Kolesnikov
Date: Wed, 24 Apr 2024 20:04:51 +0800
Subject: [PATCH 20/36] small improvements

---
 .../symdb/testdata/symbols/v3/data.symdb | Bin 88564 -> 88572 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/pkg/phlaredb/symdb/testdata/symbols/v3/data.symdb b/pkg/phlaredb/symdb/testdata/symbols/v3/data.symdb
index 486c57d5984b5e3ec14a3e9793b63813aa780b7e..bc22ef6213a164690280fbc5bcf7be432f446bc5 100644
GIT binary patch
delta 34
kcmeyenf1?RRvrchrrq2e3=BQ*Yu;_-__2l!#{9Gf0P5`y!2kdN

delta 26
gcmeyfnf1$NRt^RRrrq2e+|C<$eym{vQJ>ZT0DssClK=n!
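Patch 19 above also gives the strings block header the guard the other tables already have: a Castagnoli CRC over the header bytes, verified before any payload is trusted. A compact, self-contained illustration of that pattern with a simplified 12-byte layout (the real stringsBlockHeader differs in field placement):

package main

import (
	"encoding/binary"
	"errors"
	"fmt"
	"hash/crc32"
)

var (
	castagnoli    = crc32.MakeTable(crc32.Castagnoli)
	errInvalidCRC = errors.New("invalid CRC")
)

// marshalHeader writes the string count and block encoding,
// then seals bytes 0-7 with a CRC32 stored in bytes 8-11.
func marshalHeader(b []byte, stringsLen uint32, blockEncoding byte) {
	binary.BigEndian.PutUint32(b[0:4], stringsLen)
	b[4] = blockEncoding
	binary.BigEndian.PutUint32(b[8:12], crc32.Checksum(b[0:8], castagnoli))
}

// unmarshalHeader rejects a torn or corrupted header before
// the caller reads the block payload that follows it.
func unmarshalHeader(b []byte) (stringsLen uint32, blockEncoding byte, err error) {
	if crc32.Checksum(b[0:8], castagnoli) != binary.BigEndian.Uint32(b[8:12]) {
		return 0, 0, errInvalidCRC
	}
	return binary.BigEndian.Uint32(b[0:4]), b[4], nil
}

func main() {
	b := make([]byte, 12)
	marshalHeader(b, 1024, 8)
	b[0] ^= 0xff // simulate corruption in flight
	if _, _, err := unmarshalHeader(b); err != nil {
		fmt.Println(err) // invalid CRC
	}
}

Verifying the header before decoding means a bit flip is reported as an invalid CRC up front instead of surfacing later as a bogus length driving an oversized allocation.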
From 4c621abd48fe1893a3ea922d600260ff1c8f105e Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Fri, 26 Apr 2024 09:53:41 +0800 Subject: [PATCH 21/36] fix test --- pkg/phlaredb/head_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/phlaredb/head_test.go b/pkg/phlaredb/head_test.go index c83cd0b883..7d3da2d11d 100644 --- a/pkg/phlaredb/head_test.go +++ b/pkg/phlaredb/head_test.go @@ -371,11 +371,11 @@ func TestHeadFlush(t *testing.T) { }, { RelPath: "symbols/data.symdb", - SizeBytes: 159203, + SizeBytes: 159215, }, { RelPath: "symbols/index.symdb", - SizeBytes: 384, + SizeBytes: 448, }, }, Compaction: block.BlockMetaCompaction{ From df69a7dd8edaf3e3a9f06298f958ea467d257b32 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Fri, 26 Apr 2024 16:43:37 +0800 Subject: [PATCH 22/36] add integration tests --- pkg/phlaredb/symdb/block_reader_test.go | 2 +- pkg/pprof/pprof.go | 19 ++++++---- pkg/pprof/pprof_test.go | 33 +++++++++++++++++ pkg/test/integration/microservices_test.go | 42 ++++++++++++++++++++++ 4 files changed, 88 insertions(+), 8 deletions(-) diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index 98167a022b..40a451669f 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -154,7 +154,7 @@ func Benchmark_Reader_ResolvePprof(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { r := NewResolver(ctx, bs.reader) - r.AddSamples(0, s.indexed[0][0].Samples) + r.AddSamples(0, schemav1.Samples{}) _, err := r.Pprof() require.NoError(b, err) r.Release() diff --git a/pkg/pprof/pprof.go b/pkg/pprof/pprof.go index ecf66cba3d..d155b2fbab 100644 --- a/pkg/pprof/pprof.go +++ b/pkg/pprof/pprof.go @@ -1089,18 +1089,23 @@ func GetLanguage(profile *Profile, logger log.Logger) string { // SetProfileMetadata sets the metadata on the profile. 
func SetProfileMetadata(p *profilev1.Profile, ty *typesv1.ProfileType, timeNanos int64, period int64) { m := map[string]int64{ - ty.SampleUnit: 0, - ty.SampleType: 0, - ty.PeriodType: 0, - ty.PeriodUnit: 0, + ty.SampleUnit: -1, + ty.SampleType: -1, + ty.PeriodType: -1, + ty.PeriodUnit: -1, } for i, s := range p.StringTable { - if _, ok := m[s]; !ok { + if _, ok := m[s]; ok { m[s] = int64(i) } } - for k, v := range m { - if v == 0 { + for _, k := range []string{ + ty.SampleUnit, + ty.SampleType, + ty.PeriodType, + ty.PeriodUnit, + } { + if m[k] == -1 { i := int64(len(p.StringTable)) p.StringTable = append(p.StringTable, k) m[k] = i diff --git a/pkg/pprof/pprof_test.go b/pkg/pprof/pprof_test.go index c906600dc6..c22263a9f6 100644 --- a/pkg/pprof/pprof_test.go +++ b/pkg/pprof/pprof_test.go @@ -15,6 +15,7 @@ import ( "google.golang.org/protobuf/proto" profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" + typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" "github.com/grafana/pyroscope/pkg/pprof/testhelper" ) @@ -1432,3 +1433,35 @@ func Benchmark_GetProfileLanguage(b *testing.B) { }) } } + +func Test_SetProfileMetadata(t *testing.T) { + p := &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{}}, + StringTable: []string{"", "qux"}, + PeriodType: &profilev1.ValueType{}, + } + pt := &typesv1.ProfileType{ + ID: "alfa", + Name: "bravo", + SampleType: "foo", + SampleUnit: "bar", + PeriodType: "baz", + PeriodUnit: "qux", + } + SetProfileMetadata(p, pt, 1, 2) + expected := &profilev1.Profile{ + SampleType: []*profilev1.ValueType{{ + Type: 3, // foo + Unit: 2, // bar + }}, + StringTable: []string{"", "qux", "bar", "foo", "baz"}, + PeriodType: &profilev1.ValueType{ + Type: 4, // baz + Unit: 1, // qux + }, + TimeNanos: 1, + Period: 1, + DefaultSampleType: 3, // foo + } + require.Equal(t, expected.String(), p.String()) +} diff --git a/pkg/test/integration/microservices_test.go b/pkg/test/integration/microservices_test.go index 9cc6d9a597..5a13d50c87 100644 --- a/pkg/test/integration/microservices_test.go +++ b/pkg/test/integration/microservices_test.go @@ -12,6 +12,7 @@ import ( "github.com/stretchr/testify/require" "golang.org/x/sync/errgroup" + profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" pushv1 "github.com/grafana/pyroscope/api/gen/proto/go/push/v1" "github.com/grafana/pyroscope/api/gen/proto/go/push/v1/pushv1connect" querierv1 "github.com/grafana/pyroscope/api/gen/proto/go/querier/v1" @@ -181,4 +182,45 @@ func (tc *testCtx) runQueryTest(ctx context.Context, t *testing.T) { sort.Strings(expectedValues) assert.Equal(t, expectedValues, resp.Msg.Names) }) + + t.Run("QuerySelectMergeProfile", func(t *testing.T) { + req := &querierv1.SelectMergeProfileRequest{ + ProfileTypeID: "process_cpu:cpu:nanoseconds:cpu:nanoseconds", + LabelSelector: "{}", + Start: tc.now.Add(-time.Hour).UnixMilli(), + End: tc.now.Add(time.Hour).UnixMilli(), + } + resp, err := tc.querier.SelectMergeProfile(ctx, connect.NewRequest(req)) + require.NoError(t, err) + + expected := &profilev1.Profile{ + SampleType: []*profilev1.ValueType{ + {Type: 6, Unit: 5}, + }, + Sample: []*profilev1.Sample{ + {LocationId: []uint64{1, 2, 3}, Value: []int64{100}}, + {LocationId: []uint64{1, 2, 4}, Value: []int64{501}}, + }, + Mapping: []*profilev1.Mapping{{Id: 1, HasFunctions: true}}, + Location: []*profilev1.Location{ + {Id: 1, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 1}}}, + {Id: 2, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 2}}}, + {Id: 3, MappingId: 1, Line: 
[]*profilev1.Line{{FunctionId: 3}}}, + {Id: 4, MappingId: 1, Line: []*profilev1.Line{{FunctionId: 4}}}, + }, + Function: []*profilev1.Function{ + {Id: 1, Name: 1}, + {Id: 2, Name: 2}, + {Id: 3, Name: 3}, + {Id: 4, Name: 4}, + }, + StringTable: []string{"", "foo", "bar", "baz", "boz", "nanoseconds", "cpu"}, + TimeNanos: req.End * 1e6, + DurationNanos: 7200000000000, + PeriodType: &profilev1.ValueType{Type: 6, Unit: 5}, + Period: 1000000000, + DefaultSampleType: 6, + } + require.Equal(t, expected.String(), resp.Msg.String()) + }) } From 83d2100954a0031b07513aba0e879d7172fafe04 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Sat, 27 Apr 2024 17:42:50 +0800 Subject: [PATCH 23/36] merge index and data files --- pkg/phlaredb/block/block_test.go | 10 +- pkg/phlaredb/compact.go | 15 +- pkg/phlaredb/compact_test.go | 9 +- pkg/phlaredb/head.go | 9 +- pkg/phlaredb/head_test.go | 8 +- pkg/phlaredb/symdb/block_reader.go | 188 +++++++++++++---- pkg/phlaredb/symdb/block_reader_parquet.go | 4 +- pkg/phlaredb/symdb/block_reader_test.go | 90 ++++++-- pkg/phlaredb/symdb/block_writer.go | 56 ++--- pkg/phlaredb/symdb/format.go | 196 +++++++++++++----- pkg/phlaredb/symdb/functions.go | 9 +- pkg/phlaredb/symdb/locations.go | 16 +- pkg/phlaredb/symdb/mappings.go | 16 +- pkg/phlaredb/symdb/stacktrace_tree.go | 16 +- pkg/phlaredb/symdb/stacktrace_tree_test.go | 1 - pkg/phlaredb/symdb/symdb.go | 23 +- pkg/phlaredb/symdb/symdb_test.go | 6 - .../symdb/testdata/symbols/v3/index.symdb | Bin 704 -> 0 bytes .../symbols/v3/{data.symdb => symbols.symdb} | Bin 88572 -> 89300 bytes 19 files changed, 441 insertions(+), 231 deletions(-) delete mode 100644 pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb rename pkg/phlaredb/symdb/testdata/symbols/v3/{data.symdb => symbols.symdb} (99%) diff --git a/pkg/phlaredb/block/block_test.go b/pkg/phlaredb/block/block_test.go index c93da7e2b6..0fa9979e69 100644 --- a/pkg/phlaredb/block/block_test.go +++ b/pkg/phlaredb/block/block_test.go @@ -96,7 +96,7 @@ func TestDelete(t *testing.T) { }) require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(dir, meta.ULID.String()))) - require.Equal(t, 5, len(objects(t, bkt, meta.ULID))) + require.Equal(t, 4, len(objects(t, bkt, meta.ULID))) markedForDeletion := promauto.With(prometheus.NewRegistry()).NewCounter(prometheus.CounterOpts{Name: "test"}) require.NoError(t, block.MarkForDeletion(ctx, log.NewNopLogger(), bkt, meta.ULID, "", false, markedForDeletion)) @@ -116,7 +116,7 @@ func TestDelete(t *testing.T) { } }) require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b2.ULID.String()))) - require.Equal(t, 5, len(objects(t, bkt, b2.ULID))) + require.Equal(t, 4, len(objects(t, bkt, b2.ULID))) // Remove meta.json and check if delete can delete it. 
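	// (Four objects per block with the v3 layout: index.tsdb,
	// profiles.parquet, symbols.symdb, and meta.json; the fifth object
	// was the separate symbols index, now merged into symbols.symdb.)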
require.NoError(t, bkt.Delete(ctx, path.Join(b2.ULID.String(), block.MetaFilename))) @@ -196,7 +196,7 @@ func TestUpload(t *testing.T) { t.Run("full block", func(t *testing.T) { require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b1.ULID.String()))) - require.Equal(t, 5, len(bkt.Objects())) + require.Equal(t, 4, len(bkt.Objects())) objs := bkt.Objects() require.Contains(t, objs, path.Join(b1.ULID.String(), block.MetaFilename)) require.Contains(t, objs, path.Join(b1.ULID.String(), block.IndexFilename)) @@ -205,7 +205,7 @@ func TestUpload(t *testing.T) { t.Run("upload is idempotent", func(t *testing.T) { require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b1.ULID.String()))) - require.Equal(t, 5, len(bkt.Objects())) + require.Equal(t, 4, len(bkt.Objects())) objs := bkt.Objects() require.Contains(t, objs, path.Join(b1.ULID.String(), block.MetaFilename)) require.Contains(t, objs, path.Join(b1.ULID.String(), block.IndexFilename)) @@ -355,7 +355,7 @@ func TestUploadCleanup(t *testing.T) { require.ErrorIs(t, uploadErr, errUploadFailed) // If upload of meta.json fails, nothing is cleaned up. - require.Equal(t, 5, len(bkt.Objects())) + require.Equal(t, 4, len(bkt.Objects())) require.Greater(t, len(bkt.Objects()[path.Join(b1.String(), block.IndexFilename)]), 0) require.Greater(t, len(bkt.Objects()[path.Join(b1.String(), block.MetaFilename)]), 0) } diff --git a/pkg/phlaredb/compact.go b/pkg/phlaredb/compact.go index f34edb30bb..548b6e9197 100644 --- a/pkg/phlaredb/compact.go +++ b/pkg/phlaredb/compact.go @@ -734,14 +734,9 @@ type symbolsCompactor struct { } func newSymbolsCompactor(path string) *symbolsCompactor { - dst := filepath.Join(path, symdb.DefaultDirName) return &symbolsCompactor{ - w: symdb.NewSymDB(symdb.DefaultConfig(). - WithDirectory(dst). 
- WithParquetConfig(symdb.ParquetConfig{ - MaxBufferRowCount: defaultParquetConfig.MaxBufferRowCount, - })), - dst: dst, + w: symdb.NewSymDB(symdb.DefaultConfig().WithDirectory(path)), + dst: path, rewriters: make(map[BlockReader]*symdb.Rewriter), } } @@ -772,7 +767,9 @@ func (s *symbolsRewriter) Close() (uint64, error) { if err := s.symbolsCompactor.Flush(); err != nil { return 0, err } - return s.numSamples, util.CopyDir(s.symbolsCompactor.dst, filepath.Join(s.dst, symdb.DefaultDirName)) + dst := filepath.Join(s.dst, symdb.DefaultFileName) + src := filepath.Join(s.symbolsCompactor.dst, symdb.DefaultFileName) + return s.numSamples, util.CopyFile(src, dst) } func (s *symbolsCompactor) ReWriteRow(profile profileRow) (uint64, error) { @@ -814,7 +811,7 @@ func (s *symbolsCompactor) Flush() error { } func (s *symbolsCompactor) Close() error { - return os.RemoveAll(s.dst) + return os.RemoveAll(filepath.Join(s.dst, symdb.DefaultFileName)) } func (s *symbolsCompactor) loadStacktracesID(values []parquet.Value) { diff --git a/pkg/phlaredb/compact_test.go b/pkg/phlaredb/compact_test.go index 7fdca44d86..31a20b4707 100644 --- a/pkg/phlaredb/compact_test.go +++ b/pkg/phlaredb/compact_test.go @@ -27,7 +27,6 @@ import ( phlarecontext "github.com/grafana/pyroscope/pkg/phlare/context" "github.com/grafana/pyroscope/pkg/phlaredb/block" "github.com/grafana/pyroscope/pkg/phlaredb/sharding" - "github.com/grafana/pyroscope/pkg/phlaredb/symdb" "github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index" "github.com/grafana/pyroscope/pkg/pprof/testhelper" ) @@ -198,7 +197,7 @@ func TestCompactWithSplitting(t *testing.T) { }) require.NoError(t, err) - require.NoDirExists(t, filepath.Join(dst, symdb.DefaultDirName)) + require.NoFileExists(t, dst) // 4 shards one per series. require.Equal(t, 4, len(compacted)) @@ -628,11 +627,10 @@ func TestFlushMeta(t *testing.T) { require.Equal(t, uint64(3), b.Meta().Stats.NumSeries) require.Equal(t, uint64(3), b.Meta().Stats.NumSamples) require.Equal(t, uint64(3), b.Meta().Stats.NumProfiles) - require.Len(t, b.Meta().Files, 4) + require.Len(t, b.Meta().Files, 3) require.Equal(t, "index.tsdb", b.Meta().Files[0].RelPath) require.Equal(t, "profiles.parquet", b.Meta().Files[1].RelPath) - require.Equal(t, "symbols/data.symdb", b.Meta().Files[2].RelPath) - require.Equal(t, "symbols/index.symdb", b.Meta().Files[3].RelPath) + require.Equal(t, "symbols.symdb", b.Meta().Files[2].RelPath) } func newBlock(t testing.TB, generator func() []*testhelper.ProfileBuilder) *singleBlockQuerier { @@ -693,7 +691,6 @@ func blockQuerierFromMeta(t *testing.T, dir string, m block.Meta) *singleBlockQu require.NoError(t, err) blk := NewSingleBlockQuerierFromMeta(ctx, bkt, &m) require.NoError(t, blk.Open(ctx)) - // require.NoError(t, blk.symbols.Load(ctx)) return blk } diff --git a/pkg/phlaredb/head.go b/pkg/phlaredb/head.go index 74ecf28d92..7bda7b917e 100644 --- a/pkg/phlaredb/head.go +++ b/pkg/phlaredb/head.go @@ -124,12 +124,7 @@ func NewHead(phlarectx context.Context, cfg Config, limiter TenantLimiter) (*Hea } } - h.symdb = symdb.NewSymDB(symdb.DefaultConfig(). - WithDirectory(filepath.Join(h.headPath, symdb.DefaultDirName)). 
- WithParquetConfig(symdb.ParquetConfig{ - MaxBufferRowCount: h.parquetConfig.MaxBufferRowCount, - })) - + h.symdb = symdb.NewSymDB(symdb.DefaultConfig().WithDirectory(h.headPath)) h.wg.Add(1) go h.loop() @@ -562,8 +557,6 @@ func (h *Head) flush(ctx context.Context) error { return errors.Wrap(err, "flushing symdb") } for _, file := range h.symdb.Files() { - // Files' path is relative to the symdb dir. - file.RelPath = filepath.Join(symdb.DefaultDirName, file.RelPath) files = append(files, file) blockSize += file.SizeBytes h.metrics.flushedFileSizeBytes.WithLabelValues(file.RelPath).Observe(float64(file.SizeBytes)) diff --git a/pkg/phlaredb/head_test.go b/pkg/phlaredb/head_test.go index 7d3da2d11d..2d36950588 100644 --- a/pkg/phlaredb/head_test.go +++ b/pkg/phlaredb/head_test.go @@ -370,12 +370,8 @@ func TestHeadFlush(t *testing.T) { }, }, { - RelPath: "symbols/data.symdb", - SizeBytes: 159215, - }, - { - RelPath: "symbols/index.symdb", - SizeBytes: 448, + RelPath: "symbols.symdb", + SizeBytes: 159687, }, }, Compaction: block.BlockMetaCompaction{ diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index a204dcc7c1..79a46f3b76 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -23,13 +23,16 @@ import ( type Reader struct { bucket objstore.BucketReader - files map[string]block.File - meta *block.Meta + file block.File + index IndexFile + footer Footer - index IndexFile partitions []*partition partitionsMap map[uint64]*partition + // Not used in v3; left for compatibility. + meta *block.Meta + files map[string]block.File parquetFiles *parquetFiles } @@ -38,44 +41,145 @@ func Open(ctx context.Context, b objstore.BucketReader, m *block.Meta) (*Reader, bucket: b, meta: m, files: make(map[string]block.File), + file: block.File{RelPath: DefaultFileName}, } for _, f := range r.meta.Files { r.files[filepath.Base(f.RelPath)] = f } - var err error - if err = r.openIndexFile(ctx); err != nil { - return nil, fmt.Errorf("opening index file: %w", err) + if err := r.open(ctx); err != nil { + return nil, err + } + if err := r.buildPartitions(); err != nil { + return nil, err + } + return r, nil +} + +func (r *Reader) open(ctx context.Context) (err error) { + if r.file, err = r.lookupFile(r.file.RelPath); err == nil { + if err = r.openIndex(ctx); err != nil { + return fmt.Errorf("opening index section: %w", err) + } + return nil + } + if err = r.openIndexV12(ctx); err != nil { + return fmt.Errorf("opening index file: %w", err) } if r.index.Header.Version == FormatV2 { if err = openParquetFiles(ctx, r); err != nil { - return nil, err + return fmt.Errorf("opening parquet files: %w", err) } } + return nil +} + +func (r *Reader) buildPartitions() (err error) { r.partitionsMap = make(map[uint64]*partition, len(r.index.PartitionHeaders)) r.partitions = make([]*partition, len(r.index.PartitionHeaders)) for i, h := range r.index.PartitionHeaders { var p *partition if p, err = r.partitionReader(h); err != nil { - return nil, err + return err } r.partitionsMap[h.Partition] = p r.partitions[i] = p } - return r, nil + // Cleanup the index to not retain unused objects. 
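+	// (After buildPartitions the per-partition readers own the headers;
+	// only the format version is still consulted later, e.g. when
+	// resolving which file holds the stacktraces.)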
+ r.index = IndexFile{ + Header: IndexHeader{ + Version: r.index.Header.Version, + }, + } + return nil } -func (r *Reader) Close() error { - if r == nil { - return nil +func (r *Reader) partitionReader(h *PartitionHeader) (*partition, error) { + p := &partition{reader: r} + switch r.index.Header.Version { + case FormatV1: + p.initEmptyTables(h) + case FormatV2: + p.initParquetTables(h) + case FormatV3: + if err := p.initTables(h); err != nil { + return nil, err + } } - if r.parquetFiles != nil { - return r.parquetFiles.Close() + p.initStacktraces(h.Stacktraces) + return p, nil +} + +// openIndex locates footer and loads the index section from +// the file into the memory. +// +// NOTE(kolesnikovae): Pre-fetch: we could speculatively fetch +// the footer and the index section into a larger buffer rather +// than retrieving them synchronously. +// +// NOTE(kolesnikovae): It is possible to skip the footer, if it +// was cached, and the index section offset and size are known. +func (r *Reader) openIndex(ctx context.Context) error { + if r.file.SizeBytes == 0 { + attrs, err := r.bucket.Attributes(ctx, r.file.RelPath) + if err != nil { + return fmt.Errorf("fetching file attributes: %w", err) + } + r.file.SizeBytes = uint64(attrs.Size) + } + // Read footer. + offset := int64(r.file.SizeBytes) - int64(FooterSize) + if offset < int64(IndexHeaderSize) { + return fmt.Errorf("%w: footer offset: %d", ErrInvalidSize, offset) + } + if err := r.readFooter(ctx, offset, int64(FooterSize)); err != nil { + return err + } + indexSize := offset - int64(r.footer.IndexOffset) + if indexSize < int64(IndexHeaderSize) { + return fmt.Errorf("%w: index section size: %d", ErrInvalidSize, indexSize) + } + return r.readIndexSection(ctx, int64(r.footer.IndexOffset), indexSize) +} + +func (r *Reader) readFooter(ctx context.Context, offset, size int64) error { + o, err := r.bucket.GetRange(ctx, r.file.RelPath, offset, size) + if err != nil { + return fmt.Errorf("fetching footer: %w", err) + } + defer func() { + _ = o.Close() + }() + buf := make([]byte, size) + if _, err = io.ReadFull(o, buf); err != nil { + return fmt.Errorf("reading footer: %w", err) + } + if err = r.footer.UnmarshalBinary(buf); err != nil { + return fmt.Errorf("unmarshaling footer: %w", err) } return nil } -func (r *Reader) openIndexFile(ctx context.Context) error { - f, err := r.file(IndexFileName) +func (r *Reader) readIndexSection(ctx context.Context, offset, size int64) error { + o, err := r.bucket.GetRange(ctx, r.file.RelPath, offset, size) + if err != nil { + return fmt.Errorf("fetching index: %w", err) + } + defer func() { + _ = o.Close() + }() + buf := make([]byte, int(size)) + if _, err = io.ReadFull(o, buf); err != nil { + return fmt.Errorf("reading index: %w", err) + } + r.index, err = OpenIndex(buf) + if err != nil { + return fmt.Errorf("openning index: %w", err) + } + return nil +} + +func (r *Reader) openIndexV12(ctx context.Context) error { + f, err := r.lookupFile(IndexFileName) if err != nil { return err } @@ -87,11 +191,11 @@ func (r *Reader) openIndexFile(ctx context.Context) error { if err != nil { return err } - r.index, err = ReadIndexFile(b) + r.index, err = OpenIndex(b) return err } -func (r *Reader) file(name string) (block.File, error) { +func (r *Reader) lookupFile(name string) (block.File, error) { f, ok := r.files[name] if !ok { return block.File{}, fmt.Errorf("%q: %w", name, os.ErrNotExist) @@ -99,20 +203,14 @@ func (r *Reader) file(name string) (block.File, error) { return f, nil } -func (r *Reader) partitionReader(h 
*PartitionHeader) (*partition, error) { - p := &partition{reader: r} - switch r.index.Header.Version { - case FormatV1: - p.initEmptyTables(h) - case FormatV2: - p.initParquetTables(h) - case FormatV3: - if err := p.initTables(h); err != nil { - return nil, err - } +func (r *Reader) Close() error { + if r == nil { + return nil } - p.initStacktraces(h.Stacktraces) - return p, nil + if r.parquetFiles != nil { + return r.parquetFiles.Close() + } + return nil } var ErrPartitionNotFound = fmt.Errorf("partition not found") @@ -369,15 +467,11 @@ func (c *stacktraceBlock) fetch(ctx context.Context) error { ) defer span.Finish() return c.r.Inc(func() error { - filename := DataFileName - if c.reader.index.Header.Version < 3 { - filename = StacktracesFileName - } - f, err := c.reader.file(filename) + path, err := c.stacktracesFile() if err != nil { return err } - rc, err := c.reader.bucket.GetRange(ctx, f.RelPath, c.header.Offset, c.header.Size) + rc, err := c.reader.bucket.GetRange(ctx, path, c.header.Offset, c.header.Size) if err != nil { return err } @@ -390,6 +484,17 @@ func (c *stacktraceBlock) fetch(ctx context.Context) error { }) } +func (c *stacktraceBlock) stacktracesFile() (string, error) { + f := c.reader.file + if c.reader.index.Header.Version < 3 { + var err error + if f, err = c.reader.lookupFile(StacktracesFileName); err != nil { + return "", err + } + } + return f.RelPath, nil +} + func (c *stacktraceBlock) readFrom(r *bufio.Reader) error { // NOTE(kolesnikovae): Pool of node chunks could reduce // the alloc size, but it may affect memory locality. @@ -435,11 +540,10 @@ func (t *rawTable[T]) fetch(ctx context.Context) error { ) defer span.Finish() return t.r.Inc(func() error { - f, err := t.reader.file(DataFileName) - if err != nil { - return err - } - rc, err := t.reader.bucket.GetRange(ctx, f.RelPath, int64(t.header.Offset), int64(t.header.Size)) + rc, err := t.reader.bucket.GetRange(ctx, + t.reader.file.RelPath, + int64(t.header.Offset), + int64(t.header.Size)) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/block_reader_parquet.go b/pkg/phlaredb/symdb/block_reader_parquet.go index 1addb2764e..364a678344 100644 --- a/pkg/phlaredb/symdb/block_reader_parquet.go +++ b/pkg/phlaredb/symdb/block_reader_parquet.go @@ -19,6 +19,8 @@ import ( "github.com/grafana/pyroscope/pkg/util/refctr" ) +// Used in v2. Left for compatibility. 
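+// The v3 read path never opens these tables: locations, mappings,
+// functions, and strings are decoded directly from symbols.symdb.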
+ type parquetTable[M schemav1.Models, P schemav1.Persister[M]] struct { headers []RowRangeReference bucket objstore.BucketReader @@ -145,7 +147,7 @@ func openParquetFiles(ctx context.Context, r *Reader) error { n := n fp := fp g.Go(func() error { - fm, err := r.file(n) + fm, err := r.lookupFile(n) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index 40a451669f..67521b2350 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -1,29 +1,46 @@ package symdb import ( + "bytes" "context" "os" "testing" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + pystore "github.com/grafana/pyroscope/pkg/objstore" "github.com/grafana/pyroscope/pkg/objstore/providers/filesystem" "github.com/grafana/pyroscope/pkg/phlaredb/block" schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" ) -var testBlockMeta = &block.Meta{ - Files: []block.File{ - {RelPath: IndexFileName}, - {RelPath: StacktracesFileName}, - {RelPath: DataFileName}, - {RelPath: "locations.parquet"}, - {RelPath: "mappings.parquet"}, - {RelPath: "functions.parquet"}, - {RelPath: "strings.parquet"}, - }, -} +var ( + testBlockMeta = &block.Meta{ + Files: []block.File{ + {RelPath: DefaultFileName}, + }, + } + + testBlockMetaV1 = &block.Meta{ + Files: []block.File{ + {RelPath: IndexFileName}, + {RelPath: StacktracesFileName}, + }, + } + + testBlockMetaV2 = &block.Meta{ + Files: []block.File{ + {RelPath: IndexFileName}, + {RelPath: StacktracesFileName}, + {RelPath: "locations.parquet"}, + {RelPath: "mappings.parquet"}, + {RelPath: "functions.parquet"}, + {RelPath: "strings.parquet"}, + }, + } +) func Test_write_block_fixture(t *testing.T) { t.Skip() @@ -36,9 +53,8 @@ func Test_write_block_fixture(t *testing.T) { require.NoError(t, os.Rename(b.config.Dir, fixtureDir)) } -func Fuzz_ReadIndexFile(f *testing.F) { +func Fuzz_ReadIndexFile_v12(f *testing.F) { files := []string{ - "testdata/symbols/v3/index.symdb", "testdata/symbols/v2/index.symdb", "testdata/symbols/v1/index.symdb", } @@ -48,7 +64,7 @@ func Fuzz_ReadIndexFile(f *testing.F) { f.Add(data) } f.Fuzz(func(_ *testing.T, b []byte) { - _, _ = ReadIndexFile(b) + _, _ = OpenIndex(b) }) } @@ -85,12 +101,54 @@ func Test_Reader_Open_v3(t *testing.T) { require.Equal(t, expected, resolved.String()) } +func Test_Reader_Open_v3_fuzz(t *testing.T) { + // Make sure the test is valid. 
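+	// (This mirrors the fuzz body below against the unmodified corpus,
+	// so a failure here points at the harness, not at a mutated input.)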
+ corpus, err := os.ReadFile("testdata/symbols/v3/symbols.symdb") + require.NoError(t, err) + ctx := context.Background() + + bucket := pystore.NewBucket(objstore.NewInMemBucket()) + require.NoError(t, bucket.Upload(ctx, DefaultFileName, bytes.NewReader(corpus))) + b, err := Open(ctx, bucket, testBlockMeta) + require.NoError(t, err) + + r := NewResolver(context.Background(), b) + defer r.Release() + r.AddSamples(0, schemav1.Samples{}) + r.AddSamples(1, schemav1.Samples{}) + _, err = r.Pprof() + require.NoError(t, err) +} + +func Fuzz_Reader_Open_v3(f *testing.F) { + corpus, err := os.ReadFile("testdata/symbols/v3/symbols.symdb") + require.NoError(f, err) + ctx := context.Background() + + f.Add(corpus) + f.Fuzz(func(t *testing.T, data []byte) { + bucket := pystore.NewBucket(objstore.NewInMemBucket()) + require.NoError(t, bucket.Upload(ctx, DefaultFileName, bytes.NewReader(data))) + + b, err := Open(context.Background(), bucket, testBlockMeta) + if err != nil { + return + } + + r := NewResolver(context.Background(), b) + defer r.Release() + r.AddSamples(0, schemav1.Samples{}) + r.AddSamples(1, schemav1.Samples{}) + _, _ = r.Pprof() + }) +} + func Test_Reader_Open_v2(t *testing.T) { // The block contains two partitions (0 and 1), each partition // stores symbols of the testdata/profile.pb.gz profile b, err := filesystem.NewBucket("testdata/symbols/v2") require.NoError(t, err) - x, err := Open(context.Background(), b, testBlockMeta) + x, err := Open(context.Background(), b, testBlockMetaV2) require.NoError(t, err) r := NewResolver(context.Background(), x) @@ -120,7 +178,7 @@ func Test_Reader_Open_v2(t *testing.T) { func Test_Reader_Open_v1(t *testing.T) { b, err := filesystem.NewBucket("testdata/symbols/v1") require.NoError(t, err) - x, err := Open(context.Background(), b, testBlockMeta) + x, err := Open(context.Background(), b, testBlockMetaV1) require.NoError(t, err) r, err := x.partition(context.Background(), 1) require.NoError(t, err) diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index 19be26e071..585b498f2d 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -7,8 +7,6 @@ import ( "os" "path/filepath" - "github.com/grafana/dskit/multierror" - "github.com/grafana/pyroscope/pkg/phlaredb/block" v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" ) @@ -20,6 +18,7 @@ type writer struct { indexFile *fileWriter dataFile *fileWriter files []block.File + footer Footer stringsEncoder *symbolsEncoder[string] mappingsEncoder *symbolsEncoder[v1.InMemoryMapping] @@ -31,11 +30,15 @@ func newWriter(c *Config) *writer { return &writer{ config: c, index: IndexFile{ - Header: Header{ + Header: IndexHeader{ Magic: symdbMagic, Version: FormatV3, }, }, + footer: Footer{ + Magic: symdbMagic, + Version: FormatV3, + }, stringsEncoder: newStringsEncoder(), mappingsEncoder: newMappingsEncoder(), @@ -45,53 +48,32 @@ func newWriter(c *Config) *writer { } func (w *writer) writePartitions(partitions []*PartitionWriter) (err error) { - if w.dataFile, err = w.newFile(DataFileName); err != nil { + if err = os.MkdirAll(w.config.Dir, 0o755); err != nil { + return fmt.Errorf("failed to create directory %q: %w", w.config.Dir, err) + } + if w.dataFile, err = w.newFile(DefaultFileName); err != nil { return err } defer func() { err = w.dataFile.Close() + w.files = []block.File{w.dataFile.meta()} }() for _, p := range partitions { if err = p.writeTo(w); err != nil { - return err + return fmt.Errorf("failed to write partition: %w", err) } 
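		// Headers carry the section offsets recorded during writeTo, so
		// they are collected only after the partition body is written.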
w.index.PartitionHeaders = append(w.index.PartitionHeaders, &p.header) } - return nil -} - -func (w *writer) Flush() (err error) { - if err = w.writeIndexFile(); err != nil { - return err - } - w.files = []block.File{ - w.indexFile.meta(), - w.dataFile.meta(), + w.footer.IndexOffset = uint64(w.dataFile.w.offset) + if _, err = w.index.WriteTo(w.dataFile); err != nil { + return fmt.Errorf("failed to write index: %w", err) } - return nil -} - -func (w *writer) createDir() error { - if err := os.MkdirAll(w.config.Dir, 0o755); err != nil { - return fmt.Errorf("failed to create directory %q: %w", w.config.Dir, err) + if _, err = w.dataFile.Write(w.footer.MarshalBinary()); err != nil { + return fmt.Errorf("failed to write footer: %w", err) } return nil } -func (w *writer) writeIndexFile() (err error) { - // Write the index file only after all the files were flushed. - if w.indexFile, err = w.newFile(IndexFileName); err != nil { - return err - } - defer func() { - err = multierror.New(err, w.indexFile.Close()).Err() - }() - if _, err = w.index.WriteTo(w.indexFile); err != nil { - return fmt.Errorf("failed to write index file: %w", err) - } - return err -} - func (w *writer) newFile(path string) (f *fileWriter, err error) { path = filepath.Join(w.config.Dir, path) if f, err = newFileWriter(path); err != nil { @@ -129,7 +111,7 @@ func (f *fileWriter) Write(p []byte) (n int, err error) { return f.w.Write(p) } -func (f *fileWriter) sync() (err error) { +func (f *fileWriter) Flush() (err error) { if err = f.buf.Flush(); err != nil { return err } @@ -137,7 +119,7 @@ func (f *fileWriter) sync() (err error) { } func (f *fileWriter) Close() (err error) { - if err = f.sync(); err != nil { + if err = f.Flush(); err != nil { return err } return f.f.Close() diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 81bda15889..780b965494 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -8,40 +8,65 @@ import ( "io" "unsafe" + "github.com/parquet-go/parquet-go/encoding/delta" + "github.com/grafana/pyroscope/pkg/slices" "github.com/grafana/pyroscope/pkg/util/math" ) +// V1 and V2: +// // The database is a collection of files. The only file that is guaranteed // to be present is the index file: it indicates the version of the format, // and the structure of the database contents. The file is supposed to be -// read into memory entirely and opened with a ReadIndexFile call. -// -// Big endian order is used unless otherwise noted. +// read into memory entirely and opened with an OpenIndex call. + +// V3: // -// Layout of the index file (single-pass write): +// The database is a single file. The file consists of the following sections: +// [Data ] +// [Index ] +// [Footer] // -// [Header] Header defines the format version and denotes the content type. +// The file is supposed to be open with Open call: it reads the footer, locates +// index section, and fetches it into memory. // -// [TOC] Table of contents. Its entries refer to the Data section. -// It is of a fixed size for a given version (number of entries). +// Data section is version specific. +// v3: Partitions. // -// [Data] Data is an arbitrary structured section. The exact structure is -// defined by the TOC and Header (version, flags, etc). +// Index section is structured in the following way: // -// [CRC32] Checksum. +// [IndexHeader] Header defines the format version and denotes the content type. +// [TOC ] Table of contents. Its entries refer to the Data section. 
+// It is of a fixed size for a given version (number of entries). +// [Data ] Data is an arbitrary structured section. The exact structure is +// defined by the TOC and Header (version, flags, etc). +// v1: StacktraceChunkHeaders. +// v2: PartitionHeadersV2. +// v3: PartitionHeadersV3. +// [CRC32 ] Checksum. // +// Footer section is version agnostic and is only needed to locate +// the index offset within the file. + +// In all version big endian order is used unless otherwise noted. const ( - DefaultDirName = "symbols" + DefaultFileName = "symbols.symdb" // Added in v3. + // Pre-v3 assets. Left for compatibility reasons. + + DefaultDirName = "symbols" IndexFileName = "index.symdb" - StacktracesFileName = "stacktraces.symdb" // Used in v1 and v2. - DataFileName = "data.symdb" // Added in v3. + StacktracesFileName = "stacktraces.symdb" ) +type FormatVersion uint32 + const ( - _ = iota + // Within a database, the same format version + // must be used in all places. + _ FormatVersion = iota FormatV1 FormatV2 @@ -52,9 +77,14 @@ const ( const ( // TOC entries are version-specific. + // The constants point to the entry index in the TOC. tocEntryStacktraceChunkHeaders = 0 tocEntryPartitionHeaders = 0 - tocEntries = 1 + + // Total number of entries in the current version. + // TODO(kolesnikovae): TOC size is version specific, + // but at the moment, all versions have the same size: 1. + tocEntriesTotal = 1 ) // https://en.wikipedia.org/wiki/List_of_file_signatures @@ -81,42 +111,83 @@ func (e *FormatError) Error() string { } type IndexFile struct { - Header Header + Header IndexHeader TOC TOC - // Version-specific parts. + // Version-specific. PartitionHeaders PartitionHeaders - CRC uint32 + CRC uint32 // Checksum of the index. } -type Header struct { - Magic [4]byte - Version uint32 - Reserved [8]byte // Reserved for future use. +// NOTE(kolesnikovae): IndexHeader is rudimentary and is left for compatibility. + +type IndexHeader struct { + Magic [4]byte + Version FormatVersion + _ [4]byte // Reserved for future use. + _ [4]byte // Reserved for future use. } -const HeaderSize = int(unsafe.Sizeof(Header{})) +const IndexHeaderSize = int(unsafe.Sizeof(IndexHeader{})) -func (h *Header) MarshalBinary() ([]byte, error) { - b := make([]byte, HeaderSize) +func (h *IndexHeader) MarshalBinary() []byte { + b := make([]byte, IndexHeaderSize) copy(b[0:4], h.Magic[:]) - binary.BigEndian.PutUint32(b[4:8], h.Version) - binary.BigEndian.PutUint32(b[HeaderSize-4:], crc32.Checksum(b[:HeaderSize-4], castagnoli)) - return b, nil + binary.BigEndian.PutUint32(b[4:8], uint32(h.Version)) + return b } -func (h *Header) UnmarshalBinary(b []byte) error { - if len(b) != HeaderSize { +func (h *IndexHeader) UnmarshalBinary(b []byte) error { + if len(b) != IndexHeaderSize { return ErrInvalidSize } if copy(h.Magic[:], b[0:4]); !bytes.Equal(h.Magic[:], symdbMagic[:]) { return ErrInvalidMagic } - // Reserved space may change from version to version. - if h.Version = binary.BigEndian.Uint32(b[4:8]); h.Version >= unknownVersion { + h.Version = FormatVersion(binary.BigEndian.Uint32(b[4:8])) + if h.Version >= unknownVersion { + return ErrUnknownVersion + } + return nil +} + +type Footer struct { + Magic [4]byte + Version FormatVersion + IndexOffset uint64 // Index header offset in the file. + _ [4]byte // Reserved for future use. + CRC uint32 // CRC of the footer. 
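+
+	// Wire layout, 24 bytes total, big endian:
+	//   [0:4]   magic
+	//   [4:8]   version
+	//   [8:16]  index offset
+	//   [16:20] reserved (zero)
+	//   [20:24] CRC32 (Castagnoli) of bytes [0:20]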
+} + +const FooterSize = int(unsafe.Sizeof(Footer{})) + +func (f *Footer) MarshalBinary() []byte { + b := make([]byte, FooterSize) + copy(b[0:4], f.Magic[:]) + binary.BigEndian.PutUint32(b[4:8], uint32(f.Version)) + binary.BigEndian.PutUint64(b[8:16], f.IndexOffset) + binary.BigEndian.PutUint32(b[16:20], 0) + binary.BigEndian.PutUint32(b[20:24], crc32.Checksum(b[0:20], castagnoli)) + return b +} + +func (f *Footer) UnmarshalBinary(b []byte) error { + if len(b) != FooterSize { + return ErrInvalidSize + } + if copy(f.Magic[:], b[0:4]); !bytes.Equal(f.Magic[:], symdbMagic[:]) { + return ErrInvalidMagic + } + f.Version = FormatVersion(binary.BigEndian.Uint32(b[4:8])) + if f.Version >= unknownVersion { return ErrUnknownVersion } + f.IndexOffset = binary.BigEndian.Uint64(b[8:16]) + f.CRC = binary.BigEndian.Uint32(b[20:24]) + if crc32.Checksum(b[0:20], castagnoli) != f.CRC { + return ErrInvalidCRC + } return nil } @@ -128,13 +199,15 @@ type TOC struct { Entries []TOCEntry } +// TOCEntry refers to a section within the index. +// Offset is relative to the header offset. type TOCEntry struct { Offset int64 Size int64 } func (toc *TOC) Size() int { - return tocEntrySize * tocEntries + return tocEntrySize * tocEntriesTotal } func (toc *TOC) MarshalBinary() ([]byte, error) { @@ -171,7 +244,8 @@ func (h *TOCEntry) unmarshal(b []byte) { type PartitionHeaders []*PartitionHeader type PartitionHeader struct { - Partition uint64 + Partition uint64 + // TODO(kolesnikovae): Switch to SymbolsBlock encoding. Stacktraces []StacktraceBlockHeader V2 *PartitionHeaderV2 V3 *PartitionHeaderV3 @@ -192,7 +266,7 @@ func (h *PartitionHeaders) WriteTo(dst io.Writer) (_ int64, err error) { w.write(buf) for _, p := range *h { if p.V3 == nil { - return 0, fmt.Errorf("v2 format is not supported") + return 0, fmt.Errorf("only v3 format is supported") } buf = slices.GrowLen(buf, int(p.Size())) p.marshal(buf) @@ -226,7 +300,7 @@ func (h *PartitionHeaders) UnmarshalV2(b []byte) error { return h.unmarshal(b, F func (h *PartitionHeaders) UnmarshalV3(b []byte) error { return h.unmarshal(b, FormatV3) } -func (h *PartitionHeaders) unmarshal(b []byte, version int) error { +func (h *PartitionHeaders) unmarshal(b []byte, version FormatVersion) error { partitions := binary.BigEndian.Uint32(b[0:4]) b = b[4:] *h = make(PartitionHeaders, partitions) @@ -255,7 +329,7 @@ func (h *PartitionHeader) marshal(buf []byte) { marshalSymbolsBlockReferences(buf[n:], h.V3.Strings) } -func (h *PartitionHeader) unmarshal(buf []byte, version int) (err error) { +func (h *PartitionHeader) unmarshal(buf []byte, version FormatVersion) (err error) { h.Partition = binary.BigEndian.Uint64(buf[0:8]) h.Stacktraces = make([]StacktraceBlockHeader, int(binary.BigEndian.Uint32(buf[8:12]))) switch version { @@ -461,7 +535,7 @@ func (r *RowRangeReference) unmarshal(b []byte) { r.Rows = binary.BigEndian.Uint32(b[8:12]) } -func ReadIndexFile(b []byte) (f IndexFile, err error) { +func OpenIndex(b []byte) (f IndexFile, err error) { s := len(b) if !f.assertSizeIsValid(b) { return f, ErrInvalidSize @@ -470,10 +544,10 @@ func ReadIndexFile(b []byte) (f IndexFile, err error) { if f.CRC != crc32.Checksum(b[:s+indexChecksumOffset], castagnoli) { return f, ErrInvalidCRC } - if err = f.Header.UnmarshalBinary(b[:HeaderSize]); err != nil { + if err = f.Header.UnmarshalBinary(b[:IndexHeaderSize]); err != nil { return f, fmt.Errorf("unmarshal header: %w", err) } - if err = f.TOC.UnmarshalBinary(b[HeaderSize:f.dataOffset()]); err != nil { + if err = 
f.TOC.UnmarshalBinary(b[IndexHeaderSize:f.dataOffset()]); err != nil { return f, fmt.Errorf("unmarshal table of contents: %w", err) } @@ -507,22 +581,21 @@ func ReadIndexFile(b []byte) (f IndexFile, err error) { } func (f *IndexFile) assertSizeIsValid(b []byte) bool { - return len(b) >= HeaderSize+f.TOC.Size()+checksumSize + return len(b) >= IndexHeaderSize+f.TOC.Size()+checksumSize } func (f *IndexFile) dataOffset() int { - return HeaderSize + f.TOC.Size() + return IndexHeaderSize + f.TOC.Size() } func (f *IndexFile) WriteTo(dst io.Writer) (n int64, err error) { checksum := crc32.New(castagnoli) w := withWriterOffset(io.MultiWriter(dst, checksum), 0) - headerBytes, _ := f.Header.MarshalBinary() - if _, err = w.Write(headerBytes); err != nil { + if _, err = w.Write(f.Header.MarshalBinary()); err != nil { return w.offset, fmt.Errorf("header write: %w", err) } - toc := TOC{Entries: make([]TOCEntry, tocEntries)} + toc := TOC{Entries: make([]TOCEntry, tocEntriesTotal)} toc.Entries[tocEntryPartitionHeaders] = TOCEntry{ Offset: int64(f.dataOffset()), Size: f.PartitionHeaders.Size(), @@ -650,7 +723,7 @@ func (d *symbolsDecoder[T]) decode(dst []T, r io.Reader) error { return nil } if len(dst) < int(d.h.Length) { - return fmt.Errorf("%w: buffer too short", ErrInvalidSize) + return fmt.Errorf("decoder buffer too short (format %d)", d.h.Format) } blocks := int((d.h.Length + d.h.BlockSize - 1) / d.h.BlockSize) for i := 0; i < blocks; i++ { @@ -658,8 +731,37 @@ func (d *symbolsDecoder[T]) decode(dst []T, r io.Reader) error { hi := math.Min(lo+int(d.h.BlockSize), int(d.h.Length)) block := dst[lo:hi] if err := d.d.decode(r, block); err != nil { - return err + return fmt.Errorf("malformed block (format %d): %w", d.h.Format, err) } } return nil } + +// NOTE(kolesnikovae): delta.BinaryPackedEncoding may +// silently fail on malformed data, producing empty slice. 
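+// The wrappers below therefore verify the decoded length themselves and
+// report ErrInvalidSize on a mismatch rather than trusting the decoder's
+// error value alone.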
+ +func decodeBinaryPackedInt32(dst []int32, data []byte, length int) ([]int32, error) { + var enc delta.BinaryPackedEncoding + var err error + dst, err = enc.DecodeInt32(dst, data) + if err != nil { + return dst, err + } + if len(dst) != length { + return dst, fmt.Errorf("%w: binary packed: expected %d, got %d", ErrInvalidSize, length, len(dst)) + } + return dst, nil +} + +func decodeBinaryPackedInt64(dst []int64, data []byte, length int) ([]int64, error) { + var enc delta.BinaryPackedEncoding + var err error + dst, err = enc.DecodeInt64(dst, data) + if err != nil { + return dst, err + } + if len(dst) != length { + return dst, fmt.Errorf("%w: binary packed: expected %d, got %d", ErrInvalidSize, length, len(dst)) + } + return dst, nil +} diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go index 71696339c2..9fbbbd8c74 100644 --- a/pkg/phlaredb/symdb/functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -154,13 +154,12 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct return fmt.Errorf("functions buffer is too short") } - var enc delta.BinaryPackedEncoding d.ints = slices.GrowLen(d.ints, int(d.header.FunctionsLen)) d.buf = slices.GrowLen(d.buf, int(d.header.NameSize)) if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.FunctionsLen)) if err != nil { return err } @@ -172,7 +171,7 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.FunctionsLen)) if err != nil { return err } @@ -184,7 +183,7 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.FunctionsLen)) if err != nil { return err } @@ -196,7 +195,7 @@ func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunct if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.FunctionsLen)) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index 653fbae4a8..74b7a4216b 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -198,13 +198,12 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat return fmt.Errorf("locations buffer: %w", ErrInvalidSize) } - var enc delta.BinaryPackedEncoding // First we decode mapping_id and assign them to locations. d.buf = slices.GrowLen(d.buf, int(d.header.MappingSize)) if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.mappings, err = enc.DecodeInt32(d.mappings, d.buf) + d.mappings, err = decodeBinaryPackedInt32(d.mappings, d.buf, int(d.header.LocationsLen)) if err != nil { return err } @@ -224,8 +223,8 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.lines = slices.GrowLen(d.lines, int(d.header.LinesLen)) - d.lines, err = enc.DecodeInt32(d.lines, d.buf) + // Lines are encoded as pairs of uint32 (function_id and line number). 
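+	// LinesLen counts line entries, and each entry decodes to two int32
+	// values (function_id, line), hence the expected length of
+	// LinesLen*2 below.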
+ d.lines, err = decodeBinaryPackedInt32(d.lines, d.buf, int(d.header.LinesLen)*2) if err != nil { return err } @@ -234,9 +233,15 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat // In most cases we end up here. if d.header.AddrSize == 0 && d.header.IsFoldedSize == 0 { var o int // Offset within the lines slice. + // In case if the block is malformed, an invalid + // line count may cause an out-of-bounds panic. + maxLines := len(lines) for i := 0; i < len(locations); i++ { locations[i].MappingId = uint32(d.mappings[i]) n := o + int(d.lineCount[i]) + if n > maxLines { + return fmt.Errorf("%w: location lines out of bounds", ErrInvalidSize) + } locations[i].Line = lines[o:n] o = n } @@ -249,8 +254,7 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.address = slices.GrowLen(d.address, int(d.header.LocationsLen)) - d.address, err = enc.DecodeInt64(d.address, d.buf) + d.address, err = decodeBinaryPackedInt64(d.address, d.buf, int(d.header.LocationsLen)) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index 7eede61b4f..55eb7beefb 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -203,14 +203,13 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping return fmt.Errorf("mappings buffer is too short") } - var enc delta.BinaryPackedEncoding d.ints = slices.GrowLen(d.ints, int(d.header.MappingsLen)) d.buf = slices.GrowLen(d.buf, int(d.header.FileNameSize)) if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.MappingsLen)) if err != nil { return err } @@ -222,7 +221,7 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.MappingsLen)) if err != nil { return err } @@ -234,7 +233,7 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints, err = enc.DecodeInt32(d.ints, d.buf) + d.ints, err = decodeBinaryPackedInt32(d.ints, d.buf, int(d.header.MappingsLen)) if err != nil { return err } @@ -246,12 +245,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping } if d.header.MemoryStartSize > 0 { - d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen)) d.buf = slices.GrowLen(d.buf, int(d.header.MemoryStartSize)) if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints64, err = enc.DecodeInt64(d.ints64, d.buf) + d.ints64, err = decodeBinaryPackedInt64(d.ints64, d.buf, int(d.header.MappingsLen)) if err != nil { return err } @@ -260,12 +258,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping } } if d.header.MemoryLimitSize > 0 { - d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen)) d.buf = slices.GrowLen(d.buf, int(d.header.MemoryLimitSize)) if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints64, err = enc.DecodeInt64(d.ints64, d.buf) + d.ints64, err = decodeBinaryPackedInt64(d.ints64, d.buf, int(d.header.MappingsLen)) if err != nil { return err } @@ -274,12 +271,11 @@ func (d *mappingsBlockDecoder) decode(r io.Reader, mappings 
[]v1.InMemoryMapping } } if d.header.FileOffsetSize > 0 { - d.ints64 = slices.GrowLen(d.ints64, int(d.header.MappingsLen)) d.buf = slices.GrowLen(d.buf, int(d.header.FileOffsetSize)) if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.ints64, err = enc.DecodeInt64(d.ints64, d.buf) + d.ints64, err = decodeBinaryPackedInt64(d.ints64, d.buf, int(d.header.MappingsLen)) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/stacktrace_tree.go b/pkg/phlaredb/symdb/stacktrace_tree.go index d4cc9ef749..de726917ec 100644 --- a/pkg/phlaredb/symdb/stacktrace_tree.go +++ b/pkg/phlaredb/symdb/stacktrace_tree.go @@ -260,6 +260,9 @@ func (d *treeDecoder) unmarshal(t *parentPointerTree, r io.Reader) error { } eof = true } + // len(b) is always >= b.Buffered(), + // therefore Discard does not invalidate + // the buffer. if _, err = buf.Discard(len(b)); err != nil { return err } @@ -272,12 +275,14 @@ func (d *treeDecoder) unmarshal(t *parentPointerTree, r io.Reader) error { xn := len(t.nodes) - np // remaining nodes // Note that g should always be a multiple of 4. g = g[:math.Min((xn+xn%2)*2, d.groupBuffer)] - var gp int - + if len(g)%4 != 0 { + return io.ErrUnexpectedEOF + } // Check if there is a remainder. If this is the case, // decode the group and advance gp. + var gp int if len(rb) > 0 { - // It's expected that r contains a single complete group. + // It's expected that rb contains a single complete group. m := groupvarint.BytesUsed[rb[0]] - len(rb) if m >= (len(b) + len(rb)) { return io.ErrUnexpectedEOF @@ -295,12 +300,15 @@ func (d *treeDecoder) unmarshal(t *parentPointerTree, r io.Reader) error { // Re-fill g. gi, n, rn := decodeU32Groups(g[gp:], b[read:]) gp += gi - read += n + rn // Mark remainder bytes as read, we copy them. + read += n + rn // Mark the remaining bytes as read; we copy them. if rn > 0 { // If there is a remainder, it is copied and decoded on // the next Peek. This should not be possible with eof. rb = append(rb, b[len(b)-rn:]...) } + if len(g) == 0 && len(rb) == 0 { + break + } // g is full, or no more data in buf. for i := 0; i < len(g[:gp])-1; i += 2 { diff --git a/pkg/phlaredb/symdb/stacktrace_tree_test.go b/pkg/phlaredb/symdb/stacktrace_tree_test.go index 83b2dd09f2..55eef46a2f 100644 --- a/pkg/phlaredb/symdb/stacktrace_tree_test.go +++ b/pkg/phlaredb/symdb/stacktrace_tree_test.go @@ -88,7 +88,6 @@ func Test_stacktrace_tree_encoding_group(t *testing.T) { } func Test_stacktrace_tree_encoding_rand(t *testing.T) { - // TODO: Fuzzing. nodes := make([]node, 1<<20) for i := range nodes { nodes[i] = node{ diff --git a/pkg/phlaredb/symdb/symdb.go b/pkg/phlaredb/symdb/symdb.go index 588296c5a2..c78b71fa09 100644 --- a/pkg/phlaredb/symdb/symdb.go +++ b/pkg/phlaredb/symdb/symdb.go @@ -2,7 +2,6 @@ package symdb import ( "context" - "fmt" "sort" "sync" "time" @@ -78,17 +77,12 @@ type SymDB struct { type Config struct { Dir string Stacktraces StacktracesConfig - Parquet ParquetConfig } type StacktracesConfig struct { MaxNodesPerChunk uint32 } -type ParquetConfig struct { - MaxBufferRowCount int -} - type MemoryStats struct { StacktracesSize uint64 LocationsSize uint64 @@ -109,16 +103,12 @@ const statsUpdateInterval = 5 * time.Second func DefaultConfig() *Config { return &Config{ - Dir: DefaultDirName, Stacktraces: StacktracesConfig{ // At the moment chunks are loaded in memory at once. // Due to the fact that chunking causes some duplication, // it's better to keep them large. 
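			// (4 << 20 is 4,194,304 nodes per chunk.)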
MaxNodesPerChunk: 4 << 20, }, - Parquet: ParquetConfig{ - MaxBufferRowCount: 100 << 10, - }, } } @@ -127,11 +117,6 @@ func (c *Config) WithDirectory(dir string) *Config { return c } -func (c *Config) WithParquetConfig(pc ParquetConfig) *Config { - c.Parquet = pc - return c -} - func NewSymDB(c *Config) *SymDB { if c == nil { c = DefaultConfig() @@ -262,13 +247,7 @@ func (s *SymDB) Flush() error { sort.Slice(partitions, func(i, j int) bool { return partitions[i].header.Partition < partitions[j].header.Partition }) - if err := s.writer.createDir(); err != nil { - return err - } - if err := s.writer.writePartitions(partitions); err != nil { - return fmt.Errorf("writing partitions: %w", err) - } - return s.writer.Flush() + return s.writer.writePartitions(partitions) } func (s *SymDB) Files() []block.File { diff --git a/pkg/phlaredb/symdb/symdb_test.go b/pkg/phlaredb/symdb/symdb_test.go index dccb731650..c5bb7d9576 100644 --- a/pkg/phlaredb/symdb/symdb_test.go +++ b/pkg/phlaredb/symdb/symdb_test.go @@ -53,9 +53,6 @@ func (s *memSuite) init() { Stacktraces: StacktracesConfig{ MaxNodesPerChunk: 1 << 10, }, - Parquet: ParquetConfig{ - MaxBufferRowCount: 512, - }, } } if s.db == nil { @@ -155,9 +152,6 @@ func Test_Stats(t *testing.T) { Stacktraces: StacktracesConfig{ MaxNodesPerChunk: 4 << 20, }, - Parquet: ParquetConfig{ - MaxBufferRowCount: 100 << 10, - }, }, } diff --git a/pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb b/pkg/phlaredb/symdb/testdata/symbols/v3/index.symdb deleted file mode 100644 index 7eb2bf119673f22c8a86ea5921c1272fec00df68..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 704 zcmXS}%r#_SU|Uefy&-M z*epoW3=HhyvwlMa=G=zRJk4+gj0{ly41a+7UqZxD^|#3=PlbrDgxb%%0Ir`2rk@Gu zMn)@a`ZrGq?uCdqtz}@~-PpWD>JZR-=NK5k_Q321DyadQ!6XsSt@;JX0kXmJKpF<> zq=90S#Lk&20PXq(l}8RIpu!sF#g?wX@MPKzl}Gjr$S#O^jJ7clQ{iC=l7qU1h_Gbz zyb0Cc455K~NeN5FFsS|Tuw(?rIMjY3!je&cHQumf)C8J^CoCDYCGm&lnt!;$QcU%* FHvkTYIhg0G=xbao?G<`kOO3c z<$*K|)JX%yCW)OhRRCJ{3o4HsQb2_@%!@5uZGqC>qvuVqb_Ski2o2OrWN3ozXJ8D2IshJ=jKEljI)I4aWYk{`2~HfL$*2i5 n3u|b`LLAJfEr~xk*Zjj3oQ_A{$f6`-#z Date: Sat, 27 Apr 2024 17:43:19 +0800 Subject: [PATCH 24/36] go mod --- examples/golang-pgo/go.work.sum | 105 +------------------------------- 1 file changed, 2 insertions(+), 103 deletions(-) diff --git a/examples/golang-pgo/go.work.sum b/examples/golang-pgo/go.work.sum index d6b3c94a30..17fedccdc6 100644 --- a/examples/golang-pgo/go.work.sum +++ b/examples/golang-pgo/go.work.sum @@ -1,132 +1,31 @@ cloud.google.com/go/compute v1.23.0 h1:tP41Zoavr8ptEqaW6j+LQOnyBBhO7OkOMAGrgLopTwY= -cloud.google.com/go/compute v1.23.0/go.mod h1:4tCnrn48xsqlwSAiLf1HXMQk8CONslYbdiEZc9FEIbM= cloud.google.com/go/compute/metadata v0.2.3 h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY= -cloud.google.com/go/compute/metadata v0.2.3/go.mod h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA= -github.com/agoda-com/opentelemetry-logs-go v0.4.1 h1:PWGqIxkEEg4HIjnHsHmNa+yGu0lhxHz4XPGKeT4o6T0= -github.com/agoda-com/opentelemetry-logs-go v0.4.1/go.mod h1:CeDuVaK9yCWN+8UjOW8AciYJE0rl7K/mw4ejBntGYkc= github.com/antihax/optional v1.0.0 h1:xK2lYat7ZLaVVcIuj82J8kIro4V6kDe0AUDFboUCwcg= -github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= -github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= 
-github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= -github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe h1:QQ3GSy+MqSHxm/d8nCtnAiZdYFd45cYZPs8vOOIYKfk= -github.com/cncf/udpa/go v0.0.0-20220112060539-c52dc94e7fbe/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4 h1:/inchEIKaYC1Akx+H+gqO04wryn5h75LSazbRlnya1k= -github.com/cncf/xds/go v0.0.0-20230607035331-e9ce68804cb4/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= -github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/envoyproxy/go-control-plane v0.11.1 h1:wSUXTlLfiAQRWs2F+p+EKOY9rUyis1MyGqJ2DIk5HpM= -github.com/envoyproxy/go-control-plane v0.11.1/go.mod h1:uhMcXKCQMEJHiAb0w+YGefQLaTEw+YhGluxZkrTmD0g= github.com/envoyproxy/protoc-gen-validate v1.0.2 h1:QkIBuU5k+x7/QXPvPPnWXWlCdaBFApVqftFV6k087DA= -github.com/envoyproxy/protoc-gen-validate v1.0.2/go.mod h1:GpiZQP3dDbg4JouG/NNS7QWXpgx6x8QiMKdmN72jogE= -github.com/felixge/httpsnoop v1.0.3 h1:s/nj+GCswXYzN5v2DpNMuMQYe+0DDwt5WVCU6CWBdXk= -github.com/felixge/httpsnoop v1.0.3/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.3.0 h1:2y3SDp0ZXuc6/cjLSZ+Q3ir+QB9T/iG5yYRXqsagWSY= -github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= -github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= -github.com/golang/glog v1.1.2/go.mod h1:zR+okUeTbrL6EL3xHUDxZuEtGv04p5shwip1+mL/rLQ= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= -github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4= -github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/grafana/otel-profiling-go v0.5.1 h1:stVPKAFZSa7eGiqbYuG25VcqYksR6iWvF3YH66t4qL8= -github.com/grafana/otel-profiling-go v0.5.1/go.mod h1:ftN/t5A/4gQI19/8MoWurBEtC6gFw8Dns1sJZ9W4Tls= -github.com/grafana/pyroscope-go v1.1.1 h1:PQoUU9oWtO3ve/fgIiklYuGilvsm8qaGhlY4Vw6MAcQ= -github.com/grafana/pyroscope-go v1.1.1/go.mod h1:Mw26jU7jsL/KStNSGGuuVYdUq7Qghem5P8aXYXSXG88= -github.com/grafana/pyroscope-go/godeltaprof v0.1.6 h1:nEdZ8louGAplSvIJi1HVp7kWvFvdiiYg3COLlTwJiFo= -github.com/grafana/pyroscope-go/godeltaprof v0.1.6/go.mod 
h1:Tk376Nbldo4Cha9RgiU7ik8WKFkNpfds98aUzS8omLE= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1 h1:6UKoz5ujsI55KNpsJH3UwCq3T8kKbZwNZBNPuTTje8U= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1/go.mod h1:YvJ2f6MplWDhfxiUC3KpyTy76kYUZA4W3pTv/wdKQ9Y= -github.com/klauspost/compress v1.17.3 h1:qkRjuerhUU1EmXLYGkSH6EZL+vPSxIrYjLNAK4slzwA= -github.com/klauspost/compress v1.17.3/go.mod h1:/dCuZOvVtNoHsyb+cuJD3itjs3NbnF6KH9zAO4BDxPM= -github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= -github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= -github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rogpeppe/fastuuid v1.2.0 h1:Ppwyp6VYCF1nvBTXL3trRso7mXMlRrw9ooo375wvi2s= -github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ= -github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= -github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= -github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= -github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.42.0 h1:pginetY7+onl4qN1vl0xW/V/v6OBZ0vVdH+esuJgvmM= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.42.0/go.mod h1:XiYsayHc36K3EByOO6nbAXnAWbrUxdjUROCEeeROOH8= -go.opentelemetry.io/otel v1.21.0 h1:hzLeKBZEL7Okw2mGzZ0cc4k/A7Fta0uoPgaJCr8fsFc= -go.opentelemetry.io/otel v1.21.0/go.mod h1:QZzNPQPm1zLX4gZK4cMi+71eaorMSGT3A4znnUvNNEo= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0 h1:DeFD0VgTZ+Cj6hxravYYZE2W4GlneVH81iAOPjZkzk8= -go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.20.0/go.mod h1:GijYcYmNpX1KazD5JmWGsi4P7dDTTTnfv1UbGn84MnU= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.20.0 h1:CsBiKCiQPdSjS+MlRiqeTI9JDDpSuk0Hb6QTRfwer8k= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.20.0/go.mod h1:CMJYNAfooOwSZSAmAeMUV1M+TXld3BiK++z9fqIm2xk= -go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.20.0 h1:4s9HxB4azeeQkhY0GE5wZlMj4/pz8tE5gx2OQpGUw58= -go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.20.0/go.mod h1:djVA3TUJ2fSdMX0JE5XxFBOaZzprElJoP7fD4vnV2SU= -go.opentelemetry.io/otel/metric v1.21.0 h1:tlYWfeo+Bocx5kLEloTjbcDwBuELRrIFxwdQ36PlJu4= -go.opentelemetry.io/otel/metric v1.21.0/go.mod h1:o1p3CA8nNHW8j5yuQLdc1eeqEaPfzug24uvsyIEJRWM= -go.opentelemetry.io/otel/sdk v1.21.0 h1:FTt8qirL1EysG6sTQRZ5TokkU8d0ugCj8htOgThZXQ8= 
-go.opentelemetry.io/otel/sdk v1.21.0/go.mod h1:Nna6Yv7PWTdgJHVRD9hIYywQBRx7pbox6nwBnZIxl/E= -go.opentelemetry.io/otel/trace v1.21.0 h1:WD9i5gzvoUPuXIXH24ZNBudiarZDKuekPqi/E8fpfLc= -go.opentelemetry.io/otel/trace v1.21.0/go.mod h1:LGbsEB0f9LGjN+OZaQQ26sohbOmiMR+BaslueVtS/qQ= -go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= -go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= -go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= -go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4= golang.org/x/crypto v0.15.0 h1:frVn1TEaCEaZcn3Tmd7Y2b5KKPaZ+I32Q2OA3kYp5TA= golang.org/x/crypto v0.15.0/go.mod h1:4ChreQoLWfG3xLDer1WdlH5NdlQ3+mwnQq1YTKY+72g= +golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA= golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.18.0 h1:mIYleuAkSbHh0tCv7RvjL3F6ZVbLjq4+R7zbOn3Kokg= golang.org/x/net v0.18.0/go.mod h1:/czyP5RqHAH4odGYxBJ1qz0+CE5WZ+2j1YgoEo8F2jQ= golang.org/x/oauth2 v0.13.0 h1:jDDenyj+WgFtmV3zYVoi8aE2BwtXFLWOA67ZfNWftiY= -golang.org/x/oauth2 v0.13.0/go.mod h1:/JMhi4ZRXAf4HG9LiNmxvk+45+96RUlVThiH8FzNBn0= golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E= -golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= -golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.14.0 h1:LGK9IlZ8T9jvdy6cTdfKUCltatMFOehAQo9SRC46UQ8= golang.org/x/term v0.14.0/go.mod h1:TySc+nGkYR6qt8km8wUhuFRTVSMIX3XPR58y2lC8vww= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8= golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/genproto v0.0.0-20231030173426-d783a09b4405 h1:I6WNifs6pF9tNdSob2W24JtyxIYjzFB9qDlpUC76q+U= -google.golang.org/genproto v0.0.0-20231030173426-d783a09b4405/go.mod h1:3WDQMjmJk36UQhjQ89emUzb1mdaHcPeeAh4SCBKznB4= -google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17 h1:JpwMPBpFN3uKhdaekDpiNlImDdkUAyiJ6ez/uxGaUSo= -google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17/go.mod h1:0xJLfVdJqpAPl8tDg1ujOCGzx6LFLttXT5NhllGOXY4= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17 h1:Jyp0Hsi0bmHXG6k9eATXoYtjd6e2UzZ1SCn/wIupY14= -google.golang.org/genproto/googleapis/rpc v0.0.0-20231106174013-bbf56f31fb17/go.mod h1:oQ5rr10WTTMvP4A36n8JpR1OrO1BEiV4f78CneXZxkA= -google.golang.org/grpc v1.59.0 h1:Z5Iec2pjwb+LEOqzpB2MR12/eKFhDPhuqW91O+4bwUk= -google.golang.org/grpc v1.59.0/go.mod h1:aUPDwccQo6OTjy7Hct4AfBPD1GptF4fyUjIkQ9YtF98= -google.golang.org/protobuf v1.26.0-rc.1/go.mod 
h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
-google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
 google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
-google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
-gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
-gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
-gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

From b6b25c343dddf3c3920d38551ca0eadf72ab30bc Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Sat, 27 Apr 2024 17:50:37 +0800
Subject: [PATCH 25/36] fix comments

---
 pkg/phlaredb/symdb/locations.go | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go
index 74b7a4216b..07e79e01b6 100644
--- a/pkg/phlaredb/symdb/locations.go
+++ b/pkg/phlaredb/symdb/locations.go
@@ -60,7 +60,7 @@ type locationsBlockEncoder struct {
 	header locationsBlockHeader
 
 	mapping []int32
-	// Assuming there is no locations with more than 255 lines.
+	// Assuming there are no locations with more than 255 lines.
 	// We could even use a nibble (4 bits), but there are locations
 	// with 10 or more functions, therefore there is a chance that
 	// the capacity of 2^4 is not enough in all cases.
@@ -89,7 +89,6 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat
 	for i, loc := range locations {
 		e.mapping[i] = int32(loc.MappingId)
 		e.lineCount[i] = byte(len(loc.Line))
-		// Append lines but the first one.
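 	// The number of lines is stored in a single byte, and the loop
 	// below appends at most maxLocationLines lines per location.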
for j := 0; j < len(loc.Line) && j < maxLocationLines; j++ { e.lines = append(e.lines, int32(loc.Line[j].FunctionId), From d491c4249e38904d2eaa7abb7138c6a970d8319d Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Sun, 28 Apr 2024 13:04:19 +0800 Subject: [PATCH 26/36] add unit tests --- pkg/phlaredb/symdb/block_reader.go | 8 +- pkg/phlaredb/symdb/block_reader_test.go | 30 +++---- pkg/phlaredb/symdb/functions.go | 2 +- pkg/phlaredb/symdb/functions_test.go | 62 +++++++++++++ pkg/phlaredb/symdb/locations.go | 9 +- pkg/phlaredb/symdb/locations_test.go | 111 ++++++++++++++++++++++++ pkg/phlaredb/symdb/mappings.go | 2 +- pkg/phlaredb/symdb/mappings_test.go | 109 +++++++++++++++++++++++ pkg/phlaredb/symdb/strings.go | 2 +- pkg/phlaredb/symdb/strings_test.go | 24 ++--- pkg/phlaredb/symdb/symdb_test.go | 4 + 11 files changed, 320 insertions(+), 43 deletions(-) create mode 100644 pkg/phlaredb/symdb/functions_test.go create mode 100644 pkg/phlaredb/symdb/locations_test.go create mode 100644 pkg/phlaredb/symdb/mappings_test.go diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index 79a46f3b76..e7eab9d19f 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -309,7 +309,7 @@ func (p *partition) initTables(h *PartitionHeader) (err error) { reader: p.reader, header: h.V3.Locations, } - if locations.dec, err = locationsDecoder(h.V3.Locations); err != nil { + if locations.dec, err = newLocationsDecoder(h.V3.Locations); err != nil { return err } p.locations = locations @@ -318,7 +318,7 @@ func (p *partition) initTables(h *PartitionHeader) (err error) { reader: p.reader, header: h.V3.Mappings, } - if mappings.dec, err = mappingsDecoder(h.V3.Mappings); err != nil { + if mappings.dec, err = newMappingsDecoder(h.V3.Mappings); err != nil { return err } p.mappings = mappings @@ -327,7 +327,7 @@ func (p *partition) initTables(h *PartitionHeader) (err error) { reader: p.reader, header: h.V3.Functions, } - if functions.dec, err = functionsDecoder(h.V3.Functions); err != nil { + if functions.dec, err = newFunctionsDecoder(h.V3.Functions); err != nil { return err } p.functions = functions @@ -336,7 +336,7 @@ func (p *partition) initTables(h *PartitionHeader) (err error) { reader: p.reader, header: h.V3.Strings, } - if strings.dec, err = stringsDecoder(h.V3.Strings); err != nil { + if strings.dec, err = newStringsDecoder(h.V3.Strings); err != nil { return err } p.strings = strings diff --git a/pkg/phlaredb/symdb/block_reader_test.go b/pkg/phlaredb/symdb/block_reader_test.go index 67521b2350..b037108cff 100644 --- a/pkg/phlaredb/symdb/block_reader_test.go +++ b/pkg/phlaredb/symdb/block_reader_test.go @@ -53,21 +53,6 @@ func Test_write_block_fixture(t *testing.T) { require.NoError(t, os.Rename(b.config.Dir, fixtureDir)) } -func Fuzz_ReadIndexFile_v12(f *testing.F) { - files := []string{ - "testdata/symbols/v2/index.symdb", - "testdata/symbols/v1/index.symdb", - } - for _, path := range files { - data, err := os.ReadFile(path) - require.NoError(f, err) - f.Add(data) - } - f.Fuzz(func(_ *testing.T, b []byte) { - _, _ = OpenIndex(b) - }) -} - func Test_Reader_Open_v3(t *testing.T) { // The block contains two partitions (0 and 1), each partition // stores symbols of the testdata/profile.pb.gz profile @@ -194,6 +179,21 @@ func Test_Reader_Open_v1(t *testing.T) { require.NoError(t, err) } +func Fuzz_ReadIndexFile_v12(f *testing.F) { + files := []string{ + "testdata/symbols/v2/index.symdb", + "testdata/symbols/v1/index.symdb", + } + for _, path := 
range files { + data, err := os.ReadFile(path) + require.NoError(f, err) + f.Add(data) + } + f.Fuzz(func(_ *testing.T, b []byte) { + _, _ = OpenIndex(b) + }) +} + type mockStacktraceInserter struct{ mock.Mock } func (m *mockStacktraceInserter) InsertStacktrace(stacktraceID uint32, locations []int32) { diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go index 9fbbbd8c74..ef485d0e2e 100644 --- a/pkg/phlaredb/symdb/functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -127,7 +127,7 @@ type functionsBlockDecoder struct { buf []byte } -func functionsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryFunction], error) { +func newFunctionsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryFunction], error) { if h.Format == BlockFunctionsV1 { return newSymbolsDecoder[v1.InMemoryFunction](h, &functionsBlockDecoder{format: h.Format}), nil } diff --git a/pkg/phlaredb/symdb/functions_test.go b/pkg/phlaredb/symdb/functions_test.go new file mode 100644 index 0000000000..190e42ef56 --- /dev/null +++ b/pkg/phlaredb/symdb/functions_test.go @@ -0,0 +1,62 @@ +package symdb + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/require" + + v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" +) + +func Test_FunctionsEncoding(t *testing.T) { + type testCase struct { + description string + funcs []v1.InMemoryFunction + } + + testCases := []testCase{ + { + description: "empty", + funcs: []v1.InMemoryFunction{}, + }, + { + description: "zero", + funcs: []v1.InMemoryFunction{{}}, + }, + { + description: "single function", + funcs: []v1.InMemoryFunction{ + {Name: 1, SystemName: 2, Filename: 3, StartLine: 4}, + }, + }, + { + description: "multiline blocks", + funcs: []v1.InMemoryFunction{ + {Name: 1, SystemName: 2, Filename: 3, StartLine: 4}, + {Name: 5, SystemName: 6, Filename: 7, StartLine: 8}, + {Name: 9, SystemName: 10, Filename: 11}, + {}, + {Name: 13, SystemName: 14, Filename: 15, StartLine: 16}, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.description, func(t *testing.T) { + var buf bytes.Buffer + w := newTestFileWriter(&buf) + e := newFunctionsEncoder() + e.blockSize = 3 + h, err := writeSymbolsBlock(w, tc.funcs, e) + require.NoError(t, err) + + d, err := newFunctionsDecoder(h) + require.NoError(t, err) + out := make([]v1.InMemoryFunction, h.Length) + require.NoError(t, d.decode(out, &buf)) + require.Equal(t, tc.funcs, out) + }) + } +} diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index 07e79e01b6..5da7ae0f9e 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -84,7 +84,7 @@ func (e *locationsBlockEncoder) format() SymbolsBlockFormat { func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) error { e.initWrite(len(locations)) - var addr int64 + var addr uint64 var folded bool for i, loc := range locations { e.mapping[i] = int32(loc.MappingId) @@ -94,7 +94,7 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat int32(loc.Line[j].FunctionId), loc.Line[j].Line) } - addr |= int64(loc.Address) + addr |= loc.Address e.addr[i] = int64(loc.Address) folded = folded || loc.IsFolded e.folded[i] = loc.IsFolded @@ -170,7 +170,7 @@ type locationsBlockDecoder struct { buf []byte } -func locationsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryLocation], error) { +func newLocationsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryLocation], error) { if h.Format == BlockLocationsV1 { return 
newSymbolsDecoder[v1.InMemoryLocation](h, &locationsBlockDecoder{format: h.Format}), nil } @@ -248,6 +248,8 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat } // Otherwise, inspect all the optional fields. + d.address = slices.GrowLen(d.address, int(d.header.LocationsLen)) + d.folded = slices.GrowLen(d.folded, int(d.header.LocationsLen)) if int(d.header.AddrSize) > 0 { d.buf = slices.GrowLen(d.buf, int(d.header.AddrSize)) if _, err = io.ReadFull(r, d.buf); err != nil { @@ -263,7 +265,6 @@ func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocat if _, err = io.ReadFull(r, d.buf); err != nil { return err } - d.folded = slices.GrowLen(d.folded, int(d.header.LocationsLen)) decodeBoolean(d.folded, d.buf) } diff --git a/pkg/phlaredb/symdb/locations_test.go b/pkg/phlaredb/symdb/locations_test.go new file mode 100644 index 0000000000..729fe59500 --- /dev/null +++ b/pkg/phlaredb/symdb/locations_test.go @@ -0,0 +1,111 @@ +package symdb + +import ( + "bytes" + "math" + "testing" + + "github.com/stretchr/testify/require" + + v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" +) + +func Test_LocationsEncoding(t *testing.T) { + type testCase struct { + description string + locs []v1.InMemoryLocation + } + + testCases := []testCase{ + { + description: "empty", + locs: []v1.InMemoryLocation{}, + }, + { + description: "zero", + locs: []v1.InMemoryLocation{{Line: []v1.InMemoryLine{}}}, + }, + { + description: "single location", + locs: []v1.InMemoryLocation{ + { + Address: math.MaxUint64, + MappingId: 1, + IsFolded: false, + Line: []v1.InMemoryLine{ + {FunctionId: 1, Line: 1}, + }, + }, + }, + }, + { + description: "multiline locations", + locs: []v1.InMemoryLocation{ + { + Line: []v1.InMemoryLine{ + {FunctionId: 1, Line: 1}, + }, + }, + { + Line: []v1.InMemoryLine{ + {FunctionId: 1, Line: 1}, + {FunctionId: 2, Line: 1}, + }, + }, + { + Line: []v1.InMemoryLine{ + {FunctionId: 1, Line: 1}, + {FunctionId: 2, Line: 1}, + {FunctionId: 3, Line: 1}, + }, + }, + }, + }, + { + description: "optional fields mix", + locs: []v1.InMemoryLocation{ + {Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}}, + {Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}}, + { + Address: math.MaxUint64, + MappingId: 1, + IsFolded: true, + Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}, + }, + {Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}}, + }, + }, + { + description: "optional fields mix split", + locs: []v1.InMemoryLocation{ + {Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}}, + {Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}}, + {Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}}, + { + Address: math.MaxUint64, + MappingId: 1, + IsFolded: true, + Line: []v1.InMemoryLine{{FunctionId: 1, Line: 1}}, + }, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.description, func(t *testing.T) { + var buf bytes.Buffer + w := newTestFileWriter(&buf) + e := newLocationsEncoder() + e.blockSize = 3 + h, err := writeSymbolsBlock(w, tc.locs, e) + require.NoError(t, err) + + d, err := newLocationsDecoder(h) + require.NoError(t, err) + out := make([]v1.InMemoryLocation, h.Length) + require.NoError(t, d.decode(out, &buf)) + require.Equal(t, tc.locs, out) + }) + } +} diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index 55eb7beefb..f2885c6ef0 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -176,7 +176,7 @@ type mappingsBlockDecoder struct { buf []byte } -func mappingsDecoder(h 
SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryMapping], error) { +func newMappingsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryMapping], error) { if h.Format == BlockMappingsV1 { return newSymbolsDecoder[v1.InMemoryMapping](h, &mappingsBlockDecoder{format: h.Format}), nil } diff --git a/pkg/phlaredb/symdb/mappings_test.go b/pkg/phlaredb/symdb/mappings_test.go new file mode 100644 index 0000000000..406acea458 --- /dev/null +++ b/pkg/phlaredb/symdb/mappings_test.go @@ -0,0 +1,109 @@ +package symdb + +import ( + "bytes" + "math" + "testing" + + "github.com/stretchr/testify/require" + + v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" +) + +func Test_MappingsEncoding(t *testing.T) { + type testCase struct { + description string + mappings []v1.InMemoryMapping + } + + testCases := []testCase{ + { + description: "empty", + mappings: []v1.InMemoryMapping{}, + }, + { + description: "zero", + mappings: []v1.InMemoryMapping{{}}, + }, + { + description: "single mapping", + mappings: []v1.InMemoryMapping{ + { + MemoryStart: math.MaxUint64, + MemoryLimit: math.MaxUint64, + FileOffset: math.MaxUint64, + Filename: 1, + BuildId: 2, + HasFunctions: true, + HasFilenames: false, + HasLineNumbers: false, + HasInlineFrames: false, + }, + }, + }, + { + description: "optional fields mix", + mappings: []v1.InMemoryMapping{ + // Block size == 3 + {MemoryStart: math.MaxUint64}, + {}, + {}, + + {}, + {MemoryLimit: math.MaxUint64}, + {}, + + {}, + {}, + {FileOffset: math.MaxUint64}, + + {MemoryStart: math.MaxUint64}, + {MemoryLimit: math.MaxUint64}, + {FileOffset: math.MaxUint64}, + + {}, + {}, + {}, + }, + }, + { + description: "flag combinations", + mappings: []v1.InMemoryMapping{ + {HasFunctions: false, HasFilenames: false, HasLineNumbers: false, HasInlineFrames: false}, + {HasFunctions: false, HasFilenames: false, HasLineNumbers: false, HasInlineFrames: true}, + {HasFunctions: false, HasFilenames: false, HasLineNumbers: true, HasInlineFrames: false}, + {HasFunctions: false, HasFilenames: false, HasLineNumbers: true, HasInlineFrames: true}, + {HasFunctions: false, HasFilenames: true, HasLineNumbers: false, HasInlineFrames: false}, + {HasFunctions: false, HasFilenames: true, HasLineNumbers: false, HasInlineFrames: true}, + {HasFunctions: false, HasFilenames: true, HasLineNumbers: true, HasInlineFrames: false}, + {HasFunctions: false, HasFilenames: true, HasLineNumbers: true, HasInlineFrames: true}, + {HasFunctions: true, HasFilenames: false, HasLineNumbers: false, HasInlineFrames: false}, + {HasFunctions: true, HasFilenames: false, HasLineNumbers: false, HasInlineFrames: true}, + {HasFunctions: true, HasFilenames: false, HasLineNumbers: true, HasInlineFrames: false}, + {HasFunctions: true, HasFilenames: false, HasLineNumbers: true, HasInlineFrames: true}, + {HasFunctions: true, HasFilenames: true, HasLineNumbers: false, HasInlineFrames: false}, + {HasFunctions: true, HasFilenames: true, HasLineNumbers: false, HasInlineFrames: true}, + {HasFunctions: true, HasFilenames: true, HasLineNumbers: true, HasInlineFrames: false}, + {HasFunctions: true, HasFilenames: true, HasLineNumbers: true, HasInlineFrames: true}, + }, + }, + } + + for _, tc := range testCases { + tc := tc + t.Run(tc.description, func(t *testing.T) { + var buf bytes.Buffer + w := newTestFileWriter(&buf) + e := newMappingsEncoder() + e.blockSize = 3 + h, err := writeSymbolsBlock(w, tc.mappings, e) + require.NoError(t, err) + + d, err := newMappingsDecoder(h) + require.NoError(t, err) + out := make([]v1.InMemoryMapping, h.Length) + 
require.NoError(t, d.decode(out, &buf)) + require.Equal(t, tc.mappings, out) + }) + } +} diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go index 4400218f61..596f313957 100644 --- a/pkg/phlaredb/symdb/strings.go +++ b/pkg/phlaredb/symdb/strings.go @@ -113,7 +113,7 @@ type stringsBlockDecoder struct { buf []byte } -func stringsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[string], error) { +func newStringsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[string], error) { if h.Format == BlockStringsV1 { return newSymbolsDecoder[string](h, &stringsBlockDecoder{format: h.Format}), nil } diff --git a/pkg/phlaredb/symdb/strings_test.go b/pkg/phlaredb/symdb/strings_test.go index f462886043..ca95b03afa 100644 --- a/pkg/phlaredb/symdb/strings_test.go +++ b/pkg/phlaredb/symdb/strings_test.go @@ -12,7 +12,6 @@ func Test_StringsEncoding(t *testing.T) { type testCase struct { description string strings []string - blockSize int } testCases := []testCase{ @@ -26,7 +25,6 @@ func Test_StringsEncoding(t *testing.T) { "a", "b", }, - blockSize: 4, }, { description: "exact block size", @@ -36,7 +34,6 @@ func Test_StringsEncoding(t *testing.T) { "cde", "def", }, - blockSize: 4, }, { description: "greater than block size", @@ -47,7 +44,6 @@ func Test_StringsEncoding(t *testing.T) { "def", "e", }, - blockSize: 4, }, { description: "mixed encoding", @@ -56,7 +52,6 @@ func Test_StringsEncoding(t *testing.T) { "bcd", strings.Repeat("e", 256), }, - blockSize: 4, }, { description: "mixed encoding exact block", @@ -70,7 +65,6 @@ func Test_StringsEncoding(t *testing.T) { strings.Repeat("j", 256), strings.Repeat("h", 256), }, - blockSize: 4, }, } @@ -78,18 +72,14 @@ func Test_StringsEncoding(t *testing.T) { tc := tc t.Run(tc.description, func(t *testing.T) { var buf bytes.Buffer - e := newSymbolsEncoder[string](new(stringsBlockEncoder)) - if tc.blockSize > 0 { - e.blockSize = tc.blockSize - } - require.NoError(t, e.encode(&buf, tc.strings)) - - h := SymbolsBlockHeader{ - Length: uint32(len(tc.strings)), - BlockSize: uint32(e.blockSize), - } - d := newSymbolsDecoder[string](h, new(stringsBlockDecoder)) + w := newTestFileWriter(&buf) + e := newStringsEncoder() + e.blockSize = 4 + h, err := writeSymbolsBlock(w, tc.strings, e) + require.NoError(t, err) + d, err := newStringsDecoder(h) + require.NoError(t, err) out := make([]string, h.Length) require.NoError(t, d.decode(out, &buf)) require.Equal(t, tc.strings, out) diff --git a/pkg/phlaredb/symdb/symdb_test.go b/pkg/phlaredb/symdb/symdb_test.go index c5bb7d9576..a1bab91bc5 100644 --- a/pkg/phlaredb/symdb/symdb_test.go +++ b/pkg/phlaredb/symdb/symdb_test.go @@ -103,6 +103,10 @@ func (b *testBucket) GetRange(ctx context.Context, name string, off, length int6 return b.Bucket.GetRange(ctx, name, off, length) } +func newTestFileWriter(w io.Writer) *fileWriter { + return &fileWriter{w: &writerOffset{Writer: w}} +} + //nolint:unparam func pprofFingerprint(p *googlev1.Profile, typ int) [][2]uint64 { m := make(map[uint64]uint64, len(p.Sample)) From ea5f4a8b29746390ac7e6a75f9a7a74a1467e680 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Sun, 28 Apr 2024 13:19:58 +0800 Subject: [PATCH 27/36] ignore "unused" false-positive --- pkg/phlaredb/symdb/block_reader.go | 1 + pkg/phlaredb/symdb/block_reader_parquet.go | 1 + pkg/phlaredb/symdb/block_writer.go | 9 ++++----- pkg/phlaredb/symdb/format.go | 7 +------ pkg/phlaredb/symdb/functions.go | 1 + pkg/phlaredb/symdb/locations.go | 1 + pkg/phlaredb/symdb/mappings.go | 1 + pkg/phlaredb/symdb/strings.go | 1 + 8 files 
changed, 11 insertions(+), 11 deletions(-) diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index e7eab9d19f..34f0a5eb0e 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( diff --git a/pkg/phlaredb/symdb/block_reader_parquet.go b/pkg/phlaredb/symdb/block_reader_parquet.go index 364a678344..90ba014b85 100644 --- a/pkg/phlaredb/symdb/block_reader_parquet.go +++ b/pkg/phlaredb/symdb/block_reader_parquet.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index 585b498f2d..d489905279 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -14,11 +14,10 @@ import ( type writer struct { config *Config - index IndexFile - indexFile *fileWriter - dataFile *fileWriter - files []block.File - footer Footer + index IndexFile + dataFile *fileWriter + files []block.File + footer Footer stringsEncoder *symbolsEncoder[string] mappingsEncoder *symbolsEncoder[v1.InMemoryMapping] diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 780b965494..562519aaf9 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( @@ -523,12 +524,6 @@ type RowRangeReference struct { Rows uint32 } -func (r *RowRangeReference) marshal(b []byte) { - binary.BigEndian.PutUint32(b[0:4], r.RowGroup) - binary.BigEndian.PutUint32(b[4:8], r.Index) - binary.BigEndian.PutUint32(b[8:12], r.Rows) -} - func (r *RowRangeReference) unmarshal(b []byte) { r.RowGroup = binary.BigEndian.Uint32(b[0:4]) r.Index = binary.BigEndian.Uint32(b[4:8]) diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go index ef485d0e2e..66963dfbab 100644 --- a/pkg/phlaredb/symdb/functions.go +++ b/pkg/phlaredb/symdb/functions.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index 5da7ae0f9e..78ac4cd988 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index f2885c6ef0..3094b74c44 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go index 596f313957..1992e85c1b 100644 --- a/pkg/phlaredb/symdb/strings.go +++ b/pkg/phlaredb/symdb/strings.go @@ -1,3 +1,4 @@ +//nolint:unused package symdb import ( From 6abb099eb0a993aecab9c49f71aefdea128fb92d Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 29 Apr 2024 10:15:05 +0800 Subject: [PATCH 28/36] update docs --- .../block-format/_index.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/docs/sources/reference-pyroscope-architecture/block-format/_index.md b/docs/sources/reference-pyroscope-architecture/block-format/_index.md index 915c6b1b9e..d45a85480f 100644 --- a/docs/sources/reference-pyroscope-architecture/block-format/_index.md +++ b/docs/sources/reference-pyroscope-architecture/block-format/_index.md @@ -23,12 +23,7 @@ the block there are multiple files: * `profiles.parquet` [parquet] table that contains profiles. 
-* `symbols` sub-directory contains profiling symbols that provide a link between
-  the compiled or interpreted binary code and the original source code:
-  - A `index.symdb` file with meta information, which helps to find symbols for a specific profile.
-  - A `stacktraces.symdb` file contains stack traces compacted in the [parent pointer tree].
-  - Parquet tables for models referenced by stack traces:
-    `locations.parquet`, `functions.parquet`, `mappings.parquet`, `strings.parquet`.
+* `symbols.symdb` file that contains symbolic information for the profiles stored in the block.
 
 ## Data model

From 9ad268decf747b0ef3ef99e2766f685f2c1820ef Mon Sep 17 00:00:00 2001
From: Anton Kolesnikov
Date: Mon, 13 May 2024 14:09:32 +0800
Subject: [PATCH 29/36] add support for forward compatibility

---
 pkg/phlaredb/symdb/format.go                |  37 ++++++++++---
 pkg/phlaredb/symdb/functions.go             |  42 +++++++--------
 pkg/phlaredb/symdb/locations.go             |  43 +++++++--------
 pkg/phlaredb/symdb/mappings.go              |  40 +++++++-------
 pkg/phlaredb/symdb/partition_memory.go      |   3 +-
 pkg/phlaredb/symdb/strings.go               |  49 ++++++++----------
 .../symdb/testdata/symbols/v3/symbols.symdb | Bin 89300 -> 89300 bytes
 7 files changed, 114 insertions(+), 100 deletions(-)

diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go
index 562519aaf9..0dbbcae529 100644
--- a/pkg/phlaredb/symdb/format.go
+++ b/pkg/phlaredb/symdb/format.go
@@ -425,11 +425,19 @@ type SymbolsBlockHeader struct {
 	Length uint32
 	// BlockSize denotes the number of items per block.
 	BlockSize uint32
+	// BlockHeaderSize denotes the encoder block header size in bytes.
+	// This enables forward compatibility within the same format version:
+	// as long as fields are not removed or reordered, and the encoding
+	// scheme does not change, the format can be extended with no change
+	// of the format version. The decoder is able to read the whole
+	// header and skip unknown fields.
+	BlockHeaderSize uint16
 	// Format of the encoded data.
+	// Change of the format _version_ may break forward compatibility.
 	Format SymbolsBlockFormat
 }
 
-type SymbolsBlockFormat uint32
+type SymbolsBlockFormat uint16
 
 const (
 	_ SymbolsBlockFormat = iota
@@ -439,6 +447,22 @@ const (
 	BlockStringsV1
 )
 
+type headerUnmarshaler interface {
+	unmarshal([]byte)
+	checksum() uint32
+}
+
+func readSymbolsBlockHeader(buf []byte, r io.Reader, v headerUnmarshaler) error {
+	if _, err := io.ReadFull(r, buf); err != nil {
+		return err
+	}
+	v.unmarshal(buf)
+	if crc32.Checksum(buf[:len(buf)-checksumSize], castagnoli) != v.checksum() {
+		return ErrInvalidCRC
+	}
+	return nil
+}
+
 const symbolsBlockReferenceSize = int(unsafe.Sizeof(SymbolsBlockHeader{}))
 
 func (h *SymbolsBlockHeader) marshal(b []byte) {
@@ -447,7 +471,8 @@ func (h *SymbolsBlockHeader) marshal(b []byte) {
 	binary.BigEndian.PutUint32(b[12:16], h.CRC)
 	binary.BigEndian.PutUint32(b[16:20], h.Length)
 	binary.BigEndian.PutUint32(b[20:24], h.BlockSize)
-	binary.BigEndian.PutUint32(b[24:28], uint32(h.Format))
+	binary.BigEndian.PutUint16(b[24:26], h.BlockHeaderSize)
+	binary.BigEndian.PutUint16(b[26:28], uint16(h.Format))
 }
 
 func (h *SymbolsBlockHeader) unmarshal(b []byte) {
@@ -456,7 +481,8 @@ func (h *SymbolsBlockHeader) unmarshal(b []byte) {
 	h.CRC = binary.BigEndian.Uint32(b[12:16])
 	h.Length = binary.BigEndian.Uint32(b[16:20])
 	h.BlockSize = binary.BigEndian.Uint32(b[20:24])
-	h.Format = SymbolsBlockFormat(binary.BigEndian.Uint32(b[24:28]))
+	h.BlockHeaderSize = binary.BigEndian.Uint16(b[24:26])
+	h.Format = SymbolsBlockFormat(binary.BigEndian.Uint16(b[26:28]))
 }
 
 func marshalSymbolsBlockReferences(b []byte, refs ...SymbolsBlockHeader) int {
@@ -672,6 +698,7 @@ func (h *StacktraceBlockHeader) unmarshal(b []byte) {
 type symbolsBlockEncoder[T any] interface {
 	encode(w io.Writer, block []T) error
 	format() SymbolsBlockFormat
+	headerSize() uintptr
 }
 
 type symbolsEncoder[T any] struct {
@@ -696,10 +723,6 @@ func (e *symbolsEncoder[T]) encode(w io.Writer, items []T) (err error) {
 	return nil
 }
 
-func (e *symbolsEncoder[T]) format() SymbolsBlockFormat {
-	return e.blockEncoder.format()
-}
-
 type symbolsBlockDecoder[T any] interface {
 	decode(r io.Reader, dst []T) error
 }
diff --git a/pkg/phlaredb/symdb/functions.go b/pkg/phlaredb/symdb/functions.go
index 66963dfbab..6d7a53240f 100644
--- a/pkg/phlaredb/symdb/functions.go
+++ b/pkg/phlaredb/symdb/functions.go
@@ -13,10 +13,9 @@ import (
 
 	v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
 	"github.com/grafana/pyroscope/pkg/slices"
+	"github.com/grafana/pyroscope/pkg/util/math"
 )
 
-const functionsBlockHeaderSize = int(unsafe.Sizeof(functionsBlockHeader{}))
-
 var (
 	_ symbolsBlockEncoder[v1.InMemoryFunction] = (*functionsBlockEncoder)(nil)
 	_ symbolsBlockDecoder[v1.InMemoryFunction] = (*functionsBlockDecoder)(nil)
@@ -37,6 +36,8 @@ func (h *functionsBlockHeader) marshal(b []byte) {
 	binary.BigEndian.PutUint32(b[8:12], h.SystemNameSize)
 	binary.BigEndian.PutUint32(b[12:16], h.FileNameSize)
 	binary.BigEndian.PutUint32(b[16:20], h.StartLineSize)
+	// Fields can be added here in the future.
+	// CRC must be the last four bytes.
 	h.CRC = crc32.Checksum(b[0:20], castagnoli)
 	binary.BigEndian.PutUint32(b[20:24], h.CRC)
 }
@@ -47,9 +48,13 @@ func (h *functionsBlockHeader) unmarshal(b []byte) {
 	h.SystemNameSize = binary.BigEndian.Uint32(b[8:12])
 	h.FileNameSize = binary.BigEndian.Uint32(b[12:16])
 	h.StartLineSize = binary.BigEndian.Uint32(b[16:20])
-	h.CRC = binary.BigEndian.Uint32(b[20:24])
+	// In future versions, new fields are decoded here;
+	// if pos < len(b)-checksumSize, then there are more fields.
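+	// For example, a hypothetical field appended by a later revision
+	// would be decoded here as follows (illustrative sketch, not part
+	// of this change):
+	//
+	//	if pos := 20; pos+4 <= len(b)-checksumSize {
+	//		h.NewField = binary.BigEndian.Uint32(b[pos : pos+4])
+	//	}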
+ h.CRC = binary.BigEndian.Uint32(b[len(b)-checksumSize:]) } +func (h *functionsBlockHeader) checksum() uint32 { return h.CRC } + type functionsBlockEncoder struct { header functionsBlockHeader @@ -62,9 +67,9 @@ func newFunctionsEncoder() *symbolsEncoder[v1.InMemoryFunction] { return newSymbolsEncoder[v1.InMemoryFunction](new(functionsBlockEncoder)) } -func (e *functionsBlockEncoder) format() SymbolsBlockFormat { - return BlockFunctionsV1 -} +func (e *functionsBlockEncoder) format() SymbolsBlockFormat { return BlockFunctionsV1 } + +func (e *functionsBlockEncoder) headerSize() uintptr { return unsafe.Sizeof(functionsBlockHeader{}) } func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunction) error { e.initWrite(len(functions)) @@ -98,7 +103,7 @@ func (e *functionsBlockEncoder) encode(w io.Writer, functions []v1.InMemoryFunct e.header.StartLineSize = uint32(len(e.tmp)) e.buf.Write(e.tmp) - e.tmp = slices.GrowLen(e.tmp, functionsBlockHeaderSize) + e.tmp = slices.GrowLen(e.tmp, int(e.headerSize())) e.header.marshal(e.tmp) if _, err := w.Write(e.tmp); err != nil { return err @@ -121,8 +126,8 @@ func (e *functionsBlockEncoder) initWrite(functions int) { } type functionsBlockDecoder struct { - format SymbolsBlockFormat - header functionsBlockHeader + headerSize uint16 + header functionsBlockHeader ints []int32 buf []byte @@ -130,25 +135,18 @@ type functionsBlockDecoder struct { func newFunctionsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryFunction], error) { if h.Format == BlockFunctionsV1 { - return newSymbolsDecoder[v1.InMemoryFunction](h, &functionsBlockDecoder{format: h.Format}), nil + headerSize := math.Max(functionsBlockHeaderMinSize, h.BlockHeaderSize) + return newSymbolsDecoder[v1.InMemoryFunction](h, &functionsBlockDecoder{headerSize: headerSize}), nil } return nil, fmt.Errorf("%w: unknown functions format: %d", ErrUnknownVersion, h.Format) } -func (d *functionsBlockDecoder) readHeader(r io.Reader) error { - d.buf = slices.GrowLen(d.buf, functionsBlockHeaderSize) - if _, err := io.ReadFull(r, d.buf); err != nil { - return nil - } - d.header.unmarshal(d.buf) - if crc32.Checksum(d.buf[:functionsBlockHeaderSize-4], castagnoli) != d.header.CRC { - return ErrInvalidSize - } - return nil -} +// In early versions, block header size is not specified. Must not change. 
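+// (functionsBlockHeader is six uint32 fields ending with the CRC:
+// 6*4 = 24 bytes.)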
+const functionsBlockHeaderMinSize = 24 func (d *functionsBlockDecoder) decode(r io.Reader, functions []v1.InMemoryFunction) (err error) { - if err = d.readHeader(r); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.headerSize)) + if err = readSymbolsBlockHeader(d.buf, r, &d.header); err != nil { return err } if d.header.FunctionsLen > uint32(len(functions)) { diff --git a/pkg/phlaredb/symdb/locations.go b/pkg/phlaredb/symdb/locations.go index 78ac4cd988..6d75311ad7 100644 --- a/pkg/phlaredb/symdb/locations.go +++ b/pkg/phlaredb/symdb/locations.go @@ -13,12 +13,10 @@ import ( v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/slices" + "github.com/grafana/pyroscope/pkg/util/math" ) -const ( - maxLocationLines = 255 - locationsBlockHeaderSize = int(unsafe.Sizeof(locationsBlockHeader{})) -) +const maxLocationLines = 255 var ( _ symbolsBlockEncoder[v1.InMemoryLocation] = (*locationsBlockEncoder)(nil) @@ -43,6 +41,8 @@ func (h *locationsBlockHeader) marshal(b []byte) { binary.BigEndian.PutUint32(b[12:16], h.LinesSize) binary.BigEndian.PutUint32(b[16:20], h.AddrSize) binary.BigEndian.PutUint32(b[20:24], h.IsFoldedSize) + // Fields can be added here in the future. + // CRC must be the last four bytes. h.CRC = crc32.Checksum(b[0:24], castagnoli) binary.BigEndian.PutUint32(b[24:28], h.CRC) } @@ -54,9 +54,13 @@ func (h *locationsBlockHeader) unmarshal(b []byte) { h.LinesSize = binary.BigEndian.Uint32(b[12:16]) h.AddrSize = binary.BigEndian.Uint32(b[16:20]) h.IsFoldedSize = binary.BigEndian.Uint32(b[20:24]) + // In future versions, new fields are decoded here; + // if pos < len(b)-checksumSize, then there are more fields. h.CRC = binary.BigEndian.Uint32(b[24:28]) } +func (h *locationsBlockHeader) checksum() uint32 { return h.CRC } + type locationsBlockEncoder struct { header locationsBlockHeader @@ -79,9 +83,9 @@ func newLocationsEncoder() *symbolsEncoder[v1.InMemoryLocation] { return newSymbolsEncoder[v1.InMemoryLocation](new(locationsBlockEncoder)) } -func (e *locationsBlockEncoder) format() SymbolsBlockFormat { - return BlockLocationsV1 -} +func (e *locationsBlockEncoder) format() SymbolsBlockFormat { return BlockLocationsV1 } + +func (e *locationsBlockEncoder) headerSize() uintptr { return unsafe.Sizeof(locationsBlockHeader{}) } func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocation) error { e.initWrite(len(locations)) @@ -128,7 +132,7 @@ func (e *locationsBlockEncoder) encode(w io.Writer, locations []v1.InMemoryLocat e.buf.Write(e.tmp) } - e.tmp = slices.GrowLen(e.tmp, locationsBlockHeaderSize) + e.tmp = slices.GrowLen(e.tmp, int(e.headerSize())) e.header.marshal(e.tmp) if _, err := w.Write(e.tmp); err != nil { return err @@ -158,8 +162,8 @@ func (e *locationsBlockEncoder) initWrite(locations int) { } type locationsBlockDecoder struct { - format SymbolsBlockFormat - header locationsBlockHeader + headerSize uint16 + header locationsBlockHeader mappings []int32 lineCount []byte @@ -173,25 +177,18 @@ type locationsBlockDecoder struct { func newLocationsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryLocation], error) { if h.Format == BlockLocationsV1 { - return newSymbolsDecoder[v1.InMemoryLocation](h, &locationsBlockDecoder{format: h.Format}), nil + headerSize := math.Max(locationsBlockHeaderMinSize, h.BlockHeaderSize) + return newSymbolsDecoder[v1.InMemoryLocation](h, &locationsBlockDecoder{headerSize: headerSize}), nil } return nil, fmt.Errorf("%w: unknown locations format: %d", ErrUnknownVersion, h.Format) } 
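+// An illustrative sketch of the fallback above (the helper is
+// hypothetical, not part of this change): a zero h.BlockHeaderSize,
+// written by blocks that predate the field, falls back to the known
+// minimum, while larger values are honored so that trailing unknown
+// header fields are read, checksummed, and skipped.
+//
+//	func headerSizeFor(minSize, fromIndex uint16) uint16 {
+//		if fromIndex < minSize { // includes the legacy zero value
+//			return minSize
+//		}
+//		return fromIndex
+//	}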
-func (d *locationsBlockDecoder) readHeader(r io.Reader) error { - d.buf = slices.GrowLen(d.buf, locationsBlockHeaderSize) - if _, err := io.ReadFull(r, d.buf); err != nil { - return err - } - d.header.unmarshal(d.buf) - if crc32.Checksum(d.buf[:locationsBlockHeaderSize-4], castagnoli) != d.header.CRC { - return ErrInvalidCRC - } - return nil -} +// In early versions, block header size is not specified. Must not change. +const locationsBlockHeaderMinSize = 28 func (d *locationsBlockDecoder) decode(r io.Reader, locations []v1.InMemoryLocation) (err error) { - if err = d.readHeader(r); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.headerSize)) + if err = readSymbolsBlockHeader(d.buf, r, &d.header); err != nil { return err } if d.header.LocationsLen != uint32(len(locations)) { diff --git a/pkg/phlaredb/symdb/mappings.go b/pkg/phlaredb/symdb/mappings.go index 3094b74c44..371e774068 100644 --- a/pkg/phlaredb/symdb/mappings.go +++ b/pkg/phlaredb/symdb/mappings.go @@ -13,10 +13,9 @@ import ( v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/slices" + "github.com/grafana/pyroscope/pkg/util/math" ) -const mappingsBlockHeaderSize = int(unsafe.Sizeof(mappingsBlockHeader{})) - var ( _ symbolsBlockEncoder[v1.InMemoryMapping] = (*mappingsBlockEncoder)(nil) _ symbolsBlockDecoder[v1.InMemoryMapping] = (*mappingsBlockDecoder)(nil) @@ -42,6 +41,8 @@ func (h *mappingsBlockHeader) marshal(b []byte) { binary.BigEndian.PutUint32(b[16:20], h.MemoryStartSize) binary.BigEndian.PutUint32(b[20:24], h.MemoryLimitSize) binary.BigEndian.PutUint32(b[24:28], h.FileOffsetSize) + // Fields can be added here in the future. + // CRC must be the last four bytes. h.CRC = crc32.Checksum(b[0:28], castagnoli) binary.BigEndian.PutUint32(b[28:32], h.CRC) } @@ -54,9 +55,13 @@ func (h *mappingsBlockHeader) unmarshal(b []byte) { h.MemoryStartSize = binary.BigEndian.Uint32(b[16:20]) h.MemoryLimitSize = binary.BigEndian.Uint32(b[20:24]) h.FileOffsetSize = binary.BigEndian.Uint32(b[24:28]) + // In future versions, new fields are decoded here; + // if pos < len(b)-checksumSize, then there are more fields. 
h.CRC = binary.BigEndian.Uint32(b[28:32]) } +func (h *mappingsBlockHeader) checksum() uint32 { return h.CRC } + type mappingsBlockEncoder struct { header mappingsBlockHeader @@ -70,9 +75,9 @@ func newMappingsEncoder() *symbolsEncoder[v1.InMemoryMapping] { return newSymbolsEncoder[v1.InMemoryMapping](new(mappingsBlockEncoder)) } -func (e *mappingsBlockEncoder) format() SymbolsBlockFormat { - return BlockMappingsV1 -} +func (e *mappingsBlockEncoder) format() SymbolsBlockFormat { return BlockMappingsV1 } + +func (e *mappingsBlockEncoder) headerSize() uintptr { return unsafe.Sizeof(mappingsBlockHeader{}) } func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping) error { e.initWrite(len(mappings)) @@ -145,7 +150,7 @@ func (e *mappingsBlockEncoder) encode(w io.Writer, mappings []v1.InMemoryMapping e.buf.Write(e.tmp) } - e.tmp = slices.GrowLen(e.tmp, mappingsBlockHeaderSize) + e.tmp = slices.GrowLen(e.tmp, int(e.headerSize())) e.header.marshal(e.tmp) if _, err := w.Write(e.tmp); err != nil { return err @@ -169,8 +174,8 @@ func (e *mappingsBlockEncoder) initWrite(mappings int) { } type mappingsBlockDecoder struct { - format SymbolsBlockFormat - header mappingsBlockHeader + headerSize uint16 + header mappingsBlockHeader ints []int32 ints64 []int64 @@ -179,25 +184,18 @@ type mappingsBlockDecoder struct { func newMappingsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[v1.InMemoryMapping], error) { if h.Format == BlockMappingsV1 { - return newSymbolsDecoder[v1.InMemoryMapping](h, &mappingsBlockDecoder{format: h.Format}), nil + headerSize := math.Max(mappingsBlockHeaderMinSize, h.BlockHeaderSize) + return newSymbolsDecoder[v1.InMemoryMapping](h, &mappingsBlockDecoder{headerSize: headerSize}), nil } return nil, fmt.Errorf("%w: unknown mappings format: %d", ErrUnknownVersion, h.Format) } -func (d *mappingsBlockDecoder) readHeader(r io.Reader) error { - d.buf = slices.GrowLen(d.buf, mappingsBlockHeaderSize) - if _, err := io.ReadFull(r, d.buf); err != nil { - return nil - } - d.header.unmarshal(d.buf) - if crc32.Checksum(d.buf[:mappingsBlockHeaderSize-4], castagnoli) != d.header.CRC { - return ErrInvalidCRC - } - return nil -} +// In early versions, block header size is not specified. Must not change. 
+const mappingsBlockHeaderMinSize = 32 func (d *mappingsBlockDecoder) decode(r io.Reader, mappings []v1.InMemoryMapping) (err error) { - if err = d.readHeader(r); err != nil { + d.buf = slices.GrowLen(d.buf, int(d.headerSize)) + if err = readSymbolsBlockHeader(d.buf, r, &d.header); err != nil { return err } if d.header.MappingsLen > uint32(len(mappings)) { diff --git a/pkg/phlaredb/symdb/partition_memory.go b/pkg/phlaredb/symdb/partition_memory.go index ab44852dc0..b6c8339789 100644 --- a/pkg/phlaredb/symdb/partition_memory.go +++ b/pkg/phlaredb/symdb/partition_memory.go @@ -415,6 +415,7 @@ func writeSymbolsBlock[T any](w *fileWriter, s []T, e *symbolsEncoder[T]) (h Sym h.CRC = crc.Sum32() h.Length = uint32(len(s)) h.BlockSize = uint32(e.blockSize) - h.Format = e.format() + h.BlockHeaderSize = uint16(e.blockEncoder.headerSize()) + h.Format = e.blockEncoder.format() return h, nil } diff --git a/pkg/phlaredb/symdb/strings.go b/pkg/phlaredb/symdb/strings.go index 1992e85c1b..a228f0b69c 100644 --- a/pkg/phlaredb/symdb/strings.go +++ b/pkg/phlaredb/symdb/strings.go @@ -10,12 +10,10 @@ import ( "unsafe" "github.com/grafana/pyroscope/pkg/slices" + "github.com/grafana/pyroscope/pkg/util/math" ) -const ( - maxStringLen = 1<<16 - 1 - stringsBlockHeaderSize = int(unsafe.Sizeof(stringsBlockHeader{})) -) +const maxStringLen = 1<<16 - 1 var ( _ symbolsBlockEncoder[string] = (*stringsBlockEncoder)(nil) @@ -32,6 +30,8 @@ type stringsBlockHeader struct { func (h *stringsBlockHeader) marshal(b []byte) { binary.BigEndian.PutUint32(b[0:4], h.StringsLen) b[5], b[6], b[7], b[8] = h.BlockEncoding, 0, 0, 0 + // Fields can be added here in the future. + // CRC must be the last four bytes. h.CRC = crc32.Checksum(b[0:8], castagnoli) binary.BigEndian.PutUint32(b[8:12], h.CRC) } @@ -39,9 +39,13 @@ func (h *stringsBlockHeader) marshal(b []byte) { func (h *stringsBlockHeader) unmarshal(b []byte) { h.StringsLen = binary.BigEndian.Uint32(b[0:4]) h.BlockEncoding = b[5] + // In future versions, new fields are decoded here; + // if pos < len(b)-checksumSize, then there are more fields. 
h.CRC = binary.BigEndian.Uint32(b[8:12]) } +func (h *stringsBlockHeader) checksum() uint32 { return h.CRC } + type stringsBlockEncoder struct { header stringsBlockHeader buf bytes.Buffer @@ -52,9 +56,9 @@ func newStringsEncoder() *symbolsEncoder[string] { return newSymbolsEncoder[string](new(stringsBlockEncoder)) } -func (e *stringsBlockEncoder) format() SymbolsBlockFormat { - return BlockStringsV1 -} +func (e *stringsBlockEncoder) format() SymbolsBlockFormat { return BlockStringsV1 } + +func (e *stringsBlockEncoder) headerSize() uintptr { return unsafe.Sizeof(stringsBlockHeader{}) } func (e *stringsBlockEncoder) encode(w io.Writer, strings []string) error { e.initWrite(len(strings)) @@ -80,7 +84,7 @@ func (e *stringsBlockEncoder) encode(w io.Writer, strings []string) error { return err } } - e.tmp = slices.GrowLen(e.tmp, stringsBlockHeaderSize) + e.tmp = slices.GrowLen(e.tmp, int(e.headerSize())) e.header.marshal(e.tmp) if _, err := w.Write(e.tmp); err != nil { return err @@ -109,37 +113,30 @@ func (e *stringsBlockEncoder) initWrite(strings int) { } type stringsBlockDecoder struct { - format SymbolsBlockFormat - header stringsBlockHeader - buf []byte + headerSize uint16 + header stringsBlockHeader + buf []byte } func newStringsDecoder(h SymbolsBlockHeader) (*symbolsDecoder[string], error) { if h.Format == BlockStringsV1 { - return newSymbolsDecoder[string](h, &stringsBlockDecoder{format: h.Format}), nil + headerSize := math.Max(stringsBlockHeaderMinSize, h.BlockHeaderSize) + return newSymbolsDecoder[string](h, &stringsBlockDecoder{headerSize: headerSize}), nil } return nil, fmt.Errorf("%w: unknown strings format: %d", ErrUnknownVersion, h.Format) } -func (d *stringsBlockDecoder) readHeader(r io.Reader) error { - d.buf = slices.GrowLen(d.buf, stringsBlockHeaderSize) - if _, err := io.ReadFull(r, d.buf); err != nil { +// In early versions, block header size is not specified. Must not change. 
+const stringsBlockHeaderMinSize = 12 + +func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) { + d.buf = slices.GrowLen(d.buf, int(d.headerSize)) + if err = readSymbolsBlockHeader(d.buf, r, &d.header); err != nil { return err } - d.header.unmarshal(d.buf) - if crc32.Checksum(d.buf[:stringsBlockHeaderSize-4], castagnoli) != d.header.CRC { - return ErrInvalidCRC - } if d.header.BlockEncoding != 8 && d.header.BlockEncoding != 16 { return fmt.Errorf("invalid string block encoding: %d", d.header.BlockEncoding) } - return nil -} - -func (d *stringsBlockDecoder) decode(r io.Reader, strings []string) (err error) { - if err = d.readHeader(r); err != nil { - return err - } if d.header.StringsLen != uint32(len(strings)) { return fmt.Errorf("invalid string buffer size") } diff --git a/pkg/phlaredb/symdb/testdata/symbols/v3/symbols.symdb b/pkg/phlaredb/symdb/testdata/symbols/v3/symbols.symdb index af10787561f7fe081804ad6e8aebb49e9fe54269..a58190c627f6abf39d2456eba86190ac8b70fc72 100644 GIT binary patch delta 172 zcmcbzll96@)`l&NtkH}z)7hdK24s3XL?#RC#Y7#I{lG!!suO9I6viJdc5U|?YU1(ug!V1h|AFs%8lqjr8DM}>6Ub(gi04-Q0u*9q zU|<2s0BI;-)RqK_O%gk2ssPmX3o6e9lV)I8^N)d{hIz52t1Zx+-B5Y3A&y7h$QD=T U8Uocq%wvrF1JXDBLJT7d0KjG&761SM From 9cfe9aea60a43582c49ac5f5fb15109e26517f31 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 13 May 2024 16:19:07 +0800 Subject: [PATCH 30/36] use v2 by default --- pkg/phlaredb/symdb/block_writer.go | 73 +----- pkg/phlaredb/symdb/block_writer_v2.go | 316 +++++++++++++++++++++++++ pkg/phlaredb/symdb/block_writer_v3.go | 137 +++++++++++ pkg/phlaredb/symdb/format.go | 68 +++++- pkg/phlaredb/symdb/partition_memory.go | 55 ----- pkg/phlaredb/symdb/resolver.go | 1 - pkg/phlaredb/symdb/symdb.go | 35 ++- pkg/phlaredb/symdb/symdb_test.go | 9 +- 8 files changed, 556 insertions(+), 138 deletions(-) create mode 100644 pkg/phlaredb/symdb/block_writer_v2.go create mode 100644 pkg/phlaredb/symdb/block_writer_v3.go diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index d489905279..bf31dd9216 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -2,83 +2,16 @@ package symdb import ( "bufio" - "fmt" "io" "os" "path/filepath" "github.com/grafana/pyroscope/pkg/phlaredb/block" - v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" ) -type writer struct { - config *Config - - index IndexFile - dataFile *fileWriter - files []block.File - footer Footer - - stringsEncoder *symbolsEncoder[string] - mappingsEncoder *symbolsEncoder[v1.InMemoryMapping] - functionsEncoder *symbolsEncoder[v1.InMemoryFunction] - locationsEncoder *symbolsEncoder[v1.InMemoryLocation] -} - -func newWriter(c *Config) *writer { - return &writer{ - config: c, - index: IndexFile{ - Header: IndexHeader{ - Magic: symdbMagic, - Version: FormatV3, - }, - }, - footer: Footer{ - Magic: symdbMagic, - Version: FormatV3, - }, - - stringsEncoder: newStringsEncoder(), - mappingsEncoder: newMappingsEncoder(), - functionsEncoder: newFunctionsEncoder(), - locationsEncoder: newLocationsEncoder(), - } -} - -func (w *writer) writePartitions(partitions []*PartitionWriter) (err error) { - if err = os.MkdirAll(w.config.Dir, 0o755); err != nil { - return fmt.Errorf("failed to create directory %q: %w", w.config.Dir, err) - } - if w.dataFile, err = w.newFile(DefaultFileName); err != nil { - return err - } - defer func() { - err = w.dataFile.Close() - w.files = []block.File{w.dataFile.meta()} - }() - for _, p := range partitions { - if err = p.writeTo(w); 
err != nil { - return fmt.Errorf("failed to write partition: %w", err) - } - w.index.PartitionHeaders = append(w.index.PartitionHeaders, &p.header) - } - w.footer.IndexOffset = uint64(w.dataFile.w.offset) - if _, err = w.index.WriteTo(w.dataFile); err != nil { - return fmt.Errorf("failed to write index: %w", err) - } - if _, err = w.dataFile.Write(w.footer.MarshalBinary()); err != nil { - return fmt.Errorf("failed to write footer: %w", err) - } - return nil -} - -func (w *writer) newFile(path string) (f *fileWriter, err error) { - path = filepath.Join(w.config.Dir, path) - if f, err = newFileWriter(path); err != nil { - return nil, fmt.Errorf("failed to create %q: %w", path, err) - } - return f, err +type blockWriter interface { + writePartitions(partitions []*PartitionWriter) error + meta() []block.File } type fileWriter struct { diff --git a/pkg/phlaredb/symdb/block_writer_v2.go b/pkg/phlaredb/symdb/block_writer_v2.go new file mode 100644 index 0000000000..40dd149dbf --- /dev/null +++ b/pkg/phlaredb/symdb/block_writer_v2.go @@ -0,0 +1,316 @@ +package symdb + +import ( + "context" + "fmt" + "hash/crc32" + "io" + "os" + "path/filepath" + + "github.com/grafana/dskit/multierror" + "github.com/parquet-go/parquet-go" + "golang.org/x/sync/errgroup" + + "github.com/grafana/pyroscope/pkg/phlaredb/block" + schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" + "github.com/grafana/pyroscope/pkg/util/build" + "github.com/grafana/pyroscope/pkg/util/math" +) + +type writerV2 struct { + config *Config + + index IndexFile + indexWriter *fileWriter + stacktraces *fileWriter + files []block.File + + // Parquet tables. + mappings parquetWriter[schemav1.InMemoryMapping, schemav1.MappingPersister] + functions parquetWriter[schemav1.InMemoryFunction, schemav1.FunctionPersister] + locations parquetWriter[schemav1.InMemoryLocation, schemav1.LocationPersister] + strings parquetWriter[string, schemav1.StringPersister] +} + +func newWriterV2(c *Config) *writerV2 { + return &writerV2{ + config: c, + index: IndexFile{ + Header: IndexHeader{ + Magic: symdbMagic, + Version: FormatV2, + }, + }, + } +} + +func (w *writerV2) writePartitions(partitions []*PartitionWriter) error { + if err := w.createDir(); err != nil { + return err + } + + g, _ := errgroup.WithContext(context.Background()) + g.Go(func() (err error) { + if w.stacktraces, err = w.newFile(StacktracesFileName); err != nil { + return err + } + for _, partition := range partitions { + if err = w.writeStacktraces(partition); err != nil { + return err + } + } + return w.stacktraces.Close() + }) + + g.Go(func() (err error) { + if err = w.strings.init(w.config.Dir, w.config.Parquet); err != nil { + return err + } + for _, partition := range partitions { + if partition.header.V2.Strings, err = w.strings.readFrom(partition.strings.slice); err != nil { + return err + } + } + return w.strings.Close() + }) + + g.Go(func() (err error) { + if err = w.functions.init(w.config.Dir, w.config.Parquet); err != nil { + return err + } + for _, partition := range partitions { + if partition.header.V2.Functions, err = w.functions.readFrom(partition.functions.slice); err != nil { + return err + } + } + return w.functions.Close() + }) + + g.Go(func() (err error) { + if err = w.mappings.init(w.config.Dir, w.config.Parquet); err != nil { + return err + } + for _, partition := range partitions { + if partition.header.V2.Mappings, err = w.mappings.readFrom(partition.mappings.slice); err != nil { + return err + } + } + return w.mappings.Close() + }) + + g.Go(func() (err error) 
{ + if err = w.locations.init(w.config.Dir, w.config.Parquet); err != nil { + return err + } + for _, partition := range partitions { + if partition.header.V2.Locations, err = w.locations.readFrom(partition.locations.slice); err != nil { + return err + } + } + return w.locations.Close() + }) + + if err := g.Wait(); err != nil { + return err + } + + for _, partition := range partitions { + w.index.PartitionHeaders = append(w.index.PartitionHeaders, &partition.header) + } + + return w.Flush() +} + +func (w *writerV2) Flush() (err error) { + if err = w.writeIndexFile(); err != nil { + return err + } + w.files = []block.File{ + w.indexWriter.meta(), + w.stacktraces.meta(), + w.locations.meta(), + w.mappings.meta(), + w.functions.meta(), + w.strings.meta(), + } + return nil +} + +func (w *writerV2) writeStacktraces(partition *PartitionWriter) (err error) { + for ci, c := range partition.stacktraces.chunks { + stacks := c.stacks + if stacks == 0 { + stacks = uint32(len(partition.stacktraces.hashToIdx)) + } + h := StacktraceBlockHeader{ + Offset: w.stacktraces.w.offset, + Size: 0, // Set later. + Partition: partition.header.Partition, + BlockIndex: uint16(ci), + Encoding: StacktraceEncodingGroupVarint, + Stacktraces: stacks, + StacktraceNodes: c.tree.len(), + StacktraceMaxDepth: 0, // TODO + StacktraceMaxNodes: c.partition.maxNodesPerChunk, + CRC: 0, // Set later. + } + crc := crc32.New(castagnoli) + if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.stacktraces)); err != nil { + return fmt.Errorf("writing stacktrace chunk data: %w", err) + } + h.CRC = crc.Sum32() + partition.header.Stacktraces = append(partition.header.Stacktraces, h) + } + return nil +} + +func (w *writerV2) createDir() error { + if err := os.MkdirAll(w.config.Dir, 0o755); err != nil { + return fmt.Errorf("failed to create directory %q: %w", w.config.Dir, err) + } + return nil +} + +func (w *writerV2) writeIndexFile() (err error) { + // Write the index file only after all the files were flushed. 
+ if w.indexWriter, err = w.newFile(IndexFileName); err != nil { + return err + } + defer func() { + err = multierror.New(err, w.indexWriter.Close()).Err() + }() + if _, err = w.index.WriteTo(w.indexWriter); err != nil { + return fmt.Errorf("failed to write index file: %w", err) + } + return err +} + +func (w *writerV2) newFile(path string) (f *fileWriter, err error) { + path = filepath.Join(w.config.Dir, path) + if f, err = newFileWriter(path); err != nil { + return nil, fmt.Errorf("failed to create %q: %w", path, err) + } + return f, err +} + +func (w *writerV2) meta() []block.File { return w.files } + +type parquetWriter[M schemav1.Models, P schemav1.Persister[M]] struct { + persister P + config ParquetConfig + + currentRowGroup uint32 + currentRows uint32 + rowsTotal uint64 + + buffer *parquet.Buffer + rowsBatch []parquet.Row + + writer *parquet.GenericWriter[P] + file *os.File + path string +} + +func (s *parquetWriter[M, P]) init(dir string, c ParquetConfig) (err error) { + s.config = c + s.path = filepath.Join(dir, s.persister.Name()+block.ParquetSuffix) + s.file, err = os.OpenFile(s.path, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o644) + if err != nil { + return err + } + s.rowsBatch = make([]parquet.Row, 0, 128) + s.buffer = parquet.NewBuffer(s.persister.Schema(), parquet.ColumnBufferCapacity(s.config.MaxBufferRowCount)) + s.writer = parquet.NewGenericWriter[P](s.file, s.persister.Schema(), + parquet.CreatedBy("github.com/grafana/pyroscope/", build.Version, build.Revision), + parquet.PageBufferSize(3*1024*1024), + ) + return nil +} + +func (s *parquetWriter[M, P]) readFrom(values []M) (ranges []RowRangeReference, err error) { + for len(values) > 0 { + var r RowRangeReference + if r, err = s.writeRows(values); err != nil { + return nil, err + } + ranges = append(ranges, r) + values = values[r.Rows:] + } + return ranges, nil +} + +func (s *parquetWriter[M, P]) writeRows(values []M) (r RowRangeReference, err error) { + r.RowGroup = s.currentRowGroup + r.Index = s.currentRows + if len(values) == 0 { + return r, nil + } + var n int + for len(values) > 0 && int(s.currentRows) < s.config.MaxBufferRowCount { + s.fillBatch(values) + if n, err = s.buffer.WriteRows(s.rowsBatch); err != nil { + return r, err + } + s.currentRows += uint32(n) + r.Rows += uint32(n) + values = values[n:] + } + if int(s.currentRows)+cap(s.rowsBatch) >= s.config.MaxBufferRowCount { + if err = s.flushBuffer(); err != nil { + return r, err + } + } + return r, nil +} + +func (s *parquetWriter[M, P]) fillBatch(values []M) int { + m := math.Min(len(values), cap(s.rowsBatch)) + s.rowsBatch = s.rowsBatch[:m] + for i := 0; i < m; i++ { + row := s.rowsBatch[i][:0] + s.rowsBatch[i] = s.persister.Deconstruct(row, 0, values[i]) + } + return m +} + +func (s *parquetWriter[M, P]) flushBuffer() error { + if _, err := s.writer.WriteRowGroup(s.buffer); err != nil { + return err + } + s.rowsTotal += uint64(s.buffer.NumRows()) + s.currentRowGroup++ + s.currentRows = 0 + s.buffer.Reset() + return nil +} + +func (s *parquetWriter[M, P]) meta() block.File { + f := block.File{ + // Note that the path is relative to the symdb root dir. 
+ RelPath: filepath.Base(s.path),
+ Parquet: &block.ParquetFile{
+ NumRows: s.rowsTotal,
+ },
+ }
+ if f.Parquet.NumRows > 0 {
+ f.Parquet.NumRowGroups = uint64(s.currentRowGroup + 1)
+ }
+ if stat, err := os.Stat(s.path); err == nil {
+ f.SizeBytes = uint64(stat.Size())
+ }
+ return f
+}
+
+func (s *parquetWriter[M, P]) Close() error {
+ if err := s.flushBuffer(); err != nil {
+ return fmt.Errorf("flushing parquet buffer: %w", err)
+ }
+ if err := s.writer.Close(); err != nil {
+ return fmt.Errorf("closing parquet writer: %w", err)
+ }
+ if err := s.file.Close(); err != nil {
+ return fmt.Errorf("closing parquet file: %w", err)
+ }
+ return nil
+}
diff --git a/pkg/phlaredb/symdb/block_writer_v3.go b/pkg/phlaredb/symdb/block_writer_v3.go
new file mode 100644
index 0000000000..842ce6d12d
--- /dev/null
+++ b/pkg/phlaredb/symdb/block_writer_v3.go
@@ -0,0 +1,140 @@
+package symdb
+
+import (
+ "fmt"
+ "hash/crc32"
+ "io"
+ "os"
+ "path/filepath"
+
+ "github.com/grafana/pyroscope/pkg/phlaredb/block"
+ v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1"
+)
+
+type writerV3 struct {
+ config *Config
+
+ index IndexFile
+ dataFile *fileWriter
+ files []block.File
+ footer Footer
+
+ stringsEncoder *symbolsEncoder[string]
+ mappingsEncoder *symbolsEncoder[v1.InMemoryMapping]
+ functionsEncoder *symbolsEncoder[v1.InMemoryFunction]
+ locationsEncoder *symbolsEncoder[v1.InMemoryLocation]
+}
+
+func newWriterV3(c *Config) *writerV3 {
+ return &writerV3{
+ config: c,
+ index: IndexFile{
+ Header: IndexHeader{
+ Magic: symdbMagic,
+ Version: FormatV3,
+ },
+ },
+ footer: Footer{
+ Magic: symdbMagic,
+ Version: FormatV3,
+ },
+
+ stringsEncoder: newStringsEncoder(),
+ mappingsEncoder: newMappingsEncoder(),
+ functionsEncoder: newFunctionsEncoder(),
+ locationsEncoder: newLocationsEncoder(),
+ }
+}
+
+func (w *writerV3) writePartitions(partitions []*PartitionWriter) (err error) {
+ if err = os.MkdirAll(w.config.Dir, 0o755); err != nil {
+ return fmt.Errorf("failed to create directory %q: %w", w.config.Dir, err)
+ }
+ if w.dataFile, err = w.newFile(DefaultFileName); err != nil {
+ return err
+ }
+ defer func() {
+ // Do not let Close mask an earlier write error.
+ if cerr := w.dataFile.Close(); err == nil {
+ err = cerr
+ }
+ w.files = []block.File{w.dataFile.meta()}
+ }()
+ for _, p := range partitions {
+ if err = writePartitionV3(w, p); err != nil {
+ return fmt.Errorf("failed to write partition: %w", err)
+ }
+ w.index.PartitionHeaders = append(w.index.PartitionHeaders, &p.header)
+ }
+ w.footer.IndexOffset = uint64(w.dataFile.w.offset)
+ if _, err = w.index.WriteTo(w.dataFile); err != nil {
+ return fmt.Errorf("failed to write index: %w", err)
+ }
+ if _, err = w.dataFile.Write(w.footer.MarshalBinary()); err != nil {
+ return fmt.Errorf("failed to write footer: %w", err)
+ }
+ return nil
+}
+
+func (w *writerV3) meta() []block.File { return w.files }
+
+func (w *writerV3) newFile(path string) (f *fileWriter, err error) {
+ path = filepath.Join(w.config.Dir, path)
+ if f, err = newFileWriter(path); err != nil {
+ return nil, fmt.Errorf("failed to create %q: %w", path, err)
+ }
+ return f, err
+}
+
+func writePartitionV3(w *writerV3, p *PartitionWriter) (err error) {
+ if p.header.V3.Strings, err = writeSymbolsBlock(w.dataFile, p.strings.slice, w.stringsEncoder); err != nil {
+ return err
+ }
+ if p.header.V3.Mappings, err = writeSymbolsBlock(w.dataFile, p.mappings.slice, w.mappingsEncoder); err != nil {
+ return err
+ }
+ if p.header.V3.Functions, err = writeSymbolsBlock(w.dataFile, p.functions.slice, w.functionsEncoder); err != nil {
+ return err
+ }
+ if p.header.V3.Locations, err
= writeSymbolsBlock(w.dataFile, p.locations.slice, w.locationsEncoder); err != nil { + return err + } + for ci, c := range p.stacktraces.chunks { + stacks := c.stacks + if stacks == 0 { + stacks = uint32(len(p.stacktraces.hashToIdx)) + } + h := StacktraceBlockHeader{ + Offset: w.dataFile.w.offset, + Partition: p.header.Partition, + BlockIndex: uint16(ci), + Encoding: StacktraceEncodingGroupVarint, + Stacktraces: stacks, + StacktraceNodes: c.tree.len(), + StacktraceMaxNodes: c.partition.maxNodesPerChunk, + } + crc := crc32.New(castagnoli) + if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.dataFile)); err != nil { + return fmt.Errorf("writing stacktrace chunk data: %w", err) + } + h.CRC = crc.Sum32() + p.header.Stacktraces = append(p.header.Stacktraces, h) + } + return nil +} + +func writeSymbolsBlock[T any](w *fileWriter, s []T, e *symbolsEncoder[T]) (h SymbolsBlockHeader, err error) { + h.Offset = uint64(w.w.offset) + crc := crc32.New(castagnoli) + mw := io.MultiWriter(crc, w.w) + if err = e.encode(mw, s); err != nil { + return h, err + } + h.Size = uint32(w.w.offset) - uint32(h.Offset) + h.CRC = crc.Sum32() + h.Length = uint32(len(s)) + h.BlockSize = uint32(e.blockSize) + h.BlockHeaderSize = uint16(e.blockEncoder.headerSize()) + h.Format = e.blockEncoder.format() + return h, nil +} diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 0dbbcae529..7afb72bcac 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -260,17 +260,31 @@ func (h *PartitionHeaders) Size() int64 { return s } -func (h *PartitionHeaders) WriteTo(dst io.Writer) (_ int64, err error) { +func (h *PartitionHeaders) MarshalV3To(dst io.Writer) (_ int64, err error) { w := withWriterOffset(dst, 0) buf := make([]byte, 4, 128) binary.BigEndian.PutUint32(buf, uint32(len(*h))) w.write(buf) for _, p := range *h { - if p.V3 == nil { - return 0, fmt.Errorf("only v3 format is supported") - } buf = slices.GrowLen(buf, int(p.Size())) - p.marshal(buf) + p.marshalV3(buf) + w.write(buf) + } + return w.offset, w.err +} + +func (h *PartitionHeaders) MarshalV2To(dst io.Writer) (_ int64, err error) { + w := withWriterOffset(dst, 0) + buf := make([]byte, 4, 128) + binary.BigEndian.PutUint32(buf, uint32(len(*h))) + w.write(buf) + for _, p := range *h { + s := p.Size() + if int(s) > cap(buf) { + buf = make([]byte, s) + } + buf = buf[:s] + p.marshalV2(buf) w.write(buf) } return w.offset, w.err @@ -316,7 +330,25 @@ func (h *PartitionHeaders) unmarshal(b []byte, version FormatVersion) error { return nil } -func (h *PartitionHeader) marshal(buf []byte) { +func (h *PartitionHeader) marshalV2(buf []byte) { + binary.BigEndian.PutUint64(buf[0:8], h.Partition) + binary.BigEndian.PutUint32(buf[8:12], uint32(len(h.Stacktraces))) + binary.BigEndian.PutUint32(buf[12:16], uint32(len(h.V2.Locations))) + binary.BigEndian.PutUint32(buf[16:20], uint32(len(h.V2.Mappings))) + binary.BigEndian.PutUint32(buf[20:24], uint32(len(h.V2.Functions))) + binary.BigEndian.PutUint32(buf[24:28], uint32(len(h.V2.Strings))) + n := 28 + for i := range h.Stacktraces { + h.Stacktraces[i].marshal(buf[n:]) + n += stacktraceBlockHeaderSize + } + n += marshalRowRangeReferences(buf[n:], h.V2.Locations) + n += marshalRowRangeReferences(buf[n:], h.V2.Mappings) + n += marshalRowRangeReferences(buf[n:], h.V2.Functions) + marshalRowRangeReferences(buf[n:], h.V2.Strings) +} + +func (h *PartitionHeader) marshalV3(buf []byte) { binary.BigEndian.PutUint64(buf[0:8], h.Partition) binary.BigEndian.PutUint32(buf[8:12], uint32(len(h.Stacktraces))) n := 
12 @@ -542,6 +574,15 @@ func (h *PartitionHeaderV2) unmarshalRowRangeReferences(refs []RowRangeReference return nil } +func marshalRowRangeReferences(b []byte, refs []RowRangeReference) int { + var off int + for i := range refs { + refs[i].marshal(b[off : off+rowRangeReferenceSize]) + off += rowRangeReferenceSize + } + return off +} + const rowRangeReferenceSize = int(unsafe.Sizeof(RowRangeReference{})) type RowRangeReference struct { @@ -550,6 +591,12 @@ type RowRangeReference struct { Rows uint32 } +func (r *RowRangeReference) marshal(b []byte) { + binary.BigEndian.PutUint32(b[0:4], r.RowGroup) + binary.BigEndian.PutUint32(b[4:8], r.Index) + binary.BigEndian.PutUint32(b[8:12], r.Rows) +} + func (r *RowRangeReference) unmarshal(b []byte) { r.RowGroup = binary.BigEndian.Uint32(b[0:4]) r.Index = binary.BigEndian.Uint32(b[4:8]) @@ -625,7 +672,14 @@ func (f *IndexFile) WriteTo(dst io.Writer) (n int64, err error) { if _, err = w.Write(tocBytes); err != nil { return w.offset, fmt.Errorf("toc write: %w", err) } - if _, err = f.PartitionHeaders.WriteTo(w); err != nil { + + switch f.Header.Version { + case FormatV3: + _, err = f.PartitionHeaders.MarshalV3To(w) + default: + _, err = f.PartitionHeaders.MarshalV2To(w) + } + if err != nil { return w.offset, fmt.Errorf("partitions headers: %w", err) } diff --git a/pkg/phlaredb/symdb/partition_memory.go b/pkg/phlaredb/symdb/partition_memory.go index b6c8339789..d410aee449 100644 --- a/pkg/phlaredb/symdb/partition_memory.go +++ b/pkg/phlaredb/symdb/partition_memory.go @@ -2,8 +2,6 @@ package symdb import ( "context" - "fmt" - "hash/crc32" "io" "sync" @@ -366,56 +364,3 @@ func (p *PartitionWriter) WriteStats(s *PartitionStats) { func (p *PartitionWriter) Release() { // Noop. Satisfies PartitionReader interface. } - -func (p *PartitionWriter) writeTo(w *writer) (err error) { - if p.header.V3.Strings, err = writeSymbolsBlock(w.dataFile, p.strings.slice, w.stringsEncoder); err != nil { - return err - } - if p.header.V3.Mappings, err = writeSymbolsBlock(w.dataFile, p.mappings.slice, w.mappingsEncoder); err != nil { - return err - } - if p.header.V3.Functions, err = writeSymbolsBlock(w.dataFile, p.functions.slice, w.functionsEncoder); err != nil { - return err - } - if p.header.V3.Locations, err = writeSymbolsBlock(w.dataFile, p.locations.slice, w.locationsEncoder); err != nil { - return err - } - for ci, c := range p.stacktraces.chunks { - stacks := c.stacks - if stacks == 0 { - stacks = uint32(len(p.stacktraces.hashToIdx)) - } - h := StacktraceBlockHeader{ - Offset: w.dataFile.w.offset, - Partition: p.header.Partition, - BlockIndex: uint16(ci), - Encoding: StacktraceEncodingGroupVarint, - Stacktraces: stacks, - StacktraceNodes: c.tree.len(), - StacktraceMaxNodes: c.partition.maxNodesPerChunk, - } - crc := crc32.New(castagnoli) - if h.Size, err = c.WriteTo(io.MultiWriter(crc, w.dataFile)); err != nil { - return fmt.Errorf("writing stacktrace chunk data: %w", err) - } - h.CRC = crc.Sum32() - p.header.Stacktraces = append(p.header.Stacktraces, h) - } - return nil -} - -func writeSymbolsBlock[T any](w *fileWriter, s []T, e *symbolsEncoder[T]) (h SymbolsBlockHeader, err error) { - h.Offset = uint64(w.w.offset) - crc := crc32.New(castagnoli) - mw := io.MultiWriter(crc, w.w) - if err = e.encode(mw, s); err != nil { - return h, err - } - h.Size = uint32(w.w.offset) - uint32(h.Offset) - h.CRC = crc.Sum32() - h.Length = uint32(len(s)) - h.BlockSize = uint32(e.blockSize) - h.BlockHeaderSize = uint16(e.blockEncoder.headerSize()) - h.Format = e.blockEncoder.format() - 
return h, nil -} diff --git a/pkg/phlaredb/symdb/resolver.go b/pkg/phlaredb/symdb/resolver.go index af56812d9e..8705ae31cd 100644 --- a/pkg/phlaredb/symdb/resolver.go +++ b/pkg/phlaredb/symdb/resolver.go @@ -247,7 +247,6 @@ func (r *Resolver) Pprof() (*googlev1.Profile, error) { } lock.Lock() defer lock.Unlock() - // TODO(kolesnikovae): Use MergeNoClone. return p.Merge(resolved) }) if err != nil { diff --git a/pkg/phlaredb/symdb/symdb.go b/pkg/phlaredb/symdb/symdb.go index c78b71fa09..c5df560473 100644 --- a/pkg/phlaredb/symdb/symdb.go +++ b/pkg/phlaredb/symdb/symdb.go @@ -64,7 +64,7 @@ type StacktraceInserter interface { type SymDB struct { config *Config - writer *writer + writer blockWriter stats MemoryStats m sync.RWMutex @@ -76,13 +76,19 @@ type SymDB struct { type Config struct { Dir string + Version FormatVersion Stacktraces StacktracesConfig + Parquet ParquetConfig } type StacktracesConfig struct { MaxNodesPerChunk uint32 } +type ParquetConfig struct { + MaxBufferRowCount int +} + type MemoryStats struct { StacktracesSize uint64 LocationsSize uint64 @@ -103,12 +109,16 @@ const statsUpdateInterval = 5 * time.Second func DefaultConfig() *Config { return &Config{ + Version: FormatV2, Stacktraces: StacktracesConfig{ // At the moment chunks are loaded in memory at once. // Due to the fact that chunking causes some duplication, // it's better to keep them large. MaxNodesPerChunk: 4 << 20, }, + Parquet: ParquetConfig{ + MaxBufferRowCount: 100 << 10, + }, } } @@ -117,16 +127,27 @@ func (c *Config) WithDirectory(dir string) *Config { return c } +func (c *Config) WithParquetConfig(pc ParquetConfig) *Config { + c.Parquet = pc + return c +} + func NewSymDB(c *Config) *SymDB { if c == nil { c = DefaultConfig() } db := &SymDB{ config: c, - writer: newWriter(c), partitions: make(map[uint64]*PartitionWriter), stop: make(chan struct{}), } + switch c.Version { + case FormatV3: + db.writer = newWriterV3(c) + default: + c.Version = FormatV2 + db.writer = newWriterV2(c) + } db.wg.Add(1) go db.updateStatsLoop() return db @@ -150,9 +171,15 @@ func (s *SymDB) PartitionWriter(partition uint64) *PartitionWriter { func (s *SymDB) newPartition(partition uint64) *PartitionWriter { p := PartitionWriter{ - header: PartitionHeader{Partition: partition, V3: new(PartitionHeaderV3)}, + header: PartitionHeader{Partition: partition}, stacktraces: newStacktracesPartition(s.config.Stacktraces.MaxNodesPerChunk), } + switch s.config.Version { + case FormatV2: + p.header.V2 = new(PartitionHeaderV2) + case FormatV3: + p.header.V3 = new(PartitionHeaderV3) + } p.strings.init() p.mappings.init() p.functions.init() @@ -251,5 +278,5 @@ func (s *SymDB) Flush() error { } func (s *SymDB) Files() []block.File { - return s.writer.files + return s.writer.meta() } diff --git a/pkg/phlaredb/symdb/symdb_test.go b/pkg/phlaredb/symdb/symdb_test.go index a1bab91bc5..b641e826a6 100644 --- a/pkg/phlaredb/symdb/symdb_test.go +++ b/pkg/phlaredb/symdb/symdb_test.go @@ -14,6 +14,7 @@ import ( googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" phlaremodel "github.com/grafana/pyroscope/pkg/model" "github.com/grafana/pyroscope/pkg/objstore/providers/filesystem" + "github.com/grafana/pyroscope/pkg/phlaredb/block" v1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" "github.com/grafana/pyroscope/pkg/pprof" ) @@ -53,6 +54,9 @@ func (s *memSuite) init() { Stacktraces: StacktracesConfig{ MaxNodesPerChunk: 1 << 10, }, + Parquet: ParquetConfig{ + MaxBufferRowCount: 512, + }, } } if s.db == nil { @@ -83,7 +87,7 @@ func (s *blockSuite) 
flush() { return &s.testBucket, nil }) require.NoError(s.t, err) - s.reader, err = Open(context.Background(), b, testBlockMeta) + s.reader, err = Open(context.Background(), b, &block.Meta{Files: s.db.Files()}) require.NoError(s.t, err) } @@ -156,6 +160,9 @@ func Test_Stats(t *testing.T) { Stacktraces: StacktracesConfig{ MaxNodesPerChunk: 4 << 20, }, + Parquet: ParquetConfig{ + MaxBufferRowCount: 100 << 10, + }, }, } From 355de0e9afecf3812a29c2d7b579163305c0c607 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 13 May 2024 16:21:19 +0800 Subject: [PATCH 31/36] fix ConvertToBlockStats --- pkg/phlaredb/block/metadata.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/phlaredb/block/metadata.go b/pkg/phlaredb/block/metadata.go index 3d9add4c16..0c2cc0c854 100644 --- a/pkg/phlaredb/block/metadata.go +++ b/pkg/phlaredb/block/metadata.go @@ -368,7 +368,7 @@ func (stats MetaStats) ConvertToBlockStats() *ingestv1.BlockStats { indexBytes = f.SizeBytes } else if f.RelPath == "profiles.parquet" { profileBytes += f.SizeBytes - } else if strings.HasPrefix(f.RelPath, "symbols") { + } else if strings.HasPrefix(f.RelPath, "symbols") || filepath.Ext(f.RelPath) == ".symdb" { symbolBytes += f.SizeBytes } } From af2dae04eb8b5af36e3262886f00f8b7f0fec673 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 13 May 2024 17:00:13 +0800 Subject: [PATCH 32/36] add explicit format config option --- pkg/phlaredb/block/block_test.go | 10 ++++---- pkg/phlaredb/compact.go | 41 +++++++++++++++++++++++++------- pkg/phlaredb/compact_test.go | 12 +++++++--- pkg/phlaredb/head.go | 8 ++++++- pkg/phlaredb/head_test.go | 36 ++++++++++++++++++++++++++-- pkg/phlaredb/symdb/symdb.go | 5 ++++ 6 files changed, 93 insertions(+), 19 deletions(-) diff --git a/pkg/phlaredb/block/block_test.go b/pkg/phlaredb/block/block_test.go index 0fa9979e69..058c9a31c2 100644 --- a/pkg/phlaredb/block/block_test.go +++ b/pkg/phlaredb/block/block_test.go @@ -96,7 +96,7 @@ func TestDelete(t *testing.T) { }) require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(dir, meta.ULID.String()))) - require.Equal(t, 4, len(objects(t, bkt, meta.ULID))) + require.Equal(t, 9, len(objects(t, bkt, meta.ULID))) markedForDeletion := promauto.With(prometheus.NewRegistry()).NewCounter(prometheus.CounterOpts{Name: "test"}) require.NoError(t, block.MarkForDeletion(ctx, log.NewNopLogger(), bkt, meta.ULID, "", false, markedForDeletion)) @@ -116,7 +116,7 @@ func TestDelete(t *testing.T) { } }) require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b2.ULID.String()))) - require.Equal(t, 4, len(objects(t, bkt, b2.ULID))) + require.Equal(t, 9, len(objects(t, bkt, b2.ULID))) // Remove meta.json and check if delete can delete it. 
require.NoError(t, bkt.Delete(ctx, path.Join(b2.ULID.String(), block.MetaFilename))) @@ -196,7 +196,7 @@ func TestUpload(t *testing.T) { t.Run("full block", func(t *testing.T) { require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b1.ULID.String()))) - require.Equal(t, 4, len(bkt.Objects())) + require.Equal(t, 9, len(bkt.Objects())) objs := bkt.Objects() require.Contains(t, objs, path.Join(b1.ULID.String(), block.MetaFilename)) require.Contains(t, objs, path.Join(b1.ULID.String(), block.IndexFilename)) @@ -205,7 +205,7 @@ func TestUpload(t *testing.T) { t.Run("upload is idempotent", func(t *testing.T) { require.NoError(t, block.Upload(ctx, log.NewNopLogger(), bkt, path.Join(tmpDir, b1.ULID.String()))) - require.Equal(t, 4, len(bkt.Objects())) + require.Equal(t, 9, len(bkt.Objects())) objs := bkt.Objects() require.Contains(t, objs, path.Join(b1.ULID.String(), block.MetaFilename)) require.Contains(t, objs, path.Join(b1.ULID.String(), block.IndexFilename)) @@ -355,7 +355,7 @@ func TestUploadCleanup(t *testing.T) { require.ErrorIs(t, uploadErr, errUploadFailed) // If upload of meta.json fails, nothing is cleaned up. - require.Equal(t, 4, len(bkt.Objects())) + require.Equal(t, 9, len(bkt.Objects())) require.Greater(t, len(bkt.Objects()[path.Join(b1.String(), block.IndexFilename)]), 0) require.Greater(t, len(bkt.Objects()[path.Join(b1.String(), block.MetaFilename)]), 0) } diff --git a/pkg/phlaredb/compact.go b/pkg/phlaredb/compact.go index 548b6e9197..844eedd14f 100644 --- a/pkg/phlaredb/compact.go +++ b/pkg/phlaredb/compact.go @@ -100,7 +100,7 @@ func CompactWithSplitting(ctx context.Context, opts CompactWithSplittingOpts) ( srcMetas[i] = b.Meta() } - symbolsCompactor := newSymbolsCompactor(opts.Dst) + symbolsCompactor := newSymbolsCompactor(opts.Dst, symdb.FormatV2) defer runutil.CloseWithLogOnErr(util.Logger, symbolsCompactor, "close symbols compactor") outMeta := compactMetas(srcMetas...) @@ -725,6 +725,7 @@ func (it *dedupeProfileRowIterator) Next() bool { } type symbolsCompactor struct { + version symdb.FormatVersion rewriters map[BlockReader]*symdb.Rewriter w *symdb.SymDB stacktraces []uint32 @@ -733,10 +734,27 @@ type symbolsCompactor struct { flushed bool } -func newSymbolsCompactor(path string) *symbolsCompactor { +func newSymbolsCompactor(path string, version symdb.FormatVersion) *symbolsCompactor { + if version == symdb.FormatV3 { + return &symbolsCompactor{ + version: version, + w: symdb.NewSymDB(symdb.DefaultConfig(). + WithVersion(symdb.FormatV3). + WithDirectory(path)), + dst: path, + rewriters: make(map[BlockReader]*symdb.Rewriter), + } + } + dst := filepath.Join(path, symdb.DefaultDirName) return &symbolsCompactor{ - w: symdb.NewSymDB(symdb.DefaultConfig().WithDirectory(path)), - dst: path, + version: symdb.FormatV2, + w: symdb.NewSymDB(symdb.DefaultConfig(). + WithVersion(symdb.FormatV2). + WithDirectory(dst). 
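+ // v2 output is a directory of parquet and symdb files
+ // under <path>/symbols (symdb.DefaultDirName).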
+ WithParquetConfig(symdb.ParquetConfig{ + MaxBufferRowCount: defaultParquetConfig.MaxBufferRowCount, + })), + dst: dst, rewriters: make(map[BlockReader]*symdb.Rewriter), } } @@ -767,9 +785,13 @@ func (s *symbolsRewriter) Close() (uint64, error) { if err := s.symbolsCompactor.Flush(); err != nil { return 0, err } - dst := filepath.Join(s.dst, symdb.DefaultFileName) - src := filepath.Join(s.symbolsCompactor.dst, symdb.DefaultFileName) - return s.numSamples, util.CopyFile(src, dst) + if s.version == symdb.FormatV3 { + dst := filepath.Join(s.dst, symdb.DefaultFileName) + src := filepath.Join(s.symbolsCompactor.dst, symdb.DefaultFileName) + return s.numSamples, util.CopyFile(src, dst) + } else { + return s.numSamples, util.CopyDir(s.symbolsCompactor.dst, filepath.Join(s.dst, symdb.DefaultDirName)) + } } func (s *symbolsCompactor) ReWriteRow(profile profileRow) (uint64, error) { @@ -811,7 +833,10 @@ func (s *symbolsCompactor) Flush() error { } func (s *symbolsCompactor) Close() error { - return os.RemoveAll(filepath.Join(s.dst, symdb.DefaultFileName)) + if s.version == symdb.FormatV3 { + return os.RemoveAll(filepath.Join(s.dst, symdb.DefaultFileName)) + } + return os.RemoveAll(s.dst) } func (s *symbolsCompactor) loadStacktracesID(values []parquet.Value) { diff --git a/pkg/phlaredb/compact_test.go b/pkg/phlaredb/compact_test.go index 31a20b4707..5912506255 100644 --- a/pkg/phlaredb/compact_test.go +++ b/pkg/phlaredb/compact_test.go @@ -27,6 +27,7 @@ import ( phlarecontext "github.com/grafana/pyroscope/pkg/phlare/context" "github.com/grafana/pyroscope/pkg/phlaredb/block" "github.com/grafana/pyroscope/pkg/phlaredb/sharding" + "github.com/grafana/pyroscope/pkg/phlaredb/symdb" "github.com/grafana/pyroscope/pkg/phlaredb/tsdb/index" "github.com/grafana/pyroscope/pkg/pprof/testhelper" ) @@ -197,7 +198,7 @@ func TestCompactWithSplitting(t *testing.T) { }) require.NoError(t, err) - require.NoFileExists(t, dst) + require.NoDirExists(t, filepath.Join(dst, symdb.DefaultDirName)) // 4 shards one per series. require.Equal(t, 4, len(compacted)) @@ -627,10 +628,15 @@ func TestFlushMeta(t *testing.T) { require.Equal(t, uint64(3), b.Meta().Stats.NumSeries) require.Equal(t, uint64(3), b.Meta().Stats.NumSamples) require.Equal(t, uint64(3), b.Meta().Stats.NumProfiles) - require.Len(t, b.Meta().Files, 3) + require.Len(t, b.Meta().Files, 8) require.Equal(t, "index.tsdb", b.Meta().Files[0].RelPath) require.Equal(t, "profiles.parquet", b.Meta().Files[1].RelPath) - require.Equal(t, "symbols.symdb", b.Meta().Files[2].RelPath) + require.Equal(t, "symbols/functions.parquet", b.Meta().Files[2].RelPath) + require.Equal(t, "symbols/index.symdb", b.Meta().Files[3].RelPath) + require.Equal(t, "symbols/locations.parquet", b.Meta().Files[4].RelPath) + require.Equal(t, "symbols/mappings.parquet", b.Meta().Files[5].RelPath) + require.Equal(t, "symbols/stacktraces.symdb", b.Meta().Files[6].RelPath) + require.Equal(t, "symbols/strings.parquet", b.Meta().Files[7].RelPath) } func newBlock(t testing.TB, generator func() []*testhelper.ProfileBuilder) *singleBlockQuerier { diff --git a/pkg/phlaredb/head.go b/pkg/phlaredb/head.go index 76107eab9b..9db3440d5b 100644 --- a/pkg/phlaredb/head.go +++ b/pkg/phlaredb/head.go @@ -124,7 +124,13 @@ func NewHead(phlarectx context.Context, cfg Config, limiter TenantLimiter) (*Hea } } - h.symdb = symdb.NewSymDB(symdb.DefaultConfig().WithDirectory(h.headPath)) + h.symdb = symdb.NewSymDB(symdb.DefaultConfig(). + WithVersion(symdb.FormatV2). + WithDirectory(filepath.Join(h.headPath, symdb.DefaultDirName)). 
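+ // v2 keeps its file set under <head>/symbols (symdb.DefaultDirName).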
+ WithParquetConfig(symdb.ParquetConfig{ + MaxBufferRowCount: h.parquetConfig.MaxBufferRowCount, + })) + h.wg.Add(1) go h.loop() diff --git a/pkg/phlaredb/head_test.go b/pkg/phlaredb/head_test.go index 2d36950588..7f2c5cbd5b 100644 --- a/pkg/phlaredb/head_test.go +++ b/pkg/phlaredb/head_test.go @@ -370,8 +370,40 @@ func TestHeadFlush(t *testing.T) { }, }, { - RelPath: "symbols.symdb", - SizeBytes: 159687, + RelPath: "symbols/functions.parquet", + Parquet: &block.ParquetFile{ + NumRowGroups: 2, + NumRows: 1423, + }, + }, + { + RelPath: "symbols/index.symdb", + SizeBytes: 308, + }, + { + RelPath: "symbols/locations.parquet", + Parquet: &block.ParquetFile{ + NumRowGroups: 2, + NumRows: 2469, + }, + }, + { + RelPath: "symbols/mappings.parquet", + Parquet: &block.ParquetFile{ + NumRowGroups: 2, + NumRows: 3, + }, + }, + { + RelPath: "symbols/stacktraces.symdb", + SizeBytes: 60366, + }, + { + RelPath: "symbols/strings.parquet", + Parquet: &block.ParquetFile{ + NumRowGroups: 2, + NumRows: 1722, + }, }, }, Compaction: block.BlockMetaCompaction{ diff --git a/pkg/phlaredb/symdb/symdb.go b/pkg/phlaredb/symdb/symdb.go index c5df560473..b6103da5d3 100644 --- a/pkg/phlaredb/symdb/symdb.go +++ b/pkg/phlaredb/symdb/symdb.go @@ -132,6 +132,11 @@ func (c *Config) WithParquetConfig(pc ParquetConfig) *Config { return c } +func (c *Config) WithVersion(v FormatVersion) *Config { + c.Version = v + return c +} + func NewSymDB(c *Config) *SymDB { if c == nil { c = DefaultConfig() From e8e7fc17959ccd2f4d1e4fcfcd5f5eb14e4553c8 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 13 May 2024 17:38:42 +0800 Subject: [PATCH 33/36] fix metadata --- pkg/phlaredb/head.go | 21 ++++++++++++++++----- pkg/phlaredb/phlaredb.go | 6 +++++- pkg/phlaredb/symdb/symdb.go | 10 +++++++--- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/pkg/phlaredb/head.go b/pkg/phlaredb/head.go index 9db3440d5b..748246dc78 100644 --- a/pkg/phlaredb/head.go +++ b/pkg/phlaredb/head.go @@ -124,12 +124,19 @@ func NewHead(phlarectx context.Context, cfg Config, limiter TenantLimiter) (*Hea } } - h.symdb = symdb.NewSymDB(symdb.DefaultConfig(). - WithVersion(symdb.FormatV2). - WithDirectory(filepath.Join(h.headPath, symdb.DefaultDirName)). - WithParquetConfig(symdb.ParquetConfig{ + symdbConfig := symdb.DefaultConfig() + if cfg.SymDBFormat == symdb.FormatV3 { + symdbConfig.Version = symdb.FormatV3 + symdbConfig.Dir = h.headPath + } else { + symdbConfig.Version = symdb.FormatV2 + symdbConfig.Dir = filepath.Join(h.headPath, symdb.DefaultDirName) + symdbConfig.Parquet = symdb.ParquetConfig{ MaxBufferRowCount: h.parquetConfig.MaxBufferRowCount, - })) + } + } + + h.symdb = symdb.NewSymDB(symdbConfig) h.wg.Add(1) go h.loop() @@ -563,6 +570,10 @@ func (h *Head) flush(ctx context.Context) error { return errors.Wrap(err, "flushing symdb") } for _, file := range h.symdb.Files() { + // Files' path is relative to the symdb dir. 
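+ // v2 keeps its files in a subdirectory, so the relative path must be
+ // re-anchored at the block root before it is recorded in the block
+ // metadata; v3 writes directly into the block directory.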
+ if h.symdb.FormatVersion() == symdb.FormatV2 { + file.RelPath = filepath.Join(symdb.DefaultDirName, file.RelPath) + } files = append(files, file) blockSize += file.SizeBytes h.metrics.flushedFileSizeBytes.WithLabelValues(file.RelPath).Observe(float64(file.SizeBytes)) diff --git a/pkg/phlaredb/phlaredb.go b/pkg/phlaredb/phlaredb.go index a503c7769e..a204b4335b 100644 --- a/pkg/phlaredb/phlaredb.go +++ b/pkg/phlaredb/phlaredb.go @@ -31,6 +31,7 @@ import ( phlareobj "github.com/grafana/pyroscope/pkg/objstore" phlarecontext "github.com/grafana/pyroscope/pkg/phlare/context" "github.com/grafana/pyroscope/pkg/phlaredb/block" + "github.com/grafana/pyroscope/pkg/phlaredb/symdb" "github.com/grafana/pyroscope/pkg/util" ) @@ -49,7 +50,10 @@ type Config struct { // TODO: docs RowGroupTargetSize uint64 `yaml:"row_group_target_size"` - Parquet *ParquetConfig `yaml:"-"` // Those configs should not be exposed to the user, rather they should be determined by pyroscope itself. Currently, they are solely used for test cases. + // Those configs should not be exposed to the user, rather they should be determined by pyroscope itself. + // Currently, they are solely used for test cases. + Parquet *ParquetConfig `yaml:"-"` + SymDBFormat symdb.FormatVersion `yaml:"-"` MinFreeDisk uint64 `yaml:"min_free_disk_gb"` MinDiskAvailablePercentage float64 `yaml:"min_disk_available_percentage"` diff --git a/pkg/phlaredb/symdb/symdb.go b/pkg/phlaredb/symdb/symdb.go index b6103da5d3..def5882753 100644 --- a/pkg/phlaredb/symdb/symdb.go +++ b/pkg/phlaredb/symdb/symdb.go @@ -63,7 +63,7 @@ type StacktraceInserter interface { } type SymDB struct { - config *Config + config Config writer blockWriter stats MemoryStats @@ -142,7 +142,7 @@ func NewSymDB(c *Config) *SymDB { c = DefaultConfig() } db := &SymDB{ - config: c, + config: *c, partitions: make(map[uint64]*PartitionWriter), stop: make(chan struct{}), } @@ -150,7 +150,7 @@ func NewSymDB(c *Config) *SymDB { case FormatV3: db.writer = newWriterV3(c) default: - c.Version = FormatV2 + db.config.Version = FormatV2 db.writer = newWriterV2(c) } db.wg.Add(1) @@ -285,3 +285,7 @@ func (s *SymDB) Flush() error { func (s *SymDB) Files() []block.File { return s.writer.meta() } + +func (s *SymDB) FormatVersion() FormatVersion { + return s.config.Version +} From 1a10f6fc94ec97e96af1e4b2c1f5b6cb606a8e56 Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 13 May 2024 17:41:05 +0800 Subject: [PATCH 34/36] remove file writer base parameter --- pkg/phlaredb/symdb/block_writer.go | 6 +++--- pkg/phlaredb/symdb/format.go | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/phlaredb/symdb/block_writer.go b/pkg/phlaredb/symdb/block_writer.go index bf31dd9216..28f7a3d44c 100644 --- a/pkg/phlaredb/symdb/block_writer.go +++ b/pkg/phlaredb/symdb/block_writer.go @@ -29,7 +29,7 @@ func newFileWriter(path string) (*fileWriter, error) { // There is no particular reason to use // a buffer larger than the default 4K. 
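// Note that bufio writes larger than the buffer bypass it: the buffer
// is flushed and the oversized payload goes straight to the file.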
b := bufio.NewWriterSize(f, 4096) - w := withWriterOffset(b, 0) + w := withWriterOffset(b) fw := fileWriter{ path: path, buf: b, @@ -71,8 +71,8 @@ type writerOffset struct { err error } -func withWriterOffset(w io.Writer, base int64) *writerOffset { - return &writerOffset{Writer: w, offset: base} +func withWriterOffset(w io.Writer) *writerOffset { + return &writerOffset{Writer: w} } func (w *writerOffset) write(p []byte) { diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 7afb72bcac..43a23196c2 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -261,7 +261,7 @@ func (h *PartitionHeaders) Size() int64 { } func (h *PartitionHeaders) MarshalV3To(dst io.Writer) (_ int64, err error) { - w := withWriterOffset(dst, 0) + w := withWriterOffset(dst) buf := make([]byte, 4, 128) binary.BigEndian.PutUint32(buf, uint32(len(*h))) w.write(buf) @@ -274,7 +274,7 @@ func (h *PartitionHeaders) MarshalV3To(dst io.Writer) (_ int64, err error) { } func (h *PartitionHeaders) MarshalV2To(dst io.Writer) (_ int64, err error) { - w := withWriterOffset(dst, 0) + w := withWriterOffset(dst) buf := make([]byte, 4, 128) binary.BigEndian.PutUint32(buf, uint32(len(*h))) w.write(buf) @@ -658,7 +658,7 @@ func (f *IndexFile) dataOffset() int { func (f *IndexFile) WriteTo(dst io.Writer) (n int64, err error) { checksum := crc32.New(castagnoli) - w := withWriterOffset(io.MultiWriter(dst, checksum), 0) + w := withWriterOffset(io.MultiWriter(dst, checksum)) if _, err = w.Write(f.Header.MarshalBinary()); err != nil { return w.offset, fmt.Errorf("header write: %w", err) } From 309866ec5b7a57157ca3c8be94971d81b28415ef Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Mon, 13 May 2024 20:49:17 +0800 Subject: [PATCH 35/36] fixes --- pkg/phlaredb/symdb/format.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/phlaredb/symdb/format.go b/pkg/phlaredb/symdb/format.go index 43a23196c2..d7be2ef672 100644 --- a/pkg/phlaredb/symdb/format.go +++ b/pkg/phlaredb/symdb/format.go @@ -457,11 +457,11 @@ type SymbolsBlockHeader struct { Length uint32 // BlockSize denotes the number of items per block. BlockSize uint32 - // BlockSize denotes the encoder block header size in bytes. + // BlockHeaderSize denotes the encoder block header size in bytes. // This enables forward compatibility within the same format version: - // as long as fields are not removed, or reordered, and the encoding - // scheme does not change, the format can be extended with no change - // of the format version. Decoder is able to read the whole header and + // as long as fields are not removed or reordered, and the encoding + // scheme does not change, the format can be extended without updating + // the format version. Decoder is able to read the whole header and // skip unknown fields. BlockHeaderSize uint16 // Format of the encoded data. 
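
The header-size field makes the scheme concrete: a reader always consumes
BlockHeaderSize bytes, decodes the prefix it understands, and ignores the
rest. A minimal sketch of that idea, assuming hypothetical v1HeaderSize and
decodePrefix helpers that are not part of this series:

	buf := make([]byte, h.BlockHeaderSize)
	if _, err := io.ReadFull(r, buf); err != nil {
		return err
	}
	known := len(buf)
	if known > v1HeaderSize {
		known = v1HeaderSize // fields past this point were added later
	}
	decodePrefix(buf[:known]) // trailing bytes are consumed but ignored
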
@@ -490,7 +490,7 @@ func readSymbolsBlockHeader(buf []byte, r io.Reader, v headerUnmarshaler) error } v.unmarshal(buf) if crc32.Checksum(buf[:len(buf)-checksumSize], castagnoli) != v.checksum() { - return ErrInvalidSize + return ErrInvalidCRC } return nil } From 22c896aaf8bf9f9af42ac9a94afcae65f02e88bb Mon Sep 17 00:00:00 2001 From: Anton Kolesnikov Date: Wed, 26 Jun 2024 11:49:40 +0800 Subject: [PATCH 36/36] resolve post-merge conflicts --- pkg/phlaredb/schemas/v1/functions.go | 2 +- pkg/phlaredb/schemas/v1/locations.go | 4 ++-- pkg/phlaredb/schemas/v1/mappings.go | 6 +++--- pkg/phlaredb/schemas/v1/schema_test.go | 2 +- pkg/phlaredb/symdb/block_reader.go | 6 +++--- pkg/phlaredb/symdb/block_reader_parquet.go | 2 +- pkg/phlaredb/symdb/block_writer_v2.go | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/phlaredb/schemas/v1/functions.go b/pkg/phlaredb/schemas/v1/functions.go index 6bd6e7e3e3..bdaac2f22e 100644 --- a/pkg/phlaredb/schemas/v1/functions.go +++ b/pkg/phlaredb/schemas/v1/functions.go @@ -14,7 +14,7 @@ func (FunctionPersister) Name() string { return "functions" } func (FunctionPersister) Schema() *parquet.Schema { return functionsSchema } -func (FunctionPersister) Deconstruct(row parquet.Row, _ uint64, fn InMemoryFunction) parquet.Row { +func (FunctionPersister) Deconstruct(row parquet.Row, fn InMemoryFunction) parquet.Row { if cap(row) < 5 { row = make(parquet.Row, 0, 5) } diff --git a/pkg/phlaredb/schemas/v1/locations.go b/pkg/phlaredb/schemas/v1/locations.go index 748d82389c..f04056fc44 100644 --- a/pkg/phlaredb/schemas/v1/locations.go +++ b/pkg/phlaredb/schemas/v1/locations.go @@ -59,7 +59,7 @@ func (LocationPersister) Deconstruct(row parquet.Row, loc InMemoryLocation) parq return row } -func (LocationPersister) Reconstruct(row parquet.Row) (*InMemoryLocation, error) { +func (LocationPersister) Reconstruct(row parquet.Row) (InMemoryLocation, error) { loc := InMemoryLocation{ Id: row[0].Uint64(), MappingId: uint32(row[1].Uint64()), @@ -74,7 +74,7 @@ func (LocationPersister) Reconstruct(row parquet.Row) (*InMemoryLocation, error) for i, v := range lines[len(lines)/2:] { loc.Line[i].Line = int32(v.Uint64()) } - return &loc, nil + return loc, nil } type InMemoryLocation struct { diff --git a/pkg/phlaredb/schemas/v1/mappings.go b/pkg/phlaredb/schemas/v1/mappings.go index 8c06c61dbf..c8220dcbc6 100644 --- a/pkg/phlaredb/schemas/v1/mappings.go +++ b/pkg/phlaredb/schemas/v1/mappings.go @@ -14,7 +14,7 @@ func (MappingPersister) Name() string { return "mappings" } func (MappingPersister) Schema() *parquet.Schema { return mappingsSchema } -func (MappingPersister) Deconstruct(row parquet.Row, m *InMemoryMapping) parquet.Row { +func (MappingPersister) Deconstruct(row parquet.Row, m InMemoryMapping) parquet.Row { if cap(row) < 10 { row = make(parquet.Row, 0, 10) } @@ -32,7 +32,7 @@ func (MappingPersister) Deconstruct(row parquet.Row, m *InMemoryMapping) parquet return row } -func (MappingPersister) Reconstruct(row parquet.Row) (*InMemoryMapping, error) { +func (MappingPersister) Reconstruct(row parquet.Row) (InMemoryMapping, error) { mapping := InMemoryMapping{ Id: row[0].Uint64(), MemoryStart: row[1].Uint64(), @@ -45,7 +45,7 @@ func (MappingPersister) Reconstruct(row parquet.Row) (*InMemoryMapping, error) { HasLineNumbers: row[8].Boolean(), HasInlineFrames: row[9].Boolean(), } - return &mapping, nil + return mapping, nil } type InMemoryMapping struct { diff --git a/pkg/phlaredb/schemas/v1/schema_test.go b/pkg/phlaredb/schemas/v1/schema_test.go index 
de63dc3fe5..8573471c6e 100644 --- a/pkg/phlaredb/schemas/v1/schema_test.go +++ b/pkg/phlaredb/schemas/v1/schema_test.go @@ -428,7 +428,7 @@ func (*pprofMappingPersister) Reconstruct(row parquet.Row) (*profilev1.Mapping, return &m, nil } -type ReadWriter[T any, P Persister[T]] struct {} +type ReadWriter[T any, P Persister[T]] struct{} func (r *ReadWriter[T, P]) WriteParquetFile(file io.Writer, elements []T) error { var ( diff --git a/pkg/phlaredb/symdb/block_reader.go b/pkg/phlaredb/symdb/block_reader.go index cd962b3a06..8561b71c07 100644 --- a/pkg/phlaredb/symdb/block_reader.go +++ b/pkg/phlaredb/symdb/block_reader.go @@ -403,14 +403,14 @@ func (p *partition) ResolveStacktraceLocations(ctx context.Context, dst Stacktra } func (p *partition) SplitStacktraceIDRanges(appender *SampleAppender) iter.Iterator[*StacktraceIDRange] { - if len(p.stacktraceChunks) == 0 { + if len(p.stacktraces) == 0 { return iter.NewEmptyIterator[*StacktraceIDRange]() } var n int samples := appender.Samples() - ranges := SplitStacktraces(samples.StacktraceIDs, p.stacktraceChunks[0].header.StacktraceMaxNodes) + ranges := SplitStacktraces(samples.StacktraceIDs, p.stacktraces[0].header.StacktraceMaxNodes) for _, sr := range ranges { - c := p.stacktraceChunks[sr.chunk] + c := p.stacktraces[sr.chunk] sr.ParentPointerTree = c.t sr.Samples = samples.Range(n, n+len(sr.IDs)) n += len(sr.IDs) diff --git a/pkg/phlaredb/symdb/block_reader_parquet.go b/pkg/phlaredb/symdb/block_reader_parquet.go index 90ba014b85..ec97c3f9c1 100644 --- a/pkg/phlaredb/symdb/block_reader_parquet.go +++ b/pkg/phlaredb/symdb/block_reader_parquet.go @@ -88,7 +88,7 @@ func (t *parquetTable[M, P]) readRows(dst []M, buf []parquet.Row, rows parquet.R if i == len(dst) { return nil } - _, v, err := t.persister.Reconstruct(row) + v, err := t.persister.Reconstruct(row) if err != nil { return err } diff --git a/pkg/phlaredb/symdb/block_writer_v2.go b/pkg/phlaredb/symdb/block_writer_v2.go index 40dd149dbf..6d675fe8d4 100644 --- a/pkg/phlaredb/symdb/block_writer_v2.go +++ b/pkg/phlaredb/symdb/block_writer_v2.go @@ -269,7 +269,7 @@ func (s *parquetWriter[M, P]) fillBatch(values []M) int { s.rowsBatch = s.rowsBatch[:m] for i := 0; i < m; i++ { row := s.rowsBatch[i][:0] - s.rowsBatch[i] = s.persister.Deconstruct(row, 0, values[i]) + s.rowsBatch[i] = s.persister.Deconstruct(row, values[i]) } return m }
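
Taken together, the series makes the on-disk symbols layout a pure
configuration choice. A minimal usage sketch, assuming only the options
introduced above (the block path is made up):

	package main

	import (
		"path/filepath"

		"github.com/grafana/pyroscope/pkg/phlaredb/symdb"
	)

	func main() {
		// v3: a single symbols.symdb data file in the block directory.
		v3 := symdb.NewSymDB(symdb.DefaultConfig().
			WithVersion(symdb.FormatV3).
			WithDirectory("/tmp/block"))

		// v2: parquet tables plus index and stacktrace files under
		// <block>/symbols, with relative paths fixed up by the caller.
		v2 := symdb.NewSymDB(symdb.DefaultConfig().
			WithVersion(symdb.FormatV2).
			WithDirectory(filepath.Join("/tmp/block", symdb.DefaultDirName)).
			WithParquetConfig(symdb.ParquetConfig{MaxBufferRowCount: 100 << 10}))
		_, _ = v3, v2
	}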