diff --git a/pkg/pprof/fix_go_heap_truncated.go b/pkg/pprof/fix_go_heap_truncated.go new file mode 100644 index 0000000000..1faafe3ff1 --- /dev/null +++ b/pkg/pprof/fix_go_heap_truncated.go @@ -0,0 +1,266 @@ +package pprof + +import ( + "bytes" + "reflect" + "sort" + "unsafe" + + "golang.org/x/exp/slices" + + profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" +) + +const ( + minGroupSize = 2 + + tokens = 8 + tokenLen = 16 + suffixLen = tokens + tokenLen + + tokenBytesLen = tokenLen * 8 + suffixBytesLen = suffixLen * 8 +) + +// MayHaveGoHeapTruncatedStacktraces reports whether there are +// any chances that the profile may have truncated stack traces. +func MayHaveGoHeapTruncatedStacktraces(p *profilev1.Profile) bool { + if !hasGoHeapSampleTypes(p) { + return false + } + // Some truncated stacks have depth less than the depth limit (32). + const minDepth = 28 + for _, s := range p.Sample { + if len(s.LocationId) >= minDepth { + return true + } + } + return false +} + +func hasGoHeapSampleTypes(p *profilev1.Profile) bool { + for _, st := range p.SampleType { + switch p.StringTable[st.Type] { + case + "alloc_objects", + "alloc_space", + "inuse_objects", + "inuse_space": + return true + } + } + return false +} + +// RepairGoHeapTruncatedStacktraces repairs truncated stack traces +// in Go heap profiles. +// +// Go heap profile has a depth limit of 32 frames, which often +// renders profiles unreadable, and also increases cardinality +// of stack traces. +// +// The function guesses truncated frames based on neighbors and +// repairs stack traces if there are high chances that this +// part is present in the profile. The heuristic is as follows: +// +// For each stack trace S taller than 24 frames: if there is another +// stack trace R taller than 24 frames that overlaps with the given +// one by at least 16 frames in a row from the top, and has frames +// above its root, stack S considered truncated, and the missing part +// is copied from R. +func RepairGoHeapTruncatedStacktraces(p *profilev1.Profile) { + // Group stack traces by bottom (closest to the root) locations. + // Typically, there are very few groups (a hundred or two). + samples, groups := split(p) + // Each group's suffix is then tokenized: each part is shifted by one + // location from the previous one (like n-grams). + // Tokens are written into the token=>group map, Where the value is the + // index of the group with the token found at the furthest position from + // the root (across all groups). + m := make(map[string]group, len(groups)/2) + for i := 0; i < len(groups); i++ { + g := groups[i] + n := len(groups) + if i+1 < len(groups) { + n = groups[i+1] + } + if s := n - g; s < minGroupSize { + continue + } + // We take suffix of the first sample in the group. + s := suffix(samples[g].LocationId) + // Tokenize the suffix: token position is relative to the stack + // trace root: 0 means that the token is the closest to the root. + // TODO: unroll? + // 0 : 64 : 192 // No need. + // 1 : 56 : 184 + // 2 : 48 : 176 + // 3 : 40 : 168 + // 4 : 32 : 160 + // 5 : 24 : 152 + // 6 : 16 : 144 + // 7 : 8 : 136 + // 8 : 0 : 128 + // + // We skip the top/right-most token, as it is not needed, + // because there can be no more complete stack trace. + for j := uint32(1); j <= tokens; j++ { + hi := suffixBytesLen - j*tokens + lo := hi - tokenBytesLen + // By taking a string representation of the slice, + // we eliminate the need to hash the token explicitly: + // Go map will do it this way or another. + k := unsafeString(s[lo:hi]) + // Current candidate: the group where the token is + // located at the furthest position from the root. + c, ok := m[k] + if !ok || j > c.off { + // This group has more complete stack traces: + m[k] = group{ + gid: uint32(i), + off: j, + } + } + } + } + + // Now we handle chaining. Consider the following stacks: + // 1 2 3 4 + // a b [d] (f) + // b c [e] (g) + // c [d] (f) h + // d [e] (g) i + // + // We can't associate 3-rd stack with the 1-st one because their tokens + // do not overlap (given the token size is 2). However, we can associate + // it transitively through the 2nd stack. + // + // Dependencies: + // - group i depends on d[i]. + // - d[i] depends on d[d[i].gid]. + d := make([]group, len(groups)) + for i := 0; i < len(groups); i++ { + g := groups[i] + t := topToken(samples[g].LocationId) + k := unsafeString(t) + c, ok := m[k] + if !ok || c.off == 0 || groups[c.gid] == g { + // The current group has the most complete stack trace. + continue + } + d[i] = c + } + + // Then, for each group, we test, if there is another group with a more + // complete suffix, overlapping with the given one by at least one token. + // If such stack trace exists, all stack traces of the group are appended + // with the missing part. + for i := 0; i < len(groups); i++ { + g := groups[i] + c := d[i] + var off uint32 + for c.off > 0 { + off += c.off + n := d[c.gid] + if n.off == 0 { + // Stop early to preserve c. + break + } + c = n + } + if off == 0 { + // The current group has the most complete stack trace. + continue + } + // The reference stack trace. + appx := samples[groups[c.gid]].LocationId + // It's possible that the reference stack trace does not + // include the part we're looking for. In this case, we + // simply ignore the group. Although it's possible to infer + // this piece from other stacks, this is left for further + // improvements. + if int(off) >= len(appx) { + continue + } + appx = appx[uint32(len(appx))-off:] + // Now we append the missing part to all stack traces of the group. + n := len(groups) + if i+1 < len(groups) { + n = groups[i+1] + } + for j := g; j < n; j++ { + // Locations typically already have some extra capacity, + // therefore no major allocations are expected here. + samples[j].LocationId = append(samples[j].LocationId, appx...) + } + } +} + +type group struct { + gid uint32 + off uint32 +} + +// suffix returns the last suffixLen locations +// of the given stack trace represented as bytes. +// The return slice is always suffixBytesLen long. +// function panics if s is shorter than suffixLen. +func suffix(s []uint64) []byte { + return locBytes(s[len(s)-suffixLen:]) +} + +// topToken returns the last tokenLen locations +// of the given stack trace represented as bytes. +// The return slice is always tokenBytesLen long. +// function panics if s is shorter than tokenLen. +func topToken(s []uint64) []byte { + return locBytes(s[len(s)-tokenLen:]) +} + +func locBytes(s []uint64) []byte { + size := len(s) * 8 + h := (*reflect.SliceHeader)(unsafe.Pointer(&s)) + h.Len = size + h.Cap = size + return *(*[]byte)(unsafe.Pointer(h)) +} + +func unsafeString(b []byte) string { + return *(*string)(unsafe.Pointer(&b)) +} + +// split into groups of samples by stack trace suffixes. +// Return slice contains indices of the first sample +// of each group in the collection of selected samples. +func split(p *profilev1.Profile) ([]*profilev1.Sample, []int) { + slices.SortFunc(p.Sample, func(a, b *profilev1.Sample) int { + if len(a.LocationId) < suffixLen { + return -1 + } + if len(b.LocationId) < suffixLen { + return 1 + } + return bytes.Compare( + suffix(a.LocationId), + suffix(b.LocationId), + ) + }) + o := sort.Search(len(p.Sample), func(i int) bool { + return len(p.Sample[i].LocationId) >= suffixLen + }) + if o == len(p.Sample) { + return nil, nil + } + samples := p.Sample[o:] + const avgGroupSize = 16 // Estimate. + groups := make([]int, 0, len(samples)/avgGroupSize) + var prev []byte + for i := 0; i < len(samples); i++ { + cur := suffix(samples[i].LocationId) + if !bytes.Equal(cur, prev) { + groups = append(groups, i) + prev = cur + } + } + return samples, groups +} diff --git a/pkg/pprof/fix_go_heap_truncated_test.go b/pkg/pprof/fix_go_heap_truncated_test.go new file mode 100644 index 0000000000..f900c32b6e --- /dev/null +++ b/pkg/pprof/fix_go_heap_truncated_test.go @@ -0,0 +1,43 @@ +package pprof + +import ( + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func Benchmark_RepairGoTruncatedStacktraces(b *testing.B) { + p, err := OpenFile("testdata/goheapfix/heap_go_truncated_3.pb.gz") + require.NoError(b, err) + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + RepairGoHeapTruncatedStacktraces(FixGoProfile(p.CloneVT())) + } +} + +func Test_UpdateFixtures_RepairGoTruncatedStacktraces(t *testing.T) { + t.Skip() + t.Helper() + paths := []string{ + "testdata/goheapfix/heap_go_truncated_1.pb.gz", // Cortex. + "testdata/goheapfix/heap_go_truncated_2.pb.gz", // Cortex. + "testdata/goheapfix/heap_go_truncated_3.pb.gz", // Loki. Pathological. + "testdata/goheapfix/heap_go_truncated_4.pb.gz", // Pyroscope. + } + for _, path := range paths { + func() { + p, err := OpenFile(path) + require.NoError(t, err, path) + defer p.Close() + f, err := os.Create(path + ".fixed") + require.NoError(t, err, path) + defer f.Close() + p.Profile = FixGoProfile(p.Profile) + RepairGoHeapTruncatedStacktraces(p.Profile) + _, err = p.WriteTo(f) + require.NoError(t, err, path) + }() + } +} diff --git a/pkg/pprof/fix_go_profile.go b/pkg/pprof/fix_go_profile.go index 7c4818f7fd..e01b80f17a 100644 --- a/pkg/pprof/fix_go_profile.go +++ b/pkg/pprof/fix_go_profile.go @@ -1,25 +1,47 @@ package pprof import ( + "regexp" + "strings" + profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" ) -// FixGoProfile removes type parameters from Go function names. -// -// In go 1.21 and above, function names include type parameters, -// however, due to the bug, a function name can include any -// of the type instances regardless of the call site. Thus, e.g., -// x[T1].foo and x[T2].foo can't be distinguished in a profile. -// This leads to incorrect profiles and incorrect flame graphs, -// and hugely increases cardinality of stack traces. +// FixGoProfile fixes known issues with profiles collected with +// the standard Go profiler. // -// FixGoProfile will change x[T1].foo and x[T2].foo to x[...].foo -// and normalize the profile, if type parameters are present in -// the profile. Otherwise, the profile returned unchanged. +// Note that the function presumes that p is a Go profile and does +// not verify this. It is expected that the function is called +// very early in the profile processing chain and normalized after, +// regardless of the function outcome. func FixGoProfile(p *profilev1.Profile) *profilev1.Profile { + p = DropGoTypeParameters(p) + // Now that the profile is normalized, we can try to repair + // truncated stack traces, if any. Note that repaired stacks + // are not deduplicated, so the caller need to normalize the + if MayHaveGoHeapTruncatedStacktraces(p) { + RepairGoHeapTruncatedStacktraces(p) + } + return p +} + +// DropGoTypeParameters removes of type parameters from Go function names. +// +// In go 1.21 and above, function names include type parameters, however, +// due to a bug, a function name could include any of the type instances +// regardless of the call site. Thus, e.g., x[T1].foo and x[T2].foo can't +// be distinguished in a profile. This leads to incorrect profiles and +// incorrect flame graphs, and hugely increases cardinality of stack traces. +// +// The function renames x[T1].foo and x[T2].foo to x[...].foo and normalizes +// the profile, if type parameters are present in the profile. Otherwise, the +// profile returns unchanged. +// +// See https://github.com/golang/go/issues/64528. +func DropGoTypeParameters(p *profilev1.Profile) *profilev1.Profile { var n int for i, s := range p.StringTable { - c := DropGoTypeParameters(s) + c := dropGoTypeParameters(s) if c != s { p.StringTable[i] = c n++ @@ -37,3 +59,21 @@ func FixGoProfile(p *profilev1.Profile) *profilev1.Profile { _ = m.MergeNoClone(p) return m.Profile() } + +var goStructTypeParameterRegex = regexp.MustCompile(`\[go\.shape\..*\]`) + +func dropGoTypeParameters(input string) string { + matchesIndices := goStructTypeParameterRegex.FindAllStringIndex(input, -1) + if len(matchesIndices) == 0 { + return input + } + var modified strings.Builder + prevEnd := 0 + for _, indices := range matchesIndices { + start, end := indices[0], indices[1] + modified.WriteString(input[prevEnd:start] + "[...]") + prevEnd = end + } + modified.WriteString(input[prevEnd:]) + return modified.String() +} diff --git a/pkg/pprof/fix_go_profile_test.go b/pkg/pprof/fix_go_profile_test.go index 61c416ebdf..30be1864ad 100644 --- a/pkg/pprof/fix_go_profile_test.go +++ b/pkg/pprof/fix_go_profile_test.go @@ -1,6 +1,8 @@ package pprof import ( + "bufio" + "os" "testing" "github.com/stretchr/testify/assert" @@ -8,7 +10,7 @@ import ( ) func Test_FixGoProfile(t *testing.T) { - p, err := OpenFile("testdata/heap_go1.21.pprof") + p, err := OpenFile("testdata/goheapfix/heap_go_truncated_4.pb.gz") require.NoError(t, err) f := FixGoProfile(p.Profile) @@ -34,3 +36,24 @@ func Test_FixGoProfile(t *testing.T) { assert.Equal(t, 77, len(p.Function)-len(f.Function)) assert.Equal(t, 78, len(p.StringTable)-len(f.StringTable)) } + +func Test_DropGoTypeParameters(t *testing.T) { + ef, err := os.Open("testdata/go_type_parameters.expected.txt") + require.NoError(t, err) + defer ef.Close() + + in, err := os.Open("testdata/go_type_parameters.txt") + require.NoError(t, err) + defer in.Close() + + input := bufio.NewScanner(in) + expected := bufio.NewScanner(ef) + for input.Scan() { + expected.Scan() + require.Equal(t, expected.Text(), dropGoTypeParameters(input.Text())) + } + + require.NoError(t, input.Err()) + require.NoError(t, expected.Err()) + require.False(t, expected.Scan()) +} diff --git a/pkg/pprof/fmt.go b/pkg/pprof/fmt.go deleted file mode 100644 index 35b3c96826..0000000000 --- a/pkg/pprof/fmt.go +++ /dev/null @@ -1,24 +0,0 @@ -package pprof - -import ( - "regexp" - "strings" -) - -var goStructTypeParameterRegex = regexp.MustCompile(`\[go\.shape\..*\]`) - -func DropGoTypeParameters(input string) string { - matchesIndices := goStructTypeParameterRegex.FindAllStringIndex(input, -1) - if len(matchesIndices) == 0 { - return input - } - var modified strings.Builder - prevEnd := 0 - for _, indices := range matchesIndices { - start, end := indices[0], indices[1] - modified.WriteString(input[prevEnd:start] + "[...]") - prevEnd = end - } - modified.WriteString(input[prevEnd:]) - return modified.String() -} diff --git a/pkg/pprof/fmt_test.go b/pkg/pprof/fmt_test.go deleted file mode 100644 index 36dcf82838..0000000000 --- a/pkg/pprof/fmt_test.go +++ /dev/null @@ -1,30 +0,0 @@ -package pprof - -import ( - "bufio" - "os" - "testing" - - "github.com/stretchr/testify/require" -) - -func Test_DropGoTypeParameters(t *testing.T) { - ef, err := os.Open("testdata/go_type_parameters.expected.txt") - require.NoError(t, err) - defer ef.Close() - - in, err := os.Open("testdata/go_type_parameters.txt") - require.NoError(t, err) - defer in.Close() - - input := bufio.NewScanner(in) - expected := bufio.NewScanner(ef) - for input.Scan() { - expected.Scan() - require.Equal(t, expected.Text(), DropGoTypeParameters(input.Text())) - } - - require.NoError(t, input.Err()) - require.NoError(t, expected.Err()) - require.False(t, expected.Scan()) -} diff --git a/pkg/pprof/testdata/goheapfix/heap_go_truncated_1.pb.gz b/pkg/pprof/testdata/goheapfix/heap_go_truncated_1.pb.gz new file mode 100644 index 0000000000..2d2981f8f4 Binary files /dev/null and b/pkg/pprof/testdata/goheapfix/heap_go_truncated_1.pb.gz differ diff --git a/pkg/pprof/testdata/goheapfix/heap_go_truncated_1.pb.gz.fixed b/pkg/pprof/testdata/goheapfix/heap_go_truncated_1.pb.gz.fixed new file mode 100644 index 0000000000..24bc1a2842 Binary files /dev/null and b/pkg/pprof/testdata/goheapfix/heap_go_truncated_1.pb.gz.fixed differ diff --git a/pkg/pprof/testdata/goheapfix/heap_go_truncated_2.pb.gz b/pkg/pprof/testdata/goheapfix/heap_go_truncated_2.pb.gz new file mode 100644 index 0000000000..bcb8235f4e Binary files /dev/null and b/pkg/pprof/testdata/goheapfix/heap_go_truncated_2.pb.gz differ diff --git a/pkg/pprof/testdata/goheapfix/heap_go_truncated_2.pb.gz.fixed b/pkg/pprof/testdata/goheapfix/heap_go_truncated_2.pb.gz.fixed new file mode 100644 index 0000000000..5e453a71e4 Binary files /dev/null and b/pkg/pprof/testdata/goheapfix/heap_go_truncated_2.pb.gz.fixed differ diff --git a/pkg/pprof/testdata/goheapfix/heap_go_truncated_3.pb.gz b/pkg/pprof/testdata/goheapfix/heap_go_truncated_3.pb.gz new file mode 100644 index 0000000000..bcdf4e665a Binary files /dev/null and b/pkg/pprof/testdata/goheapfix/heap_go_truncated_3.pb.gz differ diff --git a/pkg/pprof/testdata/goheapfix/heap_go_truncated_3.pb.gz.fixed b/pkg/pprof/testdata/goheapfix/heap_go_truncated_3.pb.gz.fixed new file mode 100644 index 0000000000..85921c5125 Binary files /dev/null and b/pkg/pprof/testdata/goheapfix/heap_go_truncated_3.pb.gz.fixed differ diff --git a/pkg/pprof/testdata/heap_go1.21.pprof b/pkg/pprof/testdata/goheapfix/heap_go_truncated_4.pb.gz similarity index 100% rename from pkg/pprof/testdata/heap_go1.21.pprof rename to pkg/pprof/testdata/goheapfix/heap_go_truncated_4.pb.gz diff --git a/pkg/pprof/testdata/goheapfix/heap_go_truncated_4.pb.gz.fixed b/pkg/pprof/testdata/goheapfix/heap_go_truncated_4.pb.gz.fixed new file mode 100644 index 0000000000..081bda6150 Binary files /dev/null and b/pkg/pprof/testdata/goheapfix/heap_go_truncated_4.pb.gz.fixed differ diff --git a/pkg/test/integration/ingest_pprof_test.go b/pkg/test/integration/ingest_pprof_test.go index 7701c3e928..964b3d9e86 100644 --- a/pkg/test/integration/ingest_pprof_test.go +++ b/pkg/test/integration/ingest_pprof_test.go @@ -37,6 +37,7 @@ var ( expectStatusIngest: 200, expectStatusPush: 200, metrics: golangHeap, + needsGoHeapFix: true, }, { profile: repoRoot + "pkg/pprof/testdata/profile_java", @@ -92,12 +93,14 @@ var ( expectStatusIngest: 200, expectStatusPush: 200, metrics: golangHeap, + needsGoHeapFix: true, }, { profile: repoRoot + "pkg/og/convert/pprof/testdata/heap.pb.gz", expectStatusIngest: 200, expectStatusPush: 200, metrics: golangHeap, + needsGoHeapFix: true, }, { profile: repoRoot + "pkg/og/convert/pprof/testdata/heap-js.pprof", @@ -250,7 +253,7 @@ func TestIngest(t *testing.T) { for _, metric := range td.metrics { rb.Render(metric.name) profile := rb.SelectMergeProfile(metric.name, metric.query) - assertPPROF(t, profile, metric, td, true) + assertPPROF(t, profile, metric, td, td.fixAtIngest) } } }) @@ -277,14 +280,14 @@ func TestPush(t *testing.T) { rb.Render(metric.name) profile := rb.SelectMergeProfile(metric.name, metric.query) - assertPPROF(t, profile, metric, td, false) + assertPPROF(t, profile, metric, td, td.fixAtPush) } } }) } } -func assertPPROF(t *testing.T, resp *connect.Response[profilev1.Profile], metric expectedMetric, testdatum pprofTestData, fixes bool) { +func assertPPROF(t *testing.T, resp *connect.Response[profilev1.Profile], metric expectedMetric, testdatum pprofTestData, fix func(*pprof.Profile) *pprof.Profile) { assert.Equal(t, 1, len(resp.Msg.SampleType)) @@ -293,16 +296,10 @@ func assertPPROF(t *testing.T, resp *connect.Response[profilev1.Profile], metric expectedProfile, err := pprof.RawFromBytes(profileBytes) require.NoError(t, err) - if fixes { - if testdatum.spyName == pprof2.SpyNameForFunctionNameRewrite() { - pprof2.FixFunctionNamesForScriptingLanguages(expectedProfile, ingestion.Metadata{SpyName: testdatum.spyName}) - } - - if testdatum.needFunctionIDFix { - pprof2.FixFunctionIDForBrokenDotnet(expectedProfile.Profile) - } - + if fix != nil { + expectedProfile = fix(expectedProfile) } + actualStacktraces := bench.StackCollapseProto(resp.Msg, 0, 1) expectedStacktraces := bench.StackCollapseProto(expectedProfile.Profile, metric.valueIDX, 1) @@ -322,6 +319,27 @@ type pprofTestData struct { expectedError string metrics []expectedMetric needFunctionIDFix bool + needsGoHeapFix bool +} + +func (d *pprofTestData) fixAtPush(p *pprof.Profile) *pprof.Profile { + if d.needsGoHeapFix { + p.Profile = pprof.FixGoProfile(p.Profile) + } + return p +} + +func (d *pprofTestData) fixAtIngest(p *pprof.Profile) *pprof.Profile { + if d.spyName == pprof2.SpyNameForFunctionNameRewrite() { + pprof2.FixFunctionNamesForScriptingLanguages(p, ingestion.Metadata{SpyName: d.spyName}) + } + if d.needFunctionIDFix { + pprof2.FixFunctionIDForBrokenDotnet(p.Profile) + } + if d.needsGoHeapFix { + p.Profile = pprof.FixGoProfile(p.Profile) + } + return p } type expectedMetric struct {