Skip to content

Commit

Permalink
feat: repair Go stack traces (#3014)
Browse files Browse the repository at this point in the history
  • Loading branch information
kolesnikovae committed Feb 27, 2024
1 parent aa403a7 commit c286ca2
Show file tree
Hide file tree
Showing 15 changed files with 415 additions and 79 deletions.
266 changes: 266 additions & 0 deletions pkg/pprof/fix_go_heap_truncated.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
package pprof

import (
"bytes"
"reflect"
"sort"
"unsafe"

"golang.org/x/exp/slices"

profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
)

const (
minGroupSize = 2

tokens = 8
tokenLen = 16
suffixLen = tokens + tokenLen

tokenBytesLen = tokenLen * 8
suffixBytesLen = suffixLen * 8
)

// MayHaveGoHeapTruncatedStacktraces reports whether there are
// any chances that the profile may have truncated stack traces.
func MayHaveGoHeapTruncatedStacktraces(p *profilev1.Profile) bool {
if !hasGoHeapSampleTypes(p) {
return false
}
// Some truncated stacks have depth less than the depth limit (32).
const minDepth = 28
for _, s := range p.Sample {
if len(s.LocationId) >= minDepth {
return true
}
}
return false
}

func hasGoHeapSampleTypes(p *profilev1.Profile) bool {
for _, st := range p.SampleType {
switch p.StringTable[st.Type] {
case
"alloc_objects",
"alloc_space",
"inuse_objects",
"inuse_space":
return true
}
}
return false
}

// RepairGoHeapTruncatedStacktraces repairs truncated stack traces
// in Go heap profiles.
//
// Go heap profile has a depth limit of 32 frames, which often
// renders profiles unreadable, and also increases cardinality
// of stack traces.
//
// The function guesses truncated frames based on neighbors and
// repairs stack traces if there are high chances that this
// part is present in the profile. The heuristic is as follows:
//
// For each stack trace S taller than 24 frames: if there is another
// stack trace R taller than 24 frames that overlaps with the given
// one by at least 16 frames in a row from the top, and has frames
// above its root, stack S considered truncated, and the missing part
// is copied from R.
func RepairGoHeapTruncatedStacktraces(p *profilev1.Profile) {
// Group stack traces by bottom (closest to the root) locations.
// Typically, there are very few groups (a hundred or two).
samples, groups := split(p)
// Each group's suffix is then tokenized: each part is shifted by one
// location from the previous one (like n-grams).
// Tokens are written into the token=>group map, Where the value is the
// index of the group with the token found at the furthest position from
// the root (across all groups).
m := make(map[string]group, len(groups)/2)
for i := 0; i < len(groups); i++ {
g := groups[i]
n := len(groups)
if i+1 < len(groups) {
n = groups[i+1]
}
if s := n - g; s < minGroupSize {
continue
}
// We take suffix of the first sample in the group.
s := suffix(samples[g].LocationId)
// Tokenize the suffix: token position is relative to the stack
// trace root: 0 means that the token is the closest to the root.
// TODO: unroll?
// 0 : 64 : 192 // No need.
// 1 : 56 : 184
// 2 : 48 : 176
// 3 : 40 : 168
// 4 : 32 : 160
// 5 : 24 : 152
// 6 : 16 : 144
// 7 : 8 : 136
// 8 : 0 : 128
//
// We skip the top/right-most token, as it is not needed,
// because there can be no more complete stack trace.
for j := uint32(1); j <= tokens; j++ {
hi := suffixBytesLen - j*tokens
lo := hi - tokenBytesLen
// By taking a string representation of the slice,
// we eliminate the need to hash the token explicitly:
// Go map will do it this way or another.
k := unsafeString(s[lo:hi])
// Current candidate: the group where the token is
// located at the furthest position from the root.
c, ok := m[k]
if !ok || j > c.off {
// This group has more complete stack traces:
m[k] = group{
gid: uint32(i),
off: j,
}
}
}
}

// Now we handle chaining. Consider the following stacks:
// 1 2 3 4
// a b [d] (f)
// b c [e] (g)
// c [d] (f) h
// d [e] (g) i
//
// We can't associate 3-rd stack with the 1-st one because their tokens
// do not overlap (given the token size is 2). However, we can associate
// it transitively through the 2nd stack.
//
// Dependencies:
// - group i depends on d[i].
// - d[i] depends on d[d[i].gid].
d := make([]group, len(groups))
for i := 0; i < len(groups); i++ {
g := groups[i]
t := topToken(samples[g].LocationId)
k := unsafeString(t)
c, ok := m[k]
if !ok || c.off == 0 || groups[c.gid] == g {
// The current group has the most complete stack trace.
continue
}
d[i] = c
}

// Then, for each group, we test, if there is another group with a more
// complete suffix, overlapping with the given one by at least one token.
// If such stack trace exists, all stack traces of the group are appended
// with the missing part.
for i := 0; i < len(groups); i++ {
g := groups[i]
c := d[i]
var off uint32
for c.off > 0 {
off += c.off
n := d[c.gid]
if n.off == 0 {
// Stop early to preserve c.
break
}
c = n
}
if off == 0 {
// The current group has the most complete stack trace.
continue
}
// The reference stack trace.
appx := samples[groups[c.gid]].LocationId
// It's possible that the reference stack trace does not
// include the part we're looking for. In this case, we
// simply ignore the group. Although it's possible to infer
// this piece from other stacks, this is left for further
// improvements.
if int(off) >= len(appx) {
continue
}
appx = appx[uint32(len(appx))-off:]
// Now we append the missing part to all stack traces of the group.
n := len(groups)
if i+1 < len(groups) {
n = groups[i+1]
}
for j := g; j < n; j++ {
// Locations typically already have some extra capacity,
// therefore no major allocations are expected here.
samples[j].LocationId = append(samples[j].LocationId, appx...)
}
}
}

type group struct {
gid uint32
off uint32
}

// suffix returns the last suffixLen locations
// of the given stack trace represented as bytes.
// The return slice is always suffixBytesLen long.
// function panics if s is shorter than suffixLen.
func suffix(s []uint64) []byte {
return locBytes(s[len(s)-suffixLen:])
}

// topToken returns the last tokenLen locations
// of the given stack trace represented as bytes.
// The return slice is always tokenBytesLen long.
// function panics if s is shorter than tokenLen.
func topToken(s []uint64) []byte {
return locBytes(s[len(s)-tokenLen:])
}

func locBytes(s []uint64) []byte {
size := len(s) * 8
h := (*reflect.SliceHeader)(unsafe.Pointer(&s))
h.Len = size
h.Cap = size
return *(*[]byte)(unsafe.Pointer(h))
}

func unsafeString(b []byte) string {
return *(*string)(unsafe.Pointer(&b))
}

// split into groups of samples by stack trace suffixes.
// Return slice contains indices of the first sample
// of each group in the collection of selected samples.
func split(p *profilev1.Profile) ([]*profilev1.Sample, []int) {
slices.SortFunc(p.Sample, func(a, b *profilev1.Sample) int {
if len(a.LocationId) < suffixLen {
return -1
}
if len(b.LocationId) < suffixLen {
return 1
}
return bytes.Compare(
suffix(a.LocationId),
suffix(b.LocationId),
)
})
o := sort.Search(len(p.Sample), func(i int) bool {
return len(p.Sample[i].LocationId) >= suffixLen
})
if o == len(p.Sample) {
return nil, nil
}
samples := p.Sample[o:]
const avgGroupSize = 16 // Estimate.
groups := make([]int, 0, len(samples)/avgGroupSize)
var prev []byte
for i := 0; i < len(samples); i++ {
cur := suffix(samples[i].LocationId)
if !bytes.Equal(cur, prev) {
groups = append(groups, i)
prev = cur
}
}
return samples, groups
}
43 changes: 43 additions & 0 deletions pkg/pprof/fix_go_heap_truncated_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package pprof

import (
"os"
"testing"

"github.com/stretchr/testify/require"
)

func Benchmark_RepairGoTruncatedStacktraces(b *testing.B) {
p, err := OpenFile("testdata/goheapfix/heap_go_truncated_3.pb.gz")
require.NoError(b, err)
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
RepairGoHeapTruncatedStacktraces(FixGoProfile(p.CloneVT()))
}
}

func Test_UpdateFixtures_RepairGoTruncatedStacktraces(t *testing.T) {
t.Skip()
t.Helper()
paths := []string{
"testdata/goheapfix/heap_go_truncated_1.pb.gz", // Cortex.
"testdata/goheapfix/heap_go_truncated_2.pb.gz", // Cortex.
"testdata/goheapfix/heap_go_truncated_3.pb.gz", // Loki. Pathological.
"testdata/goheapfix/heap_go_truncated_4.pb.gz", // Pyroscope.
}
for _, path := range paths {
func() {
p, err := OpenFile(path)
require.NoError(t, err, path)
defer p.Close()
f, err := os.Create(path + ".fixed")
require.NoError(t, err, path)
defer f.Close()
p.Profile = FixGoProfile(p.Profile)
RepairGoHeapTruncatedStacktraces(p.Profile)
_, err = p.WriteTo(f)
require.NoError(t, err, path)
}()
}
}
64 changes: 52 additions & 12 deletions pkg/pprof/fix_go_profile.go
Original file line number Diff line number Diff line change
@@ -1,25 +1,47 @@
package pprof

import (
"regexp"
"strings"

profilev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
)

// FixGoProfile removes type parameters from Go function names.
//
// In go 1.21 and above, function names include type parameters,
// however, due to the bug, a function name can include any
// of the type instances regardless of the call site. Thus, e.g.,
// x[T1].foo and x[T2].foo can't be distinguished in a profile.
// This leads to incorrect profiles and incorrect flame graphs,
// and hugely increases cardinality of stack traces.
// FixGoProfile fixes known issues with profiles collected with
// the standard Go profiler.
//
// FixGoProfile will change x[T1].foo and x[T2].foo to x[...].foo
// and normalize the profile, if type parameters are present in
// the profile. Otherwise, the profile returned unchanged.
// Note that the function presumes that p is a Go profile and does
// not verify this. It is expected that the function is called
// very early in the profile processing chain and normalized after,
// regardless of the function outcome.
func FixGoProfile(p *profilev1.Profile) *profilev1.Profile {
p = DropGoTypeParameters(p)
// Now that the profile is normalized, we can try to repair
// truncated stack traces, if any. Note that repaired stacks
// are not deduplicated, so the caller need to normalize the
if MayHaveGoHeapTruncatedStacktraces(p) {
RepairGoHeapTruncatedStacktraces(p)
}
return p
}

// DropGoTypeParameters removes of type parameters from Go function names.
//
// In go 1.21 and above, function names include type parameters, however,
// due to a bug, a function name could include any of the type instances
// regardless of the call site. Thus, e.g., x[T1].foo and x[T2].foo can't
// be distinguished in a profile. This leads to incorrect profiles and
// incorrect flame graphs, and hugely increases cardinality of stack traces.
//
// The function renames x[T1].foo and x[T2].foo to x[...].foo and normalizes
// the profile, if type parameters are present in the profile. Otherwise, the
// profile returns unchanged.
//
// See https://github.com/golang/go/issues/64528.
func DropGoTypeParameters(p *profilev1.Profile) *profilev1.Profile {
var n int
for i, s := range p.StringTable {
c := DropGoTypeParameters(s)
c := dropGoTypeParameters(s)
if c != s {
p.StringTable[i] = c
n++
Expand All @@ -37,3 +59,21 @@ func FixGoProfile(p *profilev1.Profile) *profilev1.Profile {
_ = m.MergeNoClone(p)
return m.Profile()
}

var goStructTypeParameterRegex = regexp.MustCompile(`\[go\.shape\..*\]`)

func dropGoTypeParameters(input string) string {
matchesIndices := goStructTypeParameterRegex.FindAllStringIndex(input, -1)
if len(matchesIndices) == 0 {
return input
}
var modified strings.Builder
prevEnd := 0
for _, indices := range matchesIndices {
start, end := indices[0], indices[1]
modified.WriteString(input[prevEnd:start] + "[...]")
prevEnd = end
}
modified.WriteString(input[prevEnd:])
return modified.String()
}
Loading

0 comments on commit c286ca2

Please sign in to comment.