Skip to content

Commit

Permalink
perf: optimize pprof parsing in pull mode.
Browse files Browse the repository at this point in the history
Our profiling data suggested that a lot of time was invested in the
binary search to find locations and functions while writing scraped
profiles. This is an attempt to improve the performance by
preprocessing the functions and locations and putting them into a
table.

A benchmark is included to showcase the results with smaller and
bigger profiles. As expected, there's no gain with small profiles,
quite the opposite, as there's now an extra preprocessing.
On the other hand, there are big gains as profiles get bigger (2x for
the bigger case).

While it'd be possible to find some heuristic to disable the
optimization below a certain threshold, the absolute difference
between small and big profiles is so big that I think it's not worth
it, at least as a first approach.
  • Loading branch information
abeaumont committed Dec 17, 2021
1 parent fc05077 commit 4aaec20
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 4 deletions.
104 changes: 104 additions & 0 deletions pkg/convert/profile_extra_bench_test.go
Expand Up @@ -3,10 +3,12 @@ package convert
import (
"bytes"
"compress/gzip"
"fmt"
"os"
"testing"

"github.com/pyroscope-io/pyroscope/pkg/agent/spy"
"github.com/pyroscope-io/pyroscope/pkg/storage/tree"
)

func BenchmarkProfile_Get(b *testing.B) {
Expand All @@ -22,3 +24,105 @@ func BenchmarkProfile_Get(b *testing.B) {
}
})
}

// parse emulates the parsing work needed to write profiles, without the writing part.
func parse(p *tree.Profile) int {
var b bytes.Buffer
for _, s := range p.Sample {
for i := len(s.LocationId) - 1; i >= 0; i-- {
loc, ok := tree.FindLocation(p, s.LocationId[i])
if !ok {
continue
}
for j := len(loc.Line) - 1; j >= 0; j-- {
fn, found := tree.FindFunction(p, loc.Line[j].FunctionId)
if !found {
continue
}
if b.Len() > 0 {
_ = b.WriteByte(';')
}
_, _ = b.WriteString(p.StringTable[fn.Name])
}
}
}
return len(b.Bytes())
}

// parseWithCache is like parse, but locations and functions are tabled first:
// the per-lookup searches are replaced by O(1) map lookups built once up front
// via tree.Locations and tree.Functions. Behavior is otherwise identical to
// parse, and it returns the same total byte length of the rendered buffer.
func parseWithCache(p *tree.Profile) int {
	// Precompute ID -> entity tables; this is the optimization being benchmarked.
	locs := tree.Locations(p)
	fns := tree.Functions(p)
	var b bytes.Buffer
	for _, s := range p.Sample {
		for i := len(s.LocationId) - 1; i >= 0; i-- {
			loc, ok := locs[s.LocationId[i]]
			if !ok {
				continue
			}
			for j := len(loc.Line) - 1; j >= 0; j-- {
				fn, found := fns[loc.Line[j].FunctionId]
				if !found {
					continue
				}
				if b.Len() > 0 {
					// Separator goes between names, so only after the first write.
					_ = b.WriteByte(';')
				}
				_, _ = b.WriteString(p.StringTable[fn.Name])
			}
		}
	}
	// b.Len() is equivalent to len(b.Bytes()) without exposing the slice.
	return b.Len()
}

// BenchmarkProfile_ParseNoCache measures parse (per-lookup search) on the
// small CPU profile fixture.
func BenchmarkProfile_ParseNoCache(b *testing.B) {
	// Fail loudly if the fixture is missing or malformed; previously these
	// errors were ignored and the benchmark would silently measure garbage.
	buf, err := os.ReadFile("testdata/cpu.pprof")
	if err != nil {
		b.Fatalf("reading fixture: %v", err)
	}
	p, err := ParsePprof(bytes.NewReader(buf))
	if err != nil {
		b.Fatalf("parsing pprof: %v", err)
	}

	// No ResetTimer needed: the sub-benchmark started by b.Run has its own
	// timer, so the setup above is not counted.
	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = parse(p)
		}
	})
}

// BenchmarkProfile_ParseWithCache measures parseWithCache (tabled lookups) on
// the small CPU profile fixture.
func BenchmarkProfile_ParseWithCache(b *testing.B) {
	// Fail loudly if the fixture is missing or malformed; previously these
	// errors were ignored and the benchmark would silently measure garbage.
	buf, err := os.ReadFile("testdata/cpu.pprof")
	if err != nil {
		b.Fatalf("reading fixture: %v", err)
	}
	p, err := ParsePprof(bytes.NewReader(buf))
	if err != nil {
		b.Fatalf("parsing pprof: %v", err)
	}

	// No ResetTimer needed: the sub-benchmark started by b.Run has its own
	// timer, so the setup above is not counted.
	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = parseWithCache(p)
		}
	})
}

// BenchmarkProfile_ParseNoCache_Big measures parse (per-lookup search) on the
// large CPU profile fixture.
func BenchmarkProfile_ParseNoCache_Big(b *testing.B) {
	// Fail loudly if the fixture is missing or malformed; previously these
	// errors were ignored and the benchmark would silently measure garbage.
	buf, err := os.ReadFile("testdata/cpu-big.pprof")
	if err != nil {
		b.Fatalf("reading fixture: %v", err)
	}
	p, err := ParsePprof(bytes.NewReader(buf))
	if err != nil {
		b.Fatalf("parsing pprof: %v", err)
	}

	// No ResetTimer needed: the sub-benchmark started by b.Run has its own
	// timer, so the setup above is not counted.
	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = parse(p)
		}
	})
}

// BenchmarkProfile_ParseWithCache_Big measures parseWithCache (tabled lookups)
// on the large CPU profile fixture.
func BenchmarkProfile_ParseWithCache_Big(b *testing.B) {
	// Fail loudly if the fixture is missing or malformed; previously these
	// errors were ignored and the benchmark would silently measure garbage.
	buf, err := os.ReadFile("testdata/cpu-big.pprof")
	if err != nil {
		b.Fatalf("reading fixture: %v", err)
	}
	p, err := ParsePprof(bytes.NewReader(buf))
	if err != nil {
		b.Fatalf("parsing pprof: %v", err)
	}

	// No ResetTimer needed: the sub-benchmark started by b.Run has its own
	// timer, so the setup above is not counted.
	// Label normalized to "Locations: %d" to match the other three benchmarks.
	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = parseWithCache(p)
		}
	})
}
18 changes: 14 additions & 4 deletions pkg/scrape/pprof.go
Expand Up @@ -72,14 +72,24 @@ func (w *pprofWriter) writeProfile(b []byte) error {
profileTime = time.Now()
}

var locs map[uint64]*tree.Location
var fns map[uint64]*tree.Function

for _, s := range p.GetSampleType() {
sampleTypeName := p.StringTable[s.Type]
sampleTypeConfig, ok := w.config.SampleTypes[sampleTypeName]
if !ok && !w.config.AllSampleTypes {
continue
}

c.writeProfiles(&p, s.Type)
if locs == nil {
locs = tree.Locations(&p)
}
if fns == nil {
fns = tree.Functions(&p)
}

c.writeProfiles(&p, s.Type, locs, fns)
for hash, entry := range c[s.Type] {
j := &upstream.UploadJob{SpyName: "scrape", Trie: entry.Trie}
// Cumulative profiles require two consecutive samples,
Expand Down Expand Up @@ -174,7 +184,7 @@ func newCacheEntry(l []*tree.Label) *cacheEntry {
return &cacheEntry{Trie: transporttrie.New(), labels: l}
}

func (t *cache) writeProfiles(x *tree.Profile, sampleType int64) {
func (t *cache) writeProfiles(x *tree.Profile, sampleType int64, locs map[uint64]*tree.Location, fns map[uint64]*tree.Function) {
valueIndex := 0
if sampleType != 0 {
for i, v := range x.SampleType {
Expand All @@ -191,7 +201,7 @@ func (t *cache) writeProfiles(x *tree.Profile, sampleType int64) {
for _, s := range x.Sample {
entry := t.getOrCreate(sampleType, s.Label)
for i := len(s.LocationId) - 1; i >= 0; i-- {
loc, ok := tree.FindLocation(x, s.LocationId[i])
loc, ok := locs[s.LocationId[i]]
if !ok {
continue
}
Expand All @@ -205,7 +215,7 @@ func (t *cache) writeProfiles(x *tree.Profile, sampleType int64) {
//
// Therefore iteration goes in reverse order.
for j := len(loc.Line) - 1; j >= 0; j-- {
fn, found := tree.FindFunction(x, loc.Line[j].FunctionId)
fn, found := fns[loc.Line[j].FunctionId]
if !found {
continue
}
Expand Down
16 changes: 16 additions & 0 deletions pkg/storage/tree/profile_extra.go
Expand Up @@ -146,3 +146,19 @@ func FindFunction(x *Profile, fid uint64) (*Function, bool) {
}
return nil, false
}

// Locations builds a lookup table mapping each location's ID to the
// location itself, so callers can resolve IDs with a single map access.
func Locations(x *Profile) map[uint64]*Location {
	table := make(map[uint64]*Location, len(x.Location))
	for i := range x.Location {
		table[x.Location[i].Id] = x.Location[i]
	}
	return table
}

// Functions builds a lookup table mapping each function's ID to the
// function itself, so callers can resolve IDs with a single map access.
func Functions(x *Profile) map[uint64]*Function {
	table := make(map[uint64]*Function, len(x.Function))
	for i := range x.Function {
		table[x.Function[i].Id] = x.Function[i]
	}
	return table
}

0 comments on commit 4aaec20

Please sign in to comment.