Skip to content

Commit

Permalink
perf: optimize pprof parsing in pull mode. (#628)
Browse files Browse the repository at this point in the history
* perf: optimize pprof parsing in pull mode.

Our profiling data suggested that a lot of time was invested in the
binary search to find locations and functions while writing scraped
profiles. This is an attempt to improve the performance by
preprocessing the functions and locations and putting them into a
table.

A benchmark is included to showcase the results with smaller and
bigger profiles. As expected, there's no gain with small profiles,
quite the opposite, as there's now an extra preprocessing step.
On the other hand, there are big gains as profiles get bigger (2x for
the bigger case).

While it'd be possible to find some heuristic to disable the
optimization below a certain threshold, the absolute difference
between small and big profiles is so big that I think it's not worth
it, at least as a first approach.

* Replace the map by a slice in function and location caching.

* Add the bigger CPU profile to testdata.

* Create a flexible data structure to find location and functions.

The pprof specification doesn't guarantee that IDs are consecutive,
and that is currently supported, while still providing a fast path for
the common case in which functions and locations have (sorted)
consecutive IDs starting from 1.
  • Loading branch information
abeaumont committed Dec 22, 2021
1 parent 6290e45 commit c626be1
Show file tree
Hide file tree
Showing 5 changed files with 5,098 additions and 5 deletions.
103 changes: 103 additions & 0 deletions pkg/convert/profile_extra_bench_test.go
Expand Up @@ -3,10 +3,12 @@ package convert
import (
"bytes"
"compress/gzip"
"fmt"
"os"
"testing"

"github.com/pyroscope-io/pyroscope/pkg/agent/spy"
"github.com/pyroscope-io/pyroscope/pkg/storage/tree"
)

func BenchmarkProfile_Get(b *testing.B) {
Expand All @@ -22,3 +24,104 @@ func BenchmarkProfile_Get(b *testing.B) {
}
})
}

// parse reproduces the lookup work performed when a profile is written, minus
// the actual write. For every sample it visits the location IDs in reverse
// order, resolves each one with tree.FindLocation, then visits that location's
// lines in reverse order and resolves each function with tree.FindFunction.
// Resolved function names are appended to a single buffer, separated by ';'.
// IDs that cannot be resolved are skipped.
//
// It returns the number of bytes accumulated in the buffer.
func parse(p *tree.Profile) int {
	var names bytes.Buffer
	for _, sample := range p.Sample {
		ids := sample.LocationId
		for n := len(ids); n > 0; n-- {
			location, ok := tree.FindLocation(p, ids[n-1])
			if !ok {
				continue
			}
			lines := location.Line
			for m := len(lines); m > 0; m-- {
				function, ok := tree.FindFunction(p, lines[m-1].FunctionId)
				if !ok {
					continue
				}
				// The separator goes before every name except the very first
				// one written; the buffer is shared across all samples.
				if names.Len() != 0 {
					_ = names.WriteByte(';')
				}
				_, _ = names.WriteString(p.StringTable[function.Name])
			}
		}
	}
	return names.Len()
}

// parseWithCache performs the same traversal as parse, but resolves locations
// and functions through a finder built once up front with tree.NewFinder
// instead of the package-level lookup functions. Building the finder is part
// of the work done by this function.
//
// It returns the number of bytes accumulated from the ';'-separated function
// names.
func parseWithCache(p *tree.Profile) int {
	var names bytes.Buffer
	lookup := tree.NewFinder(p)
	for _, sample := range p.Sample {
		ids := sample.LocationId
		for n := len(ids); n > 0; n-- {
			location, ok := lookup.FindLocation(ids[n-1])
			if !ok {
				continue
			}
			lines := location.Line
			for m := len(lines); m > 0; m-- {
				function, ok := lookup.FindFunction(lines[m-1].FunctionId)
				if !ok {
					continue
				}
				// The separator goes before every name except the very first
				// one written; the buffer is shared across all samples.
				if names.Len() != 0 {
					_ = names.WriteByte(';')
				}
				_, _ = names.WriteString(p.StringTable[function.Name])
			}
		}
	}
	return names.Len()
}

// BenchmarkProfile_ParseNoCache measures parse (lookups through
// tree.FindLocation and tree.FindFunction) on testdata/cpu.pprof.
//
// The benchmark fails immediately if the fixture cannot be read or parsed,
// rather than continuing with a nil profile.
func BenchmarkProfile_ParseNoCache(b *testing.B) {
	buf, err := os.ReadFile("testdata/cpu.pprof")
	if err != nil {
		b.Fatalf("reading testdata/cpu.pprof: %v", err)
	}
	p, err := ParsePprof(bytes.NewReader(buf))
	if err != nil {
		b.Fatalf("parsing testdata/cpu.pprof: %v", err)
	}

	b.ResetTimer()

	// The sub-benchmark name records the profile size so results can be
	// related to the number of locations and functions.
	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = parse(p)
		}
	})
}

// BenchmarkProfile_ParseWithCache measures parseWithCache (lookups through a
// tree.NewFinder instance, built on every iteration) on testdata/cpu.pprof.
//
// The benchmark fails immediately if the fixture cannot be read or parsed,
// rather than continuing with a nil profile.
func BenchmarkProfile_ParseWithCache(b *testing.B) {
	buf, err := os.ReadFile("testdata/cpu.pprof")
	if err != nil {
		b.Fatalf("reading testdata/cpu.pprof: %v", err)
	}
	p, err := ParsePprof(bytes.NewReader(buf))
	if err != nil {
		b.Fatalf("parsing testdata/cpu.pprof: %v", err)
	}

	b.ResetTimer()

	// The sub-benchmark name records the profile size so results can be
	// related to the number of locations and functions.
	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = parseWithCache(p)
		}
	})
}

// BenchmarkProfile_ParseNoCache_Big measures parse (lookups through
// tree.FindLocation and tree.FindFunction) on testdata/cpu-big.pprof.
//
// The benchmark fails immediately if the fixture cannot be read or parsed,
// rather than continuing with a nil profile.
func BenchmarkProfile_ParseNoCache_Big(b *testing.B) {
	buf, err := os.ReadFile("testdata/cpu-big.pprof")
	if err != nil {
		b.Fatalf("reading testdata/cpu-big.pprof: %v", err)
	}
	p, err := ParsePprof(bytes.NewReader(buf))
	if err != nil {
		b.Fatalf("parsing testdata/cpu-big.pprof: %v", err)
	}

	b.ResetTimer()

	// The sub-benchmark name records the profile size so results can be
	// related to the number of locations and functions.
	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = parse(p)
		}
	})
}

// BenchmarkProfile_ParseWithCache_Big measures parseWithCache (lookups through
// a tree.NewFinder instance, built on every iteration) on
// testdata/cpu-big.pprof.
//
// The benchmark fails immediately if the fixture cannot be read or parsed,
// rather than continuing with a nil profile.
func BenchmarkProfile_ParseWithCache_Big(b *testing.B) {
	buf, err := os.ReadFile("testdata/cpu-big.pprof")
	if err != nil {
		b.Fatalf("reading testdata/cpu-big.pprof: %v", err)
	}
	p, err := ParsePprof(bytes.NewReader(buf))
	if err != nil {
		b.Fatalf("parsing testdata/cpu-big.pprof: %v", err)
	}

	b.ResetTimer()

	// The sub-benchmark name uses the same "Locations: N, functions M" format
	// as the other parse benchmarks in this file so the reported names line up.
	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = parseWithCache(p)
		}
	})
}

0 comments on commit c626be1

Please sign in to comment.