Skip to content

Commit

Permalink
perf: optimize pprof parsing in pull mode.
Browse files Browse the repository at this point in the history
Our profiling data suggested that a lot of time was invested in the
binary search to find locations and functions while writing scraped
profiles. This is an attempt to improve the performance by
preprocessing the functions and locations and putting them into a
table.

A benchmark is included to showcase the results with smaller and
bigger profiles. As expected, there's no gain with small profiles,
quite the opposite, as there's now an extra preprocessing.
On the other hand, there are big gains as profiles get bigger (2x for
the bigger case).

While it'd be possible to find some heuristic to disable the
optimization below a certain threshold, the absolute difference
between small and big profiles is so big that I think it's not worth
it, at least as a first approach.
  • Loading branch information
abeaumont committed Dec 17, 2021
1 parent fc05077 commit 4aaec20
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 4 deletions.
104 changes: 104 additions & 0 deletions pkg/convert/profile_extra_bench_test.go
Expand Up @@ -3,10 +3,12 @@ package convert
import (
"bytes"
"compress/gzip"
"fmt"
"os"
"testing"

"github.com/pyroscope-io/pyroscope/pkg/agent/spy"
"github.com/pyroscope-io/pyroscope/pkg/storage/tree"
)

func BenchmarkProfile_Get(b *testing.B) {
Expand All @@ -22,3 +24,105 @@ func BenchmarkProfile_Get(b *testing.B) {
}
})
}

// parse emulates the parsing work needed to write profiles, without the writing part.
func parse(p *tree.Profile) int {
var b bytes.Buffer
for _, s := range p.Sample {
for i := len(s.LocationId) - 1; i >= 0; i-- {
loc, ok := tree.FindLocation(p, s.LocationId[i])
if !ok {
continue
}
for j := len(loc.Line) - 1; j >= 0; j-- {
fn, found := tree.FindFunction(p, loc.Line[j].FunctionId)
if !found {
continue
}
if b.Len() > 0 {
_ = b.WriteByte(';')
}
_, _ = b.WriteString(p.StringTable[fn.Name])
}
}
}
return len(b.Bytes())
}

// parseWithCache is like parse, but locations and functions are tabled first:
// the per-lookup searches are replaced by O(1) map lookups built once up front
// via tree.Locations and tree.Functions. Behavior is otherwise identical to
// parse, and it returns the same total byte length of the rendered buffer.
func parseWithCache(p *tree.Profile) int {
	// Precompute ID -> entity tables; this is the optimization being benchmarked.
	locs := tree.Locations(p)
	fns := tree.Functions(p)
	var b bytes.Buffer
	for _, s := range p.Sample {
		for i := len(s.LocationId) - 1; i >= 0; i-- {
			loc, ok := locs[s.LocationId[i]]
			if !ok {
				continue
			}
			for j := len(loc.Line) - 1; j >= 0; j-- {
				fn, found := fns[loc.Line[j].FunctionId]
				if !found {
					continue
				}
				if b.Len() > 0 {
					// Separator goes between names, so only after the first write.
					_ = b.WriteByte(';')
				}
				_, _ = b.WriteString(p.StringTable[fn.Name])
			}
		}
	}
	// b.Len() is equivalent to len(b.Bytes()) without exposing the slice.
	return b.Len()
}

// BenchmarkProfile_ParseNoCache measures parse (per-lookup search) on the
// small CPU profile fixture.
func BenchmarkProfile_ParseNoCache(b *testing.B) {
	// Fail loudly if the fixture is missing or malformed; previously these
	// errors were ignored and the benchmark would silently measure garbage.
	buf, err := os.ReadFile("testdata/cpu.pprof")
	if err != nil {
		b.Fatalf("reading fixture: %v", err)
	}
	p, err := ParsePprof(bytes.NewReader(buf))
	if err != nil {
		b.Fatalf("parsing pprof: %v", err)
	}

	// No ResetTimer needed: the sub-benchmark started by b.Run has its own
	// timer, so the setup above is not counted.
	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = parse(p)
		}
	})
}

// BenchmarkProfile_ParseWithCache measures parseWithCache (tabled lookups) on
// the small CPU profile fixture.
func BenchmarkProfile_ParseWithCache(b *testing.B) {
	// Fail loudly if the fixture is missing or malformed; previously these
	// errors were ignored and the benchmark would silently measure garbage.
	buf, err := os.ReadFile("testdata/cpu.pprof")
	if err != nil {
		b.Fatalf("reading fixture: %v", err)
	}
	p, err := ParsePprof(bytes.NewReader(buf))
	if err != nil {
		b.Fatalf("parsing pprof: %v", err)
	}

	// No ResetTimer needed: the sub-benchmark started by b.Run has its own
	// timer, so the setup above is not counted.
	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = parseWithCache(p)
		}
	})
}

// BenchmarkProfile_ParseNoCache_Big measures parse (per-lookup search) on the
// large CPU profile fixture.
func BenchmarkProfile_ParseNoCache_Big(b *testing.B) {
	// Fail loudly if the fixture is missing or malformed; previously these
	// errors were ignored and the benchmark would silently measure garbage.
	buf, err := os.ReadFile("testdata/cpu-big.pprof")
	if err != nil {
		b.Fatalf("reading fixture: %v", err)
	}
	p, err := ParsePprof(bytes.NewReader(buf))
	if err != nil {
		b.Fatalf("parsing pprof: %v", err)
	}

	// No ResetTimer needed: the sub-benchmark started by b.Run has its own
	// timer, so the setup above is not counted.
	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = parse(p)
		}
	})
}

// BenchmarkProfile_ParseWithCache_Big measures parseWithCache (tabled lookups)
// on the large CPU profile fixture.
func BenchmarkProfile_ParseWithCache_Big(b *testing.B) {
	// Fail loudly if the fixture is missing or malformed; previously these
	// errors were ignored and the benchmark would silently measure garbage.
	buf, err := os.ReadFile("testdata/cpu-big.pprof")
	if err != nil {
		b.Fatalf("reading fixture: %v", err)
	}
	p, err := ParsePprof(bytes.NewReader(buf))
	if err != nil {
		b.Fatalf("parsing pprof: %v", err)
	}

	// No ResetTimer needed: the sub-benchmark started by b.Run has its own
	// timer, so the setup above is not counted.
	// Label normalized to "Locations: %d" to match the other three benchmarks.
	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
		for i := 0; i < b.N; i++ {
			_ = parseWithCache(p)
		}
	})
}
18 changes: 14 additions & 4 deletions pkg/scrape/pprof.go
Expand Up @@ -72,14 +72,24 @@ func (w *pprofWriter) writeProfile(b []byte) error {
profileTime = time.Now()
}

var locs map[uint64]*tree.Location
var fns map[uint64]*tree.Function

for _, s := range p.GetSampleType() {
sampleTypeName := p.StringTable[s.Type]
sampleTypeConfig, ok := w.config.SampleTypes[sampleTypeName]
if !ok && !w.config.AllSampleTypes {
continue
}

c.writeProfiles(&p, s.Type)
if locs == nil {
locs = tree.Locations(&p)
}
if fns == nil {
fns = tree.Functions(&p)
}

c.writeProfiles(&p, s.Type, locs, fns)
for hash, entry := range c[s.Type] {
j := &upstream.UploadJob{SpyName: "scrape", Trie: entry.Trie}
// Cumulative profiles require two consecutive samples,
Expand Down Expand Up @@ -174,7 +184,7 @@ func newCacheEntry(l []*tree.Label) *cacheEntry {
return &cacheEntry{Trie: transporttrie.New(), labels: l}
}

func (t *cache) writeProfiles(x *tree.Profile, sampleType int64) {
func (t *cache) writeProfiles(x *tree.Profile, sampleType int64, locs map[uint64]*tree.Location, fns map[uint64]*tree.Function) {
valueIndex := 0
if sampleType != 0 {
for i, v := range x.SampleType {
Expand All @@ -191,7 +201,7 @@ func (t *cache) writeProfiles(x *tree.Profile, sampleType int64) {
for _, s := range x.Sample {
entry := t.getOrCreate(sampleType, s.Label)
for i := len(s.LocationId) - 1; i >= 0; i-- {
loc, ok := tree.FindLocation(x, s.LocationId[i])
loc, ok := locs[s.LocationId[i]]
if !ok {
continue
}
Expand All @@ -205,7 +215,7 @@ func (t *cache) writeProfiles(x *tree.Profile, sampleType int64) {
//
// Therefore iteration goes in reverse order.
for j := len(loc.Line) - 1; j >= 0; j-- {
fn, found := tree.FindFunction(x, loc.Line[j].FunctionId)
fn, found := fns[loc.Line[j].FunctionId]
if !found {
continue
}
Expand Down
16 changes: 16 additions & 0 deletions pkg/storage/tree/profile_extra.go
Expand Up @@ -146,3 +146,19 @@ func FindFunction(x *Profile, fid uint64) (*Function, bool) {
}
return nil, false
}

// Locations builds a lookup table mapping each location's ID to the
// location itself, so callers can resolve IDs with a single map access.
func Locations(x *Profile) map[uint64]*Location {
	table := make(map[uint64]*Location, len(x.Location))
	for i := range x.Location {
		table[x.Location[i].Id] = x.Location[i]
	}
	return table
}

// Functions builds a lookup table mapping each function's ID to the
// function itself, so callers can resolve IDs with a single map access.
func Functions(x *Profile) map[uint64]*Function {
	table := make(map[uint64]*Function, len(x.Function))
	for i := range x.Function {
		table[x.Function[i].Id] = x.Function[i]
	}
	return table
}

0 comments on commit 4aaec20

Please sign in to comment.