perf: optimize pprof parsing in pull mode. (#628)

* perf: optimize pprof parsing in pull mode. Our profiling data suggested that a lot of time was spent in the binary search used to find locations and functions while writing scraped profiles. This is an attempt to improve performance by preprocessing the functions and locations and putting them into a table. A benchmark is included to showcase the results with smaller and bigger profiles. As expected, there's no gain with small profiles, quite the opposite, as there's now an extra preprocessing step. On the other hand, there are big gains as profiles get bigger (2x for the bigger case). While it'd be possible to find some heuristic to disable the optimization below a certain threshold, the absolute difference between small and big profiles is so big that I think it's not worth it, at least as a first approach. * Replace the map by a slice in function and location caching. * Add the bigger CPU profile to testdata. * Create a flexible data structure to find locations and functions. The pprof specification doesn't guarantee that IDs are consecutive, and non-consecutive IDs are still supported, while still providing a fast path for the common case in which functions and locations have (sorted) consecutive IDs starting from 1.
grafana · Dec 22, 2021 · c626be1 · c626be1
1 parent 6290e45
commit c626be1
Show file tree

Hide file tree

Showing 5 changed files with 5,098 additions and 5 deletions.
diff --git a/pkg/convert/profile_extra_bench_test.go b/pkg/convert/profile_extra_bench_test.go
@@ -3,10 +3,12 @@ package convert
 import (
 	"bytes"
 	"compress/gzip"
+	"fmt"
 	"os"
 	"testing"
 
 	"github.com/pyroscope-io/pyroscope/pkg/agent/spy"
+	"github.com/pyroscope-io/pyroscope/pkg/storage/tree"
 )
 
 func BenchmarkProfile_Get(b *testing.B) {
@@ -22,3 +24,104 @@ func BenchmarkProfile_Get(b *testing.B) {
 		}
 	})
 }
+
+// parse reproduces the lookup work done when writing a profile, leaving out
+// the write itself. Every sample's location IDs are visited from last to
+// first; each location is resolved with tree.FindLocation and its lines are
+// visited from last to first, resolving each function with tree.FindFunction.
+// Resolved function names are taken from the profile's string table and
+// joined with ';' in a scratch buffer. IDs that cannot be resolved are
+// skipped. The return value is the number of bytes accumulated.
+func parse(p *tree.Profile) int {
+	var out bytes.Buffer
+	for _, sample := range p.Sample {
+		ids := sample.LocationId
+		for i := len(ids) - 1; i >= 0; i-- {
+			location, ok := tree.FindLocation(p, ids[i])
+			if !ok {
+				continue
+			}
+			lines := location.Line
+			for j := len(lines) - 1; j >= 0; j-- {
+				if function, ok := tree.FindFunction(p, lines[j].FunctionId); ok {
+					// The separator goes between names only, never up front.
+					if out.Len() != 0 {
+						_ = out.WriteByte(';')
+					}
+					_, _ = out.WriteString(p.StringTable[function.Name])
+				}
+			}
+		}
+	}
+	return out.Len()
+}
+
+// parseWithCache performs the same traversal as parse, but builds a finder
+// with tree.NewFinder once up front and resolves every location and function
+// through it. Every sample's location IDs and each location's lines are
+// visited from last to first; resolved function names are taken from the
+// profile's string table and joined with ';' in a scratch buffer. IDs that
+// cannot be resolved are skipped. The return value is the number of bytes
+// accumulated.
+func parseWithCache(p *tree.Profile) int {
+	lookup := tree.NewFinder(p)
+	var out bytes.Buffer
+	for _, sample := range p.Sample {
+		ids := sample.LocationId
+		for i := len(ids) - 1; i >= 0; i-- {
+			location, found := lookup.FindLocation(ids[i])
+			if !found {
+				continue
+			}
+			lines := location.Line
+			for j := len(lines) - 1; j >= 0; j-- {
+				if function, ok := lookup.FindFunction(lines[j].FunctionId); ok {
+					// The separator goes between names only, never up front.
+					if out.Len() != 0 {
+						_ = out.WriteByte(';')
+					}
+					_, _ = out.WriteString(p.StringTable[function.Name])
+				}
+			}
+		}
+	}
+	return out.Len()
+}
+
+// BenchmarkProfile_ParseNoCache measures parse on the profile stored in
+// testdata/cpu.pprof. parse resolves every location and function through
+// tree.FindLocation and tree.FindFunction, without building a lookup table
+// first.
+//
+// The benchmark fails immediately if the fixture cannot be read or parsed,
+// rather than continuing with an unusable profile.
+func BenchmarkProfile_ParseNoCache(b *testing.B) {
+	buf, err := os.ReadFile("testdata/cpu.pprof")
+	if err != nil {
+		b.Fatalf("reading profile: %v", err)
+	}
+	p, err := ParsePprof(bytes.NewReader(buf))
+	if err != nil {
+		b.Fatalf("parsing profile: %v", err)
+	}
+
+	b.ResetTimer()
+
+	// The sub-benchmark name carries the table sizes so that results from
+	// differently sized profiles can be told apart.
+	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			_ = parse(p)
+		}
+	})
+}
+
+// BenchmarkProfile_ParseWithCache measures parseWithCache on the profile
+// stored in testdata/cpu.pprof. parseWithCache builds a finder with
+// tree.NewFinder on every call, so the cost of that preprocessing is part of
+// each measured iteration.
+//
+// The benchmark fails immediately if the fixture cannot be read or parsed,
+// rather than continuing with an unusable profile.
+func BenchmarkProfile_ParseWithCache(b *testing.B) {
+	buf, err := os.ReadFile("testdata/cpu.pprof")
+	if err != nil {
+		b.Fatalf("reading profile: %v", err)
+	}
+	p, err := ParsePprof(bytes.NewReader(buf))
+	if err != nil {
+		b.Fatalf("parsing profile: %v", err)
+	}
+
+	b.ResetTimer()
+
+	// The sub-benchmark name carries the table sizes so that results from
+	// differently sized profiles can be told apart.
+	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			_ = parseWithCache(p)
+		}
+	})
+}
+
+// BenchmarkProfile_ParseNoCache_Big measures parse on the profile stored in
+// testdata/cpu-big.pprof. parse resolves every location and function through
+// tree.FindLocation and tree.FindFunction, without building a lookup table
+// first.
+//
+// The benchmark fails immediately if the fixture cannot be read or parsed,
+// rather than continuing with an unusable profile.
+func BenchmarkProfile_ParseNoCache_Big(b *testing.B) {
+	buf, err := os.ReadFile("testdata/cpu-big.pprof")
+	if err != nil {
+		b.Fatalf("reading profile: %v", err)
+	}
+	p, err := ParsePprof(bytes.NewReader(buf))
+	if err != nil {
+		b.Fatalf("parsing profile: %v", err)
+	}
+
+	b.ResetTimer()
+
+	// The sub-benchmark name carries the table sizes so that results from
+	// differently sized profiles can be told apart.
+	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			_ = parse(p)
+		}
+	})
+}
+
+// BenchmarkProfile_ParseWithCache_Big measures parseWithCache on the profile
+// stored in testdata/cpu-big.pprof. parseWithCache builds a finder with
+// tree.NewFinder on every call, so the cost of that preprocessing is part of
+// each measured iteration.
+//
+// The benchmark fails immediately if the fixture cannot be read or parsed,
+// rather than continuing with an unusable profile.
+func BenchmarkProfile_ParseWithCache_Big(b *testing.B) {
+	buf, err := os.ReadFile("testdata/cpu-big.pprof")
+	if err != nil {
+		b.Fatalf("reading profile: %v", err)
+	}
+	p, err := ParsePprof(bytes.NewReader(buf))
+	if err != nil {
+		b.Fatalf("parsing profile: %v", err)
+	}
+
+	b.ResetTimer()
+
+	// The sub-benchmark name carries the table sizes so that results from
+	// differently sized profiles can be told apart. It uses the same
+	// "Locations: %d, functions %d" format as the other parse benchmarks.
+	b.Run(fmt.Sprintf("Locations: %d, functions %d", len(p.Location), len(p.Function)), func(b *testing.B) {
+		for i := 0; i < b.N; i++ {
+			_ = parseWithCache(p)
+		}
+	})
+}