From d86dd395df9577f78e6ca319fba028f18ffd0cea Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Wed, 29 Jun 2022 17:06:01 +0200 Subject: [PATCH 01/20] Add benchmarks to pipeline tests --- cmd/testrunner.go | 33 +- internal/cobraext/flags.go | 9 + internal/testrunner/benchmark.go | 86 ++++ internal/testrunner/report_format.go | 8 +- internal/testrunner/report_output.go | 14 +- .../testrunner/reporters/formats/human.go | 54 ++- .../testrunner/reporters/formats/xunit.go | 29 +- internal/testrunner/reporters/outputs/file.go | 24 +- .../testrunner/reporters/outputs/stdout.go | 10 +- .../testrunner/runners/pipeline/benchmark.go | 384 ++++++++++++++++++ .../testrunner/runners/pipeline/runner.go | 29 +- internal/testrunner/testrunner.go | 4 + 12 files changed, 658 insertions(+), 26 deletions(-) create mode 100644 internal/testrunner/benchmark.go create mode 100644 internal/testrunner/runners/pipeline/benchmark.go diff --git a/cmd/testrunner.go b/cmd/testrunner.go index 9247b56b8f..13b83dedc9 100644 --- a/cmd/testrunner.go +++ b/cmd/testrunner.go @@ -9,6 +9,7 @@ import ( "os" "path/filepath" "strings" + "time" "github.com/pkg/errors" "github.com/spf13/cobra" @@ -68,6 +69,9 @@ func setupTestCommand() *cobraext.Command { cmd.PersistentFlags().StringP(cobraext.ReportFormatFlagName, "", string(formats.ReportFormatHuman), cobraext.ReportFormatFlagDescription) cmd.PersistentFlags().StringP(cobraext.ReportOutputFlagName, "", string(outputs.ReportOutputSTDOUT), cobraext.ReportOutputFlagDescription) cmd.PersistentFlags().BoolP(cobraext.TestCoverageFlagName, "", false, cobraext.TestCoverageFlagDescription) + cmd.PersistentFlags().BoolP(cobraext.TestPerfFlagName, "", false, cobraext.TestPerfFlagDescription) + cmd.PersistentFlags().IntP(cobraext.TestPerfCountFlagName, "", 1000, cobraext.TestPerfCountFlagDescription) + cmd.PersistentFlags().DurationP(cobraext.TestPerfDurationFlagName, "", time.Duration(0), cobraext.TestPerfDurationFlagDescription) cmd.PersistentFlags().DurationP(cobraext.DeferCleanupFlagName, "", 0, cobraext.DeferCleanupFlagDescription) cmd.PersistentFlags().String(cobraext.VariantFlagName, "", cobraext.VariantFlagDescription) @@ -122,6 +126,21 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command return cobraext.FlagParsingError(err, cobraext.TestCoverageFlagName) } + testPerf, err := cmd.Flags().GetBool(cobraext.TestPerfFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.TestPerfFlagName) + } + + testPerfCount, err := cmd.Flags().GetInt(cobraext.TestPerfCountFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.TestPerfCountFlagName) + } + + testPerfDur, err := cmd.Flags().GetDuration(cobraext.TestPerfDurationFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.TestPerfCountFlagDescription) + } + packageRootPath, found, err := packages.FindPackageRoot() if !found { return errors.New("package root not found") @@ -200,6 +219,11 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command DeferCleanup: deferCleanup, ServiceVariant: variantFlag, WithCoverage: testCoverage, + Benchmark: testrunner.BenchmarkConfig{ + Enabled: testPerf, + NumDocs: testPerfCount, + Duration: testPerfDur, + }, }) results = append(results, r...) 
@@ -210,7 +234,7 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command } format := testrunner.TestReportFormat(reportFormat) - report, err := testrunner.FormatReport(format, results) + testReport, benchReport, err := testrunner.FormatReport(format, results) if err != nil { return errors.Wrap(err, "error formatting test report") } @@ -220,10 +244,15 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command return errors.Wrapf(err, "reading package manifest failed (path: %s)", packageRootPath) } - if err := testrunner.WriteReport(m.Name, testrunner.TestReportOutput(reportOutput), report, format); err != nil { + if err := testrunner.WriteReport(m.Name, testrunner.TestReportOutput(reportOutput), testReport, format, testrunner.ReportTypeTest); err != nil { return errors.Wrap(err, "error writing test report") } + if benchReport != "" { + if err := testrunner.WriteReport(m.Name, testrunner.TestReportOutput(reportOutput), benchReport, format, testrunner.ReportTypeBench); err != nil { + return errors.Wrap(err, "error writing benchmark report") + } + } if testCoverage { err := testrunner.WriteCoverage(packageRootPath, m.Name, runner.Type(), results) if err != nil { diff --git a/internal/cobraext/flags.go b/internal/cobraext/flags.go index abc1e1f0ed..7073318641 100644 --- a/internal/cobraext/flags.go +++ b/internal/cobraext/flags.go @@ -131,6 +131,15 @@ const ( TestCoverageFlagName = "test-coverage" TestCoverageFlagDescription = "generate Cobertura test coverage reports" + TestPerfFlagName = "bench" + TestPerfFlagDescription = "run benchmarks" + + TestPerfCountFlagName = "bench-count" + TestPerfCountFlagDescription = "number of docs to use for benchmark" + + TestPerfDurationFlagName = "bench-duration" + TestPerfDurationFlagDescription = "TODO" + VariantFlagName = "variant" VariantFlagDescription = "service variant" ) diff --git a/internal/testrunner/benchmark.go b/internal/testrunner/benchmark.go new file mode 100644 index 0000000000..b2d348b77f --- /dev/null +++ b/internal/testrunner/benchmark.go @@ -0,0 +1,86 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package testrunner + +import ( + "fmt" + "time" +) + +// BenchmarkConfig is the configuration used for benchmarks. +type BenchmarkConfig struct { + // Enabled controls if benchmarks are run. + Enabled bool + + // NumDocs is the number of documents to be used during benchmark. + NumDocs int + + // Duration is the optional benchmark duration. + Duration time.Duration +} + +// BenchmarkResult represents the result of a benchmark run. +// This is modeled after the xUnit benchmark schema. +// See https://github.com/Autodesk/jenkinsci-benchmark-plugin/blob/master/doc/EXAMPLE_SCHEMA_XML_DEFAULT.md +type BenchmarkResult struct { + // XMLName is a zero-length field used as an annotation for XML marshaling. + XMLName struct{} `xml:"group"` + + // Name of this benchmark run. + Name string `xml:"name,attr"` + + // Description of the benchmark run. + Description string `xml:"description,omitempty"` + + // Parameters used for this benchmark. + Parameters []BenchmarkValue `xml:"parameter"` + + // Tests holds the results for the benchmark. + Tests []BenchmarkTest `xml:"test"` +} + +// BenchmarkTest models a particular test performed during a benchmark. +type BenchmarkTest struct { + // Name of this test. 
+ Name string `xml:"name,attr"` + // Description of this test. + Description string `xml:"description,omitempty"` + // Parameters for this test. + Parameters []BenchmarkValue `xml:"parameter"` + // Results of the test. + Results []BenchmarkValue `xml:"result"` +} + +// BenchmarkValue represents a value (result or parameter) +// with an optional associated unit. +type BenchmarkValue struct { + // Name of the value. + Name string `xml:"name,attr"` + + // Description of the value. + Description string `xml:"description,omitempty"` + + // Unit used for this value. + Unit string `xml:"unit,omitempty"` + + // Value is of any type, usually string or numeric. + Value interface{} `xml:"value,omitempty"` +} + +// PrettyValue returns a BenchmarkValue's value nicely-formatted. +func (p BenchmarkValue) PrettyValue() (r string) { + if str, ok := p.Value.(fmt.Stringer); ok { + return str.String() + } + if float, ok := p.Value.(float64); ok { + r = fmt.Sprintf("%.02f", float) + } else { + r = fmt.Sprintf("%v", p.Value) + } + if p.Unit != "" { + r += p.Unit + } + return r +} diff --git a/internal/testrunner/report_format.go b/internal/testrunner/report_format.go index 0a014eff5a..cb298c76c0 100644 --- a/internal/testrunner/report_format.go +++ b/internal/testrunner/report_format.go @@ -10,7 +10,7 @@ import "fmt" type TestReportFormat string // ReportFormatFunc defines the report formatter function. -type ReportFormatFunc func(results []TestResult) (string, error) +type ReportFormatFunc func(results []TestResult) (string, string, error) var reportFormatters = map[TestReportFormat]ReportFormatFunc{} @@ -19,11 +19,11 @@ func RegisterReporterFormat(name TestReportFormat, formatFunc ReportFormatFunc) reportFormatters[name] = formatFunc } -// FormatReport delegates formatting of test results to the registered test report formatter -func FormatReport(name TestReportFormat, results []TestResult) (string, error) { +// FormatReport delegates formatting of test results to the registered test report formatter. +func FormatReport(name TestReportFormat, results []TestResult) (testReport string, benchmarkReport string, err error) { reportFunc, defined := reportFormatters[name] if !defined { - return "", fmt.Errorf("unregistered test report format: %s", name) + return "", "", fmt.Errorf("unregistered test report format: %s", name) } return reportFunc(results) diff --git a/internal/testrunner/report_output.go b/internal/testrunner/report_output.go index 57e55cceb6..cb0002a584 100644 --- a/internal/testrunner/report_output.go +++ b/internal/testrunner/report_output.go @@ -11,8 +11,16 @@ import ( // TestReportOutput represents an output for a test report type TestReportOutput string +// TestReportType represents a test report type (test, benchmark) +type TestReportType string + +const ( + ReportTypeTest TestReportType = "test" + ReportTypeBench TestReportType = "bench" +) + // ReportOutputFunc defines the report writer function. 
-type ReportOutputFunc func(pkg, report string, format TestReportFormat) error +type ReportOutputFunc func(pkg, report string, format TestReportFormat, ttype TestReportType) error var reportOutputs = map[TestReportOutput]ReportOutputFunc{} @@ -22,11 +30,11 @@ func RegisterReporterOutput(name TestReportOutput, outputFunc ReportOutputFunc) } // WriteReport delegates writing of test results to the registered test report output -func WriteReport(pkg string, name TestReportOutput, report string, format TestReportFormat) error { +func WriteReport(pkg string, name TestReportOutput, report string, format TestReportFormat, ttype TestReportType) error { outputFunc, defined := reportOutputs[name] if !defined { return fmt.Errorf("unregistered test report output: %s", name) } - return outputFunc(pkg, report, format) + return outputFunc(pkg, report, format, ttype) } diff --git a/internal/testrunner/reporters/formats/human.go b/internal/testrunner/reporters/formats/human.go index fd83a4d2a3..c7459961db 100644 --- a/internal/testrunner/reporters/formats/human.go +++ b/internal/testrunner/reporters/formats/human.go @@ -9,6 +9,7 @@ import ( "strings" "github.com/jedib0t/go-pretty/table" + "github.com/jedib0t/go-pretty/text" "github.com/elastic/elastic-package/internal/testrunner" ) @@ -22,11 +23,30 @@ const ( ReportFormatHuman testrunner.TestReportFormat = "human" ) -func reportHumanFormat(results []testrunner.TestResult) (string, error) { +func reportHumanFormat(results []testrunner.TestResult) (string, string, error) { if len(results) == 0 { - return "No test results", nil + return "No test results", "", nil } + var benchmarks []testrunner.BenchmarkResult + for _, r := range results { + if r.Benchmark != nil { + benchmarks = append(benchmarks, *r.Benchmark) + } + } + + testFmtd, err := reportHumanFormatTest(results) + if err != nil { + return "", "", err + } + benchFmtd, err := reportHumanFormatBenchmark(benchmarks) + if err != nil { + return "", "", err + } + return testFmtd, benchFmtd, nil +} + +func reportHumanFormatTest(results []testrunner.TestResult) (string, error) { var report strings.Builder headerPrinted := false @@ -68,6 +88,36 @@ func reportHumanFormat(results []testrunner.TestResult) (string, error) { t.SetStyle(table.StyleRounded) report.WriteString(t.Render()) + return report.String(), nil +} +func reportHumanFormatBenchmark(benchmarks []testrunner.BenchmarkResult) (string, error) { + var report strings.Builder + for idx, b := range benchmarks { + report.WriteString(fmt.Sprintf("\n\nBenchmark results %d/%d:\n\n", idx+1, len(benchmarks))) + + if len(b.Parameters) > 0 { + report.WriteString(renderBenchmarkTable("parameters", b.Parameters) + "\n") + } + for _, test := range b.Tests { + report.WriteString(renderBenchmarkTable(test.Name, test.Results) + "\n") + } + } return report.String(), nil } + +func renderBenchmarkTable(title string, values []testrunner.BenchmarkValue) string { + t := table.NewWriter() + t.SetStyle(table.StyleRounded) + t.SetTitle(title) + t.SetColumnConfigs([]table.ColumnConfig{ + { + Number: 2, + Align: text.AlignRight, + }, + }) + for _, r := range values { + t.AppendRow(table.Row{r.Name, r.PrettyValue()}) + } + return t.Render() +} diff --git a/internal/testrunner/reporters/formats/xunit.go b/internal/testrunner/reporters/formats/xunit.go index e2255b0b57..7176227cbe 100644 --- a/internal/testrunner/reporters/formats/xunit.go +++ b/internal/testrunner/reporters/formats/xunit.go @@ -52,7 +52,25 @@ type skipped struct { Message string `xml:"message,attr"` } -func 
reportXUnitFormat(results []testrunner.TestResult) (string, error) { +func reportXUnitFormat(results []testrunner.TestResult) (string, string, error) { + var benchmarks []testrunner.BenchmarkResult + for _, r := range results { + if r.Benchmark != nil { + benchmarks = append(benchmarks, *r.Benchmark) + } + } + testFmtd, err := reportXUnitFormatTest(results) + if err != nil { + return "", "", err + } + benchFmtd, err := reportXUnitFormatBenchmark(benchmarks) + if err != nil { + return "", "", err + } + return testFmtd, benchFmtd, nil +} + +func reportXUnitFormatTest(results []testrunner.TestResult) (string, error) { // test type => package => data stream => test cases tests := map[string]map[string]map[string][]testCase{} @@ -143,3 +161,12 @@ func reportXUnitFormat(results []testrunner.TestResult) (string, error) { return xml.Header + string(out), nil } + +func reportXUnitFormatBenchmark(benchmarks []testrunner.BenchmarkResult) (string, error) { + out, err := xml.MarshalIndent(benchmarks, "", " ") + if err != nil { + return "", errors.Wrap(err, "unable to format benchmark results as xUnit") + } + + return xml.Header + string(out), nil +} diff --git a/internal/testrunner/reporters/outputs/file.go b/internal/testrunner/reporters/outputs/file.go index ce589b87c9..b90df02876 100644 --- a/internal/testrunner/reporters/outputs/file.go +++ b/internal/testrunner/reporters/outputs/file.go @@ -26,8 +26,8 @@ const ( ReportOutputFile testrunner.TestReportOutput = "file" ) -func reportToFile(pkg, report string, format testrunner.TestReportFormat) error { - dest, err := testReportsDir() +func reportToFile(pkg, report string, format testrunner.TestReportFormat, ttype testrunner.TestReportType) error { + dest, err := reportsDir(ttype) if err != nil { return errors.Wrap(err, "could not determine test reports folder") } @@ -36,7 +36,7 @@ func reportToFile(pkg, report string, format testrunner.TestReportFormat) error _, err = os.Stat(dest) if err != nil && errors.Is(err, os.ErrNotExist) { if err := os.MkdirAll(dest, 0755); err != nil { - return errors.Wrap(err, "could not create test reports folder") + return errors.Wrapf(err, "could not create %s reports folder", ttype) } } @@ -44,22 +44,30 @@ func reportToFile(pkg, report string, format testrunner.TestReportFormat) error if format == formats.ReportFormatXUnit { ext = "xml" } - fileName := fmt.Sprintf("%s_%d.%s", pkg, time.Now().UnixNano(), ext) filePath := filepath.Join(dest, fileName) if err := os.WriteFile(filePath, []byte(report+"\n"), 0644); err != nil { - return errors.Wrap(err, "could not write report file") + return errors.Wrapf(err, "could not write %s report file", ttype) } return nil } -// testReportsDir returns the location of the directory to store test reports. -func testReportsDir() (string, error) { +// reportsDir returns the location of the directory to store reports. 
+func reportsDir(ttype testrunner.TestReportType) (string, error) { buildDir, err := builder.BuildDirectory() if err != nil { return "", errors.Wrap(err, "locating build directory failed") } - return filepath.Join(buildDir, "test-results"), nil + var folder string + switch ttype { + case testrunner.ReportTypeTest: + folder = "test-results" + case testrunner.ReportTypeBench: + folder = "benchmark-results" + default: + return "", fmt.Errorf("unsupported report type: %s", ttype) + } + return filepath.Join(buildDir, folder), nil } diff --git a/internal/testrunner/reporters/outputs/stdout.go b/internal/testrunner/reporters/outputs/stdout.go index db243a2382..226cafd337 100644 --- a/internal/testrunner/reporters/outputs/stdout.go +++ b/internal/testrunner/reporters/outputs/stdout.go @@ -19,10 +19,14 @@ const ( ReportOutputSTDOUT testrunner.TestReportOutput = "stdout" ) -func reportToSTDOUT(pkg, report string, _ testrunner.TestReportFormat) error { - fmt.Printf("--- Test results for package: %s - START ---\n", pkg) +func reportToSTDOUT(pkg, report string, _ testrunner.TestReportFormat, ttype testrunner.TestReportType) error { + reportType := "Test" + if ttype == testrunner.ReportTypeBench { + reportType = "Benchmark" + } + fmt.Printf("--- %s results for package: %s - START ---\n", reportType, pkg) fmt.Println(report) - fmt.Printf("--- Test results for package: %s - END ---\n", pkg) + fmt.Printf("--- %s results for package: %s - END ---\n", reportType, pkg) fmt.Println("Done") return nil diff --git a/internal/testrunner/runners/pipeline/benchmark.go b/internal/testrunner/runners/pipeline/benchmark.go new file mode 100644 index 0000000000..dd3c2c5f01 --- /dev/null +++ b/internal/testrunner/runners/pipeline/benchmark.go @@ -0,0 +1,384 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package pipeline + +import ( + "encoding/json" + "fmt" + "path/filepath" + "sort" + "time" + + "github.com/pkg/errors" + + "github.com/elastic/elastic-package/internal/elasticsearch/ingest" + "github.com/elastic/elastic-package/internal/packages" + "github.com/elastic/elastic-package/internal/testrunner" +) + +const ( + // How many attempts to make while approximating + // benchmark duration by adjusting document count. + durationAdjustMaxTries = 3 + + // How close to the target duration for a benchmark + // to be is accepted. + durationToleranceSeconds = 0.5 + + // Same, but as a percentage of the target duration. + durationTolerancePercent = 0.9 + + // Minimum acceptable length for a benchmark result. + minDurationSeconds = 0.001 // 1ms + + // How many top processors to return. + numTopProcs = 10 +) + +func BenchmarkPipeline(options testrunner.TestOptions) (*testrunner.BenchmarkResult, error) { + // Load all test documents + docs, err := loadAllTestDocs(options.TestFolder.Path) + if err != nil { + return nil, errors.Wrap(err, "failed loading test documents") + } + + // Run benchmark + bench, err := benchmarkIngest(options, docs) + if err != nil { + return nil, errors.Wrap(err, "failed running benchmark") + } + + // Extract performance measurements + processorKey := func(pipeline ingest.Pipeline, processor ingest.Processor) string { + // Don't want to use pipeline processors time in benchmark, as they + // aggregate the time of all the processors in their pipeline. 
+ if processor.Type == "pipeline" { + return "" + } + return fmt.Sprintf("%s @ %s:%d", processor.Type, pipeline.Filename(), processor.FirstLine) + } + byAbsoluteTime := func(record ingest.StatsRecord) int64 { + return record.TimeInMillis * int64(time.Millisecond) + } + byRelativeTime := func(record ingest.StatsRecord) int64 { + if record.Count == 0 { + return 0 + } + return record.TimeInMillis * int64(time.Millisecond) / record.Count + } + asPercentageOfTotalDuration := func(perf processorPerformance) testrunner.BenchmarkValue { + return testrunner.BenchmarkValue{ + Name: perf.key, + Description: perf.key, + Unit: "%", + Value: time.Duration(perf.value).Seconds() * 100 / bench.elapsed.Seconds(), + } + } + asDuration := func(perf processorPerformance) testrunner.BenchmarkValue { + return testrunner.BenchmarkValue{ + Name: perf.key, + Description: perf.key, + Value: time.Duration(perf.value), + } + } + nonZero := func(p processorPerformance) bool { + // This removes pipeline processors (marked with key="") and zero values. + return p.key != "" && p.value != 0 + } + + topAbsProc, err := bench. + aggregate(processorKey, byAbsoluteTime). + filter(nonZero). + sort(descending). + top(numTopProcs). + collect(asPercentageOfTotalDuration) + if err != nil { + return nil, err + } + + topRelProcs, err := bench. + aggregate(processorKey, byRelativeTime). + filter(nonZero). + sort(descending). + top(numTopProcs). + collect(asDuration) + if err != nil { + return nil, err + } + + // Build result + result := &testrunner.BenchmarkResult{ + Name: fmt.Sprintf("pipeline benchmark for %s/%s", options.TestFolder.Package, options.TestFolder.DataStream), + Parameters: []testrunner.BenchmarkValue{ + { + Name: "package", + Value: options.TestFolder.Package, + }, + { + Name: "data_stream", + Value: options.TestFolder.DataStream, + }, + { + Name: "source doc count", + Value: len(docs), + }, + { + Name: "doc count", + Value: bench.numDocs, + }, + }, + Tests: []testrunner.BenchmarkTest{ + { + Name: "ingest performance", + Results: []testrunner.BenchmarkValue{ + { + Name: "ingest time", + Description: "time elapsed in ingest processors", + Value: bench.elapsed.Seconds(), + Unit: "s", + }, + { + Name: "eps", + Description: "ingested events per second", + Value: float64(bench.numDocs) / bench.elapsed.Seconds(), + }, + }, + }, + { + Name: "processors by total time", + Description: fmt.Sprintf("top %d processors by time spent", numTopProcs), + Results: topAbsProc, + }, + { + Name: "processors by average time per doc", + Description: fmt.Sprintf("top %d processors by average time per document", numTopProcs), + Results: topRelProcs, + }, + }, + } + + return result, nil +} + +type ingestResult struct { + pipelines []ingest.Pipeline + stats ingest.PipelineStatsMap + elapsed time.Duration + numDocs int +} + +func benchmarkIngest(options testrunner.TestOptions, baseDocs []json.RawMessage) (ingestResult, error) { + if options.Benchmark.Duration == time.Duration(0) { + // Run with a fixed doc count + return runSingleBenchmark(options, resizeDocs(baseDocs, options.Benchmark.NumDocs)) + } + + // Approximate doc count to target duration + step, err := runSingleBenchmark(options, baseDocs) + if err != nil { + return step, err + } + + for i, n := 0, len(baseDocs); i < durationAdjustMaxTries && compareFuzzy(step.elapsed, options.Benchmark.Duration) == -1; i++ { + n = int(seconds(options.Benchmark.Duration) * float64(n) / seconds(step.elapsed)) + baseDocs = resizeDocs(baseDocs, n) + if step, err = runSingleBenchmark(options, baseDocs); err 
!= nil { + return step, err + } + } + return step, nil +} + +type processorPerformance struct { + key string + value int64 +} + +type aggregation struct { + result []processorPerformance + err error +} + +type keyFn func(ingest.Pipeline, ingest.Processor) string +type valueFn func(record ingest.StatsRecord) int64 +type mapFn func(processorPerformance) testrunner.BenchmarkValue +type compareFn func(a, b processorPerformance) bool +type filterFn func(processorPerformance) bool + +func (ir ingestResult) aggregate(key keyFn, value valueFn) (agg aggregation) { + pipelines := make(map[string]ingest.Pipeline, len(ir.pipelines)) + for _, p := range ir.pipelines { + pipelines[p.Name] = p + } + + for pipelineName, pipelineStats := range ir.stats { + pipeline, ok := pipelines[pipelineName] + if !ok { + return aggregation{err: fmt.Errorf("unexpected pipeline '%s'", pipelineName)} + } + processors, err := pipeline.Processors() + if err != nil { + return aggregation{err: err} + } + if nSrc, nStats := len(processors), len(pipelineStats.Processors); nSrc != nStats { + return aggregation{err: fmt.Errorf("pipeline '%s' processor count mismatch. source=%d stats=%d", pipelineName, nSrc, nStats)} + } + for procId, procStats := range pipelineStats.Processors { + agg.result = append(agg.result, processorPerformance{ + key: key(pipeline, processors[procId]), + value: value(procStats.Stats), + }) + } + } + return agg +} + +func (agg aggregation) sort(compare compareFn) aggregation { + if agg.err != nil { + return agg + } + sort.Slice(agg.result, func(i, j int) bool { + return compare(agg.result[i], agg.result[j]) + }) + return agg +} + +func ascending(a, b processorPerformance) bool { + return a.value < b.value +} + +func descending(a, b processorPerformance) bool { + return !ascending(a, b) +} + +func (agg aggregation) top(n int) aggregation { + if n < len(agg.result) { + agg.result = agg.result[:n] + } + return agg +} + +func (agg aggregation) filter(keep filterFn) aggregation { + if agg.err != nil { + return agg + } + o := 0 + for _, entry := range agg.result { + if keep(entry) { + agg.result[o] = entry + o++ + } + } + agg.result = agg.result[:o] + return agg +} + +func (agg aggregation) collect(fn mapFn) ([]testrunner.BenchmarkValue, error) { + if agg.err != nil { + return nil, agg.err + } + r := make([]testrunner.BenchmarkValue, len(agg.result)) + for idx := range r { + r[idx] = fn(agg.result[idx]) + } + return r, nil +} + +func runSingleBenchmark(options testrunner.TestOptions, docs []json.RawMessage) (ingestResult, error) { + if len(docs) == 0 { + return ingestResult{}, errors.New("no docs supplied for benchmark") + } + dataStreamPath, found, err := packages.FindDataStreamRootForPath(options.TestFolder.Path) + if err != nil { + return ingestResult{}, errors.Wrap(err, "locating data_stream root failed") + } + if !found { + return ingestResult{}, errors.New("data stream root not found") + } + + testCase := testCase{ + events: docs, + } + entryPipeline, pipelines, err := installIngestPipelines(options.API, dataStreamPath) + if err != nil { + return ingestResult{}, errors.Wrap(err, "installing ingest pipelines failed") + } + defer uninstallIngestPipelines(options.API, pipelines) + + if _, err = simulatePipelineProcessing(options.API, entryPipeline, &testCase); err != nil { + return ingestResult{}, errors.Wrap(err, "simulate failed") + } + + stats, err := ingest.GetPipelineStats(options.API, pipelines) + if err != nil { + return ingestResult{}, errors.Wrap(err, "error fetching pipeline stats") + } + var took 
time.Duration + for _, pSt := range stats { + took += time.Millisecond * time.Duration(pSt.TimeInMillis) + } + return ingestResult{ + pipelines: pipelines, + stats: stats, + elapsed: took, + numDocs: len(docs), + }, nil +} + +func resizeDocs(inputDocs []json.RawMessage, want int) []json.RawMessage { + n := len(inputDocs) + if n == 0 { + return nil + } + if want == 0 { + want = 1 + } + result := make([]json.RawMessage, want) + for i := 0; i < want; i++ { + result[i] = inputDocs[i%n] + } + return result +} + +func seconds(d time.Duration) float64 { + s := d.Seconds() + // Don't return durations less than the safe value. + if s < minDurationSeconds { + return minDurationSeconds + } + return s +} + +func compareFuzzy(a, b time.Duration) int { + sa, sb := seconds(a), seconds(b) + if sa > sb { + sa, sb = sb, sa + } + if sb-sa <= durationToleranceSeconds || sa/sb >= durationTolerancePercent { + return 0 + } + if a < b { + return -1 + } + return 1 +} + +func loadAllTestDocs(testFolderPath string) ([]json.RawMessage, error) { + testCaseFiles, err := listTestCaseFiles(testFolderPath) + if err != nil { + return nil, err + } + + var docs []json.RawMessage + for _, file := range testCaseFiles { + path := filepath.Join(testFolderPath, file) + tc, err := loadTestCaseFile(path) + if err != nil { + return nil, err + } + docs = append(docs, tc.events...) + } + return docs, err +} diff --git a/internal/testrunner/runners/pipeline/runner.go b/internal/testrunner/runners/pipeline/runner.go index cdd5045f85..37956245c0 100644 --- a/internal/testrunner/runners/pipeline/runner.go +++ b/internal/testrunner/runners/pipeline/runner.go @@ -169,13 +169,32 @@ func (r *runner) run() ([]testrunner.TestResult, error) { } results = append(results, tr) } + + if r.options.Benchmark.Enabled { + start := time.Now() + tr := testrunner.TestResult{ + TestType: TestType + " benchmark", + Package: r.options.TestFolder.Package, + DataStream: r.options.TestFolder.DataStream, + } + if tr.Benchmark, err = BenchmarkPipeline(r.options); err != nil { + tr.ErrorMsg = err.Error() + } + tr.TimeElapsed = time.Since(start) + results = append(results, tr) + } + return results, nil } func (r *runner) listTestCaseFiles() ([]string, error) { - fis, err := os.ReadDir(r.options.TestFolder.Path) + return listTestCaseFiles(r.options.TestFolder.Path) +} + +func listTestCaseFiles(path string) ([]string, error) { + fis, err := os.ReadDir(path) if err != nil { - return nil, errors.Wrapf(err, "reading pipeline tests failed (path: %s)", r.options.TestFolder.Path) + return nil, errors.Wrapf(err, "reading pipeline tests failed (path: %s)", path) } var files []string @@ -190,7 +209,10 @@ func (r *runner) listTestCaseFiles() ([]string, error) { } func (r *runner) loadTestCaseFile(testCaseFile string) (*testCase, error) { - testCasePath := filepath.Join(r.options.TestFolder.Path, testCaseFile) + return loadTestCaseFile(filepath.Join(r.options.TestFolder.Path, testCaseFile)) +} + +func loadTestCaseFile(testCasePath string) (*testCase, error) { testCaseData, err := os.ReadFile(testCasePath) if err != nil { return nil, errors.Wrapf(err, "reading input file failed (testCasePath: %s)", testCasePath) @@ -201,6 +223,7 @@ func (r *runner) loadTestCaseFile(testCaseFile string) (*testCase, error) { return nil, errors.Wrapf(err, "reading config for test case failed (testCasePath: %s)", testCasePath) } + testCaseFile := filepath.Base(testCasePath) if config.Skip != nil { return &testCase{ name: testCaseFile, diff --git a/internal/testrunner/testrunner.go 
b/internal/testrunner/testrunner.go index 3c6fb4f1d8..d6ab320f24 100644 --- a/internal/testrunner/testrunner.go +++ b/internal/testrunner/testrunner.go @@ -30,6 +30,7 @@ type TestOptions struct { DeferCleanup time.Duration ServiceVariant string WithCoverage bool + Benchmark BenchmarkConfig } // TestRunner is the interface all test runners must implement. @@ -90,6 +91,9 @@ type TestResult struct { // Coverage details in Cobertura format (optional). Coverage *CoberturaCoverage + + // Benchmark results (optional). + Benchmark *BenchmarkResult } // ResultComposer wraps a TestResult and provides convenience methods for From 9364cff034aea69464c83c5339892401b72e4b76 Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Fri, 22 Jul 2022 01:39:28 +0200 Subject: [PATCH 02/20] gofumpt --- internal/testrunner/runners/pipeline/benchmark.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/internal/testrunner/runners/pipeline/benchmark.go b/internal/testrunner/runners/pipeline/benchmark.go index dd3c2c5f01..957864c5ec 100644 --- a/internal/testrunner/runners/pipeline/benchmark.go +++ b/internal/testrunner/runners/pipeline/benchmark.go @@ -201,11 +201,13 @@ type aggregation struct { err error } -type keyFn func(ingest.Pipeline, ingest.Processor) string -type valueFn func(record ingest.StatsRecord) int64 -type mapFn func(processorPerformance) testrunner.BenchmarkValue -type compareFn func(a, b processorPerformance) bool -type filterFn func(processorPerformance) bool +type ( + keyFn func(ingest.Pipeline, ingest.Processor) string + valueFn func(record ingest.StatsRecord) int64 + mapFn func(processorPerformance) testrunner.BenchmarkValue + compareFn func(a, b processorPerformance) bool + filterFn func(processorPerformance) bool +) func (ir ingestResult) aggregate(key keyFn, value valueFn) (agg aggregation) { pipelines := make(map[string]ingest.Pipeline, len(ir.pipelines)) From ded422db8f0740de743b5015e1cf344e35481852 Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Fri, 22 Jul 2022 09:06:36 +0200 Subject: [PATCH 03/20] Minor rename --- internal/testrunner/runners/pipeline/benchmark.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/testrunner/runners/pipeline/benchmark.go b/internal/testrunner/runners/pipeline/benchmark.go index 957864c5ec..ec3edb5f52 100644 --- a/internal/testrunner/runners/pipeline/benchmark.go +++ b/internal/testrunner/runners/pipeline/benchmark.go @@ -68,7 +68,7 @@ func BenchmarkPipeline(options testrunner.TestOptions) (*testrunner.BenchmarkRes } return record.TimeInMillis * int64(time.Millisecond) / record.Count } - asPercentageOfTotalDuration := func(perf processorPerformance) testrunner.BenchmarkValue { + asPercentageOfTotalTime := func(perf processorPerformance) testrunner.BenchmarkValue { return testrunner.BenchmarkValue{ Name: perf.key, Description: perf.key, @@ -93,7 +93,7 @@ func BenchmarkPipeline(options testrunner.TestOptions) (*testrunner.BenchmarkRes filter(nonZero). sort(descending). top(numTopProcs). - collect(asPercentageOfTotalDuration) + collect(asPercentageOfTotalTime) if err != nil { return nil, err } From e097ac1d9e28e93d62aa50289dea3346ce9ed863 Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Fri, 22 Jul 2022 10:34:46 +0200 Subject: [PATCH 04/20] Generate benchmark output files per datastream xUnit format doesn't allow multiple results in a single file. 
--- cmd/testrunner.go | 6 ++--- internal/testrunner/report_format.go | 6 ++--- .../testrunner/reporters/formats/human.go | 20 ++++++++-------- .../testrunner/reporters/formats/xunit.go | 23 +++++++++++-------- 4 files changed, 30 insertions(+), 25 deletions(-) diff --git a/cmd/testrunner.go b/cmd/testrunner.go index 13b83dedc9..29680b7ec6 100644 --- a/cmd/testrunner.go +++ b/cmd/testrunner.go @@ -234,7 +234,7 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command } format := testrunner.TestReportFormat(reportFormat) - testReport, benchReport, err := testrunner.FormatReport(format, results) + testReport, benchReports, err := testrunner.FormatReport(format, results) if err != nil { return errors.Wrap(err, "error formatting test report") } @@ -248,8 +248,8 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command return errors.Wrap(err, "error writing test report") } - if benchReport != "" { - if err := testrunner.WriteReport(m.Name, testrunner.TestReportOutput(reportOutput), benchReport, format, testrunner.ReportTypeBench); err != nil { + for idx, report := range benchReports { + if err := testrunner.WriteReport(fmt.Sprintf("%s(%d)", m.Name, idx+1), testrunner.TestReportOutput(reportOutput), report, format, testrunner.ReportTypeBench); err != nil { return errors.Wrap(err, "error writing benchmark report") } } diff --git a/internal/testrunner/report_format.go b/internal/testrunner/report_format.go index cb298c76c0..746952c598 100644 --- a/internal/testrunner/report_format.go +++ b/internal/testrunner/report_format.go @@ -10,7 +10,7 @@ import "fmt" type TestReportFormat string // ReportFormatFunc defines the report formatter function. -type ReportFormatFunc func(results []TestResult) (string, string, error) +type ReportFormatFunc func(results []TestResult) (string, []string, error) var reportFormatters = map[TestReportFormat]ReportFormatFunc{} @@ -20,10 +20,10 @@ func RegisterReporterFormat(name TestReportFormat, formatFunc ReportFormatFunc) } // FormatReport delegates formatting of test results to the registered test report formatter. 
-func FormatReport(name TestReportFormat, results []TestResult) (testReport string, benchmarkReport string, err error) { +func FormatReport(name TestReportFormat, results []TestResult) (testReport string, benchmarkReports []string, err error) { reportFunc, defined := reportFormatters[name] if !defined { - return "", "", fmt.Errorf("unregistered test report format: %s", name) + return "", nil, fmt.Errorf("unregistered test report format: %s", name) } return reportFunc(results) diff --git a/internal/testrunner/reporters/formats/human.go b/internal/testrunner/reporters/formats/human.go index c7459961db..dcc4916ed2 100644 --- a/internal/testrunner/reporters/formats/human.go +++ b/internal/testrunner/reporters/formats/human.go @@ -23,9 +23,9 @@ const ( ReportFormatHuman testrunner.TestReportFormat = "human" ) -func reportHumanFormat(results []testrunner.TestResult) (string, string, error) { +func reportHumanFormat(results []testrunner.TestResult) (string, []string, error) { if len(results) == 0 { - return "No test results", "", nil + return "No test results", nil, nil } var benchmarks []testrunner.BenchmarkResult @@ -37,11 +37,11 @@ func reportHumanFormat(results []testrunner.TestResult) (string, string, error) testFmtd, err := reportHumanFormatTest(results) if err != nil { - return "", "", err + return "", nil, err } benchFmtd, err := reportHumanFormatBenchmark(benchmarks) if err != nil { - return "", "", err + return "", nil, err } return testFmtd, benchFmtd, nil } @@ -91,19 +91,19 @@ func reportHumanFormatTest(results []testrunner.TestResult) (string, error) { return report.String(), nil } -func reportHumanFormatBenchmark(benchmarks []testrunner.BenchmarkResult) (string, error) { - var report strings.Builder - for idx, b := range benchmarks { - report.WriteString(fmt.Sprintf("\n\nBenchmark results %d/%d:\n\n", idx+1, len(benchmarks))) - +func reportHumanFormatBenchmark(benchmarks []testrunner.BenchmarkResult) ([]string, error) { + var textReports []string + for _, b := range benchmarks { + var report strings.Builder if len(b.Parameters) > 0 { report.WriteString(renderBenchmarkTable("parameters", b.Parameters) + "\n") } for _, test := range b.Tests { report.WriteString(renderBenchmarkTable(test.Name, test.Results) + "\n") } + textReports = append(textReports, report.String()) } - return report.String(), nil + return textReports, nil } func renderBenchmarkTable(title string, values []testrunner.BenchmarkValue) string { diff --git a/internal/testrunner/reporters/formats/xunit.go b/internal/testrunner/reporters/formats/xunit.go index 7176227cbe..6282b4824a 100644 --- a/internal/testrunner/reporters/formats/xunit.go +++ b/internal/testrunner/reporters/formats/xunit.go @@ -52,7 +52,7 @@ type skipped struct { Message string `xml:"message,attr"` } -func reportXUnitFormat(results []testrunner.TestResult) (string, string, error) { +func reportXUnitFormat(results []testrunner.TestResult) (string, []string, error) { var benchmarks []testrunner.BenchmarkResult for _, r := range results { if r.Benchmark != nil { @@ -61,11 +61,11 @@ func reportXUnitFormat(results []testrunner.TestResult) (string, string, error) } testFmtd, err := reportXUnitFormatTest(results) if err != nil { - return "", "", err + return "", nil, err } benchFmtd, err := reportXUnitFormatBenchmark(benchmarks) if err != nil { - return "", "", err + return "", nil, err } return testFmtd, benchFmtd, nil } @@ -162,11 +162,16 @@ func reportXUnitFormatTest(results []testrunner.TestResult) (string, error) { return xml.Header + string(out), nil } 
-func reportXUnitFormatBenchmark(benchmarks []testrunner.BenchmarkResult) (string, error) { - out, err := xml.MarshalIndent(benchmarks, "", " ") - if err != nil { - return "", errors.Wrap(err, "unable to format benchmark results as xUnit") +func reportXUnitFormatBenchmark(benchmarks []testrunner.BenchmarkResult) ([]string, error) { + var reports []string + for _, b := range benchmarks { + // Remove detailed by-processor tables from xUnit report + b.Tests = b.Tests[:2] + out, err := xml.MarshalIndent(b, "", " ") + if err != nil { + return nil, errors.Wrap(err, "unable to format benchmark results as xUnit") + } + reports = append(reports, xml.Header+string(out)) } - - return xml.Header + string(out), nil + return reports, nil } From f8281060d7cc68c2d940d58bea6575cad257444f Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Fri, 22 Jul 2022 11:07:43 +0200 Subject: [PATCH 05/20] Filter-out detailed reports in xUnit --- cmd/testrunner.go | 2 +- internal/testrunner/benchmark.go | 3 +++ internal/testrunner/reporters/formats/xunit.go | 11 +++++++++-- internal/testrunner/runners/pipeline/benchmark.go | 2 ++ 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/cmd/testrunner.go b/cmd/testrunner.go index 29680b7ec6..a061cdca6a 100644 --- a/cmd/testrunner.go +++ b/cmd/testrunner.go @@ -249,7 +249,7 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command } for idx, report := range benchReports { - if err := testrunner.WriteReport(fmt.Sprintf("%s(%d)", m.Name, idx+1), testrunner.TestReportOutput(reportOutput), report, format, testrunner.ReportTypeBench); err != nil { + if err := testrunner.WriteReport(fmt.Sprintf("%s-%d", m.Name, idx+1), testrunner.TestReportOutput(reportOutput), report, format, testrunner.ReportTypeBench); err != nil { return errors.Wrap(err, "error writing benchmark report") } } diff --git a/internal/testrunner/benchmark.go b/internal/testrunner/benchmark.go index b2d348b77f..cf72b86965 100644 --- a/internal/testrunner/benchmark.go +++ b/internal/testrunner/benchmark.go @@ -45,6 +45,9 @@ type BenchmarkResult struct { type BenchmarkTest struct { // Name of this test. Name string `xml:"name,attr"` + // Detailed benchmark tests will be printed to the output but not + // included in xUnit reports. + Detailed bool `xml:"-"` // Description of this test. Description string `xml:"description,omitempty"` // Parameters for this test. diff --git a/internal/testrunner/reporters/formats/xunit.go b/internal/testrunner/reporters/formats/xunit.go index 6282b4824a..52f6ba1b86 100644 --- a/internal/testrunner/reporters/formats/xunit.go +++ b/internal/testrunner/reporters/formats/xunit.go @@ -165,8 +165,15 @@ func reportXUnitFormatTest(results []testrunner.TestResult) (string, error) { func reportXUnitFormatBenchmark(benchmarks []testrunner.BenchmarkResult) ([]string, error) { var reports []string for _, b := range benchmarks { - // Remove detailed by-processor tables from xUnit report - b.Tests = b.Tests[:2] + // Filter out detailed tests. These add too much information for the + // aggregated nature of xUnit reports, creating a lot of noise in Jenkins. 
+ var tests []testrunner.BenchmarkTest + for _, t := range b.Tests { + if !t.Detailed { + tests = append(tests, t) + } + } + b.Tests = tests out, err := xml.MarshalIndent(b, "", " ") if err != nil { return nil, errors.Wrap(err, "unable to format benchmark results as xUnit") diff --git a/internal/testrunner/runners/pipeline/benchmark.go b/internal/testrunner/runners/pipeline/benchmark.go index ec3edb5f52..03d1af0d53 100644 --- a/internal/testrunner/runners/pipeline/benchmark.go +++ b/internal/testrunner/runners/pipeline/benchmark.go @@ -148,11 +148,13 @@ func BenchmarkPipeline(options testrunner.TestOptions) (*testrunner.BenchmarkRes }, { Name: "processors by total time", + Detailed: true, Description: fmt.Sprintf("top %d processors by time spent", numTopProcs), Results: topAbsProc, }, { Name: "processors by average time per doc", + Detailed: true, Description: fmt.Sprintf("top %d processors by average time per document", numTopProcs), Results: topRelProcs, }, From ea3760cb07ff8c3e8667da357864fe3a4edb04b5 Mon Sep 17 00:00:00 2001 From: Adrian Serrano Date: Fri, 22 Jul 2022 11:29:30 +0200 Subject: [PATCH 06/20] Cleanup config options --- cmd/testrunner.go | 22 +++++++++++----------- internal/cobraext/flags.go | 10 +++++----- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/cmd/testrunner.go b/cmd/testrunner.go index a061cdca6a..97cd141550 100644 --- a/cmd/testrunner.go +++ b/cmd/testrunner.go @@ -69,8 +69,8 @@ func setupTestCommand() *cobraext.Command { cmd.PersistentFlags().StringP(cobraext.ReportFormatFlagName, "", string(formats.ReportFormatHuman), cobraext.ReportFormatFlagDescription) cmd.PersistentFlags().StringP(cobraext.ReportOutputFlagName, "", string(outputs.ReportOutputSTDOUT), cobraext.ReportOutputFlagDescription) cmd.PersistentFlags().BoolP(cobraext.TestCoverageFlagName, "", false, cobraext.TestCoverageFlagDescription) - cmd.PersistentFlags().BoolP(cobraext.TestPerfFlagName, "", false, cobraext.TestPerfFlagDescription) - cmd.PersistentFlags().IntP(cobraext.TestPerfCountFlagName, "", 1000, cobraext.TestPerfCountFlagDescription) + cmd.PersistentFlags().BoolP(cobraext.TestBenchFlagName, "", false, cobraext.TestBenchFlagDescription) + cmd.PersistentFlags().IntP(cobraext.TestBenchCountFlagName, "", 1000, cobraext.TestBenchCountFlagDescription) cmd.PersistentFlags().DurationP(cobraext.TestPerfDurationFlagName, "", time.Duration(0), cobraext.TestPerfDurationFlagDescription) cmd.PersistentFlags().DurationP(cobraext.DeferCleanupFlagName, "", 0, cobraext.DeferCleanupFlagDescription) cmd.PersistentFlags().String(cobraext.VariantFlagName, "", cobraext.VariantFlagDescription) @@ -126,19 +126,19 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command return cobraext.FlagParsingError(err, cobraext.TestCoverageFlagName) } - testPerf, err := cmd.Flags().GetBool(cobraext.TestPerfFlagName) + testBench, err := cmd.Flags().GetBool(cobraext.TestBenchFlagName) if err != nil { - return cobraext.FlagParsingError(err, cobraext.TestPerfFlagName) + return cobraext.FlagParsingError(err, cobraext.TestBenchFlagName) } - testPerfCount, err := cmd.Flags().GetInt(cobraext.TestPerfCountFlagName) + testBenchCount, err := cmd.Flags().GetInt(cobraext.TestBenchCountFlagName) if err != nil { - return cobraext.FlagParsingError(err, cobraext.TestPerfCountFlagName) + return cobraext.FlagParsingError(err, cobraext.TestBenchCountFlagName) } - testPerfDur, err := cmd.Flags().GetDuration(cobraext.TestPerfDurationFlagName) + testBenchDur, err := 
cmd.Flags().GetDuration(cobraext.TestPerfDurationFlagName) if err != nil { - return cobraext.FlagParsingError(err, cobraext.TestPerfCountFlagDescription) + return cobraext.FlagParsingError(err, cobraext.TestBenchCountFlagDescription) } packageRootPath, found, err := packages.FindPackageRoot() @@ -220,9 +220,9 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command ServiceVariant: variantFlag, WithCoverage: testCoverage, Benchmark: testrunner.BenchmarkConfig{ - Enabled: testPerf, - NumDocs: testPerfCount, - Duration: testPerfDur, + Enabled: testBench, + NumDocs: testBenchCount, + Duration: testBenchDur, }, }) diff --git a/internal/cobraext/flags.go b/internal/cobraext/flags.go index 7073318641..6ef4c22661 100644 --- a/internal/cobraext/flags.go +++ b/internal/cobraext/flags.go @@ -131,14 +131,14 @@ const ( TestCoverageFlagName = "test-coverage" TestCoverageFlagDescription = "generate Cobertura test coverage reports" - TestPerfFlagName = "bench" - TestPerfFlagDescription = "run benchmarks" + TestBenchFlagName = "bench" + TestBenchFlagDescription = "run benchmarks" - TestPerfCountFlagName = "bench-count" - TestPerfCountFlagDescription = "number of docs to use for benchmark" + TestBenchCountFlagName = "bench-count" + TestBenchCountFlagDescription = "fixed number of docs to use for benchmark" TestPerfDurationFlagName = "bench-duration" - TestPerfDurationFlagDescription = "TODO" + TestPerfDurationFlagDescription = "adjust the number of docs so that the benchmark runs for this duration" VariantFlagName = "variant" VariantFlagDescription = "service variant" From 5c16fe25880ff75765b197a7c03bcbe1083dafdc Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Thu, 1 Sep 2022 11:27:24 +0200 Subject: [PATCH 07/20] Move benchmark code to its own command --- cmd/benchrunner.go | 248 +++++++++++ cmd/root.go | 1 + cmd/testrunner.go | 33 +- .../{testrunner => benchrunner}/benchmark.go | 2 +- internal/benchrunner/benchrunner.go | 279 ++++++++++++ internal/benchrunner/coverageoutput.go | 400 ++++++++++++++++++ internal/benchrunner/coverageoutput_test.go | 279 ++++++++++++ internal/benchrunner/errors.go | 18 + internal/benchrunner/report_format.go | 30 ++ internal/benchrunner/report_output.go | 40 ++ .../benchrunner/reporters/formats/human.go | 123 ++++++ .../benchrunner/reporters/formats/xunit.go | 184 ++++++++ .../benchrunner/reporters/outputs/file.go | 73 ++++ .../benchrunner/reporters/outputs/stdout.go | 33 ++ .../runners/pipeline/benchmark.go | 30 +- .../benchrunner/runners/pipeline/coverage.go | 136 ++++++ .../runners/pipeline/ingest_pipeline.go | 222 ++++++++++ .../benchrunner/runners/pipeline/runner.go | 398 +++++++++++++++++ .../runners/pipeline/runner_test.go | 149 +++++++ .../benchrunner/runners/pipeline/test_case.go | 129 ++++++ .../runners/pipeline/test_config.go | 72 ++++ .../runners/pipeline/test_result.go | 260 ++++++++++++ .../runners/pipeline/test_result_test.go | 45 ++ internal/benchrunner/runners/runners.go | 13 + internal/benchrunner/test_config.go | 30 ++ internal/cobraext/flags.go | 3 - internal/testrunner/report_format.go | 8 +- internal/testrunner/report_output.go | 14 +- .../testrunner/reporters/formats/human.go | 55 +-- .../testrunner/reporters/formats/xunit.go | 41 +- internal/testrunner/reporters/outputs/file.go | 25 +- .../testrunner/reporters/outputs/stdout.go | 10 +- .../testrunner/runners/pipeline/runner.go | 28 +- internal/testrunner/testrunner.go | 4 - 34 files changed, 3204 insertions(+), 211 deletions(-) create mode 100644 cmd/benchrunner.go rename 
internal/{testrunner => benchrunner}/benchmark.go (99%) create mode 100644 internal/benchrunner/benchrunner.go create mode 100644 internal/benchrunner/coverageoutput.go create mode 100644 internal/benchrunner/coverageoutput_test.go create mode 100644 internal/benchrunner/errors.go create mode 100644 internal/benchrunner/report_format.go create mode 100644 internal/benchrunner/report_output.go create mode 100644 internal/benchrunner/reporters/formats/human.go create mode 100644 internal/benchrunner/reporters/formats/xunit.go create mode 100644 internal/benchrunner/reporters/outputs/file.go create mode 100644 internal/benchrunner/reporters/outputs/stdout.go rename internal/{testrunner => benchrunner}/runners/pipeline/benchmark.go (90%) create mode 100644 internal/benchrunner/runners/pipeline/coverage.go create mode 100644 internal/benchrunner/runners/pipeline/ingest_pipeline.go create mode 100644 internal/benchrunner/runners/pipeline/runner.go create mode 100644 internal/benchrunner/runners/pipeline/runner_test.go create mode 100644 internal/benchrunner/runners/pipeline/test_case.go create mode 100644 internal/benchrunner/runners/pipeline/test_config.go create mode 100644 internal/benchrunner/runners/pipeline/test_result.go create mode 100644 internal/benchrunner/runners/pipeline/test_result_test.go create mode 100644 internal/benchrunner/runners/runners.go create mode 100644 internal/benchrunner/test_config.go diff --git a/cmd/benchrunner.go b/cmd/benchrunner.go new file mode 100644 index 0000000000..bae3b3875b --- /dev/null +++ b/cmd/benchrunner.go @@ -0,0 +1,248 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package cmd + +import ( + "fmt" + "path/filepath" + "strings" + "time" + + "github.com/pkg/errors" + "github.com/spf13/cobra" + + "github.com/elastic/elastic-package/internal/benchrunner" + "github.com/elastic/elastic-package/internal/benchrunner/reporters/formats" + "github.com/elastic/elastic-package/internal/benchrunner/reporters/outputs" + _ "github.com/elastic/elastic-package/internal/benchrunner/runners" // register all test runners + "github.com/elastic/elastic-package/internal/cobraext" + "github.com/elastic/elastic-package/internal/common" + "github.com/elastic/elastic-package/internal/elasticsearch" + "github.com/elastic/elastic-package/internal/packages" + "github.com/elastic/elastic-package/internal/signal" +) + +const benchLongDescription = `Use this command to run benchmarks on a package. Currently, the following types of benchmarks are available: + +#### Pipeline Benchmarks +These benchmarks allow you to benchmark any Ingest Node Pipelines defined by your packages. + +For details on how to configure pipeline test for a package, review the [HOWTO guide](https://github.com/elastic/elastic-package/blob/main/docs/howto/pipeline_benchmarks.md).` + +func setupBenchmarkCommand() *cobraext.Command { + var benchTypeCmdActions []cobraext.CommandAction + + cmd := &cobra.Command{ + Use: "benchmark", + Short: "Run benchmarks for the package", + Long: benchLongDescription, + RunE: func(cmd *cobra.Command, args []string) error { + cmd.Println("Run benchmarks for the package") + + if len(args) > 0 { + return fmt.Errorf("unsupported benchmark type: %s", args[0]) + } + + return cobraext.ComposeCommandActions(cmd, args, benchTypeCmdActions...) 
+ }} + + cmd.PersistentFlags().BoolP(cobraext.FailOnMissingFlagName, "m", false, cobraext.FailOnMissingFlagDescription) + cmd.PersistentFlags().BoolP(cobraext.GenerateTestResultFlagName, "g", false, cobraext.GenerateTestResultFlagDescription) + cmd.PersistentFlags().StringP(cobraext.ReportFormatFlagName, "", string(formats.ReportFormatHuman), cobraext.ReportFormatFlagDescription) + cmd.PersistentFlags().StringP(cobraext.ReportOutputFlagName, "", string(outputs.ReportOutputSTDOUT), cobraext.ReportOutputFlagDescription) + cmd.PersistentFlags().BoolP(cobraext.TestCoverageFlagName, "", false, cobraext.TestCoverageFlagDescription) + cmd.PersistentFlags().IntP(cobraext.TestBenchCountFlagName, "", 1000, cobraext.TestBenchCountFlagDescription) + cmd.PersistentFlags().DurationP(cobraext.TestPerfDurationFlagName, "", time.Duration(0), cobraext.TestPerfDurationFlagDescription) + cmd.PersistentFlags().DurationP(cobraext.DeferCleanupFlagName, "", 0, cobraext.DeferCleanupFlagDescription) + cmd.PersistentFlags().String(cobraext.VariantFlagName, "", cobraext.VariantFlagDescription) + + for benchType, runner := range benchrunner.TestRunners() { + action := benchTypeCommandActionFactory(runner) + benchTypeCmdActions = append(benchTypeCmdActions, action) + + benchTypeCmd := &cobra.Command{ + Use: string(benchType), + Short: fmt.Sprintf("Run %s benchmarks", runner.String()), + Long: fmt.Sprintf("Run %s benchmarks for the package.", runner.String()), + RunE: action, + } + + if runner.CanRunPerDataStream() { + benchTypeCmd.Flags().StringSliceP(cobraext.DataStreamsFlagName, "d", nil, cobraext.DataStreamsFlagDescription) + } + + cmd.AddCommand(benchTypeCmd) + } + + return cobraext.NewCommand(cmd, cobraext.ContextPackage) +} + +func benchTypeCommandActionFactory(runner benchrunner.TestRunner) cobraext.CommandAction { + benchType := runner.Type() + return func(cmd *cobra.Command, args []string) error { + cmd.Printf("Run %s tests for the package\n", benchType) + + failOnMissing, err := cmd.Flags().GetBool(cobraext.FailOnMissingFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.FailOnMissingFlagName) + } + + generateTestResult, err := cmd.Flags().GetBool(cobraext.GenerateTestResultFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.GenerateTestResultFlagName) + } + + reportFormat, err := cmd.Flags().GetString(cobraext.ReportFormatFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.ReportFormatFlagName) + } + + reportOutput, err := cmd.Flags().GetString(cobraext.ReportOutputFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.ReportOutputFlagName) + } + + testCoverage, err := cmd.Flags().GetBool(cobraext.TestCoverageFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.TestCoverageFlagName) + } + + testBenchCount, err := cmd.Flags().GetInt(cobraext.TestBenchCountFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.TestBenchCountFlagName) + } + + testBenchDur, err := cmd.Flags().GetDuration(cobraext.TestPerfDurationFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.TestBenchCountFlagDescription) + } + + packageRootPath, found, err := packages.FindPackageRoot() + if !found { + return errors.New("package root not found") + } + if err != nil { + return errors.Wrap(err, "locating package root failed") + } + + signal.Enable() + + var testFolders []benchrunner.TestFolder + if runner.CanRunPerDataStream() { + var dataStreams []string + // We check for the 
existence of the data streams flag before trying to + // parse it because if the root test command is run instead of one of the + // subcommands of test, the data streams flag will not be defined. + if cmd.Flags().Lookup(cobraext.DataStreamsFlagName) != nil { + dataStreams, err = cmd.Flags().GetStringSlice(cobraext.DataStreamsFlagName) + common.TrimStringSlice(dataStreams) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.DataStreamsFlagName) + } + + err = validateDataStreamsFlag(packageRootPath, dataStreams) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.DataStreamsFlagName) + } + } + + if runner.TestFolderRequired() { + testFolders, err = benchrunner.FindTestFolders(packageRootPath, dataStreams, benchType) + if err != nil { + return errors.Wrap(err, "unable to determine test folder paths") + } + } else { + testFolders, err = benchrunner.AssumeTestFolders(packageRootPath, dataStreams, benchType) + if err != nil { + return errors.Wrap(err, "unable to assume test folder paths") + } + } + + if failOnMissing && len(testFolders) == 0 { + if len(dataStreams) > 0 { + return fmt.Errorf("no %s tests found for %s data stream(s)", benchType, strings.Join(dataStreams, ",")) + } + return fmt.Errorf("no %s tests found", benchType) + } + } else { + _, pkg := filepath.Split(packageRootPath) + testFolders = []benchrunner.TestFolder{ + { + Package: pkg, + }, + } + } + + deferCleanup, err := cmd.Flags().GetDuration(cobraext.DeferCleanupFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.DeferCleanupFlagName) + } + + variantFlag, _ := cmd.Flags().GetString(cobraext.VariantFlagName) + + esClient, err := elasticsearch.Client() + if err != nil { + return errors.Wrap(err, "can't create Elasticsearch client") + } + + var results []benchrunner.TestResult + for _, folder := range testFolders { + r, err := benchrunner.Run(benchType, benchrunner.TestOptions{ + TestFolder: folder, + PackageRootPath: packageRootPath, + GenerateTestResult: generateTestResult, + API: esClient.API, + DeferCleanup: deferCleanup, + ServiceVariant: variantFlag, + WithCoverage: testCoverage, + Benchmark: benchrunner.BenchmarkConfig{ + NumDocs: testBenchCount, + Duration: testBenchDur, + }, + }) + + results = append(results, r...) 
+ + if err != nil { + return errors.Wrapf(err, "error running package %s tests", benchType) + } + } + + format := benchrunner.TestReportFormat(reportFormat) + testReport, benchReports, err := benchrunner.FormatReport(format, results) + if err != nil { + return errors.Wrap(err, "error formatting test report") + } + + m, err := packages.ReadPackageManifestFromPackageRoot(packageRootPath) + if err != nil { + return errors.Wrapf(err, "reading package manifest failed (path: %s)", packageRootPath) + } + + if err := benchrunner.WriteReport(m.Name, benchrunner.TestReportOutput(reportOutput), testReport, format, benchrunner.ReportTypeTest); err != nil { + return errors.Wrap(err, "error writing test report") + } + + for idx, report := range benchReports { + if err := benchrunner.WriteReport(fmt.Sprintf("%s-%d", m.Name, idx+1), benchrunner.TestReportOutput(reportOutput), report, format, benchrunner.ReportTypeBench); err != nil { + return errors.Wrap(err, "error writing benchmark report") + } + } + if testCoverage { + err := benchrunner.WriteCoverage(packageRootPath, m.Name, runner.Type(), results) + if err != nil { + return errors.Wrap(err, "error writing test coverage") + } + } + + // Check if there is any error or failure reported + for _, r := range results { + if r.ErrorMsg != "" || r.FailureMsg != "" { + return errors.New("one or more test cases failed") + } + } + return nil + } +} diff --git a/cmd/root.go b/cmd/root.go index 0997b70eec..9cb7ba3902 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -15,6 +15,7 @@ import ( ) var commands = []*cobraext.Command{ + setupBenchmarkCommand(), setupBuildCommand(), setupChangelogCommand(), setupCheckCommand(), diff --git a/cmd/testrunner.go b/cmd/testrunner.go index 97cd141550..9247b56b8f 100644 --- a/cmd/testrunner.go +++ b/cmd/testrunner.go @@ -9,7 +9,6 @@ import ( "os" "path/filepath" "strings" - "time" "github.com/pkg/errors" "github.com/spf13/cobra" @@ -69,9 +68,6 @@ func setupTestCommand() *cobraext.Command { cmd.PersistentFlags().StringP(cobraext.ReportFormatFlagName, "", string(formats.ReportFormatHuman), cobraext.ReportFormatFlagDescription) cmd.PersistentFlags().StringP(cobraext.ReportOutputFlagName, "", string(outputs.ReportOutputSTDOUT), cobraext.ReportOutputFlagDescription) cmd.PersistentFlags().BoolP(cobraext.TestCoverageFlagName, "", false, cobraext.TestCoverageFlagDescription) - cmd.PersistentFlags().BoolP(cobraext.TestBenchFlagName, "", false, cobraext.TestBenchFlagDescription) - cmd.PersistentFlags().IntP(cobraext.TestBenchCountFlagName, "", 1000, cobraext.TestBenchCountFlagDescription) - cmd.PersistentFlags().DurationP(cobraext.TestPerfDurationFlagName, "", time.Duration(0), cobraext.TestPerfDurationFlagDescription) cmd.PersistentFlags().DurationP(cobraext.DeferCleanupFlagName, "", 0, cobraext.DeferCleanupFlagDescription) cmd.PersistentFlags().String(cobraext.VariantFlagName, "", cobraext.VariantFlagDescription) @@ -126,21 +122,6 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command return cobraext.FlagParsingError(err, cobraext.TestCoverageFlagName) } - testBench, err := cmd.Flags().GetBool(cobraext.TestBenchFlagName) - if err != nil { - return cobraext.FlagParsingError(err, cobraext.TestBenchFlagName) - } - - testBenchCount, err := cmd.Flags().GetInt(cobraext.TestBenchCountFlagName) - if err != nil { - return cobraext.FlagParsingError(err, cobraext.TestBenchCountFlagName) - } - - testBenchDur, err := cmd.Flags().GetDuration(cobraext.TestPerfDurationFlagName) - if err != nil { - return 
cobraext.FlagParsingError(err, cobraext.TestBenchCountFlagDescription) - } - packageRootPath, found, err := packages.FindPackageRoot() if !found { return errors.New("package root not found") @@ -219,11 +200,6 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command DeferCleanup: deferCleanup, ServiceVariant: variantFlag, WithCoverage: testCoverage, - Benchmark: testrunner.BenchmarkConfig{ - Enabled: testBench, - NumDocs: testBenchCount, - Duration: testBenchDur, - }, }) results = append(results, r...) @@ -234,7 +210,7 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command } format := testrunner.TestReportFormat(reportFormat) - testReport, benchReports, err := testrunner.FormatReport(format, results) + report, err := testrunner.FormatReport(format, results) if err != nil { return errors.Wrap(err, "error formatting test report") } @@ -244,15 +220,10 @@ func testTypeCommandActionFactory(runner testrunner.TestRunner) cobraext.Command return errors.Wrapf(err, "reading package manifest failed (path: %s)", packageRootPath) } - if err := testrunner.WriteReport(m.Name, testrunner.TestReportOutput(reportOutput), testReport, format, testrunner.ReportTypeTest); err != nil { + if err := testrunner.WriteReport(m.Name, testrunner.TestReportOutput(reportOutput), report, format); err != nil { return errors.Wrap(err, "error writing test report") } - for idx, report := range benchReports { - if err := testrunner.WriteReport(fmt.Sprintf("%s-%d", m.Name, idx+1), testrunner.TestReportOutput(reportOutput), report, format, testrunner.ReportTypeBench); err != nil { - return errors.Wrap(err, "error writing benchmark report") - } - } if testCoverage { err := testrunner.WriteCoverage(packageRootPath, m.Name, runner.Type(), results) if err != nil { diff --git a/internal/testrunner/benchmark.go b/internal/benchrunner/benchmark.go similarity index 99% rename from internal/testrunner/benchmark.go rename to internal/benchrunner/benchmark.go index cf72b86965..da5f8d15d6 100644 --- a/internal/testrunner/benchmark.go +++ b/internal/benchrunner/benchmark.go @@ -2,7 +2,7 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. -package testrunner +package benchrunner import ( "fmt" diff --git a/internal/benchrunner/benchrunner.go b/internal/benchrunner/benchrunner.go new file mode 100644 index 0000000000..6699f238a5 --- /dev/null +++ b/internal/benchrunner/benchrunner.go @@ -0,0 +1,279 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package benchrunner + +import ( + "fmt" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/pkg/errors" + + "github.com/elastic/elastic-package/internal/elasticsearch" +) + +// TestType represents the various supported test types +type TestType string + +// TestOptions contains test runner options. +type TestOptions struct { + TestFolder TestFolder + PackageRootPath string + GenerateTestResult bool + API *elasticsearch.API + + DeferCleanup time.Duration + ServiceVariant string + WithCoverage bool + Benchmark BenchmarkConfig +} + +// TestRunner is the interface all test runners must implement. +type TestRunner interface { + // Type returns the test runner's type. 
+ Type() TestType + + // String returns the human-friendly name of the test runner. + String() string + + // Run executes the test runner. + Run(TestOptions) ([]TestResult, error) + + // TearDown cleans up any test runner resources. It must be called + // after the test runner has finished executing. + TearDown() error + + CanRunPerDataStream() bool + + TestFolderRequired() bool +} + +var runners = map[TestType]TestRunner{} + +// TestResult contains a single test's results +type TestResult struct { + // Name of test result. Optional. + Name string + + // Package to which this test result belongs. + Package string + + // TestType indicates the type of test. + TestType TestType + + // Data stream to which this test result belongs. + DataStream string + + // Time elapsed from running a test case to arriving at its result. + TimeElapsed time.Duration + + // If test case failed, short description of the failure. A failure is + // when the test completes execution but the actual results of the test + // don't match the expected results. + FailureMsg string + + // If test case failed, longer description of the failure. + FailureDetails string + + // If there was an error while running the test case, description + // of the error. An error is when the test cannot complete execution due + // to an unexpected runtime error in the test execution. + ErrorMsg string + + // If the test was skipped, the reason it was skipped and a link for more + // details. + Skipped *SkipConfig + + // Coverage details in Cobertura format (optional). + Coverage *CoberturaCoverage + + // Benchmark results (optional). + Benchmark *BenchmarkResult +} + +// ResultComposer wraps a TestResult and provides convenience methods for +// manipulating this TestResult. +type ResultComposer struct { + TestResult + StartTime time.Time +} + +// NewResultComposer returns a new ResultComposer with the StartTime +// initialized to now. +func NewResultComposer(tr TestResult) *ResultComposer { + return &ResultComposer{ + TestResult: tr, + StartTime: time.Now(), + } +} + +// WithError sets an error on the test result wrapped by ResultComposer. +func (rc *ResultComposer) WithError(err error) ([]TestResult, error) { + rc.TimeElapsed = time.Since(rc.StartTime) + if err == nil { + return []TestResult{rc.TestResult}, nil + } + + if tcf, ok := err.(ErrTestCaseFailed); ok { + rc.FailureMsg += tcf.Reason + rc.FailureDetails += tcf.Details + return []TestResult{rc.TestResult}, nil + } + + rc.ErrorMsg += err.Error() + return []TestResult{rc.TestResult}, err +} + +// WithSuccess marks the test result wrapped by ResultComposer as successful. +func (rc *ResultComposer) WithSuccess() ([]TestResult, error) { + return rc.WithError(nil) +} + +// WithSkip marks the test result wrapped by ResultComposer as skipped. +func (rc *ResultComposer) WithSkip(s *SkipConfig) ([]TestResult, error) { + rc.TestResult.Skipped = s + return rc.WithError(nil) +} + +// TestFolder encapsulates the test folder path and names of the package + data stream +// to which the test folder belongs. +type TestFolder struct { + Path string + Package string + DataStream string +} + +// AssumeTestFolders assumes potential test folders for the given package, data streams and test types. 
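+// Unlike FindTestFolders, the returned paths are constructed from the expected
+// layout and are not checked for existence on disk.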
+func AssumeTestFolders(packageRootPath string, dataStreams []string, testType TestType) ([]TestFolder, error) {
+ // Expected folder structure:
+ // <packageRoot>/
+ //   data_stream/
+ //     <dataStream>/
+
+ dataStreamsPath := filepath.Join(packageRootPath, "data_stream")
+
+ if len(dataStreams) == 0 {
+ fileInfos, err := os.ReadDir(dataStreamsPath)
+ if errors.Is(err, os.ErrNotExist) {
+ return []TestFolder{}, nil // no data streams defined
+ }
+ if err != nil {
+ return nil, errors.Wrapf(err, "can't read directory (path: %s)", dataStreamsPath)
+ }
+
+ for _, fi := range fileInfos {
+ if !fi.IsDir() {
+ continue
+ }
+ dataStreams = append(dataStreams, fi.Name())
+ }
+ }
+
+ var folders []TestFolder
+ for _, dataStream := range dataStreams {
+ folders = append(folders, TestFolder{
+ Path: filepath.Join(dataStreamsPath, dataStream, "_dev", "test", string(testType)),
+ Package: filepath.Base(packageRootPath),
+ DataStream: dataStream,
+ })
+ }
+ return folders, nil
+}
+
+// FindTestFolders finds test folders for the given package and, optionally, test type and data streams.
+func FindTestFolders(packageRootPath string, dataStreams []string, testType TestType) ([]TestFolder, error) {
+ // Expected folder structure:
+ // <packageRoot>/
+ //   data_stream/
+ //     <dataStream>/
+ //       _dev/
+ //         test/
+ //           <testType>/
+
+ testTypeGlob := "*"
+ if testType != "" {
+ testTypeGlob = string(testType)
+ }
+
+ var paths []string
+ if len(dataStreams) > 0 {
+ sort.Strings(dataStreams)
+ for _, dataStream := range dataStreams {
+ p, err := findTestFolderPaths(packageRootPath, dataStream, testTypeGlob)
+ if err != nil {
+ return nil, err
+ }
+
+ paths = append(paths, p...)
+ }
+ } else {
+ p, err := findTestFolderPaths(packageRootPath, "*", testTypeGlob)
+ if err != nil {
+ return nil, err
+ }
+
+ paths = p
+ }
+
+ folders := make([]TestFolder, len(paths))
+ _, pkg := filepath.Split(packageRootPath)
+ for idx, p := range paths {
+ relP := strings.TrimPrefix(p, packageRootPath)
+ parts := strings.Split(relP, string(filepath.Separator))
+ dataStream := parts[2]
+
+ folder := TestFolder{
+ p,
+ pkg,
+ dataStream,
+ }
+
+ folders[idx] = folder
+ }
+
+ return folders, nil
+}
+
+// RegisterRunner method registers the test runner.
+func RegisterRunner(runner TestRunner) {
+ runners[runner.Type()] = runner
+}
+
+// Run method delegates execution to the registered test runner, based on the test type.
+func Run(testType TestType, options TestOptions) ([]TestResult, error) {
+ runner, defined := runners[testType]
+ if !defined {
+ return nil, fmt.Errorf("unregistered test runner: %s", testType)
+ }
+
+ results, err := runner.Run(options)
+ tdErr := runner.TearDown()
+ if err != nil {
+ return nil, errors.Wrap(err, "could not complete test run")
+ }
+ if tdErr != nil {
+ return results, errors.Wrap(tdErr, "could not teardown test runner")
+ }
+ return results, nil
+}
+
+// TestRunners returns registered test runners.
+func TestRunners() map[TestType]TestRunner {
+ return runners
+}
+
+// findTestFolderPaths can only be called for test runners that require tests to be defined
+// at the data stream level.
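+// It globs data_stream/<dataStream>/_dev/test/<testType> below the package root.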
+func findTestFolderPaths(packageRootPath, dataStreamGlob, testTypeGlob string) ([]string, error) { + testFoldersGlob := filepath.Join(packageRootPath, "data_stream", dataStreamGlob, "_dev", "test", testTypeGlob) + paths, err := filepath.Glob(testFoldersGlob) + if err != nil { + return nil, errors.Wrap(err, "error finding test folders") + } + return paths, err +} diff --git a/internal/benchrunner/coverageoutput.go b/internal/benchrunner/coverageoutput.go new file mode 100644 index 0000000000..93aa1028b3 --- /dev/null +++ b/internal/benchrunner/coverageoutput.go @@ -0,0 +1,400 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package benchrunner + +import ( + "bytes" + "encoding/xml" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/pkg/errors" + + "github.com/elastic/elastic-package/internal/builder" + "github.com/elastic/elastic-package/internal/multierror" +) + +const coverageDtd = `` + +type testCoverageDetails struct { + packageName string + testType TestType + dataStreams map[string][]string // : + cobertura *CoberturaCoverage // For tests to provide custom Cobertura results. + errors multierror.Error +} + +func newTestCoverageDetails(packageName string, testType TestType) *testCoverageDetails { + return &testCoverageDetails{packageName: packageName, testType: testType, dataStreams: map[string][]string{}} +} + +func (tcd *testCoverageDetails) withUncoveredDataStreams(dataStreams []string) *testCoverageDetails { + for _, wt := range dataStreams { + tcd.dataStreams[wt] = []string{} + } + return tcd +} + +func (tcd *testCoverageDetails) withTestResults(results []TestResult) *testCoverageDetails { + for _, result := range results { + if _, ok := tcd.dataStreams[result.DataStream]; !ok { + tcd.dataStreams[result.DataStream] = []string{} + } + tcd.dataStreams[result.DataStream] = append(tcd.dataStreams[result.DataStream], result.Name) + if tcd.cobertura != nil && result.Coverage != nil { + if err := tcd.cobertura.merge(result.Coverage); err != nil { + tcd.errors = append(tcd.errors, errors.Wrapf(err, "can't merge Cobertura coverage for test `%s`", result.Name)) + } + } else if tcd.cobertura == nil { + tcd.cobertura = result.Coverage + } + } + return tcd +} + +// CoberturaCoverage is the root element for a Cobertura XML report. +type CoberturaCoverage struct { + XMLName xml.Name `xml:"coverage"` + LineRate float32 `xml:"line-rate,attr"` + BranchRate float32 `xml:"branch-rate,attr"` + Version string `xml:"version,attr"` + Timestamp int64 `xml:"timestamp,attr"` + LinesCovered int64 `xml:"lines-covered,attr"` + LinesValid int64 `xml:"lines-valid,attr"` + BranchesCovered int64 `xml:"branches-covered,attr"` + BranchesValid int64 `xml:"branches-valid,attr"` + Complexity float32 `xml:"complexity,attr"` + Sources []*CoberturaSource `xml:"sources>source"` + Packages []*CoberturaPackage `xml:"packages>package"` +} + +// CoberturaSource represents a base path to the covered source code. +type CoberturaSource struct { + Path string `xml:",chardata"` +} + +// CoberturaPackage represents a package in a Cobertura XML report. 
+type CoberturaPackage struct { + Name string `xml:"name,attr"` + LineRate float32 `xml:"line-rate,attr"` + BranchRate float32 `xml:"branch-rate,attr"` + Complexity float32 `xml:"complexity,attr"` + Classes []*CoberturaClass `xml:"classes>class"` +} + +// CoberturaClass represents a class in a Cobertura XML report. +type CoberturaClass struct { + Name string `xml:"name,attr"` + Filename string `xml:"filename,attr"` + LineRate float32 `xml:"line-rate,attr"` + BranchRate float32 `xml:"branch-rate,attr"` + Complexity float32 `xml:"complexity,attr"` + Methods []*CoberturaMethod `xml:"methods>method"` + Lines []*CoberturaLine `xml:"lines>line"` +} + +// CoberturaMethod represents a method in a Cobertura XML report. +type CoberturaMethod struct { + Name string `xml:"name,attr"` + Signature string `xml:"signature,attr"` + LineRate float32 `xml:"line-rate,attr"` + BranchRate float32 `xml:"branch-rate,attr"` + Complexity float32 `xml:"complexity,attr"` + Hits int64 `xml:"hits,attr"` + Lines []*CoberturaLine `xml:"lines>line"` +} + +// CoberturaLine represents a source line in a Cobertura XML report. +type CoberturaLine struct { + Number int `xml:"number,attr"` + Hits int64 `xml:"hits,attr"` +} + +func (c *CoberturaCoverage) bytes() ([]byte, error) { + out, err := xml.MarshalIndent(&c, "", " ") + if err != nil { + return nil, errors.Wrap(err, "unable to format test results as xUnit") + } + + var buffer bytes.Buffer + buffer.WriteString(xml.Header) + buffer.WriteString("\n") + buffer.WriteString(coverageDtd) + buffer.WriteString("\n") + buffer.Write(out) + return buffer.Bytes(), nil +} + +// merge merges two coverage reports for a given class. +func (c *CoberturaClass) merge(b *CoberturaClass) error { + // Check preconditions: classes should be the same. + equal := c.Name == b.Name && + c.Filename == b.Filename && + len(c.Lines) == len(b.Lines) && + len(c.Methods) == len(b.Methods) + for idx := range c.Lines { + equal = equal && c.Lines[idx].Number == b.Lines[idx].Number + } + for idx := range c.Methods { + equal = equal && c.Methods[idx].Name == b.Methods[idx].Name && + len(c.Methods[idx].Lines) == len(b.Methods[idx].Lines) + } + if !equal { + return errors.Errorf("merging incompatible classes: %+v != %+v", *c, *b) + } + // Update methods + for idx := range b.Methods { + c.Methods[idx].Hits += b.Methods[idx].Hits + for l := range b.Methods[idx].Lines { + c.Methods[idx].Lines[l].Hits += b.Methods[idx].Lines[l].Hits + } + } + // Rebuild lines + c.Lines = nil + for _, m := range c.Methods { + c.Lines = append(c.Lines, m.Lines...) + } + return nil +} + +// merge merges two coverage reports for a given package. +func (p *CoberturaPackage) merge(b *CoberturaPackage) error { + // Merge classes + for _, class := range b.Classes { + var target *CoberturaClass + for _, existing := range p.Classes { + if existing.Name == class.Name { + target = existing + break + } + } + if target != nil { + if err := target.merge(class); err != nil { + return err + } + } else { + p.Classes = append(p.Classes, class) + } + } + return nil +} + +// merge merges two coverage reports. 
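+// Sources and packages from b are merged into c, and the global line counters
+// are recalculated from the merged packages.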
+func (c *CoberturaCoverage) merge(b *CoberturaCoverage) error { + // Merge source paths + for _, path := range b.Sources { + found := false + for _, existing := range c.Sources { + if found = existing.Path == path.Path; found { + break + } + } + if !found { + c.Sources = append(c.Sources, path) + } + } + + // Merge packages + for _, pkg := range b.Packages { + var target *CoberturaPackage + for _, existing := range c.Packages { + if existing.Name == pkg.Name { + target = existing + break + } + } + if target != nil { + if err := target.merge(pkg); err != nil { + return err + } + } else { + c.Packages = append(c.Packages, pkg) + } + } + + // Recalculate global line coverage count + c.LinesValid = 0 + c.LinesCovered = 0 + for _, pkg := range c.Packages { + for _, cls := range pkg.Classes { + for _, line := range cls.Lines { + c.LinesValid++ + if line.Hits > 0 { + c.LinesCovered++ + } + } + } + } + return nil +} + +// WriteCoverage function calculates test coverage for the given package. +// It requires to execute tests for all data streams (same test type), so the coverage can be calculated properly. +func WriteCoverage(packageRootPath, packageName string, testType TestType, results []TestResult) error { + details, err := collectTestCoverageDetails(packageRootPath, packageName, testType, results) + if err != nil { + return errors.Wrap(err, "can't collect test coverage details") + } + + // Use provided cobertura report, or generate a custom report if not available. + report := details.cobertura + if report == nil { + report = transformToCoberturaReport(details) + } + + err = writeCoverageReportFile(report, packageName) + if err != nil { + return errors.Wrap(err, "can't write test coverage report file") + } + return nil +} + +func collectTestCoverageDetails(packageRootPath, packageName string, testType TestType, results []TestResult) (*testCoverageDetails, error) { + withoutTests, err := findDataStreamsWithoutTests(packageRootPath, testType) + if err != nil { + return nil, errors.Wrap(err, "can't find data streams without tests") + } + + details := newTestCoverageDetails(packageName, testType). + withUncoveredDataStreams(withoutTests). + withTestResults(results) + if len(details.errors) > 0 { + return nil, details.errors + } + return details, nil +} + +func findDataStreamsWithoutTests(packageRootPath string, testType TestType) ([]string, error) { + var noTests []string + + dataStreamDir := filepath.Join(packageRootPath, "data_stream") + dataStreams, err := os.ReadDir(dataStreamDir) + if errors.Is(err, os.ErrNotExist) { + return noTests, nil // there are packages that don't have any data streams (fleet_server, security_detection_engine) + } else if err != nil { + return nil, errors.Wrap(err, "can't list data streams directory") + } + + for _, dataStream := range dataStreams { + if !dataStream.IsDir() { + continue + } + + expected, err := verifyTestExpected(packageRootPath, dataStream.Name(), testType) + if err != nil { + return nil, errors.Wrap(err, "can't verify if test is expected") + } + if !expected { + continue + } + + dataStreamTestPath := filepath.Join(packageRootPath, "data_stream", dataStream.Name(), "_dev", "test", string(testType)) + _, err = os.Stat(dataStreamTestPath) + if errors.Is(err, os.ErrNotExist) { + noTests = append(noTests, dataStream.Name()) + continue + } + if err != nil { + return nil, errors.Wrapf(err, "can't stat path: %s", dataStreamTestPath) + } + } + return noTests, nil +} + +// verifyTestExpected function checks if tests are actually expected. 
+// Pipeline tests require an ingest pipeline to be defined in the data stream. +func verifyTestExpected(packageRootPath string, dataStreamName string, testType TestType) (bool, error) { + if testType != "pipeline" { + return true, nil + } + + ingestPipelinePath := filepath.Join(packageRootPath, "data_stream", dataStreamName, "elasticsearch", "ingest_pipeline") + _, err := os.Stat(ingestPipelinePath) + if errors.Is(err, os.ErrNotExist) { + return false, nil + } + if err != nil { + return false, errors.Wrapf(err, "can't stat path: %s", ingestPipelinePath) + } + return true, nil +} + +func transformToCoberturaReport(details *testCoverageDetails) *CoberturaCoverage { + var classes []*CoberturaClass + for dataStream, testCases := range details.dataStreams { + if dataStream == "" { + continue // ignore tests running in the package context (not data stream), mostly referring to installed assets + } + + var methods []*CoberturaMethod + + if len(testCases) == 0 { + methods = append(methods, &CoberturaMethod{ + Name: "Missing", + Lines: []*CoberturaLine{{Number: 1, Hits: 0}}, + }) + } else { + methods = append(methods, &CoberturaMethod{ + Name: "OK", + Lines: []*CoberturaLine{{Number: 1, Hits: 1}}, + }) + } + + aClass := &CoberturaClass{ + Name: string(details.testType), + Filename: details.packageName + "/" + dataStream, + Methods: methods, + } + classes = append(classes, aClass) + } + + return &CoberturaCoverage{ + Timestamp: time.Now().UnixNano(), + Packages: []*CoberturaPackage{ + { + Name: details.packageName, + Classes: classes, + }, + }, + } +} + +func writeCoverageReportFile(report *CoberturaCoverage, packageName string) error { + dest, err := testCoverageReportsDir() + if err != nil { + return errors.Wrap(err, "could not determine test coverage reports folder") + } + + // Create test coverage reports folder if it doesn't exist + _, err = os.Stat(dest) + if err != nil && errors.Is(err, os.ErrNotExist) { + if err := os.MkdirAll(dest, 0755); err != nil { + return errors.Wrap(err, "could not create test coverage reports folder") + } + } + + fileName := fmt.Sprintf("coverage-%s-%d-report.xml", packageName, report.Timestamp) + filePath := filepath.Join(dest, fileName) + + b, err := report.bytes() + if err != nil { + return errors.Wrap(err, "can't marshal test coverage report") + } + + if err := os.WriteFile(filePath, b, 0644); err != nil { + return errors.Wrap(err, "could not write test coverage report file") + } + return nil +} + +func testCoverageReportsDir() (string, error) { + buildDir, err := builder.BuildDirectory() + if err != nil { + return "", errors.Wrap(err, "locating build directory failed") + } + return filepath.Join(buildDir, "test-coverage"), nil +} diff --git a/internal/benchrunner/coverageoutput_test.go b/internal/benchrunner/coverageoutput_test.go new file mode 100644 index 0000000000..adc5ceae34 --- /dev/null +++ b/internal/benchrunner/coverageoutput_test.go @@ -0,0 +1,279 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package benchrunner + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCoberturaCoverage_Merge(t *testing.T) { + tests := []struct { + name string + rhs, lhs, expected CoberturaCoverage + wantErr bool + }{ + { + name: "merge sources", + rhs: CoberturaCoverage{ + Sources: []*CoberturaSource{ + {Path: "/a"}, + {Path: "/c"}, + }, + }, + lhs: CoberturaCoverage{ + Sources: []*CoberturaSource{ + {Path: "/b"}, + {Path: "/c"}, + }, + }, + expected: CoberturaCoverage{ + Sources: []*CoberturaSource{ + {Path: "/a"}, + {Path: "/c"}, + {Path: "/b"}, + }, + }, + }, + { + name: "merge packages and classes", + rhs: CoberturaCoverage{ + Packages: []*CoberturaPackage{ + { + Name: "a", + Classes: []*CoberturaClass{ + {Name: "a.a"}, + {Name: "a.b"}, + }, + }, + { + Name: "b", + Classes: []*CoberturaClass{ + {Name: "b.a"}, + }, + }, + }, + }, + lhs: CoberturaCoverage{ + Packages: []*CoberturaPackage{ + { + Name: "c", + Classes: []*CoberturaClass{ + {Name: "a.a"}, + }, + }, + { + Name: "b", + Classes: []*CoberturaClass{ + {Name: "b.a"}, + {Name: "b.b"}, + }, + }, + }, + }, + expected: CoberturaCoverage{ + Packages: []*CoberturaPackage{ + { + Name: "a", + Classes: []*CoberturaClass{ + {Name: "a.a"}, + {Name: "a.b"}, + }, + }, + { + Name: "b", + Classes: []*CoberturaClass{ + {Name: "b.a"}, + {Name: "b.b"}, + }, + }, + { + Name: "c", + Classes: []*CoberturaClass{ + {Name: "a.a"}, + }, + }, + }, + }, + }, + { + name: "merge methods and lines", + rhs: CoberturaCoverage{ + Packages: []*CoberturaPackage{ + { + Name: "a", + Classes: []*CoberturaClass{ + { + Name: "a.a", + Methods: []*CoberturaMethod{ + { + Name: "foo", + Hits: 2, + Lines: []*CoberturaLine{ + { + Number: 13, + Hits: 2, + }, + { + Number: 14, + Hits: 2, + }, + }, + }, + { + Name: "bar", + Hits: 1, + Lines: []*CoberturaLine{ + { + Number: 24, + Hits: 1, + }, + }, + }, + }, + Lines: []*CoberturaLine{ + { + Number: 13, + Hits: 2, + }, + { + Number: 14, + Hits: 2, + }, + { + Number: 24, + Hits: 1, + }, + }, + }, + }, + }, + }, + }, + lhs: CoberturaCoverage{ + Packages: []*CoberturaPackage{ + { + Name: "a", + Classes: []*CoberturaClass{ + { + Name: "a.a", + Methods: []*CoberturaMethod{ + { + Name: "foo", + Hits: 1, + Lines: []*CoberturaLine{ + { + Number: 13, + Hits: 1, + }, + { + Number: 14, + Hits: 1, + }, + }, + }, + { + Name: "bar", + Hits: 1, + Lines: []*CoberturaLine{ + { + Number: 24, + Hits: 1, + }, + }, + }, + }, + Lines: []*CoberturaLine{ + { + Number: 13, + Hits: 1, + }, + { + Number: 14, + Hits: 1, + }, + { + Number: 24, + Hits: 1, + }, + }, + }, + }, + }, + }, + }, + expected: CoberturaCoverage{ + LinesCovered: 3, + LinesValid: 3, + Packages: []*CoberturaPackage{ + { + Name: "a", + Classes: []*CoberturaClass{ + { + Name: "a.a", + Methods: []*CoberturaMethod{ + { + Name: "foo", + Hits: 3, + Lines: []*CoberturaLine{ + { + Number: 13, + Hits: 3, + }, + { + Number: 14, + Hits: 3, + }, + }, + }, + { + Name: "bar", + Hits: 2, + Lines: []*CoberturaLine{ + { + Number: 24, + Hits: 2, + }, + }, + }, + }, + Lines: []*CoberturaLine{ + { + Number: 13, + Hits: 3, + }, + { + Number: 14, + Hits: 3, + }, + { + Number: 24, + Hits: 2, + }, + }, + }, + }, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := tt.rhs.merge(&tt.lhs) + if !tt.wantErr { + if !assert.NoError(t, err) { + t.Fatal(err) + } + } else { + if !assert.Error(t, err) { + t.Fatal("error expected") + } + } + assert.Equal(t, tt.expected, tt.rhs) + }) + } +} diff --git a/internal/benchrunner/errors.go 
b/internal/benchrunner/errors.go new file mode 100644 index 0000000000..0a532adf46 --- /dev/null +++ b/internal/benchrunner/errors.go @@ -0,0 +1,18 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package benchrunner + +import "fmt" + +// ErrTestCaseFailed represents a test case failure result +type ErrTestCaseFailed struct { + Reason string + Details string +} + +// Error returns the message detailing the test case failure. +func (e ErrTestCaseFailed) Error() string { + return fmt.Sprintf("test case failed: %s", e.Reason) +} diff --git a/internal/benchrunner/report_format.go b/internal/benchrunner/report_format.go new file mode 100644 index 0000000000..3ec489a0a6 --- /dev/null +++ b/internal/benchrunner/report_format.go @@ -0,0 +1,30 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package benchrunner + +import "fmt" + +// TestReportFormat represents a test report format +type TestReportFormat string + +// ReportFormatFunc defines the report formatter function. +type ReportFormatFunc func(results []TestResult) (string, []string, error) + +var reportFormatters = map[TestReportFormat]ReportFormatFunc{} + +// RegisterReporterFormat registers a test report formatter. +func RegisterReporterFormat(name TestReportFormat, formatFunc ReportFormatFunc) { + reportFormatters[name] = formatFunc +} + +// FormatReport delegates formatting of test results to the registered test report formatter. +func FormatReport(name TestReportFormat, results []TestResult) (testReport string, benchmarkReports []string, err error) { + reportFunc, defined := reportFormatters[name] + if !defined { + return "", nil, fmt.Errorf("unregistered test report format: %s", name) + } + + return reportFunc(results) +} diff --git a/internal/benchrunner/report_output.go b/internal/benchrunner/report_output.go new file mode 100644 index 0000000000..3155c4e3a7 --- /dev/null +++ b/internal/benchrunner/report_output.go @@ -0,0 +1,40 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package benchrunner + +import ( + "fmt" +) + +// TestReportOutput represents an output for a test report +type TestReportOutput string + +// TestReportType represents a test report type (test, benchmark) +type TestReportType string + +const ( + ReportTypeTest TestReportType = "test" + ReportTypeBench TestReportType = "bench" +) + +// ReportOutputFunc defines the report writer function. +type ReportOutputFunc func(pkg, report string, format TestReportFormat, ttype TestReportType) error + +var reportOutputs = map[TestReportOutput]ReportOutputFunc{} + +// RegisterReporterOutput registers a test report output. 
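+// Outputs are expected to register themselves from an init function so they are
+// available before WriteReport is called.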
+func RegisterReporterOutput(name TestReportOutput, outputFunc ReportOutputFunc) { + reportOutputs[name] = outputFunc +} + +// WriteReport delegates writing of test results to the registered test report output +func WriteReport(pkg string, name TestReportOutput, report string, format TestReportFormat, ttype TestReportType) error { + outputFunc, defined := reportOutputs[name] + if !defined { + return fmt.Errorf("unregistered test report output: %s", name) + } + + return outputFunc(pkg, report, format, ttype) +} diff --git a/internal/benchrunner/reporters/formats/human.go b/internal/benchrunner/reporters/formats/human.go new file mode 100644 index 0000000000..f493ee04c1 --- /dev/null +++ b/internal/benchrunner/reporters/formats/human.go @@ -0,0 +1,123 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package formats + +import ( + "fmt" + "strings" + + "github.com/jedib0t/go-pretty/table" + "github.com/jedib0t/go-pretty/text" + + "github.com/elastic/elastic-package/internal/benchrunner" +) + +func init() { + benchrunner.RegisterReporterFormat(ReportFormatHuman, reportHumanFormat) +} + +const ( + // ReportFormatHuman reports test results in a human-readable format + ReportFormatHuman benchrunner.TestReportFormat = "human" +) + +func reportHumanFormat(results []benchrunner.TestResult) (string, []string, error) { + if len(results) == 0 { + return "No test results", nil, nil + } + + var benchmarks []benchrunner.BenchmarkResult + for _, r := range results { + if r.Benchmark != nil { + benchmarks = append(benchmarks, *r.Benchmark) + } + } + + testFmtd, err := reportHumanFormatTest(results) + if err != nil { + return "", nil, err + } + benchFmtd, err := reportHumanFormatBenchmark(benchmarks) + if err != nil { + return "", nil, err + } + return testFmtd, benchFmtd, nil +} + +func reportHumanFormatTest(results []benchrunner.TestResult) (string, error) { + var report strings.Builder + + headerPrinted := false + for _, r := range results { + if r.FailureMsg == "" { + continue + } + + if !headerPrinted { + report.WriteString("FAILURE DETAILS:\n") + headerPrinted = true + } + + detail := fmt.Sprintf("%s/%s %s:\n%s\n", r.Package, r.DataStream, r.Name, r.FailureDetails) + report.WriteString(detail) + } + if headerPrinted { + report.WriteString("\n\n") + } + + t := table.NewWriter() + t.AppendHeader(table.Row{"Package", "Data stream", "Test type", "Test name", "Result", "Time elapsed"}) + + for _, r := range results { + var result string + if r.ErrorMsg != "" { + result = fmt.Sprintf("ERROR: %s", r.ErrorMsg) + } else if r.FailureMsg != "" { + result = fmt.Sprintf("FAIL: %s", r.FailureMsg) + } else if r.Skipped != nil { + result = r.Skipped.String() + } else { + result = "PASS" + } + + t.AppendRow(table.Row{r.Package, r.DataStream, r.TestType, r.Name, result, r.TimeElapsed}) + } + + t.SetStyle(table.StyleRounded) + + report.WriteString(t.Render()) + return report.String(), nil +} + +func reportHumanFormatBenchmark(benchmarks []benchrunner.BenchmarkResult) ([]string, error) { + var textReports []string + for _, b := range benchmarks { + var report strings.Builder + if len(b.Parameters) > 0 { + report.WriteString(renderBenchmarkTable("parameters", b.Parameters) + "\n") + } + for _, test := range b.Tests { + report.WriteString(renderBenchmarkTable(test.Name, test.Results) + "\n") + } + textReports = append(textReports, 
report.String()) + } + return textReports, nil +} + +func renderBenchmarkTable(title string, values []benchrunner.BenchmarkValue) string { + t := table.NewWriter() + t.SetStyle(table.StyleRounded) + t.SetTitle(title) + t.SetColumnConfigs([]table.ColumnConfig{ + { + Number: 2, + Align: text.AlignRight, + }, + }) + for _, r := range values { + t.AppendRow(table.Row{r.Name, r.PrettyValue()}) + } + return t.Render() +} diff --git a/internal/benchrunner/reporters/formats/xunit.go b/internal/benchrunner/reporters/formats/xunit.go new file mode 100644 index 0000000000..fbcfa4512a --- /dev/null +++ b/internal/benchrunner/reporters/formats/xunit.go @@ -0,0 +1,184 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package formats + +import ( + "encoding/xml" + "fmt" + + "github.com/pkg/errors" + + "github.com/elastic/elastic-package/internal/benchrunner" +) + +func init() { + benchrunner.RegisterReporterFormat(ReportFormatXUnit, reportXUnitFormat) +} + +const ( + // ReportFormatXUnit reports test results in the xUnit format + ReportFormatXUnit benchrunner.TestReportFormat = "xUnit" +) + +type testSuites struct { + XMLName xml.Name `xml:"testsuites"` + Suites []testSuite `xml:"testsuite"` +} +type testSuite struct { + Comment string `xml:",comment"` + + Name string `xml:"name,attr"` + NumTests int `xml:"tests,attr,omitempty"` + NumFailures int `xml:"failures,attr,omitempty"` + NumErrors int `xml:"errors,attr,omitempty"` + NumSkipped int `xml:"skipped,attr,omitempty"` + + Suites []testSuite `xml:"testsuite,omitempty"` + Cases []testCase `xml:"testcase,omitempty"` +} +type testCase struct { + Name string `xml:"name,attr"` + ClassName string `xml:"classname,attr"` + TimeInSeconds float64 `xml:"time,attr"` + + Error string `xml:"error,omitempty"` + Failure string `xml:"failure,omitempty"` + Skipped *skipped `xml:"skipped,omitempty"` +} + +type skipped struct { + Message string `xml:"message,attr"` +} + +func reportXUnitFormat(results []benchrunner.TestResult) (string, []string, error) { + var benchmarks []benchrunner.BenchmarkResult + for _, r := range results { + if r.Benchmark != nil { + benchmarks = append(benchmarks, *r.Benchmark) + } + } + testFmtd, err := reportXUnitFormatTest(results) + if err != nil { + return "", nil, err + } + benchFmtd, err := reportXUnitFormatBenchmark(benchmarks) + if err != nil { + return "", nil, err + } + return testFmtd, benchFmtd, nil +} + +func reportXUnitFormatTest(results []benchrunner.TestResult) (string, error) { + // test type => package => data stream => test cases + tests := map[string]map[string]map[string][]testCase{} + + var numTests, numFailures, numErrors, numSkipped int + for _, r := range results { + testType := string(r.TestType) + if _, exists := tests[testType]; !exists { + tests[testType] = map[string]map[string][]testCase{} + } + + if _, exists := tests[testType][r.Package]; !exists { + tests[testType][r.Package] = map[string][]testCase{} + } + + if _, exists := tests[testType][r.Package][r.DataStream]; !exists { + tests[testType][r.Package][r.DataStream] = make([]testCase, 0) + } + + var failure string + if r.FailureMsg != "" { + failure = r.FailureMsg + numFailures++ + } + + if r.FailureDetails != "" { + failure += ": " + r.FailureDetails + } + + if r.ErrorMsg != "" { + numErrors++ + } + + if r.Skipped != nil { + numSkipped++ + } + + name := fmt.Sprintf("%s 
test", r.TestType) + if r.Name != "" { + name += ": " + r.Name + } + + c := testCase{ + Name: name, + ClassName: fmt.Sprintf("%s.%s", r.Package, r.DataStream), + TimeInSeconds: r.TimeElapsed.Seconds(), + Error: r.ErrorMsg, + Failure: failure, + } + + if r.Skipped != nil { + c.Skipped = &skipped{r.Skipped.String()} + } + + numTests++ + + tests[testType][r.Package][r.DataStream] = append(tests[testType][r.Package][r.DataStream], c) + } + + var ts testSuites + ts.Suites = make([]testSuite, 0) + + for testType, packages := range tests { + testTypeSuite := testSuite{ + Comment: fmt.Sprintf("test suite for %s tests", testType), + Name: testType, + + NumTests: numTests, + NumFailures: numFailures, + NumErrors: numErrors, + NumSkipped: numSkipped, + + Cases: make([]testCase, 0), + } + + for _, pkg := range packages { + for _, ds := range pkg { + testTypeSuite.Cases = append(testTypeSuite.Cases, ds...) + } + } + + ts.Suites = append(ts.Suites, testTypeSuite) + } + + out, err := xml.MarshalIndent(&ts, "", " ") + if err != nil { + return "", errors.Wrap(err, "unable to format test results as xUnit") + } + + return xml.Header + string(out), nil +} + +func reportXUnitFormatBenchmark(benchmarks []benchrunner.BenchmarkResult) ([]string, error) { + var reports []string + for _, b := range benchmarks { + // Filter out detailed tests. These add too much information for the + // aggregated nature of xUnit reports, creating a lot of noise in Jenkins. + var tests []benchrunner.BenchmarkTest + for _, t := range b.Tests { + if !t.Detailed { + tests = append(tests, t) + } + } + b.Tests = tests + out, err := xml.MarshalIndent(b, "", " ") + if err != nil { + return nil, errors.Wrap(err, "unable to format benchmark results as xUnit") + } + reports = append(reports, xml.Header+string(out)) + } + return reports, nil +} diff --git a/internal/benchrunner/reporters/outputs/file.go b/internal/benchrunner/reporters/outputs/file.go new file mode 100644 index 0000000000..1294c76348 --- /dev/null +++ b/internal/benchrunner/reporters/outputs/file.go @@ -0,0 +1,73 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package outputs + +import ( + "fmt" + "os" + "path/filepath" + "time" + + "github.com/pkg/errors" + + "github.com/elastic/elastic-package/internal/benchrunner" + "github.com/elastic/elastic-package/internal/benchrunner/reporters/formats" + "github.com/elastic/elastic-package/internal/builder" +) + +func init() { + benchrunner.RegisterReporterOutput(ReportOutputFile, reportToFile) +} + +const ( + // ReportOutputFile reports test results to files in a folder + ReportOutputFile benchrunner.TestReportOutput = "file" +) + +func reportToFile(pkg, report string, format benchrunner.TestReportFormat, ttype benchrunner.TestReportType) error { + dest, err := reportsDir(ttype) + if err != nil { + return errors.Wrap(err, "could not determine test reports folder") + } + + // Create test reports folder if it doesn't exist + _, err = os.Stat(dest) + if err != nil && errors.Is(err, os.ErrNotExist) { + if err := os.MkdirAll(dest, 0755); err != nil { + return errors.Wrapf(err, "could not create %s reports folder", ttype) + } + } + + ext := "txt" + if format == formats.ReportFormatXUnit { + ext = "xml" + } + fileName := fmt.Sprintf("%s_%d.%s", pkg, time.Now().UnixNano(), ext) + filePath := filepath.Join(dest, fileName) + + if err := os.WriteFile(filePath, []byte(report+"\n"), 0644); err != nil { + return errors.Wrapf(err, "could not write %s report file", ttype) + } + + return nil +} + +// reportsDir returns the location of the directory to store reports. +func reportsDir(ttype benchrunner.TestReportType) (string, error) { + buildDir, err := builder.BuildDirectory() + if err != nil { + return "", errors.Wrap(err, "locating build directory failed") + } + var folder string + switch ttype { + case benchrunner.ReportTypeTest: + folder = "test-results" + case benchrunner.ReportTypeBench: + folder = "benchmark-results" + default: + return "", fmt.Errorf("unsupported report type: %s", ttype) + } + return filepath.Join(buildDir, folder), nil +} diff --git a/internal/benchrunner/reporters/outputs/stdout.go b/internal/benchrunner/reporters/outputs/stdout.go new file mode 100644 index 0000000000..442023b82f --- /dev/null +++ b/internal/benchrunner/reporters/outputs/stdout.go @@ -0,0 +1,33 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package outputs + +import ( + "fmt" + + "github.com/elastic/elastic-package/internal/benchrunner" +) + +func init() { + benchrunner.RegisterReporterOutput(ReportOutputSTDOUT, reportToSTDOUT) +} + +const ( + // ReportOutputSTDOUT reports test results to STDOUT + ReportOutputSTDOUT benchrunner.TestReportOutput = "stdout" +) + +func reportToSTDOUT(pkg, report string, _ benchrunner.TestReportFormat, ttype benchrunner.TestReportType) error { + reportType := "Test" + if ttype == benchrunner.ReportTypeBench { + reportType = "Benchmark" + } + fmt.Printf("--- %s results for package: %s - START ---\n", reportType, pkg) + fmt.Println(report) + fmt.Printf("--- %s results for package: %s - END ---\n", reportType, pkg) + fmt.Println("Done") + + return nil +} diff --git a/internal/testrunner/runners/pipeline/benchmark.go b/internal/benchrunner/runners/pipeline/benchmark.go similarity index 90% rename from internal/testrunner/runners/pipeline/benchmark.go rename to internal/benchrunner/runners/pipeline/benchmark.go index 03d1af0d53..d2f7e5cde8 100644 --- a/internal/testrunner/runners/pipeline/benchmark.go +++ b/internal/benchrunner/runners/pipeline/benchmark.go @@ -13,9 +13,9 @@ import ( "github.com/pkg/errors" + "github.com/elastic/elastic-package/internal/benchrunner" "github.com/elastic/elastic-package/internal/elasticsearch/ingest" "github.com/elastic/elastic-package/internal/packages" - "github.com/elastic/elastic-package/internal/testrunner" ) const ( @@ -37,7 +37,7 @@ const ( numTopProcs = 10 ) -func BenchmarkPipeline(options testrunner.TestOptions) (*testrunner.BenchmarkResult, error) { +func BenchmarkPipeline(options benchrunner.TestOptions) (*benchrunner.BenchmarkResult, error) { // Load all test documents docs, err := loadAllTestDocs(options.TestFolder.Path) if err != nil { @@ -68,16 +68,16 @@ func BenchmarkPipeline(options testrunner.TestOptions) (*testrunner.BenchmarkRes } return record.TimeInMillis * int64(time.Millisecond) / record.Count } - asPercentageOfTotalTime := func(perf processorPerformance) testrunner.BenchmarkValue { - return testrunner.BenchmarkValue{ + asPercentageOfTotalTime := func(perf processorPerformance) benchrunner.BenchmarkValue { + return benchrunner.BenchmarkValue{ Name: perf.key, Description: perf.key, Unit: "%", Value: time.Duration(perf.value).Seconds() * 100 / bench.elapsed.Seconds(), } } - asDuration := func(perf processorPerformance) testrunner.BenchmarkValue { - return testrunner.BenchmarkValue{ + asDuration := func(perf processorPerformance) benchrunner.BenchmarkValue { + return benchrunner.BenchmarkValue{ Name: perf.key, Description: perf.key, Value: time.Duration(perf.value), @@ -109,9 +109,9 @@ func BenchmarkPipeline(options testrunner.TestOptions) (*testrunner.BenchmarkRes } // Build result - result := &testrunner.BenchmarkResult{ + result := &benchrunner.BenchmarkResult{ Name: fmt.Sprintf("pipeline benchmark for %s/%s", options.TestFolder.Package, options.TestFolder.DataStream), - Parameters: []testrunner.BenchmarkValue{ + Parameters: []benchrunner.BenchmarkValue{ { Name: "package", Value: options.TestFolder.Package, @@ -129,10 +129,10 @@ func BenchmarkPipeline(options testrunner.TestOptions) (*testrunner.BenchmarkRes Value: bench.numDocs, }, }, - Tests: []testrunner.BenchmarkTest{ + Tests: []benchrunner.BenchmarkTest{ { Name: "ingest performance", - Results: []testrunner.BenchmarkValue{ + Results: []benchrunner.BenchmarkValue{ { Name: "ingest time", Description: "time elapsed in ingest processors", @@ -171,7 +171,7 @@ type ingestResult struct { numDocs 
int } -func benchmarkIngest(options testrunner.TestOptions, baseDocs []json.RawMessage) (ingestResult, error) { +func benchmarkIngest(options benchrunner.TestOptions, baseDocs []json.RawMessage) (ingestResult, error) { if options.Benchmark.Duration == time.Duration(0) { // Run with a fixed doc count return runSingleBenchmark(options, resizeDocs(baseDocs, options.Benchmark.NumDocs)) @@ -206,7 +206,7 @@ type aggregation struct { type ( keyFn func(ingest.Pipeline, ingest.Processor) string valueFn func(record ingest.StatsRecord) int64 - mapFn func(processorPerformance) testrunner.BenchmarkValue + mapFn func(processorPerformance) benchrunner.BenchmarkValue compareFn func(a, b processorPerformance) bool filterFn func(processorPerformance) bool ) @@ -279,18 +279,18 @@ func (agg aggregation) filter(keep filterFn) aggregation { return agg } -func (agg aggregation) collect(fn mapFn) ([]testrunner.BenchmarkValue, error) { +func (agg aggregation) collect(fn mapFn) ([]benchrunner.BenchmarkValue, error) { if agg.err != nil { return nil, agg.err } - r := make([]testrunner.BenchmarkValue, len(agg.result)) + r := make([]benchrunner.BenchmarkValue, len(agg.result)) for idx := range r { r[idx] = fn(agg.result[idx]) } return r, nil } -func runSingleBenchmark(options testrunner.TestOptions, docs []json.RawMessage) (ingestResult, error) { +func runSingleBenchmark(options benchrunner.TestOptions, docs []json.RawMessage) (ingestResult, error) { if len(docs) == 0 { return ingestResult{}, errors.New("no docs supplied for benchmark") } diff --git a/internal/benchrunner/runners/pipeline/coverage.go b/internal/benchrunner/runners/pipeline/coverage.go new file mode 100644 index 0000000000..a54b6da58b --- /dev/null +++ b/internal/benchrunner/runners/pipeline/coverage.go @@ -0,0 +1,136 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package pipeline + +import ( + "path/filepath" + "strings" + "time" + + "github.com/pkg/errors" + + "github.com/elastic/elastic-package/internal/benchrunner" + "github.com/elastic/elastic-package/internal/elasticsearch/ingest" + "github.com/elastic/elastic-package/internal/packages" +) + +// GetPipelineCoverage returns a coverage report for the provided set of ingest pipelines. +func GetPipelineCoverage(options benchrunner.TestOptions, pipelines []ingest.Pipeline) (*benchrunner.CoberturaCoverage, error) { + dataStreamPath, found, err := packages.FindDataStreamRootForPath(options.TestFolder.Path) + if err != nil { + return nil, errors.Wrap(err, "locating data_stream root failed") + } + if !found { + return nil, errors.New("data stream root not found") + } + + // Use the Node Stats API to get stats for all installed pipelines. + // These stats contain hit counts for all main processors in a pipeline. + stats, err := ingest.GetPipelineStats(options.API, pipelines) + if err != nil { + return nil, errors.Wrap(err, "error fetching pipeline stats for code coverage calculations") + } + + // Construct the Cobertura report. + pkg := &benchrunner.CoberturaPackage{ + Name: options.TestFolder.Package + "." + options.TestFolder.DataStream, + } + + // Use the package's parent directory as base path, so that the relative paths + // for each class (pipeline) include the package name. This prevents paths for + // different packages colliding (i.e. 
a lot of packages have a "log" datastream + // and a default.yml pipeline). + basePath := filepath.Dir(options.PackageRootPath) + + coverage := &benchrunner.CoberturaCoverage{ + Sources: []*benchrunner.CoberturaSource{ + { + Path: basePath, + }, + }, + Packages: []*benchrunner.CoberturaPackage{pkg}, + Timestamp: time.Now().UnixNano(), + } + + // Calculate coverage for each pipeline + for _, pipeline := range pipelines { + covered, class, err := coverageForSinglePipeline(pipeline, stats, basePath, dataStreamPath) + if err != nil { + return nil, errors.Wrapf(err, "error calculating coverage for pipeline '%s'", pipeline.Filename()) + } + pkg.Classes = append(pkg.Classes, class) + coverage.LinesValid += int64(len(class.Methods)) + coverage.LinesCovered += covered + } + return coverage, nil +} + +func coverageForSinglePipeline(pipeline ingest.Pipeline, stats ingest.PipelineStatsMap, basePath, dataStreamPath string) (linesCovered int64, class *benchrunner.CoberturaClass, err error) { + // Load the list of main processors from the pipeline source code, annotated with line numbers. + src, err := pipeline.Processors() + if err != nil { + return 0, nil, err + } + + pstats, found := stats[pipeline.Name] + if !found { + return 0, nil, errors.Errorf("pipeline '%s' not installed in Elasticsearch", pipeline.Name) + } + + // Ensure there is no inconsistency in the list of processors in stats vs obtained from source. + if len(src) != len(pstats.Processors) { + return 0, nil, errors.Errorf("processor count mismatch for %s (src:%d stats:%d)", pipeline.Filename(), len(src), len(pstats.Processors)) + } + for idx, st := range pstats.Processors { + // Check that we have the expected type of processor, except for `compound` processors. + // Elasticsearch will return a `compound` processor in the case of `foreach` and + // any processor that defines `on_failure` processors. + if st.Type != "compound" && st.Type != src[idx].Type { + return 0, nil, errors.Errorf("processor type mismatch for %s processor %d (src:%s stats:%s)", pipeline.Filename(), idx, src[idx].Type, st.Type) + } + } + + // Tests install pipelines as `filename-` (without original extension). + // Use the filename part for the report. + pipelineName := pipeline.Name + if nameEnd := strings.LastIndexByte(pipelineName, '-'); nameEnd != -1 { + pipelineName = pipelineName[:nameEnd] + } + + // File path has to be relative to the packagePath added to the cobertura Sources list + // so that the source is reachable by the report tool. + pipelinePath := filepath.Join(dataStreamPath, "elasticsearch", "ingest_pipeline", pipeline.Filename()) + pipelineRelPath, err := filepath.Rel(basePath, pipelinePath) + if err != nil { + return 0, nil, errors.Wrapf(err, "cannot create relative path to pipeline file. Package root: '%s', pipeline path: '%s'", basePath, pipelinePath) + } + + // Report every pipeline as a "class". + class = &benchrunner.CoberturaClass{ + Name: pipelineName, + Filename: pipelineRelPath, + } + + // Calculate covered and total processors (reported as both lines and methods). 
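+ // A processor counts as covered when its pipeline stats report at least one
+ // processed document (Stats.Count > 0).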
+ for idx, srcProc := range src { + if pstats.Processors[idx].Stats.Count > 0 { + linesCovered++ + } + method := benchrunner.CoberturaMethod{ + Name: srcProc.Type, + Hits: pstats.Processors[idx].Stats.Count, + } + for num := srcProc.FirstLine; num <= srcProc.LastLine; num++ { + line := &benchrunner.CoberturaLine{ + Number: num, + Hits: pstats.Processors[idx].Stats.Count, + } + class.Lines = append(class.Lines, line) + method.Lines = append(method.Lines, line) + } + class.Methods = append(class.Methods, &method) + } + return linesCovered, class, nil +} diff --git a/internal/benchrunner/runners/pipeline/ingest_pipeline.go b/internal/benchrunner/runners/pipeline/ingest_pipeline.go new file mode 100644 index 0000000000..fab8fd8035 --- /dev/null +++ b/internal/benchrunner/runners/pipeline/ingest_pipeline.go @@ -0,0 +1,222 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package pipeline + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "os" + "path/filepath" + "regexp" + "strings" + "time" + + "github.com/pkg/errors" + + "github.com/elastic/elastic-package/internal/elasticsearch" + "github.com/elastic/elastic-package/internal/elasticsearch/ingest" + "github.com/elastic/elastic-package/internal/packages" +) + +var ingestPipelineTag = regexp.MustCompile(`{{\s*IngestPipeline.+}}`) + +type simulatePipelineRequest struct { + Docs []pipelineDocument `json:"docs"` +} + +type pipelineDocument struct { + Source json.RawMessage `json:"_source"` +} + +type simulatePipelineResponse struct { + Docs []pipelineIngestedDocument `json:"docs"` +} + +type pipelineIngestedDocument struct { + Doc pipelineDocument `json:"doc"` +} + +func installIngestPipelines(api *elasticsearch.API, dataStreamPath string) (string, []ingest.Pipeline, error) { + dataStreamManifest, err := packages.ReadDataStreamManifest(filepath.Join(dataStreamPath, packages.DataStreamManifestFile)) + if err != nil { + return "", nil, errors.Wrap(err, "reading data stream manifest failed") + } + + nonce := time.Now().UnixNano() + + mainPipeline := getWithPipelineNameWithNonce(dataStreamManifest.GetPipelineNameOrDefault(), nonce) + pipelines, err := loadIngestPipelineFiles(dataStreamPath, nonce) + if err != nil { + return "", nil, errors.Wrap(err, "loading ingest pipeline files failed") + } + + err = installPipelinesInElasticsearch(api, pipelines) + + if err != nil { + return "", nil, errors.Wrap(err, "installing pipelines failed") + } + return mainPipeline, pipelines, nil +} + +func loadIngestPipelineFiles(dataStreamPath string, nonce int64) ([]ingest.Pipeline, error) { + elasticsearchPath := filepath.Join(dataStreamPath, "elasticsearch", "ingest_pipeline") + + var pipelineFiles []string + for _, pattern := range []string{"*.json", "*.yml"} { + files, err := filepath.Glob(filepath.Join(elasticsearchPath, pattern)) + if err != nil { + return nil, errors.Wrapf(err, "listing '%s' in '%s'", pattern, elasticsearchPath) + } + pipelineFiles = append(pipelineFiles, files...) 
+ } + + var pipelines []ingest.Pipeline + for _, path := range pipelineFiles { + c, err := os.ReadFile(path) + if err != nil { + return nil, errors.Wrapf(err, "reading ingest pipeline failed (path: %s)", path) + } + + c = ingestPipelineTag.ReplaceAllFunc(c, func(found []byte) []byte { + s := strings.Split(string(found), `"`) + if len(s) != 3 { + log.Fatalf("invalid IngestPipeline tag in template (path: %s)", path) + } + pipelineTag := s[1] + return []byte(getWithPipelineNameWithNonce(pipelineTag, nonce)) + }) + name := filepath.Base(path) + pipelines = append(pipelines, ingest.Pipeline{ + Name: getWithPipelineNameWithNonce(name[:strings.Index(name, ".")], nonce), + Format: filepath.Ext(path)[1:], + Content: c, + }) + } + return pipelines, nil +} + +func installPipelinesInElasticsearch(api *elasticsearch.API, pipelines []ingest.Pipeline) error { + for _, p := range pipelines { + if err := installPipeline(api, p); err != nil { + return err + } + } + return nil +} + +func installPipeline(api *elasticsearch.API, pipeline ingest.Pipeline) error { + if err := putIngestPipeline(api, pipeline); err != nil { + return err + } + // Just to be sure the pipeline has been uploaded. + return getIngestPipeline(api, pipeline.Name) +} + +func putIngestPipeline(api *elasticsearch.API, pipeline ingest.Pipeline) error { + source, err := pipeline.MarshalJSON() + if err != nil { + return err + } + r, err := api.Ingest.PutPipeline(pipeline.Name, bytes.NewReader(source)) + if err != nil { + return errors.Wrapf(err, "PutPipeline API call failed (pipelineName: %s)", pipeline.Name) + } + defer r.Body.Close() + + body, err := io.ReadAll(r.Body) + if err != nil { + return errors.Wrapf(err, "failed to read PutPipeline API response body (pipelineName: %s)", pipeline.Name) + } + + if r.StatusCode != http.StatusOK { + + return errors.Wrapf(elasticsearch.NewError(body), "unexpected response status for PutPipeline (%d): %s (pipelineName: %s)", + r.StatusCode, r.Status(), pipeline.Name) + } + return nil +} + +func getIngestPipeline(api *elasticsearch.API, pipelineName string) error { + r, err := api.Ingest.GetPipeline(func(request *elasticsearch.IngestGetPipelineRequest) { + request.PipelineID = pipelineName + }) + if err != nil { + return errors.Wrapf(err, "GetPipeline API call failed (pipelineName: %s)", pipelineName) + } + defer r.Body.Close() + + body, err := io.ReadAll(r.Body) + if err != nil { + return errors.Wrapf(err, "failed to read GetPipeline API response body (pipelineName: %s)", pipelineName) + } + + if r.StatusCode != http.StatusOK { + return errors.Wrapf(elasticsearch.NewError(body), "unexpected response status for GetPipeline (%d): %s (pipelineName: %s)", + r.StatusCode, r.Status(), pipelineName) + } + return nil +} + +func uninstallIngestPipelines(api *elasticsearch.API, pipelines []ingest.Pipeline) error { + for _, pipeline := range pipelines { + resp, err := api.Ingest.DeletePipeline(pipeline.Name) + if err != nil { + return errors.Wrapf(err, "DeletePipeline API call failed (pipelineName: %s)", pipeline.Name) + } + resp.Body.Close() + } + return nil +} + +func getWithPipelineNameWithNonce(pipelineName string, nonce int64) string { + return fmt.Sprintf("%s-%d", pipelineName, nonce) +} + +func simulatePipelineProcessing(api *elasticsearch.API, pipelineName string, tc *testCase) (*testResult, error) { + var request simulatePipelineRequest + for _, event := range tc.events { + request.Docs = append(request.Docs, pipelineDocument{ + Source: event, + }) + } + + requestBody, err := json.Marshal(&request) + if err 
!= nil { + return nil, errors.Wrap(err, "marshalling simulate request failed") + } + + r, err := api.Ingest.Simulate(bytes.NewReader(requestBody), func(request *elasticsearch.IngestSimulateRequest) { + request.PipelineID = pipelineName + }) + if err != nil { + return nil, errors.Wrapf(err, "Simulate API call failed (pipelineName: %s)", pipelineName) + } + defer r.Body.Close() + + body, err := io.ReadAll(r.Body) + if err != nil { + return nil, errors.Wrap(err, "failed to read Simulate API response body") + } + + if r.StatusCode != http.StatusOK { + return nil, errors.Wrapf(elasticsearch.NewError(body), "unexpected response status for Simulate (%d): %s", r.StatusCode, r.Status()) + } + + var response simulatePipelineResponse + err = json.Unmarshal(body, &response) + if err != nil { + return nil, errors.Wrap(err, "unmarshalling simulate request failed") + } + + var tr testResult + for _, doc := range response.Docs { + tr.events = append(tr.events, doc.Doc.Source) + } + return &tr, nil +} diff --git a/internal/benchrunner/runners/pipeline/runner.go b/internal/benchrunner/runners/pipeline/runner.go new file mode 100644 index 0000000000..a73b401f3f --- /dev/null +++ b/internal/benchrunner/runners/pipeline/runner.go @@ -0,0 +1,398 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package pipeline + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "regexp" + "strings" + "time" + + "github.com/pkg/errors" + + "github.com/elastic/elastic-package/internal/benchrunner" + "github.com/elastic/elastic-package/internal/common" + "github.com/elastic/elastic-package/internal/elasticsearch/ingest" + "github.com/elastic/elastic-package/internal/fields" + "github.com/elastic/elastic-package/internal/logger" + "github.com/elastic/elastic-package/internal/multierror" + "github.com/elastic/elastic-package/internal/packages" + "github.com/elastic/elastic-package/internal/signal" +) + +const ( + // TestType defining pipeline tests + TestType benchrunner.TestType = "pipeline" +) + +type runner struct { + options benchrunner.TestOptions + pipelines []ingest.Pipeline +} + +func (r *runner) TestFolderRequired() bool { + return true +} + +// Type returns the type of test that can be run by this test runner. +func (r *runner) Type() benchrunner.TestType { + return TestType +} + +// String returns the human-friendly name of the test runner. +func (r *runner) String() string { + return "pipeline" +} + +// Run runs the pipeline tests defined under the given folder +func (r *runner) Run(options benchrunner.TestOptions) ([]benchrunner.TestResult, error) { + r.options = options + return r.run() +} + +// TearDown shuts down the pipeline test runner. +func (r *runner) TearDown() error { + if r.options.DeferCleanup > 0 { + logger.Debugf("Waiting for %s before cleanup...", r.options.DeferCleanup) + signal.Sleep(r.options.DeferCleanup) + } + + err := uninstallIngestPipelines(r.options.API, r.pipelines) + if err != nil { + return errors.Wrap(err, "uninstalling ingest pipelines failed") + } + return nil +} + +// CanRunPerDataStream returns whether this test runner can run on individual +// data streams within the package. 
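+// Ingest pipelines are installed per data stream, so this runner always returns true.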
+func (r *runner) CanRunPerDataStream() bool { + return true +} + +func (r *runner) run() ([]benchrunner.TestResult, error) { + testCaseFiles, err := r.listTestCaseFiles() + if err != nil { + return nil, errors.Wrap(err, "listing test case definitions failed") + } + + dataStreamPath, found, err := packages.FindDataStreamRootForPath(r.options.TestFolder.Path) + if err != nil { + return nil, errors.Wrap(err, "locating data_stream root failed") + } + if !found { + return nil, errors.New("data stream root not found") + } + + var entryPipeline string + entryPipeline, r.pipelines, err = installIngestPipelines(r.options.API, dataStreamPath) + if err != nil { + return nil, errors.Wrap(err, "installing ingest pipelines failed") + } + + results := make([]benchrunner.TestResult, 0) + for _, testCaseFile := range testCaseFiles { + tr := benchrunner.TestResult{ + TestType: TestType, + Package: r.options.TestFolder.Package, + DataStream: r.options.TestFolder.DataStream, + } + startTime := time.Now() + + // TODO: Add tests to cover regressive use of json.Unmarshal in loadTestCaseFile. + // See https://github.com/elastic/elastic-package/pull/717. + tc, err := r.loadTestCaseFile(testCaseFile) + if err != nil { + err := errors.Wrap(err, "loading test case failed") + tr.ErrorMsg = err.Error() + results = append(results, tr) + continue + } + tr.Name = tc.name + + if tc.config.Skip != nil { + logger.Warnf("skipping %s test for %s/%s: %s (details: %s)", + TestType, r.options.TestFolder.Package, r.options.TestFolder.DataStream, + tc.config.Skip.Reason, tc.config.Skip.Link.String()) + + tr.Skipped = tc.config.Skip + results = append(results, tr) + continue + } + + result, err := simulatePipelineProcessing(r.options.API, entryPipeline, tc) + if err != nil { + err := errors.Wrap(err, "simulating pipeline processing failed") + tr.ErrorMsg = err.Error() + results = append(results, tr) + continue + } + + tr.TimeElapsed = time.Since(startTime) + fieldsValidator, err := fields.CreateValidatorForDirectory(dataStreamPath, + fields.WithNumericKeywordFields(tc.config.NumericKeywordFields), + // explicitly enabled for pipeline tests only + // since system tests can have dynamic public IPs + fields.WithEnabledAllowedIPCheck(), + ) + if err != nil { + return nil, errors.Wrapf(err, "creating fields validator for data stream failed (path: %s, test case file: %s)", dataStreamPath, testCaseFile) + } + + // TODO: Add tests to cover regressive use of json.Unmarshal in verifyResults. + // See https://github.com/elastic/elastic-package/pull/717. 
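+		// Compare the simulated documents against the stored "-expected.json" file and validate their fields.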
+ err = r.verifyResults(testCaseFile, tc.config, result, fieldsValidator) + if e, ok := err.(benchrunner.ErrTestCaseFailed); ok { + tr.FailureMsg = e.Error() + tr.FailureDetails = e.Details + + results = append(results, tr) + continue + } + if err != nil { + err := errors.Wrap(err, "verifying test result failed") + tr.ErrorMsg = err.Error() + results = append(results, tr) + continue + } + + if r.options.WithCoverage { + tr.Coverage, err = GetPipelineCoverage(r.options, r.pipelines) + if err != nil { + return nil, errors.Wrap(err, "error calculating pipeline coverage") + } + } + results = append(results, tr) + } + + if r.options.Benchmark.Enabled { + start := time.Now() + tr := benchrunner.TestResult{ + TestType: TestType + " benchmark", + Package: r.options.TestFolder.Package, + DataStream: r.options.TestFolder.DataStream, + } + if tr.Benchmark, err = BenchmarkPipeline(r.options); err != nil { + tr.ErrorMsg = err.Error() + } + tr.TimeElapsed = time.Since(start) + results = append(results, tr) + } + + return results, nil +} + +func (r *runner) listTestCaseFiles() ([]string, error) { + return listTestCaseFiles(r.options.TestFolder.Path) +} + +func listTestCaseFiles(path string) ([]string, error) { + fis, err := os.ReadDir(path) + if err != nil { + return nil, errors.Wrapf(err, "reading pipeline tests failed (path: %s)", path) + } + + var files []string + for _, fi := range fis { + if strings.HasSuffix(fi.Name(), expectedTestResultSuffix) || + strings.HasSuffix(fi.Name(), configTestSuffixYAML) { + continue + } + files = append(files, fi.Name()) + } + return files, nil +} + +func (r *runner) loadTestCaseFile(testCaseFile string) (*testCase, error) { + return loadTestCaseFile(filepath.Join(r.options.TestFolder.Path, testCaseFile)) +} + +func loadTestCaseFile(testCasePath string) (*testCase, error) { + testCaseData, err := os.ReadFile(testCasePath) + if err != nil { + return nil, errors.Wrapf(err, "reading input file failed (testCasePath: %s)", testCasePath) + } + + config, err := readConfigForTestCase(testCasePath) + if err != nil { + return nil, errors.Wrapf(err, "reading config for test case failed (testCasePath: %s)", testCasePath) + } + + testCaseFile := filepath.Base(testCasePath) + if config.Skip != nil { + return &testCase{ + name: testCaseFile, + config: config, + }, nil + } + + ext := filepath.Ext(testCaseFile) + + var entries []json.RawMessage + switch ext { + case ".json": + entries, err = readTestCaseEntriesForEvents(testCaseData) + if err != nil { + return nil, errors.Wrapf(err, "reading test case entries for events failed (testCasePath: %s)", testCasePath) + } + case ".log": + entries, err = readTestCaseEntriesForRawInput(testCaseData, config) + if err != nil { + return nil, errors.Wrapf(err, "creating test case entries for raw input failed (testCasePath: %s)", testCasePath) + } + default: + return nil, fmt.Errorf("unsupported extension for test case file (ext: %s)", ext) + } + + tc, err := createTestCase(testCaseFile, entries, config) + if err != nil { + return nil, errors.Wrapf(err, "can't create test case (testCasePath: %s)", testCasePath) + } + return tc, nil +} + +func (r *runner) verifyResults(testCaseFile string, config *testConfig, result *testResult, fieldsValidator *fields.Validator) error { + testCasePath := filepath.Join(r.options.TestFolder.Path, testCaseFile) + + if r.options.GenerateTestResult { + // TODO: Add tests to cover regressive use of json.Unmarshal in writeTestResult. + // See https://github.com/elastic/elastic-package/pull/717. 
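+		// Regenerate the "-expected.json" file from the documents returned by the pipeline.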
+	err := writeTestResult(testCasePath, result)
+	if err != nil {
+		return errors.Wrap(err, "writing test result failed")
+	}
+	}
+
+	err := compareResults(testCasePath, config, result)
+	if _, ok := err.(benchrunner.ErrTestCaseFailed); ok {
+		return err
+	}
+	if err != nil {
+		return errors.Wrap(err, "comparing test results failed")
+	}
+
+	result = stripEmptyTestResults(result)
+
+	err = verifyDynamicFields(result, config)
+	if err != nil {
+		return err
+	}
+
+	err = verifyFieldsInTestResult(result, fieldsValidator)
+	if err != nil {
+		return err
+	}
+	return nil
+}
+
+// stripEmptyTestResults removes nil events from the test result. These nils represent
+// documents dropped by the pipeline, for example when it uses a "drop" processor.
+func stripEmptyTestResults(result *testResult) *testResult {
+	var tr testResult
+	for _, event := range result.events {
+		if event == nil {
+			continue
+		}
+		tr.events = append(tr.events, event)
+	}
+	return &tr
+}
+
+func verifyDynamicFields(result *testResult, config *testConfig) error {
+	if config == nil || config.DynamicFields == nil {
+		return nil
+	}
+
+	var multiErr multierror.Error
+	for _, event := range result.events {
+		var m common.MapStr
+		err := jsonUnmarshalUsingNumber(event, &m)
+		if err != nil {
+			return errors.Wrap(err, "can't unmarshal event")
+		}
+
+		for key, pattern := range config.DynamicFields {
+			val, err := m.GetValue(key)
+			if err != nil && err != common.ErrKeyNotFound {
+				return errors.Wrap(err, "can't read dynamic field")
+			}
+
+			valStr, ok := val.(string)
+			if !ok {
+				continue // regular expressions can verify only string values
+			}
+
+			matched, err := regexp.MatchString(pattern, valStr)
+			if err != nil {
+				return errors.Wrap(err, "pattern matching for dynamic field failed")
+			}
+
+			if !matched {
+				multiErr = append(multiErr, fmt.Errorf("dynamic field \"%s\" doesn't match the pattern (%s): %s",
+					key, pattern, valStr))
+			}
+		}
+	}
+
+	if len(multiErr) > 0 {
+		return benchrunner.ErrTestCaseFailed{
+			Reason:  "one or more problems with dynamic fields found in documents",
+			Details: multiErr.Unique().Error(),
+		}
+	}
+	return nil
+}
+
+func verifyFieldsInTestResult(result *testResult, fieldsValidator *fields.Validator) error {
+	var multiErr multierror.Error
+	for _, event := range result.events {
+		err := checkErrorMessage(event)
+		if err != nil {
+			multiErr = append(multiErr, err)
+			continue // all fields can be wrong, no need to validate them
+		}
+
+		errs := fieldsValidator.ValidateDocumentBody(event)
+		if errs != nil {
+			multiErr = append(multiErr, errs...)
+ } + } + + if len(multiErr) > 0 { + return benchrunner.ErrTestCaseFailed{ + Reason: "one or more problems with fields found in documents", + Details: multiErr.Unique().Error(), + } + } + return nil +} + +func checkErrorMessage(event json.RawMessage) error { + var pipelineError struct { + Error struct { + Message interface{} + } + } + err := jsonUnmarshalUsingNumber(event, &pipelineError) + if err != nil { + return errors.Wrapf(err, "can't unmarshal event to check pipeline error: %#q", event) + } + + switch m := pipelineError.Error.Message.(type) { + case nil: + return nil + case string, []string: + return fmt.Errorf("unexpected pipeline error: %s", m) + default: + return fmt.Errorf("unexpected pipeline error (unexpected error.message type %T): %[1]v", m) + } +} + +func init() { + benchrunner.RegisterRunner(&runner{}) +} diff --git a/internal/benchrunner/runners/pipeline/runner_test.go b/internal/benchrunner/runners/pipeline/runner_test.go new file mode 100644 index 0000000000..d8c04ddbb0 --- /dev/null +++ b/internal/benchrunner/runners/pipeline/runner_test.go @@ -0,0 +1,149 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package pipeline + +import ( + "encoding/json" + "fmt" + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +const ( + firstTestResult = "first" + secondTestResult = "second" + thirdTestResult = "third" + + emptyTestResult = "" +) + +func TestStripEmptyTestResults(t *testing.T) { + given := &testResult{ + events: []json.RawMessage{ + []byte(firstTestResult), + nil, + nil, + []byte(emptyTestResult), + []byte(secondTestResult), + nil, + []byte(thirdTestResult), + nil, + }, + } + + actual := stripEmptyTestResults(given) + require.Len(t, actual.events, 4) + require.Equal(t, actual.events[0], json.RawMessage(firstTestResult)) + require.Equal(t, actual.events[1], json.RawMessage(emptyTestResult)) + require.Equal(t, actual.events[2], json.RawMessage(secondTestResult)) + require.Equal(t, actual.events[3], json.RawMessage(thirdTestResult)) +} + +var jsonUnmarshalUsingNumberTests = []struct { + name string + msg string +}{ + { + name: "empty", + msg: "", // Will error "unexpected end of JSON input". + }, + { + name: "string", + msg: `"message"`, + }, + { + name: "array", + msg: "[1,2,3,4,5]", + }, + { + name: "object", + msg: `{"key":42}`, + }, + { + name: "object", + msg: `{"key":42}answer`, // Will error "invalid character 'a' after top-level value". + }, + // Test extra data whitespace parity with json.Unmarshal for error parity. + { + name: "object", + msg: `{"key":42} `, + }, + { + name: "object", + msg: `{"key":42}` + "\t", + }, + { + name: "object", + msg: `{"key":42}` + "\r", + }, + { + name: "object", + msg: `{"key":42}` + "\n", + }, + { + name: "0x1p52+1", + msg: fmt.Sprint(uint64(0x1p52) + 1), + }, + { + name: "0x1p53-1", + msg: fmt.Sprint(uint64(0x1p53) - 1), + }, + // The following three cases will fail if json.Unmarshal is used in place + // of jsonUnmarshalUsingNumber, as they are past the cutover. 
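+	// Integers above 2^53 are not all exactly representable as float64, so json.Unmarshal would silently round them.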
+ { + name: "0x1p53+1", + msg: fmt.Sprint(uint64(0x1p53) + 1), + }, + { + name: "0x1p54+1", + msg: fmt.Sprint(uint64(0x1p54) + 1), + }, + { + name: "long", + msg: "9223372036854773807", + }, +} + +func TestJsonUnmarshalUsingNumberRoundTrip(t *testing.T) { + // This tests that jsonUnmarshalUsingNumber behaves the same + // way as json.Unmarshal with the exception that numbers are + // not unmarshaled through float64. This is important to avoid + // low-bit truncation of long numeric values that are greater + // than or equal to 0x1p53, the limit of bijective equivalence + // with 64 bit-integers. + + for _, test := range jsonUnmarshalUsingNumberTests { + t.Run(test.name, func(t *testing.T) { + var val interface{} + err := jsonUnmarshalUsingNumber([]byte(test.msg), &val) + + // Confirm that we get the same errors with jsonUnmarshalUsingNumber + // as are returned by json.Unmarshal. + jerr := json.Unmarshal([]byte(test.msg), new(interface{})) + if (err == nil) != (jerr == nil) { + t.Errorf("unexpected error: got:%#v want:%#v", err, jerr) + } + if err != nil { + return + } + + // Confirm that we round-trip the message correctly without + // alteration beyond trailing whitespace. + got, err := json.Marshal(val) + if err != nil { + t.Errorf("unexpected error: got:%#v want:%#v", err, jerr) + } + // Truncate trailing whitespace from the input since it won't + // be rendered in the output. This set of space characters is + // defined in encoding/json/scanner.go as func isSpace. + want := strings.TrimRight(test.msg, " \t\r\n") + if string(got) != want { + t.Errorf("unexpected result: got:%v want:%v", val, want) + } + }) + } +} diff --git a/internal/benchrunner/runners/pipeline/test_case.go b/internal/benchrunner/runners/pipeline/test_case.go new file mode 100644 index 0000000000..296b4d5bd5 --- /dev/null +++ b/internal/benchrunner/runners/pipeline/test_case.go @@ -0,0 +1,129 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package pipeline + +import ( + "bufio" + "bytes" + "encoding/json" + "regexp" + "strings" + + "github.com/elastic/elastic-package/internal/common" + + "github.com/pkg/errors" +) + +type testCase struct { + name string + config *testConfig + events []json.RawMessage +} + +type testCaseDefinition struct { + Events []json.RawMessage `json:"events"` +} + +func readTestCaseEntriesForEvents(inputData []byte) ([]json.RawMessage, error) { + var tcd testCaseDefinition + err := jsonUnmarshalUsingNumber(inputData, &tcd) + if err != nil { + return nil, errors.Wrap(err, "unmarshalling input data failed") + } + return tcd.Events, nil +} + +func readTestCaseEntriesForRawInput(inputData []byte, config *testConfig) ([]json.RawMessage, error) { + entries, err := readRawInputEntries(inputData, config) + if err != nil { + return nil, errors.Wrap(err, "reading raw input entries failed") + } + + var events []json.RawMessage + for _, entry := range entries { + event := map[string]interface{}{} + event["message"] = entry + + m, err := json.Marshal(&event) + if err != nil { + return nil, errors.Wrap(err, "marshalling mocked event failed") + } + events = append(events, m) + } + return events, nil +} + +func createTestCase(filename string, entries []json.RawMessage, config *testConfig) (*testCase, error) { + var events []json.RawMessage + for _, entry := range entries { + var m common.MapStr + err := jsonUnmarshalUsingNumber(entry, &m) + if err != nil { + return nil, errors.Wrap(err, "can't unmarshal test case entry") + } + + for k, v := range config.Fields { + _, err = m.Put(k, v) + if err != nil { + return nil, errors.Wrap(err, "can't set custom field") + } + } + + event, err := json.Marshal(&m) + if err != nil { + return nil, errors.Wrap(err, "marshalling event failed") + } + events = append(events, event) + } + return &testCase{ + name: filename, + config: config, + events: events, + }, nil +} + +func readRawInputEntries(inputData []byte, c *testConfig) ([]string, error) { + var inputDataEntries []string + + var builder strings.Builder + scanner := bufio.NewScanner(bytes.NewReader(inputData)) + for scanner.Scan() { + line := scanner.Text() + + var body string + if c.Multiline != nil && c.Multiline.FirstLinePattern != "" { + matched, err := regexp.MatchString(c.Multiline.FirstLinePattern, line) + if err != nil { + return nil, errors.Wrapf(err, "regexp matching failed (pattern: %s)", c.Multiline.FirstLinePattern) + } + + if matched { + body = builder.String() + builder.Reset() + } + if builder.Len() > 0 { + builder.WriteByte('\n') + } + builder.WriteString(line) + if !matched || body == "" { + continue + } + } else { + body = line + } + + inputDataEntries = append(inputDataEntries, body) + } + err := scanner.Err() + if err != nil { + return nil, errors.Wrap(err, "reading raw input test file failed") + } + + lastEntry := builder.String() + if len(lastEntry) > 0 { + inputDataEntries = append(inputDataEntries, lastEntry) + } + return inputDataEntries, nil +} diff --git a/internal/benchrunner/runners/pipeline/test_config.go b/internal/benchrunner/runners/pipeline/test_config.go new file mode 100644 index 0000000000..9ec0a2530c --- /dev/null +++ b/internal/benchrunner/runners/pipeline/test_config.go @@ -0,0 +1,72 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package pipeline + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/elastic/go-ucfg/yaml" + "github.com/pkg/errors" + + "github.com/elastic/elastic-package/internal/benchrunner" +) + +const ( + configTestSuffixYAML = "-config.yml" + commonTestConfigYAML = "test-common-config.yml" +) + +type testConfig struct { + benchrunner.SkippableConfig `config:",inline"` + + Multiline *multiline `config:"multiline"` + Fields map[string]interface{} `config:"fields"` + DynamicFields map[string]string `config:"dynamic_fields"` + + // NumericKeywordFields holds a list of fields that have keyword + // type but can be ingested as numeric type. + NumericKeywordFields []string `config:"numeric_keyword_fields"` +} + +type multiline struct { + FirstLinePattern string `config:"first_line_pattern"` +} + +func readConfigForTestCase(testCasePath string) (*testConfig, error) { + testCaseDir := filepath.Dir(testCasePath) + testCaseFile := filepath.Base(testCasePath) + + commonConfigPath := filepath.Join(testCaseDir, commonTestConfigYAML) + var c testConfig + cfg, err := yaml.NewConfigWithFile(commonConfigPath) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return nil, errors.Wrapf(err, "can't load common configuration: %s", commonConfigPath) + } + + if err == nil { + if err := cfg.Unpack(&c); err != nil { + return nil, errors.Wrapf(err, "can't unpack test configuration: %s", commonConfigPath) + } + } + + configPath := filepath.Join(testCaseDir, expectedTestConfigFile(testCaseFile, configTestSuffixYAML)) + cfg, err = yaml.NewConfigWithFile(configPath) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return nil, errors.Wrapf(err, "can't load test configuration: %s", configPath) + } + + if err == nil { + if err := cfg.Unpack(&c); err != nil { + return nil, errors.Wrapf(err, "can't unpack test configuration: %s", configPath) + } + } + return &c, nil +} + +func expectedTestConfigFile(testFile, configTestSuffix string) string { + return fmt.Sprintf("%s%s", testFile, configTestSuffix) +} diff --git a/internal/benchrunner/runners/pipeline/test_result.go b/internal/benchrunner/runners/pipeline/test_result.go new file mode 100644 index 0000000000..b3760e0bb3 --- /dev/null +++ b/internal/benchrunner/runners/pipeline/test_result.go @@ -0,0 +1,260 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package pipeline + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/google/go-cmp/cmp" + "github.com/pkg/errors" + "github.com/pmezard/go-difflib/difflib" + + "github.com/elastic/elastic-package/internal/benchrunner" + "github.com/elastic/elastic-package/internal/common" +) + +const expectedTestResultSuffix = "-expected.json" + +type testResult struct { + events []json.RawMessage +} + +type testResultDefinition struct { + Expected []json.RawMessage `json:"expected"` +} + +func writeTestResult(testCasePath string, result *testResult) error { + testCaseDir := filepath.Dir(testCasePath) + testCaseFile := filepath.Base(testCasePath) + + data, err := marshalTestResultDefinition(result) + if err != nil { + return errors.Wrap(err, "marshalling test result failed") + } + err = os.WriteFile(filepath.Join(testCaseDir, expectedTestResultFile(testCaseFile)), data, 0644) + if err != nil { + return errors.Wrap(err, "writing test result failed") + } + return nil +} + +func compareResults(testCasePath string, config *testConfig, result *testResult) error { + resultsWithoutDynamicFields, err := adjustTestResult(result, config) + if err != nil { + return errors.Wrap(err, "can't adjust test results") + } + + actual, err := marshalTestResultDefinition(resultsWithoutDynamicFields) + if err != nil { + return errors.Wrap(err, "marshalling actual test results failed") + } + + expectedResults, err := readExpectedTestResult(testCasePath, config) + if err != nil { + return errors.Wrap(err, "reading expected test result failed") + } + + expected, err := marshalTestResultDefinition(expectedResults) + if err != nil { + return errors.Wrap(err, "marshalling expected test results failed") + } + + report, err := diffJson(expected, actual) + if err != nil { + return errors.Wrap(err, "comparing expected test result") + } + if report != "" { + return benchrunner.ErrTestCaseFailed{ + Reason: "Expected results are different from actual ones", + Details: report, + } + } + + return nil +} + +func compareJsonNumbers(a, b json.Number) bool { + if a == b { + // Equal literals, so they are the same. 
+ return true + } + if inta, err := a.Int64(); err == nil { + if intb, err := b.Int64(); err == nil { + return inta == intb + } + if floatb, err := b.Float64(); err == nil { + return float64(inta) == floatb + } + } else if floata, err := a.Float64(); err == nil { + if intb, err := b.Int64(); err == nil { + return floata == float64(intb) + } + if floatb, err := b.Float64(); err == nil { + return floata == floatb + } + } + return false +} + +func diffJson(want, got []byte) (string, error) { + var gotVal, wantVal interface{} + err := jsonUnmarshalUsingNumber(want, &wantVal) + if err != nil { + return "", fmt.Errorf("invalid want value: %w", err) + } + err = jsonUnmarshalUsingNumber(got, &gotVal) + if err != nil { + return "", fmt.Errorf("invalid got value: %w", err) + } + if cmp.Equal(gotVal, wantVal, cmp.Comparer(compareJsonNumbers)) { + return "", nil + } + + got, err = marshalNormalizedJSON(gotVal) + if err != nil { + return "", err + } + want, err = marshalNormalizedJSON(wantVal) + if err != nil { + return "", err + } + + var buf bytes.Buffer + err = difflib.WriteUnifiedDiff(&buf, difflib.UnifiedDiff{ + A: difflib.SplitLines(string(want)), + B: difflib.SplitLines(string(got)), + FromFile: "want", + ToFile: "got", + Context: 3, + }) + return buf.String(), err +} + +func readExpectedTestResult(testCasePath string, config *testConfig) (*testResult, error) { + testCaseDir := filepath.Dir(testCasePath) + testCaseFile := filepath.Base(testCasePath) + + path := filepath.Join(testCaseDir, expectedTestResultFile(testCaseFile)) + data, err := os.ReadFile(path) + if err != nil { + return nil, errors.Wrap(err, "reading test result file failed") + } + + u, err := unmarshalTestResult(data) + if err != nil { + return nil, errors.Wrap(err, "unmarshalling expected test result failed") + } + + adjusted, err := adjustTestResult(u, config) + if err != nil { + return nil, errors.Wrap(err, "adjusting test result failed") + } + return adjusted, nil +} + +func adjustTestResult(result *testResult, config *testConfig) (*testResult, error) { + if config == nil || config.DynamicFields == nil { + return result, nil + } + + // Strip dynamic fields from test result + var stripped testResult + for _, event := range result.events { + if event == nil { + stripped.events = append(stripped.events, nil) + continue + } + + var m common.MapStr + err := jsonUnmarshalUsingNumber(event, &m) + if err != nil { + return nil, errors.Wrapf(err, "can't unmarshal event: %s", string(event)) + } + + for key := range config.DynamicFields { + err := m.Delete(key) + if err != nil && err != common.ErrKeyNotFound { + return nil, errors.Wrap(err, "can't remove dynamic field") + } + } + + b, err := json.Marshal(&m) + if err != nil { + return nil, errors.Wrap(err, "can't marshal event") + } + + stripped.events = append(stripped.events, b) + } + return &stripped, nil +} + +func unmarshalTestResult(body []byte) (*testResult, error) { + var trd testResultDefinition + err := jsonUnmarshalUsingNumber(body, &trd) + if err != nil { + return nil, errors.Wrap(err, "unmarshalling test result failed") + } + + var tr testResult + tr.events = append(tr.events, trd.Expected...) + return &tr, nil +} + +// jsonUnmarshalUsingNumber is a drop-in replacement for json.Unmarshal that +// does not default to unmarshaling numeric values to float64 in order to +// prevent low bit truncation of values greater than 1<<53. +// See https://golang.org/cl/6202068 for details. 
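+// Numbers are decoded as json.Number so callers can convert them to int64 or float64 as needed.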
+func jsonUnmarshalUsingNumber(data []byte, v interface{}) error { + dec := json.NewDecoder(bytes.NewReader(data)) + dec.UseNumber() + err := dec.Decode(v) + if err != nil { + if err == io.EOF { + return errors.New("unexpected end of JSON input") + } + return err + } + // Make sure there is no more data after the message + // to approximate json.Unmarshal's behaviour. + if dec.More() { + return fmt.Errorf("more data after top-level value") + } + return nil +} + +func marshalTestResultDefinition(result *testResult) ([]byte, error) { + var trd testResultDefinition + trd.Expected = result.events + body, err := marshalNormalizedJSON(trd) + if err != nil { + return nil, errors.Wrap(err, "marshalling test result definition failed") + } + return body, nil +} + +// marshalNormalizedJSON marshals test results ensuring that field +// order remains consistent independent of field order returned by +// ES to minimize diff noise during changes. +func marshalNormalizedJSON(v interface{}) ([]byte, error) { + msg, err := json.Marshal(v) + if err != nil { + return msg, err + } + var obj interface{} + err = jsonUnmarshalUsingNumber(msg, &obj) + if err != nil { + return msg, err + } + return json.MarshalIndent(obj, "", " ") +} + +func expectedTestResultFile(testFile string) string { + return fmt.Sprintf("%s%s", testFile, expectedTestResultSuffix) +} diff --git a/internal/benchrunner/runners/pipeline/test_result_test.go b/internal/benchrunner/runners/pipeline/test_result_test.go new file mode 100644 index 0000000000..4f57135938 --- /dev/null +++ b/internal/benchrunner/runners/pipeline/test_result_test.go @@ -0,0 +1,45 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package pipeline + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCompareJsonNumber(t *testing.T) { + cases := []struct { + want json.Number + got json.Number + equal bool + }{ + {"0", "0", true}, + {"0.0", "0", true}, + {"0", "0.0", true}, + {"42", "42", true}, + {"42.0", "42", true}, + {"42", "42.0", true}, + {"0.42", "0.42", true}, + {"-10", "-10", true}, + {"-10.0", "-10", true}, + {"6920071768563516000", "6920071768563516000", true}, + {"6920071768563516847", "6920071768563516847", true}, + {"1624617166.182", "1.624617166182E9", true}, + + {"0", "1", false}, + {"0.1", "0", false}, + {"6920071768563516000", "6920071768563516847", false}, + {"1624617166.182", "1.624617166181E9", false}, + } + + for _, c := range cases { + t.Run(c.want.String()+" == "+c.got.String(), func(t *testing.T) { + equal := compareJsonNumbers(c.want, c.got) + assert.Equal(t, c.equal, equal) + }) + } +} diff --git a/internal/benchrunner/runners/runners.go b/internal/benchrunner/runners/runners.go new file mode 100644 index 0000000000..21da99db8a --- /dev/null +++ b/internal/benchrunner/runners/runners.go @@ -0,0 +1,13 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package runners + +import ( + // Registered test runners + _ "github.com/elastic/elastic-package/internal/benchrunner/runners/asset" + _ "github.com/elastic/elastic-package/internal/benchrunner/runners/pipeline" + _ "github.com/elastic/elastic-package/internal/benchrunner/runners/static" + _ "github.com/elastic/elastic-package/internal/benchrunner/runners/system" +) diff --git a/internal/benchrunner/test_config.go b/internal/benchrunner/test_config.go new file mode 100644 index 0000000000..8f57dee91e --- /dev/null +++ b/internal/benchrunner/test_config.go @@ -0,0 +1,30 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package benchrunner + +import ( + "fmt" + "net/url" +) + +// SkipConfig allows a test to be marked as skipped +type SkipConfig struct { + // Reason is the short reason for why this test should be skipped. + Reason string `config:"reason"` + + // Link is a URL where more details about the skipped test can be found. + Link url.URL `config:"url"` +} + +func (s SkipConfig) String() string { + return fmt.Sprintf("%s [%s]", s.Reason, s.Link.String()) +} + +// SkippableConfig is a test configuration that allows skipping. This +// struct is intended for embedding in concrete test configuration structs. +type SkippableConfig struct { + // Skip allows this test to be skipped. + Skip *SkipConfig `config:"skip"` +} diff --git a/internal/cobraext/flags.go b/internal/cobraext/flags.go index 6ef4c22661..76ec0aa7d6 100644 --- a/internal/cobraext/flags.go +++ b/internal/cobraext/flags.go @@ -131,9 +131,6 @@ const ( TestCoverageFlagName = "test-coverage" TestCoverageFlagDescription = "generate Cobertura test coverage reports" - TestBenchFlagName = "bench" - TestBenchFlagDescription = "run benchmarks" - TestBenchCountFlagName = "bench-count" TestBenchCountFlagDescription = "fixed number of docs to use for benchmark" diff --git a/internal/testrunner/report_format.go b/internal/testrunner/report_format.go index 746952c598..0a014eff5a 100644 --- a/internal/testrunner/report_format.go +++ b/internal/testrunner/report_format.go @@ -10,7 +10,7 @@ import "fmt" type TestReportFormat string // ReportFormatFunc defines the report formatter function. -type ReportFormatFunc func(results []TestResult) (string, []string, error) +type ReportFormatFunc func(results []TestResult) (string, error) var reportFormatters = map[TestReportFormat]ReportFormatFunc{} @@ -19,11 +19,11 @@ func RegisterReporterFormat(name TestReportFormat, formatFunc ReportFormatFunc) reportFormatters[name] = formatFunc } -// FormatReport delegates formatting of test results to the registered test report formatter. 
-func FormatReport(name TestReportFormat, results []TestResult) (testReport string, benchmarkReports []string, err error) { +// FormatReport delegates formatting of test results to the registered test report formatter +func FormatReport(name TestReportFormat, results []TestResult) (string, error) { reportFunc, defined := reportFormatters[name] if !defined { - return "", nil, fmt.Errorf("unregistered test report format: %s", name) + return "", fmt.Errorf("unregistered test report format: %s", name) } return reportFunc(results) diff --git a/internal/testrunner/report_output.go b/internal/testrunner/report_output.go index cb0002a584..57e55cceb6 100644 --- a/internal/testrunner/report_output.go +++ b/internal/testrunner/report_output.go @@ -11,16 +11,8 @@ import ( // TestReportOutput represents an output for a test report type TestReportOutput string -// TestReportType represents a test report type (test, benchmark) -type TestReportType string - -const ( - ReportTypeTest TestReportType = "test" - ReportTypeBench TestReportType = "bench" -) - // ReportOutputFunc defines the report writer function. -type ReportOutputFunc func(pkg, report string, format TestReportFormat, ttype TestReportType) error +type ReportOutputFunc func(pkg, report string, format TestReportFormat) error var reportOutputs = map[TestReportOutput]ReportOutputFunc{} @@ -30,11 +22,11 @@ func RegisterReporterOutput(name TestReportOutput, outputFunc ReportOutputFunc) } // WriteReport delegates writing of test results to the registered test report output -func WriteReport(pkg string, name TestReportOutput, report string, format TestReportFormat, ttype TestReportType) error { +func WriteReport(pkg string, name TestReportOutput, report string, format TestReportFormat) error { outputFunc, defined := reportOutputs[name] if !defined { return fmt.Errorf("unregistered test report output: %s", name) } - return outputFunc(pkg, report, format, ttype) + return outputFunc(pkg, report, format) } diff --git a/internal/testrunner/reporters/formats/human.go b/internal/testrunner/reporters/formats/human.go index dcc4916ed2..fc0a01f97a 100644 --- a/internal/testrunner/reporters/formats/human.go +++ b/internal/testrunner/reporters/formats/human.go @@ -9,7 +9,6 @@ import ( "strings" "github.com/jedib0t/go-pretty/table" - "github.com/jedib0t/go-pretty/text" "github.com/elastic/elastic-package/internal/testrunner" ) @@ -23,30 +22,11 @@ const ( ReportFormatHuman testrunner.TestReportFormat = "human" ) -func reportHumanFormat(results []testrunner.TestResult) (string, []string, error) { +func reportHumanFormat(results []testrunner.TestResult) (string, error) { if len(results) == 0 { - return "No test results", nil, nil + return "No test results", nil } - var benchmarks []testrunner.BenchmarkResult - for _, r := range results { - if r.Benchmark != nil { - benchmarks = append(benchmarks, *r.Benchmark) - } - } - - testFmtd, err := reportHumanFormatTest(results) - if err != nil { - return "", nil, err - } - benchFmtd, err := reportHumanFormatBenchmark(benchmarks) - if err != nil { - return "", nil, err - } - return testFmtd, benchFmtd, nil -} - -func reportHumanFormatTest(results []testrunner.TestResult) (string, error) { var report strings.Builder headerPrinted := false @@ -90,34 +70,3 @@ func reportHumanFormatTest(results []testrunner.TestResult) (string, error) { report.WriteString(t.Render()) return report.String(), nil } - -func reportHumanFormatBenchmark(benchmarks []testrunner.BenchmarkResult) ([]string, error) { - var textReports []string - for _, b := 
range benchmarks { - var report strings.Builder - if len(b.Parameters) > 0 { - report.WriteString(renderBenchmarkTable("parameters", b.Parameters) + "\n") - } - for _, test := range b.Tests { - report.WriteString(renderBenchmarkTable(test.Name, test.Results) + "\n") - } - textReports = append(textReports, report.String()) - } - return textReports, nil -} - -func renderBenchmarkTable(title string, values []testrunner.BenchmarkValue) string { - t := table.NewWriter() - t.SetStyle(table.StyleRounded) - t.SetTitle(title) - t.SetColumnConfigs([]table.ColumnConfig{ - { - Number: 2, - Align: text.AlignRight, - }, - }) - for _, r := range values { - t.AppendRow(table.Row{r.Name, r.PrettyValue()}) - } - return t.Render() -} diff --git a/internal/testrunner/reporters/formats/xunit.go b/internal/testrunner/reporters/formats/xunit.go index 52f6ba1b86..e2255b0b57 100644 --- a/internal/testrunner/reporters/formats/xunit.go +++ b/internal/testrunner/reporters/formats/xunit.go @@ -52,25 +52,7 @@ type skipped struct { Message string `xml:"message,attr"` } -func reportXUnitFormat(results []testrunner.TestResult) (string, []string, error) { - var benchmarks []testrunner.BenchmarkResult - for _, r := range results { - if r.Benchmark != nil { - benchmarks = append(benchmarks, *r.Benchmark) - } - } - testFmtd, err := reportXUnitFormatTest(results) - if err != nil { - return "", nil, err - } - benchFmtd, err := reportXUnitFormatBenchmark(benchmarks) - if err != nil { - return "", nil, err - } - return testFmtd, benchFmtd, nil -} - -func reportXUnitFormatTest(results []testrunner.TestResult) (string, error) { +func reportXUnitFormat(results []testrunner.TestResult) (string, error) { // test type => package => data stream => test cases tests := map[string]map[string]map[string][]testCase{} @@ -161,24 +143,3 @@ func reportXUnitFormatTest(results []testrunner.TestResult) (string, error) { return xml.Header + string(out), nil } - -func reportXUnitFormatBenchmark(benchmarks []testrunner.BenchmarkResult) ([]string, error) { - var reports []string - for _, b := range benchmarks { - // Filter out detailed tests. These add too much information for the - // aggregated nature of xUnit reports, creating a lot of noise in Jenkins. 
- var tests []testrunner.BenchmarkTest - for _, t := range b.Tests { - if !t.Detailed { - tests = append(tests, t) - } - } - b.Tests = tests - out, err := xml.MarshalIndent(b, "", " ") - if err != nil { - return nil, errors.Wrap(err, "unable to format benchmark results as xUnit") - } - reports = append(reports, xml.Header+string(out)) - } - return reports, nil -} diff --git a/internal/testrunner/reporters/outputs/file.go b/internal/testrunner/reporters/outputs/file.go index b90df02876..232d714263 100644 --- a/internal/testrunner/reporters/outputs/file.go +++ b/internal/testrunner/reporters/outputs/file.go @@ -26,17 +26,16 @@ const ( ReportOutputFile testrunner.TestReportOutput = "file" ) -func reportToFile(pkg, report string, format testrunner.TestReportFormat, ttype testrunner.TestReportType) error { - dest, err := reportsDir(ttype) +func reportToFile(pkg, report string, format testrunner.TestReportFormat) error { + dest, err := testReportsDir() if err != nil { return errors.Wrap(err, "could not determine test reports folder") } - // Create test reports folder if it doesn't exist _, err = os.Stat(dest) if err != nil && errors.Is(err, os.ErrNotExist) { if err := os.MkdirAll(dest, 0755); err != nil { - return errors.Wrapf(err, "could not create %s reports folder", ttype) + return errors.Wrap(err, "could not create test reports folder") } } @@ -44,30 +43,22 @@ func reportToFile(pkg, report string, format testrunner.TestReportFormat, ttype if format == formats.ReportFormatXUnit { ext = "xml" } + fileName := fmt.Sprintf("%s_%d.%s", pkg, time.Now().UnixNano(), ext) filePath := filepath.Join(dest, fileName) if err := os.WriteFile(filePath, []byte(report+"\n"), 0644); err != nil { - return errors.Wrapf(err, "could not write %s report file", ttype) + return errors.Wrap(err, "could not write report file") } return nil } -// reportsDir returns the location of the directory to store reports. -func reportsDir(ttype testrunner.TestReportType) (string, error) { +// testReportsDir returns the location of the directory to store test reports. 
+func testReportsDir() (string, error) { buildDir, err := builder.BuildDirectory() if err != nil { return "", errors.Wrap(err, "locating build directory failed") } - var folder string - switch ttype { - case testrunner.ReportTypeTest: - folder = "test-results" - case testrunner.ReportTypeBench: - folder = "benchmark-results" - default: - return "", fmt.Errorf("unsupported report type: %s", ttype) - } - return filepath.Join(buildDir, folder), nil + return filepath.Join(buildDir, "test-results"), nil } diff --git a/internal/testrunner/reporters/outputs/stdout.go b/internal/testrunner/reporters/outputs/stdout.go index 226cafd337..db243a2382 100644 --- a/internal/testrunner/reporters/outputs/stdout.go +++ b/internal/testrunner/reporters/outputs/stdout.go @@ -19,14 +19,10 @@ const ( ReportOutputSTDOUT testrunner.TestReportOutput = "stdout" ) -func reportToSTDOUT(pkg, report string, _ testrunner.TestReportFormat, ttype testrunner.TestReportType) error { - reportType := "Test" - if ttype == testrunner.ReportTypeBench { - reportType = "Benchmark" - } - fmt.Printf("--- %s results for package: %s - START ---\n", reportType, pkg) +func reportToSTDOUT(pkg, report string, _ testrunner.TestReportFormat) error { + fmt.Printf("--- Test results for package: %s - START ---\n", pkg) fmt.Println(report) - fmt.Printf("--- %s results for package: %s - END ---\n", reportType, pkg) + fmt.Printf("--- Test results for package: %s - END ---\n", pkg) fmt.Println("Done") return nil diff --git a/internal/testrunner/runners/pipeline/runner.go b/internal/testrunner/runners/pipeline/runner.go index 37956245c0..bb76cc0f2d 100644 --- a/internal/testrunner/runners/pipeline/runner.go +++ b/internal/testrunner/runners/pipeline/runner.go @@ -170,31 +170,13 @@ func (r *runner) run() ([]testrunner.TestResult, error) { results = append(results, tr) } - if r.options.Benchmark.Enabled { - start := time.Now() - tr := testrunner.TestResult{ - TestType: TestType + " benchmark", - Package: r.options.TestFolder.Package, - DataStream: r.options.TestFolder.DataStream, - } - if tr.Benchmark, err = BenchmarkPipeline(r.options); err != nil { - tr.ErrorMsg = err.Error() - } - tr.TimeElapsed = time.Since(start) - results = append(results, tr) - } - return results, nil } func (r *runner) listTestCaseFiles() ([]string, error) { - return listTestCaseFiles(r.options.TestFolder.Path) -} - -func listTestCaseFiles(path string) ([]string, error) { - fis, err := os.ReadDir(path) + fis, err := os.ReadDir(r.options.TestFolder.Path) if err != nil { - return nil, errors.Wrapf(err, "reading pipeline tests failed (path: %s)", path) + return nil, errors.Wrapf(err, "reading pipeline tests failed (path: %s)", r.options.TestFolder.Path) } var files []string @@ -209,10 +191,7 @@ func listTestCaseFiles(path string) ([]string, error) { } func (r *runner) loadTestCaseFile(testCaseFile string) (*testCase, error) { - return loadTestCaseFile(filepath.Join(r.options.TestFolder.Path, testCaseFile)) -} - -func loadTestCaseFile(testCasePath string) (*testCase, error) { + testCasePath := filepath.Join(r.options.TestFolder.Path, testCaseFile) testCaseData, err := os.ReadFile(testCasePath) if err != nil { return nil, errors.Wrapf(err, "reading input file failed (testCasePath: %s)", testCasePath) @@ -223,7 +202,6 @@ func loadTestCaseFile(testCasePath string) (*testCase, error) { return nil, errors.Wrapf(err, "reading config for test case failed (testCasePath: %s)", testCasePath) } - testCaseFile := filepath.Base(testCasePath) if config.Skip != nil { return &testCase{ name: 
testCaseFile, diff --git a/internal/testrunner/testrunner.go b/internal/testrunner/testrunner.go index d6ab320f24..3c6fb4f1d8 100644 --- a/internal/testrunner/testrunner.go +++ b/internal/testrunner/testrunner.go @@ -30,7 +30,6 @@ type TestOptions struct { DeferCleanup time.Duration ServiceVariant string WithCoverage bool - Benchmark BenchmarkConfig } // TestRunner is the interface all test runners must implement. @@ -91,9 +90,6 @@ type TestResult struct { // Coverage details in Cobertura format (optional). Coverage *CoberturaCoverage - - // Benchmark results (optional). - Benchmark *BenchmarkResult } // ResultComposer wraps a TestResult and provides convenience methods for From 8f756bcf4f2bb8df81ca6ad45ca131487d02a84e Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Mon, 5 Sep 2022 10:19:03 +0200 Subject: [PATCH 08/20] Extract common ingest pipeline code --- .../benchrunner/runners/pipeline/runner.go | 3 +- .../ingest/datastream.go} | 96 +------- internal/elasticsearch/ingest/pipeline.go | 74 ++++++ .../runners/pipeline/ingest_pipeline.go | 222 ------------------ .../testrunner/runners/pipeline/runner.go | 9 +- 5 files changed, 92 insertions(+), 312 deletions(-) rename internal/{benchrunner/runners/pipeline/ingest_pipeline.go => elasticsearch/ingest/datastream.go} (58%) delete mode 100644 internal/testrunner/runners/pipeline/ingest_pipeline.go diff --git a/internal/benchrunner/runners/pipeline/runner.go b/internal/benchrunner/runners/pipeline/runner.go index a73b401f3f..3fea84580a 100644 --- a/internal/benchrunner/runners/pipeline/runner.go +++ b/internal/benchrunner/runners/pipeline/runner.go @@ -62,8 +62,7 @@ func (r *runner) TearDown() error { signal.Sleep(r.options.DeferCleanup) } - err := uninstallIngestPipelines(r.options.API, r.pipelines) - if err != nil { + if err := ingest.UninstallPipelines(r.options.API, r.pipelines); err != nil { return errors.Wrap(err, "uninstalling ingest pipelines failed") } return nil diff --git a/internal/benchrunner/runners/pipeline/ingest_pipeline.go b/internal/elasticsearch/ingest/datastream.go similarity index 58% rename from internal/benchrunner/runners/pipeline/ingest_pipeline.go rename to internal/elasticsearch/ingest/datastream.go index fab8fd8035..a421e0583d 100644 --- a/internal/benchrunner/runners/pipeline/ingest_pipeline.go +++ b/internal/elasticsearch/ingest/datastream.go @@ -2,11 +2,10 @@ // or more contributor license agreements. Licensed under the Elastic License; // you may not use this file except in compliance with the Elastic License. 
-package pipeline +package ingest import ( "bytes" - "encoding/json" "fmt" "io" "log" @@ -20,29 +19,12 @@ import ( "github.com/pkg/errors" "github.com/elastic/elastic-package/internal/elasticsearch" - "github.com/elastic/elastic-package/internal/elasticsearch/ingest" "github.com/elastic/elastic-package/internal/packages" ) var ingestPipelineTag = regexp.MustCompile(`{{\s*IngestPipeline.+}}`) -type simulatePipelineRequest struct { - Docs []pipelineDocument `json:"docs"` -} - -type pipelineDocument struct { - Source json.RawMessage `json:"_source"` -} - -type simulatePipelineResponse struct { - Docs []pipelineIngestedDocument `json:"docs"` -} - -type pipelineIngestedDocument struct { - Doc pipelineDocument `json:"doc"` -} - -func installIngestPipelines(api *elasticsearch.API, dataStreamPath string) (string, []ingest.Pipeline, error) { +func InstallDataStreamPipelines(api *elasticsearch.API, dataStreamPath string) (string, []Pipeline, error) { dataStreamManifest, err := packages.ReadDataStreamManifest(filepath.Join(dataStreamPath, packages.DataStreamManifestFile)) if err != nil { return "", nil, errors.Wrap(err, "reading data stream manifest failed") @@ -50,7 +32,7 @@ func installIngestPipelines(api *elasticsearch.API, dataStreamPath string) (stri nonce := time.Now().UnixNano() - mainPipeline := getWithPipelineNameWithNonce(dataStreamManifest.GetPipelineNameOrDefault(), nonce) + mainPipeline := getPipelineNameWithNonce(dataStreamManifest.GetPipelineNameOrDefault(), nonce) pipelines, err := loadIngestPipelineFiles(dataStreamPath, nonce) if err != nil { return "", nil, errors.Wrap(err, "loading ingest pipeline files failed") @@ -64,7 +46,7 @@ func installIngestPipelines(api *elasticsearch.API, dataStreamPath string) (stri return mainPipeline, pipelines, nil } -func loadIngestPipelineFiles(dataStreamPath string, nonce int64) ([]ingest.Pipeline, error) { +func loadIngestPipelineFiles(dataStreamPath string, nonce int64) ([]Pipeline, error) { elasticsearchPath := filepath.Join(dataStreamPath, "elasticsearch", "ingest_pipeline") var pipelineFiles []string @@ -76,7 +58,7 @@ func loadIngestPipelineFiles(dataStreamPath string, nonce int64) ([]ingest.Pipel pipelineFiles = append(pipelineFiles, files...) } - var pipelines []ingest.Pipeline + var pipelines []Pipeline for _, path := range pipelineFiles { c, err := os.ReadFile(path) if err != nil { @@ -89,11 +71,11 @@ func loadIngestPipelineFiles(dataStreamPath string, nonce int64) ([]ingest.Pipel log.Fatalf("invalid IngestPipeline tag in template (path: %s)", path) } pipelineTag := s[1] - return []byte(getWithPipelineNameWithNonce(pipelineTag, nonce)) + return []byte(getPipelineNameWithNonce(pipelineTag, nonce)) }) name := filepath.Base(path) - pipelines = append(pipelines, ingest.Pipeline{ - Name: getWithPipelineNameWithNonce(name[:strings.Index(name, ".")], nonce), + pipelines = append(pipelines, Pipeline{ + Name: getPipelineNameWithNonce(name[:strings.Index(name, ".")], nonce), Format: filepath.Ext(path)[1:], Content: c, }) @@ -101,7 +83,7 @@ func loadIngestPipelineFiles(dataStreamPath string, nonce int64) ([]ingest.Pipel return pipelines, nil } -func installPipelinesInElasticsearch(api *elasticsearch.API, pipelines []ingest.Pipeline) error { +func installPipelinesInElasticsearch(api *elasticsearch.API, pipelines []Pipeline) error { for _, p := range pipelines { if err := installPipeline(api, p); err != nil { return err @@ -110,7 +92,7 @@ func installPipelinesInElasticsearch(api *elasticsearch.API, pipelines []ingest. 
return nil } -func installPipeline(api *elasticsearch.API, pipeline ingest.Pipeline) error { +func installPipeline(api *elasticsearch.API, pipeline Pipeline) error { if err := putIngestPipeline(api, pipeline); err != nil { return err } @@ -118,7 +100,7 @@ func installPipeline(api *elasticsearch.API, pipeline ingest.Pipeline) error { return getIngestPipeline(api, pipeline.Name) } -func putIngestPipeline(api *elasticsearch.API, pipeline ingest.Pipeline) error { +func putIngestPipeline(api *elasticsearch.API, pipeline Pipeline) error { source, err := pipeline.MarshalJSON() if err != nil { return err @@ -163,60 +145,6 @@ func getIngestPipeline(api *elasticsearch.API, pipelineName string) error { return nil } -func uninstallIngestPipelines(api *elasticsearch.API, pipelines []ingest.Pipeline) error { - for _, pipeline := range pipelines { - resp, err := api.Ingest.DeletePipeline(pipeline.Name) - if err != nil { - return errors.Wrapf(err, "DeletePipeline API call failed (pipelineName: %s)", pipeline.Name) - } - resp.Body.Close() - } - return nil -} - -func getWithPipelineNameWithNonce(pipelineName string, nonce int64) string { +func getPipelineNameWithNonce(pipelineName string, nonce int64) string { return fmt.Sprintf("%s-%d", pipelineName, nonce) } - -func simulatePipelineProcessing(api *elasticsearch.API, pipelineName string, tc *testCase) (*testResult, error) { - var request simulatePipelineRequest - for _, event := range tc.events { - request.Docs = append(request.Docs, pipelineDocument{ - Source: event, - }) - } - - requestBody, err := json.Marshal(&request) - if err != nil { - return nil, errors.Wrap(err, "marshalling simulate request failed") - } - - r, err := api.Ingest.Simulate(bytes.NewReader(requestBody), func(request *elasticsearch.IngestSimulateRequest) { - request.PipelineID = pipelineName - }) - if err != nil { - return nil, errors.Wrapf(err, "Simulate API call failed (pipelineName: %s)", pipelineName) - } - defer r.Body.Close() - - body, err := io.ReadAll(r.Body) - if err != nil { - return nil, errors.Wrap(err, "failed to read Simulate API response body") - } - - if r.StatusCode != http.StatusOK { - return nil, errors.Wrapf(elasticsearch.NewError(body), "unexpected response status for Simulate (%d): %s", r.StatusCode, r.Status()) - } - - var response simulatePipelineResponse - err = json.Unmarshal(body, &response) - if err != nil { - return nil, errors.Wrap(err, "unmarshalling simulate request failed") - } - - var tr testResult - for _, doc := range response.Docs { - tr.events = append(tr.events, doc.Doc.Source) - } - return &tr, nil -} diff --git a/internal/elasticsearch/ingest/pipeline.go b/internal/elasticsearch/ingest/pipeline.go index 97b68f0117..7d032f6dbc 100644 --- a/internal/elasticsearch/ingest/pipeline.go +++ b/internal/elasticsearch/ingest/pipeline.go @@ -5,13 +5,33 @@ package ingest import ( + "bytes" "encoding/json" + "io" + "net/http" "strings" + "github.com/elastic/elastic-package/internal/elasticsearch" "github.com/pkg/errors" "gopkg.in/yaml.v3" ) +type simulatePipelineRequest struct { + Docs []pipelineDocument `json:"docs"` +} + +type simulatePipelineResponse struct { + Docs []pipelineIngestedDocument `json:"docs"` +} + +type pipelineDocument struct { + Source json.RawMessage `json:"_source"` +} + +type pipelineIngestedDocument struct { + Doc pipelineDocument `json:"doc"` +} + // Pipeline represents a pipeline resource loaded from a file type Pipeline struct { Name string // Name of the pipeline @@ -47,3 +67,57 @@ func (p *Pipeline) MarshalJSON() (asJSON []byte, 
err error) { } return asJSON, nil } + +func SimulatePipeline(api *elasticsearch.API, pipelineName string, events []json.RawMessage) ([]json.RawMessage, error) { + var request simulatePipelineRequest + for _, event := range events { + request.Docs = append(request.Docs, pipelineDocument{ + Source: event, + }) + } + + requestBody, err := json.Marshal(&request) + if err != nil { + return nil, errors.Wrap(err, "marshalling simulate request failed") + } + + r, err := api.Ingest.Simulate(bytes.NewReader(requestBody), func(request *elasticsearch.IngestSimulateRequest) { + request.PipelineID = pipelineName + }) + if err != nil { + return nil, errors.Wrapf(err, "Simulate API call failed (pipelineName: %s)", pipelineName) + } + defer r.Body.Close() + + body, err := io.ReadAll(r.Body) + if err != nil { + return nil, errors.Wrap(err, "failed to read Simulate API response body") + } + + if r.StatusCode != http.StatusOK { + return nil, errors.Wrapf(elasticsearch.NewError(body), "unexpected response status for Simulate (%d): %s", r.StatusCode, r.Status()) + } + + var response simulatePipelineResponse + err = json.Unmarshal(body, &response) + if err != nil { + return nil, errors.Wrap(err, "unmarshalling simulate request failed") + } + + processedEvents := make([]json.RawMessage, len(response.Docs)) + for i, doc := range response.Docs { + processedEvents[i] = doc.Doc.Source + } + return processedEvents, nil +} + +func UninstallPipelines(api *elasticsearch.API, pipelines []Pipeline) error { + for _, p := range pipelines { + resp, err := api.Ingest.DeletePipeline(p.Name) + if err != nil { + return errors.Wrapf(err, "DeletePipeline API call failed (pipelineName: %s)", p.Name) + } + resp.Body.Close() + } + return nil +} diff --git a/internal/testrunner/runners/pipeline/ingest_pipeline.go b/internal/testrunner/runners/pipeline/ingest_pipeline.go deleted file mode 100644 index fab8fd8035..0000000000 --- a/internal/testrunner/runners/pipeline/ingest_pipeline.go +++ /dev/null @@ -1,222 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package pipeline - -import ( - "bytes" - "encoding/json" - "fmt" - "io" - "log" - "net/http" - "os" - "path/filepath" - "regexp" - "strings" - "time" - - "github.com/pkg/errors" - - "github.com/elastic/elastic-package/internal/elasticsearch" - "github.com/elastic/elastic-package/internal/elasticsearch/ingest" - "github.com/elastic/elastic-package/internal/packages" -) - -var ingestPipelineTag = regexp.MustCompile(`{{\s*IngestPipeline.+}}`) - -type simulatePipelineRequest struct { - Docs []pipelineDocument `json:"docs"` -} - -type pipelineDocument struct { - Source json.RawMessage `json:"_source"` -} - -type simulatePipelineResponse struct { - Docs []pipelineIngestedDocument `json:"docs"` -} - -type pipelineIngestedDocument struct { - Doc pipelineDocument `json:"doc"` -} - -func installIngestPipelines(api *elasticsearch.API, dataStreamPath string) (string, []ingest.Pipeline, error) { - dataStreamManifest, err := packages.ReadDataStreamManifest(filepath.Join(dataStreamPath, packages.DataStreamManifestFile)) - if err != nil { - return "", nil, errors.Wrap(err, "reading data stream manifest failed") - } - - nonce := time.Now().UnixNano() - - mainPipeline := getWithPipelineNameWithNonce(dataStreamManifest.GetPipelineNameOrDefault(), nonce) - pipelines, err := loadIngestPipelineFiles(dataStreamPath, nonce) - if err != nil { - return "", nil, errors.Wrap(err, "loading ingest pipeline files failed") - } - - err = installPipelinesInElasticsearch(api, pipelines) - - if err != nil { - return "", nil, errors.Wrap(err, "installing pipelines failed") - } - return mainPipeline, pipelines, nil -} - -func loadIngestPipelineFiles(dataStreamPath string, nonce int64) ([]ingest.Pipeline, error) { - elasticsearchPath := filepath.Join(dataStreamPath, "elasticsearch", "ingest_pipeline") - - var pipelineFiles []string - for _, pattern := range []string{"*.json", "*.yml"} { - files, err := filepath.Glob(filepath.Join(elasticsearchPath, pattern)) - if err != nil { - return nil, errors.Wrapf(err, "listing '%s' in '%s'", pattern, elasticsearchPath) - } - pipelineFiles = append(pipelineFiles, files...) - } - - var pipelines []ingest.Pipeline - for _, path := range pipelineFiles { - c, err := os.ReadFile(path) - if err != nil { - return nil, errors.Wrapf(err, "reading ingest pipeline failed (path: %s)", path) - } - - c = ingestPipelineTag.ReplaceAllFunc(c, func(found []byte) []byte { - s := strings.Split(string(found), `"`) - if len(s) != 3 { - log.Fatalf("invalid IngestPipeline tag in template (path: %s)", path) - } - pipelineTag := s[1] - return []byte(getWithPipelineNameWithNonce(pipelineTag, nonce)) - }) - name := filepath.Base(path) - pipelines = append(pipelines, ingest.Pipeline{ - Name: getWithPipelineNameWithNonce(name[:strings.Index(name, ".")], nonce), - Format: filepath.Ext(path)[1:], - Content: c, - }) - } - return pipelines, nil -} - -func installPipelinesInElasticsearch(api *elasticsearch.API, pipelines []ingest.Pipeline) error { - for _, p := range pipelines { - if err := installPipeline(api, p); err != nil { - return err - } - } - return nil -} - -func installPipeline(api *elasticsearch.API, pipeline ingest.Pipeline) error { - if err := putIngestPipeline(api, pipeline); err != nil { - return err - } - // Just to be sure the pipeline has been uploaded. 
- return getIngestPipeline(api, pipeline.Name) -} - -func putIngestPipeline(api *elasticsearch.API, pipeline ingest.Pipeline) error { - source, err := pipeline.MarshalJSON() - if err != nil { - return err - } - r, err := api.Ingest.PutPipeline(pipeline.Name, bytes.NewReader(source)) - if err != nil { - return errors.Wrapf(err, "PutPipeline API call failed (pipelineName: %s)", pipeline.Name) - } - defer r.Body.Close() - - body, err := io.ReadAll(r.Body) - if err != nil { - return errors.Wrapf(err, "failed to read PutPipeline API response body (pipelineName: %s)", pipeline.Name) - } - - if r.StatusCode != http.StatusOK { - - return errors.Wrapf(elasticsearch.NewError(body), "unexpected response status for PutPipeline (%d): %s (pipelineName: %s)", - r.StatusCode, r.Status(), pipeline.Name) - } - return nil -} - -func getIngestPipeline(api *elasticsearch.API, pipelineName string) error { - r, err := api.Ingest.GetPipeline(func(request *elasticsearch.IngestGetPipelineRequest) { - request.PipelineID = pipelineName - }) - if err != nil { - return errors.Wrapf(err, "GetPipeline API call failed (pipelineName: %s)", pipelineName) - } - defer r.Body.Close() - - body, err := io.ReadAll(r.Body) - if err != nil { - return errors.Wrapf(err, "failed to read GetPipeline API response body (pipelineName: %s)", pipelineName) - } - - if r.StatusCode != http.StatusOK { - return errors.Wrapf(elasticsearch.NewError(body), "unexpected response status for GetPipeline (%d): %s (pipelineName: %s)", - r.StatusCode, r.Status(), pipelineName) - } - return nil -} - -func uninstallIngestPipelines(api *elasticsearch.API, pipelines []ingest.Pipeline) error { - for _, pipeline := range pipelines { - resp, err := api.Ingest.DeletePipeline(pipeline.Name) - if err != nil { - return errors.Wrapf(err, "DeletePipeline API call failed (pipelineName: %s)", pipeline.Name) - } - resp.Body.Close() - } - return nil -} - -func getWithPipelineNameWithNonce(pipelineName string, nonce int64) string { - return fmt.Sprintf("%s-%d", pipelineName, nonce) -} - -func simulatePipelineProcessing(api *elasticsearch.API, pipelineName string, tc *testCase) (*testResult, error) { - var request simulatePipelineRequest - for _, event := range tc.events { - request.Docs = append(request.Docs, pipelineDocument{ - Source: event, - }) - } - - requestBody, err := json.Marshal(&request) - if err != nil { - return nil, errors.Wrap(err, "marshalling simulate request failed") - } - - r, err := api.Ingest.Simulate(bytes.NewReader(requestBody), func(request *elasticsearch.IngestSimulateRequest) { - request.PipelineID = pipelineName - }) - if err != nil { - return nil, errors.Wrapf(err, "Simulate API call failed (pipelineName: %s)", pipelineName) - } - defer r.Body.Close() - - body, err := io.ReadAll(r.Body) - if err != nil { - return nil, errors.Wrap(err, "failed to read Simulate API response body") - } - - if r.StatusCode != http.StatusOK { - return nil, errors.Wrapf(elasticsearch.NewError(body), "unexpected response status for Simulate (%d): %s", r.StatusCode, r.Status()) - } - - var response simulatePipelineResponse - err = json.Unmarshal(body, &response) - if err != nil { - return nil, errors.Wrap(err, "unmarshalling simulate request failed") - } - - var tr testResult - for _, doc := range response.Docs { - tr.events = append(tr.events, doc.Doc.Source) - } - return &tr, nil -} diff --git a/internal/testrunner/runners/pipeline/runner.go b/internal/testrunner/runners/pipeline/runner.go index bb76cc0f2d..487564d18b 100644 --- 
a/internal/testrunner/runners/pipeline/runner.go +++ b/internal/testrunner/runners/pipeline/runner.go @@ -62,8 +62,7 @@ func (r *runner) TearDown() error { signal.Sleep(r.options.DeferCleanup) } - err := uninstallIngestPipelines(r.options.API, r.pipelines) - if err != nil { + if err := ingest.UninstallPipelines(r.options.API, r.pipelines); err != nil { return errors.Wrap(err, "uninstalling ingest pipelines failed") } return nil @@ -90,7 +89,7 @@ func (r *runner) run() ([]testrunner.TestResult, error) { } var entryPipeline string - entryPipeline, r.pipelines, err = installIngestPipelines(r.options.API, dataStreamPath) + entryPipeline, r.pipelines, err = ingest.InstallDataStreamPipelines(r.options.API, dataStreamPath) if err != nil { return nil, errors.Wrap(err, "installing ingest pipelines failed") } @@ -125,7 +124,7 @@ func (r *runner) run() ([]testrunner.TestResult, error) { continue } - result, err := simulatePipelineProcessing(r.options.API, entryPipeline, tc) + processedEvents, err := ingest.SimulatePipeline(r.options.API, entryPipeline, tc.events) if err != nil { err := errors.Wrap(err, "simulating pipeline processing failed") tr.ErrorMsg = err.Error() @@ -133,6 +132,8 @@ func (r *runner) run() ([]testrunner.TestResult, error) { continue } + result := &testResult{events: processedEvents} + tr.TimeElapsed = time.Since(startTime) fieldsValidator, err := fields.CreateValidatorForDirectory(dataStreamPath, fields.WithNumericKeywordFields(tc.config.NumericKeywordFields), From 1f4c47428ec244e1d828b551aae43cc93fe53820 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Mon, 5 Sep 2022 10:19:58 +0200 Subject: [PATCH 09/20] Remove unused code from benchmark runner --- internal/benchrunner/coverageoutput.go | 400 ------------------ internal/benchrunner/coverageoutput_test.go | 279 ------------ .../benchrunner/runners/pipeline/coverage.go | 136 ------ .../runners/pipeline/test_config.go | 72 ---- .../runners/pipeline/test_result.go | 260 ------------ .../runners/pipeline/test_result_test.go | 45 -- internal/benchrunner/test_config.go | 30 -- 7 files changed, 1222 deletions(-) delete mode 100644 internal/benchrunner/coverageoutput.go delete mode 100644 internal/benchrunner/coverageoutput_test.go delete mode 100644 internal/benchrunner/runners/pipeline/coverage.go delete mode 100644 internal/benchrunner/runners/pipeline/test_config.go delete mode 100644 internal/benchrunner/runners/pipeline/test_result.go delete mode 100644 internal/benchrunner/runners/pipeline/test_result_test.go delete mode 100644 internal/benchrunner/test_config.go diff --git a/internal/benchrunner/coverageoutput.go b/internal/benchrunner/coverageoutput.go deleted file mode 100644 index 93aa1028b3..0000000000 --- a/internal/benchrunner/coverageoutput.go +++ /dev/null @@ -1,400 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package benchrunner - -import ( - "bytes" - "encoding/xml" - "fmt" - "os" - "path/filepath" - "time" - - "github.com/pkg/errors" - - "github.com/elastic/elastic-package/internal/builder" - "github.com/elastic/elastic-package/internal/multierror" -) - -const coverageDtd = `` - -type testCoverageDetails struct { - packageName string - testType TestType - dataStreams map[string][]string // : - cobertura *CoberturaCoverage // For tests to provide custom Cobertura results. 
- errors multierror.Error -} - -func newTestCoverageDetails(packageName string, testType TestType) *testCoverageDetails { - return &testCoverageDetails{packageName: packageName, testType: testType, dataStreams: map[string][]string{}} -} - -func (tcd *testCoverageDetails) withUncoveredDataStreams(dataStreams []string) *testCoverageDetails { - for _, wt := range dataStreams { - tcd.dataStreams[wt] = []string{} - } - return tcd -} - -func (tcd *testCoverageDetails) withTestResults(results []TestResult) *testCoverageDetails { - for _, result := range results { - if _, ok := tcd.dataStreams[result.DataStream]; !ok { - tcd.dataStreams[result.DataStream] = []string{} - } - tcd.dataStreams[result.DataStream] = append(tcd.dataStreams[result.DataStream], result.Name) - if tcd.cobertura != nil && result.Coverage != nil { - if err := tcd.cobertura.merge(result.Coverage); err != nil { - tcd.errors = append(tcd.errors, errors.Wrapf(err, "can't merge Cobertura coverage for test `%s`", result.Name)) - } - } else if tcd.cobertura == nil { - tcd.cobertura = result.Coverage - } - } - return tcd -} - -// CoberturaCoverage is the root element for a Cobertura XML report. -type CoberturaCoverage struct { - XMLName xml.Name `xml:"coverage"` - LineRate float32 `xml:"line-rate,attr"` - BranchRate float32 `xml:"branch-rate,attr"` - Version string `xml:"version,attr"` - Timestamp int64 `xml:"timestamp,attr"` - LinesCovered int64 `xml:"lines-covered,attr"` - LinesValid int64 `xml:"lines-valid,attr"` - BranchesCovered int64 `xml:"branches-covered,attr"` - BranchesValid int64 `xml:"branches-valid,attr"` - Complexity float32 `xml:"complexity,attr"` - Sources []*CoberturaSource `xml:"sources>source"` - Packages []*CoberturaPackage `xml:"packages>package"` -} - -// CoberturaSource represents a base path to the covered source code. -type CoberturaSource struct { - Path string `xml:",chardata"` -} - -// CoberturaPackage represents a package in a Cobertura XML report. -type CoberturaPackage struct { - Name string `xml:"name,attr"` - LineRate float32 `xml:"line-rate,attr"` - BranchRate float32 `xml:"branch-rate,attr"` - Complexity float32 `xml:"complexity,attr"` - Classes []*CoberturaClass `xml:"classes>class"` -} - -// CoberturaClass represents a class in a Cobertura XML report. -type CoberturaClass struct { - Name string `xml:"name,attr"` - Filename string `xml:"filename,attr"` - LineRate float32 `xml:"line-rate,attr"` - BranchRate float32 `xml:"branch-rate,attr"` - Complexity float32 `xml:"complexity,attr"` - Methods []*CoberturaMethod `xml:"methods>method"` - Lines []*CoberturaLine `xml:"lines>line"` -} - -// CoberturaMethod represents a method in a Cobertura XML report. -type CoberturaMethod struct { - Name string `xml:"name,attr"` - Signature string `xml:"signature,attr"` - LineRate float32 `xml:"line-rate,attr"` - BranchRate float32 `xml:"branch-rate,attr"` - Complexity float32 `xml:"complexity,attr"` - Hits int64 `xml:"hits,attr"` - Lines []*CoberturaLine `xml:"lines>line"` -} - -// CoberturaLine represents a source line in a Cobertura XML report. 
-type CoberturaLine struct { - Number int `xml:"number,attr"` - Hits int64 `xml:"hits,attr"` -} - -func (c *CoberturaCoverage) bytes() ([]byte, error) { - out, err := xml.MarshalIndent(&c, "", " ") - if err != nil { - return nil, errors.Wrap(err, "unable to format test results as xUnit") - } - - var buffer bytes.Buffer - buffer.WriteString(xml.Header) - buffer.WriteString("\n") - buffer.WriteString(coverageDtd) - buffer.WriteString("\n") - buffer.Write(out) - return buffer.Bytes(), nil -} - -// merge merges two coverage reports for a given class. -func (c *CoberturaClass) merge(b *CoberturaClass) error { - // Check preconditions: classes should be the same. - equal := c.Name == b.Name && - c.Filename == b.Filename && - len(c.Lines) == len(b.Lines) && - len(c.Methods) == len(b.Methods) - for idx := range c.Lines { - equal = equal && c.Lines[idx].Number == b.Lines[idx].Number - } - for idx := range c.Methods { - equal = equal && c.Methods[idx].Name == b.Methods[idx].Name && - len(c.Methods[idx].Lines) == len(b.Methods[idx].Lines) - } - if !equal { - return errors.Errorf("merging incompatible classes: %+v != %+v", *c, *b) - } - // Update methods - for idx := range b.Methods { - c.Methods[idx].Hits += b.Methods[idx].Hits - for l := range b.Methods[idx].Lines { - c.Methods[idx].Lines[l].Hits += b.Methods[idx].Lines[l].Hits - } - } - // Rebuild lines - c.Lines = nil - for _, m := range c.Methods { - c.Lines = append(c.Lines, m.Lines...) - } - return nil -} - -// merge merges two coverage reports for a given package. -func (p *CoberturaPackage) merge(b *CoberturaPackage) error { - // Merge classes - for _, class := range b.Classes { - var target *CoberturaClass - for _, existing := range p.Classes { - if existing.Name == class.Name { - target = existing - break - } - } - if target != nil { - if err := target.merge(class); err != nil { - return err - } - } else { - p.Classes = append(p.Classes, class) - } - } - return nil -} - -// merge merges two coverage reports. -func (c *CoberturaCoverage) merge(b *CoberturaCoverage) error { - // Merge source paths - for _, path := range b.Sources { - found := false - for _, existing := range c.Sources { - if found = existing.Path == path.Path; found { - break - } - } - if !found { - c.Sources = append(c.Sources, path) - } - } - - // Merge packages - for _, pkg := range b.Packages { - var target *CoberturaPackage - for _, existing := range c.Packages { - if existing.Name == pkg.Name { - target = existing - break - } - } - if target != nil { - if err := target.merge(pkg); err != nil { - return err - } - } else { - c.Packages = append(c.Packages, pkg) - } - } - - // Recalculate global line coverage count - c.LinesValid = 0 - c.LinesCovered = 0 - for _, pkg := range c.Packages { - for _, cls := range pkg.Classes { - for _, line := range cls.Lines { - c.LinesValid++ - if line.Hits > 0 { - c.LinesCovered++ - } - } - } - } - return nil -} - -// WriteCoverage function calculates test coverage for the given package. -// It requires to execute tests for all data streams (same test type), so the coverage can be calculated properly. -func WriteCoverage(packageRootPath, packageName string, testType TestType, results []TestResult) error { - details, err := collectTestCoverageDetails(packageRootPath, packageName, testType, results) - if err != nil { - return errors.Wrap(err, "can't collect test coverage details") - } - - // Use provided cobertura report, or generate a custom report if not available. 
- report := details.cobertura - if report == nil { - report = transformToCoberturaReport(details) - } - - err = writeCoverageReportFile(report, packageName) - if err != nil { - return errors.Wrap(err, "can't write test coverage report file") - } - return nil -} - -func collectTestCoverageDetails(packageRootPath, packageName string, testType TestType, results []TestResult) (*testCoverageDetails, error) { - withoutTests, err := findDataStreamsWithoutTests(packageRootPath, testType) - if err != nil { - return nil, errors.Wrap(err, "can't find data streams without tests") - } - - details := newTestCoverageDetails(packageName, testType). - withUncoveredDataStreams(withoutTests). - withTestResults(results) - if len(details.errors) > 0 { - return nil, details.errors - } - return details, nil -} - -func findDataStreamsWithoutTests(packageRootPath string, testType TestType) ([]string, error) { - var noTests []string - - dataStreamDir := filepath.Join(packageRootPath, "data_stream") - dataStreams, err := os.ReadDir(dataStreamDir) - if errors.Is(err, os.ErrNotExist) { - return noTests, nil // there are packages that don't have any data streams (fleet_server, security_detection_engine) - } else if err != nil { - return nil, errors.Wrap(err, "can't list data streams directory") - } - - for _, dataStream := range dataStreams { - if !dataStream.IsDir() { - continue - } - - expected, err := verifyTestExpected(packageRootPath, dataStream.Name(), testType) - if err != nil { - return nil, errors.Wrap(err, "can't verify if test is expected") - } - if !expected { - continue - } - - dataStreamTestPath := filepath.Join(packageRootPath, "data_stream", dataStream.Name(), "_dev", "test", string(testType)) - _, err = os.Stat(dataStreamTestPath) - if errors.Is(err, os.ErrNotExist) { - noTests = append(noTests, dataStream.Name()) - continue - } - if err != nil { - return nil, errors.Wrapf(err, "can't stat path: %s", dataStreamTestPath) - } - } - return noTests, nil -} - -// verifyTestExpected function checks if tests are actually expected. -// Pipeline tests require an ingest pipeline to be defined in the data stream. 
-func verifyTestExpected(packageRootPath string, dataStreamName string, testType TestType) (bool, error) { - if testType != "pipeline" { - return true, nil - } - - ingestPipelinePath := filepath.Join(packageRootPath, "data_stream", dataStreamName, "elasticsearch", "ingest_pipeline") - _, err := os.Stat(ingestPipelinePath) - if errors.Is(err, os.ErrNotExist) { - return false, nil - } - if err != nil { - return false, errors.Wrapf(err, "can't stat path: %s", ingestPipelinePath) - } - return true, nil -} - -func transformToCoberturaReport(details *testCoverageDetails) *CoberturaCoverage { - var classes []*CoberturaClass - for dataStream, testCases := range details.dataStreams { - if dataStream == "" { - continue // ignore tests running in the package context (not data stream), mostly referring to installed assets - } - - var methods []*CoberturaMethod - - if len(testCases) == 0 { - methods = append(methods, &CoberturaMethod{ - Name: "Missing", - Lines: []*CoberturaLine{{Number: 1, Hits: 0}}, - }) - } else { - methods = append(methods, &CoberturaMethod{ - Name: "OK", - Lines: []*CoberturaLine{{Number: 1, Hits: 1}}, - }) - } - - aClass := &CoberturaClass{ - Name: string(details.testType), - Filename: details.packageName + "/" + dataStream, - Methods: methods, - } - classes = append(classes, aClass) - } - - return &CoberturaCoverage{ - Timestamp: time.Now().UnixNano(), - Packages: []*CoberturaPackage{ - { - Name: details.packageName, - Classes: classes, - }, - }, - } -} - -func writeCoverageReportFile(report *CoberturaCoverage, packageName string) error { - dest, err := testCoverageReportsDir() - if err != nil { - return errors.Wrap(err, "could not determine test coverage reports folder") - } - - // Create test coverage reports folder if it doesn't exist - _, err = os.Stat(dest) - if err != nil && errors.Is(err, os.ErrNotExist) { - if err := os.MkdirAll(dest, 0755); err != nil { - return errors.Wrap(err, "could not create test coverage reports folder") - } - } - - fileName := fmt.Sprintf("coverage-%s-%d-report.xml", packageName, report.Timestamp) - filePath := filepath.Join(dest, fileName) - - b, err := report.bytes() - if err != nil { - return errors.Wrap(err, "can't marshal test coverage report") - } - - if err := os.WriteFile(filePath, b, 0644); err != nil { - return errors.Wrap(err, "could not write test coverage report file") - } - return nil -} - -func testCoverageReportsDir() (string, error) { - buildDir, err := builder.BuildDirectory() - if err != nil { - return "", errors.Wrap(err, "locating build directory failed") - } - return filepath.Join(buildDir, "test-coverage"), nil -} diff --git a/internal/benchrunner/coverageoutput_test.go b/internal/benchrunner/coverageoutput_test.go deleted file mode 100644 index adc5ceae34..0000000000 --- a/internal/benchrunner/coverageoutput_test.go +++ /dev/null @@ -1,279 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package benchrunner - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestCoberturaCoverage_Merge(t *testing.T) { - tests := []struct { - name string - rhs, lhs, expected CoberturaCoverage - wantErr bool - }{ - { - name: "merge sources", - rhs: CoberturaCoverage{ - Sources: []*CoberturaSource{ - {Path: "/a"}, - {Path: "/c"}, - }, - }, - lhs: CoberturaCoverage{ - Sources: []*CoberturaSource{ - {Path: "/b"}, - {Path: "/c"}, - }, - }, - expected: CoberturaCoverage{ - Sources: []*CoberturaSource{ - {Path: "/a"}, - {Path: "/c"}, - {Path: "/b"}, - }, - }, - }, - { - name: "merge packages and classes", - rhs: CoberturaCoverage{ - Packages: []*CoberturaPackage{ - { - Name: "a", - Classes: []*CoberturaClass{ - {Name: "a.a"}, - {Name: "a.b"}, - }, - }, - { - Name: "b", - Classes: []*CoberturaClass{ - {Name: "b.a"}, - }, - }, - }, - }, - lhs: CoberturaCoverage{ - Packages: []*CoberturaPackage{ - { - Name: "c", - Classes: []*CoberturaClass{ - {Name: "a.a"}, - }, - }, - { - Name: "b", - Classes: []*CoberturaClass{ - {Name: "b.a"}, - {Name: "b.b"}, - }, - }, - }, - }, - expected: CoberturaCoverage{ - Packages: []*CoberturaPackage{ - { - Name: "a", - Classes: []*CoberturaClass{ - {Name: "a.a"}, - {Name: "a.b"}, - }, - }, - { - Name: "b", - Classes: []*CoberturaClass{ - {Name: "b.a"}, - {Name: "b.b"}, - }, - }, - { - Name: "c", - Classes: []*CoberturaClass{ - {Name: "a.a"}, - }, - }, - }, - }, - }, - { - name: "merge methods and lines", - rhs: CoberturaCoverage{ - Packages: []*CoberturaPackage{ - { - Name: "a", - Classes: []*CoberturaClass{ - { - Name: "a.a", - Methods: []*CoberturaMethod{ - { - Name: "foo", - Hits: 2, - Lines: []*CoberturaLine{ - { - Number: 13, - Hits: 2, - }, - { - Number: 14, - Hits: 2, - }, - }, - }, - { - Name: "bar", - Hits: 1, - Lines: []*CoberturaLine{ - { - Number: 24, - Hits: 1, - }, - }, - }, - }, - Lines: []*CoberturaLine{ - { - Number: 13, - Hits: 2, - }, - { - Number: 14, - Hits: 2, - }, - { - Number: 24, - Hits: 1, - }, - }, - }, - }, - }, - }, - }, - lhs: CoberturaCoverage{ - Packages: []*CoberturaPackage{ - { - Name: "a", - Classes: []*CoberturaClass{ - { - Name: "a.a", - Methods: []*CoberturaMethod{ - { - Name: "foo", - Hits: 1, - Lines: []*CoberturaLine{ - { - Number: 13, - Hits: 1, - }, - { - Number: 14, - Hits: 1, - }, - }, - }, - { - Name: "bar", - Hits: 1, - Lines: []*CoberturaLine{ - { - Number: 24, - Hits: 1, - }, - }, - }, - }, - Lines: []*CoberturaLine{ - { - Number: 13, - Hits: 1, - }, - { - Number: 14, - Hits: 1, - }, - { - Number: 24, - Hits: 1, - }, - }, - }, - }, - }, - }, - }, - expected: CoberturaCoverage{ - LinesCovered: 3, - LinesValid: 3, - Packages: []*CoberturaPackage{ - { - Name: "a", - Classes: []*CoberturaClass{ - { - Name: "a.a", - Methods: []*CoberturaMethod{ - { - Name: "foo", - Hits: 3, - Lines: []*CoberturaLine{ - { - Number: 13, - Hits: 3, - }, - { - Number: 14, - Hits: 3, - }, - }, - }, - { - Name: "bar", - Hits: 2, - Lines: []*CoberturaLine{ - { - Number: 24, - Hits: 2, - }, - }, - }, - }, - Lines: []*CoberturaLine{ - { - Number: 13, - Hits: 3, - }, - { - Number: 14, - Hits: 3, - }, - { - Number: 24, - Hits: 2, - }, - }, - }, - }, - }, - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - err := tt.rhs.merge(&tt.lhs) - if !tt.wantErr { - if !assert.NoError(t, err) { - t.Fatal(err) - } - } else { - if !assert.Error(t, err) { - t.Fatal("error expected") - } - } - assert.Equal(t, tt.expected, tt.rhs) - }) - } -} diff --git a/internal/benchrunner/runners/pipeline/coverage.go 
b/internal/benchrunner/runners/pipeline/coverage.go deleted file mode 100644 index a54b6da58b..0000000000 --- a/internal/benchrunner/runners/pipeline/coverage.go +++ /dev/null @@ -1,136 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package pipeline - -import ( - "path/filepath" - "strings" - "time" - - "github.com/pkg/errors" - - "github.com/elastic/elastic-package/internal/benchrunner" - "github.com/elastic/elastic-package/internal/elasticsearch/ingest" - "github.com/elastic/elastic-package/internal/packages" -) - -// GetPipelineCoverage returns a coverage report for the provided set of ingest pipelines. -func GetPipelineCoverage(options benchrunner.TestOptions, pipelines []ingest.Pipeline) (*benchrunner.CoberturaCoverage, error) { - dataStreamPath, found, err := packages.FindDataStreamRootForPath(options.TestFolder.Path) - if err != nil { - return nil, errors.Wrap(err, "locating data_stream root failed") - } - if !found { - return nil, errors.New("data stream root not found") - } - - // Use the Node Stats API to get stats for all installed pipelines. - // These stats contain hit counts for all main processors in a pipeline. - stats, err := ingest.GetPipelineStats(options.API, pipelines) - if err != nil { - return nil, errors.Wrap(err, "error fetching pipeline stats for code coverage calculations") - } - - // Construct the Cobertura report. - pkg := &benchrunner.CoberturaPackage{ - Name: options.TestFolder.Package + "." + options.TestFolder.DataStream, - } - - // Use the package's parent directory as base path, so that the relative paths - // for each class (pipeline) include the package name. This prevents paths for - // different packages colliding (i.e. a lot of packages have a "log" datastream - // and a default.yml pipeline). - basePath := filepath.Dir(options.PackageRootPath) - - coverage := &benchrunner.CoberturaCoverage{ - Sources: []*benchrunner.CoberturaSource{ - { - Path: basePath, - }, - }, - Packages: []*benchrunner.CoberturaPackage{pkg}, - Timestamp: time.Now().UnixNano(), - } - - // Calculate coverage for each pipeline - for _, pipeline := range pipelines { - covered, class, err := coverageForSinglePipeline(pipeline, stats, basePath, dataStreamPath) - if err != nil { - return nil, errors.Wrapf(err, "error calculating coverage for pipeline '%s'", pipeline.Filename()) - } - pkg.Classes = append(pkg.Classes, class) - coverage.LinesValid += int64(len(class.Methods)) - coverage.LinesCovered += covered - } - return coverage, nil -} - -func coverageForSinglePipeline(pipeline ingest.Pipeline, stats ingest.PipelineStatsMap, basePath, dataStreamPath string) (linesCovered int64, class *benchrunner.CoberturaClass, err error) { - // Load the list of main processors from the pipeline source code, annotated with line numbers. - src, err := pipeline.Processors() - if err != nil { - return 0, nil, err - } - - pstats, found := stats[pipeline.Name] - if !found { - return 0, nil, errors.Errorf("pipeline '%s' not installed in Elasticsearch", pipeline.Name) - } - - // Ensure there is no inconsistency in the list of processors in stats vs obtained from source. 
- if len(src) != len(pstats.Processors) { - return 0, nil, errors.Errorf("processor count mismatch for %s (src:%d stats:%d)", pipeline.Filename(), len(src), len(pstats.Processors)) - } - for idx, st := range pstats.Processors { - // Check that we have the expected type of processor, except for `compound` processors. - // Elasticsearch will return a `compound` processor in the case of `foreach` and - // any processor that defines `on_failure` processors. - if st.Type != "compound" && st.Type != src[idx].Type { - return 0, nil, errors.Errorf("processor type mismatch for %s processor %d (src:%s stats:%s)", pipeline.Filename(), idx, src[idx].Type, st.Type) - } - } - - // Tests install pipelines as `filename-` (without original extension). - // Use the filename part for the report. - pipelineName := pipeline.Name - if nameEnd := strings.LastIndexByte(pipelineName, '-'); nameEnd != -1 { - pipelineName = pipelineName[:nameEnd] - } - - // File path has to be relative to the packagePath added to the cobertura Sources list - // so that the source is reachable by the report tool. - pipelinePath := filepath.Join(dataStreamPath, "elasticsearch", "ingest_pipeline", pipeline.Filename()) - pipelineRelPath, err := filepath.Rel(basePath, pipelinePath) - if err != nil { - return 0, nil, errors.Wrapf(err, "cannot create relative path to pipeline file. Package root: '%s', pipeline path: '%s'", basePath, pipelinePath) - } - - // Report every pipeline as a "class". - class = &benchrunner.CoberturaClass{ - Name: pipelineName, - Filename: pipelineRelPath, - } - - // Calculate covered and total processors (reported as both lines and methods). - for idx, srcProc := range src { - if pstats.Processors[idx].Stats.Count > 0 { - linesCovered++ - } - method := benchrunner.CoberturaMethod{ - Name: srcProc.Type, - Hits: pstats.Processors[idx].Stats.Count, - } - for num := srcProc.FirstLine; num <= srcProc.LastLine; num++ { - line := &benchrunner.CoberturaLine{ - Number: num, - Hits: pstats.Processors[idx].Stats.Count, - } - class.Lines = append(class.Lines, line) - method.Lines = append(method.Lines, line) - } - class.Methods = append(class.Methods, &method) - } - return linesCovered, class, nil -} diff --git a/internal/benchrunner/runners/pipeline/test_config.go b/internal/benchrunner/runners/pipeline/test_config.go deleted file mode 100644 index 9ec0a2530c..0000000000 --- a/internal/benchrunner/runners/pipeline/test_config.go +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package pipeline - -import ( - "fmt" - "os" - "path/filepath" - - "github.com/elastic/go-ucfg/yaml" - "github.com/pkg/errors" - - "github.com/elastic/elastic-package/internal/benchrunner" -) - -const ( - configTestSuffixYAML = "-config.yml" - commonTestConfigYAML = "test-common-config.yml" -) - -type testConfig struct { - benchrunner.SkippableConfig `config:",inline"` - - Multiline *multiline `config:"multiline"` - Fields map[string]interface{} `config:"fields"` - DynamicFields map[string]string `config:"dynamic_fields"` - - // NumericKeywordFields holds a list of fields that have keyword - // type but can be ingested as numeric type. 
- NumericKeywordFields []string `config:"numeric_keyword_fields"` -} - -type multiline struct { - FirstLinePattern string `config:"first_line_pattern"` -} - -func readConfigForTestCase(testCasePath string) (*testConfig, error) { - testCaseDir := filepath.Dir(testCasePath) - testCaseFile := filepath.Base(testCasePath) - - commonConfigPath := filepath.Join(testCaseDir, commonTestConfigYAML) - var c testConfig - cfg, err := yaml.NewConfigWithFile(commonConfigPath) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return nil, errors.Wrapf(err, "can't load common configuration: %s", commonConfigPath) - } - - if err == nil { - if err := cfg.Unpack(&c); err != nil { - return nil, errors.Wrapf(err, "can't unpack test configuration: %s", commonConfigPath) - } - } - - configPath := filepath.Join(testCaseDir, expectedTestConfigFile(testCaseFile, configTestSuffixYAML)) - cfg, err = yaml.NewConfigWithFile(configPath) - if err != nil && !errors.Is(err, os.ErrNotExist) { - return nil, errors.Wrapf(err, "can't load test configuration: %s", configPath) - } - - if err == nil { - if err := cfg.Unpack(&c); err != nil { - return nil, errors.Wrapf(err, "can't unpack test configuration: %s", configPath) - } - } - return &c, nil -} - -func expectedTestConfigFile(testFile, configTestSuffix string) string { - return fmt.Sprintf("%s%s", testFile, configTestSuffix) -} diff --git a/internal/benchrunner/runners/pipeline/test_result.go b/internal/benchrunner/runners/pipeline/test_result.go deleted file mode 100644 index b3760e0bb3..0000000000 --- a/internal/benchrunner/runners/pipeline/test_result.go +++ /dev/null @@ -1,260 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -package pipeline - -import ( - "bytes" - "encoding/json" - "fmt" - "io" - "os" - "path/filepath" - - "github.com/google/go-cmp/cmp" - "github.com/pkg/errors" - "github.com/pmezard/go-difflib/difflib" - - "github.com/elastic/elastic-package/internal/benchrunner" - "github.com/elastic/elastic-package/internal/common" -) - -const expectedTestResultSuffix = "-expected.json" - -type testResult struct { - events []json.RawMessage -} - -type testResultDefinition struct { - Expected []json.RawMessage `json:"expected"` -} - -func writeTestResult(testCasePath string, result *testResult) error { - testCaseDir := filepath.Dir(testCasePath) - testCaseFile := filepath.Base(testCasePath) - - data, err := marshalTestResultDefinition(result) - if err != nil { - return errors.Wrap(err, "marshalling test result failed") - } - err = os.WriteFile(filepath.Join(testCaseDir, expectedTestResultFile(testCaseFile)), data, 0644) - if err != nil { - return errors.Wrap(err, "writing test result failed") - } - return nil -} - -func compareResults(testCasePath string, config *testConfig, result *testResult) error { - resultsWithoutDynamicFields, err := adjustTestResult(result, config) - if err != nil { - return errors.Wrap(err, "can't adjust test results") - } - - actual, err := marshalTestResultDefinition(resultsWithoutDynamicFields) - if err != nil { - return errors.Wrap(err, "marshalling actual test results failed") - } - - expectedResults, err := readExpectedTestResult(testCasePath, config) - if err != nil { - return errors.Wrap(err, "reading expected test result failed") - } - - expected, err := marshalTestResultDefinition(expectedResults) - if err != nil { - return errors.Wrap(err, "marshalling expected test results failed") - } - - report, err := diffJson(expected, actual) - if err != nil { - return errors.Wrap(err, "comparing expected test result") - } - if report != "" { - return benchrunner.ErrTestCaseFailed{ - Reason: "Expected results are different from actual ones", - Details: report, - } - } - - return nil -} - -func compareJsonNumbers(a, b json.Number) bool { - if a == b { - // Equal literals, so they are the same. 
- return true - } - if inta, err := a.Int64(); err == nil { - if intb, err := b.Int64(); err == nil { - return inta == intb - } - if floatb, err := b.Float64(); err == nil { - return float64(inta) == floatb - } - } else if floata, err := a.Float64(); err == nil { - if intb, err := b.Int64(); err == nil { - return floata == float64(intb) - } - if floatb, err := b.Float64(); err == nil { - return floata == floatb - } - } - return false -} - -func diffJson(want, got []byte) (string, error) { - var gotVal, wantVal interface{} - err := jsonUnmarshalUsingNumber(want, &wantVal) - if err != nil { - return "", fmt.Errorf("invalid want value: %w", err) - } - err = jsonUnmarshalUsingNumber(got, &gotVal) - if err != nil { - return "", fmt.Errorf("invalid got value: %w", err) - } - if cmp.Equal(gotVal, wantVal, cmp.Comparer(compareJsonNumbers)) { - return "", nil - } - - got, err = marshalNormalizedJSON(gotVal) - if err != nil { - return "", err - } - want, err = marshalNormalizedJSON(wantVal) - if err != nil { - return "", err - } - - var buf bytes.Buffer - err = difflib.WriteUnifiedDiff(&buf, difflib.UnifiedDiff{ - A: difflib.SplitLines(string(want)), - B: difflib.SplitLines(string(got)), - FromFile: "want", - ToFile: "got", - Context: 3, - }) - return buf.String(), err -} - -func readExpectedTestResult(testCasePath string, config *testConfig) (*testResult, error) { - testCaseDir := filepath.Dir(testCasePath) - testCaseFile := filepath.Base(testCasePath) - - path := filepath.Join(testCaseDir, expectedTestResultFile(testCaseFile)) - data, err := os.ReadFile(path) - if err != nil { - return nil, errors.Wrap(err, "reading test result file failed") - } - - u, err := unmarshalTestResult(data) - if err != nil { - return nil, errors.Wrap(err, "unmarshalling expected test result failed") - } - - adjusted, err := adjustTestResult(u, config) - if err != nil { - return nil, errors.Wrap(err, "adjusting test result failed") - } - return adjusted, nil -} - -func adjustTestResult(result *testResult, config *testConfig) (*testResult, error) { - if config == nil || config.DynamicFields == nil { - return result, nil - } - - // Strip dynamic fields from test result - var stripped testResult - for _, event := range result.events { - if event == nil { - stripped.events = append(stripped.events, nil) - continue - } - - var m common.MapStr - err := jsonUnmarshalUsingNumber(event, &m) - if err != nil { - return nil, errors.Wrapf(err, "can't unmarshal event: %s", string(event)) - } - - for key := range config.DynamicFields { - err := m.Delete(key) - if err != nil && err != common.ErrKeyNotFound { - return nil, errors.Wrap(err, "can't remove dynamic field") - } - } - - b, err := json.Marshal(&m) - if err != nil { - return nil, errors.Wrap(err, "can't marshal event") - } - - stripped.events = append(stripped.events, b) - } - return &stripped, nil -} - -func unmarshalTestResult(body []byte) (*testResult, error) { - var trd testResultDefinition - err := jsonUnmarshalUsingNumber(body, &trd) - if err != nil { - return nil, errors.Wrap(err, "unmarshalling test result failed") - } - - var tr testResult - tr.events = append(tr.events, trd.Expected...) - return &tr, nil -} - -// jsonUnmarshalUsingNumber is a drop-in replacement for json.Unmarshal that -// does not default to unmarshaling numeric values to float64 in order to -// prevent low bit truncation of values greater than 1<<53. -// See https://golang.org/cl/6202068 for details. 
-func jsonUnmarshalUsingNumber(data []byte, v interface{}) error { - dec := json.NewDecoder(bytes.NewReader(data)) - dec.UseNumber() - err := dec.Decode(v) - if err != nil { - if err == io.EOF { - return errors.New("unexpected end of JSON input") - } - return err - } - // Make sure there is no more data after the message - // to approximate json.Unmarshal's behaviour. - if dec.More() { - return fmt.Errorf("more data after top-level value") - } - return nil -} - -func marshalTestResultDefinition(result *testResult) ([]byte, error) { - var trd testResultDefinition - trd.Expected = result.events - body, err := marshalNormalizedJSON(trd) - if err != nil { - return nil, errors.Wrap(err, "marshalling test result definition failed") - } - return body, nil -} - -// marshalNormalizedJSON marshals test results ensuring that field -// order remains consistent independent of field order returned by -// ES to minimize diff noise during changes. -func marshalNormalizedJSON(v interface{}) ([]byte, error) { - msg, err := json.Marshal(v) - if err != nil { - return msg, err - } - var obj interface{} - err = jsonUnmarshalUsingNumber(msg, &obj) - if err != nil { - return msg, err - } - return json.MarshalIndent(obj, "", " ") -} - -func expectedTestResultFile(testFile string) string { - return fmt.Sprintf("%s%s", testFile, expectedTestResultSuffix) -} diff --git a/internal/benchrunner/runners/pipeline/test_result_test.go b/internal/benchrunner/runners/pipeline/test_result_test.go deleted file mode 100644 index 4f57135938..0000000000 --- a/internal/benchrunner/runners/pipeline/test_result_test.go +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package pipeline - -import ( - "encoding/json" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestCompareJsonNumber(t *testing.T) { - cases := []struct { - want json.Number - got json.Number - equal bool - }{ - {"0", "0", true}, - {"0.0", "0", true}, - {"0", "0.0", true}, - {"42", "42", true}, - {"42.0", "42", true}, - {"42", "42.0", true}, - {"0.42", "0.42", true}, - {"-10", "-10", true}, - {"-10.0", "-10", true}, - {"6920071768563516000", "6920071768563516000", true}, - {"6920071768563516847", "6920071768563516847", true}, - {"1624617166.182", "1.624617166182E9", true}, - - {"0", "1", false}, - {"0.1", "0", false}, - {"6920071768563516000", "6920071768563516847", false}, - {"1624617166.182", "1.624617166181E9", false}, - } - - for _, c := range cases { - t.Run(c.want.String()+" == "+c.got.String(), func(t *testing.T) { - equal := compareJsonNumbers(c.want, c.got) - assert.Equal(t, c.equal, equal) - }) - } -} diff --git a/internal/benchrunner/test_config.go b/internal/benchrunner/test_config.go deleted file mode 100644 index 8f57dee91e..0000000000 --- a/internal/benchrunner/test_config.go +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. - -package benchrunner - -import ( - "fmt" - "net/url" -) - -// SkipConfig allows a test to be marked as skipped -type SkipConfig struct { - // Reason is the short reason for why this test should be skipped. 
- Reason string `config:"reason"` - - // Link is a URL where more details about the skipped test can be found. - Link url.URL `config:"url"` -} - -func (s SkipConfig) String() string { - return fmt.Sprintf("%s [%s]", s.Reason, s.Link.String()) -} - -// SkippableConfig is a test configuration that allows skipping. This -// struct is intended for embedding in concrete test configuration structs. -type SkippableConfig struct { - // Skip allows this test to be skipped. - Skip *SkipConfig `config:"skip"` -} From 26ba4cc7fc01588a0ab682c0c350cea10f8a8064 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Mon, 5 Sep 2022 10:21:17 +0200 Subject: [PATCH 10/20] Benchmark runner reporting --- internal/benchrunner/report_format.go | 14 +- internal/benchrunner/report_output.go | 23 +-- .../benchrunner/reporters/formats/human.go | 60 +------- .../benchrunner/reporters/formats/xunit.go | 136 +----------------- .../benchrunner/reporters/outputs/file.go | 22 +-- .../benchrunner/reporters/outputs/stdout.go | 13 +- 6 files changed, 35 insertions(+), 233 deletions(-) diff --git a/internal/benchrunner/report_format.go b/internal/benchrunner/report_format.go index 3ec489a0a6..b366a36206 100644 --- a/internal/benchrunner/report_format.go +++ b/internal/benchrunner/report_format.go @@ -6,24 +6,24 @@ package benchrunner import "fmt" -// TestReportFormat represents a test report format -type TestReportFormat string +// BenchReportFormat represents a test report format +type BenchReportFormat string // ReportFormatFunc defines the report formatter function. -type ReportFormatFunc func(results []TestResult) (string, []string, error) +type ReportFormatFunc func(results []BenchResult) ([]string, error) -var reportFormatters = map[TestReportFormat]ReportFormatFunc{} +var reportFormatters = map[BenchReportFormat]ReportFormatFunc{} // RegisterReporterFormat registers a test report formatter. -func RegisterReporterFormat(name TestReportFormat, formatFunc ReportFormatFunc) { +func RegisterReporterFormat(name BenchReportFormat, formatFunc ReportFormatFunc) { reportFormatters[name] = formatFunc } // FormatReport delegates formatting of test results to the registered test report formatter. -func FormatReport(name TestReportFormat, results []TestResult) (testReport string, benchmarkReports []string, err error) { +func FormatReport(name BenchReportFormat, results []BenchResult) (benchmarkReports []string, err error) { reportFunc, defined := reportFormatters[name] if !defined { - return "", nil, fmt.Errorf("unregistered test report format: %s", name) + return nil, fmt.Errorf("unregistered test report format: %s", name) } return reportFunc(results) diff --git a/internal/benchrunner/report_output.go b/internal/benchrunner/report_output.go index 3155c4e3a7..87745bf480 100644 --- a/internal/benchrunner/report_output.go +++ b/internal/benchrunner/report_output.go @@ -8,33 +8,24 @@ import ( "fmt" ) -// TestReportOutput represents an output for a test report -type TestReportOutput string - -// TestReportType represents a test report type (test, benchmark) -type TestReportType string - -const ( - ReportTypeTest TestReportType = "test" - ReportTypeBench TestReportType = "bench" -) +// BenchReportOutput represents an output for a test report +type BenchReportOutput string // ReportOutputFunc defines the report writer function. 
-type ReportOutputFunc func(pkg, report string, format TestReportFormat, ttype TestReportType) error +type ReportOutputFunc func(pkg, report string, format BenchReportFormat) error -var reportOutputs = map[TestReportOutput]ReportOutputFunc{} +var reportOutputs = map[BenchReportOutput]ReportOutputFunc{} // RegisterReporterOutput registers a test report output. -func RegisterReporterOutput(name TestReportOutput, outputFunc ReportOutputFunc) { +func RegisterReporterOutput(name BenchReportOutput, outputFunc ReportOutputFunc) { reportOutputs[name] = outputFunc } // WriteReport delegates writing of test results to the registered test report output -func WriteReport(pkg string, name TestReportOutput, report string, format TestReportFormat, ttype TestReportType) error { +func WriteReport(pkg string, name BenchReportOutput, report string, format BenchReportFormat) error { outputFunc, defined := reportOutputs[name] if !defined { return fmt.Errorf("unregistered test report output: %s", name) } - - return outputFunc(pkg, report, format, ttype) + return outputFunc(pkg, report, format) } diff --git a/internal/benchrunner/reporters/formats/human.go b/internal/benchrunner/reporters/formats/human.go index f493ee04c1..ba1b404d68 100644 --- a/internal/benchrunner/reporters/formats/human.go +++ b/internal/benchrunner/reporters/formats/human.go @@ -5,7 +5,6 @@ package formats import ( - "fmt" "strings" "github.com/jedib0t/go-pretty/table" @@ -20,12 +19,12 @@ func init() { const ( // ReportFormatHuman reports test results in a human-readable format - ReportFormatHuman benchrunner.TestReportFormat = "human" + ReportFormatHuman benchrunner.BenchReportFormat = "human" ) -func reportHumanFormat(results []benchrunner.TestResult) (string, []string, error) { +func reportHumanFormat(results []benchrunner.BenchResult) ([]string, error) { if len(results) == 0 { - return "No test results", nil, nil + return nil, nil } var benchmarks []benchrunner.BenchmarkResult @@ -35,60 +34,11 @@ func reportHumanFormat(results []benchrunner.TestResult) (string, []string, erro } } - testFmtd, err := reportHumanFormatTest(results) - if err != nil { - return "", nil, err - } benchFmtd, err := reportHumanFormatBenchmark(benchmarks) if err != nil { - return "", nil, err - } - return testFmtd, benchFmtd, nil -} - -func reportHumanFormatTest(results []benchrunner.TestResult) (string, error) { - var report strings.Builder - - headerPrinted := false - for _, r := range results { - if r.FailureMsg == "" { - continue - } - - if !headerPrinted { - report.WriteString("FAILURE DETAILS:\n") - headerPrinted = true - } - - detail := fmt.Sprintf("%s/%s %s:\n%s\n", r.Package, r.DataStream, r.Name, r.FailureDetails) - report.WriteString(detail) + return nil, err } - if headerPrinted { - report.WriteString("\n\n") - } - - t := table.NewWriter() - t.AppendHeader(table.Row{"Package", "Data stream", "Test type", "Test name", "Result", "Time elapsed"}) - - for _, r := range results { - var result string - if r.ErrorMsg != "" { - result = fmt.Sprintf("ERROR: %s", r.ErrorMsg) - } else if r.FailureMsg != "" { - result = fmt.Sprintf("FAIL: %s", r.FailureMsg) - } else if r.Skipped != nil { - result = r.Skipped.String() - } else { - result = "PASS" - } - - t.AppendRow(table.Row{r.Package, r.DataStream, r.TestType, r.Name, result, r.TimeElapsed}) - } - - t.SetStyle(table.StyleRounded) - - report.WriteString(t.Render()) - return report.String(), nil + return benchFmtd, nil } func reportHumanFormatBenchmark(benchmarks []benchrunner.BenchmarkResult) ([]string, error) { 
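
As a reference for how the reworked reporting API composes after this patch, below is a minimal, hypothetical caller: FormatReport returns the formatted benchmark reports for a set of results, and each report is then handed to WriteReport for the selected output. The writeBenchReports helper, the "my_package" name, and the nil results slice are illustrative assumptions; the real wiring lives in cmd/benchrunner.go and may differ.

package main

import (
	"log"

	"github.com/elastic/elastic-package/internal/benchrunner"
	"github.com/elastic/elastic-package/internal/benchrunner/reporters/formats"
	"github.com/elastic/elastic-package/internal/benchrunner/reporters/outputs"
)

// writeBenchReports formats benchmark results with the registered "human"
// formatter and writes every produced report to stdout. Importing the
// formats and outputs packages registers the reporters via their init().
func writeBenchReports(results []benchrunner.BenchResult) error {
	reports, err := benchrunner.FormatReport(formats.ReportFormatHuman, results)
	if err != nil {
		return err
	}
	for _, report := range reports {
		// "my_package" stands in for the package name read from the manifest.
		err := benchrunner.WriteReport("my_package", outputs.ReportOutputSTDOUT, report, formats.ReportFormatHuman)
		if err != nil {
			return err
		}
	}
	return nil
}

func main() {
	// An empty result set simply produces no reports.
	if err := writeBenchReports(nil); err != nil {
		log.Fatal(err)
	}
}

Each entry in the returned slice is presumably one formatted benchmark report, so the caller writes them out one at a time through the chosen output.
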
diff --git a/internal/benchrunner/reporters/formats/xunit.go b/internal/benchrunner/reporters/formats/xunit.go index fbcfa4512a..7fa9fc8a12 100644 --- a/internal/benchrunner/reporters/formats/xunit.go +++ b/internal/benchrunner/reporters/formats/xunit.go @@ -6,7 +6,6 @@ package formats import ( "encoding/xml" - "fmt" "github.com/pkg/errors" @@ -19,147 +18,22 @@ func init() { const ( // ReportFormatXUnit reports test results in the xUnit format - ReportFormatXUnit benchrunner.TestReportFormat = "xUnit" + ReportFormatXUnit benchrunner.BenchReportFormat = "xUnit" ) -type testSuites struct { - XMLName xml.Name `xml:"testsuites"` - Suites []testSuite `xml:"testsuite"` -} -type testSuite struct { - Comment string `xml:",comment"` - - Name string `xml:"name,attr"` - NumTests int `xml:"tests,attr,omitempty"` - NumFailures int `xml:"failures,attr,omitempty"` - NumErrors int `xml:"errors,attr,omitempty"` - NumSkipped int `xml:"skipped,attr,omitempty"` - - Suites []testSuite `xml:"testsuite,omitempty"` - Cases []testCase `xml:"testcase,omitempty"` -} -type testCase struct { - Name string `xml:"name,attr"` - ClassName string `xml:"classname,attr"` - TimeInSeconds float64 `xml:"time,attr"` - - Error string `xml:"error,omitempty"` - Failure string `xml:"failure,omitempty"` - Skipped *skipped `xml:"skipped,omitempty"` -} - -type skipped struct { - Message string `xml:"message,attr"` -} - -func reportXUnitFormat(results []benchrunner.TestResult) (string, []string, error) { +func reportXUnitFormat(results []benchrunner.BenchResult) ([]string, error) { var benchmarks []benchrunner.BenchmarkResult for _, r := range results { if r.Benchmark != nil { benchmarks = append(benchmarks, *r.Benchmark) } } - testFmtd, err := reportXUnitFormatTest(results) - if err != nil { - return "", nil, err - } - benchFmtd, err := reportXUnitFormatBenchmark(benchmarks) - if err != nil { - return "", nil, err - } - return testFmtd, benchFmtd, nil -} - -func reportXUnitFormatTest(results []benchrunner.TestResult) (string, error) { - // test type => package => data stream => test cases - tests := map[string]map[string]map[string][]testCase{} - - var numTests, numFailures, numErrors, numSkipped int - for _, r := range results { - testType := string(r.TestType) - if _, exists := tests[testType]; !exists { - tests[testType] = map[string]map[string][]testCase{} - } - - if _, exists := tests[testType][r.Package]; !exists { - tests[testType][r.Package] = map[string][]testCase{} - } - - if _, exists := tests[testType][r.Package][r.DataStream]; !exists { - tests[testType][r.Package][r.DataStream] = make([]testCase, 0) - } - - var failure string - if r.FailureMsg != "" { - failure = r.FailureMsg - numFailures++ - } - - if r.FailureDetails != "" { - failure += ": " + r.FailureDetails - } - - if r.ErrorMsg != "" { - numErrors++ - } - - if r.Skipped != nil { - numSkipped++ - } - - name := fmt.Sprintf("%s test", r.TestType) - if r.Name != "" { - name += ": " + r.Name - } - - c := testCase{ - Name: name, - ClassName: fmt.Sprintf("%s.%s", r.Package, r.DataStream), - TimeInSeconds: r.TimeElapsed.Seconds(), - Error: r.ErrorMsg, - Failure: failure, - } - - if r.Skipped != nil { - c.Skipped = &skipped{r.Skipped.String()} - } - - numTests++ - - tests[testType][r.Package][r.DataStream] = append(tests[testType][r.Package][r.DataStream], c) - } - - var ts testSuites - ts.Suites = make([]testSuite, 0) - - for testType, packages := range tests { - testTypeSuite := testSuite{ - Comment: fmt.Sprintf("test suite for %s tests", testType), - Name: testType, - - 
NumTests: numTests, - NumFailures: numFailures, - NumErrors: numErrors, - NumSkipped: numSkipped, - Cases: make([]testCase, 0), - } - - for _, pkg := range packages { - for _, ds := range pkg { - testTypeSuite.Cases = append(testTypeSuite.Cases, ds...) - } - } - - ts.Suites = append(ts.Suites, testTypeSuite) - } - - out, err := xml.MarshalIndent(&ts, "", " ") + benchFmtd, err := reportXUnitFormatBenchmark(benchmarks) if err != nil { - return "", errors.Wrap(err, "unable to format test results as xUnit") + return nil, err } - - return xml.Header + string(out), nil + return benchFmtd, nil } func reportXUnitFormatBenchmark(benchmarks []benchrunner.BenchmarkResult) ([]string, error) { diff --git a/internal/benchrunner/reporters/outputs/file.go b/internal/benchrunner/reporters/outputs/file.go index 1294c76348..ae9ddff2a7 100644 --- a/internal/benchrunner/reporters/outputs/file.go +++ b/internal/benchrunner/reporters/outputs/file.go @@ -23,11 +23,11 @@ func init() { const ( // ReportOutputFile reports test results to files in a folder - ReportOutputFile benchrunner.TestReportOutput = "file" + ReportOutputFile benchrunner.BenchReportOutput = "file" ) -func reportToFile(pkg, report string, format benchrunner.TestReportFormat, ttype benchrunner.TestReportType) error { - dest, err := reportsDir(ttype) +func reportToFile(pkg, report string, format benchrunner.BenchReportFormat) error { + dest, err := reportsDir() if err != nil { return errors.Wrap(err, "could not determine test reports folder") } @@ -36,7 +36,7 @@ func reportToFile(pkg, report string, format benchrunner.TestReportFormat, ttype _, err = os.Stat(dest) if err != nil && errors.Is(err, os.ErrNotExist) { if err := os.MkdirAll(dest, 0755); err != nil { - return errors.Wrapf(err, "could not create %s reports folder", ttype) + return errors.Wrapf(err, "could not create benchmark reports folder") } } @@ -48,26 +48,18 @@ func reportToFile(pkg, report string, format benchrunner.TestReportFormat, ttype filePath := filepath.Join(dest, fileName) if err := os.WriteFile(filePath, []byte(report+"\n"), 0644); err != nil { - return errors.Wrapf(err, "could not write %s report file", ttype) + return errors.Wrapf(err, "could not write benchmark report file") } return nil } // reportsDir returns the location of the directory to store reports. 
-func reportsDir(ttype benchrunner.TestReportType) (string, error) { +func reportsDir() (string, error) { buildDir, err := builder.BuildDirectory() if err != nil { return "", errors.Wrap(err, "locating build directory failed") } - var folder string - switch ttype { - case benchrunner.ReportTypeTest: - folder = "test-results" - case benchrunner.ReportTypeBench: - folder = "benchmark-results" - default: - return "", fmt.Errorf("unsupported report type: %s", ttype) - } + const folder = "benchmark-results" return filepath.Join(buildDir, folder), nil } diff --git a/internal/benchrunner/reporters/outputs/stdout.go b/internal/benchrunner/reporters/outputs/stdout.go index 442023b82f..3f1874fdc6 100644 --- a/internal/benchrunner/reporters/outputs/stdout.go +++ b/internal/benchrunner/reporters/outputs/stdout.go @@ -16,18 +16,13 @@ func init() { const ( // ReportOutputSTDOUT reports test results to STDOUT - ReportOutputSTDOUT benchrunner.TestReportOutput = "stdout" + ReportOutputSTDOUT benchrunner.BenchReportOutput = "stdout" ) -func reportToSTDOUT(pkg, report string, _ benchrunner.TestReportFormat, ttype benchrunner.TestReportType) error { - reportType := "Test" - if ttype == benchrunner.ReportTypeBench { - reportType = "Benchmark" - } - fmt.Printf("--- %s results for package: %s - START ---\n", reportType, pkg) +func reportToSTDOUT(pkg, report string, _ benchrunner.BenchReportFormat) error { + fmt.Printf("--- Benchmark results for package: %s - START ---\n", pkg) fmt.Println(report) - fmt.Printf("--- %s results for package: %s - END ---\n", reportType, pkg) + fmt.Printf("--- Benchmark results for package: %s - END ---\n", pkg) fmt.Println("Done") - return nil } From 254a2620966e01dac62fcc58f7a7b6eaa27aeb40 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Mon, 5 Sep 2022 13:31:45 +0200 Subject: [PATCH 11/20] Make benchmarks have a dedicated _dev config folder --- cmd/benchrunner.go | 155 +++----- internal/benchrunner/benchmark.go | 13 - internal/benchrunner/benchrunner.go | 235 ++++-------- internal/benchrunner/errors.go | 18 - internal/benchrunner/report_format.go | 12 +- internal/benchrunner/report_output.go | 8 +- .../benchrunner/reporters/formats/human.go | 8 +- .../benchrunner/reporters/formats/xunit.go | 18 +- .../benchrunner/reporters/outputs/file.go | 6 +- .../benchrunner/reporters/outputs/stdout.go | 2 +- .../benchrunner/runners/pipeline/benchmark.go | 122 +----- .../benchrunner/runners/pipeline/config.go | 38 ++ .../benchrunner/runners/pipeline/runner.go | 357 +++--------------- .../runners/pipeline/runner_test.go | 32 -- .../benchrunner/runners/pipeline/test_case.go | 85 ++--- internal/benchrunner/runners/runners.go | 5 +- internal/cobraext/flags.go | 6 - 17 files changed, 292 insertions(+), 828 deletions(-) delete mode 100644 internal/benchrunner/errors.go create mode 100644 internal/benchrunner/runners/pipeline/config.go diff --git a/cmd/benchrunner.go b/cmd/benchrunner.go index bae3b3875b..793d77f176 100644 --- a/cmd/benchrunner.go +++ b/cmd/benchrunner.go @@ -6,9 +6,7 @@ package cmd import ( "fmt" - "path/filepath" "strings" - "time" "github.com/pkg/errors" "github.com/spf13/cobra" @@ -16,7 +14,7 @@ import ( "github.com/elastic/elastic-package/internal/benchrunner" "github.com/elastic/elastic-package/internal/benchrunner/reporters/formats" "github.com/elastic/elastic-package/internal/benchrunner/reporters/outputs" - _ "github.com/elastic/elastic-package/internal/benchrunner/runners" // register all test runners + _ "github.com/elastic/elastic-package/internal/benchrunner/runners" // 
register all benchmark runners "github.com/elastic/elastic-package/internal/cobraext" "github.com/elastic/elastic-package/internal/common" "github.com/elastic/elastic-package/internal/elasticsearch" @@ -29,7 +27,7 @@ const benchLongDescription = `Use this command to run benchmarks on a package. C #### Pipeline Benchmarks These benchmarks allow you to benchmark any Ingest Node Pipelines defined by your packages. -For details on how to configure pipeline test for a package, review the [HOWTO guide](https://github.com/elastic/elastic-package/blob/main/docs/howto/pipeline_benchmarks.md).` +For details on how to configure pipeline benchmarks for a package, review the [HOWTO guide](https://github.com/elastic/elastic-package/blob/main/docs/howto/pipeline_benchmarks.md).` func setupBenchmarkCommand() *cobraext.Command { var benchTypeCmdActions []cobraext.CommandAction @@ -49,16 +47,10 @@ func setupBenchmarkCommand() *cobraext.Command { }} cmd.PersistentFlags().BoolP(cobraext.FailOnMissingFlagName, "m", false, cobraext.FailOnMissingFlagDescription) - cmd.PersistentFlags().BoolP(cobraext.GenerateTestResultFlagName, "g", false, cobraext.GenerateTestResultFlagDescription) cmd.PersistentFlags().StringP(cobraext.ReportFormatFlagName, "", string(formats.ReportFormatHuman), cobraext.ReportFormatFlagDescription) cmd.PersistentFlags().StringP(cobraext.ReportOutputFlagName, "", string(outputs.ReportOutputSTDOUT), cobraext.ReportOutputFlagDescription) - cmd.PersistentFlags().BoolP(cobraext.TestCoverageFlagName, "", false, cobraext.TestCoverageFlagDescription) - cmd.PersistentFlags().IntP(cobraext.TestBenchCountFlagName, "", 1000, cobraext.TestBenchCountFlagDescription) - cmd.PersistentFlags().DurationP(cobraext.TestPerfDurationFlagName, "", time.Duration(0), cobraext.TestPerfDurationFlagDescription) - cmd.PersistentFlags().DurationP(cobraext.DeferCleanupFlagName, "", 0, cobraext.DeferCleanupFlagDescription) - cmd.PersistentFlags().String(cobraext.VariantFlagName, "", cobraext.VariantFlagDescription) - for benchType, runner := range benchrunner.TestRunners() { + for benchType, runner := range benchrunner.BenchRunners() { action := benchTypeCommandActionFactory(runner) benchTypeCmdActions = append(benchTypeCmdActions, action) @@ -69,9 +61,7 @@ func setupBenchmarkCommand() *cobraext.Command { RunE: action, } - if runner.CanRunPerDataStream() { - benchTypeCmd.Flags().StringSliceP(cobraext.DataStreamsFlagName, "d", nil, cobraext.DataStreamsFlagDescription) - } + benchTypeCmd.Flags().StringSliceP(cobraext.DataStreamsFlagName, "d", nil, cobraext.DataStreamsFlagDescription) cmd.AddCommand(benchTypeCmd) } @@ -79,21 +69,16 @@ func setupBenchmarkCommand() *cobraext.Command { return cobraext.NewCommand(cmd, cobraext.ContextPackage) } -func benchTypeCommandActionFactory(runner benchrunner.TestRunner) cobraext.CommandAction { +func benchTypeCommandActionFactory(runner benchrunner.BenchRunner) cobraext.CommandAction { benchType := runner.Type() return func(cmd *cobra.Command, args []string) error { - cmd.Printf("Run %s tests for the package\n", benchType) + cmd.Printf("Run %s benchmarks for the package\n", benchType) failOnMissing, err := cmd.Flags().GetBool(cobraext.FailOnMissingFlagName) if err != nil { return cobraext.FlagParsingError(err, cobraext.FailOnMissingFlagName) } - generateTestResult, err := cmd.Flags().GetBool(cobraext.GenerateTestResultFlagName) - if err != nil { - return cobraext.FlagParsingError(err, cobraext.GenerateTestResultFlagName) - } - reportFormat, err := 
cmd.Flags().GetString(cobraext.ReportFormatFlagName) if err != nil { return cobraext.FlagParsingError(err, cobraext.ReportFormatFlagName) @@ -104,21 +89,6 @@ func benchTypeCommandActionFactory(runner benchrunner.TestRunner) cobraext.Comma return cobraext.FlagParsingError(err, cobraext.ReportOutputFlagName) } - testCoverage, err := cmd.Flags().GetBool(cobraext.TestCoverageFlagName) - if err != nil { - return cobraext.FlagParsingError(err, cobraext.TestCoverageFlagName) - } - - testBenchCount, err := cmd.Flags().GetInt(cobraext.TestBenchCountFlagName) - if err != nil { - return cobraext.FlagParsingError(err, cobraext.TestBenchCountFlagName) - } - - testBenchDur, err := cmd.Flags().GetDuration(cobraext.TestPerfDurationFlagName) - if err != nil { - return cobraext.FlagParsingError(err, cobraext.TestBenchCountFlagDescription) - } - packageRootPath, found, err := packages.FindPackageRoot() if !found { return errors.New("package root not found") @@ -129,91 +99,60 @@ func benchTypeCommandActionFactory(runner benchrunner.TestRunner) cobraext.Comma signal.Enable() - var testFolders []benchrunner.TestFolder - if runner.CanRunPerDataStream() { - var dataStreams []string - // We check for the existence of the data streams flag before trying to - // parse it because if the root test command is run instead of one of the - // subcommands of test, the data streams flag will not be defined. - if cmd.Flags().Lookup(cobraext.DataStreamsFlagName) != nil { - dataStreams, err = cmd.Flags().GetStringSlice(cobraext.DataStreamsFlagName) - common.TrimStringSlice(dataStreams) - if err != nil { - return cobraext.FlagParsingError(err, cobraext.DataStreamsFlagName) - } - - err = validateDataStreamsFlag(packageRootPath, dataStreams) - if err != nil { - return cobraext.FlagParsingError(err, cobraext.DataStreamsFlagName) - } - } - - if runner.TestFolderRequired() { - testFolders, err = benchrunner.FindTestFolders(packageRootPath, dataStreams, benchType) - if err != nil { - return errors.Wrap(err, "unable to determine test folder paths") - } - } else { - testFolders, err = benchrunner.AssumeTestFolders(packageRootPath, dataStreams, benchType) - if err != nil { - return errors.Wrap(err, "unable to assume test folder paths") - } + var benchFolders []benchrunner.BenchmarkFolder + var dataStreams []string + // We check for the existence of the data streams flag before trying to + // parse it because if the root benchmark command is run instead of one of the + // subcommands of benchmark, the data streams flag will not be defined. 
+ if cmd.Flags().Lookup(cobraext.DataStreamsFlagName) != nil { + dataStreams, err = cmd.Flags().GetStringSlice(cobraext.DataStreamsFlagName) + common.TrimStringSlice(dataStreams) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.DataStreamsFlagName) } - if failOnMissing && len(testFolders) == 0 { - if len(dataStreams) > 0 { - return fmt.Errorf("no %s tests found for %s data stream(s)", benchType, strings.Join(dataStreams, ",")) - } - return fmt.Errorf("no %s tests found", benchType) - } - } else { - _, pkg := filepath.Split(packageRootPath) - testFolders = []benchrunner.TestFolder{ - { - Package: pkg, - }, + err = validateDataStreamsFlag(packageRootPath, dataStreams) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.DataStreamsFlagName) } } - deferCleanup, err := cmd.Flags().GetDuration(cobraext.DeferCleanupFlagName) + benchFolders, err = benchrunner.FindBenchmarkFolders(packageRootPath, dataStreams, benchType) if err != nil { - return cobraext.FlagParsingError(err, cobraext.DeferCleanupFlagName) + return errors.Wrap(err, "unable to determine benchmark folder paths") } - variantFlag, _ := cmd.Flags().GetString(cobraext.VariantFlagName) + if failOnMissing && len(benchFolders) == 0 { + if len(dataStreams) > 0 { + return fmt.Errorf("no %s benchmarks found for %s data stream(s)", benchType, strings.Join(dataStreams, ",")) + } + return fmt.Errorf("no %s benchmarks found", benchType) + } esClient, err := elasticsearch.Client() if err != nil { return errors.Wrap(err, "can't create Elasticsearch client") } - var results []benchrunner.TestResult - for _, folder := range testFolders { - r, err := benchrunner.Run(benchType, benchrunner.TestOptions{ - TestFolder: folder, - PackageRootPath: packageRootPath, - GenerateTestResult: generateTestResult, - API: esClient.API, - DeferCleanup: deferCleanup, - ServiceVariant: variantFlag, - WithCoverage: testCoverage, - Benchmark: benchrunner.BenchmarkConfig{ - NumDocs: testBenchCount, - Duration: testBenchDur, - }, + var results []*benchrunner.Result + for _, folder := range benchFolders { + r, err := benchrunner.Run(benchType, benchrunner.BenchOptions{ + BenchmarkFolder: folder, + PackageRootPath: packageRootPath, + API: esClient.API, }) - results = append(results, r...) 
- if err != nil { - return errors.Wrapf(err, "error running package %s tests", benchType) + return errors.Wrapf(err, "error running package %s benchmarks", benchType) } + + results = append(results, r) } - format := benchrunner.TestReportFormat(reportFormat) - testReport, benchReports, err := benchrunner.FormatReport(format, results) + format := benchrunner.BenchReportFormat(reportFormat) + benchReports, err := benchrunner.FormatReport(format, results) if err != nil { - return errors.Wrap(err, "error formatting test report") + return errors.Wrap(err, "error formatting benchmark report") } m, err := packages.ReadPackageManifestFromPackageRoot(packageRootPath) @@ -221,26 +160,16 @@ func benchTypeCommandActionFactory(runner benchrunner.TestRunner) cobraext.Comma return errors.Wrapf(err, "reading package manifest failed (path: %s)", packageRootPath) } - if err := benchrunner.WriteReport(m.Name, benchrunner.TestReportOutput(reportOutput), testReport, format, benchrunner.ReportTypeTest); err != nil { - return errors.Wrap(err, "error writing test report") - } - for idx, report := range benchReports { - if err := benchrunner.WriteReport(fmt.Sprintf("%s-%d", m.Name, idx+1), benchrunner.TestReportOutput(reportOutput), report, format, benchrunner.ReportTypeBench); err != nil { + if err := benchrunner.WriteReport(fmt.Sprintf("%s-%d", m.Name, idx+1), benchrunner.BenchReportOutput(reportOutput), report, format); err != nil { return errors.Wrap(err, "error writing benchmark report") } } - if testCoverage { - err := benchrunner.WriteCoverage(packageRootPath, m.Name, runner.Type(), results) - if err != nil { - return errors.Wrap(err, "error writing test coverage") - } - } // Check if there is any error or failure reported for _, r := range results { - if r.ErrorMsg != "" || r.FailureMsg != "" { - return errors.New("one or more test cases failed") + if r.ErrorMsg != "" { + return fmt.Errorf("one or more benchmarks failed: %v", r.ErrorMsg) } } return nil diff --git a/internal/benchrunner/benchmark.go b/internal/benchrunner/benchmark.go index da5f8d15d6..ee7ecbbfac 100644 --- a/internal/benchrunner/benchmark.go +++ b/internal/benchrunner/benchmark.go @@ -6,21 +6,8 @@ package benchrunner import ( "fmt" - "time" ) -// BenchmarkConfig is the configuration used for benchmarks. -type BenchmarkConfig struct { - // Enabled controls if benchmarks are run. - Enabled bool - - // NumDocs is the number of documents to be used during benchmark. - NumDocs int - - // Duration is the optional benchmark duration. - Duration time.Duration -} - // BenchmarkResult represents the result of a benchmark run. // This is modeled after the xUnit benchmark schema. // See https://github.com/Autodesk/jenkinsci-benchmark-plugin/blob/master/doc/EXAMPLE_SCHEMA_XML_DEFAULT.md diff --git a/internal/benchrunner/benchrunner.go b/internal/benchrunner/benchrunner.go index 6699f238a5..4942bebc19 100644 --- a/internal/benchrunner/benchrunner.go +++ b/internal/benchrunner/benchrunner.go @@ -6,7 +6,6 @@ package benchrunner import ( "fmt" - "os" "path/filepath" "sort" "strings" @@ -17,217 +16,135 @@ import ( "github.com/elastic/elastic-package/internal/elasticsearch" ) -// TestType represents the various supported test types -type TestType string +// BenchType represents the various supported benchmark types +type BenchType string -// TestOptions contains test runner options. 
-type TestOptions struct { - TestFolder TestFolder - PackageRootPath string - GenerateTestResult bool - API *elasticsearch.API - - DeferCleanup time.Duration - ServiceVariant string - WithCoverage bool - Benchmark BenchmarkConfig +// BenchOptions contains benchmark runner options. +type BenchOptions struct { + BenchmarkFolder BenchmarkFolder + PackageRootPath string + API *elasticsearch.API } -// TestRunner is the interface all test runners must implement. -type TestRunner interface { - // Type returns the test runner's type. - Type() TestType +// BenchRunner is the interface all benchmark runners must implement. +type BenchRunner interface { + // Type returns the benchmark runner's type. + Type() BenchType - // String returns the human-friendly name of the test runner. + // String returns the human-friendly name of the benchmark runner. String() string - // Run executes the test runner. - Run(TestOptions) ([]TestResult, error) + // Run executes the benchmark runner. + Run(BenchOptions) (*Result, error) - // TearDown cleans up any test runner resources. It must be called - // after the test runner has finished executing. + // TearDown cleans up any benchmark runner resources. It must be called + // after the benchmark runner has finished executing. TearDown() error - - CanRunPerDataStream() bool - - TestFolderRequired() bool } -var runners = map[TestType]TestRunner{} +var runners = map[BenchType]BenchRunner{} -// TestResult contains a single test's results -type TestResult struct { - // Name of test result. Optional. - Name string - - // Package to which this test result belongs. +// Result contains a single benchmark's results +type Result struct { + // Package to which this benchmark result belongs. Package string - // TestType indicates the type of test. - TestType TestType + // BenchType indicates the type of benchmark. + BenchType BenchType - // Data stream to which this test result belongs. + // Data stream to which this benchmark result belongs. DataStream string - // Time elapsed from running a test case to arriving at its result. + // Time elapsed from running a benchmark case to arriving at its result. TimeElapsed time.Duration - // If test case failed, short description of the failure. A failure is - // when the test completes execution but the actual results of the test - // don't match the expected results. - FailureMsg string - - // If test case failed, longer description of the failure. - FailureDetails string - - // If there was an error while running the test case, description - // of the error. An error is when the test cannot complete execution due - // to an unexpected runtime error in the test execution. + // If there was an error while running the benchmark case, description + // of the error. An error is when the benchmark cannot complete execution due + // to an unexpected runtime error in the benchmark execution. ErrorMsg string - // If the test was skipped, the reason it was skipped and a link for more - // details. - Skipped *SkipConfig - - // Coverage details in Cobertura format (optional). - Coverage *CoberturaCoverage - - // Benchmark results (optional). + // Benchmark results. Benchmark *BenchmarkResult } -// ResultComposer wraps a TestResult and provides convenience methods for -// manipulating this TestResult. +// ResultComposer wraps a Result and provides convenience methods for +// manipulating this Result. 
type ResultComposer struct { - TestResult + Result StartTime time.Time } // NewResultComposer returns a new ResultComposer with the StartTime // initialized to now. -func NewResultComposer(tr TestResult) *ResultComposer { +func NewResultComposer(tr Result) *ResultComposer { return &ResultComposer{ - TestResult: tr, - StartTime: time.Now(), + Result: tr, + StartTime: time.Now(), } } -// WithError sets an error on the test result wrapped by ResultComposer. -func (rc *ResultComposer) WithError(err error) ([]TestResult, error) { +// WithError sets an error on the benchmark result wrapped by ResultComposer. +func (rc *ResultComposer) WithError(err error) ([]Result, error) { rc.TimeElapsed = time.Since(rc.StartTime) if err == nil { - return []TestResult{rc.TestResult}, nil - } - - if tcf, ok := err.(ErrTestCaseFailed); ok { - rc.FailureMsg += tcf.Reason - rc.FailureDetails += tcf.Details - return []TestResult{rc.TestResult}, nil + return []Result{rc.Result}, nil } rc.ErrorMsg += err.Error() - return []TestResult{rc.TestResult}, err + return []Result{rc.Result}, err } -// WithSuccess marks the test result wrapped by ResultComposer as successful. -func (rc *ResultComposer) WithSuccess() ([]TestResult, error) { +// WithSuccess marks the benchmark result wrapped by ResultComposer as successful. +func (rc *ResultComposer) WithSuccess() ([]Result, error) { return rc.WithError(nil) } -// WithSkip marks the test result wrapped by ResultComposer as skipped. -func (rc *ResultComposer) WithSkip(s *SkipConfig) ([]TestResult, error) { - rc.TestResult.Skipped = s - return rc.WithError(nil) -} - -// TestFolder encapsulates the test folder path and names of the package + data stream -// to which the test folder belongs. -type TestFolder struct { +// BenchmarkFolder encapsulates the benchmark folder path and names of the package + data stream +// to which the benchmark folder belongs. +type BenchmarkFolder struct { Path string Package string DataStream string } -// AssumeTestFolders assumes potential test folders for the given package, data streams and test types. 
-func AssumeTestFolders(packageRootPath string, dataStreams []string, testType TestType) ([]TestFolder, error) { +// FindBenchmarkFolders finds benchmark folders for the given package and, optionally, benchmark type and data streams +func FindBenchmarkFolders(packageRootPath string, dataStreams []string, benchType BenchType) ([]BenchmarkFolder, error) { // Expected folder structure: // / // data_stream/ // / + // _dev/ + // benchmark/ + // / - dataStreamsPath := filepath.Join(packageRootPath, "data_stream") + benchTypeGlob := "*" + if benchType != "" { + benchTypeGlob = string(benchType) + } + var paths []string if len(dataStreams) == 0 { - fileInfos, err := os.ReadDir(dataStreamsPath) - if errors.Is(err, os.ErrNotExist) { - return []TestFolder{}, nil // data streams defined - } - if err != nil { - return nil, errors.Wrapf(err, "can't read directory (path: %s)", dataStreamsPath) - } - - for _, fi := range fileInfos { - if !fi.IsDir() { - continue - } - dataStreams = append(dataStreams, fi.Name()) - } + return nil, errors.New("benchmarks can only be defined at the data_stream level") } - var folders []TestFolder + sort.Strings(dataStreams) for _, dataStream := range dataStreams { - folders = append(folders, TestFolder{ - Path: filepath.Join(dataStreamsPath, dataStream, "_dev", "test", string(testType)), - Package: filepath.Base(packageRootPath), - DataStream: dataStream, - }) - } - return folders, nil -} - -// FindTestFolders finds test folders for the given package and, optionally, test type and data streams -func FindTestFolders(packageRootPath string, dataStreams []string, testType TestType) ([]TestFolder, error) { - // Expected folder structure: - // / - // data_stream/ - // / - // _dev/ - // test/ - // / - - testTypeGlob := "*" - if testType != "" { - testTypeGlob = string(testType) - } - - var paths []string - if len(dataStreams) > 0 { - sort.Strings(dataStreams) - for _, dataStream := range dataStreams { - p, err := findTestFolderPaths(packageRootPath, dataStream, testTypeGlob) - if err != nil { - return nil, err - } - - paths = append(paths, p...) - } - } else { - p, err := findTestFolderPaths(packageRootPath, "*", testTypeGlob) + p, err := findBenchFolderPaths(packageRootPath, dataStream, benchTypeGlob) if err != nil { return nil, err } - paths = p + paths = append(paths, p...) } - folders := make([]TestFolder, len(paths)) + folders := make([]BenchmarkFolder, len(paths)) _, pkg := filepath.Split(packageRootPath) for idx, p := range paths { relP := strings.TrimPrefix(p, packageRootPath) parts := strings.Split(relP, string(filepath.Separator)) dataStream := parts[2] - folder := TestFolder{ + folder := BenchmarkFolder{ p, pkg, dataStream, @@ -239,41 +156,41 @@ func FindTestFolders(packageRootPath string, dataStreams []string, testType Test return folders, nil } -// RegisterRunner method registers the test runner. -func RegisterRunner(runner TestRunner) { +// RegisterRunner method registers the benchmark runner. +func RegisterRunner(runner BenchRunner) { runners[runner.Type()] = runner } -// Run method delegates execution to the registered test runner, based on the test type. -func Run(testType TestType, options TestOptions) ([]TestResult, error) { - runner, defined := runners[testType] +// Run method delegates execution to the registered benchmark runner, based on the benchmark type. 
+func Run(benchType BenchType, options BenchOptions) (*Result, error) {
+	runner, defined := runners[benchType]
 	if !defined {
-		return nil, fmt.Errorf("unregistered runner test: %s", testType)
+		return nil, fmt.Errorf("unregistered benchmark runner: %s", benchType)
 	}
 
-	results, err := runner.Run(options)
+	result, err := runner.Run(options)
 	tdErr := runner.TearDown()
 	if err != nil {
-		return nil, errors.Wrap(err, "could not complete test run")
+		return nil, errors.Wrap(err, "could not complete benchmark run")
 	}
 
 	if tdErr != nil {
-		return results, errors.Wrap(err, "could not teardown test runner")
+		return result, errors.Wrap(tdErr, "could not tear down benchmark runner")
 	}
-	return results, nil
+	return result, nil
 }
 
-// TestRunners returns registered test runners.
-func TestRunners() map[TestType]TestRunner {
+// BenchRunners returns registered benchmark runners.
+func BenchRunners() map[BenchType]BenchRunner {
 	return runners
 }
 
-// findTestFoldersPaths can only be called for test runners that require tests to be defined
+// findBenchFolderPaths can only be called for benchmark runners that require benchmarks to be defined
 // at the data stream level.
-func findTestFolderPaths(packageRootPath, dataStreamGlob, testTypeGlob string) ([]string, error) {
-	testFoldersGlob := filepath.Join(packageRootPath, "data_stream", dataStreamGlob, "_dev", "test", testTypeGlob)
-	paths, err := filepath.Glob(testFoldersGlob)
+func findBenchFolderPaths(packageRootPath, dataStreamGlob, benchTypeGlob string) ([]string, error) {
+	benchFoldersGlob := filepath.Join(packageRootPath, "data_stream", dataStreamGlob, "_dev", "benchmark", benchTypeGlob)
+	paths, err := filepath.Glob(benchFoldersGlob)
 	if err != nil {
-		return nil, errors.Wrap(err, "error finding test folders")
+		return nil, errors.Wrap(err, "error finding benchmark folders")
 	}
 	return paths, err
 }
diff --git a/internal/benchrunner/errors.go b/internal/benchrunner/errors.go
deleted file mode 100644
index 0a532adf46..0000000000
--- a/internal/benchrunner/errors.go
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
-// or more contributor license agreements. Licensed under the Elastic License;
-// you may not use this file except in compliance with the Elastic License.
-
-package benchrunner
-
-import "fmt"
-
-// ErrTestCaseFailed represents a test case failure result
-type ErrTestCaseFailed struct {
-	Reason  string
-	Details string
-}
-
-// Error returns the message detailing the test case failure.
-func (e ErrTestCaseFailed) Error() string {
-	return fmt.Sprintf("test case failed: %s", e.Reason)
-}
diff --git a/internal/benchrunner/report_format.go b/internal/benchrunner/report_format.go
index b366a36206..816dcaa9ff 100644
--- a/internal/benchrunner/report_format.go
+++ b/internal/benchrunner/report_format.go
@@ -6,24 +6,24 @@ package benchrunner
 
 import "fmt"
 
-// BenchReportFormat represents a test report format
+// BenchReportFormat represents a benchmark report format
 type BenchReportFormat string
 
 // ReportFormatFunc defines the report formatter function.
-type ReportFormatFunc func(results []BenchResult) ([]string, error)
+type ReportFormatFunc func(results []*Result) ([]string, error)
 
 var reportFormatters = map[BenchReportFormat]ReportFormatFunc{}
 
-// RegisterReporterFormat registers a test report formatter.
+// RegisterReporterFormat registers a benchmark report formatter.
func RegisterReporterFormat(name BenchReportFormat, formatFunc ReportFormatFunc) { reportFormatters[name] = formatFunc } -// FormatReport delegates formatting of test results to the registered test report formatter. -func FormatReport(name BenchReportFormat, results []BenchResult) (benchmarkReports []string, err error) { +// FormatReport delegates formatting of benchmark results to the registered benchmark report formatter. +func FormatReport(name BenchReportFormat, results []*Result) (benchmarkReports []string, err error) { reportFunc, defined := reportFormatters[name] if !defined { - return nil, fmt.Errorf("unregistered test report format: %s", name) + return nil, fmt.Errorf("unregistered benchmark report format: %s", name) } return reportFunc(results) diff --git a/internal/benchrunner/report_output.go b/internal/benchrunner/report_output.go index 87745bf480..3f6db9b95f 100644 --- a/internal/benchrunner/report_output.go +++ b/internal/benchrunner/report_output.go @@ -8,7 +8,7 @@ import ( "fmt" ) -// BenchReportOutput represents an output for a test report +// BenchReportOutput represents an output for a benchmark report type BenchReportOutput string // ReportOutputFunc defines the report writer function. @@ -16,16 +16,16 @@ type ReportOutputFunc func(pkg, report string, format BenchReportFormat) error var reportOutputs = map[BenchReportOutput]ReportOutputFunc{} -// RegisterReporterOutput registers a test report output. +// RegisterReporterOutput registers a benchmark report output. func RegisterReporterOutput(name BenchReportOutput, outputFunc ReportOutputFunc) { reportOutputs[name] = outputFunc } -// WriteReport delegates writing of test results to the registered test report output +// WriteReport delegates writing of benchmark results to the registered benchmark report output func WriteReport(pkg string, name BenchReportOutput, report string, format BenchReportFormat) error { outputFunc, defined := reportOutputs[name] if !defined { - return fmt.Errorf("unregistered test report output: %s", name) + return fmt.Errorf("unregistered benchmark report output: %s", name) } return outputFunc(pkg, report, format) } diff --git a/internal/benchrunner/reporters/formats/human.go b/internal/benchrunner/reporters/formats/human.go index ba1b404d68..36ad8c6ad9 100644 --- a/internal/benchrunner/reporters/formats/human.go +++ b/internal/benchrunner/reporters/formats/human.go @@ -18,11 +18,11 @@ func init() { } const ( - // ReportFormatHuman reports test results in a human-readable format + // ReportFormatHuman reports benchmark results in a human-readable format ReportFormatHuman benchrunner.BenchReportFormat = "human" ) -func reportHumanFormat(results []benchrunner.BenchResult) ([]string, error) { +func reportHumanFormat(results []*benchrunner.Result) ([]string, error) { if len(results) == 0 { return nil, nil } @@ -48,8 +48,8 @@ func reportHumanFormatBenchmark(benchmarks []benchrunner.BenchmarkResult) ([]str if len(b.Parameters) > 0 { report.WriteString(renderBenchmarkTable("parameters", b.Parameters) + "\n") } - for _, test := range b.Tests { - report.WriteString(renderBenchmarkTable(test.Name, test.Results) + "\n") + for _, t := range b.Tests { + report.WriteString(renderBenchmarkTable(t.Name, t.Results) + "\n") } textReports = append(textReports, report.String()) } diff --git a/internal/benchrunner/reporters/formats/xunit.go b/internal/benchrunner/reporters/formats/xunit.go index 7fa9fc8a12..209efb0144 100644 --- a/internal/benchrunner/reporters/formats/xunit.go +++ 
b/internal/benchrunner/reporters/formats/xunit.go @@ -17,15 +17,15 @@ func init() { } const ( - // ReportFormatXUnit reports test results in the xUnit format + // ReportFormatXUnit reports benchmark results in the xUnit format ReportFormatXUnit benchrunner.BenchReportFormat = "xUnit" ) -func reportXUnitFormat(results []benchrunner.BenchResult) ([]string, error) { - var benchmarks []benchrunner.BenchmarkResult +func reportXUnitFormat(results []*benchrunner.Result) ([]string, error) { + var benchmarks []*benchrunner.BenchmarkResult for _, r := range results { if r.Benchmark != nil { - benchmarks = append(benchmarks, *r.Benchmark) + benchmarks = append(benchmarks, r.Benchmark) } } @@ -36,18 +36,18 @@ func reportXUnitFormat(results []benchrunner.BenchResult) ([]string, error) { return benchFmtd, nil } -func reportXUnitFormatBenchmark(benchmarks []benchrunner.BenchmarkResult) ([]string, error) { +func reportXUnitFormatBenchmark(benchmarks []*benchrunner.BenchmarkResult) ([]string, error) { var reports []string for _, b := range benchmarks { - // Filter out detailed tests. These add too much information for the + // Filter out detailed benchmarks. These add too much information for the // aggregated nature of xUnit reports, creating a lot of noise in Jenkins. - var tests []benchrunner.BenchmarkTest + var benchmarks []benchrunner.BenchmarkTest for _, t := range b.Tests { if !t.Detailed { - tests = append(tests, t) + benchmarks = append(benchmarks, t) } } - b.Tests = tests + b.Tests = benchmarks out, err := xml.MarshalIndent(b, "", " ") if err != nil { return nil, errors.Wrap(err, "unable to format benchmark results as xUnit") diff --git a/internal/benchrunner/reporters/outputs/file.go b/internal/benchrunner/reporters/outputs/file.go index ae9ddff2a7..6e8656190f 100644 --- a/internal/benchrunner/reporters/outputs/file.go +++ b/internal/benchrunner/reporters/outputs/file.go @@ -22,17 +22,17 @@ func init() { } const ( - // ReportOutputFile reports test results to files in a folder + // ReportOutputFile reports benchmark results to files in a folder ReportOutputFile benchrunner.BenchReportOutput = "file" ) func reportToFile(pkg, report string, format benchrunner.BenchReportFormat) error { dest, err := reportsDir() if err != nil { - return errors.Wrap(err, "could not determine test reports folder") + return errors.Wrap(err, "could not determine benchmark reports folder") } - // Create test reports folder if it doesn't exist + // Create benchmark reports folder if it doesn't exist _, err = os.Stat(dest) if err != nil && errors.Is(err, os.ErrNotExist) { if err := os.MkdirAll(dest, 0755); err != nil { diff --git a/internal/benchrunner/reporters/outputs/stdout.go b/internal/benchrunner/reporters/outputs/stdout.go index 3f1874fdc6..b0fb25bf72 100644 --- a/internal/benchrunner/reporters/outputs/stdout.go +++ b/internal/benchrunner/reporters/outputs/stdout.go @@ -15,7 +15,7 @@ func init() { } const ( - // ReportOutputSTDOUT reports test results to STDOUT + // ReportOutputSTDOUT reports benchmark results to STDOUT ReportOutputSTDOUT benchrunner.BenchReportOutput = "stdout" ) diff --git a/internal/benchrunner/runners/pipeline/benchmark.go b/internal/benchrunner/runners/pipeline/benchmark.go index d2f7e5cde8..8102f4eb41 100644 --- a/internal/benchrunner/runners/pipeline/benchmark.go +++ b/internal/benchrunner/runners/pipeline/benchmark.go @@ -7,7 +7,6 @@ package pipeline import ( "encoding/json" "fmt" - "path/filepath" "sort" "time" @@ -15,37 +14,16 @@ import ( 
"github.com/elastic/elastic-package/internal/benchrunner" "github.com/elastic/elastic-package/internal/elasticsearch/ingest" - "github.com/elastic/elastic-package/internal/packages" ) const ( - // How many attempts to make while approximating - // benchmark duration by adjusting document count. - durationAdjustMaxTries = 3 - - // How close to the target duration for a benchmark - // to be is accepted. - durationToleranceSeconds = 0.5 - - // Same, but as a percentage of the target duration. - durationTolerancePercent = 0.9 - - // Minimum acceptable length for a benchmark result. - minDurationSeconds = 0.001 // 1ms - // How many top processors to return. numTopProcs = 10 ) -func BenchmarkPipeline(options benchrunner.TestOptions) (*benchrunner.BenchmarkResult, error) { - // Load all test documents - docs, err := loadAllTestDocs(options.TestFolder.Path) - if err != nil { - return nil, errors.Wrap(err, "failed loading test documents") - } - +func (r *runner) benchmarkPipeline(b *benchmark, entryPipeline string) (*benchrunner.BenchmarkResult, error) { // Run benchmark - bench, err := benchmarkIngest(options, docs) + bench, err := r.benchmarkIngest(b, entryPipeline) if err != nil { return nil, errors.Wrap(err, "failed running benchmark") } @@ -110,19 +88,19 @@ func BenchmarkPipeline(options benchrunner.TestOptions) (*benchrunner.BenchmarkR // Build result result := &benchrunner.BenchmarkResult{ - Name: fmt.Sprintf("pipeline benchmark for %s/%s", options.TestFolder.Package, options.TestFolder.DataStream), + Name: fmt.Sprintf("pipeline benchmark for %s/%s", r.options.BenchmarkFolder.Package, r.options.BenchmarkFolder.DataStream), Parameters: []benchrunner.BenchmarkValue{ { Name: "package", - Value: options.TestFolder.Package, + Value: r.options.BenchmarkFolder.Package, }, { Name: "data_stream", - Value: options.TestFolder.DataStream, + Value: r.options.BenchmarkFolder.DataStream, }, { Name: "source doc count", - Value: len(docs), + Value: len(b.events), }, { Name: "doc count", @@ -171,26 +149,9 @@ type ingestResult struct { numDocs int } -func benchmarkIngest(options benchrunner.TestOptions, baseDocs []json.RawMessage) (ingestResult, error) { - if options.Benchmark.Duration == time.Duration(0) { - // Run with a fixed doc count - return runSingleBenchmark(options, resizeDocs(baseDocs, options.Benchmark.NumDocs)) - } - - // Approximate doc count to target duration - step, err := runSingleBenchmark(options, baseDocs) - if err != nil { - return step, err - } - - for i, n := 0, len(baseDocs); i < durationAdjustMaxTries && compareFuzzy(step.elapsed, options.Benchmark.Duration) == -1; i++ { - n = int(seconds(options.Benchmark.Duration) * float64(n) / seconds(step.elapsed)) - baseDocs = resizeDocs(baseDocs, n) - if step, err = runSingleBenchmark(options, baseDocs); err != nil { - return step, err - } - } - return step, nil +func (r *runner) benchmarkIngest(b *benchmark, entryPipeline string) (ingestResult, error) { + baseDocs := resizeDocs(b.events, b.config.NumDocs) + return r.runSingleBenchmark(entryPipeline, baseDocs) } type processorPerformance struct { @@ -290,32 +251,16 @@ func (agg aggregation) collect(fn mapFn) ([]benchrunner.BenchmarkValue, error) { return r, nil } -func runSingleBenchmark(options benchrunner.TestOptions, docs []json.RawMessage) (ingestResult, error) { +func (r *runner) runSingleBenchmark(entryPipeline string, docs []json.RawMessage) (ingestResult, error) { if len(docs) == 0 { return ingestResult{}, errors.New("no docs supplied for benchmark") } - dataStreamPath, found, err := 
packages.FindDataStreamRootForPath(options.TestFolder.Path) - if err != nil { - return ingestResult{}, errors.Wrap(err, "locating data_stream root failed") - } - if !found { - return ingestResult{}, errors.New("data stream root not found") - } - - testCase := testCase{ - events: docs, - } - entryPipeline, pipelines, err := installIngestPipelines(options.API, dataStreamPath) - if err != nil { - return ingestResult{}, errors.Wrap(err, "installing ingest pipelines failed") - } - defer uninstallIngestPipelines(options.API, pipelines) - if _, err = simulatePipelineProcessing(options.API, entryPipeline, &testCase); err != nil { + if _, err := ingest.SimulatePipeline(r.options.API, entryPipeline, docs); err != nil { return ingestResult{}, errors.Wrap(err, "simulate failed") } - stats, err := ingest.GetPipelineStats(options.API, pipelines) + stats, err := ingest.GetPipelineStats(r.options.API, r.pipelines) if err != nil { return ingestResult{}, errors.Wrap(err, "error fetching pipeline stats") } @@ -324,7 +269,7 @@ func runSingleBenchmark(options benchrunner.TestOptions, docs []json.RawMessage) took += time.Millisecond * time.Duration(pSt.TimeInMillis) } return ingestResult{ - pipelines: pipelines, + pipelines: r.pipelines, stats: stats, elapsed: took, numDocs: len(docs), @@ -345,44 +290,3 @@ func resizeDocs(inputDocs []json.RawMessage, want int) []json.RawMessage { } return result } - -func seconds(d time.Duration) float64 { - s := d.Seconds() - // Don't return durations less than the safe value. - if s < minDurationSeconds { - return minDurationSeconds - } - return s -} - -func compareFuzzy(a, b time.Duration) int { - sa, sb := seconds(a), seconds(b) - if sa > sb { - sa, sb = sb, sa - } - if sb-sa <= durationToleranceSeconds || sa/sb >= durationTolerancePercent { - return 0 - } - if a < b { - return -1 - } - return 1 -} - -func loadAllTestDocs(testFolderPath string) ([]json.RawMessage, error) { - testCaseFiles, err := listTestCaseFiles(testFolderPath) - if err != nil { - return nil, err - } - - var docs []json.RawMessage - for _, file := range testCaseFiles { - path := filepath.Join(testFolderPath, file) - tc, err := loadTestCaseFile(path) - if err != nil { - return nil, err - } - docs = append(docs, tc.events...) - } - return docs, err -} diff --git a/internal/benchrunner/runners/pipeline/config.go b/internal/benchrunner/runners/pipeline/config.go new file mode 100644 index 0000000000..9c0f9780ae --- /dev/null +++ b/internal/benchrunner/runners/pipeline/config.go @@ -0,0 +1,38 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package pipeline + +import ( + "os" + "path/filepath" + + "github.com/elastic/go-ucfg/yaml" + "github.com/pkg/errors" +) + +const ( + configYAML = "config.yml" +) + +type config struct { + NumDocs int `config:"num_docs"` +} + +func readConfig(path string) (*config, error) { + configPath := filepath.Join(path, configYAML) + var c config + cfg, err := yaml.NewConfigWithFile(configPath) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return nil, errors.Wrapf(err, "can't load common configuration: %s", configPath) + } + + if err == nil { + if err := cfg.Unpack(&c); err != nil { + return nil, errors.Wrapf(err, "can't unpack benchmark configuration: %s", configPath) + } + } + + return &c, nil +} diff --git a/internal/benchrunner/runners/pipeline/runner.go b/internal/benchrunner/runners/pipeline/runner.go index 3fea84580a..ce4d4a1fdc 100644 --- a/internal/benchrunner/runners/pipeline/runner.go +++ b/internal/benchrunner/runners/pipeline/runner.go @@ -9,78 +9,51 @@ import ( "fmt" "os" "path/filepath" - "regexp" - "strings" "time" "github.com/pkg/errors" "github.com/elastic/elastic-package/internal/benchrunner" - "github.com/elastic/elastic-package/internal/common" "github.com/elastic/elastic-package/internal/elasticsearch/ingest" - "github.com/elastic/elastic-package/internal/fields" - "github.com/elastic/elastic-package/internal/logger" - "github.com/elastic/elastic-package/internal/multierror" "github.com/elastic/elastic-package/internal/packages" - "github.com/elastic/elastic-package/internal/signal" ) const ( - // TestType defining pipeline tests - TestType benchrunner.TestType = "pipeline" + // BenchType defining pipeline benchmarks. + BenchType benchrunner.BenchType = "pipeline" ) type runner struct { - options benchrunner.TestOptions + options benchrunner.BenchOptions pipelines []ingest.Pipeline } -func (r *runner) TestFolderRequired() bool { - return true +// Type returns the type of benchmark that can be run by this benchmark runner. +func (r *runner) Type() benchrunner.BenchType { + return BenchType } -// Type returns the type of test that can be run by this test runner. -func (r *runner) Type() benchrunner.TestType { - return TestType -} - -// String returns the human-friendly name of the test runner. +// String returns the human-friendly name of the benchmark runner. func (r *runner) String() string { return "pipeline" } -// Run runs the pipeline tests defined under the given folder -func (r *runner) Run(options benchrunner.TestOptions) ([]benchrunner.TestResult, error) { +// Run runs the pipeline benchmarks defined under the given folder +func (r *runner) Run(options benchrunner.BenchOptions) (*benchrunner.Result, error) { r.options = options return r.run() } -// TearDown shuts down the pipeline test runner. +// TearDown shuts down the pipeline benchmark runner. func (r *runner) TearDown() error { - if r.options.DeferCleanup > 0 { - logger.Debugf("Waiting for %s before cleanup...", r.options.DeferCleanup) - signal.Sleep(r.options.DeferCleanup) - } - if err := ingest.UninstallPipelines(r.options.API, r.pipelines); err != nil { return errors.Wrap(err, "uninstalling ingest pipelines failed") } return nil } -// CanRunPerDataStream returns whether this test runner can run on individual -// data streams within the package. 
-func (r *runner) CanRunPerDataStream() bool { - return true -} - -func (r *runner) run() ([]benchrunner.TestResult, error) { - testCaseFiles, err := r.listTestCaseFiles() - if err != nil { - return nil, errors.Wrap(err, "listing test case definitions failed") - } - - dataStreamPath, found, err := packages.FindDataStreamRootForPath(r.options.TestFolder.Path) +func (r *runner) run() (*benchrunner.Result, error) { + dataStreamPath, found, err := packages.FindDataStreamRootForPath(r.options.BenchmarkFolder.Path) if err != nil { return nil, errors.Wrap(err, "locating data_stream root failed") } @@ -89,117 +62,41 @@ func (r *runner) run() ([]benchrunner.TestResult, error) { } var entryPipeline string - entryPipeline, r.pipelines, err = installIngestPipelines(r.options.API, dataStreamPath) + entryPipeline, r.pipelines, err = ingest.InstallDataStreamPipelines(r.options.API, dataStreamPath) if err != nil { return nil, errors.Wrap(err, "installing ingest pipelines failed") } - results := make([]benchrunner.TestResult, 0) - for _, testCaseFile := range testCaseFiles { - tr := benchrunner.TestResult{ - TestType: TestType, - Package: r.options.TestFolder.Package, - DataStream: r.options.TestFolder.DataStream, - } - startTime := time.Now() - - // TODO: Add tests to cover regressive use of json.Unmarshal in loadTestCaseFile. - // See https://github.com/elastic/elastic-package/pull/717. - tc, err := r.loadTestCaseFile(testCaseFile) - if err != nil { - err := errors.Wrap(err, "loading test case failed") - tr.ErrorMsg = err.Error() - results = append(results, tr) - continue - } - tr.Name = tc.name - - if tc.config.Skip != nil { - logger.Warnf("skipping %s test for %s/%s: %s (details: %s)", - TestType, r.options.TestFolder.Package, r.options.TestFolder.DataStream, - tc.config.Skip.Reason, tc.config.Skip.Link.String()) - - tr.Skipped = tc.config.Skip - results = append(results, tr) - continue - } - - result, err := simulatePipelineProcessing(r.options.API, entryPipeline, tc) - if err != nil { - err := errors.Wrap(err, "simulating pipeline processing failed") - tr.ErrorMsg = err.Error() - results = append(results, tr) - continue - } - - tr.TimeElapsed = time.Since(startTime) - fieldsValidator, err := fields.CreateValidatorForDirectory(dataStreamPath, - fields.WithNumericKeywordFields(tc.config.NumericKeywordFields), - // explicitly enabled for pipeline tests only - // since system tests can have dynamic public IPs - fields.WithEnabledAllowedIPCheck(), - ) - if err != nil { - return nil, errors.Wrapf(err, "creating fields validator for data stream failed (path: %s, test case file: %s)", dataStreamPath, testCaseFile) - } - - // TODO: Add tests to cover regressive use of json.Unmarshal in verifyResults. - // See https://github.com/elastic/elastic-package/pull/717. 
- err = r.verifyResults(testCaseFile, tc.config, result, fieldsValidator) - if e, ok := err.(benchrunner.ErrTestCaseFailed); ok { - tr.FailureMsg = e.Error() - tr.FailureDetails = e.Details - - results = append(results, tr) - continue - } - if err != nil { - err := errors.Wrap(err, "verifying test result failed") - tr.ErrorMsg = err.Error() - results = append(results, tr) - continue - } + start := time.Now() + result := &benchrunner.Result{ + BenchType: BenchType + " benchmark", + Package: r.options.BenchmarkFolder.Package, + DataStream: r.options.BenchmarkFolder.DataStream, + } - if r.options.WithCoverage { - tr.Coverage, err = GetPipelineCoverage(r.options, r.pipelines) - if err != nil { - return nil, errors.Wrap(err, "error calculating pipeline coverage") - } - } - results = append(results, tr) + b, err := r.loadBenchmark() + if err != nil { + return nil, errors.Wrap(err, "loading benchmark failed") } - if r.options.Benchmark.Enabled { - start := time.Now() - tr := benchrunner.TestResult{ - TestType: TestType + " benchmark", - Package: r.options.TestFolder.Package, - DataStream: r.options.TestFolder.DataStream, - } - if tr.Benchmark, err = BenchmarkPipeline(r.options); err != nil { - tr.ErrorMsg = err.Error() - } - tr.TimeElapsed = time.Since(start) - results = append(results, tr) + if result.Benchmark, err = r.benchmarkPipeline(b, entryPipeline); err != nil { + result.ErrorMsg = err.Error() } - return results, nil -} + result.TimeElapsed = time.Since(start) -func (r *runner) listTestCaseFiles() ([]string, error) { - return listTestCaseFiles(r.options.TestFolder.Path) + return result, nil } -func listTestCaseFiles(path string) ([]string, error) { - fis, err := os.ReadDir(path) +func (r *runner) listBenchmarkFiles() ([]string, error) { + fis, err := os.ReadDir(r.options.BenchmarkFolder.Path) if err != nil { - return nil, errors.Wrapf(err, "reading pipeline tests failed (path: %s)", path) + return nil, errors.Wrapf(err, "reading pipeline benchmarks failed (path: %s)", r.options.BenchmarkFolder.Path) } var files []string for _, fi := range fis { - if strings.HasSuffix(fi.Name(), expectedTestResultSuffix) || - strings.HasSuffix(fi.Name(), configTestSuffixYAML) { + if fi.Name() == configYAML { continue } files = append(files, fi.Name()) @@ -207,189 +104,49 @@ func listTestCaseFiles(path string) ([]string, error) { return files, nil } -func (r *runner) loadTestCaseFile(testCaseFile string) (*testCase, error) { - return loadTestCaseFile(filepath.Join(r.options.TestFolder.Path, testCaseFile)) -} - -func loadTestCaseFile(testCasePath string) (*testCase, error) { - testCaseData, err := os.ReadFile(testCasePath) - if err != nil { - return nil, errors.Wrapf(err, "reading input file failed (testCasePath: %s)", testCasePath) - } - - config, err := readConfigForTestCase(testCasePath) - if err != nil { - return nil, errors.Wrapf(err, "reading config for test case failed (testCasePath: %s)", testCasePath) - } - - testCaseFile := filepath.Base(testCasePath) - if config.Skip != nil { - return &testCase{ - name: testCaseFile, - config: config, - }, nil - } - - ext := filepath.Ext(testCaseFile) - - var entries []json.RawMessage - switch ext { - case ".json": - entries, err = readTestCaseEntriesForEvents(testCaseData) - if err != nil { - return nil, errors.Wrapf(err, "reading test case entries for events failed (testCasePath: %s)", testCasePath) - } - case ".log": - entries, err = readTestCaseEntriesForRawInput(testCaseData, config) - if err != nil { - return nil, errors.Wrapf(err, "creating test case 
entries for raw input failed (testCasePath: %s)", testCasePath) - } - default: - return nil, fmt.Errorf("unsupported extension for test case file (ext: %s)", ext) - } - - tc, err := createTestCase(testCaseFile, entries, config) - if err != nil { - return nil, errors.Wrapf(err, "can't create test case (testCasePath: %s)", testCasePath) - } - return tc, nil -} - -func (r *runner) verifyResults(testCaseFile string, config *testConfig, result *testResult, fieldsValidator *fields.Validator) error { - testCasePath := filepath.Join(r.options.TestFolder.Path, testCaseFile) - - if r.options.GenerateTestResult { - // TODO: Add tests to cover regressive use of json.Unmarshal in writeTestResult. - // See https://github.com/elastic/elastic-package/pull/717. - err := writeTestResult(testCasePath, result) - if err != nil { - return errors.Wrap(err, "writing test result failed") - } - } - - err := compareResults(testCasePath, config, result) - if _, ok := err.(benchrunner.ErrTestCaseFailed); ok { - return err - } - if err != nil { - return errors.Wrap(err, "comparing test results failed") - } - - result = stripEmptyTestResults(result) - - err = verifyDynamicFields(result, config) +func (r *runner) loadBenchmark() (*benchmark, error) { + benchFiles, err := r.listBenchmarkFiles() if err != nil { - return err - } - - err = verifyFieldsInTestResult(result, fieldsValidator) - if err != nil { - return err - } - return nil -} - -// stripEmptyTestResults function removes events which are nils. These nils can represent -// documents processed by a pipeline which potentially used a "drop" processor (to drop the event at all). -func stripEmptyTestResults(result *testResult) *testResult { - var tr testResult - for _, event := range result.events { - if event == nil { - continue - } - tr.events = append(tr.events, event) + return nil, err } - return &tr -} -func verifyDynamicFields(result *testResult, config *testConfig) error { - if config == nil || config.DynamicFields == nil { - return nil - } - - var multiErr multierror.Error - for _, event := range result.events { - var m common.MapStr - err := jsonUnmarshalUsingNumber(event, &m) + var allEntries []json.RawMessage + for _, benchFile := range benchFiles { + benchPath := filepath.Join(r.options.BenchmarkFolder.Path, benchFile) + benchData, err := os.ReadFile(benchPath) if err != nil { - return errors.Wrap(err, "can't unmarshal event") + return nil, errors.Wrapf(err, "reading input file failed (benchPath: %s)", benchPath) } - for key, pattern := range config.DynamicFields { - val, err := m.GetValue(key) - if err != nil && err != common.ErrKeyNotFound { - return errors.Wrap(err, "can't remove dynamic field") - } - - valStr, ok := val.(string) - if !ok { - continue // regular expressions can be verify only string values - } - - matched, err := regexp.MatchString(pattern, valStr) + ext := filepath.Ext(benchFile) + var entries []json.RawMessage + switch ext { + case ".json": + entries, err = readBenchmarkEntriesForEvents(benchData) if err != nil { - return errors.Wrap(err, "pattern matching for dynamic field failed") + return nil, errors.Wrapf(err, "reading benchmark case entries for events failed (benchmarkPath: %s)", benchPath) } - - if !matched { - multiErr = append(multiErr, fmt.Errorf("dynamic field \"%s\" doesn't match the pattern (%s): %s", - key, pattern, valStr)) + case ".log": + entries, err = readBenchmarkEntriesForRawInput(benchData) + if err != nil { + return nil, errors.Wrapf(err, "creating benchmark case entries for raw input failed (benchmarkPath: %s)", 
benchPath) } + default: + return nil, fmt.Errorf("unsupported extension for benchmark case file (ext: %s)", ext) } + allEntries = append(allEntries, entries...) } - if len(multiErr) > 0 { - return benchrunner.ErrTestCaseFailed{ - Reason: "one or more problems with dynamic fields found in documents", - Details: multiErr.Unique().Error(), - } - } - return nil -} - -func verifyFieldsInTestResult(result *testResult, fieldsValidator *fields.Validator) error { - var multiErr multierror.Error - for _, event := range result.events { - err := checkErrorMessage(event) - if err != nil { - multiErr = append(multiErr, err) - continue // all fields can be wrong, no need validate them - } - - errs := fieldsValidator.ValidateDocumentBody(event) - if errs != nil { - multiErr = append(multiErr, errs...) - } - } - - if len(multiErr) > 0 { - return benchrunner.ErrTestCaseFailed{ - Reason: "one or more problems with fields found in documents", - Details: multiErr.Unique().Error(), - } - } - return nil -} - -func checkErrorMessage(event json.RawMessage) error { - var pipelineError struct { - Error struct { - Message interface{} - } - } - err := jsonUnmarshalUsingNumber(event, &pipelineError) + config, err := readConfig(r.options.BenchmarkFolder.Path) if err != nil { - return errors.Wrapf(err, "can't unmarshal event to check pipeline error: %#q", event) + return nil, errors.Wrapf(err, "reading config for benchmark failed (benchPath: %s)", r.options.BenchmarkFolder.Path) } - switch m := pipelineError.Error.Message.(type) { - case nil: - return nil - case string, []string: - return fmt.Errorf("unexpected pipeline error: %s", m) - default: - return fmt.Errorf("unexpected pipeline error (unexpected error.message type %T): %[1]v", m) + tc, err := createBenchmark(allEntries, config) + if err != nil { + return nil, errors.Wrapf(err, "can't create benchmark case (benchmarkPath: %s)", r.options.BenchmarkFolder.Path) } + return tc, nil } func init() { diff --git a/internal/benchrunner/runners/pipeline/runner_test.go b/internal/benchrunner/runners/pipeline/runner_test.go index d8c04ddbb0..9900bc87b6 100644 --- a/internal/benchrunner/runners/pipeline/runner_test.go +++ b/internal/benchrunner/runners/pipeline/runner_test.go @@ -9,40 +9,8 @@ import ( "fmt" "strings" "testing" - - "github.com/stretchr/testify/require" -) - -const ( - firstTestResult = "first" - secondTestResult = "second" - thirdTestResult = "third" - - emptyTestResult = "" ) -func TestStripEmptyTestResults(t *testing.T) { - given := &testResult{ - events: []json.RawMessage{ - []byte(firstTestResult), - nil, - nil, - []byte(emptyTestResult), - []byte(secondTestResult), - nil, - []byte(thirdTestResult), - nil, - }, - } - - actual := stripEmptyTestResults(given) - require.Len(t, actual.events, 4) - require.Equal(t, actual.events[0], json.RawMessage(firstTestResult)) - require.Equal(t, actual.events[1], json.RawMessage(emptyTestResult)) - require.Equal(t, actual.events[2], json.RawMessage(secondTestResult)) - require.Equal(t, actual.events[3], json.RawMessage(thirdTestResult)) -} - var jsonUnmarshalUsingNumberTests = []struct { name string msg string diff --git a/internal/benchrunner/runners/pipeline/test_case.go b/internal/benchrunner/runners/pipeline/test_case.go index 296b4d5bd5..bb8117568c 100644 --- a/internal/benchrunner/runners/pipeline/test_case.go +++ b/internal/benchrunner/runners/pipeline/test_case.go @@ -8,7 +8,8 @@ import ( "bufio" "bytes" "encoding/json" - "regexp" + "fmt" + "io" "strings" "github.com/elastic/elastic-package/internal/common" @@ 
-16,18 +17,17 @@ import ( "github.com/pkg/errors" ) -type testCase struct { - name string - config *testConfig +type benchmark struct { events []json.RawMessage + config *config } -type testCaseDefinition struct { +type benchmarkDefinition struct { Events []json.RawMessage `json:"events"` } -func readTestCaseEntriesForEvents(inputData []byte) ([]json.RawMessage, error) { - var tcd testCaseDefinition +func readBenchmarkEntriesForEvents(inputData []byte) ([]json.RawMessage, error) { + var tcd benchmarkDefinition err := jsonUnmarshalUsingNumber(inputData, &tcd) if err != nil { return nil, errors.Wrap(err, "unmarshalling input data failed") @@ -35,8 +35,8 @@ func readTestCaseEntriesForEvents(inputData []byte) ([]json.RawMessage, error) { return tcd.Events, nil } -func readTestCaseEntriesForRawInput(inputData []byte, config *testConfig) ([]json.RawMessage, error) { - entries, err := readRawInputEntries(inputData, config) +func readBenchmarkEntriesForRawInput(inputData []byte) ([]json.RawMessage, error) { + entries, err := readRawInputEntries(inputData) if err != nil { return nil, errors.Wrap(err, "reading raw input entries failed") } @@ -55,20 +55,13 @@ func readTestCaseEntriesForRawInput(inputData []byte, config *testConfig) ([]jso return events, nil } -func createTestCase(filename string, entries []json.RawMessage, config *testConfig) (*testCase, error) { +func createBenchmark(entries []json.RawMessage, config *config) (*benchmark, error) { var events []json.RawMessage for _, entry := range entries { var m common.MapStr err := jsonUnmarshalUsingNumber(entry, &m) if err != nil { - return nil, errors.Wrap(err, "can't unmarshal test case entry") - } - - for k, v := range config.Fields { - _, err = m.Put(k, v) - if err != nil { - return nil, errors.Wrap(err, "can't set custom field") - } + return nil, errors.Wrap(err, "can't unmarshal benchmark entry") } event, err := json.Marshal(&m) @@ -77,48 +70,24 @@ func createTestCase(filename string, entries []json.RawMessage, config *testConf } events = append(events, event) } - return &testCase{ - name: filename, - config: config, + return &benchmark{ events: events, + config: config, }, nil } -func readRawInputEntries(inputData []byte, c *testConfig) ([]string, error) { +func readRawInputEntries(inputData []byte) ([]string, error) { var inputDataEntries []string var builder strings.Builder scanner := bufio.NewScanner(bytes.NewReader(inputData)) for scanner.Scan() { line := scanner.Text() - - var body string - if c.Multiline != nil && c.Multiline.FirstLinePattern != "" { - matched, err := regexp.MatchString(c.Multiline.FirstLinePattern, line) - if err != nil { - return nil, errors.Wrapf(err, "regexp matching failed (pattern: %s)", c.Multiline.FirstLinePattern) - } - - if matched { - body = builder.String() - builder.Reset() - } - if builder.Len() > 0 { - builder.WriteByte('\n') - } - builder.WriteString(line) - if !matched || body == "" { - continue - } - } else { - body = line - } - - inputDataEntries = append(inputDataEntries, body) + inputDataEntries = append(inputDataEntries, line) } err := scanner.Err() if err != nil { - return nil, errors.Wrap(err, "reading raw input test file failed") + return nil, errors.Wrap(err, "reading raw input benchmark file failed") } lastEntry := builder.String() @@ -127,3 +96,25 @@ func readRawInputEntries(inputData []byte, c *testConfig) ([]string, error) { } return inputDataEntries, nil } + +// jsonUnmarshalUsingNumber is a drop-in replacement for json.Unmarshal that +// does not default to unmarshaling numeric values 
to float64 in order to +// prevent low bit truncation of values greater than 1<<53. +// See https://golang.org/cl/6202068 for details. +func jsonUnmarshalUsingNumber(data []byte, v interface{}) error { + dec := json.NewDecoder(bytes.NewReader(data)) + dec.UseNumber() + err := dec.Decode(v) + if err != nil { + if err == io.EOF { + return errors.New("unexpected end of JSON input") + } + return err + } + // Make sure there is no more data after the message + // to approximate json.Unmarshal's behaviour. + if dec.More() { + return fmt.Errorf("more data after top-level value") + } + return nil +} diff --git a/internal/benchrunner/runners/runners.go b/internal/benchrunner/runners/runners.go index 21da99db8a..1fc2f6b067 100644 --- a/internal/benchrunner/runners/runners.go +++ b/internal/benchrunner/runners/runners.go @@ -5,9 +5,6 @@ package runners import ( - // Registered test runners - _ "github.com/elastic/elastic-package/internal/benchrunner/runners/asset" + // Registered benchmark runners _ "github.com/elastic/elastic-package/internal/benchrunner/runners/pipeline" - _ "github.com/elastic/elastic-package/internal/benchrunner/runners/static" - _ "github.com/elastic/elastic-package/internal/benchrunner/runners/system" ) diff --git a/internal/cobraext/flags.go b/internal/cobraext/flags.go index 76ec0aa7d6..abc1e1f0ed 100644 --- a/internal/cobraext/flags.go +++ b/internal/cobraext/flags.go @@ -131,12 +131,6 @@ const ( TestCoverageFlagName = "test-coverage" TestCoverageFlagDescription = "generate Cobertura test coverage reports" - TestBenchCountFlagName = "bench-count" - TestBenchCountFlagDescription = "fixed number of docs to use for benchmark" - - TestPerfDurationFlagName = "bench-duration" - TestPerfDurationFlagDescription = "adjust the number of docs so that the benchmark runs for this duration" - VariantFlagName = "variant" VariantFlagDescription = "service variant" ) From 689bb4b5302441e04486b5f37aff9b826d4930c9 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Mon, 5 Sep 2022 14:57:19 +0200 Subject: [PATCH 12/20] Add doc --- cmd/benchrunner.go | 2 +- docs/howto/pipeline_benchmarking.md | 128 ++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 docs/howto/pipeline_benchmarking.md diff --git a/cmd/benchrunner.go b/cmd/benchrunner.go index 793d77f176..22fa595936 100644 --- a/cmd/benchrunner.go +++ b/cmd/benchrunner.go @@ -27,7 +27,7 @@ const benchLongDescription = `Use this command to run benchmarks on a package. C #### Pipeline Benchmarks These benchmarks allow you to benchmark any Ingest Node Pipelines defined by your packages. -For details on how to configure pipeline benchmarks for a package, review the [HOWTO guide](https://github.com/elastic/elastic-package/blob/main/docs/howto/pipeline_benchmarks.md).` +For details on how to configure pipeline benchmarks for a package, review the [HOWTO guide](https://github.com/elastic/elastic-package/blob/main/docs/howto/pipeline_benchmarking.md).` func setupBenchmarkCommand() *cobraext.Command { var benchTypeCmdActions []cobraext.CommandAction diff --git a/docs/howto/pipeline_benchmarking.md b/docs/howto/pipeline_benchmarking.md new file mode 100644 index 0000000000..c68cbeb7bc --- /dev/null +++ b/docs/howto/pipeline_benchmarking.md @@ -0,0 +1,128 @@ +# HOWTO: Writing pipeline benchmarks for a package + +## Introduction + +Elastic Packages are comprised of data streams. A pipeline benchmark exercises Elasticsearch Ingest Node pipelines defined for a package's data stream. 
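Under the hood, a benchmark run boils down to feeding sample documents through the Elasticsearch [Simulate Pipeline API](https://www.elastic.co/guide/en/elasticsearch/reference/master/simulate-pipeline-api.html), as outlined in the next section. The sketch below is illustrative only — it calls the API directly over HTTP with placeholder host and pipeline names, rather than going through elastic-package's own Elasticsearch client:

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

// simulate POSTs sample documents to the Elasticsearch Simulate Pipeline API,
// the same API the benchmark relies on to process events. The URL and
// pipeline name passed in below are placeholders, not values used by
// elastic-package itself.
func simulate(esURL, pipelineName string, body []byte) ([]byte, error) {
	url := fmt.Sprintf("%s/_ingest/pipeline/%s/_simulate", esURL, pipelineName)
	resp, err := http.Post(url, "application/json", bytes.NewReader(body))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	return io.ReadAll(resp.Body)
}

func main() {
	docs := []byte(`{"docs":[{"_source":{"message":"sample log line"}}]}`)
	out, err := simulate("http://localhost:9200", "my-pipeline-name", docs)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
}
```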
+ +## Conceptual process + +Conceptually, running a pipeline benchmark involves the following steps: + +1. Deploy the Elasticsearch instance (part of the Elastic Stack). This step takes time so it should typically be done once as a pre-requisite to running pipeline benchmarks on multiple data streams. +1. Upload ingest pipelines to be benchmarked. +1. Use [Simulate API](https://www.elastic.co/guide/en/elasticsearch/reference/master/simulate-pipeline-api.html) to process logs/metrics with the ingest pipeline. +1. Gather statistics of the involved processors and show them in a report. + +## Limitations + +At the moment pipeline benchmarks have limitations. The main ones are: +* As you're only benchmarking the ingest pipeline, you can prepare mocked documents with imaginary fields, different from ones collected in Beats. Also the other way round, you can skip most of the processors and as examples use tiny documents with minimal set of fields just to run the processing simulation. +* There might be integrations which transform data mostly using Beats processors instead of ingest pipelines. In such cases ingest pipeline benchmarks are rather plain. + +## Defining a pipeline benchmark + +Packages have a specific folder structure (only relevant parts shown). + +``` +/ + data_stream/ + / + manifest.yml + manifest.yml +``` + +To define a pipeline benchmark we must define configuration at each dataset's level: + +``` +/ + data_stream/ + / + _dev/ + benchmark/ + pipeline/ + (benchmark samples definitions, both raw files and input events, optional configuration) + manifest.yml + manifest.yml +``` + +### Benchmark definitions + +There are two types of benchmark samples definitions - **raw files** and **input events**. + +#### Raw files + +The raw files simplify preparing samples using real application `.log` files. A sample log (e.g. `access-sample.log`) file may look like the following one for Nginx: + +``` +127.0.0.1 - - [07/Dec/2016:11:04:37 +0100] "GET /test1 HTTP/1.1" 404 571 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36" +127.0.0.1 - - [07/Dec/2016:11:04:58 +0100] "GET / HTTP/1.1" 304 0 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:49.0) Gecko/20100101 Firefox/49.0" +127.0.0.1 - - [07/Dec/2016:11:04:59 +0100] "GET / HTTP/1.1" 304 0 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:49.0) Gecko/20100101 Firefox/49.0" +``` + +#### Input events + +The input events contain mocked JSON events that are ready to be passed to the ingest pipeline as-is. Such events can be helpful in situations in which an input event can't be serialized to a standard log file, e.g. Redis input. A sample file with input events (e.g. 
`access-event.json`) looks as following: + +```json +{ + "events": [ + { + "@timestamp": "2016-10-25T12:49:34.000Z", + "message": "127.0.0.1 - - [07/Dec/2016:11:04:37 +0100] \"GET /test1 HTTP/1.1\" 404 571 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36\"\n" + }, + { + "@timestamp": "2016-10-25T12:49:34.000Z", + "message": "127.0.0.1 - - [07/Dec/2016:11:05:07 +0100] \"GET /taga HTTP/1.1\" 404 169 \"-\" \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:49.0) Gecko/20100101 Firefox/49.0\"\n" + } + ] +} +``` + +#### Benchmark configuration + +The benchmark execution can be customized to some extent using an optional configuration stored as a YAML file with the name `config.yml`: + +```yml +num_docs: 1000 +``` + +The `num_docs` option tells the benchmarks how many events should be sent with the simulation request. If not enough samples are provided, the events will be reused to generate a sufficient number of them. + + +## Running a pipeline benchmark + +Once the configurations are defined as described in the previous section, you are ready to run pipeline benchmarks for a package's data streams. + +First you must deploy the Elasticsearch instance. This corresponds to step 1 as described in the [_Conceptual process_](#Conceptual-process) section. + +``` +elastic-package stack up -d --services=elasticsearch +``` + +For a complete listing of options available for this command, run `elastic-package stack up -h` or `elastic-package help stack up`. + +Next, you must set environment variables needed for further `elastic-package` commands. + +``` +$(elastic-package stack shellinit) +``` + +Next, you must invoke the pipeline benchmark runner. This corresponds to steps 2 through 4 as described in the [_Conceptual process_](#Conceptual-process) section. + +If you want to run pipeline tests for **all data streams** in a package, navigate to the package's root folder (or any sub-folder under it) and run the following command. + +``` +elastic-package test pipeline +``` + +If you want to run pipeline tests for **specific data streams** in a package, navigate to the package's root folder (or any sub-folder under it) and run the following command. + +``` +elastic-package test pipeline --data-streams [,,...] +``` + +Finally, when you are done running all pipeline tests, bring down the Elastic Stack. This corresponds to step 4 as described in the [_Conceptual process_](#Conceptual-process) section. 
+ +``` +elastic-package stack down +``` From 4d0f3d33cad4dd02df52f3780786b1cba63518d9 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Mon, 5 Sep 2022 15:45:07 +0200 Subject: [PATCH 13/20] Remove unused method after merge --- internal/configuration/locations/locations.go | 2 +- internal/elasticsearch/ingest/datastream.go | 11 ----------- internal/elasticsearch/ingest/pipeline.go | 3 ++- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/internal/configuration/locations/locations.go b/internal/configuration/locations/locations.go index 9347335789..42831fa127 100644 --- a/internal/configuration/locations/locations.go +++ b/internal/configuration/locations/locations.go @@ -38,7 +38,7 @@ var ( dockerCustomAgentDeployerDir = filepath.Join(deployerDir, "docker_custom_agent") ) -//LocationManager maintains an instance of a config path location +// LocationManager maintains an instance of a config path location type LocationManager struct { stackPath string } diff --git a/internal/elasticsearch/ingest/datastream.go b/internal/elasticsearch/ingest/datastream.go index 0a6edf2f18..e2283978c5 100644 --- a/internal/elasticsearch/ingest/datastream.go +++ b/internal/elasticsearch/ingest/datastream.go @@ -155,17 +155,6 @@ func getIngestPipeline(api *elasticsearch.API, pipeline Pipeline) error { return nil } -func uninstallIngestPipelines(api *elasticsearch.API, pipelines []Pipeline) error { - for _, pipeline := range pipelines { - resp, err := api.Ingest.DeletePipeline(pipeline.Name) - if err != nil { - return pipelineError(err, pipeline, "DeletePipeline API call failed") - } - resp.Body.Close() - } - return nil -} - func getPipelineNameWithNonce(pipelineName string, nonce int64) string { return fmt.Sprintf("%s-%d", pipelineName, nonce) } diff --git a/internal/elasticsearch/ingest/pipeline.go b/internal/elasticsearch/ingest/pipeline.go index ec65d5cb65..839bd6e52b 100644 --- a/internal/elasticsearch/ingest/pipeline.go +++ b/internal/elasticsearch/ingest/pipeline.go @@ -11,9 +11,10 @@ import ( "net/http" "strings" - "github.com/elastic/elastic-package/internal/elasticsearch" "github.com/pkg/errors" "gopkg.in/yaml.v3" + + "github.com/elastic/elastic-package/internal/elasticsearch" ) type simulatePipelineRequest struct { From 0d81b4f5166c80ad773665f466c14fefbca2aac6 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Mon, 5 Sep 2022 15:55:54 +0200 Subject: [PATCH 14/20] Re-generate readme --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index 1c938b7189..0cbeb0e499 100644 --- a/README.md +++ b/README.md @@ -88,6 +88,17 @@ The command output shell completions information (for `bash`, `zsh`, `fish` and Run `elastic-package completion` and follow the instruction for your shell. +### `elastic-package benchmark` + +_Context: package_ + +Use this command to run benchmarks on a package. Currently, the following types of benchmarks are available: + +#### Pipeline Benchmarks +These benchmarks allow you to benchmark any Ingest Node Pipelines defined by your packages. + +For details on how to configure pipeline benchmarks for a package, review the [HOWTO guide](https://github.com/elastic/elastic-package/blob/main/docs/howto/pipeline_benchmarking.md). 
+ ### `elastic-package build` _Context: package_ From 0456c35cfb228e9325b1ea66c4aaf5ad9007af42 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Mon, 5 Sep 2022 17:18:08 +0200 Subject: [PATCH 15/20] Fix benchmark commands in doc --- docs/howto/pipeline_benchmarking.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/howto/pipeline_benchmarking.md b/docs/howto/pipeline_benchmarking.md index c68cbeb7bc..0426c9ea0c 100644 --- a/docs/howto/pipeline_benchmarking.md +++ b/docs/howto/pipeline_benchmarking.md @@ -109,19 +109,19 @@ $(elastic-package stack shellinit) Next, you must invoke the pipeline benchmark runner. This corresponds to steps 2 through 4 as described in the [_Conceptual process_](#Conceptual-process) section. -If you want to run pipeline tests for **all data streams** in a package, navigate to the package's root folder (or any sub-folder under it) and run the following command. +If you want to run pipeline benchmarks for **all data streams** in a package, navigate to the package's root folder (or any sub-folder under it) and run the following command. ``` -elastic-package test pipeline +elastic-package benchmark pipeline ``` -If you want to run pipeline tests for **specific data streams** in a package, navigate to the package's root folder (or any sub-folder under it) and run the following command. +If you want to run pipeline benchmarks for **specific data streams** in a package, navigate to the package's root folder (or any sub-folder under it) and run the following command. ``` -elastic-package test pipeline --data-streams [,,...] +elastic-package benchmark pipeline --data-streams [,,...] ``` -Finally, when you are done running all pipeline tests, bring down the Elastic Stack. This corresponds to step 4 as described in the [_Conceptual process_](#Conceptual-process) section. +Finally, when you are done running all benchmarks, bring down the Elastic Stack. This corresponds to step 4 as described in the [_Conceptual process_](#Conceptual-process) section. ``` elastic-package stack down From 1ba379053e4e899abfdaa766a336b7827a73080d Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Tue, 6 Sep 2022 12:27:03 +0200 Subject: [PATCH 16/20] Add fallback to use pipeline test samples --- cmd/benchrunner.go | 23 ++++++++++++-- docs/howto/pipeline_benchmarking.md | 8 ++++- internal/benchrunner/benchrunner.go | 23 +++++--------- .../benchrunner/runners/pipeline/benchmark.go | 6 ++-- .../benchrunner/runners/pipeline/config.go | 12 ++++++-- .../benchrunner/runners/pipeline/runner.go | 30 ++++++++++++------- internal/cobraext/flags.go | 3 ++ 7 files changed, 70 insertions(+), 35 deletions(-) diff --git a/cmd/benchrunner.go b/cmd/benchrunner.go index 22fa595936..4ac97a1a38 100644 --- a/cmd/benchrunner.go +++ b/cmd/benchrunner.go @@ -20,6 +20,7 @@ import ( "github.com/elastic/elastic-package/internal/elasticsearch" "github.com/elastic/elastic-package/internal/packages" "github.com/elastic/elastic-package/internal/signal" + "github.com/elastic/elastic-package/internal/testrunner" ) const benchLongDescription = `Use this command to run benchmarks on a package. 
Currently, the following types of benchmarks are available: @@ -49,6 +50,7 @@ func setupBenchmarkCommand() *cobraext.Command { cmd.PersistentFlags().BoolP(cobraext.FailOnMissingFlagName, "m", false, cobraext.FailOnMissingFlagDescription) cmd.PersistentFlags().StringP(cobraext.ReportFormatFlagName, "", string(formats.ReportFormatHuman), cobraext.ReportFormatFlagDescription) cmd.PersistentFlags().StringP(cobraext.ReportOutputFlagName, "", string(outputs.ReportOutputSTDOUT), cobraext.ReportOutputFlagDescription) + cmd.PersistentFlags().BoolP(cobraext.BenchWithTestSamplesFlagName, "", true, cobraext.BenchWithTestSamplesFlagDescription) for benchType, runner := range benchrunner.BenchRunners() { action := benchTypeCommandActionFactory(runner) @@ -89,6 +91,11 @@ func benchTypeCommandActionFactory(runner benchrunner.BenchRunner) cobraext.Comm return cobraext.FlagParsingError(err, cobraext.ReportOutputFlagName) } + useTestSamples, err := cmd.Flags().GetBool(cobraext.BenchWithTestSamplesFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.BenchWithTestSamplesFlagName) + } + packageRootPath, found, err := packages.FindPackageRoot() if !found { return errors.New("package root not found") @@ -99,8 +106,10 @@ func benchTypeCommandActionFactory(runner benchrunner.BenchRunner) cobraext.Comm signal.Enable() - var benchFolders []benchrunner.BenchmarkFolder - var dataStreams []string + var ( + benchFolders []testrunner.TestFolder + dataStreams []string + ) // We check for the existence of the data streams flag before trying to // parse it because if the root benchmark command is run instead of one of the // subcommands of benchmark, the data streams flag will not be defined. @@ -122,6 +131,14 @@ func benchTypeCommandActionFactory(runner benchrunner.BenchRunner) cobraext.Comm return errors.Wrap(err, "unable to determine benchmark folder paths") } + if useTestSamples { + testFolders, err := testrunner.FindTestFolders(packageRootPath, dataStreams, testrunner.TestType(benchType)) + if err != nil { + return errors.Wrap(err, "unable to determine test folder paths") + } + benchFolders = append(benchFolders, testFolders...) + } + if failOnMissing && len(benchFolders) == 0 { if len(dataStreams) > 0 { return fmt.Errorf("no %s benchmarks found for %s data stream(s)", benchType, strings.Join(dataStreams, ",")) @@ -137,7 +154,7 @@ func benchTypeCommandActionFactory(runner benchrunner.BenchRunner) cobraext.Comm var results []*benchrunner.Result for _, folder := range benchFolders { r, err := benchrunner.Run(benchType, benchrunner.BenchOptions{ - BenchmarkFolder: folder, + Folder: folder, PackageRootPath: packageRootPath, API: esClient.API, }) diff --git a/docs/howto/pipeline_benchmarking.md b/docs/howto/pipeline_benchmarking.md index 0426c9ea0c..d0294a2d4e 100644 --- a/docs/howto/pipeline_benchmarking.md +++ b/docs/howto/pipeline_benchmarking.md @@ -86,7 +86,7 @@ The benchmark execution can be customized to some extent using an optional confi num_docs: 1000 ``` -The `num_docs` option tells the benchmarks how many events should be sent with the simulation request. If not enough samples are provided, the events will be reused to generate a sufficient number of them. +The `num_docs` option tells the benchmarks how many events should be sent with the simulation request. If not enough samples are provided, the events will be reused to generate a sufficient number of them. If not present it defaults to `1000`. 
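To make the reuse rule above concrete, here is a minimal illustrative sketch — `expandSamples` is a hypothetical helper, not part of elastic-package — showing one way a small set of sample events can be cycled until `num_docs` documents are available for the simulation request:

```go
package main

import "fmt"

// expandSamples cycles through the provided sample events until numDocs
// documents have been produced, mirroring how a benchmark can reuse a small
// corpus to reach the configured num_docs.
func expandSamples(samples []string, numDocs int) []string {
	if len(samples) == 0 || numDocs <= 0 {
		return nil
	}
	docs := make([]string, 0, numDocs)
	for i := 0; i < numDocs; i++ {
		docs = append(docs, samples[i%len(samples)])
	}
	return docs
}

func main() {
	// Three samples expanded to ten documents: each sample is repeated
	// three or four times.
	docs := expandSamples([]string{"event-1", "event-2", "event-3"}, 10)
	fmt.Println(len(docs)) // 10
}
```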
## Running a pipeline benchmark @@ -121,6 +121,12 @@ If you want to run pipeline benchmarks for **specific data streams** in a packag elastic-package benchmark pipeline --data-streams [,,...] ``` +By default, if the benchmark configuration is not present, it will run using any samples found in the data stream. You can disable this behavior disabling the `--use-test-samples` flag. + +``` +elastic-package benchmark pipeline -v --use-test-samples=false +``` + Finally, when you are done running all benchmarks, bring down the Elastic Stack. This corresponds to step 4 as described in the [_Conceptual process_](#Conceptual-process) section. ``` diff --git a/internal/benchrunner/benchrunner.go b/internal/benchrunner/benchrunner.go index 4942bebc19..465f7af635 100644 --- a/internal/benchrunner/benchrunner.go +++ b/internal/benchrunner/benchrunner.go @@ -14,6 +14,7 @@ import ( "github.com/pkg/errors" "github.com/elastic/elastic-package/internal/elasticsearch" + "github.com/elastic/elastic-package/internal/testrunner" ) // BenchType represents the various supported benchmark types @@ -21,7 +22,7 @@ type BenchType string // BenchOptions contains benchmark runner options. type BenchOptions struct { - BenchmarkFolder BenchmarkFolder + Folder testrunner.TestFolder PackageRootPath string API *elasticsearch.API } @@ -99,16 +100,8 @@ func (rc *ResultComposer) WithSuccess() ([]Result, error) { return rc.WithError(nil) } -// BenchmarkFolder encapsulates the benchmark folder path and names of the package + data stream -// to which the benchmark folder belongs. -type BenchmarkFolder struct { - Path string - Package string - DataStream string -} - // FindBenchmarkFolders finds benchmark folders for the given package and, optionally, benchmark type and data streams -func FindBenchmarkFolders(packageRootPath string, dataStreams []string, benchType BenchType) ([]BenchmarkFolder, error) { +func FindBenchmarkFolders(packageRootPath string, dataStreams []string, benchType BenchType) ([]testrunner.TestFolder, error) { // Expected folder structure: // / // data_stream/ @@ -137,17 +130,17 @@ func FindBenchmarkFolders(packageRootPath string, dataStreams []string, benchTyp paths = append(paths, p...) 
} - folders := make([]BenchmarkFolder, len(paths)) + folders := make([]testrunner.TestFolder, len(paths)) _, pkg := filepath.Split(packageRootPath) for idx, p := range paths { relP := strings.TrimPrefix(p, packageRootPath) parts := strings.Split(relP, string(filepath.Separator)) dataStream := parts[2] - folder := BenchmarkFolder{ - p, - pkg, - dataStream, + folder := testrunner.TestFolder{ + Path: p, + Package: pkg, + DataStream: dataStream, } folders[idx] = folder diff --git a/internal/benchrunner/runners/pipeline/benchmark.go b/internal/benchrunner/runners/pipeline/benchmark.go index 8102f4eb41..50af4576c3 100644 --- a/internal/benchrunner/runners/pipeline/benchmark.go +++ b/internal/benchrunner/runners/pipeline/benchmark.go @@ -88,15 +88,15 @@ func (r *runner) benchmarkPipeline(b *benchmark, entryPipeline string) (*benchru // Build result result := &benchrunner.BenchmarkResult{ - Name: fmt.Sprintf("pipeline benchmark for %s/%s", r.options.BenchmarkFolder.Package, r.options.BenchmarkFolder.DataStream), + Name: fmt.Sprintf("pipeline benchmark for %s/%s", r.options.Folder.Package, r.options.Folder.DataStream), Parameters: []benchrunner.BenchmarkValue{ { Name: "package", - Value: r.options.BenchmarkFolder.Package, + Value: r.options.Folder.Package, }, { Name: "data_stream", - Value: r.options.BenchmarkFolder.DataStream, + Value: r.options.Folder.DataStream, }, { Name: "source doc count", diff --git a/internal/benchrunner/runners/pipeline/config.go b/internal/benchrunner/runners/pipeline/config.go index 9c0f9780ae..e980b61b50 100644 --- a/internal/benchrunner/runners/pipeline/config.go +++ b/internal/benchrunner/runners/pipeline/config.go @@ -20,19 +20,25 @@ type config struct { NumDocs int `config:"num_docs"` } +func defaultConfig() *config { + return &config{ + NumDocs: 1000, + } +} + func readConfig(path string) (*config, error) { configPath := filepath.Join(path, configYAML) - var c config + c := defaultConfig() cfg, err := yaml.NewConfigWithFile(configPath) if err != nil && !errors.Is(err, os.ErrNotExist) { return nil, errors.Wrapf(err, "can't load common configuration: %s", configPath) } if err == nil { - if err := cfg.Unpack(&c); err != nil { + if err := cfg.Unpack(c); err != nil { return nil, errors.Wrapf(err, "can't unpack benchmark configuration: %s", configPath) } } - return &c, nil + return c, nil } diff --git a/internal/benchrunner/runners/pipeline/runner.go b/internal/benchrunner/runners/pipeline/runner.go index ce4d4a1fdc..ba87d4121c 100644 --- a/internal/benchrunner/runners/pipeline/runner.go +++ b/internal/benchrunner/runners/pipeline/runner.go @@ -9,6 +9,7 @@ import ( "fmt" "os" "path/filepath" + "strings" "time" "github.com/pkg/errors" @@ -53,7 +54,7 @@ func (r *runner) TearDown() error { } func (r *runner) run() (*benchrunner.Result, error) { - dataStreamPath, found, err := packages.FindDataStreamRootForPath(r.options.BenchmarkFolder.Path) + dataStreamPath, found, err := packages.FindDataStreamRootForPath(r.options.Folder.Path) if err != nil { return nil, errors.Wrap(err, "locating data_stream root failed") } @@ -70,8 +71,8 @@ func (r *runner) run() (*benchrunner.Result, error) { start := time.Now() result := &benchrunner.Result{ BenchType: BenchType + " benchmark", - Package: r.options.BenchmarkFolder.Package, - DataStream: r.options.BenchmarkFolder.DataStream, + Package: r.options.Folder.Package, + DataStream: r.options.Folder.DataStream, } b, err := r.loadBenchmark() @@ -89,14 +90,23 @@ func (r *runner) run() (*benchrunner.Result, error) { } func (r *runner) 
listBenchmarkFiles() ([]string, error) { - fis, err := os.ReadDir(r.options.BenchmarkFolder.Path) + const ( + expectedTestResultSuffix = "-expected.json" + configTestSuffixYAML = "-config.yml" + ) + + fis, err := os.ReadDir(r.options.Folder.Path) if err != nil { - return nil, errors.Wrapf(err, "reading pipeline benchmarks failed (path: %s)", r.options.BenchmarkFolder.Path) + return nil, errors.Wrapf(err, "reading pipeline benchmarks failed (path: %s)", r.options.Folder.Path) } var files []string for _, fi := range fis { - if fi.Name() == configYAML { + if fi.Name() == configYAML || + // since pipeline tests might be included we need to + // exclude the expected and config files for them + strings.HasSuffix(fi.Name(), expectedTestResultSuffix) || + strings.HasSuffix(fi.Name(), configTestSuffixYAML) { continue } files = append(files, fi.Name()) @@ -112,7 +122,7 @@ func (r *runner) loadBenchmark() (*benchmark, error) { var allEntries []json.RawMessage for _, benchFile := range benchFiles { - benchPath := filepath.Join(r.options.BenchmarkFolder.Path, benchFile) + benchPath := filepath.Join(r.options.Folder.Path, benchFile) benchData, err := os.ReadFile(benchPath) if err != nil { return nil, errors.Wrapf(err, "reading input file failed (benchPath: %s)", benchPath) @@ -137,14 +147,14 @@ func (r *runner) loadBenchmark() (*benchmark, error) { allEntries = append(allEntries, entries...) } - config, err := readConfig(r.options.BenchmarkFolder.Path) + config, err := readConfig(r.options.Folder.Path) if err != nil { - return nil, errors.Wrapf(err, "reading config for benchmark failed (benchPath: %s)", r.options.BenchmarkFolder.Path) + return nil, errors.Wrapf(err, "reading config for benchmark failed (benchPath: %s)", r.options.Folder.Path) } tc, err := createBenchmark(allEntries, config) if err != nil { - return nil, errors.Wrapf(err, "can't create benchmark case (benchmarkPath: %s)", r.options.BenchmarkFolder.Path) + return nil, errors.Wrapf(err, "can't create benchmark case (benchmarkPath: %s)", r.options.Folder.Path) } return tc, nil } diff --git a/internal/cobraext/flags.go b/internal/cobraext/flags.go index abc1e1f0ed..77ee45e9b4 100644 --- a/internal/cobraext/flags.go +++ b/internal/cobraext/flags.go @@ -26,6 +26,9 @@ const ( AgentPolicyFlagName = "agent-policy" AgentPolicyDescription = "name of the agent policy" + BenchWithTestSamplesFlagName = "use-test-samples" + BenchWithTestSamplesFlagDescription = "use test samples for the benchmarks" + BuildSkipValidationFlagName = "skip-validation" BuildSkipValidationFlagDescription = "skip validation of the built package, use only if all validation issues have been acknowledged" From 3c50c8ed66d7d6cf69fe0129fc50c9b8ca8229f5 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Tue, 6 Sep 2022 12:50:50 +0200 Subject: [PATCH 17/20] Add CI testing for benchmarks --- .ci/Jenkinsfile | 1 + Makefile | 5 +- internal/benchrunner/benchrunner.go | 19 ++++- scripts/test-check-packages.sh | 10 ++- .../pipeline_benchmark/changelog.yml | 6 ++ .../_dev/benchmark/pipeline/access-raw.log | 1 + .../test/_dev/benchmark/pipeline/config.yml | 1 + .../test/agent/stream/stream.yml.hbs | 25 ++++++ .../elasticsearch/ingest_pipeline/default.yml | 25 ++++++ .../data_stream/test/fields/base-fields.yml | 38 +++++++++ .../data_stream/test/manifest.yml | 79 +++++++++++++++++++ .../pipeline_benchmark/docs/README.md | 2 + .../pipeline_benchmark/manifest.yml | 24 ++++++ .../use_pipeline_tests/changelog.yml | 6 ++ .../_dev/test/pipeline/test-access-raw.log | 1 + 
.../pipeline/test-access-raw.log-config.yml | 4 + .../test-access-raw.log-expected.json | 5 ++ .../test/agent/stream/stream.yml.hbs | 25 ++++++ .../elasticsearch/ingest_pipeline/default.yml | 25 ++++++ .../data_stream/test/fields/base-fields.yml | 38 +++++++++ .../data_stream/test/manifest.yml | 79 +++++++++++++++++++ .../use_pipeline_tests/docs/README.md | 2 + .../use_pipeline_tests/manifest.yml | 24 ++++++ 23 files changed, 439 insertions(+), 6 deletions(-) create mode 100644 test/packages/benchmarks/pipeline_benchmark/changelog.yml create mode 100644 test/packages/benchmarks/pipeline_benchmark/data_stream/test/_dev/benchmark/pipeline/access-raw.log create mode 100644 test/packages/benchmarks/pipeline_benchmark/data_stream/test/_dev/benchmark/pipeline/config.yml create mode 100644 test/packages/benchmarks/pipeline_benchmark/data_stream/test/agent/stream/stream.yml.hbs create mode 100644 test/packages/benchmarks/pipeline_benchmark/data_stream/test/elasticsearch/ingest_pipeline/default.yml create mode 100644 test/packages/benchmarks/pipeline_benchmark/data_stream/test/fields/base-fields.yml create mode 100644 test/packages/benchmarks/pipeline_benchmark/data_stream/test/manifest.yml create mode 100644 test/packages/benchmarks/pipeline_benchmark/docs/README.md create mode 100644 test/packages/benchmarks/pipeline_benchmark/manifest.yml create mode 100644 test/packages/benchmarks/use_pipeline_tests/changelog.yml create mode 100644 test/packages/benchmarks/use_pipeline_tests/data_stream/test/_dev/test/pipeline/test-access-raw.log create mode 100644 test/packages/benchmarks/use_pipeline_tests/data_stream/test/_dev/test/pipeline/test-access-raw.log-config.yml create mode 100644 test/packages/benchmarks/use_pipeline_tests/data_stream/test/_dev/test/pipeline/test-access-raw.log-expected.json create mode 100644 test/packages/benchmarks/use_pipeline_tests/data_stream/test/agent/stream/stream.yml.hbs create mode 100644 test/packages/benchmarks/use_pipeline_tests/data_stream/test/elasticsearch/ingest_pipeline/default.yml create mode 100644 test/packages/benchmarks/use_pipeline_tests/data_stream/test/fields/base-fields.yml create mode 100644 test/packages/benchmarks/use_pipeline_tests/data_stream/test/manifest.yml create mode 100644 test/packages/benchmarks/use_pipeline_tests/docs/README.md create mode 100644 test/packages/benchmarks/use_pipeline_tests/manifest.yml diff --git a/.ci/Jenkinsfile b/.ci/Jenkinsfile index a3846901e5..b83d2fe49c 100644 --- a/.ci/Jenkinsfile +++ b/.ci/Jenkinsfile @@ -103,6 +103,7 @@ pipeline { 'check-packages-with-kind': generateTestCommandStage(command: 'test-check-packages-with-kind', artifacts: ['build/test-results/*.xml', 'build/kubectl-dump.txt', 'build/elastic-stack-dump/check-*/logs/*.log', 'build/elastic-stack-dump/check-*/logs/fleet-server-internal/*'], junitArtifacts: true, publishCoverage: true), 'check-packages-other': generateTestCommandStage(command: 'test-check-packages-other', artifacts: ['build/test-results/*.xml', 'build/elastic-stack-dump/check-*/logs/*.log', 'build/elastic-stack-dump/check-*/logs/fleet-server-internal/*'], junitArtifacts: true, publishCoverage: true), 'check-packages-with-custom-agent': generateTestCommandStage(command: 'test-check-packages-with-custom-agent', artifacts: ['build/test-results/*.xml', 'build/elastic-stack-dump/check-*/logs/*.log', 'build/elastic-stack-dump/check-*/logs/fleet-server-internal/*'], junitArtifacts: true, publishCoverage: true), + 'check-packages-benchmarks': generateTestCommandStage(command: 
'test-check-packages-benchmarks', artifacts: ['build/test-results/*.xml', 'build/elastic-stack-dump/check-*/logs/*.log', 'build/elastic-stack-dump/check-*/logs/fleet-server-internal/*'], junitArtifacts: true, publishCoverage: false), 'build-zip': generateTestCommandStage(command: 'test-build-zip', artifacts: ['build/elastic-stack-dump/build-zip/logs/*.log', 'build/packages/*.sig']), 'profiles-command': generateTestCommandStage(command: 'test-profiles-command') ] diff --git a/Makefile b/Makefile index 031526ae3f..54dbdd8957 100644 --- a/Makefile +++ b/Makefile @@ -65,7 +65,7 @@ test-stack-command-8x: test-stack-command: test-stack-command-default test-stack-command-7x test-stack-command-800 test-stack-command-8x -test-check-packages: test-check-packages-with-kind test-check-packages-other test-check-packages-parallel test-check-packages-with-custom-agent +test-check-packages: test-check-packages-with-kind test-check-packages-other test-check-packages-parallel test-check-packages-with-custom-agent test-check-packages-benchmarks test-check-packages-with-kind: PACKAGE_TEST_TYPE=with-kind ./scripts/test-check-packages.sh @@ -73,6 +73,9 @@ test-check-packages-with-kind: test-check-packages-other: PACKAGE_TEST_TYPE=other ./scripts/test-check-packages.sh +test-check-packages-benchmarks: + PACKAGE_TEST_TYPE=benchmarks ./scripts/test-check-packages.sh + test-check-packages-parallel: PACKAGE_TEST_TYPE=parallel ./scripts/test-check-packages.sh diff --git a/internal/benchrunner/benchrunner.go b/internal/benchrunner/benchrunner.go index 465f7af635..5cda104ff9 100644 --- a/internal/benchrunner/benchrunner.go +++ b/internal/benchrunner/benchrunner.go @@ -116,8 +116,23 @@ func FindBenchmarkFolders(packageRootPath string, dataStreams []string, benchTyp } var paths []string - if len(dataStreams) == 0 { - return nil, errors.New("benchmarks can only be defined at the data_stream level") + if len(dataStreams) > 0 { + sort.Strings(dataStreams) + for _, dataStream := range dataStreams { + p, err := findBenchFolderPaths(packageRootPath, dataStream, benchTypeGlob) + if err != nil { + return nil, err + } + + paths = append(paths, p...) 
+ } + } else { + p, err := findBenchFolderPaths(packageRootPath, "*", benchTypeGlob) + if err != nil { + return nil, err + } + + paths = p } sort.Strings(dataStreams) diff --git a/scripts/test-check-packages.sh b/scripts/test-check-packages.sh index 1456f07a00..b0258105db 100755 --- a/scripts/test-check-packages.sh +++ b/scripts/test-check-packages.sh @@ -38,7 +38,7 @@ OLDPWD=$PWD for d in test/packages/${PACKAGE_TEST_TYPE:-other}/${PACKAGE_UNDER_TEST:-*}/; do ( cd $d - elastic-package check -v + elastic-package build -v ) done cd - @@ -62,8 +62,12 @@ for d in test/packages/${PACKAGE_TEST_TYPE:-other}/${PACKAGE_UNDER_TEST:-*}/; do cd $d elastic-package install -v - # defer-cleanup is set to a short period to verify that the option is available - elastic-package test -v --report-format xUnit --report-output file --defer-cleanup 1s --test-coverage + if [ "${PACKAGE_TEST_TYPE:-other}" == "benchmarks" ]; then + elastic-package benchmark -v --report-format xUnit --report-output file --fail-on-missing + else + # defer-cleanup is set to a short period to verify that the option is available + elastic-package test -v --report-format xUnit --report-output file --defer-cleanup 1s --test-coverage + fi ) cd - done diff --git a/test/packages/benchmarks/pipeline_benchmark/changelog.yml b/test/packages/benchmarks/pipeline_benchmark/changelog.yml new file mode 100644 index 0000000000..d28b75270d --- /dev/null +++ b/test/packages/benchmarks/pipeline_benchmark/changelog.yml @@ -0,0 +1,6 @@ +# newer versions go on top +- version: "999.999.999" + changes: + - description: initial release + type: enhancement # can be one of: enhancement, bugfix, breaking-change + link: https://github.com/elastic/elastic-package/pull/209 diff --git a/test/packages/benchmarks/pipeline_benchmark/data_stream/test/_dev/benchmark/pipeline/access-raw.log b/test/packages/benchmarks/pipeline_benchmark/data_stream/test/_dev/benchmark/pipeline/access-raw.log new file mode 100644 index 0000000000..c8c9ffe960 --- /dev/null +++ b/test/packages/benchmarks/pipeline_benchmark/data_stream/test/_dev/benchmark/pipeline/access-raw.log @@ -0,0 +1 @@ +1.2.3.4 - - [25/Oct/2016:14:49:34 +0200] "GET /favicon.ico HTTP/1.1" 404 571 "http://localhost:8080/" "skip-this-one/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36" \ No newline at end of file diff --git a/test/packages/benchmarks/pipeline_benchmark/data_stream/test/_dev/benchmark/pipeline/config.yml b/test/packages/benchmarks/pipeline_benchmark/data_stream/test/_dev/benchmark/pipeline/config.yml new file mode 100644 index 0000000000..30a2b50cf6 --- /dev/null +++ b/test/packages/benchmarks/pipeline_benchmark/data_stream/test/_dev/benchmark/pipeline/config.yml @@ -0,0 +1 @@ +num_docs: 10000 diff --git a/test/packages/benchmarks/pipeline_benchmark/data_stream/test/agent/stream/stream.yml.hbs b/test/packages/benchmarks/pipeline_benchmark/data_stream/test/agent/stream/stream.yml.hbs new file mode 100644 index 0000000000..2cdbbeb73e --- /dev/null +++ b/test/packages/benchmarks/pipeline_benchmark/data_stream/test/agent/stream/stream.yml.hbs @@ -0,0 +1,25 @@ +paths: +{{#each paths as |path i|}} + - {{path}} +{{/each}} +exclude_files: [".gz$"] +tags: +{{#each tags as |tag i|}} + - {{tag}} +{{/each}} +fields_under_root: true +fields: + observer: + vendor: Test + product: Test + type: test +{{#contains tags "forwarded"}} +publisher_pipeline.disable_host: true +{{/contains}} + +processors: +- add_locale: ~ +- add_fields: + target: '' + fields: + 
ecs.version: 1.6.0 diff --git a/test/packages/benchmarks/pipeline_benchmark/data_stream/test/elasticsearch/ingest_pipeline/default.yml b/test/packages/benchmarks/pipeline_benchmark/data_stream/test/elasticsearch/ingest_pipeline/default.yml new file mode 100644 index 0000000000..54a442eb2b --- /dev/null +++ b/test/packages/benchmarks/pipeline_benchmark/data_stream/test/elasticsearch/ingest_pipeline/default.yml @@ -0,0 +1,25 @@ +--- +description: Pipeline for parsing Nginx access logs. Requires the geoip and user_agent + plugins. +processors: + - grok: + field: message + patterns: + - (%{NGINX_HOST} )?"?(?:%{NGINX_ADDRESS_LIST:nginx.access.remote_ip_list}|%{NOTSPACE:source.address}) + - (-|%{DATA:user.name}) \[%{HTTPDATE:nginx.access.time}\] "%{DATA:nginx.access.info}" + %{NUMBER:http.response.status_code:long} %{NUMBER:http.response.body.bytes:long} + "(-|%{DATA:http.request.referrer})" "(-|%{DATA:user_agent.original})" + pattern_definitions: + NGINX_HOST: (?:%{IP:destination.ip}|%{NGINX_NOTSEPARATOR:destination.domain})(:%{NUMBER:destination.port})? + NGINX_NOTSEPARATOR: "[^\t ,:]+" + NGINX_ADDRESS_LIST: (?:%{IP}|%{WORD})("?,?\s*(?:%{IP}|%{WORD}))* + ignore_missing: true + - user_agent: + field: user_agent.original + ignore_missing: true + - drop: + if: "ctx?.user_agent?.original?.startsWith('skip-this-one')" +on_failure: + - set: + field: error.message + value: '{{ _ingest.on_failure_message }}' \ No newline at end of file diff --git a/test/packages/benchmarks/pipeline_benchmark/data_stream/test/fields/base-fields.yml b/test/packages/benchmarks/pipeline_benchmark/data_stream/test/fields/base-fields.yml new file mode 100644 index 0000000000..0ec2cc7e01 --- /dev/null +++ b/test/packages/benchmarks/pipeline_benchmark/data_stream/test/fields/base-fields.yml @@ -0,0 +1,38 @@ +- name: data_stream.type + type: constant_keyword + description: Data stream type. +- name: data_stream.dataset + type: constant_keyword + description: Data stream dataset. +- name: data_stream.namespace + type: constant_keyword + description: Data stream namespace. +- name: '@timestamp' + type: date + description: Event timestamp. +- name: container.id + description: Unique container id. + ignore_above: 1024 + type: keyword +- name: input.type + description: Type of Filebeat input. + type: keyword +- name: log.file.path + description: Full path to the log file this event came from. + example: /var/log/fun-times.log + ignore_above: 1024 + type: keyword +- name: log.source.address + description: Source address from which the log event was read / sent from. + type: keyword +- name: log.flags + description: Flags for the log file. + type: keyword +- name: log.offset + description: Offset of the entry in the log file. + type: long +- name: tags + description: List of keywords used to tag each event. 
+ example: '["production", "env2"]' + ignore_above: 1024 + type: keyword diff --git a/test/packages/benchmarks/pipeline_benchmark/data_stream/test/manifest.yml b/test/packages/benchmarks/pipeline_benchmark/data_stream/test/manifest.yml new file mode 100644 index 0000000000..d922481164 --- /dev/null +++ b/test/packages/benchmarks/pipeline_benchmark/data_stream/test/manifest.yml @@ -0,0 +1,79 @@ +title: Test +release: experimental +type: logs +streams: + - input: udp + title: UDP logs + description: Collect UDP logs + template_path: udp.yml.hbs + vars: + - name: tags + type: text + title: Tags + multi: true + required: true + show_user: false + default: + - forwarded + - name: udp_host + type: text + title: UDP host to listen on + multi: false + required: true + show_user: true + default: localhost + - name: udp_port + type: integer + title: UDP port to listen on + multi: false + required: true + show_user: true + default: 9999 + - input: tcp + title: TCP logs + description: Collect TCP logs + template_path: tcp.yml.hbs + vars: + - name: tags + type: text + title: Tags + multi: true + required: true + show_user: false + default: + - forwarded + - name: tcp_host + type: text + title: TCP host to listen on + multi: false + required: true + show_user: true + default: localhost + - name: tcp_port + type: integer + title: TCP port to listen on + multi: false + required: true + show_user: true + default: 9511 + - input: file + title: File logs + description: Collect logs from file + enabled: false + vars: + - name: paths + type: text + title: Paths + multi: true + required: true + show_user: true + default: + - /var/log/file.log + - name: tags + type: text + title: Tags + multi: true + required: true + show_user: false + default: + - forwarded diff --git a/test/packages/benchmarks/pipeline_benchmark/docs/README.md b/test/packages/benchmarks/pipeline_benchmark/docs/README.md new file mode 100644 index 0000000000..e0ef7b4a18 --- /dev/null +++ b/test/packages/benchmarks/pipeline_benchmark/docs/README.md @@ -0,0 +1,2 @@ +# Test integration + diff --git a/test/packages/benchmarks/pipeline_benchmark/manifest.yml b/test/packages/benchmarks/pipeline_benchmark/manifest.yml new file mode 100644 index 0000000000..f7713fd52c --- /dev/null +++ b/test/packages/benchmarks/pipeline_benchmark/manifest.yml @@ -0,0 +1,24 @@ +format_version: 1.0.0 +name: pipeline_benchmarks +title: Pipeline benchmarks +# version is set to something very large to so this test package can +# be installed in the package registry regardless of the version of +# the actual apache package in the registry at any given time. 
+version: 999.999.999 +description: Test for pipeline test runner +categories: ["network"] +release: experimental +license: basic +type: integration +conditions: + kibana.version: '^8.0.0' +policy_templates: + - name: test + title: Test + description: Description + inputs: + - type: file + title: Foo bar + description: Foo bar +owner: + github: elastic/integrations diff --git a/test/packages/benchmarks/use_pipeline_tests/changelog.yml b/test/packages/benchmarks/use_pipeline_tests/changelog.yml new file mode 100644 index 0000000000..d28b75270d --- /dev/null +++ b/test/packages/benchmarks/use_pipeline_tests/changelog.yml @@ -0,0 +1,6 @@ +# newer versions go on top +- version: "999.999.999" + changes: + - description: initial release + type: enhancement # can be one of: enhancement, bugfix, breaking-change + link: https://github.com/elastic/elastic-package/pull/209 diff --git a/test/packages/benchmarks/use_pipeline_tests/data_stream/test/_dev/test/pipeline/test-access-raw.log b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/_dev/test/pipeline/test-access-raw.log new file mode 100644 index 0000000000..c8c9ffe960 --- /dev/null +++ b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/_dev/test/pipeline/test-access-raw.log @@ -0,0 +1 @@ +1.2.3.4 - - [25/Oct/2016:14:49:34 +0200] "GET /favicon.ico HTTP/1.1" 404 571 "http://localhost:8080/" "skip-this-one/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36" \ No newline at end of file diff --git a/test/packages/benchmarks/use_pipeline_tests/data_stream/test/_dev/test/pipeline/test-access-raw.log-config.yml b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/_dev/test/pipeline/test-access-raw.log-config.yml new file mode 100644 index 0000000000..958d74a23e --- /dev/null +++ b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/_dev/test/pipeline/test-access-raw.log-config.yml @@ -0,0 +1,4 @@ +multiline: + first_line_pattern: "^(?:[0-9]{1,3}\\.){3}[0-9]{1,3}" +fields: + "@timestamp": "2020-04-28T11:07:58.223Z" diff --git a/test/packages/benchmarks/use_pipeline_tests/data_stream/test/_dev/test/pipeline/test-access-raw.log-expected.json b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/_dev/test/pipeline/test-access-raw.log-expected.json new file mode 100644 index 0000000000..1c2f884a44 --- /dev/null +++ b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/_dev/test/pipeline/test-access-raw.log-expected.json @@ -0,0 +1,5 @@ +{ + "expected": [ + null + ] +} \ No newline at end of file diff --git a/test/packages/benchmarks/use_pipeline_tests/data_stream/test/agent/stream/stream.yml.hbs b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/agent/stream/stream.yml.hbs new file mode 100644 index 0000000000..2cdbbeb73e --- /dev/null +++ b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/agent/stream/stream.yml.hbs @@ -0,0 +1,25 @@ +paths: +{{#each paths as |path i|}} + - {{path}} +{{/each}} +exclude_files: [".gz$"] +tags: +{{#each tags as |tag i|}} + - {{tag}} +{{/each}} +fields_under_root: true +fields: + observer: + vendor: Test + product: Test + type: test +{{#contains tags "forwarded"}} +publisher_pipeline.disable_host: true +{{/contains}} + +processors: +- add_locale: ~ +- add_fields: + target: '' + fields: + ecs.version: 1.6.0 diff --git a/test/packages/benchmarks/use_pipeline_tests/data_stream/test/elasticsearch/ingest_pipeline/default.yml 
b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/elasticsearch/ingest_pipeline/default.yml new file mode 100644 index 0000000000..54a442eb2b --- /dev/null +++ b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/elasticsearch/ingest_pipeline/default.yml @@ -0,0 +1,25 @@ +--- +description: Pipeline for parsing Nginx access logs. Requires the geoip and user_agent + plugins. +processors: + - grok: + field: message + patterns: + - (%{NGINX_HOST} )?"?(?:%{NGINX_ADDRESS_LIST:nginx.access.remote_ip_list}|%{NOTSPACE:source.address}) + - (-|%{DATA:user.name}) \[%{HTTPDATE:nginx.access.time}\] "%{DATA:nginx.access.info}" + %{NUMBER:http.response.status_code:long} %{NUMBER:http.response.body.bytes:long} + "(-|%{DATA:http.request.referrer})" "(-|%{DATA:user_agent.original})" + pattern_definitions: + NGINX_HOST: (?:%{IP:destination.ip}|%{NGINX_NOTSEPARATOR:destination.domain})(:%{NUMBER:destination.port})? + NGINX_NOTSEPARATOR: "[^\t ,:]+" + NGINX_ADDRESS_LIST: (?:%{IP}|%{WORD})("?,?\s*(?:%{IP}|%{WORD}))* + ignore_missing: true + - user_agent: + field: user_agent.original + ignore_missing: true + - drop: + if: "ctx?.user_agent?.original?.startsWith('skip-this-one')" +on_failure: + - set: + field: error.message + value: '{{ _ingest.on_failure_message }}' \ No newline at end of file diff --git a/test/packages/benchmarks/use_pipeline_tests/data_stream/test/fields/base-fields.yml b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/fields/base-fields.yml new file mode 100644 index 0000000000..0ec2cc7e01 --- /dev/null +++ b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/fields/base-fields.yml @@ -0,0 +1,38 @@ +- name: data_stream.type + type: constant_keyword + description: Data stream type. +- name: data_stream.dataset + type: constant_keyword + description: Data stream dataset. +- name: data_stream.namespace + type: constant_keyword + description: Data stream namespace. +- name: '@timestamp' + type: date + description: Event timestamp. +- name: container.id + description: Unique container id. + ignore_above: 1024 + type: keyword +- name: input.type + description: Type of Filebeat input. + type: keyword +- name: log.file.path + description: Full path to the log file this event came from. + example: /var/log/fun-times.log + ignore_above: 1024 + type: keyword +- name: log.source.address + description: Source address from which the log event was read / sent from. + type: keyword +- name: log.flags + description: Flags for the log file. + type: keyword +- name: log.offset + description: Offset of the entry in the log file. + type: long +- name: tags + description: List of keywords used to tag each event. 
+ example: '["production", "env2"]' + ignore_above: 1024 + type: keyword diff --git a/test/packages/benchmarks/use_pipeline_tests/data_stream/test/manifest.yml b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/manifest.yml new file mode 100644 index 0000000000..d922481164 --- /dev/null +++ b/test/packages/benchmarks/use_pipeline_tests/data_stream/test/manifest.yml @@ -0,0 +1,79 @@ +title: Test +release: experimental +type: logs +streams: + - input: udp + title: UDP logs + description: Collect UDP logs + template_path: udp.yml.hbs + vars: + - name: tags + type: text + title: Tags + multi: true + required: true + show_user: false + default: + - forwarded + - name: udp_host + type: text + title: UDP host to listen on + multi: false + required: true + show_user: true + default: localhost + - name: udp_port + type: integer + title: UDP port to listen on + multi: false + required: true + show_user: true + default: 9999 + - input: tcp + title: TCP logs + description: Collect TCP logs + template_path: tcp.yml.hbs + vars: + - name: tags + type: text + title: Tags + multi: true + required: true + show_user: false + default: + - forwarded + - name: tcp_host + type: text + title: TCP host to listen on + multi: false + required: true + show_user: true + default: localhost + - name: tcp_port + type: integer + title: TCP port to listen on + multi: false + required: true + show_user: true + default: 9511 + - input: file + title: File logs + description: Collect logs from file + enabled: false + vars: + - name: paths + type: text + title: Paths + multi: true + required: true + show_user: true + default: + - /var/log/file.log + - name: tags + type: text + title: Tags + multi: true + required: true + show_user: false + default: + - forwarded diff --git a/test/packages/benchmarks/use_pipeline_tests/docs/README.md b/test/packages/benchmarks/use_pipeline_tests/docs/README.md new file mode 100644 index 0000000000..e0ef7b4a18 --- /dev/null +++ b/test/packages/benchmarks/use_pipeline_tests/docs/README.md @@ -0,0 +1,2 @@ +# Test integration + diff --git a/test/packages/benchmarks/use_pipeline_tests/manifest.yml b/test/packages/benchmarks/use_pipeline_tests/manifest.yml new file mode 100644 index 0000000000..b30a02942b --- /dev/null +++ b/test/packages/benchmarks/use_pipeline_tests/manifest.yml @@ -0,0 +1,24 @@ +format_version: 1.0.0 +name: use_pipeline_tests +title: Use pipeline tests for the benchmark +# version is set to something very large to so this test package can +# be installed in the package registry regardless of the version of +# the actual apache package in the registry at any given time. 
+version: 999.999.999 +description: Test for pipeline test runner +categories: ["network"] +release: experimental +license: basic +type: integration +conditions: + kibana.version: '^8.0.0' +policy_templates: + - name: test + title: Test + description: Description + inputs: + - type: file + title: Foo bar + description: Foo bar +owner: + github: elastic/integrations From a029a95d100f239374cf3df8fed360d913d0bfac Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Wed, 7 Sep 2022 10:03:48 +0200 Subject: [PATCH 18/20] Add output to doc, undo script change, change test packages PR number --- docs/howto/pipeline_benchmarking.md | 48 +++++++++++++++++++ scripts/test-check-packages.sh | 2 +- .../pipeline_benchmark/changelog.yml | 2 +- .../use_pipeline_tests/changelog.yml | 2 +- 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/docs/howto/pipeline_benchmarking.md b/docs/howto/pipeline_benchmarking.md index d0294a2d4e..362fa2f084 100644 --- a/docs/howto/pipeline_benchmarking.md +++ b/docs/howto/pipeline_benchmarking.md @@ -113,6 +113,54 @@ If you want to run pipeline benchmarks for **all data streams** in a package, na ``` elastic-package benchmark pipeline + +--- Benchmark results for package: windows-1 - START --- +╭───────────────────────────────╮ +│ parameters │ +├──────────────────┬────────────┤ +│ package │ windows │ +│ data_stream │ powershell │ +│ source doc count │ 6 │ +│ doc count │ 1000 │ +╰──────────────────┴────────────╯ +╭───────────────────────╮ +│ ingest performance │ +├─────────────┬─────────┤ +│ ingest time │ 0.23s │ +│ eps │ 4291.85 │ +╰─────────────┴─────────╯ +╭───────────────────────────────────╮ +│ processors by total time │ +├──────────────────────────┬────────┤ +│ kv @ default.yml:4 │ 12.02% │ +│ script @ default.yml:240 │ 7.73% │ +│ kv @ default.yml:13 │ 6.87% │ +│ set @ default.yml:44 │ 6.01% │ +│ script @ default.yml:318 │ 5.58% │ +│ date @ default.yml:34 │ 3.43% │ +│ script @ default.yml:397 │ 2.15% │ +│ remove @ default.yml:425 │ 2.15% │ +│ set @ default.yml:102 │ 1.72% │ +│ set @ default.yml:108 │ 1.29% │ +╰──────────────────────────┴────────╯ +╭─────────────────────────────────────╮ +│ processors by average time per doc │ +├──────────────────────────┬──────────┤ +│ kv @ default.yml:4 │ 56.112µs │ +│ script @ default.yml:240 │ 36.072µs │ +│ kv @ default.yml:13 │ 31.936µs │ +│ script @ default.yml:397 │ 29.94µs │ +│ set @ default.yml:44 │ 14µs │ +│ script @ default.yml:318 │ 13µs │ +│ date @ default.yml:34 │ 11.976µs │ +│ set @ default.yml:102 │ 8.016µs │ +│ append @ default.yml:114 │ 6.012µs │ +│ set @ default.yml:108 │ 6.012µs │ +╰──────────────────────────┴──────────╯ + +--- Benchmark results for package: windows-1 - END --- +Done + ``` If you want to run pipeline benchmarks for **specific data streams** in a package, navigate to the package's root folder (or any sub-folder under it) and run the following command. 
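One note on the sample report added in the hunk above: the `eps` figure is simply the benchmarked document count divided by the measured ingest time (1000 docs over roughly 0.233 s yields the 4291.85 shown). The sketch below only illustrates that arithmetic; `eventsPerSecond` is a hypothetical helper, not code from this change:

```go
package main

import (
	"fmt"
	"time"
)

// eventsPerSecond derives an events-per-second figure from a document count
// and a measured ingest duration, as reported in the sample output above.
func eventsPerSecond(numDocs int, ingestTime time.Duration) float64 {
	if ingestTime <= 0 {
		return 0
	}
	return float64(numDocs) / ingestTime.Seconds()
}

func main() {
	// Roughly matches the sample report: 1000 docs in ~0.233 s ≈ 4291.85 eps.
	fmt.Printf("%.2f eps\n", eventsPerSecond(1000, 233*time.Millisecond))
}
```

Reports like this can also be written to disk with the existing `--report-format xUnit --report-output file` flags, as exercised by `scripts/test-check-packages.sh` in this series.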
diff --git a/scripts/test-check-packages.sh b/scripts/test-check-packages.sh index b0258105db..97e4389024 100755 --- a/scripts/test-check-packages.sh +++ b/scripts/test-check-packages.sh @@ -38,7 +38,7 @@ OLDPWD=$PWD for d in test/packages/${PACKAGE_TEST_TYPE:-other}/${PACKAGE_UNDER_TEST:-*}/; do ( cd $d - elastic-package build -v + elastic-package check -v ) done cd - diff --git a/test/packages/benchmarks/pipeline_benchmark/changelog.yml b/test/packages/benchmarks/pipeline_benchmark/changelog.yml index d28b75270d..1ced0b8d36 100644 --- a/test/packages/benchmarks/pipeline_benchmark/changelog.yml +++ b/test/packages/benchmarks/pipeline_benchmark/changelog.yml @@ -3,4 +3,4 @@ changes: - description: initial release type: enhancement # can be one of: enhancement, bugfix, breaking-change - link: https://github.com/elastic/elastic-package/pull/209 + link: https://github.com/elastic/elastic-package/pull/906 diff --git a/test/packages/benchmarks/use_pipeline_tests/changelog.yml b/test/packages/benchmarks/use_pipeline_tests/changelog.yml index d28b75270d..1ced0b8d36 100644 --- a/test/packages/benchmarks/use_pipeline_tests/changelog.yml +++ b/test/packages/benchmarks/use_pipeline_tests/changelog.yml @@ -3,4 +3,4 @@ changes: - description: initial release type: enhancement # can be one of: enhancement, bugfix, breaking-change - link: https://github.com/elastic/elastic-package/pull/209 + link: https://github.com/elastic/elastic-package/pull/906 From cf2d73900d7e95c864d5610c25ef27c7f1325b15 Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Thu, 8 Sep 2022 11:08:21 +0200 Subject: [PATCH 19/20] Make suggested changes: - Rename benchrunner.go -> benchmark.go - Add data-streams as a persistent flag - Add num-top-procs flag - Implement Stringer for BenchmarkValue - Rename benchFmtd -> benchFormatted - Make constants global --- cmd/{benchrunner.go => benchmark.go} | 36 ++++++++++--------- internal/benchrunner/benchmark.go | 4 +-- internal/benchrunner/benchrunner.go | 1 + .../benchrunner/reporters/formats/human.go | 6 ++-- .../benchrunner/reporters/formats/xunit.go | 4 +-- .../benchrunner/runners/pipeline/benchmark.go | 13 +++---- .../benchrunner/runners/pipeline/runner.go | 8 ++--- internal/cobraext/flags.go | 3 ++ 8 files changed, 37 insertions(+), 38 deletions(-) rename cmd/{benchrunner.go => benchmark.go} (87%) diff --git a/cmd/benchrunner.go b/cmd/benchmark.go similarity index 87% rename from cmd/benchrunner.go rename to cmd/benchmark.go index 4ac97a1a38..7b1d651dd9 100644 --- a/cmd/benchrunner.go +++ b/cmd/benchmark.go @@ -26,9 +26,10 @@ import ( const benchLongDescription = `Use this command to run benchmarks on a package. Currently, the following types of benchmarks are available: #### Pipeline Benchmarks + These benchmarks allow you to benchmark any Ingest Node Pipelines defined by your packages. 
-For details on how to configure pipeline benchmarks for a package, review the [HOWTO guide](https://github.com/elastic/elastic-package/blob/main/docs/howto/pipeline_benchmarking.md).` +For details on how to configure pipeline benchmarks for a package, review the [HOWTO guide](./docs/howto/pipeline_benchmarking.md).` func setupBenchmarkCommand() *cobraext.Command { var benchTypeCmdActions []cobraext.CommandAction @@ -51,6 +52,8 @@ func setupBenchmarkCommand() *cobraext.Command { cmd.PersistentFlags().StringP(cobraext.ReportFormatFlagName, "", string(formats.ReportFormatHuman), cobraext.ReportFormatFlagDescription) cmd.PersistentFlags().StringP(cobraext.ReportOutputFlagName, "", string(outputs.ReportOutputSTDOUT), cobraext.ReportOutputFlagDescription) cmd.PersistentFlags().BoolP(cobraext.BenchWithTestSamplesFlagName, "", true, cobraext.BenchWithTestSamplesFlagDescription) + cmd.PersistentFlags().IntP(cobraext.BenchNumTopProcsFlagName, "", 10, cobraext.BenchNumTopProcsFlagDescription) + cmd.PersistentFlags().StringSliceP(cobraext.DataStreamsFlagName, "", nil, cobraext.DataStreamsFlagDescription) for benchType, runner := range benchrunner.BenchRunners() { action := benchTypeCommandActionFactory(runner) @@ -96,6 +99,11 @@ func benchTypeCommandActionFactory(runner benchrunner.BenchRunner) cobraext.Comm return cobraext.FlagParsingError(err, cobraext.BenchWithTestSamplesFlagName) } + numTopProcs, err := cmd.Flags().GetInt(cobraext.BenchNumTopProcsFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.BenchNumTopProcsFlagName) + } + packageRootPath, found, err := packages.FindPackageRoot() if !found { return errors.New("package root not found") @@ -104,29 +112,22 @@ func benchTypeCommandActionFactory(runner benchrunner.BenchRunner) cobraext.Comm return errors.Wrap(err, "locating package root failed") } - signal.Enable() + dataStreams, err := cmd.Flags().GetStringSlice(cobraext.DataStreamsFlagName) + if err != nil { + return cobraext.FlagParsingError(err, cobraext.DataStreamsFlagName) + } - var ( - benchFolders []testrunner.TestFolder - dataStreams []string - ) - // We check for the existence of the data streams flag before trying to - // parse it because if the root benchmark command is run instead of one of the - // subcommands of benchmark, the data streams flag will not be defined. 
- if cmd.Flags().Lookup(cobraext.DataStreamsFlagName) != nil { - dataStreams, err = cmd.Flags().GetStringSlice(cobraext.DataStreamsFlagName) + if len(dataStreams) > 0 { common.TrimStringSlice(dataStreams) - if err != nil { - return cobraext.FlagParsingError(err, cobraext.DataStreamsFlagName) - } - err = validateDataStreamsFlag(packageRootPath, dataStreams) - if err != nil { + if err := validateDataStreamsFlag(packageRootPath, dataStreams); err != nil { return cobraext.FlagParsingError(err, cobraext.DataStreamsFlagName) } } - benchFolders, err = benchrunner.FindBenchmarkFolders(packageRootPath, dataStreams, benchType) + signal.Enable() + + benchFolders, err := benchrunner.FindBenchmarkFolders(packageRootPath, dataStreams, benchType) if err != nil { return errors.Wrap(err, "unable to determine benchmark folder paths") } @@ -157,6 +158,7 @@ func benchTypeCommandActionFactory(runner benchrunner.BenchRunner) cobraext.Comm Folder: folder, PackageRootPath: packageRootPath, API: esClient.API, + NumTopProcs: numTopProcs, }) if err != nil { diff --git a/internal/benchrunner/benchmark.go b/internal/benchrunner/benchmark.go index ee7ecbbfac..7720bc419f 100644 --- a/internal/benchrunner/benchmark.go +++ b/internal/benchrunner/benchmark.go @@ -59,8 +59,8 @@ type BenchmarkValue struct { Value interface{} `xml:"value,omitempty"` } -// PrettyValue returns a BenchmarkValue's value nicely-formatted. -func (p BenchmarkValue) PrettyValue() (r string) { +// String returns a BenchmarkValue's value nicely-formatted. +func (p BenchmarkValue) String() (r string) { if str, ok := p.Value.(fmt.Stringer); ok { return str.String() } diff --git a/internal/benchrunner/benchrunner.go b/internal/benchrunner/benchrunner.go index 5cda104ff9..1e0ef86e8c 100644 --- a/internal/benchrunner/benchrunner.go +++ b/internal/benchrunner/benchrunner.go @@ -25,6 +25,7 @@ type BenchOptions struct { Folder testrunner.TestFolder PackageRootPath string API *elasticsearch.API + NumTopProcs int } // BenchRunner is the interface all benchmark runners must implement. 
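For context on the `PrettyValue` → `String` rename in the hunk above: once `BenchmarkValue` satisfies `fmt.Stringer`, the same formatting is picked up both by the human-format table renderer (which now appends `r.String()`) and by ordinary `fmt` verbs. A minimal, self-contained sketch — `benchValue` is a simplified stand-in, not the real type, and its fallback formatting is an assumption:

```
package main

import (
	"fmt"
	"time"
)

// benchValue is a simplified stand-in for benchrunner.BenchmarkValue, used only
// to illustrate the effect of satisfying fmt.Stringer.
type benchValue struct {
	Name  string
	Value interface{}
}

// String prefers the value's own Stringer and otherwise falls back to a
// default formatting (the real implementation is more elaborate).
func (p benchValue) String() string {
	if s, ok := p.Value.(fmt.Stringer); ok {
		return s.String()
	}
	return fmt.Sprintf("%v", p.Value)
}

func main() {
	vals := []benchValue{
		{Name: "ingest time", Value: 230 * time.Millisecond}, // time.Duration implements fmt.Stringer
		{Name: "doc count", Value: 1000},
	}
	for _, v := range vals {
		// Because benchValue itself implements fmt.Stringer, %s now yields the
		// nicely-formatted value, the same string the table renderer appends.
		fmt.Printf("%-12s %s\n", v.Name, v)
	}
}
```

This is what lets `renderBenchmarkTable` drop the dedicated `PrettyValue` call in favour of the standard interface.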
diff --git a/internal/benchrunner/reporters/formats/human.go b/internal/benchrunner/reporters/formats/human.go index 36ad8c6ad9..272274bf99 100644 --- a/internal/benchrunner/reporters/formats/human.go +++ b/internal/benchrunner/reporters/formats/human.go @@ -34,11 +34,11 @@ func reportHumanFormat(results []*benchrunner.Result) ([]string, error) { } } - benchFmtd, err := reportHumanFormatBenchmark(benchmarks) + benchFormatted, err := reportHumanFormatBenchmark(benchmarks) if err != nil { return nil, err } - return benchFmtd, nil + return benchFormatted, nil } func reportHumanFormatBenchmark(benchmarks []benchrunner.BenchmarkResult) ([]string, error) { @@ -67,7 +67,7 @@ func renderBenchmarkTable(title string, values []benchrunner.BenchmarkValue) str }, }) for _, r := range values { - t.AppendRow(table.Row{r.Name, r.PrettyValue()}) + t.AppendRow(table.Row{r.Name, r.String()}) } return t.Render() } diff --git a/internal/benchrunner/reporters/formats/xunit.go b/internal/benchrunner/reporters/formats/xunit.go index 209efb0144..47cc2a9ef1 100644 --- a/internal/benchrunner/reporters/formats/xunit.go +++ b/internal/benchrunner/reporters/formats/xunit.go @@ -29,11 +29,11 @@ func reportXUnitFormat(results []*benchrunner.Result) ([]string, error) { } } - benchFmtd, err := reportXUnitFormatBenchmark(benchmarks) + benchFormatted, err := reportXUnitFormatBenchmark(benchmarks) if err != nil { return nil, err } - return benchFmtd, nil + return benchFormatted, nil } func reportXUnitFormatBenchmark(benchmarks []*benchrunner.BenchmarkResult) ([]string, error) { diff --git a/internal/benchrunner/runners/pipeline/benchmark.go b/internal/benchrunner/runners/pipeline/benchmark.go index 50af4576c3..752fa528c8 100644 --- a/internal/benchrunner/runners/pipeline/benchmark.go +++ b/internal/benchrunner/runners/pipeline/benchmark.go @@ -16,11 +16,6 @@ import ( "github.com/elastic/elastic-package/internal/elasticsearch/ingest" ) -const ( - // How many top processors to return. - numTopProcs = 10 -) - func (r *runner) benchmarkPipeline(b *benchmark, entryPipeline string) (*benchrunner.BenchmarkResult, error) { // Run benchmark bench, err := r.benchmarkIngest(b, entryPipeline) @@ -70,7 +65,7 @@ func (r *runner) benchmarkPipeline(b *benchmark, entryPipeline string) (*benchru aggregate(processorKey, byAbsoluteTime). filter(nonZero). sort(descending). - top(numTopProcs). + top(r.options.NumTopProcs). collect(asPercentageOfTotalTime) if err != nil { return nil, err @@ -80,7 +75,7 @@ func (r *runner) benchmarkPipeline(b *benchmark, entryPipeline string) (*benchru aggregate(processorKey, byRelativeTime). filter(nonZero). sort(descending). - top(numTopProcs). + top(r.options.NumTopProcs). 
collect(asDuration) if err != nil { return nil, err @@ -127,13 +122,13 @@ func (r *runner) benchmarkPipeline(b *benchmark, entryPipeline string) (*benchru { Name: "processors by total time", Detailed: true, - Description: fmt.Sprintf("top %d processors by time spent", numTopProcs), + Description: fmt.Sprintf("top %d processors by time spent", r.options.NumTopProcs), Results: topAbsProc, }, { Name: "processors by average time per doc", Detailed: true, - Description: fmt.Sprintf("top %d processors by average time per document", numTopProcs), + Description: fmt.Sprintf("top %d processors by average time per document", r.options.NumTopProcs), Results: topRelProcs, }, }, diff --git a/internal/benchrunner/runners/pipeline/runner.go b/internal/benchrunner/runners/pipeline/runner.go index ba87d4121c..3feb28e224 100644 --- a/internal/benchrunner/runners/pipeline/runner.go +++ b/internal/benchrunner/runners/pipeline/runner.go @@ -22,6 +22,9 @@ import ( const ( // BenchType defining pipeline benchmarks. BenchType benchrunner.BenchType = "pipeline" + + expectedTestResultSuffix = "-expected.json" + configTestSuffixYAML = "-config.yml" ) type runner struct { @@ -90,11 +93,6 @@ func (r *runner) run() (*benchrunner.Result, error) { } func (r *runner) listBenchmarkFiles() ([]string, error) { - const ( - expectedTestResultSuffix = "-expected.json" - configTestSuffixYAML = "-config.yml" - ) - fis, err := os.ReadDir(r.options.Folder.Path) if err != nil { return nil, errors.Wrapf(err, "reading pipeline benchmarks failed (path: %s)", r.options.Folder.Path) diff --git a/internal/cobraext/flags.go b/internal/cobraext/flags.go index 77ee45e9b4..0a19277119 100644 --- a/internal/cobraext/flags.go +++ b/internal/cobraext/flags.go @@ -26,6 +26,9 @@ const ( AgentPolicyFlagName = "agent-policy" AgentPolicyDescription = "name of the agent policy" + BenchNumTopProcsFlagName = "num-top-procs" + BenchNumTopProcsFlagDescription = "number of top processors to show in the benchmarks results" + BenchWithTestSamplesFlagName = "use-test-samples" BenchWithTestSamplesFlagDescription = "use test samples for the benchmarks" From 01248313eb489638f471d17d4e43de548654f59a Mon Sep 17 00:00:00 2001 From: Marc Guasch Date: Thu, 8 Sep 2022 12:49:19 +0200 Subject: [PATCH 20/20] readme update --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0cbeb0e499..7e17dd336c 100644 --- a/README.md +++ b/README.md @@ -95,9 +95,10 @@ _Context: package_ Use this command to run benchmarks on a package. Currently, the following types of benchmarks are available: #### Pipeline Benchmarks + These benchmarks allow you to benchmark any Ingest Node Pipelines defined by your packages. -For details on how to configure pipeline benchmarks for a package, review the [HOWTO guide](https://github.com/elastic/elastic-package/blob/main/docs/howto/pipeline_benchmarking.md). +For details on how to configure pipeline benchmarks for a package, review the [HOWTO guide](./docs/howto/pipeline_benchmarking.md). ### `elastic-package build`