replay: adding ingestion testing

Sachuman · Sachuman · commit e2a0f833e83b · 2025-07-28T15:35:38.000-04:00
This commit adds ingestion tests to the replay package to improve
benchmarks and to remove any mismatch between the workload and the replayed
database.
diff --git a/compaction.go b/compaction.go
@@ -1593,6 +1593,7 @@ func (d *DB) runIngestFlush(c *tableCompaction) (*manifest.VersionEdit, error) {
 				Bounds: exciseBounds,
 				SeqNum: ingestFlushable.exciseSeqNum,
 			})
+			d.mu.versions.metrics.Ingest.ExciseIngestCount++
 		}
 		// Iterate through all levels and find files that intersect with exciseSpan.
 		for layer, ls := range version.AllLevelsAndSublevels() {
diff --git a/ingest.go b/ingest.go
@@ -2082,6 +2082,7 @@ func (d *DB) ingestApply(
 		var exciseBounds base.UserKeyBounds
 		if exciseSpan.Valid() {
 			exciseBounds = exciseSpan.UserKeyBounds()
+			d.mu.versions.metrics.Ingest.ExciseIngestCount++
 			// Iterate through all levels and find files that intersect with exciseSpan.
 			//
 			// TODO(bilal): We could drop the DB mutex here as we don't need it for
diff --git a/internal/datatest/datatest.go b/internal/datatest/datatest.go
@@ -7,12 +7,16 @@
 package datatest
 
 import (
+	"context"
 	"strings"
 	"sync"
 
 	"github.com/cockroachdb/datadriven"
 	"github.com/cockroachdb/errors"
 	"github.com/cockroachdb/pebble"
+	"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
+	"github.com/cockroachdb/pebble/sstable"
+	"github.com/cockroachdb/pebble/vfs"
 )
 
 // TODO(jackson): Consider a refactoring that can consolidate this package and
@@ -138,3 +142,76 @@ func (cql *CompactionTracker) WaitForInflightCompactionsToEqual(target int) {
 	}
 	cql.L.Unlock()
 }
+
+// The functions below are largely copied from data_test.go in the pebble
+// package.
+
+// RunBuildSSTCmd builds an sstable at path on fs from the test sstable
+// description in input (as parsed by sstable.ParseTestSST) and returns the
+// metadata reported by the writer.
+//
+// The writer options start from the defaults supplied through opts (see
+// WithDefaultWriterOpts) and are then adjusted by parsing writerArgs.
+//
+// An error is returned if the writer options cannot be parsed, the file
+// cannot be created, the input cannot be parsed, or the writer fails to
+// close or to report its metadata.
+func RunBuildSSTCmd(
+	input string,
+	writerArgs []datadriven.CmdArg,
+	path string,
+	fs vfs.FS,
+	opts ...func(*dataDrivenCmdOptions),
+) (sstable.WriterMetadata, error) {
+	ddOpts := combineDataDrivenOpts(opts...)
+
+	writerOpts := ddOpts.defaultWriterOpts
+	if err := sstable.ParseWriterOptions(&writerOpts, writerArgs...); err != nil {
+		return sstable.WriterMetadata{}, err
+	}
+
+	f, err := fs.Create(path, vfs.WriteCategoryUnspecified)
+	if err != nil {
+		return sstable.WriterMetadata{}, err
+	}
+	w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), writerOpts)
+	if err := sstable.ParseTestSST(w.Raw(), input, nil /* bv */); err != nil {
+		// Close the writer so the file created above is not left open when
+		// the input fails to parse. The parse error stays the primary error;
+		// any error from Close is attached as a secondary error.
+		return sstable.WriterMetadata{}, errors.CombineErrors(err, w.Close())
+	}
+	if err := w.Close(); err != nil {
+		return sstable.WriterMetadata{}, err
+	}
+	metadata, err := w.Metadata()
+	if err != nil {
+		return sstable.WriterMetadata{}, err
+	}
+	return *metadata, nil
+}
+
+// combineDataDrivenOpts applies each option function, in order, to a
+// zero-valued dataDrivenCmdOptions and returns the result.
+func combineDataDrivenOpts(opts ...func(*dataDrivenCmdOptions)) dataDrivenCmdOptions {
+	var merged dataDrivenCmdOptions
+	for i := range opts {
+		opts[i](&merged)
+	}
+	return merged
+}
+
+// dataDrivenCmdOptions holds the optional settings for the data-driven
+// command helpers in this package. It is populated by option functions such
+// as WithDefaultWriterOpts.
+type dataDrivenCmdOptions struct {
+	// defaultWriterOpts is the sstable.WriterOptions that RunBuildSSTCmd
+	// starts from before parsing its writer arguments.
+	defaultWriterOpts sstable.WriterOptions
+}
+
+// WithDefaultWriterOpts returns an option function that stores
+// defaultWriterOpts as the default sstable.WriterOptions in the
+// dataDrivenCmdOptions it is applied to.
+func WithDefaultWriterOpts(defaultWriterOpts sstable.WriterOptions) func(*dataDrivenCmdOptions) {
+	apply := func(target *dataDrivenCmdOptions) {
+		target.defaultWriterOpts = defaultWriterOpts
+	}
+	return apply
+}
+
+// RunIngestAndExciseCmd runs an ingest-and-excise command described by td
+// against d.
+//
+// Every command argument whose key ends in ".sst" is collected as a path to
+// ingest. An argument with key "excise" must carry exactly one value of the
+// form "start-end", which supplies the start and end keys of the excise
+// span. All other arguments are ignored. The collected paths and span are
+// passed to d.IngestAndExcise, and its error, if any, is returned.
+func RunIngestAndExciseCmd(td *datadriven.TestData, d *pebble.DB) error {
+	const badExcise = "expected 2 values for excise separated by -, eg. ingest-and-excise foo1 excise=\"start-end\""
+
+	var span pebble.KeyRange
+	sstPaths := []string{}
+	for _, arg := range td.CmdArgs {
+		switch {
+		case strings.HasSuffix(arg.Key, ".sst"):
+			sstPaths = append(sstPaths, arg.Key)
+		case arg.Key == "excise":
+			if len(arg.Vals) != 1 {
+				return errors.New(badExcise)
+			}
+			bounds := strings.Split(arg.Vals[0], "-")
+			if len(bounds) != 2 {
+				return errors.New(badExcise)
+			}
+			span.Start, span.End = []byte(bounds[0]), []byte(bounds[1])
+		}
+	}
+	_, err := d.IngestAndExcise(context.Background(), sstPaths, nil /* shared */, nil /* external */, span)
+	return err
+}
diff --git a/metrics.go b/metrics.go
@@ -258,6 +258,8 @@ type Metrics struct {
 	Ingest struct {
 		// The total number of ingestions
 		Count uint64
+		// The number of excise operations during ingestion
+		ExciseIngestCount int64
 	}
 
 	Flush struct {
diff --git a/replay/replay.go b/replay/replay.go
@@ -118,6 +118,7 @@ type Metrics struct {
 		// effective heuristics are at ingesting files into lower levels, saving
 		// write amplification.
 		BytesWeightedByLevel uint64
+		ExciseIngestCount    int64
 	}
 	// PaceDuration is the time waiting for the pacer to allow the workload to
 	// continue.
@@ -204,6 +205,9 @@ func (m *Metrics) WriteBenchmarkString(name string, w io.Writer) error {
 		{label: "EstimatedDebt/max", values: []benchfmt.Value{
 			{Value: float64(m.EstimatedDebt.Max()), Unit: "bytes"},
 		}},
+		{label: "ExciseDuringIngestion", values: []benchfmt.Value{
+			{Value: float64(m.Ingest.ExciseIngestCount), Unit: "excise"},
+		}},
 		{label: "FlushUtilization", values: []benchfmt.Value{
 			{Value: m.Final.Flush.WriteThroughput.Utilization(), Unit: "util"},
 		}},
@@ -563,6 +567,7 @@ func (r *Runner) Wait() (Metrics, error) {
 	m.CompactionCounts.Rewrite = pm.Compact.RewriteCount
 	m.CompactionCounts.Copy = pm.Compact.CopyCount
 	m.CompactionCounts.MultiLevel = pm.Compact.MultiLevelCount
+	m.Ingest.ExciseIngestCount = pm.Ingest.ExciseIngestCount
 	m.Ingest.BytesIntoL0 = pm.Levels[0].TableBytesIngested
 	m.Ingest.BytesWeightedByLevel = ingestBytesWeighted
 	return m, err
@@ -584,8 +589,10 @@ type workloadStep struct {
 	// readAmp estimation for the LSM *before* ve was applied.
 	previousReadAmp int
 	// non-nil for flushStepKind
-	flushBatch           *pebble.Batch
-	tablesToIngest       []string
+	flushBatch     *pebble.Batch
+	tablesToIngest []string
+	// exciseSpan is set for ingestAndExciseStepKind
+	exciseSpan           pebble.KeyRange
 	cumulativeWriteBytes uint64
 }
 
@@ -595,6 +602,7 @@ const (
 	flushStepKind stepKind = iota
 	ingestStepKind
 	compactionStepKind
+	ingestAndExciseStepKind
 )
 
 // eventListener returns a Pebble EventListener that is installed on the replay
@@ -692,6 +700,12 @@ func (r *Runner) applyWorkloadSteps(ctx context.Context) error {
 			}
 			r.metrics.writeBytes.Store(step.cumulativeWriteBytes)
 			r.stepsApplied <- step
+		case ingestAndExciseStepKind:
+			if _, err := r.d.IngestAndExcise(context.Background(), step.tablesToIngest, nil /* shared */, nil /* external */, step.exciseSpan); err != nil {
+				return err
+			}
+			r.metrics.writeBytes.Store(step.cumulativeWriteBytes)
+			r.stepsApplied <- step
 		case compactionStepKind:
 			// No-op.
 			// TODO(jackson): Should we elide this earlier?
@@ -795,12 +809,22 @@ func (r *Runner) prepareWorkloadSteps(ctx context.Context) error {
 					// flush.
 					s.kind = ingestStepKind
 				}
+				if len(ve.ExciseBoundsRecord) > 0 {
+					// If a version edit contains excise bounds records, it's an excise operation.
+					// In practice, there should typically be only one excise bounds record per version edit.
+					exciseEntry := ve.ExciseBoundsRecord[0]
+					s.exciseSpan = pebble.KeyRange{
+						Start: exciseEntry.Bounds.Start,
+						End:   exciseEntry.Bounds.End.Key,
+					}
+					s.kind = ingestAndExciseStepKind
+				}
 				var newFiles []base.DiskFileNum
 				blobRefMap := make(map[base.DiskFileNum]manifest.BlobReferences)
 				blobFileMap := make(map[base.BlobFileID]base.DiskFileNum)
 				for _, nf := range ve.NewTables {
 					newFiles = append(newFiles, nf.Meta.TableBacking.DiskFileNum)
-					if s.kind == ingestStepKind && (nf.Meta.SmallestSeqNum != nf.Meta.LargestSeqNum || nf.Level != 0) {
+					if s.kind == ingestStepKind && (nf.Meta.SmallestSeqNum != nf.Meta.LargestSeqNum) {
 						s.kind = flushStepKind
 					}
 					if nf.Meta.BlobReferenceDepth > 0 {
@@ -870,7 +894,7 @@ func (r *Runner) prepareWorkloadSteps(ctx context.Context) error {
 						return errors.Wrapf(err, "flush in %q at offset %d", manifestName, rr.Offset())
 					}
 					cumulativeWriteBytes += uint64(s.flushBatch.Len())
-				case ingestStepKind:
+				case ingestStepKind, ingestAndExciseStepKind:
 					// Copy the ingested sstables into a staging area within the
 					// run dir. This is necessary for two reasons:
 					//  a) Ingest will remove the source file, and we don't want
diff --git a/replay/replay_test.go b/replay/replay_test.go
@@ -27,6 +27,7 @@ import (
 	"github.com/cockroachdb/pebble/internal/invariants"
 	"github.com/cockroachdb/pebble/internal/testkeys"
 	"github.com/cockroachdb/pebble/rangekey"
+	"github.com/cockroachdb/pebble/sstable"
 	"github.com/cockroachdb/pebble/vfs"
 	"github.com/stretchr/testify/require"
 )
@@ -166,6 +167,10 @@ func TestReplayValSep(t *testing.T) {
 	runReplayTest(t, "testdata/replay_val_sep")
 }
 
+// TestReplayIngest runs the shared replay test harness against the
+// testdata/replay_ingest test data.
+func TestReplayIngest(t *testing.T) {
+	const testdataPath = "testdata/replay_ingest"
+	runReplayTest(t, testdataPath)
+}
+
 func TestLoadFlushedSSTableKeys(t *testing.T) {
 	var buf bytes.Buffer
 	var diskFileNums []base.DiskFileNum
@@ -283,6 +288,34 @@ func collectCorpus(t *testing.T, fs *vfs.MemFS, name string) {
 				return err.Error()
 			}
 			return ""
+		case "build-sst":
+			writerOpts := sstable.WriterOptions{
+				Comparer: testkeys.Comparer,
+			}
+			sstPath := td.CmdArgs[0].Key
+			writerOpts.TableFormat = sstable.TableFormatPebblev7
+
+			_, err := datatest.RunBuildSSTCmd(td.Input, td.CmdArgs, sstPath, fs, datatest.WithDefaultWriterOpts(writerOpts))
+			if err != nil {
+				return err.Error()
+			}
+			return ""
+		case "ingest":
+			paths := make([]string, 0)
+			for i := range td.CmdArgs {
+				if strings.HasSuffix(td.CmdArgs[i].Key, ".sst") {
+					paths = append(paths, td.CmdArgs[i].Key)
+				}
+			}
+			if err := d.Ingest(context.Background(), paths); err != nil {
+				return err.Error()
+			}
+			return "ingested"
+		case "ingest-and-excise":
+			if err := datatest.RunIngestAndExciseCmd(td, d); err != nil {
+				return err.Error()
+			}
+			return "ingest-and-excised"
 		case "flush":
 			require.NoError(t, d.Flush())
 			return ""
@@ -329,6 +362,21 @@ func collectCorpus(t *testing.T, fs *vfs.MemFS, name string) {
 			d, err = pebble.Open("build", opts)
 			require.NoError(t, err)
 			return ""
+		case "open-ingest-excise":
+			wc = NewWorkloadCollector("build")
+			opts := &pebble.Options{
+				Comparer:                    testkeys.Comparer,
+				DisableAutomaticCompactions: true,
+				FormatMajorVersion:          pebble.FormatExciseBoundsRecord,
+				FS:                          fs,
+				MaxManifestFileSize:         156,
+			}
+			setDefaultExperimentalOpts(opts)
+			wc.Attach(opts)
+			var err error
+			d, err = pebble.Open("build", opts)
+			require.NoError(t, err)
+			return ""
 		case "close":
 			err := d.Close()
 			require.NoError(t, err)
@@ -492,6 +540,7 @@ BenchmarkBenchmarkReplay/tpcc/DurationQuiescing 1 0.5 sec/op
 BenchmarkBenchmarkReplay/tpcc/DurationPaceDelay 1 0.25 sec/op
 BenchmarkBenchmarkReplay/tpcc/EstimatedDebt/mean 1 1.6777216e+08 bytes
 BenchmarkBenchmarkReplay/tpcc/EstimatedDebt/max 1 1.6777216e+08 bytes
+BenchmarkBenchmarkReplay/tpcc/ExciseDuringIngestion 1 0 excise
 BenchmarkBenchmarkReplay/tpcc/FlushUtilization 1 0 util
 BenchmarkBenchmarkReplay/tpcc/IngestedIntoL0 1 5.24288e+06 bytes
 BenchmarkBenchmarkReplay/tpcc/IngestWeightedByLevel 1 9.437184e+06 bytes
diff --git a/replay/testdata/corpus/simple_ingest b/replay/testdata/corpus/simple_ingest
diff --git a/replay/testdata/replay_ingest b/replay/testdata/replay_ingest

Original file line number	Diff line number	Diff line change
`@@ -1593,6 +1593,7 @@ func (d *DB) runIngestFlush(c *tableCompaction) (*manifest.VersionEdit, error) {`
`1593`	`1593`	`Bounds: exciseBounds,`
`1594`	`1594`	`SeqNum: ingestFlushable.exciseSeqNum,`
`1595`	`1595`	`})`
	`1596`	`+ d.mu.versions.metrics.Ingest.ExciseIngestCount++`
`1596`	`1597`	`}`
`1597`	`1598`	`// Iterate through all levels and find files that intersect with exciseSpan.`
`1598`	`1599`	`for layer, ls := range version.AllLevelsAndSublevels() {`
Original file line number	Diff line number	Diff line change
`@@ -258,6 +258,8 @@ type Metrics struct {`
`258`	`258`	`Ingest struct {`
`259`	`259`	`// The total number of ingestions`
`260`	`260`	`Count uint64`
	`261`	`+ // The number of excise operations during ingestion`
	`262`	`+ ExciseIngestCount int64`
`261`	`263`	`}`
`262`	`264`
`263`	`265`	`Flush struct {`