Skip to content

Commit

Permalink
ingest: use StartKey/EndKey pair rather than bounds in ExternalFile
Browse files Browse the repository at this point in the history
Bounds is defined as exclusive, so it is semantically confusing to use
it here.
  • Loading branch information
stevendanna committed Mar 18, 2024
1 parent b55a958 commit c109764
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 49 deletions.
8 changes: 4 additions & 4 deletions data_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1321,16 +1321,16 @@ func runIngestExternalCmd(
switch arg.Key {
case "bounds":
nArgs(2)
ef.Bounds.Start = []byte(arg.Vals[0])
ef.Bounds.End = []byte(arg.Vals[1])
ef.StartKey = []byte(arg.Vals[0])
ef.EndKey = []byte(arg.Vals[1])
case "bounds-are-inclusive":
nArgs(1)
b, err := strconv.ParseBool(arg.Vals[0])
if err != nil {
usageErr(fmt.Sprintf("%s should have boolean argument: %v",
arg.Key, err))
}
ef.BoundsHasInclusiveEndKey = b
ef.EndKeyIsInclusive = b
case "size":
nArgs(1)
arg.Scan(t, 0, &ef.Size)
Expand All @@ -1347,7 +1347,7 @@ func runIngestExternalCmd(
usageErr(fmt.Sprintf("unknown argument %v", arg.Key))
}
}
if ef.Bounds.Start == nil {
if ef.StartKey == nil {
usageErr("no bounds specified")
}

Expand Down
42 changes: 23 additions & 19 deletions ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,17 +182,17 @@ func ingestLoad1External(
return nil, errors.New("pebble: cannot ingest external file with no point or range keys")
}

if opts.Comparer.Compare(e.Bounds.Start, e.Bounds.End) > 0 {
return nil, errors.Newf("pebble: external file bounds [%q, %q) are invalid", e.Bounds.Start, e.Bounds.End)
if opts.Comparer.Compare(e.StartKey, e.EndKey) > 0 {
return nil, errors.Newf("pebble: external file bounds [%q, %q) are invalid", e.StartKey, e.EndKey)
}
if opts.Comparer.Compare(e.Bounds.Start, e.Bounds.End) == 0 && !e.BoundsHasInclusiveEndKey {
return nil, errors.Newf("pebble: external file bounds [%q, %q) are invalid", e.Bounds.Start, e.Bounds.End)
if opts.Comparer.Compare(e.StartKey, e.EndKey) == 0 && !e.EndKeyIsInclusive {
return nil, errors.Newf("pebble: external file bounds [%q, %q) are invalid", e.StartKey, e.EndKey)
}
if n := opts.Comparer.Split(e.Bounds.Start); n != len(e.Bounds.Start) {
return nil, errors.Newf("pebble: external file bounds start key %q has suffix", e.Bounds.Start)
if n := opts.Comparer.Split(e.StartKey); n != len(e.StartKey) {
return nil, errors.Newf("pebble: external file bounds start key %q has suffix", e.StartKey)
}
if n := opts.Comparer.Split(e.Bounds.End); n != len(e.Bounds.End) {
return nil, errors.Newf("pebble: external file bounds end key %q has suffix", e.Bounds.End)
if n := opts.Comparer.Split(e.EndKey); n != len(e.EndKey) {
return nil, errors.Newf("pebble: external file bounds end key %q has suffix", e.EndKey)
}

// #3287: range keys don't yet work correctly when the range key bounds are not tight.
Expand All @@ -219,10 +219,13 @@ func ingestLoad1External(
// In the name of keeping this ingestion as fast as possible, we avoid
// *all* existence checks and synthesize a file metadata with smallest/largest
// keys that overlap whatever the passed-in span was.
smallestCopy := slices.Clone(e.Bounds.Start)
largestCopy := slices.Clone(e.Bounds.End)
smallestCopy := slices.Clone(e.StartKey)
largestCopy := slices.Clone(e.EndKey)
if e.HasPointKey {
if e.BoundsHasInclusiveEndKey {
// Sequence numbers are updated later by
// ingestUpdateSeqNum, applying a squence number that
// is applied to all keys in the sstable.
if e.EndKeyIsInclusive {
meta.ExtendPointKeyBounds(
opts.Comparer.Compare,
base.MakeInternalKey(smallestCopy, 0, InternalKeyKindMax),
Expand Down Expand Up @@ -1143,19 +1146,20 @@ type ExternalFile struct {
// is acceptable in lieu of the backing file size.
Size uint64

// Bounds of the sstable; the ingestion of this file will only result in keys
// within [Bounds.Start, Bounds.End). These bounds are loose i.e. it's
// possible for keys to not span the entirety of this range.
// StartKey and EndKey define the bounds of the sstable; the ingestion
// of this file will only result in keys within [StartKey, EndKey) if
// EndKeyIsInclusive is false or [StartKey, EndKey] if it is true.
// These bounds are loose i.e. it's possible for keys to not span the
// entirety of this range.
//
// The Bounds.Start/End user keys must not have suffixes.
// StartKey and EndKey user keys must not have suffixes.
//
// Multiple ExternalFiles in one ingestion must all have non-overlapping
// bounds.
Bounds KeyRange
StartKey, EndKey []byte

// BoundsHasInclusiveEndKey is true if Bounds.End should be
// treated as inclusive.
BoundsHasInclusiveEndKey bool
// EndKeyIsInclusive is true if EndKey should be treated as inclusive.
EndKeyIsInclusive bool

// HasPointKey and HasRangeKey denote whether this file contains point keys
// or range keys. If both structs are false, an error is returned during
Expand Down
10 changes: 6 additions & 4 deletions metamorphic/ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -980,10 +980,12 @@ func (o *ingestExternalFilesOp) run(t *Test, h historyRecorder) {
for i, obj := range objs {
meta := t.getExternalObj(obj.externalObjID)
external[i] = pebble.ExternalFile{
Locator: "external",
ObjName: externalObjName(obj.externalObjID),
Size: meta.sstMeta.Size,
Bounds: obj.bounds,
Locator: "external",
ObjName: externalObjName(obj.externalObjID),
Size: meta.sstMeta.Size,
StartKey: obj.bounds.Start,
EndKey: obj.bounds.End,
EndKeyIsInclusive: false,
// Note: if the table has point/range keys, we don't know for sure whether
// this particular range has any, but that's acceptable.
HasPointKey: meta.sstMeta.HasPointKeys || meta.sstMeta.HasRangeDelKeys,
Expand Down
21 changes: 10 additions & 11 deletions scan_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -467,7 +467,6 @@ func (d *DB) truncateExternalFile(
Level: uint8(level),
ObjName: objMeta.Remote.CustomObjectName,
Locator: objMeta.Remote.Locator,
Bounds: KeyRange{},
HasPointKey: file.HasPointKeys,
HasRangeKey: file.HasRangeKeys,
Size: file.Size,
Expand All @@ -479,27 +478,27 @@ func (d *DB) truncateExternalFile(

needsLowerTruncate := cmp(lower, file.Smallest.UserKey) > 0
if needsLowerTruncate {
sst.Bounds.Start = slices.Clone(lower)
sst.StartKey = slices.Clone(lower)
} else {
sst.Bounds.Start = slices.Clone(file.Smallest.UserKey)
sst.StartKey = slices.Clone(file.Smallest.UserKey)
}

cmpUpper := cmp(upper, file.Largest.UserKey)
needsUpperTruncate := cmpUpper < 0
if needsUpperTruncate {
sst.Bounds.End = slices.Clone(upper)
sst.BoundsHasInclusiveEndKey = false
sst.EndKey = slices.Clone(upper)
sst.EndKeyIsInclusive = false
} else {
sst.Bounds.End = slices.Clone(file.Largest.UserKey)
sst.BoundsHasInclusiveEndKey = !file.Largest.IsExclusiveSentinel()
sst.EndKey = slices.Clone(file.Largest.UserKey)
sst.EndKeyIsInclusive = !file.Largest.IsExclusiveSentinel()
}

if cmp(sst.Bounds.Start, sst.Bounds.End) > 0 {
return nil, errors.AssertionFailedf("pebble: invalid external file bounds after truncation [%q, %q)", sst.Bounds.Start, sst.Bounds.End)
if cmp(sst.StartKey, sst.EndKey) > 0 {
return nil, errors.AssertionFailedf("pebble: invalid external file bounds after truncation [%q, %q)", sst.StartKey, sst.EndKey)
}

if cmp(sst.Bounds.Start, sst.Bounds.End) == 0 && !sst.BoundsHasInclusiveEndKey {
return nil, errors.AssertionFailedf("pebble: invalid external file bounds after truncation [%q, %q)", sst.Bounds.Start, sst.Bounds.End)
if cmp(sst.StartKey, sst.EndKey) == 0 && !sst.EndKeyIsInclusive {
return nil, errors.AssertionFailedf("pebble: invalid external file bounds after truncation [%q, %q)", sst.StartKey, sst.EndKey)
}

return sst, nil
Expand Down
20 changes: 9 additions & 11 deletions scan_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -475,15 +475,13 @@ func TestScanInternal(t *testing.T) {

writeSST(points, rangeDels, rangeKeys, objstorageprovider.NewRemoteWritable(file))
ef := ExternalFile{
ObjName: objName,
Locator: remote.Locator("external-storage"),
Size: 10,
Bounds: KeyRange{
Start: smallest.UserKey,
End: largest.UserKey,
},
BoundsHasInclusiveEndKey: true,
HasPointKey: true,
ObjName: objName,
Locator: remote.Locator("external-storage"),
Size: 10,
StartKey: smallest.UserKey,
EndKey: largest.UserKey,
EndKeyIsInclusive: true,
HasPointKey: true,
}
_, err = d.IngestExternalFiles([]ExternalFile{ef})
require.NoError(t, err)
Expand Down Expand Up @@ -554,8 +552,8 @@ func TestScanInternal(t *testing.T) {
externalFileVisitor = func(sst *ExternalFile) error {
fmt.Fprintf(&b, "external file: %s %s [0x%s-0x%s] (hasPoint: %v, hasRange: %v)\n",
sst.Locator, sst.ObjName,
hex.EncodeToString(sst.Bounds.Start),
hex.EncodeToString(sst.Bounds.End),
hex.EncodeToString(sst.StartKey),
hex.EncodeToString(sst.EndKey),
sst.HasPointKey, sst.HasRangeKey)
return nil
}
Expand Down

0 comments on commit c109764

Please sign in to comment.