Skip to content

Commit

Permalink
Merge pull request #6778 from mjibson/gc-keep-interval
Browse files Browse the repository at this point in the history
storage: change the GC to keep values within the GC interval
  • Loading branch information
maddyblue committed May 24, 2016
2 parents 51e30da + fde7e38 commit cc4ce37
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 28 deletions.
50 changes: 29 additions & 21 deletions storage/engine/gc.go
Expand Up @@ -44,7 +44,18 @@ func MakeGarbageCollector(now roachpb.Timestamp, policy config.GCPolicy) Garbage
// garbage collection policy for batches of values for the same key.
// Returns the timestamp including, and after which, all values should
// be garbage collected. If no values should be GC'd, returns
// roachpb.ZeroTimestamp.
// roachpb.ZeroTimestamp. keys must be in descending time order.
// Values deleted at or before the returned timestamp can be deleted without
// invalidating any reads in the time interval (gc.expiration, \infinity).
//
// The GC keeps all values (including deletes) above the expiration time, plus
// the first value before or at the expiration time. This allows reads to be
// guaranteed as described above. However if this were the only rule, then
// if the most recent write was a delete, it would never be removed. Thus,
// when a deleted value is the most recent before expiration, it can be
// deleted. This would still allow for the tombstone bugs in #6227, so in
// the future we will add checks that disallow writes before the last GC
// expiration time.
func (gc GarbageCollector) Filter(keys []MVCCKey, values [][]byte) roachpb.Timestamp {
if gc.policy.TTLSeconds <= 0 {
return roachpb.ZeroTimestamp
Expand All @@ -54,34 +65,31 @@ func (gc GarbageCollector) Filter(keys []MVCCKey, values [][]byte) roachpb.Times
}

// Loop over values. All should be MVCC versions.
var i int
var key MVCCKey
delTS := roachpb.ZeroTimestamp
survivors := false
for i, key := range keys {
for i, key = range keys {
if !key.IsValue() {
log.Errorf("unexpected MVCC metadata encountered: %q", key)
return roachpb.ZeroTimestamp
}
deleted := len(values[i]) == 0
if i == 0 {
// If the first value isn't a deletion tombstone, don't consider
// it for GC. It should always survive if non-deleted.
if !deleted {
survivors = true
continue
}
if gc.expiration.Less(key.Timestamp) {
continue
}
// If we encounter a version older than our GC timestamp, mark for deletion.
if key.Timestamp.Less(gc.expiration) {
// Now key.Timestamp is <= gc.expiration, but the key-value pair is still
// "visible" at timestamp gc.expiration (and up to the next version).
if deleted := len(values[i]) == 0; deleted {
// We don't have to keep a delete visible (since GCing it does not change
// the outcome of the read). Note however that we can't touch deletes at
// higher timestamps immediately preceding this one, since they're above
// gc.expiration and are needed for correctness; see #6227.
delTS = key.Timestamp
break
} else if !deleted {
survivors = true
} else if i+1 < len(keys) {
// Otherwise mark the previous timestamp for deletion (since it won't ever
// be returned for reads at gc.expiration and up).
delTS = keys[i+1].Timestamp
}
}
// If there are no non-deleted survivors, return timestamp of first key
// to delete all entries.
if !survivors {
return keys[0].Timestamp
break
}
return delTS
}
5 changes: 3 additions & 2 deletions storage/engine/gc_test.go
Expand Up @@ -58,12 +58,13 @@ func TestGarbageCollectorFilter(t *testing.T) {
expDelTS roachpb.Timestamp
}{
{gcA, makeTS(0, 0), aKeys, [][]byte{n, n, n}, roachpb.ZeroTimestamp},
{gcA, makeTS(0, 0), aKeys, [][]byte{d, d, d}, makeTS(2E9, 0)},
{gcA, makeTS(0, 0), aKeys, [][]byte{d, d, d}, roachpb.ZeroTimestamp},
{gcB, makeTS(0, 0), bKeys, [][]byte{n, n}, roachpb.ZeroTimestamp},
{gcB, makeTS(0, 0), bKeys, [][]byte{d, d}, makeTS(2E9, 0)},
{gcB, makeTS(0, 0), bKeys, [][]byte{d, d}, roachpb.ZeroTimestamp},
{gcA, makeTS(1E9, 0), aKeys, [][]byte{n, n, n}, roachpb.ZeroTimestamp},
{gcB, makeTS(1E9, 0), bKeys, [][]byte{n, n}, roachpb.ZeroTimestamp},
{gcA, makeTS(2E9, 0), aKeys, [][]byte{n, n, n}, roachpb.ZeroTimestamp},
{gcA, makeTS(2E9, 0), aKeys, [][]byte{d, d, d}, makeTS(1E9, 0)},
{gcB, makeTS(2E9, 0), bKeys, [][]byte{n, n}, roachpb.ZeroTimestamp},
{gcA, makeTS(3E9, 0), aKeys, [][]byte{n, n, n}, makeTS(1E9, 1)},
{gcA, makeTS(3E9, 0), aKeys, [][]byte{d, n, n}, makeTS(2E9, 0)},
Expand Down
33 changes: 28 additions & 5 deletions storage/gc_queue_test.go
Expand Up @@ -162,6 +162,7 @@ func TestGCQueueProcess(t *testing.T) {

ts1 := makeTS(now-2*24*60*60*1E9+1, 0) // 2d old (add one nanosecond so we're not using zero timestamp)
ts2 := makeTS(now-25*60*60*1E9, 0) // GC will occur at time=25 hours
ts2m1 := ts2.Prev() // ts2 - 1 so we have something not right at the GC time
ts3 := makeTS(now-intentAgeThreshold.Nanoseconds(), 0) // 2h old
ts4 := makeTS(now-(intentAgeThreshold.Nanoseconds()-1), 0) // 2h-1ns old
ts5 := makeTS(now-1E9, 0) // 1s old
Expand All @@ -174,20 +175,22 @@ func TestGCQueueProcess(t *testing.T) {
key7 := roachpb.Key("g")
key8 := roachpb.Key("h")
key9 := roachpb.Key("i")
key10 := roachpb.Key("j")
key11 := roachpb.Key("k")

data := []struct {
key roachpb.Key
ts roachpb.Timestamp
del bool
txn bool
}{
// For key1, we expect first two values to GC.
// For key1, we expect first value to GC.
{key1, ts1, false, false},
{key1, ts2, false, false},
{key1, ts5, false, false},
// For key2, we expect all values to GC, because most recent is deletion.
// For key2, we expect values to GC, even though most recent is deletion.
{key2, ts1, false, false},
{key2, ts2, false, false},
{key2, ts2m1, false, false}, // use a value < the GC time to verify it's kept
{key2, ts5, true, false},
// For key3, we expect just ts1 to GC, because most recent deletion is intent.
{key3, ts1, false, false},
Expand All @@ -198,7 +201,7 @@ func TestGCQueueProcess(t *testing.T) {
{key4, ts2, false, false},
// For key5, expect all values to GC (most recent value deleted).
{key5, ts1, false, false},
{key5, ts2, true, false},
{key5, ts2, true, false}, // deleted, so GC
// For key6, expect no values to GC because most recent value is intent.
{key6, ts1, false, false},
{key6, ts5, false, true},
Expand All @@ -209,7 +212,17 @@ func TestGCQueueProcess(t *testing.T) {
{key8, ts2, false, false},
{key8, ts3, true, true},
// For key9, resolve naked intent with no remaining values.
{key9, ts3, true, false},
{key9, ts3, false, true},
// For key10, GC ts1 because it's a delete but not ts3 because it's above the threshold.
{key10, ts1, true, false},
{key10, ts3, true, false},
{key10, ts4, false, false},
{key10, ts5, false, false},
// For key11, we can't GC anything because ts1 isn't a delete.
{key11, ts1, false, false},
{key11, ts3, true, false},
{key11, ts4, true, false},
{key11, ts5, true, false},
}

for i, datum := range data {
Expand Down Expand Up @@ -260,6 +273,9 @@ func TestGCQueueProcess(t *testing.T) {
ts roachpb.Timestamp
}{
{key1, ts5},
{key1, ts2},
{key2, ts5},
{key2, ts2m1},
{key3, roachpb.ZeroTimestamp},
{key3, ts5},
{key3, ts2},
Expand All @@ -271,6 +287,13 @@ func TestGCQueueProcess(t *testing.T) {
{key7, ts4},
{key7, ts2},
{key8, ts2},
{key10, ts5},
{key10, ts4},
{key10, ts3},
{key11, ts5},
{key11, ts4},
{key11, ts3},
{key11, ts1},
}
// Read data directly from engine to avoid intent errors from MVCC.
kvs, err := engine.Scan(tc.store.Engine(), engine.MakeMVCCMetadataKey(key1),
Expand Down

0 comments on commit cc4ce37

Please sign in to comment.