Skip to content

Commit

Permalink
Ignore translog retention policy if soft-deletes enabled (#45473)
Browse files Browse the repository at this point in the history
Since #45136, we use soft-deletes instead of translog in peer recovery.
There's no need to retain extra translog to increase a chance of
operation-based recoveries. This commit ignores the translog retention
policy if soft-deletes is enabled so we can discard translog more
quickly.

Co-authored-by: David Turner <david.turner@elastic.co>

Relates #45136
  • Loading branch information
dnhatn committed Aug 21, 2019
1 parent 2202d00 commit b0d346f
Show file tree
Hide file tree
Showing 17 changed files with 343 additions and 69 deletions.
21 changes: 16 additions & 5 deletions docs/reference/index-modules/translog.asciidoc
Expand Up @@ -76,12 +76,23 @@ commit point. Defaults to `512mb`.

`index.translog.retention.size`::

The total size of translog files to keep. Keeping more translog files increases
the chance of performing an operation based sync when recovering replicas. If
the translog files are not sufficient, replica recovery will fall back to a
file based sync. Defaults to `512mb`
When soft deletes is disabled (enabled by default in 7.0 or later),
`index.translog.retention.size` controls the total size of translog files to keep.
Keeping more translog files increases the chance of performing an operation based
sync when recovering replicas. If the translog files are not sufficient,
replica recovery will fall back to a file based sync. Defaults to `512mb`

Both `index.translog.retention.size` and `index.translog.retention.age` should not
be specified unless soft deletes is disabled as they will be ignored.


`index.translog.retention.age`::

The maximum duration for which translog files will be kept. Defaults to `12h`.
When soft deletes is disabled (enabled by default in 7.0 or later),
`index.translog.retention.age` controls the maximum duration for which translog
files to keep. Keeping more translog files increases the chance of performing an
operation based sync when recovering replicas. If the translog files are not sufficient,
replica recovery will fall back to a file based sync. Defaults to `12h`

Both `index.translog.retention.size` and `index.translog.retention.age` should not
be specified unless soft deletes is disabled as they will be ignored.
@@ -1,14 +1,14 @@
---
setup:
"Translog retention without soft_deletes":
- do:
indices.create:
index: test
index: test
body:
settings:
soft_deletes.enabled: false
- do:
cluster.health:
wait_for_no_initializing_shards: true

---
"Translog retention":
- do:
indices.stats:
metric: [ translog ]
Expand Down Expand Up @@ -64,6 +64,53 @@ setup:
- lte: { indices.test.primaries.translog.uncommitted_size_in_bytes: $creation_size }
- match: { indices.test.primaries.translog.uncommitted_operations: 0 }

---
"Translog retention with soft_deletes":
- skip:
version: " - 7.9.99"
reason: "start ignoring translog retention policy with soft-deletes enabled in 8.0"
- do:
indices.create:
index: test
body:
settings:
soft_deletes.enabled: true
- do:
cluster.health:
wait_for_no_initializing_shards: true
- do:
indices.stats:
metric: [ translog ]
- set: { indices.test.primaries.translog.size_in_bytes: creation_size }

- do:
index:
index: test
id: 1
body: { "foo": "bar" }

- do:
indices.stats:
metric: [ translog ]
- gt: { indices.test.primaries.translog.size_in_bytes: $creation_size }
- match: { indices.test.primaries.translog.operations: 1 }
- match: { indices.test.primaries.translog.uncommitted_operations: 1 }
# call flush twice to sync the global checkpoint after the last operation so that we can have the safe commit
- do:
indices.flush:
index: test
- do:
indices.flush:
index: test
- do:
indices.stats:
metric: [ translog ]
# after flushing we have one empty translog file while an empty index before flushing has two empty translog files.
- lt: { indices.test.primaries.translog.size_in_bytes: $creation_size }
- match: { indices.test.primaries.translog.operations: 0 }
- lt: { indices.test.primaries.translog.uncommitted_size_in_bytes: $creation_size }
- match: { indices.test.primaries.translog.uncommitted_operations: 0 }

---
"Translog last modified age stats":

Expand All @@ -79,11 +126,20 @@ setup:
- gte: { indices.test.primaries.translog.earliest_last_modified_age: 0 }

---
"Translog stats on closed indices":
"Translog stats on closed indices without soft-deletes":
- skip:
version: " - 7.2.99"
reason: "closed indices have translog stats starting version 7.3.0"

- do:
indices.create:
index: test
body:
settings:
soft_deletes.enabled: false
- do:
cluster.health:
wait_for_no_initializing_shards: true
- do:
index:
index: test
Expand Down Expand Up @@ -121,3 +177,40 @@ setup:
forbid_closed_indices: false
- match: { indices.test.primaries.translog.operations: 3 }
- match: { indices.test.primaries.translog.uncommitted_operations: 0 }

---
"Translog stats on closed indices with soft-deletes":
- skip:
version: " - 7.9.99"
reason: "start ignoring translog retention policy with soft-deletes enabled in 8.0"
- do:
indices.create:
index: test
body:
settings:
soft_deletes.enabled: true
- do:
cluster.health:
wait_for_no_initializing_shards: true
- do:
index:
index: test
id: 1
body: { "foo": "bar" }
- do:
indices.stats:
metric: [ translog ]
- match: { indices.test.primaries.translog.operations: 1 }
- match: { indices.test.primaries.translog.uncommitted_operations: 1 }
- do:
indices.close:
index: test
wait_for_active_shards: 1
- is_true: acknowledged
- do:
indices.stats:
metric: [ translog ]
expand_wildcards: all
forbid_closed_indices: false
- match: { indices.test.primaries.translog.operations: 0 }
- match: { indices.test.primaries.translog.uncommitted_operations: 0 }
67 changes: 43 additions & 24 deletions server/src/main/java/org/elasticsearch/index/IndexSettings.java
Expand Up @@ -195,24 +195,6 @@ public final class IndexSettings {
new ByteSizeValue(Long.MAX_VALUE, ByteSizeUnit.BYTES),
Property.Dynamic, Property.IndexScope);

/**
* Controls how long translog files that are no longer needed for persistence reasons
* will be kept around before being deleted. A longer retention policy is useful to increase
* the chance of ops based recoveries.
**/
public static final Setting<TimeValue> INDEX_TRANSLOG_RETENTION_AGE_SETTING =
Setting.timeSetting("index.translog.retention.age", TimeValue.timeValueHours(12), TimeValue.timeValueMillis(-1),
Property.Dynamic, Property.IndexScope);

/**
* Controls how many translog files that are no longer needed for persistence reasons
* will be kept around before being deleted. Keeping more files is useful to increase
* the chance of ops based recoveries.
**/
public static final Setting<ByteSizeValue> INDEX_TRANSLOG_RETENTION_SIZE_SETTING =
Setting.byteSizeSetting("index.translog.retention.size", new ByteSizeValue(512, ByteSizeUnit.MB), Property.Dynamic,
Property.IndexScope);

/**
* The maximum size of a translog generation. This is independent of the maximum size of
* translog operations that have not been flushed.
Expand Down Expand Up @@ -258,6 +240,27 @@ public final class IndexSettings {
Setting.longSetting("index.soft_deletes.retention.operations", 0, 0,
Property.IndexScope, Property.Dynamic);

/**
* Controls how long translog files that are no longer needed for persistence reasons
* will be kept around before being deleted. Keeping more files is useful to increase
* the chance of ops based recoveries for indices with soft-deletes disabled.
* This setting will be ignored if soft-deletes is enabled.
**/
public static final Setting<TimeValue> INDEX_TRANSLOG_RETENTION_AGE_SETTING =
Setting.timeSetting("index.translog.retention.age",
settings -> INDEX_SOFT_DELETES_SETTING.get(settings) ? TimeValue.MINUS_ONE : TimeValue.timeValueHours(12), TimeValue.MINUS_ONE,
Property.Dynamic, Property.IndexScope);

/**
* Controls how many translog files that are no longer needed for persistence reasons
* will be kept around before being deleted. Keeping more files is useful to increase
* the chance of ops based recoveries for indices with soft-deletes disabled.
* This setting will be ignored if soft-deletes is enabled.
**/
public static final Setting<ByteSizeValue> INDEX_TRANSLOG_RETENTION_SIZE_SETTING =
Setting.byteSizeSetting("index.translog.retention.size", settings -> INDEX_SOFT_DELETES_SETTING.get(settings) ? "-1" : "512MB",
Property.Dynamic, Property.IndexScope);

/**
* Controls the maximum length of time since a retention lease is created or renewed before it is considered expired.
*/
Expand Down Expand Up @@ -466,8 +469,6 @@ public IndexSettings(final IndexMetaData indexMetaData, final Settings nodeSetti
syncInterval = INDEX_TRANSLOG_SYNC_INTERVAL_SETTING.get(settings);
refreshInterval = scopedSettings.get(INDEX_REFRESH_INTERVAL_SETTING);
flushThresholdSize = scopedSettings.get(INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING);
translogRetentionAge = scopedSettings.get(INDEX_TRANSLOG_RETENTION_AGE_SETTING);
translogRetentionSize = scopedSettings.get(INDEX_TRANSLOG_RETENTION_SIZE_SETTING);
generationThresholdSize = scopedSettings.get(INDEX_TRANSLOG_GENERATION_THRESHOLD_SIZE_SETTING);
mergeSchedulerConfig = new MergeSchedulerConfig(this);
gcDeletesInMillis = scopedSettings.get(INDEX_GC_DELETES_SETTING).getMillis();
Expand All @@ -493,6 +494,8 @@ public IndexSettings(final IndexMetaData indexMetaData, final Settings nodeSetti
this.indexSortConfig = new IndexSortConfig(this);
searchIdleAfter = scopedSettings.get(INDEX_SEARCH_IDLE_AFTER);
defaultPipeline = scopedSettings.get(DEFAULT_PIPELINE);
setTranslogRetentionAge(scopedSettings.get(INDEX_TRANSLOG_RETENTION_AGE_SETTING));
setTranslogRetentionSize(scopedSettings.get(INDEX_TRANSLOG_RETENTION_SIZE_SETTING));

scopedSettings.addSettingsUpdateConsumer(MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING, mergePolicyConfig::setNoCFSRatio);
scopedSettings.addSettingsUpdateConsumer(MergePolicyConfig.INDEX_MERGE_POLICY_DELETES_PCT_ALLOWED_SETTING,
Expand Down Expand Up @@ -553,11 +556,21 @@ private void setTranslogFlushThresholdSize(ByteSizeValue byteSizeValue) {
}

private void setTranslogRetentionSize(ByteSizeValue byteSizeValue) {
this.translogRetentionSize = byteSizeValue;
if (softDeleteEnabled && byteSizeValue.getBytes() >= 0) {
// ignore the translog retention settings if soft-deletes enabled
this.translogRetentionSize = new ByteSizeValue(-1);
} else {
this.translogRetentionSize = byteSizeValue;
}
}

private void setTranslogRetentionAge(TimeValue age) {
this.translogRetentionAge = age;
if (softDeleteEnabled && age.millis() >= 0) {
// ignore the translog retention settings if soft-deletes enabled
this.translogRetentionAge = TimeValue.MINUS_ONE;
} else {
this.translogRetentionAge = age;
}
}

private void setGenerationThresholdSize(final ByteSizeValue generationThresholdSize) {
Expand Down Expand Up @@ -734,13 +747,19 @@ public TimeValue getRefreshInterval() {
/**
* Returns the transaction log retention size which controls how much of the translog is kept around to allow for ops based recoveries
*/
public ByteSizeValue getTranslogRetentionSize() { return translogRetentionSize; }
public ByteSizeValue getTranslogRetentionSize() {
assert softDeleteEnabled == false || translogRetentionSize.getBytes() == -1L : translogRetentionSize;
return translogRetentionSize;
}

/**
* Returns the transaction log retention age which controls the maximum age (time from creation) that translog files will be kept
* around
*/
public TimeValue getTranslogRetentionAge() { return translogRetentionAge; }
public TimeValue getTranslogRetentionAge() {
assert softDeleteEnabled == false || translogRetentionAge.millis() == -1L : translogRetentionSize;
return translogRetentionAge;
}

/**
* Returns the generation threshold size. As sequence numbers can cause multiple generations to
Expand Down
Expand Up @@ -177,11 +177,19 @@ private boolean isTranslogClean(ShardPath shardPath, String translogUUID) throws
final TranslogConfig translogConfig = new TranslogConfig(shardPath.getShardId(), translogPath,
indexSettings, BigArrays.NON_RECYCLING_INSTANCE);
long primaryTerm = indexSettings.getIndexMetaData().primaryTerm(shardPath.getShardId().id());
final TranslogDeletionPolicy translogDeletionPolicy =
new TranslogDeletionPolicy(indexSettings.getTranslogRetentionSize().getBytes(),
indexSettings.getTranslogRetentionAge().getMillis());
// We open translog to check for corruption, do not clean anything.
final TranslogDeletionPolicy retainAllTranslogPolicy = new TranslogDeletionPolicy(Long.MAX_VALUE, Long.MAX_VALUE) {
@Override
long minTranslogGenRequired(List<TranslogReader> readers, TranslogWriter writer) {
long minGen = writer.generation;
for (TranslogReader reader : readers) {
minGen = Math.min(reader.generation, minGen);
}
return minGen;
}
};
try (Translog translog = new Translog(translogConfig, translogUUID,
translogDeletionPolicy, () -> translogGlobalCheckpoint, () -> primaryTerm, seqNo -> {});
retainAllTranslogPolicy, () -> translogGlobalCheckpoint, () -> primaryTerm, seqNo -> {});
Translog.Snapshot snapshot = translog.newSnapshot()) {
//noinspection StatementWithEmptyBody we are just checking that we can iterate through the whole snapshot
while (snapshot.next() != null) {
Expand Down
Expand Up @@ -390,20 +390,24 @@ public void testAsyncTranslogTrimTaskOnClosedIndex() throws Exception {
IndexService indexService = createIndex(indexName, Settings.builder()
.put(TRANSLOG_RETENTION_CHECK_INTERVAL_SETTING.getKey(), "100ms")
.build());

Translog translog = IndexShardTestCase.getTranslog(indexService.getShard(0));
final Path translogPath = translog.getConfig().getTranslogPath();
final String translogUuid = translog.getTranslogUUID();

int translogOps = 0;
final int numDocs = scaledRandomIntBetween(10, 100);
for (int i = 0; i < numDocs; i++) {
client().prepareIndex().setIndex(indexName).setId(String.valueOf(i)).setSource("{\"foo\": \"bar\"}", XContentType.JSON).get();
translogOps++;
if (randomBoolean()) {
client().admin().indices().prepareFlush(indexName).get();
if (indexService.getIndexSettings().isSoftDeleteEnabled()) {
translogOps = 0;
}
}
}
assertThat(translog.totalOperations(), equalTo(numDocs));
assertThat(translog.stats().estimatedNumberOfOperations(), equalTo(numDocs));
assertThat(translog.totalOperations(), equalTo(translogOps));
assertThat(translog.stats().estimatedNumberOfOperations(), equalTo(translogOps));
assertAcked(client().admin().indices().prepareClose("test"));

indexService = getInstanceFromNode(IndicesService.class).indexServiceSafe(indexService.index());
Expand Down

0 comments on commit b0d346f

Please sign in to comment.