From b1c6f8e7aa8056934d30cb5684a9609fb2c0c5d5 Mon Sep 17 00:00:00 2001 From: Yannick Welsch Date: Wed, 3 Apr 2019 20:27:49 +0200 Subject: [PATCH] Avoid background sync on relocated primary (#40800) There were some test failures caused by the background retention lease sync running on a relocated primary. This commit fixes the situation that triggered the assertion and reactivates the failing test. Closes #40731 --- .../elasticsearch/upgrades/RecoveryIT.java | 1 - .../org/elasticsearch/index/IndexService.java | 6 +++- .../recovery/RecoveryWhileUnderLoadIT.java | 28 ++++++++++++++++++- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java index 1b2eb7064f04c..f8246ca6dcd24 100644 --- a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java +++ b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java @@ -121,7 +121,6 @@ protected void doRun() throws Exception { return future; } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/40731") public void testRecoveryWithConcurrentIndexing() throws Exception { final String index = "recovery_with_concurrent_indexing"; Response response = client().performRequest(new Request("GET", "_nodes")); diff --git a/server/src/main/java/org/elasticsearch/index/IndexService.java b/server/src/main/java/org/elasticsearch/index/IndexService.java index 501dbf442b00b..1a49fd418735d 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexService.java +++ b/server/src/main/java/org/elasticsearch/index/IndexService.java @@ -830,7 +830,11 @@ private void sync(final Consumer sync, final String source) { case STARTED: try { shard.runUnderPrimaryPermit( - () -> sync.accept(shard), + () -> { + if (shard.isRelocatedPrimary() == false) { + sync.accept(shard); + } + }, e -> { if (e instanceof AlreadyClosedException == false && e instanceof IndexShardClosedException == false) { diff --git a/server/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java b/server/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java index 9220748f38c25..7002a77b6ba10 100644 --- a/server/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java +++ b/server/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadIT.java @@ -30,20 +30,28 @@ import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Priority; +import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.shard.DocsStats; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.translog.Translog; +import org.elasticsearch.plugins.Plugin; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.test.BackgroundIndexer; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.junit.annotations.TestLogging; import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; @@ -58,6 +66,23 @@ public class RecoveryWhileUnderLoadIT extends ESIntegTestCase { private final Logger logger = LogManager.getLogger(RecoveryWhileUnderLoadIT.class); + public static final class RetentionLeaseSyncIntervalSettingPlugin extends Plugin { + + @Override + public List> getSettings() { + return Collections.singletonList(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING); + } + + } + + @Override + protected Collection> nodePlugins() { + return Stream.concat( + super.nodePlugins().stream(), + Stream.of(RetentionLeaseSyncIntervalSettingPlugin.class)) + .collect(Collectors.toList()); + } + public void testRecoverWhileUnderLoadAllocateReplicasTest() throws Exception { logger.info("--> creating test index ..."); int numberOfShards = numberOfShards(); @@ -260,7 +285,8 @@ public void testRecoverWhileRelocating() throws Exception { assertAcked(prepareCreate("test", 3, Settings.builder() .put(SETTING_NUMBER_OF_SHARDS, numShards) .put(SETTING_NUMBER_OF_REPLICAS, numReplicas) - .put(IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.getKey(), Translog.Durability.ASYNC))); + .put(IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.getKey(), Translog.Durability.ASYNC) + .put(IndexService.RETENTION_LEASE_SYNC_INTERVAL_SETTING.getKey(), randomFrom("100ms", "1s", "5s", "30s", "60s")))); final int numDocs = scaledRandomIntBetween(200, 9999);