Fix SnapshotBasedIndexRecoveryIT#testNodeDisconnectsDoNotOverAccountRecoveredBytes (#90849) (#91319)

The clock used to check for timeouts does not provide enough granularity,
so the RESTORE_FILE_FROM_SNAPSHOT request is sometimes retried when the
test expects it to fail right away. This commit configures the nodes
with a retry timeout of 0, effectively disabling retries of recovery
requests.

Closes #90665
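
To see why a 1 ms timeout is not reliable here, consider a minimal sketch of a
retry check driven by a coarse-grained clock. This is hypothetical illustration,
not the actual Elasticsearch implementation: the class name and the 200 ms
granularity are invented. If the cached clock only advances in large steps, the
measured elapsed time can still read zero after the 1 ms deadline has logically
passed, so the request is retried; a timeout of zero can never be satisfied, so
the first failure is final.

// Hypothetical sketch: a retry-timeout check under a coarse clock.
// RetryTimeoutSketch and the 200 ms step are illustrative only.
public class RetryTimeoutSketch {

    // Simulates a cached relative-time clock that advances in 200 ms steps.
    static long coarseNowMillis() {
        return (System.currentTimeMillis() / 200) * 200;
    }

    // Retry while the measured elapsed time is below the configured timeout.
    static boolean shouldRetry(long startMillis, long retryTimeoutMillis) {
        long elapsed = coarseNowMillis() - startMillis;
        return elapsed < retryTimeoutMillis;
    }

    public static void main(String[] args) {
        long start = coarseNowMillis();
        // With a 1 ms timeout, elapsed usually still reads 0 here, so the
        // dropped request would be retried despite the deadline having passed.
        System.out.println("1 ms timeout retries: " + shouldRetry(start, 1));
        // With a timeout of 0, elapsed < 0 is never true: no retry, ever.
        System.out.println("0 ms timeout retries: " + shouldRetry(start, 0));
    }
}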
fcofdez committed Nov 4, 2022
1 parent 55cde1a commit b4b138c
Showing 1 changed file with 17 additions and 9 deletions.
@@ -100,6 +100,7 @@
import static org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider.SETTING_ALLOCATION_MAX_RETRY;
import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_INTERNAL_ACTION_RETRY_TIMEOUT_SETTING;
import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_MAX_BYTES_PER_SEC_SETTING;
+import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_MAX_CONCURRENT_FILE_CHUNKS_SETTING;
import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_MAX_CONCURRENT_OPERATIONS_SETTING;
import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_MAX_CONCURRENT_SNAPSHOT_FILE_DOWNLOADS;
import static org.elasticsearch.indices.recovery.RecoverySettings.INDICES_RECOVERY_MAX_CONCURRENT_SNAPSHOT_FILE_DOWNLOADS_PER_NODE;
@@ -1255,7 +1256,6 @@ public void testRecoveryRetryKeepsTheGrantedSnapshotFileDownloadPermit() throws
);
}

-@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/90665")
public void testNodeDisconnectsDoNotOverAccountRecoveredBytes() throws Exception {
// This test reproduces a rare (but possible) scenario where a shard is recovering using
// snapshots, using logically equivalent index files, but half-way the connection between
@@ -1266,14 +1266,7 @@ public void testNodeDisconnectsDoNotOverAccountRecoveredBytes() throws Exception
// - The target updates the recovered bytes for the file it has been downloading, after the recovery state was cleared.
// This could end up over-accounting the number of recovered bytes

-Settings nodeSettings = Settings.builder()
-    // We use a really low timeout to avoid retrying the first RESTORE_FILE_FROM_SNAPSHOT after the connection is dropped
-    .put(INDICES_RECOVERY_INTERNAL_ACTION_RETRY_TIMEOUT_SETTING.getKey(), TimeValue.timeValueMillis(1))
-    .put(INDICES_RECOVERY_MAX_CONCURRENT_SNAPSHOT_FILE_DOWNLOADS.getKey(), 1)
-    .put(INDICES_RECOVERY_MAX_CONCURRENT_OPERATIONS_SETTING.getKey(), 1)
-    .build();
-
-List<String> dataNodes = internalCluster().startDataOnlyNodes(3, nodeSettings);
+List<String> dataNodes = internalCluster().startDataOnlyNodes(3);
String indexName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT);
createIndex(
indexName,
@@ -1287,6 +1280,21 @@ public void testNodeDisconnectsDoNotOverAccountRecoveredBytes() throws Exception
);
ensureGreen(indexName);

+assertAcked(
+    client().admin()
+        .cluster()
+        .prepareUpdateSettings()
+        .setPersistentSettings(
+            Settings.builder()
+                // Do not retry the first RESTORE_FILE_FROM_SNAPSHOT after the connection is closed
+                .put(INDICES_RECOVERY_INTERNAL_ACTION_RETRY_TIMEOUT_SETTING.getKey(), TimeValue.ZERO)
+                .put(INDICES_RECOVERY_MAX_CONCURRENT_SNAPSHOT_FILE_DOWNLOADS.getKey(), 1)
+                .put(INDICES_RECOVERY_MAX_CONCURRENT_FILE_CHUNKS_SETTING.getKey(), 1)
+                .put(INDICES_RECOVERY_MAX_CONCURRENT_OPERATIONS_SETTING.getKey(), 1)
+                .build()
+        )
+);
+
int numDocs = randomIntBetween(300, 1000);
indexDocs(indexName, 0, numDocs);
// Flush to ensure that index_commit_seq_nos(replica) == index_commit_seq_nos(primary),
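For context on the race the test's opening comment describes, here is a minimal
hypothetical sketch; the class below is invented for illustration and is not
Elasticsearch code. A stale byte-count update from the first recovery attempt
lands after the recovery state has been reset for the retry, so the same bytes
are counted twice.

import java.util.concurrent.atomic.AtomicLong;

// Hypothetical sketch of the over-accounting race; not Elasticsearch code.
class RecoveredBytesSketch {
    private final AtomicLong recoveredBytes = new AtomicLong();

    void addRecoveredBytes(long bytes) {
        recoveredBytes.addAndGet(bytes);
    }

    void resetForRetry() {
        // The retried recovery starts accounting from scratch.
        recoveredBytes.set(0);
    }

    public static void main(String[] args) {
        RecoveredBytesSketch state = new RecoveredBytesSketch();
        long fileSize = 1024;
        // First attempt downloads the file, but the connection drops before
        // the byte-count update is applied; recovery is reset for a retry.
        state.resetForRetry();
        // The stale update from the first attempt now lands too late...
        state.addRecoveredBytes(fileSize);
        // ...and the second attempt downloads the same file again.
        state.addRecoveredBytes(fileSize);
        // The counter reads 2048 bytes recovered for a 1024-byte file.
        System.out.println("recovered = " + state.recoveredBytes.get());
    }
}

With the zero retry timeout configured above, the test stays on the no-retry
path, so a late update can no longer race with a reset of the recovery state.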
