Merged
@@ -21,6 +21,7 @@
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;
@@ -85,7 +86,8 @@ public static Optional<String> maybeTakeSnapshot(final String jobId, final Client client, final ClusterService clusterService,
final SnapshotHistoryStore historyStore) {
Optional<SnapshotLifecyclePolicyMetadata> maybeMetadata = getSnapPolicyMetadata(jobId, clusterService.state());
String snapshotName = maybeMetadata.map(policyMetadata -> {
-CreateSnapshotRequest request = policyMetadata.getPolicy().toRequest();
+// don't time out on this request to not produce failed SLM runs in case of a temporarily slow master node
+CreateSnapshotRequest request = policyMetadata.getPolicy().toRequest().masterNodeTimeout(TimeValue.MAX_VALUE);
Member commented:
I am wary of infinite timeouts just because we should be able to come up with a reasonable time that a request should either succeed or fail, or else SLM should actually fail rather than waiting forever.

I know this is already merged, but what about just setting a reasonably long timeout (something like 2 hours, or even 24 hours) instead?
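
A minimal sketch of the bounded alternative being suggested; the 24-hour value is only the example floated in this comment, not what the PR merged:

// Hypothetical alternative to TimeValue.MAX_VALUE: a long but finite master-node timeout.
// The 24-hour figure is the reviewer's example value, not code from this PR.
CreateSnapshotRequest request = policyMetadata.getPolicy()
    .toRequest()
    .masterNodeTimeout(TimeValue.timeValueHours(24));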

Contributor commented:
I think a timeout here is worse than patiently letting things complete in their own time. Timing out and retrying the thing that just timed out is particularly bad.

In this case, the timeout only applies to finding the master and processing the cluster state update that starts the snapshot. After that, we could already be waiting arbitrarily long. It's definitely a bug for us to take minutes or hours to process that first cluster state update, but it's almost certainly not a bug in SLM or snapshotting, and it doesn't make much sense to me to give up and retry (from the back of the queue) after a timeout elapses. We may as well stay in line and know that the master will get around to this eventually.

Do we retry regardless of whether the previous task completed or not? If so, do we have a mechanism to prevent too many of these jobs from piling up?
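
A sketch of that scope, restating the point above as comments on the request built in this change (the comments are illustrative, not from the PR):

CreateSnapshotRequest request = policyMetadata.getPolicy()
    .toRequest()
    // masterNodeTimeout bounds only (1) finding an elected master and (2) the master
    // applying the cluster state update that starts the snapshot; MAX_VALUE keeps the
    // task queued until the master gets to it instead of failing the SLM run.
    .masterNodeTimeout(TimeValue.MAX_VALUE);
// The snapshot itself runs after that cluster state update and is not bounded by
// this timeout at all, whatever value is chosen.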

Member commented:
> Do we retry regardless of whether the previous task completed or not? If so, do we have a mechanism to prevent too many of these jobs from piling up?

We have it on our list to discuss SLM retries and what we want to do in the event that an SLM snapshot fails, so it's still under discussion.

Contributor commented:
cross linking: #70587

final LifecyclePolicySecurityClient clientWithHeaders = new LifecyclePolicySecurityClient(client,
ClientHelper.INDEX_LIFECYCLE_ORIGIN, policyMetadata.getHeaders());
logger.info("snapshot lifecycle policy [{}] issuing create snapshot [{}]",
@@ -242,6 +242,8 @@ void getAllRetainableSnapshots(Collection<String> repositories, ActionListener<M

client.admin().cluster()
.prepareGetSnapshots(repositories.toArray(Strings.EMPTY_ARRAY))
+// don't time out on this request to not produce failed SLM runs in case of a temporarily slow master node
+.setMasterNodeTimeout(TimeValue.MAX_VALUE)
.setIgnoreUnavailable(true)
.execute(ActionListener.wrap(resp -> {
if (logger.isTraceEnabled()) {
@@ -373,7 +375,9 @@ private void deleteSnapshots(SnapshotLifecycleStats slmStats, AtomicInteger dele
void deleteSnapshot(String slmPolicy, String repo, SnapshotId snapshot, SnapshotLifecycleStats slmStats,
ActionListener<AcknowledgedResponse> listener) {
logger.info("[{}] snapshot retention deleting snapshot [{}]", repo, snapshot);
-client.admin().cluster().prepareDeleteSnapshot(repo, snapshot.getName()).execute(ActionListener.wrap(acknowledgedResponse -> {
+// don't time out on this request to not produce failed SLM runs in case of a temporarily slow master node
+client.admin().cluster().prepareDeleteSnapshot(repo, snapshot.getName()).setMasterNodeTimeout(TimeValue.MAX_VALUE).execute(
+ActionListener.wrap(acknowledgedResponse -> {
slmStats.snapshotDeleted(slmPolicy);
listener.onResponse(acknowledgedResponse);
},