Skip to content

Commit

Permalink
Move the master stability logic into its own service separate from th…
Browse files Browse the repository at this point in the history
…e HealthIndicatorService (#87672)

This commit moves most of the logic of StableMasterHealthIndicatorService into CoordinationDiagnosticsService,
leaving only the parts that are specific to forming up the health API response.
  • Loading branch information
masseyke committed Jun 21, 2022
1 parent e0ced8a commit 214bd60
Show file tree
Hide file tree
Showing 10 changed files with 1,058 additions and 676 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/87672.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 87672
summary: Move the master stability logic into its own service separate from the `HealthIndicatorService`
area: Health
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
import org.elasticsearch.client.internal.Client;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateUpdateTask;
import org.elasticsearch.cluster.coordination.CoordinationDiagnosticsService;
import org.elasticsearch.cluster.coordination.Coordinator;
import org.elasticsearch.cluster.coordination.FollowersChecker;
import org.elasticsearch.cluster.coordination.LeaderChecker;
import org.elasticsearch.cluster.coordination.StableMasterHealthIndicatorService;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Priority;
Expand Down Expand Up @@ -320,8 +320,8 @@ public void testRepeatedMasterChanges(String expectedMasterStabilitySummarySubst
Settings.builder()
.put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s")
.put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "1s")
.put(StableMasterHealthIndicatorService.IDENTITY_CHANGES_THRESHOLD_SETTING.getKey(), 1)
.put(StableMasterHealthIndicatorService.NO_MASTER_TRANSITIONS_THRESHOLD_SETTING.getKey(), 100)
.put(CoordinationDiagnosticsService.IDENTITY_CHANGES_THRESHOLD_SETTING.getKey(), 1)
.put(CoordinationDiagnosticsService.NO_MASTER_TRANSITIONS_THRESHOLD_SETTING.getKey(), 100)
.build()
);
ensureStableCluster(3);
Expand Down Expand Up @@ -416,19 +416,16 @@ public void testRepeatedNullMasterRecognizedAsGreenIfMasterDoesNotKnowItIsUnstab
Settings.builder()
.put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s")
.put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "1s")
.put(StableMasterHealthIndicatorService.NO_MASTER_TRANSITIONS_THRESHOLD_SETTING.getKey(), 1)
.put(CoordinationDiagnosticsService.NO_MASTER_TRANSITIONS_THRESHOLD_SETTING.getKey(), 1)
.build()
);
final List<String> dataNodes = internalCluster().startDataOnlyNodes(
2,
Settings.builder()
.put(LeaderChecker.LEADER_CHECK_TIMEOUT_SETTING.getKey(), "1s")
.put(Coordinator.PUBLISH_TIMEOUT_SETTING.getKey(), "1s")
.put(StableMasterHealthIndicatorService.NO_MASTER_TRANSITIONS_THRESHOLD_SETTING.getKey(), 1)
.put(
StableMasterHealthIndicatorService.NODE_HAS_MASTER_LOOKUP_TIMEFRAME_SETTING.getKey(),
new TimeValue(60, TimeUnit.SECONDS)
)
.put(CoordinationDiagnosticsService.NO_MASTER_TRANSITIONS_THRESHOLD_SETTING.getKey(), 1)
.put(CoordinationDiagnosticsService.NODE_HAS_MASTER_LOOKUP_TIMEFRAME_SETTING.getKey(), new TimeValue(60, TimeUnit.SECONDS))
.build()
);
ensureStableCluster(3);
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.elasticsearch.cluster.action.index.MappingUpdatedAction;
import org.elasticsearch.cluster.coordination.ClusterBootstrapService;
import org.elasticsearch.cluster.coordination.ClusterFormationFailureHelper;
import org.elasticsearch.cluster.coordination.CoordinationDiagnosticsService;
import org.elasticsearch.cluster.coordination.Coordinator;
import org.elasticsearch.cluster.coordination.ElectionSchedulerFactory;
import org.elasticsearch.cluster.coordination.FollowersChecker;
Expand All @@ -32,7 +33,6 @@
import org.elasticsearch.cluster.coordination.MasterHistory;
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
import org.elasticsearch.cluster.coordination.Reconfigurator;
import org.elasticsearch.cluster.coordination.StableMasterHealthIndicatorService;
import org.elasticsearch.cluster.metadata.IndexGraveyard;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.routing.OperationRouting;
Expand Down Expand Up @@ -514,9 +514,9 @@ public void apply(Settings value, Settings current, Settings previous) {
IndexingPressure.MAX_INDEXING_BYTES,
ShardLimitValidator.SETTING_CLUSTER_MAX_SHARDS_PER_NODE_FROZEN,
DataTier.ENFORCE_DEFAULT_TIER_PREFERENCE_SETTING,
StableMasterHealthIndicatorService.IDENTITY_CHANGES_THRESHOLD_SETTING,
StableMasterHealthIndicatorService.NO_MASTER_TRANSITIONS_THRESHOLD_SETTING,
StableMasterHealthIndicatorService.NODE_HAS_MASTER_LOOKUP_TIMEFRAME_SETTING,
CoordinationDiagnosticsService.IDENTITY_CHANGES_THRESHOLD_SETTING,
CoordinationDiagnosticsService.NO_MASTER_TRANSITIONS_THRESHOLD_SETTING,
CoordinationDiagnosticsService.NODE_HAS_MASTER_LOOKUP_TIMEFRAME_SETTING,
MasterHistory.MAX_HISTORY_AGE_SETTING,
ReadinessService.PORT,
HealthNodeTaskExecutor.ENABLED_SETTING
Expand Down
12 changes: 12 additions & 0 deletions server/src/main/java/org/elasticsearch/health/HealthStatus.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.elasticsearch.health;

import org.elasticsearch.cluster.coordination.CoordinationDiagnosticsService;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;

Expand Down Expand Up @@ -52,4 +53,15 @@ public static HealthStatus merge(Stream<HealthStatus> statuses) {
public String xContentValue() {
return name().toLowerCase(Locale.ROOT);
}

public static HealthStatus fromCoordinationDiagnosticsStatus(
CoordinationDiagnosticsService.CoordinationDiagnosticsStatus coordinationDiagnosticsStatus
) {
return switch (coordinationDiagnosticsStatus) {
case GREEN -> HealthStatus.GREEN;
case YELLOW -> HealthStatus.YELLOW;
case RED -> HealthStatus.RED;
case UNKNOWN -> HealthStatus.UNKNOWN;
};
}
}
12 changes: 9 additions & 3 deletions server/src/main/java/org/elasticsearch/node/Node.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.elasticsearch.cluster.InternalClusterInfoService;
import org.elasticsearch.cluster.NodeConnectionsService;
import org.elasticsearch.cluster.action.index.MappingUpdatedAction;
import org.elasticsearch.cluster.coordination.CoordinationDiagnosticsService;
import org.elasticsearch.cluster.coordination.Coordinator;
import org.elasticsearch.cluster.coordination.MasterHistoryService;
import org.elasticsearch.cluster.coordination.StableMasterHealthIndicatorService;
Expand Down Expand Up @@ -900,7 +901,11 @@ protected Node(
);

MasterHistoryService masterHistoryService = new MasterHistoryService(transportService, threadPool, clusterService);
HealthService healthService = createHealthService(clusterService, clusterModule, masterHistoryService);
CoordinationDiagnosticsService coordinationDiagnosticsService = new CoordinationDiagnosticsService(
clusterService,
masterHistoryService
);
HealthService healthService = createHealthService(clusterService, clusterModule, coordinationDiagnosticsService);

modules.add(b -> {
b.bind(Node.class).toInstance(this);
Expand Down Expand Up @@ -984,6 +989,7 @@ protected Node(
b.bind(DesiredNodesSettingsValidator.class).toInstance(desiredNodesSettingsValidator);
b.bind(HealthService.class).toInstance(healthService);
b.bind(MasterHistoryService.class).toInstance(masterHistoryService);
b.bind(CoordinationDiagnosticsService.class).toInstance(coordinationDiagnosticsService);
if (HealthNode.isEnabled()) {
b.bind(HealthNodeTaskExecutor.class).toInstance(healthNodeTaskExecutor);
}
Expand Down Expand Up @@ -1045,10 +1051,10 @@ protected Node(
private HealthService createHealthService(
ClusterService clusterService,
ClusterModule clusterModule,
MasterHistoryService masterHistoryService
CoordinationDiagnosticsService coordinationDiagnosticsService
) {
List<HealthIndicatorService> preflightHealthIndicatorServices = Collections.singletonList(
new StableMasterHealthIndicatorService(clusterService, masterHistoryService)
new StableMasterHealthIndicatorService(coordinationDiagnosticsService)
);
var serverHealthIndicatorServices = List.of(
new RepositoryIntegrityHealthIndicatorService(clusterService),
Expand Down

0 comments on commit 214bd60

Please sign in to comment.