Skip to content

Commit

Permalink
Add Health Indicator Plugin (#83205)
Browse files Browse the repository at this point in the history
This change introduces an initial model for health indicators, as well
as an extension point that allows health indicators to be created outside
of the server module.

Co-authored-by: Tanguy Leroux <tlrx.dev@gmail.com>
  • Loading branch information
idegtiarenko and tlrx committed Feb 9, 2022
1 parent de756c1 commit 2982591
Show file tree
Hide file tree
Showing 18 changed files with 489 additions and 219 deletions.
5 changes: 5 additions & 0 deletions docs/changelog/83205.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 83205
summary: Add Health Indicator Plugin
area: Health
type: feature
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,3 @@
- is_true: components.cluster_coordination.indicators.instance_has_master.details.coordinating_node.name
- is_true: components.cluster_coordination.indicators.instance_has_master.details.master_node.node_id
- is_true: components.cluster_coordination.indicators.instance_has_master.details.master_node.name
- match: { components.snapshots.status: "GREEN" }
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.cluster.coordination;

import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.health.GetHealthAction;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.disruption.NetworkDisruption;
import org.elasticsearch.test.transport.MockTransportService;

import java.util.Collection;
import java.util.List;
import java.util.Set;

import static org.elasticsearch.cluster.coordination.InstanceHasMasterHealthIndicatorService.NAME;
import static org.elasticsearch.health.HealthStatus.GREEN;
import static org.elasticsearch.health.HealthStatus.RED;
import static org.elasticsearch.health.ServerHealthComponents.CLUSTER_COORDINATION;
import static org.hamcrest.Matchers.equalTo;

@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE)
public class InstanceHasMasterHealthIndicatorServiceIT extends ESIntegTestCase {

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return List.of(MockTransportService.TestPlugin.class);
}

@Override
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
return Settings.builder()
.put(super.nodeSettings(nodeOrdinal, otherSettings))
.put(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey(), "all")
.build();
}

public void testGetHealthWhenMasterIsElected() throws Exception {
var client = client();

var response = client.execute(GetHealthAction.INSTANCE, new GetHealthAction.Request()).get();

assertThat(response.findComponent(CLUSTER_COORDINATION).findIndicator(NAME).status(), equalTo(GREEN));
}

public void testGetHealthWhenNoMaster() throws Exception {
var client = internalCluster().coordOnlyNodeClient();

var disruptionScheme = new NetworkDisruption(
new NetworkDisruption.IsolateAllNodes(Set.of(internalCluster().getNodeNames())),
NetworkDisruption.DISCONNECT
);

internalCluster().setDisruptionScheme(disruptionScheme);
disruptionScheme.startDisrupting();

try {
assertBusy(() -> {
ClusterState state = client.admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
assertTrue(state.blocks().hasGlobalBlockWithId(NoMasterBlockService.NO_MASTER_BLOCK_ID));

var response = client.execute(GetHealthAction.INSTANCE, new GetHealthAction.Request()).get();

assertThat(response.findComponent(CLUSTER_COORDINATION).findIndicator(NAME).status(), equalTo(RED));
});
} finally {
internalCluster().clearDisruptionScheme(true);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,93 +8,143 @@

package org.elasticsearch.health;

import org.apache.lucene.util.SetOnce;
import org.elasticsearch.client.internal.Client;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.coordination.NoMasterBlockService;
import org.elasticsearch.cluster.ClusterName;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.health.components.controller.ClusterCoordination;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.NodeEnvironment;
import org.elasticsearch.plugins.HealthPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.repositories.RepositoriesService;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.disruption.NetworkDisruption;
import org.elasticsearch.test.transport.MockTransportService;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.watcher.ResourceWatcherService;
import org.elasticsearch.xcontent.NamedXContentRegistry;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.function.Supplier;

@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE)
import static org.elasticsearch.common.util.CollectionUtils.appendToCopy;
import static org.hamcrest.Matchers.equalTo;

@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST)
public class GetHealthActionIT extends ESIntegTestCase {

@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Collections.singletonList(MockTransportService.TestPlugin.class);
return appendToCopy(super.nodePlugins(), TestHealthPlugin.class);
}

@Override
protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) {
return Settings.builder()
.put(super.nodeSettings(nodeOrdinal, otherSettings))
.put(NoMasterBlockService.NO_MASTER_BLOCK_SETTING.getKey(), "all")
.build();
}
/**
 * Test-only setting registered under the key {@code test.health.status}.
 * Its string value is parsed with {@link HealthStatus#valueOf} and defaults to {@code "GREEN"};
 * it is declared with node scope and as dynamic. {@code FixedStatusHealthIndicatorService#calculate}
 * reads it from the cluster settings to decide which status to report.
 */
public static final Setting<HealthStatus> TEST_HEALTH_STATUS = new Setting<>(
"test.health.status",
"GREEN",
HealthStatus::valueOf,
Setting.Property.NodeScope,
Setting.Property.Dynamic
);

public void testGetHealth() throws Exception {
GetHealthAction.Response response = client().execute(GetHealthAction.INSTANCE, new GetHealthAction.Request()).get();
assertEquals(cluster().getClusterName(), response.getClusterName().value());
assertEquals(HealthStatus.GREEN, response.getStatus());
public static final class TestHealthPlugin extends Plugin implements HealthPlugin {

private final SetOnce<FixedStatusHealthIndicatorService> healthIndicatorService = new SetOnce<>();

assertEquals(2, response.getComponents().size());
/** Registers the single test setting that carries the health status to report. */
@Override
public List<Setting<?>> getSettings() {
    List<Setting<?>> pluginSettings = List.of(TEST_HEALTH_STATUS);
    return pluginSettings;
}

for (HealthComponentResult component : response.getComponents()) {
assertEquals(HealthStatus.GREEN, component.status());
/**
 * Builds the fixed-status indicator from the supplied cluster service, stores it in the
 * set-once holder so it can be handed out later, and exposes it as the plugin's only component.
 * Of all the arguments only {@code clusterService} is used.
 */
@Override
public Collection<Object> createComponents(
    Client client,
    ClusterService clusterService,
    ThreadPool threadPool,
    ResourceWatcherService resourceWatcherService,
    ScriptService scriptService,
    NamedXContentRegistry xContentRegistry,
    Environment environment,
    NodeEnvironment nodeEnvironment,
    NamedWriteableRegistry namedWriteableRegistry,
    IndexNameExpressionResolver indexNameExpressionResolver,
    Supplier<RepositoriesService> repositoriesServiceSupplier
) {
    final FixedStatusHealthIndicatorService indicator = new FixedStatusHealthIndicatorService(clusterService);
    healthIndicatorService.set(indicator);
    return List.of(indicator);
}

HealthComponentResult controller = response.getComponents()
.stream()
.filter(c -> c.name().equals("cluster_coordination"))
.findAny()
.orElseThrow();
assertEquals(1, controller.indicators().size());
HealthIndicatorResult nodeDoesNotHaveMaster = controller.indicators().get(ClusterCoordination.INSTANCE_HAS_MASTER_NAME);
assertEquals(ClusterCoordination.INSTANCE_HAS_MASTER_NAME, nodeDoesNotHaveMaster.name());
assertEquals(HealthStatus.GREEN, nodeDoesNotHaveMaster.status());
assertEquals(ClusterCoordination.INSTANCE_HAS_MASTER_GREEN_SUMMARY, nodeDoesNotHaveMaster.summary());
/** Returns the indicator previously stored in the set-once holder as a one-element list. */
@Override
public Collection<HealthIndicatorService> getHealthIndicatorServices() {
    HealthIndicatorService registered = healthIndicatorService.get();
    return List.of(registered);
}
}

public void testGetHealthInstanceNoMaster() throws Exception {
// builds the coordinating-only client before disrupting all nodes
final Client client = internalCluster().coordOnlyNodeClient();
/**
 * Health indicator whose result is dictated by the {@code TEST_HEALTH_STATUS} setting: each call to
 * {@link #calculate()} reads the current value from the cluster settings and reports it unchanged,
 * which lets a test choose the health returned by the health API.
 */
public static final class FixedStatusHealthIndicatorService implements HealthIndicatorService {

    private static final String INDICATOR_NAME = "test_indicator";
    private static final String COMPONENT_NAME = "test_component";

    private final ClusterService clusterService;

    public FixedStatusHealthIndicatorService(ClusterService clusterService) {
        this.clusterService = clusterService;
    }

    @Override
    public String name() {
        return INDICATOR_NAME;
    }

    @Override
    public String component() {
        return COMPONENT_NAME;
    }

    @Override
    public HealthIndicatorResult calculate() {
        // Looked up on every call rather than cached at construction time.
        final HealthStatus configured = clusterService.getClusterSettings().get(TEST_HEALTH_STATUS);
        final String summary = "Health is set to [" + configured + "] by test plugin";
        return createIndicator(configured, summary, HealthIndicatorDetails.EMPTY);
    }
}

final NetworkDisruption disruptionScheme = new NetworkDisruption(
new NetworkDisruption.IsolateAllNodes(new HashSet<>(Arrays.asList(internalCluster().getNodeNames()))),
NetworkDisruption.DISCONNECT
);
public void testGetHealth() throws Exception {

internalCluster().setDisruptionScheme(disruptionScheme);
disruptionScheme.startDisrupting();
var client = client();
var status = randomFrom(HealthStatus.values());

try {
assertBusy(() -> {
ClusterState state = client.admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
assertTrue(state.blocks().hasGlobalBlockWithId(NoMasterBlockService.NO_MASTER_BLOCK_ID));

GetHealthAction.Response response = client.execute(GetHealthAction.INSTANCE, new GetHealthAction.Request()).get();
assertEquals(HealthStatus.RED, response.getStatus());
assertEquals(2, response.getComponents().size());
HealthComponentResult controller = response.getComponents()
.stream()
.filter(c -> c.name().equals("cluster_coordination"))
.findAny()
.orElseThrow();
assertEquals(1, controller.indicators().size());
HealthIndicatorResult instanceHasMaster = controller.indicators().get(ClusterCoordination.INSTANCE_HAS_MASTER_NAME);
assertEquals(ClusterCoordination.INSTANCE_HAS_MASTER_NAME, instanceHasMaster.name());
assertEquals(HealthStatus.RED, instanceHasMaster.status());
assertEquals(ClusterCoordination.INSTANCE_HAS_MASTER_RED_SUMMARY, instanceHasMaster.summary());
});
updateClusterSettings(Settings.builder().put(TEST_HEALTH_STATUS.getKey(), status));

var response = client.execute(GetHealthAction.INSTANCE, new GetHealthAction.Request()).get();

assertThat(response.getStatus(), equalTo(status));
assertThat(response.getClusterName(), equalTo(new ClusterName(cluster().getClusterName())));
assertThat(
response.findComponent("test_component"),
equalTo(
new HealthComponentResult(
"test_component",
status,
List.of(
new HealthIndicatorResult(
"test_indicator",
"test_component",
status,
"Health is set to [" + status + "] by test plugin",
HealthIndicatorDetails.EMPTY
)
)
)
)
);
} finally {
internalCluster().clearDisruptionScheme(true);
updateClusterSettings(Settings.builder().putNull(TEST_HEALTH_STATUS.getKey()));
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.cluster.coordination;

import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.health.HealthIndicatorResult;
import org.elasticsearch.health.HealthIndicatorService;
import org.elasticsearch.health.HealthStatus;

import static org.elasticsearch.health.ServerHealthComponents.CLUSTER_COORDINATION;

/**
 * Health indicator of the {@code cluster_coordination} component that reports whether the cluster
 * state held by the local node (the node computing the health result) contains a master node.
 * <p>
 * The status is GREEN when a master node is present in that state and RED otherwise. The details
 * carry the id and name of the local ("coordinating") node and of the master node; when no master
 * is known, both master fields are written as explicit nulls.
 */
public class InstanceHasMasterHealthIndicatorService implements HealthIndicatorService {

    public static final String NAME = "instance_has_master";

    private static final String INSTANCE_HAS_MASTER_GREEN_SUMMARY = "Health coordinating instance has a master node.";
    private static final String INSTANCE_HAS_MASTER_RED_SUMMARY = "Health coordinating instance does not have a master node.";

    private final ClusterService clusterService;

    /**
     * @param clusterService source of the local node and of the cluster state inspected by {@link #calculate()}
     */
    public InstanceHasMasterHealthIndicatorService(ClusterService clusterService) {
        this.clusterService = clusterService;
    }

    @Override
    public String name() {
        return NAME;
    }

    @Override
    public String component() {
        return CLUSTER_COORDINATION;
    }

    /**
     * Reads the local node and the master node from the cluster service and turns them into an
     * indicator result.
     *
     * @return a GREEN result when the cluster state lists a master node, a RED result otherwise
     */
    @Override
    public HealthIndicatorResult calculate() {

        DiscoveryNode coordinatingNode = clusterService.localNode();
        ClusterState clusterState = clusterService.state();
        DiscoveryNodes nodes = clusterState.nodes();
        DiscoveryNode masterNode = nodes.getMasterNode();

        // Decide once; status and summary must always agree.
        boolean hasMaster = masterNode != null;
        HealthStatus instanceHasMasterStatus = hasMaster ? HealthStatus.GREEN : HealthStatus.RED;
        String instanceHasMasterSummary = hasMaster ? INSTANCE_HAS_MASTER_GREEN_SUMMARY : INSTANCE_HAS_MASTER_RED_SUMMARY;

        return createIndicator(instanceHasMasterStatus, instanceHasMasterSummary, (builder, params) -> {
            builder.startObject();
            // Each nested object is written through the builder passed to its callback instead of the
            // captured outer builder. NOTE(review): presumably object(...) passes the same builder
            // instance to the callback, so the output is unchanged; confirm against XContentBuilder.
            builder.object("coordinating_node", coordinatingBuilder -> {
                coordinatingBuilder.field("node_id", coordinatingNode.getId());
                coordinatingBuilder.field("name", coordinatingNode.getName());
            });
            builder.object("master_node", masterBuilder -> {
                if (masterNode != null) {
                    masterBuilder.field("node_id", masterNode.getId());
                    masterBuilder.field("name", masterNode.getName());
                } else {
                    // Keep the fields present so the shape of the details is the same in both cases.
                    masterBuilder.nullField("node_id");
                    masterBuilder.nullField("name");
                }
            });
            return builder.endObject();
        });
    }
}

0 comments on commit 2982591

Please sign in to comment.