Skip to content

Commit

Permalink
Added test with AwaitsFix annotation that simulates a split brain.
Browse files Browse the repository at this point in the history
  • Loading branch information
martijnvg committed Apr 10, 2014
1 parent 7a26b49 commit 7bf3ffe
Showing 1 changed file with 145 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.discovery;

import com.google.common.base.Predicate;
import org.apache.lucene.util.LuceneTestCase;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.elasticsearch.test.transport.MockTransportService;
import org.elasticsearch.transport.TransportModule;
import org.elasticsearch.transport.TransportService;
import org.junit.Test;

import java.util.Arrays;
import java.util.List;

import static org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope;
import static org.elasticsearch.test.ElasticsearchIntegrationTest.Scope;
import static org.hamcrest.Matchers.*;

/**
*/
@ClusterScope(scope= Scope.SUITE, numNodes=0)
public class DiscoveryWithNetworkFailuresTests extends ElasticsearchIntegrationTest {

@Test
@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elasticsearch/elasticsearch/issues/2488")
public void failWithMinimumMasterNodesConfigured() throws Exception {
final Settings settings = ImmutableSettings.settingsBuilder()
.put("discovery.zen.minimum_master_nodes", 2)
.put("discovery.zen.fd.ping_timeout", "1s") // <-- for hitting simulated network failures quickly
.put(TransportModule.TRANSPORT_SERVICE_TYPE_KEY, MockTransportService.class.getName())
.build();
List<String>nodes = cluster().startNodesAsync(3, settings).get();

// Wait until a green status has been reaches and 3 nodes are part of the cluster
List<String> nodesList = Arrays.asList(nodes.toArray(new String[3]));
ClusterHealthResponse clusterHealthResponse = client().admin().cluster().prepareHealth()
.setWaitForEvents(Priority.LANGUID)
.setWaitForNodes("3")
.get();
assertThat(clusterHealthResponse.isTimedOut(), is(false));

// Figure out what is the elected master node
DiscoveryNode masterDiscoNode = null;
for (String node : nodesList) {
ClusterState state = cluster().client(node).admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
assertThat(state.nodes().size(), equalTo(3));
if (masterDiscoNode == null) {
masterDiscoNode = state.nodes().masterNode();
} else {
assertThat(state.nodes().masterNode(), equalTo(masterDiscoNode));
}
}
assert masterDiscoNode != null;
logger.info("---> legit elected master node=" + masterDiscoNode);
final Client masterClient = cluster().masterClient();

// Everything is stable now, it is now time to simulate evil...

// Pick a node that isn't the elected master.
String unluckyNode = null;
for (String node : nodesList) {
if (!node.equals(masterDiscoNode.getName())) {
unluckyNode = node;
}
}
assert unluckyNode != null;

// Simulate a network issue between the unlucky node and elected master node in both directions.
addFailToSendNoConnectRule(masterDiscoNode.getName(), unluckyNode);
addFailToSendNoConnectRule(unluckyNode, masterDiscoNode.getName());
try {
// Wait until elected master has removed that the unlucky node...
awaitBusy(new Predicate<Object>() {
@Override
public boolean apply(Object input) {
return masterClient.admin().cluster().prepareState().setLocal(true).get().getState().nodes().size() == 2;
}
});

// The unlucky node must report *no* master node, since it can't connect to master and in fact it should
// continuously ping until network failures have been resolved.
Client isolatedNodeClient = cluster().client(unluckyNode);
ClusterState localClusterState = isolatedNodeClient.admin().cluster().prepareState().setLocal(true).get().getState();
DiscoveryNodes localDiscoveryNodes = localClusterState.nodes();
assertThat(localDiscoveryNodes.masterNode(), nullValue());
} finally {
// stop simulating network failures, from this point on the unlucky node is able to rejoin
// We also need to do this even if assertions fail, since otherwise the test framework can't work properly
clearNoConnectRule(masterDiscoNode.getName(), unluckyNode);
clearNoConnectRule(unluckyNode, masterDiscoNode.getName());
}

// Wait until the master node sees all 3 nodes again.
clusterHealthResponse = masterClient.admin().cluster().prepareHealth()
.setWaitForEvents(Priority.LANGUID)
.setWaitForNodes("3")
.get();
assertThat(clusterHealthResponse.isTimedOut(), is(false));

for (String node : nodesList) {
ClusterState state = cluster().client(node).admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
assertThat(state.nodes().size(), equalTo(3));
// The elected master shouldn't have changed, since the unlucky node never could have elected himself as
// master since m_m_n of 2 could never be satisfied.
assertThat(state.nodes().masterNode(), equalTo(masterDiscoNode));
}
}

private void addFailToSendNoConnectRule(String fromNode, String toNode) {
TransportService mockTransportService = cluster().getInstance(TransportService.class, fromNode);
((MockTransportService) mockTransportService).addFailToSendNoConnectRule(cluster().getInstance(Discovery.class, toNode).localNode());
}

private void clearNoConnectRule(String fromNode, String toNode) {
TransportService mockTransportService = cluster().getInstance(TransportService.class, fromNode);
((MockTransportService) mockTransportService).clearRule(cluster().getInstance(Discovery.class, toNode).localNode());
}

}

0 comments on commit 7bf3ffe

Please sign in to comment.