Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GEODE-6561: When a test server is forced to shutdown, do not save cache xml if use cluster config #3509

Merged
merged 6 commits into from Apr 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license
* agreements. See the NOTICE file distributed with this work for additional information regarding
* copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License. You may obtain a
* copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/

package org.apache.geode.management.internal.rest;

import static org.apache.geode.test.junit.assertions.ClusterManagementResultAssert.assertManagementResult;
import static org.assertj.core.api.Assertions.assertThat;

import org.junit.Rule;
import org.junit.Test;

import org.apache.geode.cache.Region;
import org.apache.geode.cache.configuration.RegionConfig;
import org.apache.geode.cache.configuration.RegionType;
import org.apache.geode.management.api.ClusterManagementResult;
import org.apache.geode.management.api.ClusterManagementService;
import org.apache.geode.management.client.ClusterManagementServiceProvider;
import org.apache.geode.test.dunit.rules.ClusterStartupRule;
import org.apache.geode.test.dunit.rules.MemberVM;

/**
 * Verifies that a region created through the cluster management service is available on a server
 * again after that server has been forcibly disconnected and has reconnected.
 */
public class ServerRestartTest {
  @Rule
  public ClusterStartupRule cluster = new ClusterStartupRule();

  /**
   * Starts one locator (with the HTTP service enabled) and two servers, creates the replicate
   * region "Foo" through the {@link ClusterManagementService}, forces server 2 to disconnect, and
   * asserts that "Foo" exists on server 2 once it has fully reconnected.
   */
  @Test
  public void serverReconnect() throws Exception {
    MemberVM locator = cluster.startLocatorVM(0, l -> l.withHttpService());
    cluster.startServerVM(1, locator.getPort());

    // server 2 is the member that is disconnected below; server 1 stays up so that we won't get
    // a "loss of quorum" error
    MemberVM server2 = cluster.startServerVM(2, locator.getPort());

    ClusterManagementService cmService =
        ClusterManagementServiceProvider.getService("localhost", locator.getHttpPort());

    RegionConfig region = new RegionConfig();
    region.setName("Foo");
    region.setType(RegionType.REPLICATE);
    assertManagementResult(cmService.create(region)).hasStatusCode(
        ClusterManagementResult.StatusCode.OK);

    // force a disconnect, then wait for the server to reconnect on its own
    server2.forceDisconnect();

    server2.waitTilFullyReconnected();

    server2.invoke(() -> {
      // parameterized instead of a raw Region so no unchecked/raw-type warning is produced
      Region<Object, Object> foo = ClusterStartupRule.getCache().getRegion("Foo");
      assertThat(foo).isNotNull();
    });
  }

}
Expand Up @@ -16,7 +16,6 @@
package org.apache.geode.management;

import static java.util.stream.Collectors.toList;
import static org.apache.geode.distributed.ConfigurationProperties.MAX_WAIT_TIME_RECONNECT;
import static org.apache.geode.management.ManagementService.getExistingManagementService;
import static org.apache.geode.test.awaitility.GeodeAwaitility.await;
import static org.apache.geode.test.dunit.internal.JUnit4DistributedTestCase.getBlackboard;
Expand All @@ -25,23 +24,19 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;

import javax.management.MBeanServerConnection;
import javax.management.ObjectName;
import javax.management.remote.JMXConnectorFactory;
import javax.management.remote.JMXServiceURL;

import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import org.apache.geode.cache.Cache;
import org.apache.geode.distributed.ConfigurationProperties;
import org.apache.geode.management.internal.SystemManagementService;
import org.apache.geode.test.awaitility.GeodeAwaitility;
import org.apache.geode.test.dunit.rules.ClusterStartupRule;
Expand All @@ -53,14 +48,8 @@

@Category({JMXTest.class})
public class JMXMBeanReconnectDUnitTest {
private static final String LOCATOR_1_NAME = "locator-one";
private static final String LOCATOR_2_NAME = "locator-two";
private static final String REGION_PATH = "/test-region-1";
private static final String RECONNECT_MAILBOX = "reconnectReady";
private static final int LOCATOR_1_VM_INDEX = 0;
private static final int LOCATOR_2_VM_INDEX = 1;
private static final int SERVER_1_VM_INDEX = 2;
private static final int SERVER_2_VM_INDEX = 3;
private static final int SERVER_COUNT = 2;
private static final int NUM_REMOTE_BEANS = 19;
private static final int NUM_LOCATOR_BEANS = 8;
Expand All @@ -69,51 +58,50 @@ public class JMXMBeanReconnectDUnitTest {

private MemberVM locator1, locator2, server1;

private MBeanServerConnection locator1Connection;
private MBeanServerConnection locator2Connection;

@Rule
public ClusterStartupRule lsRule = new ClusterStartupRule();

@Rule
public GfshCommandRule gfsh = new GfshCommandRule();

@Rule
public MBeanServerConnectionRule jmxConnectionRule = new MBeanServerConnectionRule();
private MBeanServerConnectionRule jmxConToLocator1;
private MBeanServerConnectionRule jmxConToLocator2;

@Rule
public ConcurrencyRule concurrencyRule = new ConcurrencyRule();

@Before
public void before() throws Exception {
Properties properties = new Properties();
properties.setProperty(MAX_WAIT_TIME_RECONNECT, "5000");

locator1 = lsRule.startLocatorVM(LOCATOR_1_VM_INDEX, locator1Properties());
locator1.waitTilLocatorFullyStarted();

locator2 = lsRule.startLocatorVM(LOCATOR_2_VM_INDEX, locator2Properties(), locator1.getPort());
locator2.waitTilLocatorFullyStarted();
locator1 = lsRule.startLocatorVM(0);
locator2 = lsRule.startLocatorVM(1, locator1.getPort());

server1 = lsRule.startServerVM(SERVER_1_VM_INDEX, properties, locator1.getPort());
server1 = lsRule.startServerVM(2, locator1.getPort());
// start an extra server to have more MBeans, but we don't need to use it in these tests
lsRule.startServerVM(SERVER_2_VM_INDEX, properties, locator1.getPort());
lsRule.startServerVM(3, locator1.getPort());

gfsh.connectAndVerify(locator1);
gfsh.executeAndAssertThat("create region --type=REPLICATE --name=" + REGION_PATH
+ " --enable-statistics=true").statusIsSuccess();

locator1.waitUntilRegionIsReadyOnExactlyThisManyServers(REGION_PATH, SERVER_COUNT);

locator1Connection = connectToMBeanServerFor(locator1.getJmxPort());
locator2Connection = connectToMBeanServerFor(locator2.getJmxPort());
jmxConToLocator1 = new MBeanServerConnectionRule();
jmxConToLocator1.connect(locator1.getJmxPort());
jmxConToLocator2 = new MBeanServerConnectionRule();
jmxConToLocator2.connect(locator2.getJmxPort());

await("Locators must agree on the state of the system")
.untilAsserted(() -> assertThat(getFederatedGemfireBeansFrom(locator1Connection))
.containsExactlyElementsOf(getFederatedGemfireBeansFrom(locator2Connection))
.untilAsserted(() -> assertThat(jmxConToLocator1.getGemfireFederatedBeans())
.containsExactlyElementsOf(jmxConToLocator2.getGemfireFederatedBeans())
.hasSize(NUM_REMOTE_BEANS));
}

/**
 * Closes both JMX connections opened in {@code before()}.
 *
 * <p>
 * JUnit runs {@code @After} methods even when {@code @Before} failed, so either field may still
 * be {@code null} here; the null checks keep a setup failure from being obscured by a
 * NullPointerException. The try/finally ensures the second connection is closed even if closing
 * the first one throws.
 */
@After
public void after() throws Exception {
  try {
    if (jmxConToLocator1 != null) {
      jmxConToLocator1.disconnect();
    }
  } finally {
    if (jmxConToLocator2 != null) {
      jmxConToLocator2.disconnect();
    }
  }
}

/**
* Test that a server's local MBeans are not affected by a locator crashing
*/
Expand All @@ -129,7 +117,7 @@ public void testLocalBeans_MaintainServerAndCrashLocator() {
.containsExactlyElementsOf(initialServerBeans)
.hasSize(NUM_SERVER_BEANS);

locator1.waitTilLocatorFullyReconnected();
locator1.waitTilFullyReconnected();

List<String> finalServerBeans = server1.invoke(() -> getLocalCanonicalBeanNames());
assertThat(finalServerBeans)
Expand All @@ -152,7 +140,7 @@ public void testLocalBeans_MaintainLocatorAndCrashServer() {
.containsExactlyElementsOf(initialLocatorBeans)
.hasSize(NUM_LOCATOR_BEANS);

server1.waitTilServerFullyReconnected();
server1.waitTilFullyReconnected();
locator1.waitUntilRegionIsReadyOnExactlyThisManyServers(REGION_PATH, SERVER_COUNT);

List<String> finalLocatorBeans = locator1.invoke(() -> getLocalCanonicalBeanNames());
Expand All @@ -170,13 +158,13 @@ public void testLocalBeans_MaintainLocatorAndCrashServer() {
@Test
public void testRemoteBeanKnowledge_MaintainServerAndCrashLocator() throws IOException {
// check that the initial state is good
List<ObjectName> initialL1Beans = getFederatedGemfireBeansFrom(locator1Connection);
List<ObjectName> initialL2Beans = getFederatedGemfireBeansFrom(locator2Connection);
List<ObjectName> initialL1Beans = jmxConToLocator1.getGemfireFederatedBeans();
List<ObjectName> initialL2Beans = jmxConToLocator2.getGemfireFederatedBeans();
assertThat(initialL1Beans).containsExactlyElementsOf(initialL2Beans).hasSize(NUM_REMOTE_BEANS);

// calculate the expected list for use once the locator has crashed
List<ObjectName> expectedIntermediateBeanList = initialL1Beans.stream()
.filter(excludingBeansFor(LOCATOR_1_NAME)).collect(toList());
.filter(excludingBeansFor("locator-0")).collect(toList());

// crash the locator
locator1.forceDisconnect(TIMEOUT, TimeUnit.MILLISECONDS, RECONNECT_MAILBOX);
Expand All @@ -185,7 +173,7 @@ public void testRemoteBeanKnowledge_MaintainServerAndCrashLocator() throws IOExc
List<ObjectName> intermediateL2Beans = new ArrayList<>();
await().untilAsserted(() -> {
intermediateL2Beans.clear();
intermediateL2Beans.addAll(getFederatedGemfireBeansFrom(locator2Connection));
intermediateL2Beans.addAll(jmxConToLocator2.getGemfireFederatedBeans());

assertThat(intermediateL2Beans)
.containsExactlyElementsOf(expectedIntermediateBeanList)
Expand All @@ -199,13 +187,13 @@ public void testRemoteBeanKnowledge_MaintainServerAndCrashLocator() throws IOExc
List<ObjectName> finalL2Beans = new ArrayList<>();
await().untilAsserted(() -> {
finalL2Beans.clear();
finalL2Beans.addAll(getFederatedGemfireBeansFrom(locator2Connection));
finalL2Beans.addAll(jmxConToLocator2.getGemfireFederatedBeans());

assertThat(finalL2Beans).hasSize(NUM_REMOTE_BEANS);
});

// check that the final state is the same as the initial state
assertThat(getFederatedGemfireBeansFrom(locator1Connection))
assertThat(jmxConToLocator1.getGemfireFederatedBeans())
.containsExactlyElementsOf(finalL2Beans)
.containsExactlyElementsOf(initialL1Beans)
.hasSize(NUM_REMOTE_BEANS);
Expand All @@ -220,13 +208,13 @@ public void testRemoteBeanKnowledge_MaintainServerAndCrashLocator() throws IOExc
@Test
public void testRemoteBeanKnowledge_MaintainLocatorAndCrashServer() throws IOException {
// check that the initial state is correct
List<ObjectName> initialL1Beans = getFederatedGemfireBeansFrom(locator1Connection);
List<ObjectName> initialL2Beans = getFederatedGemfireBeansFrom(locator2Connection);
List<ObjectName> initialL1Beans = jmxConToLocator1.getGemfireFederatedBeans();
List<ObjectName> initialL2Beans = jmxConToLocator2.getGemfireFederatedBeans();
assertThat(initialL1Beans).containsExactlyElementsOf(initialL2Beans).hasSize(NUM_REMOTE_BEANS);

// calculate the expected list of MBeans when the server has crashed
List<ObjectName> expectedIntermediateBeanList = initialL1Beans.stream()
.filter(excludingBeansFor("server-" + SERVER_1_VM_INDEX)).collect(toList());
.filter(excludingBeansFor("server-2")).collect(toList());

// crash the server
server1.forceDisconnect(TIMEOUT, TimeUnit.MILLISECONDS, RECONNECT_MAILBOX);
Expand All @@ -239,8 +227,8 @@ public void testRemoteBeanKnowledge_MaintainLocatorAndCrashServer() throws IOExc
intermediateL1Beans.clear();
intermediateL2Beans.clear();

intermediateL1Beans.addAll(getFederatedGemfireBeansFrom(locator1Connection));
intermediateL2Beans.addAll(getFederatedGemfireBeansFrom(locator2Connection));
intermediateL1Beans.addAll(jmxConToLocator1.getGemfireFederatedBeans());
intermediateL2Beans.addAll(jmxConToLocator2.getGemfireFederatedBeans());

assertThat(intermediateL1Beans)
.containsExactlyElementsOf(expectedIntermediateBeanList)
Expand All @@ -261,8 +249,8 @@ public void testRemoteBeanKnowledge_MaintainLocatorAndCrashServer() throws IOExc
finalL1Beans.clear();
finalL2Beans.clear();

finalL1Beans.addAll(getFederatedGemfireBeansFrom(locator1Connection));
finalL2Beans.addAll(getFederatedGemfireBeansFrom(locator2Connection));
finalL1Beans.addAll(jmxConToLocator1.getGemfireFederatedBeans());
finalL2Beans.addAll(jmxConToLocator2.getGemfireFederatedBeans());

// check that the final state eventually matches the initial state
assertThat(finalL1Beans)
Expand All @@ -272,36 +260,6 @@ public void testRemoteBeanKnowledge_MaintainLocatorAndCrashServer() throws IOExc
});
}

/**
 * Lists the GemFire MBeans visible through the given MBean server connection, leaving out the
 * locators' own "service=Manager" member beans, so that the result contains only the MBeans
 * every locator in the system is expected to report.
 *
 * @param remoteMBS the connection to a locator's MBean server
 * @return the matching {@code ObjectName}s, sorted by their natural ordering
 * @throws IOException if querying the remote MBean server fails
 */
private static List<ObjectName> getFederatedGemfireBeansFrom(MBeanServerConnection remoteMBS)
    throws IOException {
  List<ObjectName> federatedBeans = new ArrayList<>();
  for (ObjectName beanName : remoteMBS.queryNames(null, null)) {
    String text = beanName.toString();
    boolean isGemfireBean = text.contains("GemFire");
    // Each locator has a "Manager" bean representing the ManagementAdapter. It is registered
    // (so it shows up in the locator's own queries) but *not* federated (so other locators
    // never see it). These beans are out of scope for this test.
    boolean isLocatorManagerBean = text.contains("service=Manager,type=Member,member=locator");
    if (isGemfireBean && !isLocatorManagerBean) {
      federatedBeans.add(beanName);
    }
  }
  // a null comparator means "use the elements' natural ordering"
  federatedBeans.sort(null);
  return federatedBeans;
}

/**
 * Opens a JMX/RMI connection to the MBean server listening on localhost at the given port.
 *
 * <p>
 * Only the {@code MBeanServerConnection} is returned; the underlying connector is not handed
 * back to the caller.
 *
 * @param jmxPort the JMX manager port to connect to
 * @return a connection to that MBean server
 * @throws IOException if the connection cannot be established
 */
private static MBeanServerConnection connectToMBeanServerFor(int jmxPort) throws IOException {
  JMXServiceURL serviceURL =
      new JMXServiceURL("service:jmx:rmi:///jndi/rmi://localhost:" + jmxPort + "/jmxrmi");
  return JMXConnectorFactory.connect(serviceURL).getMBeanServerConnection();
}

/**
* Gets a list of local MBeans from the JVM this is invoked from. This list of MBeans does not
* include beans for members other than the member this method is invoked on.
Expand All @@ -316,21 +274,4 @@ private static List<String> getLocalCanonicalBeanNames() {
/**
 * Builds a predicate that rejects every MBean whose canonical name contains
 * {@code "member=" + memberName} and accepts all others.
 *
 * @param memberName the member name to look for after {@code member=}
 * @return a predicate that is {@code false} for beans matching that member
 */
private static Predicate<ObjectName> excludingBeansFor(String memberName) {
  final String memberKey = "member=" + memberName;
  return beanName -> {
    boolean belongsToMember = beanName.getCanonicalName().contains(memberKey);
    return !belongsToMember;
  };
}

/**
 * Builds the configuration for the first locator: its member name, "localhost" as the JMX
 * manager hostname for clients, and a max reconnect wait time of "5000".
 *
 * @return a new {@code Properties} instance holding those three settings
 */
private Properties locator1Properties() {
  Properties locatorProps = new Properties();
  locatorProps.setProperty(ConfigurationProperties.NAME, LOCATOR_1_NAME);
  locatorProps.setProperty(ConfigurationProperties.JMX_MANAGER_HOSTNAME_FOR_CLIENTS, "localhost");
  locatorProps.setProperty(MAX_WAIT_TIME_RECONNECT, "5000");
  return locatorProps;
}

/**
 * Builds the configuration for the second locator: its member name, "localhost" as the JMX
 * manager hostname for clients, a max reconnect wait time of "5000", and a locators entry that
 * points at the first locator's port on localhost.
 *
 * @return a new {@code Properties} instance holding those four settings
 */
private Properties locator2Properties() {
  // host[port] entry for the already-started first locator
  String firstLocatorAddress = "localhost[" + locator1.getPort() + "]";

  Properties locatorProps = new Properties();
  locatorProps.setProperty(ConfigurationProperties.NAME, LOCATOR_2_NAME);
  locatorProps.setProperty(ConfigurationProperties.JMX_MANAGER_HOSTNAME_FOR_CLIENTS, "localhost");
  locatorProps.setProperty(ConfigurationProperties.LOCATORS, firstLocatorAddress);
  locatorProps.setProperty(MAX_WAIT_TIME_RECONNECT, "5000");
  return locatorProps;
}
}
Expand Up @@ -103,7 +103,7 @@ public void createdRegionPersistsThroughCacheConfig() {

server2.forceDisconnect();

server2.waitTilServerFullyReconnected();
server2.waitTilFullyReconnected();
locator.waitUntilRegionIsReadyOnExactlyThisManyServers(REGION_PATH, 2);

server2.invoke(() -> {
Expand Down Expand Up @@ -135,7 +135,7 @@ public void regionUpdatePersistsThroughClusterConfig() {

server2.forceDisconnect();

server2.waitTilServerFullyReconnected();
server2.waitTilFullyReconnected();
locator.waitUntilRegionIsReadyOnExactlyThisManyServers(REGION_PATH, 2);

server2.invoke(() -> {
Expand All @@ -160,7 +160,7 @@ public void destroyRegionPersistsThroughClusterConfig() {

server2.forceDisconnect();

server2.waitTilServerFullyReconnected();
server2.waitTilFullyReconnected();

server2.invoke(() -> {
InternalDistributedSystem system = InternalDistributedSystem.getConnectedInstance();
Expand Down
Expand Up @@ -79,7 +79,7 @@ public void functionExecutionAfterServerReconnect() throws Exception {

server2.forceDisconnect();

server2.waitTilServerFullyReconnected();
server2.waitTilFullyReconnected();

callFunction(server1);
}
Expand Down
Expand Up @@ -18,6 +18,7 @@
import static org.apache.geode.test.awaitility.GeodeAwaitility.await;

import org.apache.geode.CancelException;
import org.apache.geode.annotations.VisibleForTesting;
import org.apache.geode.distributed.DistributedMember;
import org.apache.geode.distributed.DistributedSystem;
import org.apache.geode.distributed.internal.ClusterDistributionManager;
Expand Down Expand Up @@ -123,10 +124,11 @@ public String description() {
GeodeAwaitility.await().untilAsserted(ev);
}

@VisibleForTesting
// This method is only used for testing. It should be extracted into a test helper instead.
public static void crashDistributedSystem(final DistributedSystem msys) {
msys.getLogWriter().info("crashing distributed system: " + msys);
GMSMembershipManager mgr = ((GMSMembershipManager) getMembershipManager(msys));
mgr.saveCacheXmlForReconnect(false);
MembershipManagerHelper.inhibitForcedDisconnectLogging(true);
MembershipManagerHelper.beSickMember(msys);
MembershipManagerHelper.playDead(msys);
Expand Down