Skip to content

Commit

Permalink
Revert "HBASE-23891: Add an option to Actions to filter out meta RS (#1217)"
Browse files Browse the repository at this point in the history

This reverts commit 7d8fa5c.
  • Loading branch information
ndimiduk committed Mar 10, 2020
1 parent 30637f2 commit 4f76e24
Show file tree
Hide file tree
Showing 18 changed files with 84 additions and 98 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ protected void processBaseOptions(CommandLine cmd) {
* Loads entries from the provided {@code conf} into {@code props} when the configuration key
* is one that may be configuring ChaosMonkey actions.
*/
public static void loadMonkeyProperties(Properties props, Configuration conf) {
void loadMonkeyProperties(Properties props, Configuration conf) {
for (Entry<String,String> entry : conf) {
for (String prefix : MonkeyConstants.MONKEY_CONFIGURATION_KEY_PREFIXES) {
if (entry.getKey().startsWith(prefix)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
Expand All @@ -35,13 +34,11 @@
import org.apache.hadoop.hbase.HBaseCluster;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.IntegrationTestBase;
import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.ServerMetrics;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.chaos.factories.MonkeyConstants;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
Expand Down Expand Up @@ -93,7 +90,6 @@ public class Action {
protected HBaseCluster cluster;
protected ClusterMetrics initialStatus;
protected ServerName[] initialServers;
protected Properties monkeyProps;

protected long killMasterTimeout;
protected long startMasterTimeout;
Expand All @@ -105,7 +101,6 @@ public class Action {
protected long startDataNodeTimeout;
protected long killNameNodeTimeout;
protected long startNameNodeTimeout;
protected boolean skipMetaRS;

public void init(ActionContext context) throws IOException {
this.context = context;
Expand All @@ -114,34 +109,25 @@ public void init(ActionContext context) throws IOException {
Collection<ServerName> regionServers = initialStatus.getLiveServerMetrics().keySet();
initialServers = regionServers.toArray(new ServerName[regionServers.size()]);

monkeyProps = context.getMonkeyProps();
if (monkeyProps == null){
monkeyProps = new Properties();
IntegrationTestBase.loadMonkeyProperties(monkeyProps, cluster.getConf());
}
killMasterTimeout = cluster.getConf().getLong(KILL_MASTER_TIMEOUT_KEY,
KILL_MASTER_TIMEOUT_DEFAULT);
startMasterTimeout = cluster.getConf().getLong(START_MASTER_TIMEOUT_KEY,
START_MASTER_TIMEOUT_DEFAULT);
killRsTimeout = cluster.getConf().getLong(KILL_RS_TIMEOUT_KEY, KILL_RS_TIMEOUT_DEFAULT);
startRsTimeout = cluster.getConf().getLong(START_RS_TIMEOUT_KEY, START_RS_TIMEOUT_DEFAULT);
killZkNodeTimeout = cluster.getConf().getLong(KILL_ZK_NODE_TIMEOUT_KEY,
KILL_ZK_NODE_TIMEOUT_DEFAULT);
startZkNodeTimeout = cluster.getConf().getLong(START_ZK_NODE_TIMEOUT_KEY,
START_ZK_NODE_TIMEOUT_DEFAULT);
killDataNodeTimeout = cluster.getConf().getLong(KILL_DATANODE_TIMEOUT_KEY,
KILL_DATANODE_TIMEOUT_DEFAULT);
startDataNodeTimeout = cluster.getConf().getLong(START_DATANODE_TIMEOUT_KEY,
START_DATANODE_TIMEOUT_DEFAULT);
killNameNodeTimeout =
cluster.getConf().getLong(KILL_NAMENODE_TIMEOUT_KEY, KILL_NAMENODE_TIMEOUT_DEFAULT);
startNameNodeTimeout =
cluster.getConf().getLong(START_NAMENODE_TIMEOUT_KEY, START_NAMENODE_TIMEOUT_DEFAULT);

killMasterTimeout = Long.parseLong(monkeyProps.getProperty(
KILL_MASTER_TIMEOUT_KEY, KILL_MASTER_TIMEOUT_DEFAULT + ""));
startMasterTimeout = Long.parseLong(monkeyProps.getProperty(START_MASTER_TIMEOUT_KEY,
START_MASTER_TIMEOUT_DEFAULT + ""));
killRsTimeout = Long.parseLong(monkeyProps.getProperty(KILL_RS_TIMEOUT_KEY,
KILL_RS_TIMEOUT_DEFAULT + ""));
startRsTimeout = Long.parseLong(monkeyProps.getProperty(START_RS_TIMEOUT_KEY,
START_RS_TIMEOUT_DEFAULT+ ""));
killZkNodeTimeout = Long.parseLong(monkeyProps.getProperty(KILL_ZK_NODE_TIMEOUT_KEY,
KILL_ZK_NODE_TIMEOUT_DEFAULT + ""));
startZkNodeTimeout = Long.parseLong(monkeyProps.getProperty(START_ZK_NODE_TIMEOUT_KEY,
START_ZK_NODE_TIMEOUT_DEFAULT + ""));
killDataNodeTimeout = Long.parseLong(monkeyProps.getProperty(KILL_DATANODE_TIMEOUT_KEY,
KILL_DATANODE_TIMEOUT_DEFAULT + ""));
startDataNodeTimeout = Long.parseLong(monkeyProps.getProperty(START_DATANODE_TIMEOUT_KEY,
START_DATANODE_TIMEOUT_DEFAULT + ""));
killNameNodeTimeout = Long.parseLong(monkeyProps.getProperty(KILL_NAMENODE_TIMEOUT_KEY,
KILL_NAMENODE_TIMEOUT_DEFAULT + ""));
startNameNodeTimeout = Long.parseLong(monkeyProps.getProperty(START_NAMENODE_TIMEOUT_KEY,
START_NAMENODE_TIMEOUT_DEFAULT + ""));
skipMetaRS = Boolean.parseBoolean(monkeyProps.getProperty(MonkeyConstants.SKIP_META_RS,
MonkeyConstants.DEFAULT_SKIP_META_RS + ""));
}

public void perform() throws Exception { }
Expand All @@ -161,12 +147,6 @@ protected ServerName[] getCurrentServers() throws IOException {
ArrayList<ServerName> tmp = new ArrayList<>(count);
tmp.addAll(regionServers);
tmp.removeAll(masters);

if(skipMetaRS){
ServerName metaServer = cluster.getServerHoldingMeta();
tmp.remove(metaServer);
}

return tmp.toArray(new ServerName[tmp.size()]);
}

Expand Down Expand Up @@ -378,21 +358,11 @@ protected void modifyAllTableColumns(TableName tableName, Consumer<ColumnFamilyD
*/
public static class ActionContext {
private IntegrationTestingUtility util;
private Properties monkeyProps = null;

public ActionContext(IntegrationTestingUtility util) {
this.util = util;
}

public ActionContext(Properties monkeyProps, IntegrationTestingUtility util) {
this.util = util;
this.monkeyProps = monkeyProps;
}

public Properties getMonkeyProps(){
return monkeyProps;
}

public IntegrationTestingUtility getHBaseIntegrationTestingUtility() {
return util;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,6 @@ public RollingBatchRestartRsAction(long sleepTime, float ratio, int maxDeadServe
this.maxDeadServers = maxDeadServers;
}

public RollingBatchRestartRsAction(long sleepTime, float ratio, int maxDeadServers,
boolean skipMetaRS) {
this(sleepTime, ratio, maxDeadServers);
this.skipMetaRS = skipMetaRS;
}

enum KillOrStart {
KILL,
START
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hbase.chaos.actions;

import java.util.List;

import org.apache.hadoop.hbase.ServerName;

/**
* Same as in {@link RollingBatchRestartRsAction} except that this action
* does not restart the region server holding the META table.
*/
public class RollingBatchRestartRsExceptMetaAction extends RollingBatchRestartRsAction {

  /**
   * @param sleepTime time (ms) to sleep between batches of restarts.
   * @param ratio ratio of region servers to restart in each batch.
   * @param maxDeadServers maximum number of servers allowed to be down at once.
   */
  public RollingBatchRestartRsExceptMetaAction(long sleepTime, float ratio, int maxDeadServers) {
    super(sleepTime, ratio, maxDeadServers);
  }

  /**
   * Selects candidate region servers as the parent action does, then removes the server
   * currently holding the META region so this action never restarts it.
   */
  @Override
  protected List<ServerName> selectServers() throws java.io.IOException {
    // Look up the meta host first so a concurrent move between the two calls is less likely
    // to leave it in the candidate list.
    ServerName metaServer = cluster.getServerHoldingMeta();
    List<ServerName> servers = super.selectServers();
    servers.remove(metaServer);
    return servers;
  } // removed stray ';' after the method body (extraneous empty declaration)

}
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public ChaosMonkey build() {
new DumpClusterStatusAction()
};

return new PolicyBasedChaosMonkey(properties, util,
return new PolicyBasedChaosMonkey(util,
new PeriodicRandomActionPolicy(action1Period, actions1),
new PeriodicRandomActionPolicy(action2Period, actions2));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,8 @@ public class DistributedIssuesMonkeyFactory extends MonkeyFactory {
// Action to log more info for debugging
Action[] actions2 = new Action[] {new DumpClusterStatusAction()};

return new PolicyBasedChaosMonkey(properties, util,
new PeriodicRandomActionPolicy(action1Period, actions1),
new PeriodicRandomActionPolicy(action2Period, actions2));
return new PolicyBasedChaosMonkey(util, new PeriodicRandomActionPolicy(action1Period, actions1),
new PeriodicRandomActionPolicy(action2Period, actions2));
}

private void loadProperties() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public ChaosMonkey build() {
new DumpClusterStatusAction()
};

return new PolicyBasedChaosMonkey(properties, util,
return new PolicyBasedChaosMonkey(util,
new PeriodicRandomActionPolicy(action1Period, actions1),
new PeriodicRandomActionPolicy(action2Period, actions2));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ public class MobNoKillMonkeyFactory extends MonkeyFactory {

Action[] actions4 = new Action[] { new DumpClusterStatusAction() };

return new PolicyBasedChaosMonkey(properties, util,
return new PolicyBasedChaosMonkey(util,
new TwoConcurrentActionPolicy(MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD, actions1,
actions2),
new PeriodicRandomActionPolicy(MonkeyConstants.DEFAULT_PERIODIC_ACTION2_PERIOD,actions3),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ public ChaosMonkey build() {
new DumpClusterStatusAction()
};

return new PolicyBasedChaosMonkey(properties, util,
return new PolicyBasedChaosMonkey(util,
new PeriodicRandomActionPolicy(action1Period, actions1),
new PeriodicRandomActionPolicy(action2Period, actions2),
new CompositeSequentialPolicy(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ public interface MonkeyConstants {
String GRACEFUL_RESTART_RS_SLEEP_TIME = "graceful.restart.rs.sleep.time";
String ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = "rolling.batch.suspend.rs.sleep.time";
String ROLLING_BATCH_SUSPEND_RS_RATIO = "rolling.batch.suspend.rs.ratio";
String SKIP_META_RS = "skip.meta.rs";
String CPU_LOAD_DURATION = "cpu.load.duration";
String CPU_LOAD_PROCESSES = "cpu.load.processes";
String NETWORK_ISSUE_COMMAND_TIMEOUT = "network.issue.command.timeout";
Expand All @@ -68,7 +67,7 @@ public interface MonkeyConstants {
*/
Set<String> MONKEY_CONFIGURATION_KEY_PREFIXES = new HashSet<>(
Arrays.asList("sdm.", "move.", "restart.", "batch.", "rolling.", "compact.", "unbalance.",
"decrease.", "decrease.", "graceful.", "cpu.", "network.", "fill.", "data.", "skip"));
"decrease.", "decrease.", "graceful.", "cpu.", "network.", "fill.", "data."));

long DEFAULT_PERIODIC_ACTION1_PERIOD = 60 * 1000;
long DEFAULT_PERIODIC_ACTION2_PERIOD = 90 * 1000;
Expand All @@ -95,7 +94,6 @@ public interface MonkeyConstants {
long DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME = 5000;
long DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = 30 * 1000;
float DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO = 1.0f;
boolean DEFAULT_SKIP_META_RS = false;
long DEFAULT_CPU_LOAD_DURATION = 5 * 60 * 1000;
long DEFAULT_CPU_LOAD_PROCESSES = 2;
long DEFAULT_NETWORK_ISSUE_COMMAND_TIMEOUT = 30 * 1000;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ public class NoKillMonkeyFactory extends MonkeyFactory {
new DumpClusterStatusAction()
};

return new PolicyBasedChaosMonkey(properties, util,
return new PolicyBasedChaosMonkey(util,
new TwoConcurrentActionPolicy(MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD, actions1, actions2),
new PeriodicRandomActionPolicy(MonkeyConstants.DEFAULT_PERIODIC_ACTION2_PERIOD,actions3),
new PeriodicRandomActionPolicy(MonkeyConstants.DEFAULT_PERIODIC_ACTION4_PERIOD,actions4));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import org.apache.hadoop.hbase.chaos.actions.RestartRandomDataNodeAction;
import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsExceptMetaAction;
import org.apache.hadoop.hbase.chaos.actions.RestartRandomZKNodeAction;
import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsAction;
import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsExceptMetaAction;
import org.apache.hadoop.hbase.chaos.actions.RollingBatchSuspendResumeRsAction;
import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
Expand All @@ -52,8 +52,7 @@ public ChaosMonkey build() {
Action[] actions1 = new Action[]{
new RestartRandomRsExceptMetaAction(60000),
new RestartActiveMasterAction(5000),
new RollingBatchRestartRsAction(5000, 1.0f, 2,
true), // only allow 2 servers to be dead.
new RollingBatchRestartRsExceptMetaAction(5000, 1.0f, 2), // only allow 2 servers to be dead.
new ForceBalancerAction(),
new RestartRandomDataNodeAction(60000),
new RestartRandomZKNodeAction(60000),
Expand All @@ -67,7 +66,7 @@ public ChaosMonkey build() {
new DumpClusterStatusAction()
};

return new PolicyBasedChaosMonkey(properties, util,
return new PolicyBasedChaosMonkey(util,
new CompositeSequentialPolicy(
new DoActionsOncePolicy(60 * 1000, actions1),
new PeriodicRandomActionPolicy(60 * 1000, actions1)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import org.apache.hadoop.hbase.chaos.actions.GracefulRollingRestartRsAction;
import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsExceptMetaAction;
import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsAction;
import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsExceptMetaAction;
import org.apache.hadoop.hbase.chaos.actions.RollingBatchSuspendResumeRsAction;
import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
Expand All @@ -50,8 +50,7 @@ public ChaosMonkey build() {
Action[] actions1 = new Action[] {
new RestartRandomRsExceptMetaAction(60000),
new RestartActiveMasterAction(5000),
new RollingBatchRestartRsAction(5000, 1.0f, 2,
true), //only allow 2 servers to be dead
new RollingBatchRestartRsExceptMetaAction(5000, 1.0f, 2), //only allow 2 servers to be dead
new ForceBalancerAction(),
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
Expand All @@ -63,7 +62,7 @@ public ChaosMonkey build() {
new DumpClusterStatusAction()
};

return new PolicyBasedChaosMonkey(properties, util,
return new PolicyBasedChaosMonkey(util,
new CompositeSequentialPolicy(
new DoActionsOncePolicy(60 * 1000, actions1),
new PeriodicRandomActionPolicy(60 * 1000, actions1)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ public ChaosMonkey build() {
new DumpClusterStatusAction()
};

return new PolicyBasedChaosMonkey(properties, util,
return new PolicyBasedChaosMonkey(util,
new PeriodicRandomActionPolicy(action1Period, actions1),
new PeriodicRandomActionPolicy(action2Period, actions2),
new CompositeSequentialPolicy(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public ChaosMonkey build() {
new DumpClusterStatusAction()
};

return new PolicyBasedChaosMonkey(properties, util,
return new PolicyBasedChaosMonkey(util,
new PeriodicRandomActionPolicy(90 * 1000, actions1),
new CompositeSequentialPolicy(
new DoActionsOncePolicy(90 * 1000, actions2),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public ChaosMonkey build() {
new UnbalanceKillAndRebalanceAction(waitForUnbalanceMilliSec, waitForKillMilliSec,
waitAfterBalanceMilliSec, killMetaRs));

return new PolicyBasedChaosMonkey(properties, util, chaosPolicy);
return new PolicyBasedChaosMonkey(util, chaosPolicy);
}

private void loadProperties() {
Expand Down
Loading

0 comments on commit 4f76e24

Please sign in to comment.