diff --git a/src/java/voldemort/client/rebalance/RebalanceController.java b/src/java/voldemort/client/rebalance/RebalanceController.java index 88057b6d5e..7dafa74efe 100644 --- a/src/java/voldemort/client/rebalance/RebalanceController.java +++ b/src/java/voldemort/client/rebalance/RebalanceController.java @@ -46,7 +46,10 @@ import com.google.common.collect.Lists; -// TODO: javadoc header needed +/** + * Executes a RebalancePlan. + * + */ public class RebalanceController { // TODO: Remove server side "optimization" that does not bother to steal @@ -153,8 +156,8 @@ public void rebalance(final RebalancePlan rebalancePlan) { validateCluster(finalCluster, finalStores); logger.info("Propagating cluster " + finalCluster + " to all nodes"); - // TODO: Add finalStores here so that cluster & stores can be updated - // atomically. + // TODO: (atomic cluster/stores update) Add finalStores here so that + // cluster & stores can be updated atomically. Need to rebase first. RebalanceUtils.propagateCluster(adminClient, finalCluster); executePlan(rebalancePlan); @@ -270,7 +273,15 @@ private void batchStatusLog(int batchCount, RebalanceUtils.printLog(batchCount, logger, sb.toString()); } - // TODO: Add javadoc. + /** + * Executes a batch plan. + * + * @param batchCount Used as the ID of the batch plan. This allows related + * tasks on client- & server-side to pretty print messages in a + * manner that debugging can track specific batch plans across the + * cluster. + * @param batchPlan The batch plan... + */ private void executeBatch(int batchCount, final RebalanceBatchPlan batchPlan) { final Cluster batchCurrentCluster = batchPlan.getCurrentCluster(); final Cluster batchFinalCluster = batchPlan.getFinalCluster(); @@ -469,10 +480,11 @@ private void rebalanceStateChange(final int taskId, } } - // TODO: Fix this javadoc comment. Break this into multiple "sub" methods? - // AFAIK, this method either does the RO stores or the RW stores in a batch. - // I.e., there are at most 2 sub-batches for any given batch. And, in - // practice, there is one sub-batch that is either RO or RW. + // TODO: (refactor) Break this state-machine like method into multiple "sub" + // methods. AFAIK, this method either does the RO stores or the RW stores in + // a batch. I.e., there are at most 2 sub-batches for any given batch. And, + // in practice, there is one sub-batch that is either RO or RW. + // TODO: Fix the javadoc comment to be more easily understood. /** * The smallest granularity of rebalancing where-in we move partitions for a * sub-set of stores. Finally at the end of the movement, the node is @@ -631,12 +643,11 @@ private List executeTasks(final int taskId, HashMap> donorNodeBasedPartitionsInfo = RebalanceUtils.groupPartitionsInfoByNode(rebalancePartitionPlanList, false); for(Entry> entries: donorNodeBasedPartitionsInfo.entrySet()) { - // TODO: Can this sleep be removed? - /*- - try { - Thread.sleep(10000); - } catch(InterruptedException e) {} - */ + // At some point, a 10 second sleep was added here to help with + // a race condition. Leaving this comment here in case, at some + // point in the future, we need to hack around some race + // condition: + // Thread.sleep(10000); DonorBasedRebalanceTask rebalanceTask = new DonorBasedRebalanceTask(taskId, entries.getValue(), rebalancingClientTimeoutSeconds, diff --git a/src/java/voldemort/client/rebalance/task/StealerBasedRebalanceTask.java b/src/java/voldemort/client/rebalance/task/StealerBasedRebalanceTask.java index 4018010a7e..7f2df14efe 100644 --- a/src/java/voldemort/client/rebalance/task/StealerBasedRebalanceTask.java +++ b/src/java/voldemort/client/rebalance/task/StealerBasedRebalanceTask.java @@ -27,7 +27,8 @@ public class StealerBasedRebalanceTask extends RebalanceTask { private static final Logger logger = Logger.getLogger(StealerBasedRebalanceTask.class); private final int stealerNodeId; - // TODO: What is the use of this parameter!?!?!?!?! + // TODO: What is the use of maxTries for stealer-based tasks? Need to + // validate reason for existence or remove. private final int maxTries; public StealerBasedRebalanceTask(final int taskId, @@ -74,6 +75,7 @@ private int startNodeRebalancing() { rebalanceException); } + @Override public void run() { int rebalanceAsyncId = INVALID_REBALANCE_ID; diff --git a/src/java/voldemort/tools/RebalancePlanCLI.java b/src/java/voldemort/tools/RebalancePlanCLI.java index f473f14769..a18f352b41 100644 --- a/src/java/voldemort/tools/RebalancePlanCLI.java +++ b/src/java/voldemort/tools/RebalancePlanCLI.java @@ -125,7 +125,7 @@ private static OptionSet getValidOptions(String[] args) { return options; } - // TODO: Rename target-cluster target-stores to final-* + // TODO: (refactor) Rename target-cluster target-stores to final-* public static void main(String[] args) throws Exception { setupParser(); OptionSet options = getValidOptions(args); diff --git a/test/unit/voldemort/client/rebalance/AbstractNonZonedRebalanceTest.java b/test/unit/voldemort/client/rebalance/AbstractNonZonedRebalanceTest.java index 2efadf039c..90ab3a55e1 100644 --- a/test/unit/voldemort/client/rebalance/AbstractNonZonedRebalanceTest.java +++ b/test/unit/voldemort/client/rebalance/AbstractNonZonedRebalanceTest.java @@ -240,7 +240,8 @@ public void testRORWRebalance() throws Exception { targetCluster = updateCluster(targetCluster); // TODO: make helper method(s) (possibly at AbstractREbalanceTest - // level) that constructs appropriate controller & plan. + // level) that constructs appropriate controller & plan. There is a + // fair bit of cut-and-pasted coding among these tests... String bootstrapUrl = getBootstrapUrl(currentCluster, 0); int maxParallel = RebalanceController.MAX_PARALLEL_REBALANCING; int maxTries = RebalanceController.MAX_TRIES_REBALANCING; @@ -483,7 +484,6 @@ public void testRebalanceCleanPrimary() throws Exception { int maxTries = RebalanceController.MAX_TRIES_REBALANCING; long timeout = RebalanceController.REBALANCING_CLIENT_TIMEOUT_SEC; boolean stealerBased = !useDonorBased; - boolean deleteAfter = false; RebalanceController rebalanceClient = new RebalanceController(bootstrapUrl, maxParallel, maxTries, @@ -914,6 +914,7 @@ public void run() { // TODO: Fix this test. // TODO: fix this test. Need to take the plan into account to correctly do // tests. + // TODO: Confirm that this test passes after rebasing with Vinoth's fixes. @Test(timeout = 600000) public void testProxyPutDuringRebalancing() throws Exception { System.err.println("testProxyPutDuringRebalancing is currently failing (intermittently)?"); diff --git a/test/unit/voldemort/client/rebalance/AbstractRebalanceTest.java b/test/unit/voldemort/client/rebalance/AbstractRebalanceTest.java index c329952270..3361cccfd6 100644 --- a/test/unit/voldemort/client/rebalance/AbstractRebalanceTest.java +++ b/test/unit/voldemort/client/rebalance/AbstractRebalanceTest.java @@ -74,7 +74,7 @@ public AbstractRebalanceTest(boolean useNio, boolean useDonorBased) { // This method is susceptible to BindException issues due to TOCTOU // problem with getLocalCluster (which is used to construct cluster that is // passed in). - // TODO: Refactor AbstractRebalanceTest to take advantage of + // TODO: (refactor) AbstractRebalanceTest to take advantage of // ServerTestUtils.startVoldemortCluster. protected Cluster startServers(Cluster cluster, String storeXmlFile, @@ -176,7 +176,13 @@ protected String getBootstrapUrl(Cluster cluster, int nodeId) { return "tcp://" + node.getHost() + ":" + node.getSocketPort(); } - // TODO: Add javadoc + /** + * Does the rebalance and then checks that it succeeded. + * + * @param rebalancePlan + * @param rebalanceClient + * @param nodeCheckList + */ protected void rebalanceAndCheck(RebalancePlan rebalancePlan, RebalanceController rebalanceClient, List nodeCheckList) { @@ -189,7 +195,19 @@ protected void rebalanceAndCheck(RebalancePlan rebalancePlan, null); } - // TODO: change from storeDefs to currentStoreDefs and finalStoreDefs to + /** + * Makes sure that all expected partition-stores are on each server after + * the rebalance. + * + * @param currentCluster + * @param targetCluster + * @param storeDefs + * @param nodeCheckList + * @param baselineTuples + * @param baselineVersions + */ + // TODO: (atomic cluster/store update) change from storeDefs to + // currentStoreDefs and finalStoreDefs to // handle zone expansion/shrink tests. protected void checkEntriesPostRebalance(Cluster currentCluster, Cluster targetCluster,