Implement POC greedy constraint-based algorithm
i3wangyi committed Aug 19, 2019
1 parent 9bb8eec commit ae105fc
Showing 8 changed files with 334 additions and 84 deletions.
@@ -0,0 +1,120 @@
package org.apache.helix.controller.rebalancer.waged.algorithm;

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;

import org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint;
import org.apache.helix.controller.rebalancer.waged.constraints.SoftConstraint;
import org.apache.helix.controller.rebalancer.waged.model.AssignableNode;
import org.apache.helix.controller.rebalancer.waged.model.AssignableReplica;
import org.apache.helix.controller.rebalancer.waged.model.ClusterContext;
import org.apache.helix.controller.rebalancer.waged.model.ClusterModel;
import org.apache.helix.controller.rebalancer.waged.model.OptimalAssignment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The algorithm is driven by a given set of constraints:
 * - HardConstraint: Approves or denies an assignment based on its condition; no assignment may
 * violate any "hard constraint".
 * - SoftConstraint: Scores an assignment with points/rewards; a higher score means a better
 * assignment.
 * <p>
 * The goal is to accumulate the most points (rewards) from the "soft constraints" while not
 * violating any "hard constraint".
 */
public class ConstraintBasedAlgorithm implements RebalanceAlgorithm {
private static final Logger LOG = LoggerFactory.getLogger(ConstraintBasedAlgorithm.class);
private final List<HardConstraint> _hardConstraints;
private final List<SoftConstraint> _softConstraints;

public ConstraintBasedAlgorithm(List<HardConstraint> hardConstraints,
List<SoftConstraint> softConstraints) {
_hardConstraints = hardConstraints;
_softConstraints = softConstraints;
}

@Override
public OptimalAssignment calculate(ClusterModel clusterModel) {
ClusterContext clusterContext = clusterModel.getContext();
OptimalAssignment optimalAssignment = new OptimalAssignment(clusterContext);
Map<String, Set<AssignableReplica>> replicasByResource = clusterModel.getAssignableReplicaMap();
List<AssignableNode> nodes = new ArrayList<>(clusterModel.getAssignableNodes().values());

for (String resource : replicasByResource.keySet()) {
for (AssignableReplica replica : replicasByResource.get(resource)) {
Optional<AssignableNode> maybeBestNode =
getNodeWithHighestPoints(replica, nodes, optimalAssignment);
// Stop immediately if any replica cannot find an assignable node.
if (optimalAssignment.hasAnyFailure()) {
LOG.error(
"Unable to find any available candidate node for partition {}; Fail reasons: {}",
replica.getPartitionName(), optimalAssignment.getFailures());
return optimalAssignment;
}
maybeBestNode.ifPresent(node -> {
optimalAssignment.addAssignment(replica, node);
clusterModel.assign(replica.getResourceName(),
replica.getPartitionName(), replica.getReplicaState(), node.getInstanceName());
});
}
}

return optimalAssignment;
}

private Optional<AssignableNode> getNodeWithHighestPoints(AssignableReplica replica,
List<AssignableNode> assignableNodes, OptimalAssignment optimalAssignment) {
Map<AssignableNode, List<HardConstraint>> hardConstraintFailures = new HashMap<>();
List<AssignableNode> candidateNodes = assignableNodes.stream().filter(candidateNode -> {
boolean isValid = true;
// Evaluate all hard constraints and record which ones each candidate node fails.
for (HardConstraint hardConstraint : _hardConstraints) {
if (!hardConstraint.isAssignmentValid(candidateNode, replica,
optimalAssignment.getClusterContext())) {
hardConstraintFailures.computeIfAbsent(candidateNode, node -> new ArrayList<>())
.add(hardConstraint);
isValid = false;
}
}
return isValid;
}).collect(Collectors.toList());
if (candidateNodes.isEmpty()) {
optimalAssignment.trackAssignmentFailure(replica, hardConstraintFailures);
return Optional.empty();
}

Function<AssignableNode, Double> calculatePoints =
(candidateNode) -> _softConstraints.stream().map(softConstraint -> softConstraint
.assignmentScore(candidateNode, replica, optimalAssignment.getClusterContext()))
.mapToDouble(score -> score).sum();

return candidateNodes.stream().max(Comparator.comparing(calculatePoints));
}
}
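getNodeWithHighestPoints boils down to filtering the nodes by every hard constraint and then taking the maximum by the summed soft-constraint score. Below is a minimal, self-contained sketch of that filter-then-max pattern; the class, the plain-string "nodes", and the predicate/scorer lambdas are hypothetical stand-ins for the Helix types, not part of this commit.

import java.util.Comparator;
import java.util.List;
import java.util.Optional;
import java.util.function.BiPredicate;
import java.util.function.ToDoubleBiFunction;
import java.util.stream.Collectors;

// Stand-alone sketch: nodes and the replica are plain strings, and the two lists stand in
// for hard constraints (predicates) and soft constraints (scorers).
public class GreedySelectionSketch {
  static Optional<String> pickBest(String replica, List<String> nodes,
      List<BiPredicate<String, String>> hardConstraints,
      List<ToDoubleBiFunction<String, String>> softConstraints) {
    // Keep only the nodes that satisfy every hard constraint.
    List<String> candidates = nodes.stream()
        .filter(node -> hardConstraints.stream().allMatch(c -> c.test(node, replica)))
        .collect(Collectors.toList());
    // Among the survivors, pick the node with the highest total soft-constraint score.
    return candidates.stream().max(Comparator.comparingDouble((String node) -> softConstraints
        .stream().mapToDouble(s -> s.applyAsDouble(node, replica)).sum()));
  }

  public static void main(String[] args) {
    List<String> nodes = List.of("node-1", "node-2", "node-3");
    // Hard constraint: never place anything on node-3.
    List<BiPredicate<String, String>> hard = List.of((node, replica) -> !"node-3".equals(node));
    // Soft constraint: strongly prefer node-2.
    List<ToDoubleBiFunction<String, String>> soft =
        List.of((node, replica) -> "node-2".equals(node) ? 10.0 : 1.0);
    System.out.println(pickBest("db_0_MASTER", nodes, hard, soft)); // Optional[node-2]
  }
}

The production method behaves the same way, except it also records which hard constraints each rejected node failed so the failures can be surfaced through OptimalAssignment.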
@@ -1,4 +1,4 @@
package org.apache.helix.controller.rebalancer.waged;
package org.apache.helix.controller.rebalancer.waged.algorithm;

/*
* Licensed to the Apache Software Foundation (ASF) under one
@@ -19,27 +19,21 @@
* under the License.
*/

import org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint;
import org.apache.helix.controller.rebalancer.waged.model.ClusterModel;
import org.apache.helix.model.ResourceAssignment;

import java.util.Map;
import org.apache.helix.controller.rebalancer.waged.model.OptimalAssignment;

/**
* A generic rebalance algorithm interface for the WAGED rebalancer.
*
* @see <a href="Rebalance Algorithm">https://github.com/apache/helix/wiki/Design-Proposal---Weight-Aware-Globally-Even-Distribute-Rebalancer#rebalance-algorithm-adapter</a>
* @see <a
* href="Rebalance Algorithm">https://github.com/apache/helix/wiki/Design-Proposal---Weight-Aware-Globally-Even-Distribute-Rebalancer#rebalance-algorithm-adapter</a>
*/
public interface RebalanceAlgorithm {

/**
* Rebalance the Helix resource partitions based on the input cluster model.
*
* @param clusterModel
* @param failureReasons Return the failures <ResourceName, <FailureReason, Count>> that happen during the rebalance calculation.
* If the map is null, no failure will be returned.
* @return A map of <ResourceName, ResourceAssignment>.
* @param clusterModel The run time cluster model that contains all necessary information
* @return An instance of {@link OptimalAssignment}
*/
Map<String, ResourceAssignment> rebalance(ClusterModel clusterModel,
Map<String, Map<HardConstraint.FailureReason, Integer>> failureReasons);
OptimalAssignment calculate(ClusterModel clusterModel);
}
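With the interface now returning an OptimalAssignment instead of a raw assignment map plus a failure-reason out-parameter, a caller only needs to construct the algorithm with its constraint lists and hand over the cluster model. A hypothetical wiring sketch follows; the RebalanceRunnerSketch class and its run method are illustrative names, not part of this commit, and it assumes the classes from this change are on the classpath.

import java.util.List;

import org.apache.helix.controller.rebalancer.waged.algorithm.ConstraintBasedAlgorithm;
import org.apache.helix.controller.rebalancer.waged.algorithm.RebalanceAlgorithm;
import org.apache.helix.controller.rebalancer.waged.constraints.HardConstraint;
import org.apache.helix.controller.rebalancer.waged.constraints.SoftConstraint;
import org.apache.helix.controller.rebalancer.waged.model.ClusterModel;
import org.apache.helix.controller.rebalancer.waged.model.OptimalAssignment;

// Hypothetical caller showing how the new interface is driven.
public final class RebalanceRunnerSketch {
  private RebalanceRunnerSketch() {
  }

  static OptimalAssignment run(ClusterModel clusterModel, List<HardConstraint> hardConstraints,
      List<SoftConstraint> softConstraints) {
    RebalanceAlgorithm algorithm = new ConstraintBasedAlgorithm(hardConstraints, softConstraints);
    // calculate() greedily assigns every replica in the model; any hard-constraint failures are
    // recorded on the returned OptimalAssignment rather than passed back through a map parameter.
    return algorithm.calculate(clusterModel);
  }
}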

This file was deleted.

@@ -71,6 +71,7 @@ public class ClusterContext {
_estimatedMaxTopStateCount = estimateAvgReplicaCount(totalTopStateReplicas, instanceCount);
}


public Map<String, Map<String, Set<String>>> getAssignmentForFaultZoneMap() {
return _assignmentForFaultZoneMap;
}
@@ -92,6 +93,11 @@ public Set<String> getPartitionsForResourceAndFaultZone(String resourceName, Str
.getOrDefault(resourceName, Collections.emptySet());
}

void addAssignment(AssignableNode assignableNode, AssignableReplica assignableReplica) {
assignableNode.assign(assignableReplica);
addPartitionToFaultZone(assignableNode.getFaultZone(), assignableReplica.getResourceName(), assignableReplica.getPartitionName());
}

void addPartitionToFaultZone(String faultZoneId, String resourceName, String partition) {
if (!_assignmentForFaultZoneMap.computeIfAbsent(faultZoneId, k -> new HashMap<>())
.computeIfAbsent(resourceName, k -> new HashSet<>()).add(partition)) {
@@ -115,4 +121,4 @@ private int estimateAvgReplicaCount(int replicaCount, int instanceCount) {
return (int) Math
.ceil((float) replicaCount / instanceCount * ERROR_MARGIN_FOR_ESTIMATED_MAX_COUNT);
}
}
}
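The new addAssignment helper delegates to addPartitionToFaultZone, whose nested computeIfAbsent calls build a faultZone -> resource -> partitions index and rely on Set.add returning false to flag a duplicate placement in the same fault zone. A self-contained sketch of that bookkeeping pattern follows; the class and method names here are hypothetical.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class FaultZoneIndexSketch {
  // faultZoneId -> resourceName -> partitions placed in that fault zone
  private final Map<String, Map<String, Set<String>>> assignmentForFaultZone = new HashMap<>();

  // Returns false when the partition is already present in the fault zone, mirroring the
  // duplicate check performed by addPartitionToFaultZone.
  boolean addPartition(String faultZoneId, String resourceName, String partition) {
    return assignmentForFaultZone.computeIfAbsent(faultZoneId, k -> new HashMap<>())
        .computeIfAbsent(resourceName, k -> new HashSet<>()).add(partition);
  }

  public static void main(String[] args) {
    FaultZoneIndexSketch index = new FaultZoneIndexSketch();
    System.out.println(index.addPartition("zone-1", "db", "db_0")); // true: first placement
    System.out.println(index.addPartition("zone-1", "db", "db_0")); // false: duplicate in zone-1
    System.out.println(index.addPartition("zone-2", "db", "db_0")); // true: different fault zone
  }
}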
@@ -19,22 +19,23 @@
* under the License.
*/

import org.apache.helix.HelixException;
import org.apache.helix.model.ResourceAssignment;

import java.util.Collections;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import org.apache.helix.HelixException;
import org.apache.helix.model.ResourceAssignment;

/**
* This class wraps the required input for the rebalance algorithm.
*/
public class ClusterModel {
private final ClusterContext _clusterContext;
// Map to track all the assignable replications. <Resource Name, Set<Replicas>>
private final Map<String, Set<AssignableReplica>> _assignableReplicaMap;
// The index to find the replication information with a certain state. <Resource, <Key(resource_partition_state), Replica>>
// The index to find the replication information with a certain state. <Resource,
// <Key(resource_partition_state), Replica>>
// Note that the identical replicas are deduped in the index.
private final Map<String, Map<String, AssignableReplica>> _assignableReplicaIndex;
private final Map<String, AssignableNode> _assignableNodeMap;
@@ -46,11 +47,12 @@ public class ClusterModel {
private final Map<String, ResourceAssignment> _bestPossibleAssignment;

/**
* @param clusterContext The initialized cluster context.
* @param assignableReplicas The replicas to be assigned.
* Note that the replicas in this list shall not be included while initializing the context and assignable nodes.
* @param assignableNodes The active instances.
* @param baselineAssignment The recorded baseline assignment.
* @param clusterContext The initialized cluster context.
* @param assignableReplicas The replicas to be assigned.
* Note that the replicas in this list shall not be included while initializing the
* context and assignable nodes.
* @param assignableNodes The active instances.
* @param baselineAssignment The recorded baseline assignment.
* @param bestPossibleAssignment The current best possible assignment.
*/
ClusterModel(ClusterContext clusterContext, Set<AssignableReplica> assignableReplicas,
@@ -62,11 +64,13 @@ public class ClusterModel {
_assignableReplicaMap = assignableReplicas.stream()
.collect(Collectors.groupingBy(AssignableReplica::getResourceName, Collectors.toSet()));

// Index all the replicas to be assigned. Dedup the replica if two instances have the same resource/partition/state
_assignableReplicaIndex = assignableReplicas.stream().collect(Collectors
.groupingBy(AssignableReplica::getResourceName, Collectors
.toMap(AssignableReplica::toString, replica -> replica,
(oldValue, newValue) -> oldValue)));
// Index all the replicas to be assigned. Dedup the replica if two instances have the same
// resource/partition/state
_assignableReplicaIndex =
assignableReplicas.stream()
.collect(Collectors.groupingBy(AssignableReplica::getResourceName,
Collectors.toMap(AssignableReplica::toString, replica -> replica,
(oldValue, newValue) -> oldValue)));

_assignableNodeMap = assignableNodes.stream()
.collect(Collectors.toMap(AssignableNode::getInstanceName, node -> node));
@@ -96,9 +100,9 @@ public Map<String, ResourceAssignment> getBestPossibleAssignment() {
}

/**
* Assign the given replica to the specified instance and record the assignment in the cluster model.
* Assign the given replica to the specified instance and record the assignment in the cluster
* model.
* The cluster usage information will be updated accordingly.
*
* @param resourceName
* @param partitionName
* @param state
@@ -115,7 +119,6 @@ public void assign(String resourceName, String partitionName, String state, Stri
/**
* Revert the proposed assignment from the cluster model.
* The cluster usage information will be updated accordingly.
*
* @param resourceName
* @param partitionName
* @param state
@@ -144,9 +147,9 @@ private AssignableReplica locateAssignableReplica(String resourceName, String pa
_assignableReplicaIndex.getOrDefault(resourceName, Collections.emptyMap())
.get(AssignableReplica.generateReplicaKey(resourceName, partitionName, state));
if (sampleReplica == null) {
throw new HelixException(String
.format("Cannot find the replication with resource name %s, partition name %s, state %s.",
resourceName, partitionName, state));
throw new HelixException(String.format(
"Cannot find the replication with resource name %s, partition name %s, state %s.",
resourceName, partitionName, state));
}
return sampleReplica;
}
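The reformatted _assignableReplicaIndex construction groups replicas by resource and then indexes them by their string key, keeping the first entry whenever two replicas collide on the same resource/partition/state. A self-contained sketch of that groupingBy + toMap dedup pattern follows, with plain strings standing in for AssignableReplica; the class and sample keys are hypothetical.

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class ReplicaIndexSketch {
  public static void main(String[] args) {
    // Each entry stands in for an AssignableReplica key of the form resource_partition_state.
    List<String> replicaKeys =
        List.of("db_db0_MASTER", "db_db0_MASTER", "db_db1_SLAVE", "logs_logs0_MASTER");

    Map<String, Map<String, String>> index = replicaKeys.stream()
        .collect(Collectors.groupingBy(key -> key.split("_")[0], // group by resource name
            Collectors.toMap(key -> key, key -> key, // index by the full key
                (oldValue, newValue) -> oldValue))); // dedupe: keep the first occurrence

    // Prints something like {db={db_db0_MASTER=..., db_db1_SLAVE=...}, logs={...}};
    // the duplicate db_db0_MASTER entry collapses to a single index entry.
    System.out.println(index);
  }
}

In the real model the values are AssignableReplica instances and the key comes from AssignableReplica.toString(), but the merge-function behavior is the same.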
