-
Notifications
You must be signed in to change notification settings - Fork 8.7k
/
SCMContainerPlacementCapacity.java
140 lines (131 loc) · 5.73 KB
/
SCMContainerPlacementCapacity.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.hadoop.hdds.scm.container.placement.algorithms;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.scm.container.placement.metrics.SCMNodeMetric;
import org.apache.hadoop.hdds.scm.exceptions.SCMException;
import org.apache.hadoop.hdds.scm.net.NetworkTopology;
import org.apache.hadoop.hdds.scm.node.NodeManager;
import com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Container placement policy that randomly choose datanodes with remaining
* space to satisfy the size constraints.
* <p>
 * The Algorithm is as follows, Pick 2 random nodes from a given pool of nodes
 * and then pick the node with the lower utilization. This leads to a higher
 * probability of nodes with lower utilization to be picked.
* <p>
* For those wondering why we choose two nodes randomly and choose the node
* with lower utilization. There are links to this original papers in
* HDFS-11564.
* <p>
 * A brief summary -- We treat the nodes on a scale from lowest utilized to
 * highest utilized; there are (s * (s - 1)) / 2 possibilities to build
 * distinct pairs of nodes. There are s - k pairs of nodes in which the rank
 * k node is the less utilized of the pair. So the probability of picking the
 * rank k node is (2 * (s - k)) / (s * (s - 1)).
* <p>
* In English, There is a much higher probability of picking less utilized nodes
* as compared to nodes with higher utilization since we pick 2 nodes and
* then pick the node with lower utilization.
* <p>
* This avoids the issue of users adding new nodes into the cluster and HDFS
* sending all traffic to those nodes if we only use a capacity based
* allocation scheme. Unless those nodes are part of the set of the first 2
* nodes then newer nodes will not be in the running to get the container.
* <p>
* This leads to an I/O pattern where the lower utilized nodes are favoured
* more than higher utilized nodes, but part of the I/O will still go to the
* older higher utilized nodes.
* <p>
* With this algorithm in place, our hope is that balancer tool needs to do
* little or no work and the cluster will achieve a balanced distribution
* over time.
*/
public final class SCMContainerPlacementCapacity extends SCMCommonPolicy {
  @VisibleForTesting
  static final Logger LOG =
      LoggerFactory.getLogger(SCMContainerPlacementCapacity.class);

  /**
   * Constructs a capacity-aware container placement policy: candidate nodes
   * are weighted by their reported utilization.
   *
   * @param nodeManager Node Manager
   * @param conf Configuration
   * @param networkTopology accepted for constructor-signature compatibility;
   *                        not used by this policy
   * @param fallback accepted for constructor-signature compatibility;
   *                 not used by this policy
   */
  public SCMContainerPlacementCapacity(final NodeManager nodeManager,
      final Configuration conf, final NetworkTopology networkTopology,
      final boolean fallback) {
    super(nodeManager, conf);
  }

  /**
   * Called by SCM to choose datanodes.
   *
   * <p>Delegates the health/size filtering to the superclass; if that already
   * yields exactly the requested number of nodes no weighting is needed,
   * otherwise the weighted selection in {@link #chooseNode(List)} is applied
   * via {@code getResultSet}.
   *
   * @param excludedNodes - list of the datanodes to exclude.
   * @param favoredNodes - list of nodes preferred.
   * @param nodesRequired - number of datanodes required.
   * @param sizeRequired - size required for the container or block.
   * @return List of datanodes.
   * @throws SCMException SCMException
   */
  @Override
  public List<DatanodeDetails> chooseDatanodes(
      List<DatanodeDetails> excludedNodes, List<DatanodeDetails> favoredNodes,
      final int nodesRequired, final long sizeRequired) throws SCMException {
    final List<DatanodeDetails> candidates = super.chooseDatanodes(
        excludedNodes, favoredNodes, nodesRequired, sizeRequired);
    if (candidates.size() != nodesRequired) {
      return getResultSet(nodesRequired, candidates);
    }
    return candidates;
  }

  /**
   * Picks one node from the healthy list using the two-random-choices rule,
   * removes it from the list being operated on, and returns it.
   *
   * @param healthyNodes - List of healthy nodes that meet the size
   *                       requirement.
   * @return DatanodeDetails that is chosen.
   */
  @Override
  public DatanodeDetails chooseNode(List<DatanodeDetails> healthyNodes) {
    final int indexA = getRand().nextInt(healthyNodes.size());
    final int indexB = getRand().nextInt(healthyNodes.size());
    final DatanodeDetails chosen;
    if (indexA == indexB) {
      // Both random draws landed on the same node; no comparison needed.
      chosen = healthyNodes.get(indexA);
    } else {
      final DatanodeDetails nodeA = healthyNodes.get(indexA);
      final DatanodeDetails nodeB = healthyNodes.get(indexB);
      final SCMNodeMetric metricA = getNodeManager().getNodeStat(nodeA);
      final SCMNodeMetric metricB = getNodeManager().getNodeStat(nodeB);
      // Keep whichever node SCMNodeMetric.isGreater favours; per the class
      // javadoc this biases selection toward the less utilized node.
      chosen = metricA.isGreater(metricB.get()) ? nodeA : nodeB;
    }
    healthyNodes.remove(chosen);
    return chosen;
  }
}