
Commit 189a63a

YARN-3434. Interaction between reservations and userlimit can result in significant ULF violation
1 parent baf8bc6

5 files changed, +186 -166 lines changed

hadoop-yarn-project/CHANGES.txt

Lines changed: 3 additions & 0 deletions
@@ -252,6 +252,9 @@ Release 2.8.0 - UNRELEASED
     YARN-3495. Confusing log generated by FairScheduler.
     (Brahma Reddy Battula via ozawa)
 
+    YARN-3434. Interaction between reservations and userlimit can result in
+    significant ULF violation (tgraves)
+
 Release 2.7.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/ResourceLimits.java

Lines changed: 25 additions & 3 deletions
@@ -19,22 +19,44 @@
 package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
 
 import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.util.resource.Resources;
 
 /**
  * Resource limits for queues/applications, this means max overall (please note
  * that, it's not "extra") resource you can get.
  */
 public class ResourceLimits {
+  volatile Resource limit;
+
+  // This is special limit that goes with the RESERVE_CONT_LOOK_ALL_NODES
+  // config. This limit indicates how much we need to unreserve to allocate
+  // another container.
+  private volatile Resource amountNeededUnreserve;
+
   public ResourceLimits(Resource limit) {
+    this.amountNeededUnreserve = Resources.none();
     this.limit = limit;
   }
-
-  volatile Resource limit;
+
+  public ResourceLimits(Resource limit, Resource amountNeededUnreserve) {
+    this.amountNeededUnreserve = amountNeededUnreserve;
+    this.limit = limit;
+  }
+
   public Resource getLimit() {
     return limit;
   }
-
+
+  public Resource getAmountNeededUnreserve() {
+    return amountNeededUnreserve;
+  }
+
   public void setLimit(Resource limit) {
     this.limit = limit;
   }
+
+  public void setAmountNeededUnreserve(Resource amountNeededUnreserve) {
+    this.amountNeededUnreserve = amountNeededUnreserve;
+  }
+
 }
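For illustration, a minimal sketch of how the constructors and the new accessor pair behave (this example is ours, not part of the commit; it assumes the hadoop-yarn-api and hadoop-yarn-server-resourcemanager jars are on the classpath):

// Hypothetical illustration, not from the patch: the single-argument
// constructor defaults amountNeededUnreserve to Resources.none(), while the
// new two-argument form carries the unreserve requirement with the limit.
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits;

public class ResourceLimitsSketch {
  public static void main(String[] args) {
    Resource limit = Resource.newInstance(8192, 8);    // 8 GB, 8 vcores
    Resource deficit = Resource.newInstance(1024, 1);  // how far over the limit

    ResourceLimits plain = new ResourceLimits(limit);
    ResourceLimits withUnreserve = new ResourceLimits(limit, deficit);

    System.out.println(plain.getAmountNeededUnreserve());         // <memory:0, vCores:0>
    System.out.println(withUnreserve.getAmountNeededUnreserve()); // <memory:1024, vCores:1>
  }
}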

hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java

Lines changed: 47 additions & 47 deletions
@@ -85,7 +85,7 @@ public abstract class AbstractCSQueue implements CSQueue {
   // Track capacities like used-capcity/abs-used-capacity/capacity/abs-capacity,
   // etc.
   QueueCapacities queueCapacities;
-  
+
   private final RecordFactory recordFactory =
       RecordFactoryProvider.getRecordFactory(null);
   protected CapacitySchedulerContext csContext;
@@ -473,55 +473,55 @@ synchronized boolean canAssignToThisQueue(Resource clusterResource,
         getCurrentLimitResource(nodePartition, clusterResource,
             currentResourceLimits, schedulingMode);
 
-    // if reservation continous looking enabled, check to see if could we
-    // potentially use this node instead of a reserved node if the application
-    // has reserved containers.
-    // TODO, now only consider reservation cases when the node has no label
-    if (this.reservationsContinueLooking
-        && nodePartition.equals(RMNodeLabelsManager.NO_LABEL)
-        && Resources.greaterThan(resourceCalculator, clusterResource,
-            resourceCouldBeUnreserved, Resources.none())) {
-      // resource-without-reserved = used - reserved
-      Resource newTotalWithoutReservedResource =
-          Resources.subtract(newTotalResource, resourceCouldBeUnreserved);
-
-      // when total-used-without-reserved-resource < currentLimit, we still
-      // have chance to allocate on this node by unreserving some containers
-      if (Resources.lessThan(resourceCalculator, clusterResource,
-          newTotalWithoutReservedResource, currentLimitResource)) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("try to use reserved: " + getQueueName()
-              + " usedResources: " + queueUsage.getUsed()
-              + ", clusterResources: " + clusterResource
-              + ", reservedResources: " + resourceCouldBeUnreserved
-              + ", capacity-without-reserved: "
-              + newTotalWithoutReservedResource + ", maxLimitCapacity: "
-              + currentLimitResource);
-        }
-        return true;
-      }
-    }
-
-    // Check if we over current-resource-limit computed.
     if (Resources.greaterThan(resourceCalculator, clusterResource,
         newTotalResource, currentLimitResource)) {
-      return false;
-    }
 
-    if (LOG.isDebugEnabled()) {
-      LOG.debug(getQueueName()
-          + "Check assign to queue, nodePartition="
-          + nodePartition
-          + " usedResources: "
-          + queueUsage.getUsed(nodePartition)
-          + " clusterResources: "
-          + clusterResource
-          + " currentUsedCapacity "
-          + Resources.divide(resourceCalculator, clusterResource,
-              queueUsage.getUsed(nodePartition),
-              labelManager.getResourceByLabel(nodePartition, clusterResource))
-          + " max-capacity: "
-          + queueCapacities.getAbsoluteMaximumCapacity(nodePartition) + ")");
+      // if reservation continous looking enabled, check to see if could we
+      // potentially use this node instead of a reserved node if the application
+      // has reserved containers.
+      // TODO, now only consider reservation cases when the node has no label
+      if (this.reservationsContinueLooking
+          && nodePartition.equals(RMNodeLabelsManager.NO_LABEL)
+          && Resources.greaterThan(resourceCalculator, clusterResource,
+              resourceCouldBeUnreserved, Resources.none())) {
+        // resource-without-reserved = used - reserved
+        Resource newTotalWithoutReservedResource =
+            Resources.subtract(newTotalResource, resourceCouldBeUnreserved);
+
+        // when total-used-without-reserved-resource < currentLimit, we still
+        // have chance to allocate on this node by unreserving some containers
+        if (Resources.lessThan(resourceCalculator, clusterResource,
+            newTotalWithoutReservedResource, currentLimitResource)) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("try to use reserved: " + getQueueName()
+                + " usedResources: " + queueUsage.getUsed()
+                + ", clusterResources: " + clusterResource
+                + ", reservedResources: " + resourceCouldBeUnreserved
+                + ", capacity-without-reserved: "
+                + newTotalWithoutReservedResource + ", maxLimitCapacity: "
+                + currentLimitResource);
+          }
+          currentResourceLimits.setAmountNeededUnreserve(Resources.subtract(newTotalResource,
+              currentLimitResource));
+          return true;
+        }
+      }
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(getQueueName()
+            + "Check assign to queue, nodePartition="
+            + nodePartition
+            + " usedResources: "
+            + queueUsage.getUsed(nodePartition)
+            + " clusterResources: "
+            + clusterResource
+            + " currentUsedCapacity "
+            + Resources.divide(resourceCalculator, clusterResource,
+                queueUsage.getUsed(nodePartition),
+                labelManager.getResourceByLabel(nodePartition, clusterResource))
+            + " max-capacity: "
+            + queueCapacities.getAbsoluteMaximumCapacity(nodePartition) + ")");
+      }
+      return false;
     }
     return true;
   }
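The behavioral change in this hunk: the reservation "continue looking" path now runs only after the proposed total exceeds the current limit, and on success it records the exact deficit via setAmountNeededUnreserve, so the allocator knows how much must be unreserved rather than just that some unreserving would help. A standalone sketch of that arithmetic with made-up numbers (variable names and values are ours; Resources.fitsIn and Resources.subtract are existing helpers in org.apache.hadoop.yarn.util.resource.Resources):

// Standalone illustration of the unreserve check above, with fixed numbers
// (all values and names here are examples, not real scheduler state).
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.util.resource.Resources;

public class UnreserveCheckSketch {
  public static void main(String[] args) {
    Resource limit = Resource.newInstance(10240, 10);    // current queue limit
    Resource newTotal = Resource.newInstance(12288, 12); // used + proposed container
    Resource reserved = Resource.newInstance(4096, 4);   // app's reserved containers

    // newTotal exceeds the limit, so a plain allocation would be rejected...
    if (!Resources.fitsIn(newTotal, limit)
        && Resources.fitsIn(Resources.subtract(newTotal, reserved), limit)) {
      // ...but unreserving brings usage back under the limit. The patch
      // records the exact deficit (newTotal - limit) on the ResourceLimits.
      Resource needed = Resources.subtract(newTotal, limit);
      System.out.println("must unreserve at least " + needed); // <memory:2048, vCores:2>
    }
  }
}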
