Skip to content

Commit

Permalink
YARN-3434. Interaction between reservations and userlimit can result …
Browse files Browse the repository at this point in the history
…in significant ULF violation
  • Loading branch information
tgravescs committed Apr 23, 2015
1 parent baf8bc6 commit 189a63a
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 166 deletions.
3 changes: 3 additions & 0 deletions hadoop-yarn-project/CHANGES.txt
Expand Up @@ -252,6 +252,9 @@ Release 2.8.0 - UNRELEASED
YARN-3495. Confusing log generated by FairScheduler.
(Brahma Reddy Battula via ozawa)


YARN-3434. Interaction between reservations and userlimit can result in
significant ULF violation (tgraves)

Release 2.7.1 - UNRELEASED


INCOMPATIBLE CHANGES
Expand Down
Expand Up @@ -19,22 +19,44 @@
package org.apache.hadoop.yarn.server.resourcemanager.scheduler; package org.apache.hadoop.yarn.server.resourcemanager.scheduler;


import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.util.resource.Resources;


/** /**
* Resource limits for queues/applications, this means max overall (please note * Resource limits for queues/applications, this means max overall (please note
* that, it's not "extra") resource you can get. * that, it's not "extra") resource you can get.
*/ */
public class ResourceLimits { public class ResourceLimits {
volatile Resource limit;

// This is special limit that goes with the RESERVE_CONT_LOOK_ALL_NODES
// config. This limit indicates how much we need to unreserve to allocate
// another container.
private volatile Resource amountNeededUnreserve;

public ResourceLimits(Resource limit) { public ResourceLimits(Resource limit) {
this.amountNeededUnreserve = Resources.none();
this.limit = limit; this.limit = limit;
} }


volatile Resource limit; public ResourceLimits(Resource limit, Resource amountNeededUnreserve) {
this.amountNeededUnreserve = amountNeededUnreserve;
this.limit = limit;
}

public Resource getLimit() { public Resource getLimit() {
return limit; return limit;
} }


public Resource getAmountNeededUnreserve() {
return amountNeededUnreserve;
}

public void setLimit(Resource limit) { public void setLimit(Resource limit) {
this.limit = limit; this.limit = limit;
} }

public void setAmountNeededUnreserve(Resource amountNeededUnreserve) {
this.amountNeededUnreserve = amountNeededUnreserve;
}

} }
Expand Up @@ -85,7 +85,7 @@ public abstract class AbstractCSQueue implements CSQueue {
// Track capacities like used-capcity/abs-used-capacity/capacity/abs-capacity, // Track capacities like used-capcity/abs-used-capacity/capacity/abs-capacity,
// etc. // etc.
QueueCapacities queueCapacities; QueueCapacities queueCapacities;

private final RecordFactory recordFactory = private final RecordFactory recordFactory =
RecordFactoryProvider.getRecordFactory(null); RecordFactoryProvider.getRecordFactory(null);
protected CapacitySchedulerContext csContext; protected CapacitySchedulerContext csContext;
Expand Down Expand Up @@ -473,55 +473,55 @@ synchronized boolean canAssignToThisQueue(Resource clusterResource,
getCurrentLimitResource(nodePartition, clusterResource, getCurrentLimitResource(nodePartition, clusterResource,
currentResourceLimits, schedulingMode); currentResourceLimits, schedulingMode);


// if reservation continous looking enabled, check to see if could we
// potentially use this node instead of a reserved node if the application
// has reserved containers.
// TODO, now only consider reservation cases when the node has no label
if (this.reservationsContinueLooking
&& nodePartition.equals(RMNodeLabelsManager.NO_LABEL)
&& Resources.greaterThan(resourceCalculator, clusterResource,
resourceCouldBeUnreserved, Resources.none())) {
// resource-without-reserved = used - reserved
Resource newTotalWithoutReservedResource =
Resources.subtract(newTotalResource, resourceCouldBeUnreserved);

// when total-used-without-reserved-resource < currentLimit, we still
// have chance to allocate on this node by unreserving some containers
if (Resources.lessThan(resourceCalculator, clusterResource,
newTotalWithoutReservedResource, currentLimitResource)) {
if (LOG.isDebugEnabled()) {
LOG.debug("try to use reserved: " + getQueueName()
+ " usedResources: " + queueUsage.getUsed()
+ ", clusterResources: " + clusterResource
+ ", reservedResources: " + resourceCouldBeUnreserved
+ ", capacity-without-reserved: "
+ newTotalWithoutReservedResource + ", maxLimitCapacity: "
+ currentLimitResource);
}
return true;
}
}

// Check if we over current-resource-limit computed.
if (Resources.greaterThan(resourceCalculator, clusterResource, if (Resources.greaterThan(resourceCalculator, clusterResource,
newTotalResource, currentLimitResource)) { newTotalResource, currentLimitResource)) {
return false;
}


if (LOG.isDebugEnabled()) { // if reservation continous looking enabled, check to see if could we
LOG.debug(getQueueName() // potentially use this node instead of a reserved node if the application
+ "Check assign to queue, nodePartition=" // has reserved containers.
+ nodePartition // TODO, now only consider reservation cases when the node has no label
+ " usedResources: " if (this.reservationsContinueLooking
+ queueUsage.getUsed(nodePartition) && nodePartition.equals(RMNodeLabelsManager.NO_LABEL)
+ " clusterResources: " && Resources.greaterThan(resourceCalculator, clusterResource,
+ clusterResource resourceCouldBeUnreserved, Resources.none())) {
+ " currentUsedCapacity " // resource-without-reserved = used - reserved
+ Resources.divide(resourceCalculator, clusterResource, Resource newTotalWithoutReservedResource =
queueUsage.getUsed(nodePartition), Resources.subtract(newTotalResource, resourceCouldBeUnreserved);
labelManager.getResourceByLabel(nodePartition, clusterResource))
+ " max-capacity: " // when total-used-without-reserved-resource < currentLimit, we still
+ queueCapacities.getAbsoluteMaximumCapacity(nodePartition) + ")"); // have chance to allocate on this node by unreserving some containers
if (Resources.lessThan(resourceCalculator, clusterResource,
newTotalWithoutReservedResource, currentLimitResource)) {
if (LOG.isDebugEnabled()) {
LOG.debug("try to use reserved: " + getQueueName()
+ " usedResources: " + queueUsage.getUsed()
+ ", clusterResources: " + clusterResource
+ ", reservedResources: " + resourceCouldBeUnreserved
+ ", capacity-without-reserved: "
+ newTotalWithoutReservedResource + ", maxLimitCapacity: "
+ currentLimitResource);
}
currentResourceLimits.setAmountNeededUnreserve(Resources.subtract(newTotalResource,
currentLimitResource));
return true;
}
}
if (LOG.isDebugEnabled()) {
LOG.debug(getQueueName()
+ "Check assign to queue, nodePartition="
+ nodePartition
+ " usedResources: "
+ queueUsage.getUsed(nodePartition)
+ " clusterResources: "
+ clusterResource
+ " currentUsedCapacity "
+ Resources.divide(resourceCalculator, clusterResource,
queueUsage.getUsed(nodePartition),
labelManager.getResourceByLabel(nodePartition, clusterResource))
+ " max-capacity: "
+ queueCapacities.getAbsoluteMaximumCapacity(nodePartition) + ")");
}
return false;
} }
return true; return true;
} }
Expand Down

0 comments on commit 189a63a

Please sign in to comment.