Skip to content

Commit

Permalink
YARN-4162. CapacityScheduler: Add resource usage by partition and que…
Browse files Browse the repository at this point in the history
…ue capacity by partition to REST API. (Naganarasimha G R via wangda)
  • Loading branch information
wangdatan committed Oct 16, 2015
1 parent 79b8d60 commit 4337b26
Show file tree
Hide file tree
Showing 16 changed files with 1,147 additions and 94 deletions.
3 changes: 3 additions & 0 deletions hadoop-yarn-project/CHANGES.txt
Expand Up @@ -507,6 +507,9 @@ Release 2.8.0 - UNRELEASED
YARN-4258. Add support for controlling capabilities for docker containers.
(Sidharta Seethana via vvasudev)

YARN-4162. CapacityScheduler: Add resource usage by partition and queue capacity
by partition to REST API. (Naganarasimha G R via wangda)

OPTIMIZATIONS

YARN-3339. TestDockerContainerExecutor should pull a single image and not
Expand Down
Expand Up @@ -29,12 +29,11 @@
public abstract class NodeLabel implements Comparable<NodeLabel> {

/**
* Default node label partition.
* Default node label partition used for displaying.
*/
@Private
@Unstable
public static final String DEFAULT_NODE_LABEL_PARTITION =
"<DEFAULT_PARTITION>";
public static final String DEFAULT_NODE_LABEL_PARTITION = "<DEFAULT_PARTITION>";

/**
* Node Label expression not set.
Expand Down
Expand Up @@ -58,8 +58,8 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
Expand Down Expand Up @@ -425,7 +425,7 @@ public synchronized ArrayList<UserInfo> getUsers() {
.getAllUsed()), user.getActiveApplications(), user
.getPendingApplications(), Resources.clone(user
.getConsumedAMResources()), Resources.clone(user
.getUserResourceLimit())));
.getUserResourceLimit()), user.getResourceUsage()));
}
return usersToReturn;
}
Expand Down
Expand Up @@ -23,7 +23,9 @@
import javax.xml.bind.annotation.XmlRootElement;

import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceUsageInfo;

@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
Expand All @@ -34,17 +36,19 @@ public class UserInfo {
protected int numActiveApplications;
protected ResourceInfo AMResourceUsed;
protected ResourceInfo userResourceLimit;
protected ResourceUsageInfo resources;

UserInfo() {}

UserInfo(String username, Resource resUsed, int activeApps, int pendingApps,
Resource amResUsed, Resource resourceLimit) {
Resource amResUsed, Resource resourceLimit, ResourceUsage resourceUsage) {
this.username = username;
this.resourcesUsed = new ResourceInfo(resUsed);
this.numActiveApplications = activeApps;
this.numPendingApplications = pendingApps;
this.AMResourceUsed = new ResourceInfo(amResUsed);
this.userResourceLimit = new ResourceInfo(resourceLimit);
this.resources = new ResourceUsageInfo(resourceUsage);
}

public String getUsername() {
Expand All @@ -70,4 +74,8 @@ public ResourceInfo getAMResourcesUsed() {
public ResourceInfo getUserResourceLimit() {
return userResourceLimit;
}

public ResourceUsageInfo getResourceUsageInfo() {
return resources;
}
}
Expand Up @@ -34,11 +34,13 @@
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerHealth;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UserInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedulerInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedulerLeafQueueInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedulerQueueInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.PartitionQueueCapacitiesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.PartitionResourceUsageInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo;
import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
import org.apache.hadoop.yarn.util.Times;
import org.apache.hadoop.yarn.webapp.ResponseInfo;
Expand Down Expand Up @@ -70,6 +72,7 @@ static class CSQInfo {
CapacitySchedulerInfo csinfo;
CapacitySchedulerQueueInfo qinfo;
String label;
boolean isExclusiveNodeLabel;
}

static class LeafQueueInfoBlock extends HtmlBlock {
Expand All @@ -92,13 +95,13 @@ protected void render(Block html) {
}

private void renderLeafQueueInfoWithPartition(Block html) {
nodeLabel = nodeLabel.length() == 0
String nodeLabelDisplay = nodeLabel.length() == 0
? NodeLabel.DEFAULT_NODE_LABEL_PARTITION : nodeLabel;
// first display the queue's label specific details :
ResponseInfo ri =
info("\'" + lqinfo.getQueuePath().substring(5)
+ "\' Queue Status for Partition \'" + nodeLabel + "\'");
renderQueueCapacityInfo(ri);
+ "\' Queue Status for Partition \'" + nodeLabelDisplay + "\'");
renderQueueCapacityInfo(ri, nodeLabel);
html._(InfoBlock.class);
// clear the info contents so this queue's info doesn't accumulate into
// another queue's info
Expand All @@ -120,23 +123,27 @@ private void renderLeafQueueInfoWithoutParition(Block html) {
ResponseInfo ri =
info("\'" + lqinfo.getQueuePath().substring(5) + "\' Queue Status")
._("Queue State:", lqinfo.getQueueState());
renderQueueCapacityInfo(ri);
renderQueueCapacityInfo(ri, "");
renderCommonLeafQueueInfo(ri);
html._(InfoBlock.class);
// clear the info contents so this queue's info doesn't accumulate into
// another queue's info
ri.clear();
}

private void renderQueueCapacityInfo(ResponseInfo ri) {
private void renderQueueCapacityInfo(ResponseInfo ri, String label) {
PartitionQueueCapacitiesInfo capacities =
lqinfo.getCapacities().getPartitionQueueCapacitiesInfo(label);
PartitionResourceUsageInfo resourceUsages =
lqinfo.getResources().getPartitionResourceUsageInfo(label);
ri.
_("Used Capacity:", percent(lqinfo.getUsedCapacity() / 100)).
_("Configured Capacity:", percent(lqinfo.getCapacity() / 100)).
_("Configured Max Capacity:", percent(lqinfo.getMaxCapacity() / 100)).
_("Absolute Used Capacity:", percent(lqinfo.getAbsoluteUsedCapacity() / 100)).
_("Absolute Configured Capacity:", percent(lqinfo.getAbsoluteCapacity() / 100)).
_("Absolute Configured Max Capacity:", percent(lqinfo.getAbsoluteMaxCapacity() / 100)).
_("Used Resources:", lqinfo.getResourcesUsed().toString());
_("Used Capacity:", percent(capacities.getUsedCapacity() / 100)).
_("Configured Capacity:", percent(capacities.getCapacity() / 100)).
_("Configured Max Capacity:", percent(capacities.getMaxCapacity() / 100)).
_("Absolute Used Capacity:", percent(capacities.getAbsoluteUsedCapacity() / 100)).
_("Absolute Configured Capacity:", percent(capacities.getAbsoluteCapacity() / 100)).
_("Absolute Configured Max Capacity:", percent(capacities.getAbsoluteMaxCapacity() / 100)).
_("Used Resources:", resourceUsages.getUsed().toString());
}

private void renderCommonLeafQueueInfo(ResponseInfo ri) {
Expand Down Expand Up @@ -166,11 +173,13 @@ private void renderCommonLeafQueueInfo(ResponseInfo ri) {

static class QueueUsersInfoBlock extends HtmlBlock {
final CapacitySchedulerLeafQueueInfo lqinfo;
private String nodeLabel;

@Inject
QueueUsersInfoBlock(ViewContext ctx, CSQInfo info) {
super(ctx);
lqinfo = (CapacitySchedulerLeafQueueInfo) info.qinfo;
nodeLabel = info.label;
}

@Override
Expand All @@ -188,9 +197,14 @@ protected void render(Block html) {

ArrayList<UserInfo> users = lqinfo.getUsers().getUsersList();
for (UserInfo userInfo : users) {
ResourceInfo resourcesUsed = userInfo.getResourcesUsed();
if (nodeLabel != null) {
resourcesUsed = userInfo.getResourceUsageInfo()
.getPartitionResourceUsageInfo(nodeLabel).getUsed();
}
tbody.tr().td(userInfo.getUsername())
.td(userInfo.getUserResourceLimit().toString())
.td(userInfo.getResourcesUsed().toString())
.td(resourcesUsed.toString())
.td(lqinfo.getUserAMResourceLimit().toString())
.td(userInfo.getAMResourcesUsed().toString())
.td(Integer.toString(userInfo.getNumActiveApplications()))
Expand All @@ -211,15 +225,32 @@ public static class QueueBlock extends HtmlBlock {

@Override
public void render(Block html) {
ArrayList<CapacitySchedulerQueueInfo> subQueues =
(csqinfo.qinfo == null) ? csqinfo.csinfo.getQueues().getQueueInfoList()
: csqinfo.qinfo.getQueues().getQueueInfoList();
ArrayList<CapacitySchedulerQueueInfo> subQueues = (csqinfo.qinfo == null)
? csqinfo.csinfo.getQueues().getQueueInfoList()
: csqinfo.qinfo.getQueues().getQueueInfoList();

UL<Hamlet> ul = html.ul("#pq");
float used;
float absCap;
float absMaxCap;
float absUsedCap;
for (CapacitySchedulerQueueInfo info : subQueues) {
float used = info.getUsedCapacity() / 100;
float absCap = info.getAbsoluteCapacity() / 100;
float absMaxCap = info.getAbsoluteMaxCapacity() / 100;
float absUsedCap = info.getAbsoluteUsedCapacity() / 100;
String nodeLabel = (csqinfo.label == null) ? "" : csqinfo.label;
// DEFAULT_NODE_LABEL_PARTITION is accessible to all queues;
// other exclusive node labels are accessible only if configured.
if (!nodeLabel.isEmpty()// i.e. its DEFAULT_NODE_LABEL_PARTITION
&& csqinfo.isExclusiveNodeLabel
&& !info.getNodeLabels().contains("*")
&& !info.getNodeLabels().contains(nodeLabel)) {
continue;
}
PartitionQueueCapacitiesInfo partitionQueueCapsInfo = info
.getCapacities().getPartitionQueueCapacitiesInfo(nodeLabel);
used = partitionQueueCapsInfo.getUsedCapacity() / 100;
absCap = partitionQueueCapsInfo.getAbsoluteCapacity() / 100;
absMaxCap = partitionQueueCapsInfo.getAbsoluteMaxCapacity() / 100;
absUsedCap = partitionQueueCapsInfo.getAbsoluteUsedCapacity() / 100;

LI<UL<Hamlet>> li = ul.
li().
a(_Q).$style(width(absMaxCap * Q_MAX_WIDTH)).
Expand Down Expand Up @@ -343,16 +374,13 @@ public void render(Block html) {
_();

float used = 0;
if (null == nodeLabelsInfo
|| (nodeLabelsInfo.size() == 1 && nodeLabelsInfo.get(0)
.getLabelName().isEmpty())) {
CSQueue root = cs.getRootQueue();
CapacitySchedulerInfo sinfo =
new CapacitySchedulerInfo(root, cs, new RMNodeLabel(
RMNodeLabelsManager.NO_LABEL));
csqinfo.csinfo = sinfo;
csqinfo.qinfo = null;

CSQueue root = cs.getRootQueue();
CapacitySchedulerInfo sinfo = new CapacitySchedulerInfo(root, cs);
csqinfo.csinfo = sinfo;

if (null == nodeLabelsInfo || (nodeLabelsInfo.size() == 1
&& nodeLabelsInfo.get(0).getLabelName().isEmpty())) {
used = sinfo.getUsedCapacity() / 100;
// Labels are not enabled in the cluster, or there is only the "default" label.
ul.li().
Expand All @@ -365,18 +393,16 @@ public void render(Block html) {
_(QueueBlock.class)._();
} else {
for (RMNodeLabel label : nodeLabelsInfo) {
CSQueue root = cs.getRootQueue();
CapacitySchedulerInfo sinfo =
new CapacitySchedulerInfo(root, cs, label);
csqinfo.csinfo = sinfo;
csqinfo.qinfo = null;
csqinfo.label = label.getLabelName();
String nodeLabel = csqinfo.label.length() == 0
csqinfo.isExclusiveNodeLabel = label.getIsExclusive();
String nodeLabelDisplay = csqinfo.label.length() == 0
? NodeLabel.DEFAULT_NODE_LABEL_PARTITION : csqinfo.label;
QueueCapacities queueCapacities = root.getQueueCapacities();
used = queueCapacities.getUsedCapacity(label.getLabelName());
PartitionQueueCapacitiesInfo capacities = sinfo.getCapacities()
.getPartitionQueueCapacitiesInfo(csqinfo.label);
used = capacities.getUsedCapacity() / 100;
String partitionUiTag =
"Partition: " + nodeLabel + " " + label.getResource();
"Partition: " + nodeLabelDisplay + " " + label.getResource();
ul.li().
a(_Q).$style(width(Q_MAX_WIDTH)).
span().$style(join(width(used), ";left:0%;",
Expand Down
Expand Up @@ -105,14 +105,12 @@
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.nodelabels.RMNodeLabel;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NodeLabelsUtils;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
Expand Down Expand Up @@ -144,8 +142,8 @@
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeLabelInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeLabelsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsEntry;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsEntryList;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo;
Expand Down Expand Up @@ -249,8 +247,7 @@ public SchedulerTypeInfo getSchedulerInfo() {
CapacityScheduler cs = (CapacityScheduler) rs;
CSQueue root = cs.getRootQueue();
sinfo =
new CapacitySchedulerInfo(root, cs, new RMNodeLabel(
RMNodeLabelsManager.NO_LABEL));
new CapacitySchedulerInfo(root, cs);
} else if (rs instanceof FairScheduler) {
FairScheduler fs = (FairScheduler) rs;
sinfo = new FairSchedulerInfo(fs);
Expand Down

0 comments on commit 4337b26

Please sign in to comment.