Skip to content

Commit

Permalink
YARN-3273. Improve scheduler UI to facilitate scheduling analysis and…
Browse files Browse the repository at this point in the history
… debugging. Contributed by Rohith Sharmaks
  • Loading branch information
jian-he committed Mar 18, 2015
1 parent 3bc72cc commit 658097d
Show file tree
Hide file tree
Showing 21 changed files with 371 additions and 62 deletions.
3 changes: 3 additions & 0 deletions hadoop-yarn-project/CHANGES.txt
Expand Up @@ -59,6 +59,9 @@ Release 2.8.0 - UNRELEASED
YARN-3243. CapacityScheduler should pass headroom from parent to children YARN-3243. CapacityScheduler should pass headroom from parent to children
to make sure ParentQueue obey its capacity limits. (Wangda Tan via jianhe) to make sure ParentQueue obey its capacity limits. (Wangda Tan via jianhe)


YARN-3273. Improve scheduler UI to facilitate scheduling analysis and
debugging. (Rohith Sharmaks via jianhe)

OPTIMIZATIONS OPTIMIZATIONS


YARN-3339. TestDockerContainerExecutor should pull a single image and not YARN-3339. TestDockerContainerExecutor should pull a single image and not
Expand Down
Expand Up @@ -172,7 +172,7 @@ public Collection<ContainerReport> run() throws Exception {
._("Diagnostics Info:", appAttempt.getDiagnosticsInfo() == null ? ._("Diagnostics Info:", appAttempt.getDiagnosticsInfo() == null ?
"" : appAttempt.getDiagnosticsInfo()); "" : appAttempt.getDiagnosticsInfo());


html._(InfoBlock.class);


if (exceptionWhenGetContainerReports) { if (exceptionWhenGetContainerReports) {
html html
Expand All @@ -183,6 +183,19 @@ public Collection<ContainerReport> run() throws Exception {
return; return;
} }


// TODO need to render applicationHeadRoom value from
// ApplicationAttemptMetrics after YARN-3284
if (webUiType.equals(YarnWebParams.RM_WEB_UI)) {
if (!isApplicationInFinalState(appAttempt.getAppAttemptState())) {
DIV<Hamlet> pdiv = html._(InfoBlock.class).div(_INFO_WRAP);
info("Application Attempt Overview").clear();
info("Application Attempt Metrics")._(
"Application Attempt Headroom : ", 0);
pdiv._();
}
}
html._(InfoBlock.class);

// Container Table // Container Table
TBODY<TABLE<Hamlet>> tbody = TBODY<TABLE<Hamlet>> tbody =
html.table("#containers").thead().tr().th(".id", "Container ID") html.table("#containers").thead().tr().th(".id", "Container ID")
Expand Down Expand Up @@ -273,4 +286,10 @@ private boolean hasAMContainer(ContainerId containerId,
} }
return false; return false;
} }

/**
 * Checks whether the given attempt state is one of the states this block
 * treats as final.
 *
 * @param state the application attempt state to test
 * @return {@code true} if {@code state} is FINISHED, FAILED or KILLED;
 *         {@code false} for any other value
 */
private boolean isApplicationInFinalState(YarnApplicationAttemptState state) {
  if (state == YarnApplicationAttemptState.FINISHED) {
    return true;
  }
  if (state == YarnApplicationAttemptState.FAILED) {
    return true;
  }
  return state == YarnApplicationAttemptState.KILLED;
}
} }
Expand Up @@ -41,6 +41,8 @@ public class RMAppAttemptMetrics {
private ApplicationAttemptId attemptId = null; private ApplicationAttemptId attemptId = null;
// preemption info // preemption info
private Resource resourcePreempted = Resource.newInstance(0, 0); private Resource resourcePreempted = Resource.newInstance(0, 0);
// application headroom
private volatile Resource applicationHeadroom = Resource.newInstance(0, 0);
private AtomicInteger numNonAMContainersPreempted = new AtomicInteger(0); private AtomicInteger numNonAMContainersPreempted = new AtomicInteger(0);
private AtomicBoolean isPreempted = new AtomicBoolean(false); private AtomicBoolean isPreempted = new AtomicBoolean(false);


Expand Down Expand Up @@ -145,4 +147,12 @@ public int[][] getLocalityStatistics() {
public int getTotalAllocatedContainers() { public int getTotalAllocatedContainers() {
return this.totalAllocatedContainers; return this.totalAllocatedContainers;
} }

/**
 * Returns the headroom currently recorded for this application attempt.
 * The stored instance itself is returned; no copy is made.
 *
 * @return the recorded application headroom
 */
public Resource getApplicationAttemptHeadroom() {
  return this.applicationHeadroom;
}

/**
 * Records the given headroom for this application attempt.
 * The reference is stored as passed in; no copy is made here.
 *
 * @param headRoom the headroom value to record
 */
public void setApplicationAttemptHeadRoom(Resource headRoom) {
this.applicationHeadroom = headRoom;
}
} }
Expand Up @@ -632,4 +632,14 @@ public void incNumAllocatedContainers(NodeType containerType,
requestType); requestType);
} }
} }

/**
 * Publishes the given headroom to the metrics of the application's current
 * attempt so that it can be read from there later.
 *
 * <p>The call is a no-op when the application is not registered in the RM
 * context or has no current attempt. A clone of {@code headroom} is stored,
 * so later changes to the passed object do not affect the recorded value.
 *
 * @param headroom the headroom to record
 */
public void setApplicationHeadroomForMetrics(Resource headroom) {
  // The lookup yields null when no entry exists for this application id;
  // guard against it instead of failing with a NullPointerException.
  org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp app =
      rmContext.getRMApps().get(attemptId.getApplicationId());
  if (app == null) {
    return;
  }
  RMAppAttempt attempt = app.getCurrentAppAttempt();
  if (attempt != null) {
    attempt.getRMAppAttemptMetrics().setApplicationAttemptHeadRoom(
        Resources.clone(headroom));
  }
}
} }
Expand Up @@ -419,10 +419,13 @@ public synchronized User getUser(String userName) {
*/ */
public synchronized ArrayList<UserInfo> getUsers() { public synchronized ArrayList<UserInfo> getUsers() {
ArrayList<UserInfo> usersToReturn = new ArrayList<UserInfo>(); ArrayList<UserInfo> usersToReturn = new ArrayList<UserInfo>();
for (Map.Entry<String, User> entry: users.entrySet()) { for (Map.Entry<String, User> entry : users.entrySet()) {
usersToReturn.add(new UserInfo(entry.getKey(), Resources.clone( User user = entry.getValue();
entry.getValue().getUsed()), entry.getValue().getActiveApplications(), usersToReturn.add(new UserInfo(entry.getKey(), Resources.clone(user
entry.getValue().getPendingApplications())); .getUsed()), user.getActiveApplications(), user
.getPendingApplications(), Resources.clone(user
.getConsumedAMResources()), Resources.clone(user
.getUserResourceLimit())));
} }
return usersToReturn; return usersToReturn;
} }
Expand Down Expand Up @@ -1068,7 +1071,7 @@ private Resource computeUserLimit(FiCaSchedulerApp application,
" clusterCapacity: " + clusterResource " clusterCapacity: " + clusterResource
); );
} }

user.setUserResourceLimit(limit);
return limit; return limit;
} }


Expand Down Expand Up @@ -1738,6 +1741,7 @@ resourceCalculator, this, getParent(), clusterResource,
@VisibleForTesting @VisibleForTesting
public static class User { public static class User {
ResourceUsage userResourceUsage = new ResourceUsage(); ResourceUsage userResourceUsage = new ResourceUsage();
volatile Resource userResourceLimit = Resource.newInstance(0, 0);
int pendingApplications = 0; int pendingApplications = 0;
int activeApplications = 0; int activeApplications = 0;


Expand Down Expand Up @@ -1807,6 +1811,14 @@ public void releaseContainer(Resource resource, Set<String> nodeLabels) {
} }
} }
} }

/**
 * Returns the resource limit currently stored for this user.
 * The stored instance itself is returned; no copy is made.
 *
 * @return the stored user resource limit
 */
public Resource getUserResourceLimit() {
  return this.userResourceLimit;
}

/**
 * Stores the given resource limit for this user.
 * The reference is kept as passed in; no copy is made here.
 *
 * @param userResourceLimit the limit to store
 */
public void setUserResourceLimit(Resource userResourceLimit) {
this.userResourceLimit = userResourceLimit;
}
} }


@Override @Override
Expand Down
Expand Up @@ -32,14 +32,19 @@ public class UserInfo {
protected ResourceInfo resourcesUsed; protected ResourceInfo resourcesUsed;
protected int numPendingApplications; protected int numPendingApplications;
protected int numActiveApplications; protected int numActiveApplications;
protected ResourceInfo AMResourceUsed;
protected ResourceInfo userResourceLimit;


UserInfo() {} UserInfo() {}


UserInfo(String username, Resource resUsed, int activeApps, int pendingApps) { UserInfo(String username, Resource resUsed, int activeApps, int pendingApps,
Resource amResUsed, Resource resourceLimit) {
this.username = username; this.username = username;
this.resourcesUsed = new ResourceInfo(resUsed); this.resourcesUsed = new ResourceInfo(resUsed);
this.numActiveApplications = activeApps; this.numActiveApplications = activeApps;
this.numPendingApplications = pendingApps; this.numPendingApplications = pendingApps;
this.AMResourceUsed = new ResourceInfo(amResUsed);
this.userResourceLimit = new ResourceInfo(resourceLimit);
} }


public String getUsername() { public String getUsername() {
Expand All @@ -57,4 +62,12 @@ public int getNumPendingApplications() {
public int getNumActiveApplications() { public int getNumActiveApplications() {
return numActiveApplications; return numActiveApplications;
} }

/**
 * Returns the AM resource usage captured for this user.
 *
 * @return the value of the {@code AMResourceUsed} field
 */
public ResourceInfo getAMResourcesUsed() {
  return this.AMResourceUsed;
}

/**
 * Returns the resource limit captured for this user.
 *
 * @return the value of the {@code userResourceLimit} field
 */
public ResourceInfo getUserResourceLimit() {
  return this.userResourceLimit;
}
} }
Expand Up @@ -268,7 +268,9 @@ public synchronized Allocation getAllocation(ResourceCalculator rc,
minimumAllocation, numCont); minimumAllocation, numCont);
ContainersAndNMTokensAllocation allocation = ContainersAndNMTokensAllocation allocation =
pullNewlyAllocatedContainersAndNMTokens(); pullNewlyAllocatedContainersAndNMTokens();
return new Allocation(allocation.getContainerList(), getHeadroom(), null, Resource headroom = getHeadroom();
setApplicationHeadroomForMetrics(headroom);
return new Allocation(allocation.getContainerList(), headroom, null,
currentContPreemption, Collections.singletonList(rr), currentContPreemption, Collections.singletonList(rr),
allocation.getNMTokenList()); allocation.getNMTokenList());
} }
Expand Down
Expand Up @@ -938,9 +938,10 @@ clusterResource, minimumAllocation, getMaximumResourceCapability(),
application.updateBlacklist(blacklistAdditions, blacklistRemovals); application.updateBlacklist(blacklistAdditions, blacklistRemovals);
ContainersAndNMTokensAllocation allocation = ContainersAndNMTokensAllocation allocation =
application.pullNewlyAllocatedContainersAndNMTokens(); application.pullNewlyAllocatedContainersAndNMTokens();
return new Allocation(allocation.getContainerList(), Resource headroom = application.getHeadroom();
application.getHeadroom(), preemptionContainerIds, null, null, application.setApplicationHeadroomForMetrics(headroom);
allocation.getNMTokenList()); return new Allocation(allocation.getContainerList(), headroom,
preemptionContainerIds, null, null, allocation.getNMTokenList());
} }
} }


Expand Down
Expand Up @@ -343,9 +343,10 @@ public Allocation allocate(
application.updateBlacklist(blacklistAdditions, blacklistRemovals); application.updateBlacklist(blacklistAdditions, blacklistRemovals);
ContainersAndNMTokensAllocation allocation = ContainersAndNMTokensAllocation allocation =
application.pullNewlyAllocatedContainersAndNMTokens(); application.pullNewlyAllocatedContainersAndNMTokens();
return new Allocation(allocation.getContainerList(), Resource headroom = application.getHeadroom();
application.getHeadroom(), null, null, null, application.setApplicationHeadroomForMetrics(headroom);
allocation.getNMTokenList()); return new Allocation(allocation.getContainerList(), headroom, null,
null, null, allocation.getNMTokenList());
} }
} }


Expand Down
Expand Up @@ -37,6 +37,8 @@
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.LI; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.LI;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY;
import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.UL; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.UL;
import org.apache.hadoop.yarn.webapp.view.HtmlBlock; import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
import org.apache.hadoop.yarn.webapp.view.InfoBlock; import org.apache.hadoop.yarn.webapp.view.InfoBlock;
Expand Down Expand Up @@ -67,41 +69,8 @@ static class LeafQueueInfoBlock extends HtmlBlock {
lqinfo = (CapacitySchedulerLeafQueueInfo) info.qinfo; lqinfo = (CapacitySchedulerLeafQueueInfo) info.qinfo;
} }


/**
 * Returns a string describing one resource as a percentage of another.
 *
 * <p>The result has the form {@code "Memory: <m> (<pct>%), vCores: <v> (<pct>%)"}.
 * The absolute value is omitted when {@code numerator} is {@code null}, and
 * the percentage part is omitted when it cannot be computed (missing
 * numerator, missing denominator, or a zero denominator component).
 *
 * @param numerator the resource being described; may be {@code null}
 * @param denominator the resource the numerator is compared against;
 *        may be {@code null}
 * @return the formatted description (contains HTML markup)
 */
private String getPercentage(ResourceInfo numerator, ResourceInfo denominator) {
  StringBuilder percentString = new StringBuilder("Memory: ");
  if (numerator != null) {
    percentString.append(numerator.getMemory());
    // Only compute a share when both sides are present and non-zero;
    // dereferencing a null numerator here used to throw.
    if (denominator != null && denominator.getMemory() != 0) {
      percentString.append(" (<span title='of used resources in this queue'>")
          .append(StringUtils.format("%.2f", numerator.getMemory() * 100.0 /
              denominator.getMemory()) + "%</span>)");
    }
  }
  percentString.append(", vCores: ");
  if (numerator != null) {
    percentString.append(numerator.getvCores());
    if (denominator != null && denominator.getvCores() != 0) {
      percentString.append(" (<span title='of used resources in this queue'>")
          .append(StringUtils.format("%.2f", numerator.getvCores() * 100.0 /
              denominator.getvCores()) + "%</span>)");
    }
  }
  return percentString.toString();
}

@Override @Override
protected void render(Block html) { protected void render(Block html) {
StringBuilder activeUserList = new StringBuilder("");
ResourceInfo usedResources = lqinfo.getResourcesUsed();
ArrayList<UserInfo> users = lqinfo.getUsers().getUsersList();
for (UserInfo entry: users) {
activeUserList.append(entry.getUsername()).append(" &lt;")
.append(getPercentage(entry.getResourcesUsed(), usedResources))
.append(", Schedulable Apps: " + entry.getNumActiveApplications())
.append(", Non-Schedulable Apps: " + entry.getNumPendingApplications())
.append("&gt;<br style='display:block'>"); //Force line break
}


ResponseInfo ri = info("\'" + lqinfo.getQueuePath().substring(5) + "\' Queue Status"). ResponseInfo ri = info("\'" + lqinfo.getQueuePath().substring(5) + "\' Queue Status").
_("Queue State:", lqinfo.getQueueState()). _("Queue State:", lqinfo.getQueueState()).
Expand All @@ -116,12 +85,12 @@ protected void render(Block html) {
_("Max Applications:", Integer.toString(lqinfo.getMaxApplications())). _("Max Applications:", Integer.toString(lqinfo.getMaxApplications())).
_("Max Applications Per User:", Integer.toString(lqinfo.getMaxApplicationsPerUser())). _("Max Applications Per User:", Integer.toString(lqinfo.getMaxApplicationsPerUser())).
_("Max Application Master Resources:", lqinfo.getAMResourceLimit().toString()). _("Max Application Master Resources:", lqinfo.getAMResourceLimit().toString()).
_("Used Application Master Resources:", lqinfo.getUsedAMResource().toString()).
_("Max Application Master Resources Per User:", lqinfo.getUserAMResourceLimit().toString()). _("Max Application Master Resources Per User:", lqinfo.getUserAMResourceLimit().toString()).
_("Configured Capacity:", percent(lqinfo.getCapacity() / 100)). _("Configured Capacity:", percent(lqinfo.getCapacity() / 100)).
_("Configured Max Capacity:", percent(lqinfo.getMaxCapacity() / 100)). _("Configured Max Capacity:", percent(lqinfo.getMaxCapacity() / 100)).
_("Configured Minimum User Limit Percent:", Integer.toString(lqinfo.getUserLimit()) + "%"). _("Configured Minimum User Limit Percent:", Integer.toString(lqinfo.getUserLimit()) + "%").
_("Configured User Limit Factor:", String.format("%.1f", lqinfo.getUserLimitFactor())). _("Configured User Limit Factor:", String.format("%.1f", lqinfo.getUserLimitFactor())).
_r("Active Users: ", activeUserList.toString()).
_("Accessible Node Labels:", StringUtils.join(",", lqinfo.getNodeLabels())). _("Accessible Node Labels:", StringUtils.join(",", lqinfo.getNodeLabels())).
_("Preemption:", lqinfo.getPreemptionDisabled() ? "disabled" : "enabled"); _("Preemption:", lqinfo.getPreemptionDisabled() ? "disabled" : "enabled");


Expand All @@ -132,6 +101,44 @@ protected void render(Block html) {
} }
} }


/**
 * Renders the "#userinfo" table for a leaf queue: one header row followed by
 * one row per user returned by the queue's user list.
 */
static class QueueUsersInfoBlock extends HtmlBlock {
  final CapacitySchedulerLeafQueueInfo lqinfo;

  @Inject
  QueueUsersInfoBlock(ViewContext ctx, CSQInfo info) {
    super(ctx);
    lqinfo = (CapacitySchedulerLeafQueueInfo) info.qinfo;
  }

  /**
   * Writes the table header, the per-user rows and the "Active Users Info"
   * heading, then closes the table.
   */
  @Override
  protected void render(Block html) {
    // Header row; the column order here must match the cell order below.
    TBODY<TABLE<Hamlet>> rows =
        html.table("#userinfo")
            .thead().$class("ui-widget-header")
            .tr()
            .th().$class("ui-state-default")._("User Name")._()
            .th().$class("ui-state-default")._("Max Resource")._()
            .th().$class("ui-state-default")._("Used Resource")._()
            .th().$class("ui-state-default")._("Max AM Resource")._()
            .th().$class("ui-state-default")._("Used AM Resource")._()
            .th().$class("ui-state-default")._("Schedulable Apps")._()
            .th().$class("ui-state-default")._("Non-Schedulable Apps")._()
            ._()
            ._()
            .tbody();

    ArrayList<UserInfo> queueUsers = lqinfo.getUsers().getUsersList();
    for (UserInfo user : queueUsers) {
      rows.tr()
          .td(user.getUsername())
          .td(user.getUserResourceLimit().toString())
          .td(user.getResourcesUsed().toString())
          // The max AM resource is read from the queue info, so it is the
          // same value in every row.
          .td(lqinfo.getUserAMResourceLimit().toString())
          .td(user.getAMResourcesUsed().toString())
          .td(Integer.toString(user.getNumActiveApplications()))
          .td(Integer.toString(user.getNumPendingApplications()))
          ._();
    }

    // NOTE(review): the heading div is requested from html while the table
    // opened above has not been closed yet; the order is kept as-is --
    // confirm the resulting markup is what is intended.
    html.div().$class("usersinfo").h5("Active Users Info")._();
    rows._()._();
  }
}

public static class QueueBlock extends HtmlBlock { public static class QueueBlock extends HtmlBlock {
final CSQInfo csqinfo; final CSQInfo csqinfo;


Expand Down Expand Up @@ -166,6 +173,7 @@ public void render(Block html) {
csqinfo.qinfo = info; csqinfo.qinfo = info;
if (info.getQueues() == null) { if (info.getQueues() == null) {
li.ul("#lq").li()._(LeafQueueInfoBlock.class)._()._(); li.ul("#lq").li()._(LeafQueueInfoBlock.class)._()._();
li.ul("#lq").li()._(QueueUsersInfoBlock.class)._()._();
} else { } else {
li._(QueueBlock.class); li._(QueueBlock.class);
} }
Expand Down
Expand Up @@ -21,6 +21,7 @@
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.UserMetricsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.UserMetricsInfo;


import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
Expand Down Expand Up @@ -153,6 +154,27 @@ protected void render(Block html) {


} }
} }

SchedulerInfo schedulerInfo=new SchedulerInfo(this.rm);

div.h3("Scheduler Metrics").
table("#schedulermetricsoverview").
thead().$class("ui-widget-header").
tr().
th().$class("ui-state-default")._("Scheduler Type")._().
th().$class("ui-state-default")._("Scheduling Resource Type")._().
th().$class("ui-state-default")._("Minimum Allocation")._().
th().$class("ui-state-default")._("Maximum Allocation")._().
_().
_().
tbody().$class("ui-widget-content").
tr().
td(String.valueOf(schedulerInfo.getSchedulerType())).
td(String.valueOf(schedulerInfo.getSchedulerResourceTypes())).
td(schedulerInfo.getMinAllocation().toString()).
td(schedulerInfo.getMaxAllocation().toString()).
_().
_()._();


div._(); div._();
} }
Expand Down
Expand Up @@ -35,7 +35,8 @@ public class CapacitySchedulerLeafQueueInfo extends CapacitySchedulerQueueInfo {
protected int userLimit; protected int userLimit;
protected UsersInfo users; // To add another level in the XML protected UsersInfo users; // To add another level in the XML
protected float userLimitFactor; protected float userLimitFactor;
protected ResourceInfo aMResourceLimit; protected ResourceInfo AMResourceLimit;
protected ResourceInfo usedAMResource;
protected ResourceInfo userAMResourceLimit; protected ResourceInfo userAMResourceLimit;
protected boolean preemptionDisabled; protected boolean preemptionDisabled;


Expand All @@ -52,7 +53,8 @@ public class CapacitySchedulerLeafQueueInfo extends CapacitySchedulerQueueInfo {
userLimit = q.getUserLimit(); userLimit = q.getUserLimit();
users = new UsersInfo(q.getUsers()); users = new UsersInfo(q.getUsers());
userLimitFactor = q.getUserLimitFactor(); userLimitFactor = q.getUserLimitFactor();
aMResourceLimit = new ResourceInfo(q.getAMResourceLimit()); AMResourceLimit = new ResourceInfo(q.getAMResourceLimit());
usedAMResource = new ResourceInfo(q.getQueueResourceUsage().getAMUsed());
userAMResourceLimit = new ResourceInfo(q.getUserAMResourceLimit()); userAMResourceLimit = new ResourceInfo(q.getUserAMResourceLimit());
preemptionDisabled = q.getPreemptionDisabled(); preemptionDisabled = q.getPreemptionDisabled();
} }
Expand Down Expand Up @@ -91,9 +93,13 @@ public float getUserLimitFactor() {
} }


public ResourceInfo getAMResourceLimit() { public ResourceInfo getAMResourceLimit() {
return aMResourceLimit; return AMResourceLimit;
} }


/**
 * Returns the used AM resource captured for this leaf queue.
 *
 * @return the value of the {@code usedAMResource} field
 */
public ResourceInfo getUsedAMResource() {
  return this.usedAMResource;
}

public ResourceInfo getUserAMResourceLimit() { public ResourceInfo getUserAMResourceLimit() {
return userAMResourceLimit; return userAMResourceLimit;
} }
Expand Down

0 comments on commit 658097d

Please sign in to comment.