Skip to content

Commit

Permalink
YARN-8588. Logging improvements for better debuggability. (Suma Shiva…
Browse files Browse the repository at this point in the history
…prasad via wangda)

Change-Id: I66aa4b0ec031ae5ce0fae558e2f8cbcbbfebc442
  • Loading branch information
wangdatan committed Aug 9, 2018
1 parent 5326a79 commit 344c335
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 41 deletions.
Expand Up @@ -63,4 +63,9 @@ public QueueCapacities getQueueCapacities() {
public CapacitySchedulerConfiguration getLeafQueueConfigs() {
return leafQueueConfigs;
}

@Override public String toString() {
return "AutoCreatedLeafQueueConfig{" + "queueCapacities=" + queueCapacities
+ ", leafQueueConfigs=" + leafQueueConfigs + '}';
}
}
Expand Up @@ -124,7 +124,7 @@ public int hashCode() {

@Override
public String toString() {
return "QueueManagementChange{" + "queue=" + queue
return "QueueManagementChange{" + "queue=" + queue.getQueueName()
+ ", updatedEntitlementsByPartition=" + queueTemplateUpdate
+ ", queueAction=" + queueAction + ", transitionToQueueState="
+ transitionToQueueState + '}';
Expand Down
Expand Up @@ -19,8 +19,9 @@


import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
Expand Down Expand Up @@ -50,8 +51,8 @@
*/
public class QueueManagementDynamicEditPolicy implements SchedulingEditPolicy {

private static final Log LOG =
LogFactory.getLog(QueueManagementDynamicEditPolicy.class);
private static final Logger LOG =
LoggerFactory.getLogger(QueueManagementDynamicEditPolicy.class);

private Clock clock;

Expand Down Expand Up @@ -90,7 +91,7 @@ public QueueManagementDynamicEditPolicy(RMContext context,
@Override
public void init(final Configuration config, final RMContext context,
final ResourceScheduler sched) {
LOG.info("Queue Management Policy monitor:" + this.
LOG.info("Queue Management Policy monitor: {}" + this.
getClass().getCanonicalName());
assert null == scheduler : "Unexpected duplicate call to init";
if (!(sched instanceof CapacityScheduler)) {
Expand Down Expand Up @@ -189,13 +190,7 @@ List<QueueManagementChange> manageAutoCreatedLeafQueues()
parentQueue.getAutoCreatedQueueManagementPolicy();
long startTime = 0;
try {
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat
.format("Trying to use {0} to compute preemption "
+ "candidates",
policyClazz.getClass().getName()));
startTime = clock.getTime();
}
startTime = clock.getTime();

queueManagementChanges = policyClazz.computeQueueManagementChanges();

Expand All @@ -209,15 +204,14 @@ List<QueueManagementChange> manageAutoCreatedLeafQueues()
}

if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format("{0} uses {1} millisecond"
+ " to run",
policyClazz.getClass().getName(), clock.getTime()
- startTime));
LOG.debug("{} uses {} millisecond" + " to run",
policyClazz.getClass().getName(), clock.getTime() - startTime);
if (queueManagementChanges.size() > 0) {
LOG.debug(" Updated queue management updates for parent queue"
+ " ["
+ parentQueue.getQueueName() + ": [\n" + queueManagementChanges
.toString() + "\n]");
LOG.debug(" Updated queue management changes for parent queue" + " "
+ "{}: [{}]", parentQueue.getQueueName(),
queueManagementChanges.size() < 25 ?
queueManagementChanges.toString() :
queueManagementChanges.size());
}
}
} catch (YarnException e) {
Expand All @@ -232,7 +226,7 @@ List<QueueManagementChange> manageAutoCreatedLeafQueues()
"Skipping queue management updates for parent queue "
+ parentQueue
.getQueuePath() + " "
+ "since configuration for auto creating queue's beyond "
+ "since configuration for auto creating queues beyond "
+ "parent's "
+ "guaranteed capacity is disabled");
}
Expand Down
Expand Up @@ -19,8 +19,10 @@
.queuemanagement;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity
.QueueManagementDynamicEditPolicy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
.SchedulerDynamicEditException;
Expand Down Expand Up @@ -81,8 +83,8 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
private CapacitySchedulerContext scheduler;
private ManagedParentQueue managedParentQueue;

private static final Log LOG = LogFactory.getLog(
GuaranteedOrZeroCapacityOverTimePolicy.class);
private static final Logger LOG =
LoggerFactory.getLogger(GuaranteedOrZeroCapacityOverTimePolicy.class);

private ReentrantReadWriteLock.WriteLock writeLock;

Expand Down Expand Up @@ -380,6 +382,17 @@ public List<QueueManagementChange> computeQueueManagementChanges()
deactivateLeafQueuesIfInActive(managedParentQueue, nodeLabel,
leafQueueEntitlements);

if (LOG.isDebugEnabled()) {
if ( deactivatedLeafQueues.size() > 0) {
LOG.debug("Parent queue = {}, " +
", nodeLabel = {}, deactivated leaf queues = [{}] ",
managedParentQueue.getQueueName(), nodeLabel,
deactivatedLeafQueues.size() > 25 ? deactivatedLeafQueues
.size() : deactivatedLeafQueues);

}
}

float deactivatedCapacity = getTotalDeactivatedCapacity(
deactivatedLeafQueues, nodeLabel);

Expand All @@ -392,7 +405,7 @@ public List<QueueManagementChange> computeQueueManagementChanges()
+ deactivatedCapacity + EPSILON;

if (LOG.isDebugEnabled()) {
LOG.debug("Parent queue : " + managedParentQueue.getQueueName()
LOG.debug("Parent queue = " + managedParentQueue.getQueueName()
+ ", nodeLabel = " + nodeLabel + ", absCapacity = "
+ parentAbsoluteCapacity + ", leafQueueAbsoluteCapacity = "
+ leafQueueTemplateAbsoluteCapacity + ", deactivatedCapacity = "
Expand All @@ -409,7 +422,8 @@ public List<QueueManagementChange> computeQueueManagementChanges()
pendingApps.size());

if (LOG.isDebugEnabled()) {
LOG.debug("Found " + maxLeafQueuesTobeActivated + " leaf queues"
LOG.debug("Parent queue = " + managedParentQueue.getQueueName()
+ " : Found " + maxLeafQueuesTobeActivated + " leaf queues"
+ " to be activated with " + pendingApps.size() + " apps ");
}

Expand All @@ -424,8 +438,9 @@ public List<QueueManagementChange> computeQueueManagementChanges()

if (LOG.isDebugEnabled()) {
if (leafQueuesToBeActivated.size() > 0) {
LOG.debug("Activated leaf queues : [" + leafQueuesToBeActivated
+ "]");
LOG.debug("Activated leaf queues : [{}]",
leafQueuesToBeActivated.size() < 25 ?
leafQueuesToBeActivated : leafQueuesToBeActivated.size());
}
}
}
Expand Down Expand Up @@ -492,8 +507,9 @@ void updateLeafQueueState() {
String partition = e.getKey();
if (!newPartitions.contains(partition)) {
itr.remove();
LOG.info(
"Removed partition " + partition + " from leaf queue " + "state");
LOG.info(managedParentQueue.getQueueName() +
" : Removed partition " + partition + " from leaf queue " +
"state");
} else{
Map<String, LeafQueueStatePerPartition> queues = e.getValue();
for (
Expand All @@ -502,7 +518,9 @@ void updateLeafQueueState() {
String queue = queueItr.next().getKey();
if (!newQueues.contains(queue)) {
queueItr.remove();
LOG.info("Removed queue " + queue + " from leaf queue "
LOG.info(managedParentQueue.getQueueName() + " : Removed queue"
+ queue + " from "
+ "leaf queue "
+ "state from partition " + partition);
}
}
Expand Down Expand Up @@ -582,12 +600,6 @@ private Map<String, QueueCapacities> deactivateLeafQueuesIfInActive(
updateToZeroCapacity(capacities, nodeLabel);
deactivatedQueues.put(leafQueue.getQueueName(),
leafQueueTemplateCapacities);
} else{
if (LOG.isDebugEnabled()) {
LOG.debug(" Leaf queue has pending applications or is " + "inactive"
+ " : " + leafQueue.getNumApplications()
+ ".Skipping deactivation for " + leafQueue);
}
}
} else{
LOG.warn("Could not find queue in scheduler while trying" + " to "
Expand Down Expand Up @@ -659,7 +671,7 @@ public void commitQueueManagementChanges(
if (isActive(leafQueue, nodeLabel)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Queue is already active." + " Skipping activation : "
+ queue.getQueuePath());
+ leafQueue.getQueueName());
}
} else{
activate(leafQueue, nodeLabel);
Expand All @@ -668,7 +680,7 @@ public void commitQueueManagementChanges(
if (!isActive(leafQueue, nodeLabel)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Queue is already de-activated. Skipping "
+ "de-activation : " + leafQueue.getQueuePath());
+ "de-activation : " + leafQueue.getQueueName());
}
} else{
deactivate(leafQueue, nodeLabel);
Expand Down

0 comments on commit 344c335

Please sign in to comment.