HDFS-8880. NameNode metrics logging. (Arpit Agarwal)
arp7 committed Aug 17, 2015
1 parent ec183fa commit a88f31e
Showing 8 changed files with 445 additions and 5 deletions.
13 changes: 13 additions & 0 deletions hadoop-common-project/hadoop-common/src/main/conf/log4j.properties
@@ -148,6 +148,19 @@ log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize}
log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex}

#
# NameNode metrics logging.
# The default is to retain two namenode-metrics.log files up to 64MB each.
#
log4j.logger.NameNodeMetricsLog=INFO,NNMETRICSRFA
log4j.additivity.NameNodeMetricsLog=false
log4j.appender.NNMETRICSRFA=org.apache.log4j.RollingFileAppender
log4j.appender.NNMETRICSRFA.File=${hadoop.log.dir}/namenode-metrics.log
log4j.appender.NNMETRICSRFA.layout=org.apache.log4j.PatternLayout
log4j.appender.NNMETRICSRFA.layout.ConversionPattern=%d{ISO8601} %m%n
log4j.appender.NNMETRICSRFA.MaxBackupIndex=1
log4j.appender.NNMETRICSRFA.MaxFileSize=64MB

#
# mapred audit logging
#
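For orientation, and not part of the commit: combining the PatternLayout above with the dump format used by the new MetricsLoggerTask in NameNode.java below (mbean name, attribute name, value), a line in namenode-metrics.log would look roughly like this hypothetical example:

    2015-08-17 10:00:00,000 NameNodeActivity:FilesCreated=42
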
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/MBeans.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.metrics2.util;

import java.lang.management.ManagementFactory;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.management.InstanceAlreadyExistsException;
import javax.management.MBeanServer;
@@ -38,6 +40,13 @@
@InterfaceStability.Stable
public class MBeans {
private static final Log LOG = LogFactory.getLog(MBeans.class);
private static final String DOMAIN_PREFIX = "Hadoop:";
private static final String SERVICE_PREFIX = "service=";
private static final String NAME_PREFIX = "name=";

private static final Pattern MBEAN_NAME_PATTERN = Pattern.compile(
"^" + DOMAIN_PREFIX + SERVICE_PREFIX + "([^,]+)," +
NAME_PREFIX + "(.+)$");

/**
* Register the MBean using our standard MBeanName format
@@ -72,6 +81,26 @@ static public ObjectName register(String serviceName, String nameName,
return null;
}

public static String getMbeanNameService(final ObjectName objectName) {
Matcher matcher = MBEAN_NAME_PATTERN.matcher(objectName.toString());
if (matcher.matches()) {
return matcher.group(1);
} else {
throw new IllegalArgumentException(
objectName + " is not a valid Hadoop mbean");
}
}

public static String getMbeanNameName(final ObjectName objectName) {
Matcher matcher = MBEAN_NAME_PATTERN.matcher(objectName.toString());
if (matcher.matches()) {
return matcher.group(2);
} else {
throw new IllegalArgumentException(
objectName + " is not a valid Hadoop mbean");
}
}

static public void unregister(ObjectName mbeanName) {
LOG.debug("Unregistering "+ mbeanName);
final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
@@ -88,13 +117,13 @@ static public void unregister(ObjectName mbeanName) {
}

static private ObjectName getMBeanName(String serviceName, String nameName) {
String nameStr = DOMAIN_PREFIX + SERVICE_PREFIX + serviceName + "," +
NAME_PREFIX + nameName;
try {
return DefaultMetricsSystem.newMBeanName(nameStr);
} catch (Exception e) {
LOG.warn("Error creating MBean object name: "+ nameStr, e);
return null;
}
}
}
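
As a usage illustration for the two new accessors (the demo class below is hypothetical, not part of the commit; the name format is the one produced by getMBeanName above):

    import javax.management.ObjectName;
    import org.apache.hadoop.metrics2.util.MBeans;

    public class MBeanNameDemo {
      public static void main(String[] args) throws Exception {
        // Names registered through MBeans.register() follow the pattern
        // Hadoop:service=<service>,name=<name>.
        ObjectName name =
            new ObjectName("Hadoop:service=NameNode,name=NameNodeActivity");
        System.out.println(MBeans.getMbeanNameService(name)); // NameNode
        System.out.println(MBeans.getMbeanNameName(name));    // NameNodeActivity
      }
    }

Passing an ObjectName that does not match that pattern throws IllegalArgumentException.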
2 changes: 2 additions & 0 deletions hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -799,6 +799,8 @@ Release 2.8.0 - UNRELEASED
HDFS-8801. Convert BlockInfoUnderConstruction as a feature.
(Jing Zhao via wheat9)

HDFS-8880. NameNode metrics logging. (Arpit Agarwal)

OPTIMIZATIONS

HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -344,6 +344,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_NAMENODE_AUDIT_LOG_ASYNC_KEY = "dfs.namenode.audit.log.async";
public static final boolean DFS_NAMENODE_AUDIT_LOG_ASYNC_DEFAULT = false;
public static final String DFS_NAMENODE_AUDIT_LOG_DEBUG_CMDLIST = "dfs.namenode.audit.log.debug.cmdlist";
public static final String DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_KEY =
"dfs.namenode.metrics.logger.period.seconds";
public static final int DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_DEFAULT =
600;

public static final String DFS_BALANCER_MOVEDWINWIDTH_KEY = "dfs.balancer.movedWinWidth";
public static final long DFS_BALANCER_MOVEDWINWIDTH_DEFAULT = 5400*1000L;
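To show how these keys are consumed (a hypothetical demo class, not part of the commit; the constants are the ones added above):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class MetricsPeriodDemo {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Dump metrics every 300 seconds instead of the 600-second default.
        conf.setInt(
            DFSConfigKeys.DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_KEY, 300);
        // Any value <= 0 disables the periodic metrics logger (see
        // NameNode#startMetricsLogger below).
        System.out.println(conf.getInt(
            DFSConfigKeys.DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_KEY,
            DFSConfigKeys.DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_DEFAULT));
      }
    }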
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
@@ -21,6 +21,9 @@
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
@@ -74,20 +77,34 @@
import org.apache.hadoop.util.JvmPauseMonitor;
import org.apache.hadoop.util.ServicePlugin;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Appender;
import org.apache.log4j.AsyncAppender;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.management.Attribute;
import javax.management.AttributeList;
import javax.management.MBeanAttributeInfo;
import javax.management.MBeanInfo;
import javax.management.MBeanServer;
import javax.management.MalformedObjectNameException;
import javax.management.ObjectName;

import java.io.IOException;
import java.io.PrintStream;
import java.lang.management.ManagementFactory;
import java.net.InetSocketAddress;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;

import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
@@ -113,6 +130,8 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KERBEROS_PRINCIPAL_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_KEYTAB_FILE_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PLUGINS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY;
@@ -303,7 +322,10 @@ public long getProtocolVersion(String protocol,
LoggerFactory.getLogger("BlockStateChange");
public static final HAState ACTIVE_STATE = new ActiveState();
public static final HAState STANDBY_STATE = new StandbyState();

public static final Log MetricsLog =
LogFactory.getLog("NameNodeMetricsLog");

protected FSNamesystem namesystem;
protected final Configuration conf;
protected final NamenodeRole role;
@@ -329,6 +351,8 @@ public long getProtocolVersion(String protocol,
private JvmPauseMonitor pauseMonitor;
private ObjectName nameNodeStatusBeanName;
SpanReceiverHost spanReceiverHost;
ScheduledThreadPoolExecutor metricsLoggerTimer;

/**
* The namenode address that clients will use to access this namenode
* or the name service. For HA configurations using logical URI, it
@@ -662,6 +686,68 @@ protected void initialize(Configuration conf) throws IOException {
metrics.getJvmMetrics().setPauseMonitor(pauseMonitor);

startCommonServices(conf);
startMetricsLogger(conf);
}

/**
* Start a timer to periodically write NameNode metrics to the log
* file. This behavior can be disabled by configuration.
* @param conf the configuration to read the logging period from
*/
protected void startMetricsLogger(Configuration conf) {
long metricsLoggerPeriodSec =
conf.getInt(DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_KEY,
DFS_NAMENODE_METRICS_LOGGER_PERIOD_SECONDS_DEFAULT);

if (metricsLoggerPeriodSec <= 0) {
return;
}

makeMetricsLoggerAsync();

// Schedule the periodic logging.
metricsLoggerTimer = new ScheduledThreadPoolExecutor(1);
metricsLoggerTimer.setExecuteExistingDelayedTasksAfterShutdownPolicy(
false);
metricsLoggerTimer.scheduleWithFixedDelay(new MetricsLoggerTask(),
metricsLoggerPeriodSec,
metricsLoggerPeriodSec,
TimeUnit.SECONDS);
}

/**
* Make the metrics logger async and add all pre-existing appenders
* to the async appender.
*/
private static void makeMetricsLoggerAsync() {
if (!(MetricsLog instanceof Log4JLogger)) {
LOG.warn(
"Metrics logging will not be async since the logger is not log4j");
return;
}
org.apache.log4j.Logger logger = ((Log4JLogger) MetricsLog).getLogger();
logger.setAdditivity(false); // Don't pollute NN logs with metrics dump

@SuppressWarnings("unchecked")
List<Appender> appenders = Collections.list(logger.getAllAppenders());
// failsafe against trying to async it more than once
if (!appenders.isEmpty() && !(appenders.get(0) instanceof AsyncAppender)) {
AsyncAppender asyncAppender = new AsyncAppender();
// change logger to have an async appender containing all the
// previously configured appenders
for (Appender appender : appenders) {
logger.removeAppender(appender);
asyncAppender.addAppender(appender);
}
logger.addAppender(asyncAppender);
}
}

protected void stopMetricsLogger() {
if (metricsLoggerTimer != null) {
metricsLoggerTimer.shutdown();
metricsLoggerTimer = null;
}
}

/**
@@ -865,6 +951,7 @@ public void stop() {
} catch (ServiceFailedException e) {
LOG.warn("Encountered exception while exiting state ", e);
} finally {
stopMetricsLogger();
stopCommonServices();
if (metrics != null) {
metrics.shutdown();
@@ -1830,4 +1917,91 @@ void checkHaStateChange(StateChangeRequestInfo req)
break;
}
}

private static class MetricsLoggerTask implements Runnable {
private static final int MAX_LOGGED_VALUE_LEN = 128;
private static ObjectName OBJECT_NAME = null;

static {
try {
OBJECT_NAME = new ObjectName("Hadoop:*");
} catch (MalformedObjectNameException m) {
// This should not occur in practice since we pass
// a valid pattern to the constructor above.
}
}

/**
* Write NameNode metrics to the metrics appender when invoked.
*/
@Override
public void run() {
// Skip querying metrics if there are no known appenders.
if (!MetricsLog.isInfoEnabled() ||
!hasAppenders(MetricsLog) ||
OBJECT_NAME == null) {
return;
}

MetricsLog.info(" >> Begin NameNode metrics dump");
final MBeanServer server = ManagementFactory.getPlatformMBeanServer();

// Iterate over each MBean.
for (final ObjectName mbeanName : server.queryNames(OBJECT_NAME, null)) {
try {
MBeanInfo mBeanInfo = server.getMBeanInfo(mbeanName);
final String mBeanNameName = MBeans.getMbeanNameName(mbeanName);
final Set<String> attributeNames = getFilteredAttributes(mBeanInfo);

final AttributeList attributes =
server.getAttributes(mbeanName,
attributeNames.toArray(new String[attributeNames.size()]));

for (Object o : attributes) {
final Attribute attribute = (Attribute) o;
final Object value = attribute.getValue();
final String valueStr =
(value != null) ? value.toString() : "null";
// Truncate the value if it is too long
MetricsLog.info(mBeanNameName + ":" + attribute.getName() + "=" +
(valueStr.length() < MAX_LOGGED_VALUE_LEN ? valueStr :
valueStr.substring(0, MAX_LOGGED_VALUE_LEN) + "..."));
}
} catch (Exception e) {
MetricsLog.error("Failed to get NameNode metrics for mbean " +
mbeanName.toString(), e);
}
}
MetricsLog.info(" << End NameNode metrics dump");
}

private static boolean hasAppenders(Log logger) {
if (!(logger instanceof Log4JLogger)) {
// Don't bother trying to determine the presence of appenders.
return true;
}
Log4JLogger log4JLogger = (Log4JLogger) logger;
return log4JLogger.getLogger().getAllAppenders().hasMoreElements();
}

/**
* Get the list of attributes for the MBean, filtering out a few
* attribute types.
*/
private static Set<String> getFilteredAttributes(
MBeanInfo mBeanInfo) {
Set<String> attributeNames = new HashSet<>();
for (MBeanAttributeInfo attributeInfo : mBeanInfo.getAttributes()) {
if (!attributeInfo.getType().equals(
"javax.management.openmbean.TabularData") &&
!attributeInfo.getType().equals(
"javax.management.openmbean.CompositeData") &&
!attributeInfo.getType().equals(
"[Ljavax.management.openmbean.CompositeData;")) {
attributeNames.add(attributeInfo.getName());
}
}
return attributeNames;
}
}
}
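
A note on the scheduling in startMetricsLogger above: scheduleWithFixedDelay measures the period from the completion of the previous dump, so a slow round of JMX queries cannot cause runs to overlap or queue up. A minimal standalone sketch of the same pattern (the demo class and task are hypothetical; the executor calls mirror the ones the NameNode makes):

    import java.util.concurrent.ScheduledThreadPoolExecutor;
    import java.util.concurrent.TimeUnit;

    public class FixedDelayDemo {
      public static void main(String[] args) {
        ScheduledThreadPoolExecutor timer = new ScheduledThreadPoolExecutor(1);
        // As in stopMetricsLogger(): pending runs are dropped after shutdown().
        timer.setExecuteExistingDelayedTasksAfterShutdownPolicy(false);
        Runnable task = new Runnable() {
          @Override
          public void run() {
            System.out.println("metrics dump");
          }
        };
        // The next run starts 600 seconds after the previous run completes.
        timer.scheduleWithFixedDelay(task, 600, 600, TimeUnit.SECONDS);
      }
    }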
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -1575,6 +1575,18 @@
</description>
</property>

<property>
<name>dfs.namenode.metrics.logger.period.seconds</name>
<value>600</value>
<description>
This setting controls how frequently the NameNode logs its metrics. The
logging configuration must also define one or more appenders for
NameNodeMetricsLog for the metrics to be logged.
NameNode metrics logging is disabled if this value is zero or negative.
</description>
</property>
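
For example, an operator could disable the periodic dump by overriding the value in hdfs-site.xml (illustrative snippet, not part of this file):

    <property>
      <name>dfs.namenode.metrics.logger.period.seconds</name>
      <value>0</value>
    </property>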

<property>
<name>dfs.metrics.percentiles.intervals</name>
<value></value>
Expand Down
