
HDFS-1117. Metrics 2.0 HDFS instrumentation. Contributed by Luke Lu.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hdfs/trunk@1103834 13f79535-47bb-0310-9956-ffa450edef68
Commit 4979d5e4b73baf56f6fb142e901dfb263a180a88 (1 parent: 102ee5b). Suresh Srinivas committed May 16, 2011.
Showing 34 changed files with 553 additions and 768 deletions.
  1. +4 −1 CHANGES.txt
  2. +1 −1 build.xml
  3. +27 −0 conf/hadoop-metrics2.properties
  4. +9 −0 src/java/org/apache/hadoop/hdfs/DFSUtil.java
  5. +2 −2 src/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java
  6. +2 −2 src/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
  7. +15 −26 src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
  8. +19 −21 src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
  9. +5 −6 src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
  10. +0 −80 src/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeActivityMBean.java
  11. +128 −101 src/java/org/apache/hadoop/hdfs/server/datanode/metrics/DataNodeMetrics.java
  12. +1 −1 src/java/org/apache/hadoop/hdfs/server/datanode/metrics/FSDatasetMBean.java
  13. +2 −2 src/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java
  14. +3 −3 src/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
  15. +51 −42 src/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
  16. +32 −35 src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java
  17. +5 −2 src/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java
  18. +2 −2 src/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMBean.java
  19. +0 −132 src/java/org/apache/hadoop/hdfs/server/namenode/metrics/FSNamesystemMetrics.java
  20. +0 −69 src/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeActivityMBean.java
  21. +134 −116 src/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java
  22. +3 −3 src/java/org/apache/hadoop/hdfs/tools/JMXGet.java
  23. +3 −0 src/test/hdfs/org/apache/hadoop/hdfs/MiniDFSCluster.java
  24. +2 −4 src/test/hdfs/org/apache/hadoop/hdfs/TestDatanodeReport.java
  25. +4 −0 src/test/hdfs/org/apache/hadoop/hdfs/TestHDFSServerPorts.java
  26. +6 −7 src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/SimulatedFSDataset.java
  27. +2 −1 src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMXBean.java
  28. +5 −3 src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMetrics.java
  29. +11 −16 src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java
  30. +3 −9 src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java
  31. +2 −1 src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java
  32. +5 −6 src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/metrics/TestNNMetricFilesInGetListingOps.java
  33. +48 −60 src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java
  34. +17 −14 src/test/hdfs/org/apache/hadoop/tools/TestJMXGet.java
CHANGES.txt
@@ -286,7 +286,8 @@ Trunk (unreleased changes)
HDFS-1628. Display full path in AccessControlException. (John George
via szetszwo)
- HDFS-1707. Federation: Failure in browsing data on new namenodes. (jitendra)
+ HDFS-1707. Federation: Failure in browsing data on new namenodes.
+ (jitendra)
HDFS-1683. Test Balancer with multiple NameNodes. (szetszwo)
@@ -413,6 +414,8 @@ Trunk (unreleased changes)
HDFS-1899. GenericTestUtils.formatNamenode should be moved to DFSTestUtil
(Ted Yu via todd)
+ HDFS-1117. Metrics 2.0 HDFS instrumentation. (Luke Lu via suresh)
+
OPTIMIZATIONS
HDFS-1458. Improve checkpoint performance by avoiding unnecessary image
build.xml
@@ -92,7 +92,7 @@
<property name="test.junit.fork.mode" value="perTest" />
<property name="test.junit.printsummary" value="yes" />
<property name="test.junit.haltonfailure" value="no" />
- <property name="test.junit.maxmemory" value="512m" />
+ <property name="test.junit.maxmemory" value="1024m" />
<property name="test.conf.dir" value="${build.dir}/test/conf" />
<property name="test.hdfs.build.classes" value="${test.build.dir}/hdfs/classes"/>
conf/hadoop-metrics2.properties
@@ -0,0 +1,27 @@
+# syntax: [prefix].[source|sink].[instance].[options]
+# See javadoc of package-info.java for org.apache.hadoop.metrics2 for details
+
+*.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink
+# default sampling period
+*.period=10
+
+# The namenode-metrics.out file will contain metrics from all contexts
+#namenode.sink.file.filename=namenode-metrics.out
+# Specifying a special sampling period for namenode:
+#namenode.sink.*.period=8
+
+#datanode.sink.file.filename=datanode-metrics.out
+
+# The following example splits metrics of different
+# contexts to different sinks (in this case files)
+#jobtracker.sink.file_jvm.context=jvm
+#jobtracker.sink.file_jvm.filename=jobtracker-jvm-metrics.out
+#jobtracker.sink.file_mapred.context=mapred
+#jobtracker.sink.file_mapred.filename=jobtracker-mapred-metrics.out
+
+#tasktracker.sink.file.filename=tasktracker-metrics.out
+
+#maptask.sink.file.filename=maptask-metrics.out
+
+#reducetask.sink.file.filename=reducetask-metrics.out
+
src/java/org/apache/hadoop/hdfs/DFSUtil.java
@@ -576,4 +576,13 @@ public static InetSocketAddress getSocketAddress(String address) {
return new InetSocketAddress(address.substring(0, colon),
Integer.parseInt(address.substring(colon + 1)));
}
+
+ /**
+ * Round bytes to GiB (gibibyte)
+ * @param bytes number of bytes
+ * @return number of GiB
+ */
+ public static int roundBytesToGB(long bytes) {
+ return Math.round((float)bytes / 1024 / 1024 / 1024);
+ }
}
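Note that the new helper rounds to the nearest gibibyte rather than truncating. A quick illustration (the input values are hypothetical):

    long used = 3L * 1024 * 1024 * 1024 + 600L * 1024 * 1024;  // ~3.59 GiB
    DFSUtil.roundBytesToGB(used);  // returns 4 (rounds up), not 3
    DFSUtil.roundBytesToGB(0L);    // returns 0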
src/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolSliceScanner.java
@@ -440,13 +440,13 @@ private void verifyBlock(ExtendedBlock block) {
if (second) {
totalScanErrors++;
- datanode.getMetrics().blockVerificationFailures.inc();
+ datanode.getMetrics().incrBlockVerificationFailures();
handleScanFailure(block);
return;
}
} finally {
IOUtils.closeStream(blockSender);
- datanode.getMetrics().blocksVerified.inc();
+ datanode.getMetrics().incrBlocksVerified();
totalScans++;
}
}
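The new incr* methods replace direct access to public metric fields on DataNodeMetrics. A minimal sketch of how such a method is typically backed in metrics2; the annotation-based registration and field wiring shown here are assumptions for illustration, not the committed code:

    import org.apache.hadoop.metrics2.annotation.Metric;
    import org.apache.hadoop.metrics2.annotation.Metrics;
    import org.apache.hadoop.metrics2.lib.MutableCounterLong;

    @Metrics(about = "DataNode metrics", context = "dfs")  // context name assumed
    public class DataNodeMetrics {
      @Metric MutableCounterLong blocksVerified;  // picked up by the metrics system

      public void incrBlocksVerified() {
        blocksVerified.incr();  // thread-safe; snapshotted each sampling period
      }
    }

Encapsulating the counters this way lets the metrics class change its internals, as this patch does, without touching every call site.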
src/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java
@@ -628,7 +628,7 @@ private int receivePacket(long offsetInBlock, long seqno,
offsetInBlock, lastChunkChecksum
);
- datanode.myMetrics.bytesWritten.inc(len);
+ datanode.metrics.incrBytesWritten(len);
}
} catch (IOException iex) {
datanode.checkDiskError(iex);
@@ -696,7 +696,7 @@ void receiveBlock(
// Finalize the block. Does this fsync()?
datanode.data.finalizeBlock(block);
}
- datanode.myMetrics.blocksWritten.inc();
+ datanode.metrics.incrBlocksWritten();
}
} catch (IOException ioe) {
src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
@@ -29,7 +29,6 @@
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
-import java.lang.management.ManagementFactory;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.net.Socket;
@@ -54,9 +53,6 @@
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
-import javax.management.MBeanServer;
-import javax.management.ObjectName;
-
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@@ -118,6 +114,8 @@
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.ipc.Server;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.net.DNS;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.SecurityUtil;
@@ -353,7 +351,7 @@ void refreshNamenodes(Configuration conf)
long heartBeatInterval;
private DataStorage storage = null;
private HttpServer infoServer = null;
- DataNodeMetrics myMetrics;
+ DataNodeMetrics metrics;
private InetSocketAddress selfAddr;
private static volatile DataNode datanodeObject = null;
@@ -925,7 +923,7 @@ DatanodeCommand blockReport() throws IOException {
cmd = bpNamenode.blockReport(bpRegistration, blockPoolId, bReport
.getBlockListAsLongs());
long brTime = now() - brStartTime;
- myMetrics.blockReports.inc(brTime);
+ metrics.addBlockReport(brTime);
LOG.info("BlockReport of " + bReport.getNumberOfBlocks() +
" blocks got processed in " + brTime + " msecs");
//
@@ -1036,7 +1034,7 @@ private void offerService() throws Exception {
//
lastHeartbeat = startTime;
DatanodeCommand[] cmds = sendHeartBeat();
- myMetrics.heartbeats.inc(now() - startTime);
+ metrics.addHeartbeat(now() - startTime);
if (!processCommand(cmds))
continue;
}
@@ -1258,7 +1256,7 @@ private boolean processCommand(DatanodeCommand cmd) throws IOException {
case DatanodeProtocol.DNA_TRANSFER:
// Send a copy of a block to another datanode
transferBlocks(bcmd.getBlockPoolId(), bcmd.getBlocks(), bcmd.getTargets());
- myMetrics.blocksReplicated.inc(bcmd.getBlocks().length);
+ metrics.incrBlocksReplicated(bcmd.getBlocks().length);
break;
case DatanodeProtocol.DNA_INVALIDATE:
//
@@ -1276,7 +1274,7 @@ private boolean processCommand(DatanodeCommand cmd) throws IOException {
checkDiskError();
throw e;
}
- myMetrics.blocksRemoved.inc(toDelete.length);
+ metrics.incrBlocksRemoved(toDelete.length);
break;
case DatanodeProtocol.DNA_SHUTDOWN:
// shut down the data node
@@ -1377,7 +1375,7 @@ void startDataNode(Configuration conf,
this.blockPoolTokenSecretManager = new BlockPoolTokenSecretManager();
initIpcServer(conf);
- myMetrics = new DataNodeMetrics(conf, getMachineName());
+ metrics = DataNodeMetrics.create(conf, getMachineName());
blockPoolManager = new BlockPoolManager(conf);
}
@@ -1427,17 +1425,7 @@ public static InetSocketAddress getInfoAddr(Configuration conf) {
}
private void registerMXBean() {
- // register MXBean
- MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
- try {
- ObjectName mxbeanName = new ObjectName("HadoopInfo:type=DataNodeInfo");
- mbs.registerMBean(this, mxbeanName);
- } catch ( javax.management.InstanceAlreadyExistsException iaee ) {
- // in unit tests, we may have multiple datanodes in the same JVM
- LOG.info("DataNode MXBean already registered");
- } catch ( javax.management.JMException e ) {
- LOG.warn("Failed to register DataNode MXBean", e);
- }
+ MBeans.register("DataNode", "DataNodeInfo", this);
}
int getPort() {
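MBeans.register collapses the JMX boilerplate deleted above: it builds the ObjectName, registers the bean, and absorbs InstanceAlreadyExistsException itself, which matters when unit tests run several datanodes in one JVM. A sketch of the call, assuming the standard Hadoop JMX naming convention:

    import javax.management.ObjectName;
    import org.apache.hadoop.metrics2.util.MBeans;

    // Registers this bean as "Hadoop:service=DataNode,name=DataNodeInfo"
    // (the old code used the "HadoopInfo:type=DataNodeInfo" domain).
    ObjectName mxbeanName = MBeans.register("DataNode", "DataNodeInfo", this);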
@@ -1551,7 +1539,7 @@ public InetSocketAddress getSelfAddr() {
}
DataNodeMetrics getMetrics() {
- return myMetrics;
+ return metrics;
}
public static void setNewStorageID(DatanodeID dnId) {
@@ -1668,8 +1656,8 @@ public void shutdown() {
if (data != null) {
data.shutdown();
}
- if (myMetrics != null) {
- myMetrics.shutdown();
+ if (metrics != null) {
+ metrics.shutdown();
}
}
@@ -1709,7 +1697,7 @@ private void handleDiskError(String errMsgr) {
// shutdown the DN completely.
int dpError = hasEnoughResources ? DatanodeProtocol.DISK_ERROR
: DatanodeProtocol.FATAL_DISK_ERROR;
- myMetrics.volumeFailures.inc(1);
+ metrics.incrVolumeFailures();
//inform NameNodes
for(BPOfferService bpos: blockPoolManager.getAllNamenodeThreads()) {
@@ -2003,7 +1991,7 @@ public void run() {
* @param delHint
*/
void closeBlock(ExtendedBlock block, String delHint) {
- myMetrics.blocksWritten.inc();
+ metrics.incrBlocksWritten();
BPOfferService bpos = blockPoolManager.get(block.getBlockPoolId());
if(bpos != null) {
bpos.notifyNamenodeReceivedBlock(block, delHint);
@@ -2138,6 +2126,7 @@ static DataNode makeInstance(Collection<URI> dataDirs, Configuration conf,
conf.get(DFS_DATANODE_DATA_DIR_PERMISSION_KEY,
DFS_DATANODE_DATA_DIR_PERMISSION_DEFAULT));
ArrayList<File> dirs = getDataDirsFromURIs(dataDirs, localFS, permission);
+ DefaultMetricsSystem.initialize("DataNode");
assert dirs.size() > 0 : "number of data directories should be > 0";
return new DataNode(conf, dirs, resources);
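DefaultMetricsSystem.initialize boots the process-wide metrics system under the given prefix, which (lower-cased) selects the datanode.* keys in hadoop-metrics2.properties; sources such as DataNodeMetrics then attach to the same singleton. A minimal sketch of that lifecycle; the register call is an assumption about what DataNodeMetrics.create does internally:

    import org.apache.hadoop.metrics2.MetricsSystem;
    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;

    DefaultMetricsSystem.initialize("DataNode");  // once per process

    // Later, e.g. inside DataNodeMetrics.create(conf, machineName):
    MetricsSystem ms = DefaultMetricsSystem.instance();
    // ms.register(sourceName, "DataNode metrics", new DataNodeMetrics(...));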
src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
@@ -48,8 +48,8 @@
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.metrics.util.MetricsTimeVaryingInt;
-import org.apache.hadoop.metrics.util.MetricsTimeVaryingRate;
+import org.apache.hadoop.metrics2.lib.MutableCounterLong;
+import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.security.token.Token;
@@ -183,11 +183,11 @@ protected void opReadBlock(DataInputStream in, ExtendedBlock block,
SUCCESS.write(out); // send op status
long read = blockSender.sendBlock(out, baseStream, null); // send data
- datanode.myMetrics.bytesRead.inc((int) read);
- datanode.myMetrics.blocksRead.inc();
+ datanode.metrics.incrBytesRead((int) read);
+ datanode.metrics.incrBlocksRead();
} catch ( SocketException ignored ) {
// Its ok for remote side to close the connection anytime.
- datanode.myMetrics.blocksRead.inc();
+ datanode.metrics.incrBlocksRead();
} catch ( IOException ioe ) {
/* What exactly should we do here?
* Earlier version shutdown() datanode if there is disk error.
@@ -203,9 +203,8 @@ protected void opReadBlock(DataInputStream in, ExtendedBlock block,
}
//update metrics
- updateDuration(datanode.myMetrics.readBlockOp);
- updateCounter(datanode.myMetrics.readsFromLocalClient,
- datanode.myMetrics.readsFromRemoteClient);
+ datanode.metrics.addReadBlockOp(elapsed());
+ datanode.metrics.incrReadsFromClient(isLocal);
}
/**
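incrReadsFromClient(isLocal) folds the old pair-of-counters update into the metrics class. A plausible body, mirroring the ternary idiom the patch keeps in updateCounter further below (the counter field names are assumptions):

    public void incrReadsFromClient(boolean local) {
      (local ? readsFromLocalClient : readsFromRemoteClient).incr();
    }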
@@ -409,9 +408,8 @@ protected void opWriteBlock(final DataInputStream in, final ExtendedBlock block,
}
//update metrics
- updateDuration(datanode.myMetrics.writeBlockOp);
- updateCounter(datanode.myMetrics.writesFromLocalClient,
- datanode.myMetrics.writesFromRemoteClient);
+ datanode.metrics.addWriteBlockOp(elapsed());
+ datanode.metrics.incrWritesFromClient(isLocal);
}
@Override
@@ -482,7 +480,7 @@ protected void opBlockChecksum(DataInputStream in, ExtendedBlock block,
}
//update metrics
- updateDuration(datanode.myMetrics.blockChecksumOp);
+ datanode.metrics.addBlockChecksumOp(elapsed());
}
/**
@@ -535,8 +533,8 @@ protected void opCopyBlock(DataInputStream in, ExtendedBlock block,
long read = blockSender.sendBlock(reply, baseStream,
dataXceiverServer.balanceThrottler);
- datanode.myMetrics.bytesRead.inc((int) read);
- datanode.myMetrics.blocksRead.inc();
+ datanode.metrics.incrBytesRead((int) read);
+ datanode.metrics.incrBlocksRead();
LOG.info("Copied block " + block + " to " + s.getRemoteSocketAddress());
} catch (IOException ioe) {
@@ -556,7 +554,7 @@ protected void opCopyBlock(DataInputStream in, ExtendedBlock block,
}
//update metrics
- updateDuration(datanode.myMetrics.copyBlockOp);
+ datanode.metrics.addCopyBlockOp(elapsed());
}
/**
@@ -670,16 +668,16 @@ protected void opReplaceBlock(DataInputStream in,
}
//update metrics
- updateDuration(datanode.myMetrics.replaceBlockOp);
+ datanode.metrics.addReplaceBlockOp(elapsed());
}
- private void updateDuration(MetricsTimeVaryingRate mtvr) {
- mtvr.inc(now() - opStartTime);
+ private long elapsed() {
+ return now() - opStartTime;
}
- private void updateCounter(MetricsTimeVaryingInt localCounter,
- MetricsTimeVaryingInt remoteCounter) {
- (isLocal? localCounter: remoteCounter).inc();
+ private void updateCounter(MutableCounterLong localCounter,
+ MutableCounterLong remoteCounter) {
+ (isLocal? localCounter: remoteCounter).incr();
}
/**
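The add*Op(elapsed()) calls hand per-operation latencies to the metrics class, which under metrics2 would typically record them with a MutableRate that tracks both operation count and average time. A sketch under that assumption (the field and method body are illustrative):

    import org.apache.hadoop.metrics2.annotation.Metric;
    import org.apache.hadoop.metrics2.lib.MutableRate;

    @Metric MutableRate readBlockOp;  // tracks count and average latency

    public void addReadBlockOp(long latencyMs) {
      readBlockOp.add(latencyMs);  // accumulates one sample
    }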
src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
@@ -59,7 +59,7 @@
import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo;
import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand.RecoveringBlock;
-import org.apache.hadoop.metrics.util.MBeanUtil;
+import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.DiskChecker;
import org.apache.hadoop.util.StringUtils;
@@ -2186,18 +2186,17 @@ void registerMBean(final String storageId) {
}
try {
bean = new StandardMBean(this,FSDatasetMBean.class);
- mbeanName = MBeanUtil.registerMBean("DataNode", "FSDatasetState-" + storageName, bean);
+ mbeanName = MBeans.register("DataNode", "FSDatasetState-" + storageName, bean);
} catch (NotCompliantMBeanException e) {
- e.printStackTrace();
+ DataNode.LOG.warn("Error registering FSDatasetState MBean", e);
}
-
- DataNode.LOG.info("Registered FSDatasetStatusMBean");
+ DataNode.LOG.info("Registered FSDatasetState MBean");
}
@Override // FSDatasetInterface
public void shutdown() {
if (mbeanName != null)
- MBeanUtil.unregisterMBean(mbeanName);
+ MBeans.unregister(mbeanName);
if (asyncDiskService != null) {
asyncDiskService.shutdown();