
Merge pull request #2 from brianmartin/master

HDFSFileFinder
2 parents 4e3e00b + af0514c, commit 662179b713538c5974bd7e90e2d9e2f327e15d57. jwills committed Jun 28, 2012
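This change adds HDFSFileFinder, a small Hadoop Tool that reports, for each datanode, how many bytes of the blocks backing files matching a path glob live on that node, along with a MiniDFSCluster-based test and the hadoop-minicluster test dependency it needs.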
.gitignore
@@ -2,3 +2,4 @@
.project
.settings
target
+build
pom.xml
@@ -47,6 +47,13 @@
<artifactId>guava</artifactId>
<version>${guava.version}</version>
</dependency>
+
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<build>
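The hadoop-minicluster artifact is what supplies the MiniDFSCluster used by the new test below; since it is test-scoped, it never leaks into the packaged jar.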
com/cloudera/kitten/appmaster/util/HDFSFileFinder.java
@@ -0,0 +1,79 @@
+/**
+ * Copyright (c) 2012, Cloudera, Inc. All Rights Reserved.
+ *
+ * Cloudera, Inc. licenses this file to you under the Apache License,
+ * Version 2.0 (the "License"). You may not use this file except in
+ * compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for
+ * the specific language governing permissions and limitations under the
+ * License.
+ */
+package com.cloudera.kitten.appmaster.util;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.util.Tool;
+import org.apache.hadoop.util.ToolRunner;
+
+import com.google.common.collect.Maps;
+
+public class HDFSFileFinder extends Configured implements Tool {
+
+ private static final Log LOG = LogFactory.getLog(HDFSFileFinder.class);
+
+ public Map<String,Long> getNumBytesOfGlobHeldByDatanodes(Path p) throws IOException {
+ return getNumBytesOfGlobHeldByDatanodes(p, getConf());
+ }
+
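+ /**
+ * Sums, per datanode (keyed by the host:port strings from
+ * BlockLocation#getNames), the bytes of all block replicas backing
+ * files that match the given glob.
+ */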
+ public static Map<String,Long> getNumBytesOfGlobHeldByDatanodes(Path p, Configuration conf) throws IOException {
+ FileSystem fs = p.getFileSystem(conf);
+
+ HashMap<String,Long> bytesHeld = Maps.newHashMap();
+ for (FileStatus f : fs.globStatus(p)) {
+ BlockLocation[] bls = fs.getFileBlockLocations(f, 0, f.getLen()); // the matched file's status, not the glob path p
+ if (bls.length > 0) {
+ for (BlockLocation bl : bls) {
+ long l = bl.getLength();
+ for (String name : bl.getNames()) {
+ if (bytesHeld.containsKey(name))
+ bytesHeld.put(name, bytesHeld.get(name) + l);
+ else
+ bytesHeld.put(name, l);
+ }
+ }
+ }
+ }
+
+ return bytesHeld;
+ }
+
+ @Override
+ public int run(String[] args) throws Exception {
+ Configuration conf = getConf();
+ for (String a : args) {
+ Path p = new Path(a);
+ Map<String, Long> bytesHeld = getNumBytesOfGlobHeldByDatanodes(p, conf);
+ for (Map.Entry<String, Long> e : bytesHeld.entrySet())
+ LOG.info(e.getKey() + " : " + e.getValue() + " bytes");
+ }
+ return 0;
+ }
+
+ public static void main(String[] args) throws Exception {
+ System.exit(ToolRunner.run(new Configuration(), new HDFSFileFinder(), args));
+ }
+}
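A minimal sketch of programmatic use (the glob below is hypothetical):

Configuration conf = new Configuration();
Map<String, Long> held = HDFSFileFinder.getNumBytesOfGlobHeldByDatanodes(
    new Path("/data/logs/part-*"), conf);
// held maps each datanode's host:port to the bytes of matching blocks it stores

Because the class implements Tool, it can also be run from the shell, e.g. hadoop jar <kitten jar> com.cloudera.kitten.appmaster.util.HDFSFileFinder '/data/logs/part-*'.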
com/cloudera/kitten/appmaster/util/HDFSFileFinderTest.java
@@ -0,0 +1,76 @@
+package com.cloudera.kitten.appmaster.util;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class HDFSFileFinderTest {
+
+ private static final Log LOG = LogFactory.getLog(HDFSFileFinderTest.class);
+
+ protected static MiniDFSCluster cluster = null;
+ protected static DistributedFileSystem fs = null;
+ protected static Configuration conf = new Configuration();
+ protected static int numDataNodes = 5;
+ protected static int replicationFactor = 3;
+ protected static long blockSize = 8; // must be a multiple of io.bytes.per.checksum (4)
+
+ @BeforeClass
+ public static void setup() throws InterruptedException, IOException {
+ LOG.info("Starting up MiniDFS cluster");
+ if (cluster == null) {
+ conf.set("dfs.namenode.replication.min", "" + replicationFactor);
+ conf.set("dfs.block.size", "" + blockSize);
+ conf.set("io.bytes.per.checksum", "" + 4);
+ cluster = new MiniDFSCluster
+ .Builder(conf)
+ .numDataNodes(numDataNodes)
+ .build();
+ fs = cluster.getFileSystem();
+ }
+ }
+
+ @AfterClass
+ public static void tearDown() throws IOException {
+ if (cluster != null) {
+ cluster.shutdown();
+ cluster = null;
+ }
+ }
+
+ @Test
+ public void testFileFinder() throws Exception {
+ // make a file
+ File tmpFile = File.createTempFile("test-file", ".txt");
+ // add some data
+ PrintWriter pw = new PrintWriter(tmpFile);
+ pw.println("test file data");
+ pw.close(); // close() flushes
+ tmpFile.deleteOnExit();
+
+ // copy to hdfs
+ Path src = new Path(tmpFile.getAbsolutePath());
+ Path dst = new Path("test-file.txt");
+ fs.copyFromLocalFile(src, dst);
+
+ // get the hosts
+ Map<String, Long> bytesHeld = HDFSFileFinder.getNumBytesOfGlobHeldByDatanodes(dst, conf);
+
+ for (String node : bytesHeld.keySet())
+ LOG.info(node + " : " + bytesHeld.get(node));
+
+ assertTrue(replicationFactor <= bytesHeld.size());
+ }
+}
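With hadoop-minicluster on the test classpath, the test runs under the usual Surefire invocation, e.g. mvn -Dtest=HDFSFileFinderTest test. Note that it boots a real in-process HDFS with five datanodes, so it is slower than an ordinary unit test.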
