Permalink
Browse files

OfflineImageDecompressor tool

Summary:
This tool decompresses compressed image files so that we can check them for
corruption. As long as we can parse the header:
-layout
-namespace id
-numFiles
-genstamp
-txid (optional)

syntactically, we should be able to decompress the rest of the image. This
tool is also a good way to fix image corruption, and is better than
customizing the loading and saving code.

Test Plan:
Manually: tested on a dfsdev image, which was decompressed and then parsed
with oiv.

Reviewers: hkuang, pritam, weiyan

Reviewed By: hkuang
  • Loading branch information...
1 parent 20a5d4a commit 8cd6f78572323fd423cb7946bc25509cba19af9b tomasz committed with Alex Feinberg Oct 4, 2012
View
@@ -72,6 +72,7 @@ if [ $# = 0 ]; then
echo " jmxget get JMX exported values from NameNode or DataNode."
echo " oiv apply the offline fsimage viewer to an fsimage"
echo " oev apply the offline edits viewer to an edits file"
+ echo " oid apply the offline fsimage decompressor to an fsimage"
echo " Use -help to see options"
echo " jobtracker run the MapReduce job Tracker node"
echo " pipes run a Pipes job"
@@ -324,6 +325,9 @@ elif [ "$COMMAND" = "oiv" ] ; then
elif [ "$COMMAND" = "oev" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+elif [ "$COMMAND" = "oid" ] ; then
+ CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageDecompressor
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
elif [ "$COMMAND" = "jmxget" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.JMXGet
HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
@@ -463,7 +463,7 @@ long getNumTransactions() {
/**
* Stream wrapper that keeps track of the current stream position.
*/
- static class PositionTrackingInputStream extends FilterInputStream {
+ public static class PositionTrackingInputStream extends FilterInputStream {
private long curPos = 0;
private long markPos = -1;
@@ -31,16 +31,19 @@
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.Text;
-import org.mortbay.log.Log;
/**
* Simple container class that handles support for compressed fsimage files.
*/
-class FSImageCompression {
+public class FSImageCompression {
/** Codec to use to save or load image, or null if the image is not compressed */
private CompressionCodec imageCodec;
+ /**
+  * Tells whether this instance stands for the uncompressed ("noop") case.
+  *
+  * @return true when no codec is set, i.e. {@code imageCodec} is null
+  */
+ public boolean isNoOpCompression() {
+   final boolean codecAbsent = (imageCodec == null);
+   return codecAbsent;
+ }
+
/**
* Create a "noop" compression - i.e. uncompressed
*/
@@ -57,7 +60,7 @@ private FSImageCompression(CompressionCodec codec) {
/**
* Create a "noop" compression - i.e. uncompressed
*/
- static FSImageCompression createNoopCompression() {
+ public static FSImageCompression createNoopCompression() {
return new FSImageCompression();
}
@@ -104,7 +107,7 @@ private static FSImageCompression createCompression(Configuration conf,
* @throws IOException if the specified codec is not available or the
* underlying IO fails.
*/
- static FSImageCompression readCompressionHeader(
+ public static FSImageCompression readCompressionHeader(
Configuration conf,
DataInputStream dis) throws IOException
{
@@ -126,7 +129,7 @@ static FSImageCompression readCompressionHeader(
* @throws IOException If the decompressor cannot be instantiated or an IO
* error occurs.
*/
- DataInputStream unwrapInputStream(InputStream is) throws IOException {
+ public DataInputStream unwrapInputStream(InputStream is) throws IOException {
if (imageCodec != null) {
return new DataInputStream(imageCodec.createInputStream(is));
} else {
@@ -0,0 +1,236 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs.tools.offlineImageViewer;
+
+import java.io.BufferedOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.EOFException;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.protocol.LayoutVersion;
+import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
+import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.PositionTrackingInputStream;
+import org.apache.hadoop.hdfs.server.namenode.FSImageCompression;
+
+/**
+ * OfflineImageDecompressor rewrites a compressed fsimage file as an
+ * uncompressed one. It reads the image header (layout version, namespace id,
+ * number of files, generation stamp and, when the layout stores it, the
+ * transaction id), reads the compression header, and then copies the rest of
+ * the image through the decompressing stream. The output starts with the same
+ * header fields followed by a "not compressed" marker.
+ *
+ * Main entry point into the utility is {@link #main(String[])}.
+ */
+public class OfflineImageDecompressor {
+
+  private final static String usage = "Usage: bin/hdfs oid -i INPUTFILE -o OUTPUTFILE\n"
+      + "Offline Image Decompressor\n"
+      + "The oid utility will attempt to decompress image files.\n"
+      + "The tool works offline and does not require a running cluster in\n"
+      + "order to process an image file.\n"
+      + "Required command line arguments:\n"
+      + "-i,--inputFile <arg> FSImage file to process.\n"
+      + "-o,--outputFile <arg> Name of output file. If the specified\n"
+      + " file exists, it will be overwritten.\n";
+
+  /** Path of the (compressed) image to read. */
+  private final String inputFile;
+  /** Path of the uncompressed image to write. */
+  private final String outputFile;
+  /** Highest percentage reported so far by {@link #printProgress}. */
+  private int lastProgress = 0;
+
+  /**
+   * Creates a decompressor for one input/output pair.
+   *
+   * @param inputFile
+   *          path of the image file to read
+   * @param outputFile
+   *          path of the file to write the decompressed image to
+   */
+  public OfflineImageDecompressor(String inputFile, String outputFile) {
+    this.inputFile = inputFile;
+    this.outputFile = outputFile;
+  }
+
+  /**
+   * Process image file: copy the header, then decompress the remainder into
+   * the output file. Nothing is written when the image layout does not
+   * support compression or the image is not compressed.
+   *
+   * @throws IOException
+   *           if reading the input or writing the output fails
+   */
+  private void go() throws IOException {
+    long start = System.currentTimeMillis();
+    System.out.println("Decompressing image file: " + inputFile + " to "
+        + outputFile);
+    DataInputStream in = null;
+    DataOutputStream out = null;
+
+    try {
+      // setup in; ptis tracks the position in the input file, which is
+      // what the progress report is based on
+      PositionTrackingInputStream ptis = new PositionTrackingInputStream(
+          new FileInputStream(new File(inputFile)));
+      in = new DataInputStream(ptis);
+
+      // read header information
+      int imgVersion = in.readInt();
+      if (!LayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imgVersion)) {
+        System.out
+            .println("Image is not compressed. No output will be produced.");
+        return;
+      }
+      int namespaceId = in.readInt();
+      long numFiles = in.readLong();
+      long genstamp = in.readLong();
+
+      long imgTxId = -1;
+      if (LayoutVersion.supports(Feature.STORED_TXIDS, imgVersion)) {
+        imgTxId = in.readLong();
+      }
+      FSImageCompression compression = FSImageCompression
+          .readCompressionHeader(new Configuration(), in);
+      if (compression.isNoOpCompression()) {
+        System.out
+            .println("Image is not compressed. No output will be produced.");
+        return;
+      }
+      // from here on, reads from 'in' go through the decompressing stream
+      in = compression.unwrapInputStream(in);
+      System.out.println("Starting decompression.");
+
+      // setup output
+      out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(
+          outputFile)));
+
+      // write back the uncompressed information
+      out.writeInt(imgVersion);
+      out.writeInt(namespaceId);
+      out.writeLong(numFiles);
+      out.writeLong(genstamp);
+      if (LayoutVersion.supports(Feature.STORED_TXIDS, imgVersion)) {
+        out.writeLong(imgTxId);
+      }
+      // no compression
+      out.writeBoolean(false);
+
+      // copy the data
+      long size = new File(inputFile).length();
+      // read in 1MB chunks
+      byte[] block = new byte[1024 * 1024];
+      while (true) {
+        int bytesRead = in.read(block);
+        if (bytesRead <= 0) {
+          break;
+        }
+        out.write(block, 0, bytesRead);
+        printProgress(ptis.getPos(), size);
+      }
+
+      // close before measuring the output file so buffered data is flushed
+      out.close();
+
+      long stop = System.currentTimeMillis();
+      System.out.println("Input file : " + inputFile + " size: " + size);
+      System.out.println("Output file: " + outputFile + " size: "
+          + new File(outputFile).length());
+      System.out.println("Decompression completed in " + (stop - start)
+          + " ms.");
+    } finally {
+      // Nested so that the output stream is still closed when closing the
+      // input stream throws.
+      try {
+        if (in != null) {
+          in.close();
+        }
+      } finally {
+        if (out != null) {
+          out.close();
+        }
+      }
+    }
+  }
+
+  /**
+   * Print the progress, one line each time the whole-number percentage grows.
+   *
+   * @param read
+   *          number of input bytes consumed so far
+   * @param size
+   *          total input size in bytes; nothing is printed when it is not
+   *          positive, since no percentage can be computed
+   */
+  private void printProgress(long read, long size) {
+    if (size <= 0) {
+      // File.length() reports 0 when the size is unknown; avoid dividing by it
+      return;
+    }
+    int progress = Math.min(100, (int) ((100 * read) / size));
+    if (progress > lastProgress) {
+      lastProgress = progress;
+      System.out.println("Completed " + lastProgress + " % ");
+    }
+  }
+
+  /**
+   * Build command-line options and descriptions
+   *
+   * @return options with required -i/--inputFile and -o/--outputFile, each
+   *         taking one value, plus an optional -h/--help flag
+   */
+  public static Options buildOptions() {
+    Options options = new Options();
+
+    // Build in/output file arguments, which are required, but there is no
+    // addOption method that can specify this
+    OptionBuilder.isRequired();
+    OptionBuilder.hasArg(); // exactly one file name per option
+    OptionBuilder.withLongOpt("outputFile");
+    options.addOption(OptionBuilder.create("o"));
+
+    OptionBuilder.isRequired();
+    OptionBuilder.hasArg();
+    OptionBuilder.withLongOpt("inputFile");
+    options.addOption(OptionBuilder.create("i"));
+
+    options.addOption("h", "help", false, "");
+    return options;
+  }
+
+  /**
+   * Tells whether the raw arguments ask for help. This is checked before
+   * parsing because the required -i/-o options would otherwise turn a lone
+   * help flag into a parse error.
+   */
+  private static boolean isHelpRequested(String[] args) {
+    for (String arg : args) {
+      if ("-h".equals(arg) || "--help".equals(arg) || "-help".equals(arg)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Entry point to command-line-driven operation. User may specify options and
+   * start the fsimage decompressor from the command line. Program will process
+   * the image file and return or, if an error is encountered, inform the user
+   * and return.
+   *
+   * @param args
+   *          Command line options
+   * @throws IOException
+   *           declared by the signature; I/O failures raised while
+   *           decompressing are caught here and reported on stderr
+   */
+  public static void main(String[] args) throws IOException {
+    Options options = buildOptions();
+    if (args.length == 0 || isHelpRequested(args)) {
+      printUsage();
+      return;
+    }
+
+    CommandLineParser parser = new PosixParser();
+    CommandLine cmd;
+
+    try {
+      cmd = parser.parse(options, args);
+    } catch (ParseException e) {
+      // tell the user what was wrong, not only that something was
+      System.out.println("Error parsing command-line options: "
+          + e.getMessage());
+      printUsage();
+      return;
+    }
+
+    if (cmd.hasOption("h")) { // print help and exit
+      printUsage();
+      return;
+    }
+
+    String inputFile = cmd.getOptionValue("i");
+    String outputFile = cmd.getOptionValue("o");
+
+    try {
+      OfflineImageDecompressor d = new OfflineImageDecompressor(inputFile,
+          outputFile);
+      d.go();
+    } catch (EOFException e) {
+      System.err.println("Input file ended unexpectedly. Exiting");
+    } catch (IOException e) {
+      System.err.println("Encountered exception. Exiting: " + e.getMessage());
+    }
+  }
+
+  /**
+   * Print application usage instructions.
+   */
+  private static void printUsage() {
+    System.out.println(usage);
+  }
+}

0 comments on commit 8cd6f78

Please sign in to comment.