Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Made DeprecatedLzoTextInputFormat a subclass of TextInputFormat, which cleans up the code nicely.
  • Loading branch information...
commit 78c21f5ea7003513a3a40b4a85619a8436931865 1 parent dbfd677
Ilya Maykov authored
Showing with 10 additions and 22 deletions.
  1. +10 −22 src/java/com/hadoop/mapred/DeprecatedLzoTextInputFormat.java
View
32 src/java/com/hadoop/mapred/DeprecatedLzoTextInputFormat.java
@@ -31,7 +31,6 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
@@ -54,24 +53,19 @@
* com.hadoop.mapreduce.LzoTextInputFormat. The classes attempt to be alike in
* every other respect.
*
+ * Note that to use this input format properly with hadoop-streaming, you should
+ * also set the property <code>stream.map.input.ignoreKey=true</code>. That will
+ * replicate the behavior of the default TextInputFormat by stripping off the byte
+ * offset keys from the input lines that get piped to the mapper process.
+ *
* See {@link LzoInputFormatCommon} for a description of the boolean property
* <code>lzo.text.input.format.ignore.nonlzo</code> and how it affects the
* behavior of this input format.
*/
@SuppressWarnings("deprecation")
-public class DeprecatedLzoTextInputFormat extends FileInputFormat<LongWritable, Text>
- implements JobConfigurable {
- // We need to call TextInputFormat.isSplitable() but the method is protected, so we
- // make a private subclass that exposes a public wrapper method. /puke.
- private class WrappedTextInputFormat extends TextInputFormat {
- public boolean isSplitableWrapper(FileSystem fs, Path file) {
- return isSplitable(fs, file);
- }
- }
-
+public class DeprecatedLzoTextInputFormat extends TextInputFormat {
private final Map<Path, LzoIndex> indexes = new HashMap<Path, LzoIndex>();
- private final WrappedTextInputFormat textInputFormat = new WrappedTextInputFormat();
@Override
protected FileStatus[] listStatus(JobConf conf) throws IOException {
@@ -108,8 +102,8 @@ protected boolean isSplitable(FileSystem fs, Path filename) {
LzoIndex index = indexes.get(filename);
return !index.isEmpty();
} else {
- // Delegate non-LZO files to TextInputFormat.
- return textInputFormat.isSplitableWrapper(fs, filename);
+ // Delegate non-LZO files to the TextInputFormat base class.
+ return super.isSplitable(fs, filename);
}
}
@@ -152,7 +146,6 @@ protected boolean isSplitable(FileSystem fs, Path filename) {
}
}
- LOG.info("DeprecatedLzoTextInputFormat: returning " + result.size() + " input splits!");
return result.toArray(new FileSplit[result.size()]);
}
@@ -164,13 +157,8 @@ protected boolean isSplitable(FileSystem fs, Path filename) {
reporter.setStatus(split.toString());
return new DeprecatedLzoLineRecordReader(conf, (FileSplit)split);
} else {
- // delegate non-LZO files to TextInputFormat
- return textInputFormat.getRecordReader(split, conf, reporter);
+ // delegate non-LZO files to the TextInputFormat base class.
+ return super.getRecordReader(split, conf, reporter);
}
}
-
- @Override
- public void configure(JobConf conf) {
- textInputFormat.configure(conf);
- }
}
Please sign in to comment.
Something went wrong with that request. Please try again.