bigdatagenomics · heuermh · Mar 27, 2017 · Mar 27, 2017
diff --git a/adam-core/src/main/java/org/bdgenomics/adam/io/InterleavedFastqInputFormat.java b/adam-core/src/main/java/org/bdgenomics/adam/io/InterleavedFastqInputFormat.java
@@ -19,8 +19,12 @@
 
 import java.io.EOFException;
 import java.io.IOException;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
@@ -43,6 +47,21 @@
  */
 public final class InterleavedFastqInputFormat extends FileInputFormat<Void, Text> {
 
+    /**
+     * Reports whether an input file may be divided into multiple splits.
+     * Splittable compression codecs are not supported yet: compressed data
+     * is still read, but it is never split. This will be fixed in #1457.
+     *
+     * @param context The job context to get the configuration from.
+     * @param filename The path the input file is saved at.
+     * @return True only if the codec factory finds no codec for the file.
+     */
+    @Override protected boolean isSplitable(JobContext context, Path filename) {
+        final Configuration conf = context.getConfiguration();
+        final CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(filename);
+        return (codec == null);
+    }
+
     /**
      * A record reader for the interleaved FASTQ format.
      *

diff --git a/adam-core/src/main/java/org/bdgenomics/adam/io/SingleFastqInputFormat.java b/adam-core/src/main/java/org/bdgenomics/adam/io/SingleFastqInputFormat.java
@@ -19,8 +19,12 @@
 
 import java.io.EOFException;
 import java.io.IOException;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
@@ -35,6 +39,21 @@
  */
 public final class SingleFastqInputFormat extends FileInputFormat<Void, Text> {
 
+    /**
+     * Reports whether an input file may be divided into multiple splits.
+     * Splittable compression codecs are not supported yet: compressed data
+     * is still read, but it is never split. This will be fixed in #1457.
+     *
+     * @param context The job context to get the configuration from.
+     * @param filename The path the input file is saved at.
+     * @return True only if the codec factory finds no codec for the file.
+     */
+    @Override protected boolean isSplitable(JobContext context, Path filename) {
+        final Configuration conf = context.getConfiguration();
+        final CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(filename);
+        return (codec == null);
+    }
+
     /**
      * A record reader for the standard FASTQ format.
      *