diff --git a/adam-core/src/main/java/org/bdgenomics/adam/io/InterleavedFastqInputFormat.java b/adam-core/src/main/java/org/bdgenomics/adam/io/InterleavedFastqInputFormat.java
index 14d12521f9..24b7bef32f 100755
--- a/adam-core/src/main/java/org/bdgenomics/adam/io/InterleavedFastqInputFormat.java
+++ b/adam-core/src/main/java/org/bdgenomics/adam/io/InterleavedFastqInputFormat.java
@@ -19,8 +19,12 @@
 import java.io.EOFException;
 import java.io.IOException;
 
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
@@ -43,6 +47,21 @@
  */
 public final class InterleavedFastqInputFormat extends FileInputFormat {
 
+    /**
+     * For now, we do not support splittable compression codecs. As in, we will
+     * read the compressed data, but we will not allow it to be splittable. We
+     * will fix this in #1457.
+     *
+     * @param context The job context to get the configuration from.
+     * @param filename The path the input file is saved at.
+     * @return Returns false if this file is compressed.
+     */
+    @Override protected boolean isSplitable(JobContext context, Path filename) {
+        final Configuration conf = context.getConfiguration();
+        final CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(filename);
+        return (codec == null);
+    }
+
     /**
      * A record reader for the interleaved FASTQ format.
      *
diff --git a/adam-core/src/main/java/org/bdgenomics/adam/io/SingleFastqInputFormat.java b/adam-core/src/main/java/org/bdgenomics/adam/io/SingleFastqInputFormat.java
index e933c1ac89..1c39c385ce 100644
--- a/adam-core/src/main/java/org/bdgenomics/adam/io/SingleFastqInputFormat.java
+++ b/adam-core/src/main/java/org/bdgenomics/adam/io/SingleFastqInputFormat.java
@@ -19,8 +19,12 @@
 import java.io.EOFException;
 import java.io.IOException;
 
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
@@ -35,6 +39,21 @@
  */
 public final class SingleFastqInputFormat extends FileInputFormat {
 
+    /**
+     * For now, we do not support splittable compression codecs. As in, we will
+     * read the compressed data, but we will not allow it to be splittable. We
+     * will fix this in #1457.
+     *
+     * @param context The job context to get the configuration from.
+     * @param filename The path the input file is saved at.
+     * @return Returns false if this file is compressed.
+     */
+    @Override protected boolean isSplitable(JobContext context, Path filename) {
+        final Configuration conf = context.getConfiguration();
+        final CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(filename);
+        return (codec == null);
+    }
+
     /**
      * A record reader for the standard FASTQ format.
      *