Skip to content

Commit

Permalink
FilterReads integration tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
cmnbroad committed Sep 3, 2015
1 parent bb424f1 commit 741a21f
Show file tree
Hide file tree
Showing 9 changed files with 204 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ public String toString() {
@Argument(
doc = "Create .reads files (for debugging purposes)",
optional = true)
public boolean WRITE_READS_FILES = true;
public boolean WRITE_READS_FILES = false;

@Argument(doc = "SAM or BAM file to write read excluded results to",
optional = false, shortName = "O")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package org.broadinstitute.hellbender.tools.picard.sam;

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import org.broadinstitute.hellbender.utils.test.BaseTest;
import org.broadinstitute.hellbender.CommandLineProgramTest;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Integration tests for {@link FilterReads}.
 *
 * <p>Each case runs the tool on a SAM or BAM input from the test data directory, then checks
 * the number of records written to the output file and, when a sort order was requested,
 * the sort order recorded in the output header.
 */
public final class FilterReadsIntegrationTest extends CommandLineProgramTest {

    private static final File TEST_DATA_DIR = new File(getTestDataDir(), "picard/sam/FilterReads/");

    @Override
    public String getTestedClassName() {
        return FilterReads.class.getSimpleName();
    }

    /**
     * Supplies the cases for {@link #testReadFilter}.
     *
     * <p>Each row is: input file name, output file extension, filter name, read list file name
     * (or {@code null} when the filter does not use one), requested sort order, and the number
     * of reads expected in the output.
     */
    @DataProvider(name="filterTestsData")
    public Object[][] filterTestData() {
        return new Object[][]{
                {"all_aligned.sam", ".sam", "includeAligned", null, "unsorted", 4},
                {"all_aligned.sam", ".sam", "excludeAligned", null, "unsorted", 0},

                {"mixed_aligned.sam", ".sam", "includeAligned", null, "unsorted", 8},
                {"mixed_aligned.sam", ".sam", "excludeAligned", null, "unsorted", 2},
                {"mixed_aligned.sam", ".sam", "includeAligned", null, "coordinate", 8},
                {"mixed_aligned.sam", ".sam", "excludeAligned", null, "queryname", 2},
                {"mixed_aligned.sam", ".sam", "includeReadList", "readlist.txt", "unsorted", 2},
                {"mixed_aligned.sam", ".sam", "excludeReadList", "readlist.txt", "unsorted", 8},

                {"unmapped.sam", ".sam", "includeAligned", null, "unsorted", 0},
                {"unmapped.sam", ".sam", "excludeAligned", null, "unsorted", 10},
                {"unmapped.sam", ".sam", "includeAligned", null, "coordinate", 0},
                {"unmapped.sam", ".sam", "excludeAligned", null, "queryname", 10},
                {"unmapped.sam", ".sam", "includeReadList", "readlist.txt", "coordinate", 2},
                {"unmapped.sam", ".sam", "excludeReadList", "readlist.txt", "queryname", 8},

                {"all_aligned.bam", ".bam", "includeAligned", null, "unsorted", 4},
                {"all_aligned.bam", ".bam", "excludeAligned", null, "unsorted", 0},

                {"mixed_aligned.bam", ".bam", "includeAligned", null, "unsorted", 8},
                {"mixed_aligned.bam", ".bam", "excludeAligned", null, "unsorted", 2},
                {"mixed_aligned.bam", ".bam", "includeAligned", null, "coordinate", 8},
                {"mixed_aligned.bam", ".bam", "excludeAligned", null, "queryname", 2},
                {"mixed_aligned.bam", ".bam", "includeReadList", "readlist.txt", "unsorted", 2},
                {"mixed_aligned.bam", ".bam", "excludeReadList", "readlist.txt", "unsorted", 8},

                {"unmapped.bam", ".bam", "includeAligned", null, "unsorted", 0},
                {"unmapped.bam", ".bam", "excludeAligned", null, "unsorted", 10},
                {"unmapped.bam", ".bam", "includeAligned", null, "coordinate", 0},
                {"unmapped.bam", ".bam", "excludeAligned", null, "queryname", 10},
                {"unmapped.bam", ".bam", "includeReadList", "readlist.txt", "coordinate", 2},
                {"unmapped.bam", ".bam", "excludeReadList", "readlist.txt", "queryname", 8},
        };
    }

    /**
     * Runs the tool with the given filter settings and verifies the output.
     *
     * @param inputFileName   name of the input file, resolved against the test data directory
     * @param outputExtension extension given to the temporary output file
     * @param filter          value passed via {@code --FILTER}; omitted when {@code null}
     * @param readListFile    name of the read list file passed via {@code --READ_LIST_FILE},
     *                        resolved against the test data directory; omitted when {@code null}
     * @param sortOrder       {@code "coordinate"} or {@code "queryname"} to pass
     *                        {@code --SORT_ORDER}; any other value passes no sort order argument
     * @param expectedCount   number of records expected in the output file
     * @throws Exception if reading the output file fails
     */
    @Test(dataProvider="filterTestsData")
    public void testReadFilter(
            final String inputFileName,
            final String outputExtension,
            final String filter,
            final String readListFile,
            final String sortOrder,
            final int expectedCount) throws Exception {
        final List<String> args = new ArrayList<>();

        args.add("-" + StandardArgumentDefinitions.INPUT_SHORT_NAME);
        args.add(new File(TEST_DATA_DIR, inputFileName).getAbsolutePath());

        final String outFileName = BaseTest.createTempFile(inputFileName, outputExtension).getAbsolutePath();
        args.add("-" + StandardArgumentDefinitions.OUTPUT_SHORT_NAME);
        args.add(outFileName);

        if (null != filter) {
            args.add("--FILTER");
            args.add(filter);
        }
        if (null != readListFile) {
            final String readListPath = new File(TEST_DATA_DIR, readListFile).getAbsolutePath();
            args.add("--READ_LIST_FILE");
            args.add(readListPath);
        }

        // Only the two explicit orderings are forwarded; "unsorted" (or anything else)
        // leaves the tool to use whatever it does when no sort order is given.
        if ("coordinate".equals(sortOrder) || "queryname".equals(sortOrder)) {
            args.add("--SORT_ORDER");
            args.add(sortOrder);
        }

        Assert.assertNull(runCommandLine(args));
        Assert.assertEquals(getReadCounts(outFileName), expectedCount);
        Assert.assertTrue(
                validateSortOrder(outFileName, sortOrder),
                "Output header sort order does not match requested order: " + sortOrder);
    }

    /**
     * Counts the records in a SAM/BAM file.
     *
     * @param resultFileName path of the file to read
     * @return the number of records iterated from the file
     * @throws IOException if closing the reader fails
     */
    private int getReadCounts(final String resultFileName) throws IOException {
        final File path = new File(resultFileName);
        IOUtil.assertFileIsReadable(path);

        // try-with-resources so the reader is released even if iteration throws.
        try (final SamReader in = SamReaderFactory.makeDefault().open(path)) {
            int count = 0;
            for (@SuppressWarnings("unused") final SAMRecord rec : in) {
                count++;
            }
            return count;
        }
    }

    /**
     * Checks the sort order recorded in a SAM/BAM file header against the requested order.
     *
     * @param resultFileName path of the file whose header is inspected
     * @param sortOrderName  {@code "coordinate"} or {@code "queryname"} to require that header
     *                       sort order; any other value accepts whatever the header says
     * @return {@code true} if the header sort order is acceptable for {@code sortOrderName}
     * @throws IOException if closing the reader fails
     */
    private boolean validateSortOrder(final String resultFileName, final String sortOrderName) throws IOException {
        final File path = new File(resultFileName);
        IOUtil.assertFileIsReadable(path);

        try (final SamReader in = SamReaderFactory.makeDefault().open(path)) {
            final SAMFileHeader header = in.getFileHeader();
            final SAMFileHeader.SortOrder hdrOrder = header.getSortOrder();
            switch (sortOrderName) {
                case "coordinate": {
                    return hdrOrder == SAMFileHeader.SortOrder.coordinate;
                }
                case "queryname": {
                    return hdrOrder == SAMFileHeader.SortOrder.queryname;
                }
                case "unsorted":
                default: // don't care what it is
                    return true;
            }
        }
    }

}
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
@HD VN:1.5 SO:queryname
@SQ SN:chr1 LN:101
@SQ SN:chr2 LN:101
@SQ SN:chr3 LN:101
@SQ SN:chr4 LN:101
@SQ SN:chr5 LN:101
@SQ SN:chr6 LN:101
@SQ SN:chr7 LN:404
@SQ SN:chr8 LN:202
@RG ID:0 SM:Hi,Mom! PL:ILLUMINA
@PG ID:1 PN:Hey! VN:2.0
both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
both_reads_align_clip_marked 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
read_2_too_many_gaps 163 chr7 302 255 10M1D10M5I76M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
@HD VN:1.5 SO:queryname
@SQ SN:chr1 LN:101
@SQ SN:chr2 LN:101
@SQ SN:chr3 LN:101
@SQ SN:chr4 LN:101
@SQ SN:chr5 LN:101
@SQ SN:chr6 LN:101
@SQ SN:chr7 LN:404
@SQ SN:chr8 LN:202
@RG ID:0 SM:Hi,Mom! PL:ILLUMINA
@PG ID:1 PN:Hey! VN:2.0
both_reads_align_clip_adapter 99 chr7 21 255 101M = 16 96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
both_reads_align_clip_adapter 147 chr7 16 255 101M = 21 -96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
both_reads_align_clip_adapter 2147 chr7 21 255 101M = 16 96 AAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
both_reads_align_clip_adapter 2195 chr7 16 255 101M = 21 -96 AAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
both_reads_align_clip_marked 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
both_reads_present_only_first_aligns 165 * 0 0 * chr7 1 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
read_2_too_many_gaps 163 chr7 302 255 10M1D10M5I76M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
both_reads_present_only_first_aligns

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
@HD VN:1.5 SO:queryname
@RG ID:0 SM:Hi,Mom! PL:ILLUMINA
both_reads_align_clip_adapter 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
both_reads_align_clip_adapter 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
both_reads_align_clip_marked 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 XT:i:97
both_reads_align_clip_marked 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0 XT:i:97
both_reads_present_only_first_aligns 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
both_reads_present_only_first_aligns 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
neither_read_aligns_or_present 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
neither_read_aligns_or_present 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
read_2_too_many_gaps 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
read_2_too_many_gaps 141 * 0 0 * * 0 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0

0 comments on commit 741a21f

Please sign in to comment.