Skip to content

Commit

Permalink
Add tests back to BaseRecalibratorDataflowIntegrationTest that run co…
Browse files Browse the repository at this point in the history
…mpletely

locally using a local reference.
  • Loading branch information
tomwhite committed Sep 9, 2015
1 parent 102127b commit 7de84ad
Showing 1 changed file with 56 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,28 +40,27 @@ public static void main(String[] args) throws Exception {
}

private static class BQSRTest {
final String referenceSetID;
final String reference;
final String bam;
final String knownSites;
final String args;
final String expectedFileName;

private BQSRTest(String referenceSetID, String bam, String knownSites, String args, String expectedFileName) {
this.referenceSetID = referenceSetID;
private BQSRTest(String reference, String bam, String knownSites, String args, String expectedFileName) {
this.reference = reference;
this.bam = bam;
this.knownSites = knownSites;
this.args = args;
this.expectedFileName = expectedFileName;
}

public String getCommandLine() {
return " -R " + RefAPISource.URL_PREFIX + referenceSetID +
return " -R " + reference +
" -I " + bam +
" " + args +
(knownSites.isEmpty() ? "": " -BQSRKnownVariants " + knownSites) +
" --RECAL_TABLE_FILE %s" +
" -sortAllCols" +
" --apiKey " + getDataflowTestApiKey();

This comment has been minimized.

Copy link
@lbergelson

lbergelson Sep 9, 2015

Member

So I'd added this here instead of putting it in the individual tests as an easy way to hide the apiKey from appearing in the list of args. When we call spec.executeTest("testBQSR-" + params.args, this); it logs the args as the test name. This is good, except we don't want to put our key in our public logs. If some tests can't handle the key, we need to change the parameter to executeTest so we're not dumping the key into our public logs. (and we need to revoke our current key and replace it.)

Could you update so it doesn't leak the key? Maybe add an includeKey parameter to BQSRTest that toggles including it in the command line. Or use whatever solution you think is most elegant.

" -sortAllCols";
}

@Override
Expand All @@ -85,6 +84,11 @@ public void runAllTests() throws Exception {
BQSRTest testCase = (BQSRTest)in[0];
testBQSRLocal(testCase);
}
System.out.println("BQSRTestRefCloud");
for(Object[] in : createBQSRTestDataRefCloud()) {
BQSRTest testCase = (BQSRTest)in[0];
testBQSRRefCloud(testCase);
}
System.out.println("testBQSRFailWithoutDBSNP");
testBQSRFailWithoutDBSNP();
System.out.println("testBQSRFailWithIncompatibleReference");
Expand All @@ -102,14 +106,32 @@ public void runAllTests() throws Exception {
System.out.println("Tests passed.");
}


@DataProvider(name = "BQSRTest")
@DataProvider(name = "BQSRTest") // same as BaseRecalibratorIntegrationTest
public Object[][] createBQSRTestData() {
final String hg18Reference = publicTestDir + "human_g1k_v37.chr17_1Mb.fasta";
final String HiSeqBam = getResourceDir() + "NA12878.chr17_69k_70k.dictFix.bam";
final String dbSNPb37 = getResourceDir() + "dbsnp_132.b37.excluding_sites_after_129.chr17_69k_70k.vcf";

final String moreSites = getResourceDir() + "bqsr.fakeSitesForTesting.b37.chr17.vcf"; //for testing 2 input files

return new Object[][]{
{new BQSRTest(hg18Reference, HiSeqBam, dbSNPb37, "", getResourceDir() + "expected.NA12878.chr17_69k_70k.txt")},
// TODO: fix this (expected and actual output differ in a few places)
//{new BQSRTest(hg18Reference, HiSeqBam, dbSNPb37, "-knownSites " + moreSites, getResourceDir() + "expected.NA12878.chr17_69k_70k.2inputs.txt")},
{new BQSRTest(hg18Reference, HiSeqBam, dbSNPb37, "--indels_context_size 4", getResourceDir() + "expected.NA12878.chr17_69k_70k.indels_context_size4.txt")},
{new BQSRTest(hg18Reference, HiSeqBam, dbSNPb37, "--low_quality_tail 5", getResourceDir() + "expected.NA12878.chr17_69k_70k.low_quality_tail5.txt")},
{new BQSRTest(hg18Reference, HiSeqBam, dbSNPb37, "--quantizing_levels 6", getResourceDir() + "expected.NA12878.chr17_69k_70k.quantizing_levels6.txt")},
{new BQSRTest(hg18Reference, HiSeqBam, dbSNPb37, "--mismatches_context_size 4", getResourceDir() + "expected.NA12878.chr17_69k_70k.mismatches_context_size4.txt")},
//{new BQSRTest(b36Reference, origQualsBam, dbSNPb36, "-OQ", getResourceDir() + "expected.originalQuals.1kg.chr1.1-1K.1RG.dictFix.OQ.txt")},
};
}

@DataProvider(name = "BQSRTestRefCloud")
public Object[][] createBQSRTestDataRefCloud() {
// we need an API key to get to the reference
final String apiArgs = " --project " + getDataflowTestProject() + " ";
final String apiArgs = "--apiKey " + getDataflowTestApiKey() + " --project " + getDataflowTestProject() + " ";
final String localResources = getResourceDir();
final String hg19Ref = "EMWV_ZfLxrDY-wE";
final String GRCh37Ref = RefAPISource.GRCH37_REF_ID;
final String GRCh37Ref = RefAPISource.URL_PREFIX + RefAPISource.GRCH37_REF_ID;
final String HiSeqBam = localResources + "CEUTrio.HiSeq.WGS.b37.ch20.1m-1m1k.NA12878.bam";
final String dbSNPb37 = getResourceDir() + "dbsnp_132.b37.excluding_sites_after_129.chr17_69k_70k.vcf";
final String moreSites = getResourceDir() + "bqsr.fakeSitesForTesting.b37.chr17.vcf"; //for testing 2 input files
Expand All @@ -128,8 +150,8 @@ public Object[][] createBQSRTestData() {

@DataProvider(name = "BQSRTestBucket")
public Object[][] createBQSRTestDataBucket() {
final String apiArgs = " --project " + getDataflowTestProject();
final String GRCh37Ref = RefAPISource.GRCH37_REF_ID;
final String apiArgs = "--apiKey " + getDataflowTestApiKey() + " --project " + getDataflowTestProject();
final String GRCh37Ref = RefAPISource.URL_PREFIX + RefAPISource.GRCH37_REF_ID;
final String localResources = getResourceDir();
final String HiSeqBamCloud = getCloudInputs() + "CEUTrio.HiSeq.WGS.b37.ch20.1m-1m1k.NA12878.bam";
final String dbSNPb37 = getResourceDir() + "dbsnp_132.b37.excluding_sites_after_129.chr17_69k_70k.vcf";
Expand All @@ -142,8 +164,8 @@ public Object[][] createBQSRTestDataBucket() {

@DataProvider(name = "BQSRTestCloud")
public Object[][] createBQSRTestDataCloud() {
final String cloudArgs = "--runner BLOCKING --project " + getDataflowTestProject() + " --staging " + getDataflowTestStaging();
final String GRCh37Ref = RefAPISource.GRCH37_REF_ID;
final String cloudArgs = "--runner BLOCKING --apiKey " + getDataflowTestApiKey() + " --project " + getDataflowTestProject() + " --staging " + getDataflowTestStaging();
final String GRCh37Ref = RefAPISource.URL_PREFIX + RefAPISource.GRCH37_REF_ID;
final String localResources = getResourceDir();
final String HiSeqBamCloud = getCloudInputs() + "CEUTrio.HiSeq.WGS.b37.ch20.1m-1m1k.NA12878.bam";
final String dbSNPb37 = getResourceDir() + "dbsnp_132.b37.excluding_sites_after_129.chr17_69k_70k.vcf";
Expand All @@ -154,8 +176,7 @@ public Object[][] createBQSRTestDataCloud() {
};
}

// "local", but we're still getting the reference from the cloud.
@Test(dataProvider = "BQSRTest", groups = {"cloud"})
@Test(dataProvider = "BQSRTest")
public void testBQSRLocal(BQSRTest params) throws IOException {
ArgumentsBuilder ab = new ArgumentsBuilder().add(params.getCommandLine());
addDataflowRunnerArgs(ab);
Expand All @@ -165,6 +186,15 @@ public void testBQSRLocal(BQSRTest params) throws IOException {
spec.executeTest("testBQSR-" + params.args, this);
}

@Test(dataProvider = "BQSRTestRefCloud", groups = {"cloud"})
public void testBQSRRefCloud(BQSRTest params) throws IOException {
ArgumentsBuilder ab = new ArgumentsBuilder().add(params.getCommandLine());
addDataflowRunnerArgs(ab);
IntegrationTestSpec spec = new IntegrationTestSpec(
ab.getString(),
Arrays.asList(params.expectedFileName));
spec.executeTest("testBQSR-" + params.args, this);
}

@Test(dataProvider = "BQSRTestBucket", groups = {"bucket"})
public void testBQSRBucket(BQSRTest params) throws IOException {
Expand All @@ -187,6 +217,7 @@ public void testBQSRCloud(BQSRTest params) throws IOException {
// TODO: enable this once we figure out how to read bams without requiring them to be indexed.
@Test(description = "This is to test https://github.com/broadinstitute/hellbender/issues/322", groups = {"cloud"}, enabled = false)
public void testPlottingWorkflow() throws IOException {
final String cloudArgs = "--apiKey " + getDataflowTestApiKey() + " ";
final String resourceDir = getTestDataDir() + "/" + "BQSR" + "/";
final String GRCh37Ref = RefAPISource.GRCH37_REF_ID; // that's the "full" version
final String dbSNPb37 = getResourceDir() + "dbsnp_132.b37.excluding_sites_after_129.chr17_69k_70k.vcf";
Expand All @@ -195,15 +226,17 @@ public void testPlottingWorkflow() throws IOException {
final File actualHiSeqBam_recalibrated = createTempFile("actual.NA12878.chr17_69k_70k.dictFix.recalibrated", ".bam");

final String tablePre = createTempFile("gatk4.pre.cols", ".table").getAbsolutePath();
final String argPre = " -apiref " + RefAPISource.URL_PREFIX + GRCh37Ref + " -BQSRKnownVariants " + dbSNPb37 + " -I " + HiSeqBam
final String argPre = cloudArgs
+ " -apiref " + RefAPISource.URL_PREFIX + GRCh37Ref + " -BQSRKnownVariants " + dbSNPb37 + " -I " + HiSeqBam
+ " -RECAL_TABLE_FILE " + tablePre + " --sort_by_all_columns true";
new BaseRecalibratorDataflow().instanceMain(Utils.escapeExpressions(argPre));

final String argApply = "-I " + HiSeqBam + " --bqsr_recal_file " + tablePre+ " -O " + actualHiSeqBam_recalibrated.getAbsolutePath();
new ApplyBQSR().instanceMain(Utils.escapeExpressions(argApply));

final File actualTablePost = createTempFile("gatk4.post.cols", ".table");
final String argsPost = " -apiref " + RefAPISource.URL_PREFIX + GRCh37Ref + " -BQSRKnownVariants " + dbSNPb37 + " -I " + actualHiSeqBam_recalibrated.getAbsolutePath()
final String argsPost = cloudArgs
+ " -apiref " + RefAPISource.URL_PREFIX + GRCh37Ref + " -BQSRKnownVariants " + dbSNPb37 + " -I " + actualHiSeqBam_recalibrated.getAbsolutePath()
+ " -RECAL_TABLE_FILE " + actualTablePost.getAbsolutePath() + " --sort_by_all_columns true";
// currently fails with:
// org.broadinstitute.hellbender.exceptions.UserException: A USER ERROR has occurred: Traversal by intervals was requested but some input files are not indexed.
Expand All @@ -223,10 +256,10 @@ public void testPlottingWorkflow() throws IOException {
@Test(groups = {"cloud"})
public void testBQSRFailWithoutDBSNP() throws IOException {
final String resourceDir = getTestDataDir() + "/" + "BQSR" + "/";
final String apiArgs = " --project " + getDataflowTestProject() + " ";
final String apiArgs = "--apiKey " + getDataflowTestApiKey() + " --project " + getDataflowTestProject() + " ";
final String localResources = getResourceDir();

final String GRCh37Ref = RefAPISource.GRCH37_REF_ID; // that's the "full" version
final String GRCh37Ref = RefAPISource.URL_PREFIX + RefAPISource.GRCH37_REF_ID; // that's the "full" version
final String HiSeqBam = resourceDir + "NA12878.chr17_69k_70k.dictFix.bam";

final String NO_DBSNP = "";
Expand All @@ -241,10 +274,10 @@ public void testBQSRFailWithoutDBSNP() throws IOException {
@Test(groups = {"cloud"})
public void testBQSRFailWithIncompatibleReference() throws IOException {
final String resourceDir = getTestDataDir() + "/" + "BQSR" + "/";
final String apiArgs = " --project " + getDataflowTestProject() + " ";
final String apiArgs = "--apiKey " + getDataflowTestApiKey() + " --project " + getDataflowTestProject() + " ";
final String localResources = getResourceDir();

final String hg19Ref = "EMWV_ZfLxrDY-wE";
final String hg19Ref = RefAPISource.URL_PREFIX + "EMWV_ZfLxrDY-wE";
final String HiSeqBam = resourceDir + "NA12878.chr17_69k_70k.dictFix.bam";

final String dbSNPb37 = getResourceDir() + "dbsnp_132.b37.excluding_sites_after_129.chr17_69k_70k.vcf";
Expand Down

0 comments on commit 7de84ad

Please sign in to comment.