Skip to content

Commit

Permalink
Type change int -> long to prevent tranche novel variant count overfl…
Browse files Browse the repository at this point in the history
…ow (#7864)

* Type change int -> long to prevent tranche novel variant count overflow
in VQSR scattered mode
  • Loading branch information
ldgauthier committed Jun 17, 2022
1 parent d22b752 commit add34e1
Show file tree
Hide file tree
Showing 9 changed files with 79 additions and 46 deletions.
Expand Up @@ -27,14 +27,16 @@ public class Tranche {
final double minVQSLod; //minimum value of VQSLOD in this tranche
final double knownTiTv; //titv value of known sites in this tranche
final double novelTiTv; //titv value of novel sites in this tranche
final int numKnown; //number of known sites in this tranche
final int numNovel; //number of novel sites in this tranche
final long numKnown; //number of known sites in this tranche
final long numNovel; //number of novel sites in this tranche
final VariantRecalibratorArgumentCollection.Mode model;
final String name; //Name of the tranche

public Tranche(final String name, final double knownTiTv, final int numNovel, final double minVQSLod, final VariantRecalibratorArgumentCollection.Mode model, final double novelTiTv, final int accessibleTruthSites, final int numKnown, final int callsAtTruthSites) {
public Tranche(final String name, final double knownTiTv, final long numNovel, final double minVQSLod,
final VariantRecalibratorArgumentCollection.Mode model, final double novelTiTv,
final int accessibleTruthSites, final long numKnown, final int callsAtTruthSites) {
if ( numKnown < 0 || numNovel < 0) {
throw new GATKException("Invalid tranche - no. variants is < 0 : known " + numKnown + " novel " + numNovel);
throw new GATKException("Invalid tranche " + name + " - no. variants is < 0 : known " + numKnown + " novel " + numNovel);
}

if ( name == null ) {
Expand Down Expand Up @@ -104,7 +106,8 @@ public <T extends Tranche> String getTrancheString(final T prev) {
}

protected static Tranche trancheOfVariants(final List<VariantDatum> data, final int minI, final double ts, final VariantRecalibratorArgumentCollection.Mode model ) {
int numKnown = 0, numNovel = 0, knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0;
long numKnown = 0, numNovel = 0;
int knownTi = 0, knownTv = 0, novelTi = 0, novelTv = 0;

final double minLod = data.get(minI).lod;
for (final VariantDatum datum : data) {
Expand Down Expand Up @@ -147,8 +150,8 @@ protected static Tranche emptyTranche(final List<VariantDatum> data, final int m

final double knownTiTv = 0.0;
final double novelTiTv = 0.0;
final int numKnown = 0;
final int numNovel = 0;
final long numKnown = 0;
final long numNovel = 0;

return new Tranche("unnamed", knownTiTv, numNovel, minLod, model, novelTiTv, accessibleTruthSites, numKnown, nCallsAtTruth);
}
Expand Down Expand Up @@ -194,6 +197,26 @@ protected static int getOptionalInteger(final Map<String, String> bindings, fina
}
}

/**
 * Looks up a mandatory integral field from the key/value bindings of a tranches-file line.
 *
 * @param bindings map from field name to its textual value
 * @param key      name of the field to read
 * @return the value stored under {@code key}, parsed as a {@code long}
 * @throws UserException.MalformedFile if {@code key} is absent or its value is not a valid {@code long}
 */
protected static long getRequiredLong(final Map<String, String> bindings, final String key) {
    if ( !bindings.containsKey(key) ) {
        throw new UserException.MalformedFile("Malformed tranches file. Missing required key " + key);
    }
    try {
        // parseLong yields the primitive directly; valueOf would allocate a Long only to unbox it again
        return Long.parseLong(bindings.get(key));
    } catch (final NumberFormatException e) {
        throw new UserException.MalformedFile("Malformed tranches file. Invalid value for key " + key);
    }
}

/**
 * Looks up an optional integral field from the key/value bindings of a tranches-file line.
 *
 * @param bindings     map from field name to its textual value
 * @param key          name of the field to read
 * @param defaultValue value returned when {@code key} is not present in {@code bindings};
 *                     typed {@code long} so the default covers the same range as the return value
 *                     (existing callers passing an {@code int} are widened implicitly)
 * @return the parsed value stored under {@code key}, or {@code defaultValue} if the key is absent
 * @throws UserException.MalformedFile if {@code key} is present but its value is not a valid {@code long}
 */
protected static long getOptionalLong(final Map<String, String> bindings, final String key, final long defaultValue) {
    // Absent key: hand back the default as-is instead of formatting it to a String and re-parsing it.
    if ( !bindings.containsKey(key) ) {
        return defaultValue;
    }
    try {
        // A key mapped to null still reaches parseLong and is reported as malformed, as before.
        return Long.parseLong(bindings.get(key));
    } catch (final NumberFormatException e) {
        throw new UserException.MalformedFile("Malformed tranches file. Invalid value for key " + key);
    }
}

/**
 * Computes the ratio of {@code callsAtTruthSites} to {@code accessibleTruthSites}.
 *
 * @return the ratio as a floating-point value, or {@code 0.0} when {@code accessibleTruthSites}
 *         is not positive (which also avoids dividing by zero)
 */
protected double getTruthSensitivity() {
    if ( accessibleTruthSites <= 0 ) {
        return 0.0;
    }
    // Promote the numerator so the division is carried out in floating point.
    return (double) callsAtTruthSites / accessibleTruthSites;
}
Expand Down
Expand Up @@ -28,9 +28,9 @@ final class TruthSensitivityTranche extends Tranche {
public TruthSensitivityTranche(
final double targetTruthSensitivity,
final double minVQSLod,
final int numKnown,
final long numKnown,
final double knownTiTv,
final int numNovel,
final long numNovel,
final double novelTiTv,
final int accessibleTruthSites,
final int callsAtTruthSites,
Expand All @@ -41,9 +41,9 @@ public TruthSensitivityTranche(
public TruthSensitivityTranche(
final double targetTruthSensitivity,
final double minVQSLod,
final int numKnown,
final long numKnown,
final double knownTiTv,
final int numNovel,
final long numNovel,
final double novelTiTv,
final int accessibleTruthSites,
final int callsAtTruthSites,
Expand Down
Expand Up @@ -26,9 +26,9 @@ public Double getTrancheIndex() {

public VQSLODTranche(
final double minVQSLod,
final int numKnown,
final long numKnown,
final double knownTiTv,
final int numNovel,
final long numNovel,
final double novelTiTv,
final int accessibleTruthSites,
final int callsAtTruthSites,
Expand Down Expand Up @@ -114,9 +114,9 @@ public static List<VQSLODTranche> readTranches(final GATKPath f) throws IOExcept
}
tranches.add(new VQSLODTranche(
getRequiredDouble(bindings, "minVQSLod"),
getOptionalInteger(bindings, "numKnown", -1),
getOptionalLong(bindings, "numKnown", -1),
getOptionalDouble(bindings, "knownTiTv", -1.0),
getRequiredInteger(bindings, "numNovel"),
getRequiredLong(bindings, "numNovel"),
getRequiredDouble(bindings, "novelTiTv"),
getOptionalInteger(bindings, "accessibleTruthSites", -1),
getOptionalInteger(bindings, "callsAtTruthSites", -1),
Expand Down Expand Up @@ -178,10 +178,10 @@ public static List<TruthSensitivityTranche> mergeAndConvertTranches(final TreeMa

public static VQSLODTranche mergeAndConvertTranches(final List<VQSLODTranche> scatteredTranches, VariantRecalibratorArgumentCollection.Mode mode) {
double indexVQSLOD = scatteredTranches.get(0).minVQSLod;
int sumNumKnown = 0;
long sumNumKnown = 0;
double sumKnownTransitions = 0;
double sumKnownTransversions = 0;
int sumNumNovel = 0;
long sumNumNovel = 0;
double sumNovelTransitions = 0;
double sumNovelTransversions = 0;
int sumAccessibleTruthSites = 0;
Expand Down
Expand Up @@ -5,9 +5,13 @@
import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.testutils.IntegrationTestSpec;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

/**
* Created by gauthier on 7/18/17.
Expand All @@ -16,45 +20,36 @@ public class GatherTranchesIntegrationTest extends CommandLineProgramTest {

private static final String testDir = GATKBaseTest.publicTestDir + "/large/VQSR/";

@Test
public void testCombine2Shards() throws Exception {
final File recal1 = new File(testDir + "snpTranches.scattered.txt"); //this is the output of VariantRecalibratorIntegrationTest.testVariantRecalibratorSNPscattered
final File recal2 = new File(testDir + "snpTranches.scattered.2.txt"); //this is a copy of the above
@DataProvider(name = "testInputs")
public Object[][] getTestInputs () {
return new Object[][]{
{Arrays.asList(new File(testDir + "snpTranches.scattered.txt"), new File(testDir + "snpTranches.scattered.txt")),
new File(testDir + "expected/snpTranches.gathered.txt"), "SNP"},

final File recal_original = new File(testDir + "expected/snpTranches.gathered.txt");
{Arrays.asList(new File(testDir + "indels.0.tranches"), new File(testDir + "indels.1.tranches")),
new File(testDir + "expected/indels.gathered.tranches"), "INDEL"},

final ArgumentsBuilder args = new ArgumentsBuilder();
args.addRaw("--input");
args.addRaw(recal1.getAbsolutePath());
args.addRaw("--input");
args.addRaw(recal2.getAbsolutePath());
args.add("mode", "SNP");
{Arrays.asList(new File(testDir + "test-single-giant-input-snps.tranches")),
new File(testDir + "expected/singleOverflow.tranches"), "SNP"},

final File outFile = GATKBaseTest.createTempFile("gatheredTranches", ".txt");
args.addOutput(outFile);
final Object res = this.runCommandLine(args.getArgsArray());
Assert.assertEquals(res, 0);
IntegrationTestSpec.assertEqualTextFiles(outFile, recal_original);
{Arrays.asList(new File(testDir + "test-very-large-one-snps.tranches"), new File(testDir + "test-very-large-two-snps.tranches")),
new File(testDir + "expected/testSummedOverflow.tranches"), "SNP"}
};
}

@Test
public void testCombine2IndelTranches() throws Exception {
final File tranches1 = new File(testDir + "indels.0.tranches");
final File tranches2 = new File(testDir + "indels.1.tranches");

final File recal_original = new File(testDir + "expected/indels.gathered.tranches");

@Test (dataProvider = "testInputs")
public void testGatherTranches(List<File> inputs, File expected, String mode) throws IOException {
final ArgumentsBuilder args = new ArgumentsBuilder();
args.addRaw("--input");
args.addRaw(tranches1.getAbsolutePath());
args.addRaw("--input");
args.addRaw(tranches2.getAbsolutePath());
args.add("mode", "INDEL");
for (File inFile : inputs) {
args.addRaw("--input");
args.addRaw(inFile);
}
args.add("mode", mode);

final File outFile = GATKBaseTest.createTempFile("gatheredTranches", ".txt");
args.addOutput(outFile);
final Object res = this.runCommandLine(args.getArgsArray());
Assert.assertEquals(res, 0);
IntegrationTestSpec.assertEqualTextFiles(outFile, recal_original);
IntegrationTestSpec.assertEqualTextFiles(outFile, expected);
}
}
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown

0 comments on commit add34e1

Please sign in to comment.