Permalink
Browse files

Merge remote-tracking branch 'unstable/master'

  • Loading branch information...
2 parents 532efad + a0a1a36 commit e29b52b9a5a898c904e18507ddbaf8fd98712039 @eitanbanks eitanbanks committed Apr 30, 2013
Showing 383 changed files with 25,654 additions and 10,029 deletions.
View
@@ -1,7 +1,6 @@
/*.bam
/*.bai
/*.bed
-*.idx
*~
/*.vcf
/*.txt
View
@@ -91,9 +91,8 @@
<property name="key.dir" value="${public.dir}/keys" />
<!-- Contracts for Java -->
- <!-- By default, enabled only for test targets -->
- <!-- To disable for test targets, run with -Duse.contracts=false -->
- <!-- To enable for non-test targets, run with -Duse.contracts=true -->
+ <!-- Disabled by default -->
+ <!-- To enable, run with -Duse.contracts=true -->
<property name="java.contracts.dir" value="${build.dir}/java/contracts" />
<property name="contracts.version" value="1.0-r139" />
<property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/>
@@ -675,8 +674,9 @@
<include name="org/broadinstitute/sting/utils/GenomeLocParser*.class"/>
<include name="org/broadinstitute/sting/utils/GenomeLoc.class"/>
<include name="org/broadinstitute/sting/utils/HasGenomeLocation.class"/>
- <include name="org/broadinstitute/sting/utils/BaseUtils.class"/>
+ <include name="org/broadinstitute/sting/utils/BaseUtils*.class"/>
<include name="org/broadinstitute/sting/utils/Utils.class"/>
+ <include name="org/broadinstitute/sting/utils/MRUCaching*.class"/>
<include name="org/broadinstitute/sting/utils/exceptions/**/*.class"/>
<include name="org/broadinstitute/sting/gatk/walkers/na12878kb/core/**/*.class"/>
<include name="net/sf/picard/reference/FastaSequenceFile.class"/>
@@ -865,14 +865,18 @@
<property name="executable" value="GenomeAnalysisTK" />
</target>
- <target name="init.executable.queueall" depends="init.build.all, init.javaandscala">
- <property name="executable" value="Queue" />
+ <target name="init.executable.gatkall" depends="init.build.all, init.javaonly">
+ <property name="executable" value="GenomeAnalysisTK" />
</target>
<target name="init.executable.queuefull" depends="init.build.publicprotectedonly, init.javaandscala">
<property name="executable" value="Queue" />
</target>
+ <target name="init.executable.queueall" depends="init.build.all, init.javaandscala">
+ <property name="executable" value="Queue" />
+ </target>
+
<target name="require.executable">
<condition property="no.executable.defined">
<or>
@@ -921,12 +925,17 @@
</target>
<!-- Package specific versions of the GATK/Queue. ALWAYS do an ant clean before invoking these! -->
+
+ <!-- GATK "full" == public + protected, i.e., the standard binary release of the GATK -->
<target name="package.gatk.full" depends="init.executable.gatkfull,package" />
- <target name="package.queue.all" depends="init.executable.queueall,package" />
+ <!-- GATK "all" == public + protected + private. Should never be publicly released -->
+ <target name="package.gatk.all" depends="init.executable.gatkall,package" />
<target name="package.queue.full" depends="init.executable.queuefull,package" />
+ <target name="package.queue.all" depends="init.executable.queueall,package" />
+
<!-- Release a build. Don't call this target directly. Call one of the specific release targets below -->
<target name="release" depends="require.executable" description="release a build, putting each file in a location specified by the package">
<ant antfile="${package.output.dir}/${executable}.xml" target="release" />
@@ -1104,7 +1113,7 @@
<path id="testng.default.classpath">
<path refid="build.results" />
- <pathelement path="${clover.jar}"/>
+ <pathelement path="${clover.jar}"/>
<pathelement location="${java.contracts.dir}" />
<pathelement location="${java.test.classes}" />
<pathelement location="${scala.test.classes}" />
@@ -1114,7 +1123,7 @@
<target name="clover.report">
<clover-report coverageCacheSize="nocache">
- <current outfile="clover_html" title="GATK clover report" showUniqueCoverage="false" numThreads="4">
+ <current outfile="clover_html" title="GATK clover report" showUniqueCoverage="false" numThreads="4">
<format type="html" filter="catch,static,property"/>
<fileset dir="public">
<patternset id="clover.excludes">
@@ -1194,8 +1203,8 @@
</scalac>
</target>
- <!-- NOTE: contracts enabled for all tests -->
- <target name="test.compile" depends="init.usecontracts,dist,test.java.compile,test.scala.compile" />
+ <!-- NOTE: contracts disabled for all tests now, since contracts don't work with Java 7 -->
+ <target name="test.compile" depends="dist,test.java.compile,test.scala.compile" />
<!-- Run test macro -->
@@ -1244,7 +1253,7 @@
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.TestNGTestTransformer,org.broadinstitute.sting.StingTextReporter,org.uncommons.reportng.HTMLReporter">
<jvmarg value="-Xmx${test.maxmemory}" />
<jvmarg value="-ea" />
- <jvmarg value="-Dclover.pertest.coverage=diff" />
+ <jvmarg value="-Dclover.pertest.coverage=diff" />
<jvmarg value="-Djava.awt.headless=true" />
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
@@ -1287,7 +1296,7 @@
<target name="test.init">
<property name="testng.classpath" value="testng.default.classpath" />
- <property name="test.maxmemory" value="${test.default.maxmemory}"/>
+ <property name="test.maxmemory" value="${test.default.maxmemory}"/>
</target>
<target name="init.testgatkjar">
@@ -1331,9 +1340,9 @@
<target name="committests" depends="unittest,integrationtest,pipelinetest" />
<!-- Order of the dependencies is significant in the *.release.tests targets -->
- <target name="gatkfull.binary.release.tests" depends="init.usecontracts,package.gatk.full,init.testgatkjar,unittest,integrationtest" />
+ <target name="gatkfull.binary.release.tests" depends="package.gatk.full,init.testgatkjar,unittest,integrationtest" />
- <target name="queuefull.binary.release.tests" depends="init.usecontracts,package.queue.full,init.testqueuejar,pipelinetest" />
+ <target name="queuefull.binary.release.tests" depends="package.queue.full,init.testqueuejar,pipelinetest" />
<!-- Our four different test types: UnitTest, IntegrationTest, LargeScaleTest, PipelineTest -->
<target name="unittest" depends="test.compile,test.init" description="Run unit tests">
@@ -1442,4 +1451,30 @@
<run-test testtype="${single}" outputdir="${report}/${single}" runfailed="false"/>
</target>
+
+ <!-- A target that runs a test without doing ANY compilation or any extra work at all -->
+ <!-- Intended to enable parallel tests that share the same working directory and build -->
+ <target name="runtestonly">
+ <condition property="not.clean">
+ <and>
+ <available file="${build.dir}" />
+ <available file="${lib.dir}" />
+ <available file="${dist.dir}" />
+ <available file="${java.test.classes}" />
+ </and>
+ </condition>
+ <fail message="runtestonly target requires a NON-CLEAN working directory (INCLUDING test classes). Do a full test build using ant test.compile first." unless="not.clean" />
+
+ <condition property="no.single.test.specified">
+ <equals arg1="${single}" arg2="$${single}" />
+ </condition>
+ <fail message="Must specify a specific test. Usage: ant runtestonly -Dsingle=TestClass" if="no.single.test.specified" />
+
+ <property name="testng.classpath" value="testng.default.classpath" />
+ <property name="test.maxmemory" value="${test.default.maxmemory}"/>
+ <property name="include.scala" value="true" />
+
+ <run-test testtype="${single}" outputdir="${report}/${single}" runfailed="false"/>
+ </target>
+
</project>
View
@@ -41,6 +41,8 @@
<dependency org="log4j" name="log4j" rev="1.2.15"/>
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
<dependency org="colt" name="colt" rev="1.2.0"/>
+ <dependency org="it.unimi.dsi" name="fastutil" rev="6.5.3" />
+
<!-- <dependency org="jboss" name="javassist" rev="3.7.ga"/> -->
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
<dependency org="org.apache.bcel" name="bcel" rev="5.2"/>
@@ -55,7 +55,9 @@
import java.io.File;
import java.io.PrintStream;
+import java.util.ArrayList;
import java.util.Collections;
+import java.util.List;
import java.util.Map;
/**
@@ -74,6 +76,12 @@
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus", required = false)
public Double heterozygosity = UnifiedGenotyperEngine.HUMAN_SNP_HETEROZYGOSITY;
+ /**
+ * This argument informs the prior probability of having an indel at a site.
+ */
+ @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
+ public double INDEL_HETEROZYGOSITY = 1.0/8000;
+
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
public GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
@@ -113,6 +121,29 @@
public int MAX_ALTERNATE_ALLELES = 6;
/**
+ * By default, the prior specified with the argument --heterozygosity/-hets is used for variant discovery at a particular locus, using an infinite sites model,
+ * see e.g. Watterson (1975) or Tajima (1996).
+ * This model asserts that the probability of having a population of k variant sites in N chromosomes is proportional to theta/k, for k=1:N.
+ *
+ * There are instances where using this prior might not be desirable, e.g. for population studies where this prior might not be appropriate,
+ * as for example when the ancestral status of the reference allele is not known.
+ * By using this argument, the user can manually specify priors to be used for calling as a vector of doubles, with the following restrictions:
+ * a) User must specify 2N values, where N is the number of samples.
+ * b) Only diploid calls supported.
+ * c) Probability values are specified in double format, in linear space.
+ * d) No negative values allowed.
+ * e) Values will be added and Pr(AC=0) will be 1-sum, so that they sum up to one.
+ * f) If user-defined values add to more than one, an error will be produced.
+ *
+ * If the user wants completely flat priors, then the user should specify the same value (=1/(2*N+1)) 2*N times, e.g.
+ * -inputPrior 0.33 -inputPrior 0.33
+ * for the single-sample diploid case.
+ */
+ @Advanced
+ @Argument(fullName = "input_prior", shortName = "inputPrior", doc = "Input prior for calls", required = false)
+ public List<Double> inputPrior = Collections.emptyList();
+
+ /**
* If this fraction is greater than zero, the caller will aggressively attempt to remove contamination through biased down-sampling of reads.
* Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we
* will try to remove (N * contamination fraction) bases for each alternate allele.
@@ -156,10 +187,6 @@ public void setSampleContamination(DefaultHashMap<String, Double> sampleContamin
public AFCalcFactory.Calculation AFmodel = AFCalcFactory.Calculation.getDefaultModel();
@Hidden
- @Argument(fullName = "logRemovedReadsFromContaminationFiltering", shortName="contaminationLog", required=false)
- public PrintStream contaminationLog = null;
-
- @Hidden
@Argument(shortName = "logExactCalls", doc="x", required=false)
public File exactCallsLog = null;
@@ -170,15 +197,16 @@ public StandardCallerArgumentCollection(final StandardCallerArgumentCollection S
this.alleles = SCAC.alleles;
this.GenotypingMode = SCAC.GenotypingMode;
this.heterozygosity = SCAC.heterozygosity;
+ this.INDEL_HETEROZYGOSITY = SCAC.INDEL_HETEROZYGOSITY;
this.MAX_ALTERNATE_ALLELES = SCAC.MAX_ALTERNATE_ALLELES;
this.OutputMode = SCAC.OutputMode;
this.STANDARD_CONFIDENCE_FOR_CALLING = SCAC.STANDARD_CONFIDENCE_FOR_CALLING;
this.STANDARD_CONFIDENCE_FOR_EMITTING = SCAC.STANDARD_CONFIDENCE_FOR_EMITTING;
this.CONTAMINATION_FRACTION = SCAC.CONTAMINATION_FRACTION;
this.CONTAMINATION_FRACTION_FILE=SCAC.CONTAMINATION_FRACTION_FILE;
- this.contaminationLog = SCAC.contaminationLog;
this.exactCallsLog = SCAC.exactCallsLog;
this.sampleContamination=SCAC.sampleContamination;
this.AFmodel = SCAC.AFmodel;
+ this.inputPrior = SCAC.inputPrior;
}
}
@@ -47,6 +47,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
+import org.broadinstitute.sting.utils.genotyper.MostLikelyAllele;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
@@ -58,8 +59,12 @@
/**
- * The u-based z-approximation from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele).
- * Note that the base quality rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
+ * U-based z-approximation from the Mann-Whitney Rank Sum Test for base qualities
+ *
+ * <p>This tool calculates the u-based z-approximation from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele).</p>
+ *
+ * <h3>Caveat</h3>
+ * <p>The base quality rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.</p>
*/
public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnotation {
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }
@@ -86,13 +91,13 @@ protected void fillQualsFromPileup(final List<Allele> allAlleles, final int refL
}
for (Map<Allele,Double> el : alleleLikelihoodMap.getLikelihoodMapValues()) {
- final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el);
- if (a.isNoCall())
+ final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el);
+ if (! a.isInformative())
continue; // read is non-informative
- if (a.isReference())
- refQuals.add(-10.0*(double)el.get(a));
- else if (allAlleles.contains(a))
- altQuals.add(-10.0*(double)el.get(a));
+ if (a.getMostLikelyAllele().isReference())
+ refQuals.add(-10.0*(double)el.get(a.getMostLikelyAllele()));
+ else if (allAlleles.contains(a.getMostLikelyAllele()))
+ altQuals.add(-10.0*(double)el.get(a.getMostLikelyAllele()));
}
@@ -65,9 +65,15 @@
/**
- * Allele count in genotypes, for each ALT allele, in the same order as listed;
- * allele Frequency, for each ALT allele, in the same order as listed; total number
- * of alleles in called genotypes.
+ * Allele counts and frequency for each ALT allele and total number of alleles in called genotypes
+ *
+ * <p>This annotation tool outputs the following:
+ *
+ * <ul>
+ * <li>Allele count in genotypes, for each ALT allele, in the same order as listed</li>
+ * <li>Allele Frequency, for each ALT allele, in the same order as listed</li>
+ * <li>Total number of alleles in called genotypes</li>
+ * </ul></p>
*/
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
@@ -46,6 +46,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
+import org.broadinstitute.sting.utils.genotyper.MostLikelyAllele;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
@@ -57,14 +58,15 @@
import java.util.*;
/**
- * Created with IntelliJ IDEA.
- * User: rpoplin
- * Date: 6/28/12
- */
-
-/**
- * The u-based z-approximation from the Mann-Whitney Rank Sum Test for reads with clipped bases (reads with ref bases vs. those with the alternate allele)
- * Note that the clipping rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
+ * U-based z-approximation from the Mann-Whitney Rank Sum Test for reads with clipped bases
+ *
+ * <p>This tool calculates the u-based z-approximation from the Mann-Whitney Rank Sum Test for reads with clipped bases (reads with ref bases vs. those with the alternate allele).</p>
+ *
+ * <h3>Caveat</h3>
+ * <p>The clipping rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.</p>
+ *
+ * @author rpoplin
+ * @since 6/28/12
*/
public class ClippingRankSumTest extends RankSumTest {
@@ -83,12 +85,12 @@ protected void fillQualsFromPileup(final List<Allele> allAlleles,
for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : likelihoodMap.getLikelihoodReadMap().entrySet()) {
- final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
- if (a.isNoCall())
+ final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
+ if (! a.isInformative())
continue; // read is non-informative
- if (a.isReference())
+ if (a.getMostLikelyAllele().isReference())
refQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey()));
- else if (allAlleles.contains(a))
+ else if (allAlleles.contains(a.getMostLikelyAllele()))
altQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey()));
}
@@ -70,10 +70,11 @@
/**
* Total (unfiltered) depth over all samples.
*
- * While the sample-level (FORMAT) DP field describes the total depth of reads that passed the Unified Genotyper's
+ * <p>While the sample-level (FORMAT) DP field describes the total depth of reads that passed the caller's
* internal quality control metrics (like MAPQ > 17, for example), the INFO field DP represents the unfiltered depth
* over all samples. Note though that the DP is affected by downsampling (-dcov), so the max value one can obtain for
* N samples with -dcov D is N * D
+ * </p>
*/
public class Coverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
Oops, something went wrong.

0 comments on commit e29b52b

Please sign in to comment.