// GenotypeSummaries.java
package org.broadinstitute.hellbender.tools.walkers.annotator;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import java.util.*;
/**
 * Summarize genotype statistics from all samples at the site level
 *
 * <p>This annotation collects several genotype-level statistics from all samples and summarizes them in the INFO field. The following statistics are collected:</p>
 * <ul>
 *     <li>Number of no-called samples (NCC)</li>
 *     <li>Mean of all GQ values (GQ_MEAN); only emitted when at least one genotype has a GQ</li>
 *     <li>Standard deviation of all GQ values (GQ_STDDEV); only emitted when at least two genotypes have a GQ</li>
 * </ul>
 *
 * <h3>Note</h3>
 * <p>These summaries can all be recomputed from the genotypes on the fly but it is a lot faster to add them here as INFO field annotations.</p>
 */
@DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Summary of genotype statistics from all samples (NCC, GQ_MEAN, GQ_STDDEV)")
public final class GenotypeSummaries implements InfoFieldAnnotation {

    /** Format applied to the GQ mean and standard deviation: fixed-point with two decimals. */
    private static final String GQ_SUMMARY_FORMAT = "%.2f";

    /**
     * Computes the site-level genotype summaries for a variant.
     *
     * @param ref         reference context at the site; not read by this annotation
     * @param vc          the variant whose genotypes are summarized; checked with {@code Utils.nonNull}
     * @param likelihoods per-read allele likelihoods; not read by this annotation
     * @return an empty map when {@code vc} has no genotypes; otherwise an insertion-ordered map that always
     *         holds the no-call count, holds the GQ mean when at least one genotype has a GQ, and holds the
     *         GQ standard deviation when at least two genotypes have a GQ. The mean and standard deviation
     *         are stored as strings with two decimals.
     */
    @Override
    public Map<String, Object> annotate(final ReferenceContext ref,
                                        final VariantContext vc,
                                        final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
        Utils.nonNull(vc);
        if ( ! vc.hasGenotypes() ) {
            return Collections.emptyMap();
        }

        final Map<String,Object> returnMap = new LinkedHashMap<>();
        returnMap.put(GATKVCFConstants.NOCALL_CHROM_KEY, vc.getNoCallCount());

        // Only genotypes that actually carry a GQ contribute to the mean / standard deviation.
        final DescriptiveStatistics stats = new DescriptiveStatistics();
        for( final Genotype g : vc.getGenotypes() ) {
            if( g.hasGQ() ) {
                stats.addValue(g.getGQ());
            }
        }

        if( stats.getN() > 0L ) {
            returnMap.put(GATKVCFConstants.GQ_MEAN_KEY, formatSummary(stats.getMean()));
            // A standard deviation is only reported once there are at least two GQ values.
            if( stats.getN() > 1L ) {
                returnMap.put(GATKVCFConstants.GQ_STDEV_KEY, formatSummary(stats.getStandardDeviation()));
            }
        }
        return returnMap;
    }

    /**
     * Lists the INFO keys this annotation may emit.
     *
     * @return the no-call count, GQ mean and GQ standard deviation keys, in that order
     */
    @Override
    public List<String> getKeyNames() {
        return Arrays.asList(
                GATKVCFConstants.NOCALL_CHROM_KEY,
                GATKVCFConstants.GQ_MEAN_KEY,
                GATKVCFConstants.GQ_STDEV_KEY);
    }

    /**
     * Renders a summary value with two decimals using {@link Locale#ROOT}, so the decimal separator is
     * always '.' regardless of the JVM default locale (a default locale such as de_DE would otherwise
     * produce "12,34").
     */
    private static String formatSummary(final double value) {
        return String.format(Locale.ROOT, GQ_SUMMARY_FORMAT, value);
    }
}