Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Merge pull request #302 from broadinstitute/dr_fix_genotype_handling_…
Browse files Browse the repository at this point in the history
…of_vector_end

Fix handling of vector end values by Genotype::operator[]
  • Loading branch information
droazen committed Oct 3, 2014
2 parents cdbae35 + e61ffe1 commit e5f0438
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 4 deletions.
11 changes: 7 additions & 4 deletions gamgee/utils/genotype_utils.h
Expand Up @@ -38,12 +38,15 @@ using namespace std;
bool allele_missing(const bcf_fmt_t* const format_ptr, const uint8_t* data_ptr, const uint32_t allele_index);

template<class TYPE>
inline int32_t allele_key(const uint8_t* data_ptr, const uint32_t allele_index, const TYPE missing) {
inline int32_t allele_key(const uint8_t* data_ptr, const uint32_t allele_index, const TYPE missing, const TYPE vector_end) {
// mostly copied from htslib
const auto p = reinterpret_cast<const TYPE*>(data_ptr);
if ( !(p[allele_index]>>1) || p[allele_index]==missing ) {
return missing_values::int32;
}
else if ( p[allele_index] == vector_end ) {
return bcf_int32_vector_end;
}
return (p[allele_index]>>1)-1;
}

Expand All @@ -59,11 +62,11 @@ using namespace std;
const bcf_fmt_t* const format_ptr, const uint8_t* data_ptr, const uint32_t allele_index) {
switch (format_ptr->type) {
case BCF_BT_INT8:
return allele_key<int8_t>(data_ptr, allele_index, bcf_int8_missing);
return allele_key<int8_t>(data_ptr, allele_index, bcf_int8_missing, bcf_int8_vector_end);
case BCF_BT_INT16:
return allele_key<int16_t>(data_ptr, allele_index, bcf_int16_missing);
return allele_key<int16_t>(data_ptr, allele_index, bcf_int16_missing, bcf_int16_vector_end);
case BCF_BT_INT32:
return allele_key<int32_t>(data_ptr, allele_index, bcf_int32_missing);
return allele_key<int32_t>(data_ptr, allele_index, bcf_int32_missing, bcf_int32_vector_end);
default:
throw invalid_argument("unknown GT field type: " + to_string(format_ptr->type));
}
Expand Down
12 changes: 12 additions & 0 deletions test/genotypes_test.cpp
Expand Up @@ -154,3 +154,15 @@ BOOST_AUTO_TEST_CASE( is_variant ) {
BOOST_CHECK_EQUAL(genotypes[i].variant(), truth[i]);
}

BOOST_AUTO_TEST_CASE( mixed_ploidy_random_access ) {
const auto rec = (*(SingleVariantReader("testdata/test_variants_mixed_ploidy.vcf").begin()));
const auto expected = vector<vector<int32_t>>{ {0, 1}, {0, bcf_int32_vector_end}, {1, bcf_int32_vector_end} };

auto sample_idx = 0u;
for ( const auto& genotype : rec.genotypes() ) {
for ( auto allele_idx = 0u; allele_idx < genotype.size(); ++allele_idx ) {
BOOST_CHECK_EQUAL(genotype[allele_idx], expected[sample_idx][allele_idx]);
}
++sample_idx;
}
}
19 changes: 19 additions & 0 deletions testdata/test_variants_mixed_ploidy.vcf
@@ -0,0 +1,19 @@
##fileformat=VCFv4.1
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=VALIDATED,Number=0,Type=Flag,Description="Validated By Follow-up Experiment">
##INFO=<ID=DESC,Number=.,Type=String,Description="Custom Description">
##FILTER=<ID=PASS,Description=All filters passed>
##FILTER=<ID=LOW_QUAL,Description=Low quality call>
##FILTER=<ID=MISSED,Description=Missed by the variant caller>
##FILTER=<ID=NOT_DEFINED,Description=Undefined filter>
##contig=<ID=1,Length=300000000,Description=the first chromosome>
##contig=<ID=20,Length=64000000>
##contig=<ID=22,Length=120000000>
##FORMAT=<ID=GT,Number=1,Type=String,Description=Genotype>
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=Genotype quality>
##FORMAT=<ID=PL,Number=G,Type=Integer,Description=Phred scaled relative Likelihoods of the genotypes>
##FORMAT=<ID=AF,Number=2,Type=Float,Description=Arbitrary float field with 2 values (test purposes only)>
##FORMAT=<ID=AS,Number=1,Type=String,Description=Arbitrary string field with 1 value (test purposes only)>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 10000000 db2342 T C 80 PASS AF=0.5;AN=6;VALIDATED;DESC=Test1,Test2 GT:GQ:PL:AF:AS 0/1:25:10,0,100:3.1,2.2:ABA 0:12:0,10,1000:3.1,2.2:CA 1:650:10,100,0:3.1,2.2:XISPAFORRO

0 comments on commit e5f0438

Please sign in to comment.