diff --git a/gamgee/utils/genotype_utils.h b/gamgee/utils/genotype_utils.h index d209ed1a6..578816d48 100644 --- a/gamgee/utils/genotype_utils.h +++ b/gamgee/utils/genotype_utils.h @@ -38,12 +38,15 @@ using namespace std; bool allele_missing(const bcf_fmt_t* const format_ptr, const uint8_t* data_ptr, const uint32_t allele_index); template - inline int32_t allele_key(const uint8_t* data_ptr, const uint32_t allele_index, const TYPE missing) { + inline int32_t allele_key(const uint8_t* data_ptr, const uint32_t allele_index, const TYPE missing, const TYPE vector_end) { // mostly copied from htslib const auto p = reinterpret_cast(data_ptr); if ( !(p[allele_index]>>1) || p[allele_index]==missing ) { return missing_values::int32; } + else if ( p[allele_index] == vector_end ) { + return bcf_int32_vector_end; + } return (p[allele_index]>>1)-1; } @@ -59,11 +62,11 @@ using namespace std; const bcf_fmt_t* const format_ptr, const uint8_t* data_ptr, const uint32_t allele_index) { switch (format_ptr->type) { case BCF_BT_INT8: - return allele_key(data_ptr, allele_index, bcf_int8_missing); + return allele_key(data_ptr, allele_index, bcf_int8_missing, bcf_int8_vector_end); case BCF_BT_INT16: - return allele_key(data_ptr, allele_index, bcf_int16_missing); + return allele_key(data_ptr, allele_index, bcf_int16_missing, bcf_int16_vector_end); case BCF_BT_INT32: - return allele_key(data_ptr, allele_index, bcf_int32_missing); + return allele_key(data_ptr, allele_index, bcf_int32_missing, bcf_int32_vector_end); default: throw invalid_argument("unknown GT field type: " + to_string(format_ptr->type)); } diff --git a/test/genotypes_test.cpp b/test/genotypes_test.cpp index ce4e533ae..fdfb0d4cc 100644 --- a/test/genotypes_test.cpp +++ b/test/genotypes_test.cpp @@ -154,3 +154,15 @@ BOOST_AUTO_TEST_CASE( is_variant ) { BOOST_CHECK_EQUAL(genotypes[i].variant(), truth[i]); } +BOOST_AUTO_TEST_CASE( mixed_ploidy_random_access ) { + const auto rec = (*(SingleVariantReader("testdata/test_variants_mixed_ploidy.vcf").begin())); + const auto expected = vector>{ {0, 1}, {0, bcf_int32_vector_end}, {1, bcf_int32_vector_end} }; + + auto sample_idx = 0u; + for ( const auto& genotype : rec.genotypes() ) { + for ( auto allele_idx = 0u; allele_idx < genotype.size(); ++allele_idx ) { + BOOST_CHECK_EQUAL(genotype[allele_idx], expected[sample_idx][allele_idx]); + } + ++sample_idx; + } +} diff --git a/testdata/test_variants_mixed_ploidy.vcf b/testdata/test_variants_mixed_ploidy.vcf new file mode 100644 index 000000000..54229ac73 --- /dev/null +++ b/testdata/test_variants_mixed_ploidy.vcf @@ -0,0 +1,19 @@ +##fileformat=VCFv4.1 +##INFO= +##INFO= +##INFO= +##INFO= +##FILTER= +##FILTER= +##FILTER= +##FILTER= +##contig= +##contig= +##contig= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892 +1 10000000 db2342 T C 80 PASS AF=0.5;AN=6;VALIDATED;DESC=Test1,Test2 GT:GQ:PL:AF:AS 0/1:25:10,0,100:3.1,2.2:ABA 0:12:0,10,1000:3.1,2.2:CA 1:650:10,100,0:3.1,2.2:XISPAFORRO