From b7255a383ac4f7154c19f9505ca7ff19024b45b5 Mon Sep 17 00:00:00 2001 From: Karthik Gururaj Date: Wed, 17 Sep 2014 15:11:52 -0700 Subject: [PATCH] Modified convert_data_to_integer and convert_data_to_float so that vector_end and missing values are correctly handled for INT8 and INT16 types. Since vector_end is (INT8_MIN+1) while missing is (INT8_MIN), check if the dereferenced value is <= vector_end. If yes, handle as special case and return INT32_MIN+1 or INT32_MIN. Updated test case for checking bcf_*_missing values. Added line in vcf and bcf files with missing values in many fields Modified == operator in IndividualFieldValue and SharedField to handle special floating point values (bcf_float_missing and bcf_float_vector_end) --- gamgee/individual_field_value.h | 2 +- gamgee/shared_field.h | 2 +- gamgee/utils/utils.h | 21 +++ gamgee/utils/variant_field_type.cpp | 40 ++++- test/genotypes_test.cpp | 2 + test/select_if_test.cpp | 4 +- test/variant_reader_test.cpp | 220 ++++++++++++++++++++++------ testdata/test_variants.bcf | Bin 1091 -> 2346 bytes testdata/test_variants.vcf | 1 + testdata/test_variants.vcf.gz | Bin 943 -> 961 bytes 10 files changed, 239 insertions(+), 53 deletions(-) diff --git a/gamgee/individual_field_value.h b/gamgee/individual_field_value.h index d0f95f9b3..c45a64aaf 100644 --- a/gamgee/individual_field_value.h +++ b/gamgee/individual_field_value.h @@ -112,7 +112,7 @@ class IndividualFieldValue { if (size() != other.size()) return false; for (auto i=0u; i != size(); ++i) { - if (operator[](i) != other[i]) + if (!utils::bcf_check_equal_primitive(operator[](i), other[i])) return false; } return true; diff --git a/gamgee/shared_field.h b/gamgee/shared_field.h index a5cb0cdec..a48ca2fae 100644 --- a/gamgee/shared_field.h +++ b/gamgee/shared_field.h @@ -90,7 +90,7 @@ class SharedField { if (size() != other.size()) return false; for (auto i=0u; i != size(); ++i) { - if (operator[](i) != other[i]) + if (!utils::bcf_check_equal_primitive(operator[](i), other[i])) return false; } return true; diff --git a/gamgee/utils/utils.h b/gamgee/utils/utils.h index 0a5f38bf8..a74a2bca9 100644 --- a/gamgee/utils/utils.h +++ b/gamgee/utils/utils.h @@ -5,6 +5,7 @@ #include #include #include +#include "htslib/vcf.h" namespace gamgee { @@ -66,6 +67,26 @@ inline void check_max_boundary(const uint32_t index, const uint32_t size) { } } +/** + * @brief - Check whether two values from VCF fields of primitive types (for which the == operator is defined) * are equal + * The function template is specialized for float fields + */ +template +inline bool bcf_check_equal_primitive(const TYPE x, const TYPE y) { + return (x == y); +} +/** + * @brief: Check whether two float values from VCF fields are equal + * @note: since bcf_float_missing and bcf_float_vector_end are represented as NaNs and (nan == nan) returns false, + * a special check needs to be done for checking equality of float values in VCFs + */ +template<> +inline bool bcf_check_equal_primitive(const float x, const float y) { + return ((x == y) || (bcf_float_is_missing(x) && bcf_float_is_missing(y)) + || (bcf_float_is_vector_end(x) && bcf_float_is_vector_end(y))); +} + + } // end utils namespace } // end gamgee namespace diff --git a/gamgee/utils/variant_field_type.cpp b/gamgee/utils/variant_field_type.cpp index 1d3aa3f84..9a3ea005f 100644 --- a/gamgee/utils/variant_field_type.cpp +++ b/gamgee/utils/variant_field_type.cpp @@ -9,12 +9,18 @@ namespace gamgee { namespace utils { int32_t convert_data_to_integer(const uint8_t* data_ptr, const int index, const uint8_t num_bytes_per_value, const VariantFieldType& type) { + auto return_val = -1; const auto p = data_ptr + (index * num_bytes_per_value); + auto end_val = -1; switch (type) { case VariantFieldType::INT8: - return *(reinterpret_cast(p)); + return_val = *(reinterpret_cast(p)); + end_val = bcf_int8_vector_end; + break; case VariantFieldType::INT16: - return *(reinterpret_cast(p)); + return_val = *(reinterpret_cast(p)); + end_val = bcf_int16_vector_end; + break; case VariantFieldType::INT32: return *(reinterpret_cast(p)); case VariantFieldType::FLOAT: @@ -24,17 +30,31 @@ int32_t convert_data_to_integer(const uint8_t* data_ptr, const int index, const default: return 0; // undefined or NULL type -- impossible to happen just so the compiler doesn't warn us } + //diff = 0 if return_val == vector_end, 1 if return_val == missing, < 0 if return_val is 'normal' + auto diff = end_val - return_val; + if(diff >= 0) + return bcf_int32_vector_end - diff; + else + return return_val; } float convert_data_to_float(const uint8_t* data_ptr, const int index, const uint8_t num_bytes_per_value, const VariantFieldType& type) { + auto return_val = -1; + auto end_val = -1; const auto p = data_ptr + (index * num_bytes_per_value); switch (type) { case VariantFieldType::INT8: - return *(reinterpret_cast(p)); + return_val = *(reinterpret_cast(p)); + end_val = bcf_int8_vector_end; + break; case VariantFieldType::INT16: - return *(reinterpret_cast(p)); + return_val = *(reinterpret_cast(p)); + end_val = bcf_int16_vector_end; + break; case VariantFieldType::INT32: - return *(reinterpret_cast(p)); + return_val = *(reinterpret_cast(p)); + end_val = bcf_int32_vector_end; + break; case VariantFieldType::FLOAT: return *(reinterpret_cast(p)); case VariantFieldType::STRING: @@ -42,6 +62,16 @@ float convert_data_to_float(const uint8_t* data_ptr, const int index, const uint default: return 0.0f; // undefined or NULL type -- impossible to happen just so the compiler doesn't warn us } + if(end_val >= return_val) //can't use diff because INT32 types are also checked here and diff will wrap-around + { + auto tmp = 0.0f; //pack bcf_float_missing or bcf_float_vector_end into tmp and return tmp + //diff = 0 if return_val == vector_end, 1 if return_val == missing, + auto diff = end_val - return_val; + bcf_float_set(&tmp, bcf_float_vector_end - diff); + return tmp; + } + else + return return_val; } std::string convert_data_to_string(const uint8_t* data_ptr, const int index, const uint8_t num_bytes_per_value, const VariantFieldType& type) { diff --git a/test/genotypes_test.cpp b/test/genotypes_test.cpp index 864f3a719..017a8958c 100644 --- a/test/genotypes_test.cpp +++ b/test/genotypes_test.cpp @@ -33,6 +33,7 @@ BOOST_AUTO_TEST_CASE( hom_ref_test ){ dynamic_bitset<>(string("010")), dynamic_bitset<>(string("010")), dynamic_bitset<>(string("010")), + dynamic_bitset<>(string("010")), dynamic_bitset<>(string("010"))}; select_if_test(truth, diploid, hom_ref ); @@ -53,6 +54,7 @@ BOOST_AUTO_TEST_CASE( hom_var_test ){ dynamic_bitset<>(string("100")), dynamic_bitset<>(string("100")), dynamic_bitset<>(string("100")), + dynamic_bitset<>(string("100")), dynamic_bitset<>(string("100"))}; select_if_test(truth, diploid, hom_var); diff --git a/test/select_if_test.cpp b/test/select_if_test.cpp index 93ea54573..0a4ff7dde 100644 --- a/test/select_if_test.cpp +++ b/test/select_if_test.cpp @@ -45,8 +45,8 @@ BOOST_AUTO_TEST_CASE( select_if_individual_fields ) { } BOOST_AUTO_TEST_CASE( select_if_shared_field ) { - const auto truth_an_counts = std::vector{1,1,1,1,1}; - const auto truth_af_counts = std::vector{0,0,0,0,1}; + const auto truth_an_counts = std::vector{1,1,1,1,1,1}; + const auto truth_af_counts = std::vector{0,0,0,0,1,0}; auto truth_index = 0u; for (const auto& record : SingleVariantReader{"testdata/test_variants.vcf"}) { const auto an = record.integer_shared_field("AN"); diff --git a/test/variant_reader_test.cpp b/test/variant_reader_test.cpp index 9954b7c1c..2bc1aff91 100644 --- a/test/variant_reader_test.cpp +++ b/test/variant_reader_test.cpp @@ -11,45 +11,156 @@ #include #include +#include using namespace std; using namespace gamgee; constexpr auto FLOAT_COMPARISON_THRESHOLD = 0.0001f; -const auto truth_chromosome = vector{0, 1, 1, 1, 2}; -const auto truth_alignment_starts = vector{10000000, 10001000, 10002000, 10003000, 10004000}; -const auto truth_alignment_stops = vector{10000000, 10001001, 10002006, 10003000, 10004002}; -const auto truth_n_alleles = vector{2, 2, 2, 2, 3}; -const auto truth_filter_name = vector{"PASS", "PASS", "LOW_QUAL", "NOT_DEFINED", "PASS"}; -const auto truth_filter_size = vector{1,1,1,1,2}; -const auto truth_quals = vector{80,8.4,-1,-1,-1}; -const auto truth_ref = vector{"T", "GG", "TAGTGQA", "A", "GAT"}; -const auto truth_alt = vector< vector> { { "C" } , {"AA"}, {"T"}, {"AGCT"}, {"G","GATAT"}}; +float g_bcf_float_missing = reinterpret_cast(bcf_float_missing); + +const auto truth_chromosome = vector{0, 1, 1, 1, 2, 2}; +const auto truth_alignment_starts = vector{10000000, 10001000, 10002000, 10003000, 10004000, 10005000 }; +const auto truth_alignment_stops = vector{10000000, 10001001, 10002006, 10003000, 10004002, 10005002 }; +const auto truth_n_alleles = vector{2, 2, 2, 2, 3, 3}; +const auto truth_filter_name = vector{"PASS", "PASS", "LOW_QUAL", "NOT_DEFINED", "PASS", "PASS"}; +const auto truth_filter_size = vector{1,1,1,1,2,1}; +const auto truth_quals = vector{80,8.4,-1,-1,-1,-1}; +const auto truth_ref = vector{"T", "GG", "TAGTGQA", "A", "GAT","GAT"}; +const auto truth_alt = vector< vector> { { "C" } , {"AA"}, {"T"}, {"AGCT"}, {"G","GATAT"}, {"G","GATAT"}}; const auto truth_high_quality_hets = boost::dynamic_bitset<>{std::string{"001"}}; -const auto truth_id = vector{"db2342", "rs837472", ".", ".", "."}; -const auto truth_shared_af = vector>{{0.5}, {0.5}, {0.5}, {0.5}, {0.5, 0.0}}; -const auto truth_shared_an = vector>{{6}, {6}, {6}, {6}, {6}}; -const auto truth_shared_desc = vector>{{"Test1,Test2"}, {}, {}, {}, {}}; -const auto truth_shared_validated = vector{true, false, true, false, false}; -const auto truth_gq = vector>{{25,12,650}, {35,35,35}, {35,35,35}, {35,35,35}, {35,35,35}}; -const auto truth_af = vector { 3.1,2.2 }; +const auto truth_id = vector{"db2342", "rs837472", ".", ".", ".","."}; +const auto truth_shared_af = vector>{{0.5}, {0.5}, {0.5}, {0.5}, {0.5, 0.0}, {0.5, g_bcf_float_missing}}; +const auto truth_shared_an = vector>{{6}, {6}, {6}, {6}, {6}, {6}}; +const auto truth_shared_desc = vector>{{"Test1,Test2"}, {}, {}, {}, {}, {}}; +const auto truth_shared_validated = vector{true, false, true, false, false, false}; +const auto truth_gq_bt_type = vector{BCF_BT_INT16, BCF_BT_INT8, BCF_BT_INT8, BCF_BT_INT8, BCF_BT_INT8, BCF_BT_INT8}; +const auto truth_gq = vector>{{25,12,650}, {35,35,35}, {35,35,35}, {35,35,35}, {35,35,35}, {35,bcf_int32_missing,35}}; +const auto truth_af = vector>> { + { {3.1,2.2}, {3.1,2.2}, {3.1,2.2} }, + { {3.1,2.2}, {3.1,2.2}, {3.1,2.2} }, + { {3.1,2.2}, {3.1,2.2}, {3.1,2.2} }, + { {3.1,2.2}, {3.1,2.2}, {3.1,2.2} }, + { {3.1,2.2}, {3.1,2.2}, {3.1,2.2} }, + { {3.1,g_bcf_float_missing}, {3.1,2.2}, {3.1,2.2} } +}; + +const auto truth_pl_bt_type = vector{ BCF_BT_INT8, BCF_BT_INT8, BCF_BT_INT32, BCF_BT_INT8, BCF_BT_INT8, BCF_BT_INT8 }; const auto truth_pl = vector>>{ {{10,0,100 }, {0,10,1000 }, {10,100,0} }, {{10,0,100 }, {0,10,100 }, {10,100,0} }, {{10,0,100 }, {0,10,2000000000}, {10,100,0} }, {{10,0,100 }, {0,10,100 }, {10,100,0} }, - {{10,0,100,2,4,8}, {0,10,100,2,4,8 }, {10,100,0,2,4,8}} + {{10,0,100,2,4,8}, {0,10,100,2,4,8 }, {10,100,0,2,4,8}}, + {{10,0,100,bcf_int32_missing,4,bcf_int32_missing}, {0,10,100,2,4,8 }, {10,100,0,2,4,8}} }; const auto truth_as = vector>{ {"ABA","CA","XISPAFORRO"}, {"ABA","ABA","ABA"}, {"ABA","ABA","ABA"}, {"ABA","ABA","ABA"}, - {"ABA","ABA","ABA"} + {"ABA","ABA","ABA"}, + {"ABA","ABA","."} }; +/* + * @brief given the data type (BCF_BT_*), returns the int32_t with the value of bcf_*_vector_end + * @param bcf_bt_type type (BCF_BT_*) of the data + * @return bcf_*_vector_end + */ +int32_t bcf_vector_end_val(int32_t bcf_bt_type) +{ + auto vector_end_val = 0; + switch(bcf_bt_type) + { + case BCF_BT_INT8: + vector_end_val = bcf_int8_vector_end; + break; + case BCF_BT_INT16: + vector_end_val = bcf_int16_vector_end; + break; + case BCF_BT_INT32: + vector_end_val = bcf_int32_vector_end; + break; + case BCF_BT_FLOAT: + vector_end_val = bcf_float_vector_end; + break; + default: + BOOST_CHECK_MESSAGE(0, "Unknown VCF integer type "<< bcf_bt_type); + return -1; + } + return vector_end_val; +} +/* + * @brief given the int data type (BCF_BT_INT*) and int32 representation of the datatype, returns the string corresponding to + * the data type. Note that functions like shared_field_as_integer return int32 representation of data, even though the underlying data + * type might be int16/int8 etc. Hence, shared_field_as_integer would convert bcf_int8_missing to bcf_int32_missing so that + * missing functions in the rest of gamgee work correctly. However, a string comparison check in the tests here would fail + * since to_string(bcf_int8_missing) != to_string(bcf_int32_missing). This function re-converts 'special' values back to their + * original data types and then converts to string. + * @note the function checks whether it is a special value - anything <= bcf_int32_vector_end is a special value- if yes, compute + * the real special value corresponding to the data type bcf_bt_type. The 'real' special value is computed by obtaining the + * bcf_*_vector_end value and subtracting from it 'diff', where diff is the difference between v and the int32 special value + * bcf_int32_vector_end + * @param v int32_t value obtained from *_as_integer gamgee function + * @param bcf_bt_type int type (BCF_BT_INT*) of the data + * @return string representation of value + */ +string bcf_int32_to_string(int32_t v, int32_t bcf_bt_type) +{ + if(v <= bcf_int32_vector_end) + { + auto vector_end_val = bcf_vector_end_val(bcf_bt_type); + auto diff = bcf_int32_vector_end - v; //either 0 or 1 + return to_string(vector_end_val - diff); + } + else + return to_string(v); +} +/* + * @brief given the int data type (BCF_BT_INT*) and int32 representation of the datatype, returns the float corresponding to + * the data type. Note that functions like shared_field_as_integer return int32 representation of data, even though the underlying data + * type might be int16/int8 etc. Hence, shared_field_as_integer would convert bcf_int8_missing to bcf_int32_missing so that + * missing functions in the rest of gamgee work correctly. However, a float comparison check in the tests here would fail + * since float(bcf_int8_missing) != float(bcf_int32_missing). This function re-converts 'special' values back to their + * original data types and then converts to float. + * @note the function checks whether it is a special value - anything <= bcf_int32_vector_end is a special value- if yes, compute + * the real special value corresponding to the data type bcf_bt_type. The 'real' special value is computed by obtaining the + * bcf_*_vector_end value and subtracting from it 'diff', where diff is the difference between v and the int32 special value + * bcf_int32_vector_end + * @param v int32_t value obtained from *_as_integer gamgee function + * @param bcf_bt_type type (BCF_BT_*) of the data + * @return string representation of value + */ +float bcf_int32_to_float(int32_t v, int32_t bcf_bt_type) +{ + if(v <= bcf_int32_vector_end) + { + auto diff = bcf_int32_vector_end - v; //either 0 or 1 + auto pack_value = bcf_float_vector_end - diff; + auto tmp = 0.0f; + bcf_float_set(&tmp, pack_value); + return tmp; + } + else + return float(v); +} +/* + * @brief floating point comparison for VCF fields + * v1, v2 can be NaNs since the bcf_float_missing and vector_end values are NaNs. A simple comparison + * of v1 and v2 would fail since (nan == nan) is always false. This is fixed by explicitly comparing both values to bcf_float_missing and + * vector_end i.e. v1 and v2 are equal if ANY ONE of the following conditions is true + * (a) both are bcf_float_missing OR + * (b) both are bcf_float_vector_end OR + * (c) |v1-v2| high_qual_hets(const Variant& record) { // filter all hets that have GQ > 20 @@ -190,29 +301,33 @@ void check_individual_field_api(const Variant& record, const uint32_t truth_inde for(auto i=0u; i != record.n_samples(); ++i) { BOOST_CHECK_EQUAL(gq_int[i][0], truth_gq[truth_index][i]); BOOST_CHECK_EQUAL(gq_int_idx[i][0], truth_gq[truth_index][i]); - BOOST_CHECK_CLOSE(gq_float[i][0], float(truth_gq[truth_index][i]), FLOAT_COMPARISON_THRESHOLD); - BOOST_CHECK_CLOSE(gq_float_idx[i][0], float(truth_gq[truth_index][i]), FLOAT_COMPARISON_THRESHOLD); - BOOST_CHECK_EQUAL(gq_string[i][0], to_string(truth_gq[truth_index][i])); - BOOST_CHECK_EQUAL(gq_string_idx[i][0], to_string(truth_gq[truth_index][i])); - BOOST_REQUIRE_EQUAL(af_int[i].size(), truth_af.size()); // require otherwise next line may segfault - BOOST_REQUIRE_EQUAL(af_int_idx[i].size(), truth_af.size()); // require otherwise next line may segfault + BOOST_CHECK_MESSAGE(bcf_compare_float(gq_float[i][0], + bcf_int32_to_float(truth_gq[truth_index][i], truth_gq_bt_type[truth_index]), FLOAT_COMPARISON_THRESHOLD), + "["<(), + //tuple.get<1>(), FLOAT_COMPARISON_THRESHOLD); } ); const auto af_idx = record.float_shared_field(header.field_index("AF")); // test the index based api - BOOST_CHECK_EQUAL_COLLECTIONS(af_idx.begin(), af_idx.end(), truth_shared_af[truth_index].begin(), truth_shared_af[truth_index].end()); + af_index = 0; + for(auto af_value : af_idx) + { + BOOST_CHECK_MESSAGE(bcf_compare_float(af_value, truth_shared_af[truth_index][af_index], FLOAT_COMPARISON_THRESHOLD), + "["<{false, false, true, true, true}; + const auto truth_missing = vector{false, false, true, true, true, true}; auto i = 0u; for (const auto& record : SingleVariantReader{"testdata/test_variants.vcf"}) BOOST_CHECK_EQUAL(missing(record.id()), truth_missing[i++]); // check that the missing and non-missing values in the vcf actually return the right missingness @@ -587,12 +719,12 @@ BOOST_AUTO_TEST_CASE( multiple_variant_reader_test ) { } } -const auto multi_diff_truth_record_count = vector{4, 1, 1, 1, 2, 1}; -const auto multi_diff_truth_chromosome = vector{0, 1, 1, 1, 1, 2}; -const auto multi_diff_truth_alignment_starts = vector{10000000, 10001000, 10001999, 10002000, 10003000, 10004000}; -const auto multi_diff_truth_ref = vector{"T", "GG", "TAGTGQA", "TAGTGQA", "A", "GAT"}; -const auto multi_diff_truth_n_alleles = vector{2, 2, 2, 2, 2, 3}; -const auto multi_diff_truth_id = vector{"db2342", "rs837472", ".", ".", ".", "."}; +const auto multi_diff_truth_record_count = vector{4, 1, 1, 1, 2, 1, 1}; +const auto multi_diff_truth_chromosome = vector{0, 1, 1, 1, 1, 2, 2}; +const auto multi_diff_truth_alignment_starts = vector{10000000, 10001000, 10001999, 10002000, 10003000, 10004000, 10005000}; +const auto multi_diff_truth_ref = vector{"T", "GG", "TAGTGQA", "TAGTGQA", "A", "GAT", "GAT"}; +const auto multi_diff_truth_n_alleles = vector{2, 2, 2, 2, 2, 3, 3}; +const auto multi_diff_truth_id = vector{"db2342", "rs837472", ".", ".", ".", ".", "."}; BOOST_AUTO_TEST_CASE( multiple_variant_reader_difference_test ) { auto truth_index = 0u; @@ -609,7 +741,7 @@ BOOST_AUTO_TEST_CASE( multiple_variant_reader_difference_test ) { } ++truth_index; } - BOOST_CHECK_EQUAL(truth_index, 6u); + BOOST_CHECK_EQUAL(truth_index, 7u); } void multiple_variant_reader_sample_test(const vector samples, const bool include, const int desired_samples) { diff --git a/testdata/test_variants.bcf b/testdata/test_variants.bcf index 5d752f9f7f738d3b84a05ad9e6a6ea91bf4c97fb..5646a3395c32c8bad8b701d226d6f1496e38d7f6 100644 GIT binary patch literal 2346 zcmb7EL2u$l7@ZIj0^N%%RZ&l!EULCufr24%B9-n=08>-~+2GCYrHTw5gH>a*;{nPk zxmD`9s-hmMs)ttUALyTGkNpq*8U1E#H{i{-TZ^#=n0fDg-+SZJhMr3Oxr0zXZ#yov zeKs|D{j#AiYNbjppVvEGqjgdLq|>aQlfj_Sq``mNAyB(J{?k4Ck10QqjlZ&P5ufJbt%>veL1JzXjQxw6UJivucuYN?gYqCOPu(E7B;8Jv7_DZ^RgSAP$D2f#rt4THXvL>1toyF(FN^aTZhbYQ z%$ZV;hw#-6_-1R+h>=Sn@_;kPOJFzV0r#hP&7KgfCcxHA?Dn5uJ$**H8_K%=62F{- zI=+%6OA`+hJI^|U0Tj8$P(O2o=Xki1t}IOEm>v(C#w5m30;AVAu9_{q(@Ow7^DJsR z9$RNc|@CjD&!gS{}z#ni5md)mD<_GZA_uSQoiL%HbF6;Fj zzM~4{=~(Bggk1Mhg|$iVha;Q&z8hRE9J;)Ocba&|l}l==Tx27ysH(N%`0z-p+L~6h z9L854O?cos!{XhV%1OC6Ru6}@!%-Q+yMfs7r&H6j>H-}X`*@B;iqZHakXlHB<)oM-MH})F}QnucKzaJ|pWmnloO8PegWI@N_t}1&6 zsdOrpPAl2B-$f75Hz_3t_y}1ra_Fz^oSZ3J@7uQi>(?weu4*++ebjD0B4ktVojfI<&nvl#(37)r z7F6{wk~EMMeyYIsMa16!;TC1&{N{Q)V$UoM(l**p$-bfNBs!HOuZqTqOEgDZ^82@k z%eZ0Uw>Q*Pgr#v;w4Xu;x6sXX+?C8eX_I!tP%`VHl8XL!J2asE9ELFAy`OJISil%T zyDfv0(b~k=Z$F1SF%Hl!R0As>!$2}Wdk5*=FfO4VCisy`?|~(72UpSvkN^C z#WUlXd7tNbzkJftbGbiv5God3&!;ZqGfOltTKZM}psEy$dbekGhRsjA?dBO7jY@4A z+T5E9j|I*9#P=~+3Ccq}x5AJ*_ezuw=(7;}mFet&)vZt8zOl(ERCF})QOeqen8O4k{lw&Kzz88v=xw^^RPnoMR zb2kulLir}Q$%N(OAi9W|n~;T)%$92hPiVjd*oQHRwn1`1dfhfLJMEOKl6q-PHi<4Q z-*YU`iceNpXTHyt<;5I#Ud}1^W;75nd~E~1-5Iq~KJi9!9N_rdCJ+{Oz7NAb7B9|k}!;Ix=cQgVg*X>R{nR7f|MOnGBxMu|-4jPXz zjxreifqB{P=-qw>=xN|k*9!nEE~m)2A&qSo2yYS@S4%w_OvJQVt0cb-3@O3&cnAWf zoXuFsX7r{S*Ko8-3UpLYa9)?r`ZX=RR@EwN_oD3UgW)MLqkD##DY7@D$4Ck&0yxeB zoZq5KQ-m~65z?a!(pfJ>GTx4KHsuiK5V8alr@ke;D~fyGSJd~Wj5(p~0U6BKRUe0^ zk|DckYwc}qP59Uo+~On0^&PzQ#1v~#F8%_#>?1)TW%C7}vk+cc;IBT;GL8N_ z!Z%c5bhyq{jk(@P75f(Der&sfF+aS#^62sczG)J-dQd5AN9D<3&9$9F`;(~%eQ#Wr z+j6&lGmjtrrRThSC^s&XehCl27MBNjd~eb-e1sf$6!h1&60Ov&4_#M(^2hIYpJZqConOfbNwl=m1$y2coe^+j zIH<$h2|QBc+{x#O}3001A02m}BC000301^_}s0stET0{{R3 J00000007Bh4Y&XR diff --git a/testdata/test_variants.vcf b/testdata/test_variants.vcf index 20811f8d7..6386ea4ab 100644 --- a/testdata/test_variants.vcf +++ b/testdata/test_variants.vcf @@ -21,3 +21,4 @@ 20 10002000 . TAGTGQA T . LOW_QUAL AF=0.5;AN=6;VALIDATED GT:GQ:PL:AF:AS 0/1:35:10,0,100:3.1,2.2:ABA 0/0:35:0,10,2000000000:3.1,2.2:ABA 1/1:35:10,100,0:3.1,2.2:ABA 20 10003000 . A AGCT . NOT_DEFINED AF=0.5;AN=6 GT:GQ:PL:AF:AS 0/1:35:10,0,100:3.1,2.2:ABA 0/0:35:0,10,100:3.1,2.2:ABA 1/1:35:10,100,0:3.1,2.2:ABA 22 10004000 . GAT G,GATAT . PASS;MISSED AF=0.5,0;AN=6 GT:GQ:PL:AF:AS 1/2:35:10,0,100,2,4,8:3.1,2.2:ABA 0/0:35:0,10,100,2,4,8:3.1,2.2:ABA 1/1:35:10,100,0,2,4,8:3.1,2.2:ABA +22 10005000 . GAT G,GATAT . PASS AF=0.5,.;AN=6 GT:GQ:PL:AF:AS 0/1:35:10,0,100,.,4,.:3.1,.:ABA 0/0:.:0,10,100,2,4,8:3.1,2.2:ABA 1/1:35:10,100,0,2,4,8:3.1,2.2:. diff --git a/testdata/test_variants.vcf.gz b/testdata/test_variants.vcf.gz index 79954e7b269be496ebd45780a7b3f1fdf22e76b9..0016b49eaf6268abdcf778c4cbda56ef67293680 100644 GIT binary patch delta 947 zcmV;k15Es{2f+t_ABzYC000000RIL6LPG)oqyxQG(Q?{26nz$7p(!uBQ{&je1nR7J zMg{CL;{d^K+P93s0yJwZDhX2d>sPXI0EcwDeb~$dJGu9sb98j?rP*B4j4T=7B+`5A z`+LW<8_lLa3gh9-`(rflrZ|d>0TByMx015ldxx_Oswzo;xPaS42$H`0Q`L;d{-mhD zzA@fy=7f9Lh!0!h`58;(D-4nm=yURINAktNfJy@-$zlz07{jCp8<6Hut_c*$hCs$s z!eJs{MumLEIj%VEl5>*2$>^G~ROk-YcD?j$jpa0l=&d=#d`=}#_yI0;OgT@o z6h2dbxrRvHezsG?@tY*-HnuyyWkUVQ^6cuIgU!IO*Ak6(@*=Y8EKg}lhU^uY7*6k5AOFfqm>ukj_!tK_CWw(6fMEw*dR9$6wV}t&;yPv0X~U00(`o?`(7a@1~(bky9!PREGwJ z`ovn6zQnuoJK-qHu&xTTZ_v3g0~wAg(fg6SD;YJBuQMvCowuY`8qJ?E&oC9fS>T&5 V3e8^^Xg2-Ju!W+61%6{Own)>-%0ryFEc#@XHEAyx^a_9!+~=QU4}pJagiJ~C_O+NCmV=@2*zbtgQS3BgCUQ%7*dvE24fC$!o?fS zX{~8intmbh8nbKFh>AFeMVSa`c9P*rCQ;8RT;qZYiRah!&nWQ6D4L9G6{B8RyuOxr zj&qX6(jWYIgf7i<`qJ6&;N;5=GqS~nxF#M?!consSDc|>q-d~?_FT{{ym(jATx?S? z`*Zp92?bZUfWE-bd+A6V%LO(tkA4`+C0!yOiP}%NI+k@m#Eis+sEWfH=;efWK38ZZn-RvP=!VfP<#dZ143<$-40;tq zGr_78Ov^ZDxUUHt~fJg7Om_c`VpCTqxL?d6T( zJQn1DVL<-IIoVK}a@E1AT(3RbP&uDN>%DV`*op`ivm?w@OjaJ}DSRPf11-7x`CcB5 zp9Ge>vD>p9Y^T;n_0>PwP-xl=Eer@rUQ zgcJRcWcvCm?9AZTbNW@9N@=x6e=KH?U_J|hKL(453*;aJ>PUj>&;WJlK~*&{MW!{p zA6EBY&C}g#nZP`CEJ#;Yw`YL}jKI(UY77J9I)>hVKvQS%@H&zn)C_SVX-F1dUM=87 zw)bSu0~@(E3W0HF+E(8-4b9LbyWQ1I&C)F!eMB;5*rsKxfQp@iBLqx|8}tpk3Tfxq zUw$}8av_UZ%Q8yC>I)bjcJF)lQr`mzfuY`e)c?I`R_Ul$Ir3zOdYZf0fM