diff --git a/gamgee/variant.cpp b/gamgee/variant.cpp index 67bcb6445..0e9f21701 100644 --- a/gamgee/variant.cpp +++ b/gamgee/variant.cpp @@ -10,6 +10,36 @@ using namespace std; namespace gamgee { +/****************************************************************************** + * Accessory functions (private functions) * + ******************************************************************************/ +inline bool Variant::check_field(const int32_t type_field, const int32_t type_value, const int32_t index) const { + if (!check_field_exists(type_field, index)) + return false; + if (!check_field_type(type_field, type_value, index)) + throw std::runtime_error("individual field requested is not of the right type"); + return true; +} + +template +IndividualField> Variant::individual_field_as(const INDEX_OR_TAG& p) const { + const auto field_ptr = find_individual_field(p); + if (field_ptr == nullptr) + return IndividualField>{}; + return IndividualField>{m_body, field_ptr}; +} + +template +SharedField Variant::shared_field_as(const INDEX_OR_TAG& p) const { + const auto field_ptr = find_shared_field(p); + if (field_ptr == nullptr) + return SharedField{}; + return SharedField{m_body, field_ptr}; +} + +/****************************************************************************** + * Constructors and operator overloads * + ******************************************************************************/ /** * @brief creates a variant record that points to htslib memory already allocated * @note the resulting Variant shares ownership of the pre-allocated memory via shared_ptr reference counting @@ -31,14 +61,6 @@ Variant::Variant(const Variant& other) : m_body {utils::make_shared_variant(utils::variant_deep_copy(other.m_body.get()))} {} -/** - * @brief moves a variant record and header, transferring ownership of the underlying htslib memory - */ -Variant::Variant(Variant&& other) noexcept : - m_header {move(other.m_header)}, - m_body {move(other.m_body)} -{} - /** * @brief 
creates a deep copy of a variant record * @param other the Variant to be copied @@ -54,17 +76,10 @@ Variant& Variant::operator=(const Variant& other) { return *this; } -/** - * @brief moves a variant record, transferring ownership of the underlying htslib memory - */ -Variant& Variant::operator=(Variant&& other) noexcept { - if ( &other == this ) - return *this; - m_body = move(other.m_body); - m_header = move(other.m_header); - return *this; -} +/****************************************************************************** + * General record API * + ******************************************************************************/ std::string Variant::id () const { bcf_unpack(m_body.get(), BCF_UN_STR); return std::string{m_body->d.id}; @@ -90,113 +105,127 @@ bool Variant::has_filter(const std::string& filter) const { return bcf_has_filter(m_header.get(), m_body.get(), const_cast(filter.c_str())) > 0; // have to cast away the constness here for the C api to work. But the promise still remains as the C function is not modifying the string. 
} +/****************************************************************************** + * Individual field API * + ******************************************************************************/ IndividualField> Variant::integer_individual_field(const std::string& tag) const { - const auto id = bcf_hdr_id2int(m_header.get(), BCF_DT_ID, tag.c_str()); - if (!bcf_hdr_idinfo_exists(m_header.get(), BCF_HL_FMT, id)) - return IndividualField>{}; - if (bcf_hdr_id2type(m_header.get(),BCF_HL_FMT,id)!=BCF_HT_INT) - throw runtime_error("individual field requested is not an integer"); - return individual_field_as_integer(tag); // @todo: move this to an indexed based lookup API + return integer_individual_field(get_field_index(tag)); } IndividualField> Variant::float_individual_field(const std::string& tag) const { - const auto id = bcf_hdr_id2int(m_header.get(), BCF_DT_ID, tag.c_str()); - if (!bcf_hdr_idinfo_exists(m_header.get(), BCF_HL_FMT, id)) - return IndividualField>{}; - if (bcf_hdr_id2type(m_header.get(),BCF_HL_FMT,id)!=BCF_HT_REAL) - throw runtime_error("individual field requested is not a float"); - return individual_field_as_float(tag); // @todo: move this to an indexed based lookup API + return float_individual_field(get_field_index(tag)); } IndividualField> Variant::string_individual_field(const std::string& tag) const { - const auto id = bcf_hdr_id2int(m_header.get(), BCF_DT_ID, tag.c_str()); - if (!bcf_hdr_idinfo_exists(m_header.get(), BCF_HL_FMT, id)) - return IndividualField>{}; - if (bcf_hdr_id2type(m_header.get(),BCF_HL_FMT,id)!=BCF_HT_STR) - throw runtime_error("individual field requested is not a string"); - return individual_field_as_string(tag); // @todo: move this to an indexed based lookup API + return string_individual_field(get_field_index(tag)); } IndividualField> Variant::individual_field_as_integer(const std::string& tag) const { - const auto fmt = find_individual_field_by_tag(tag); - if (fmt == nullptr) ///< if the variant is missing or the PL tag is 
missing, return an empty IndividualField - return IndividualField>{}; - return IndividualField>{m_body, fmt}; + return individual_field_as(tag); } IndividualField> Variant::individual_field_as_float(const std::string& tag) const { - const auto fmt = find_individual_field_by_tag(tag); - if (fmt == nullptr) ///< if the variant is missing or the PL tag is missing, return an empty IndividualField - return IndividualField>{}; - return IndividualField>{m_body, fmt}; + return individual_field_as(tag); } IndividualField> Variant::individual_field_as_string(const std::string& tag) const { - const auto fmt = find_individual_field_by_tag(tag); - if (fmt == nullptr) ///< if the variant is missing or the PL tag is missing, return an empty IndividualField - return IndividualField>{}; - return IndividualField>{m_body, fmt}; + return individual_field_as(tag); +} + +IndividualField> Variant::integer_individual_field(const int32_t index) const { + if (check_field(BCF_HL_FMT, BCF_HT_INT, index)) + return individual_field_as(index); + return IndividualField>{}; +} + +IndividualField> Variant::float_individual_field(const int32_t index) const { + if (check_field(BCF_HL_FMT, BCF_HT_REAL, index)) + return individual_field_as(index); + return IndividualField>{}; +} + +IndividualField> Variant::string_individual_field(const int32_t index) const { + if (check_field(BCF_HL_FMT, BCF_HT_STR, index)) + return individual_field_as(index); + return IndividualField>{}; } -inline bcf_fmt_t* Variant::find_individual_field_by_tag(const string& tag) const { - return bcf_get_fmt(m_header.get(), m_body.get(), tag.c_str()); +IndividualField> Variant::individual_field_as_integer(const int32_t index) const { + return individual_field_as(index); } -inline bcf_info_t* Variant::find_shared_field_by_tag(const string& tag) const { - return bcf_get_info(m_header.get(), m_body.get(), tag.c_str()); +IndividualField> Variant::individual_field_as_float(const int32_t index) const { + return individual_field_as(index); 
} +IndividualField> Variant::individual_field_as_string(const int32_t index) const { + return individual_field_as(index); +} + + +/****************************************************************************** + * Shared field API * + ******************************************************************************/ bool Variant::boolean_shared_field(const std::string& tag) const { - const auto info = find_shared_field_by_tag(tag); - return info != nullptr; + return find_shared_field(tag) != nullptr; +} + +bool Variant::boolean_shared_field(const int32_t index) const { + return find_shared_field(index) != nullptr; } SharedField Variant::integer_shared_field(const std::string& tag) const { - const auto id = bcf_hdr_id2int(m_header.get(), BCF_DT_ID, tag.c_str()); - if (!bcf_hdr_idinfo_exists(m_header.get(), BCF_HL_INFO, id)) - return SharedField{}; - if (bcf_hdr_id2type(m_header.get(), BCF_HL_INFO,id) != BCF_HT_INT) - throw runtime_error("shared field requested is not a int"); - return shared_field_as_integer(tag); // @todo: move this to an indexed based lookup API + return integer_shared_field(get_field_index(tag)); } SharedField Variant::float_shared_field(const std::string& tag) const { - const auto id = bcf_hdr_id2int(m_header.get(), BCF_DT_ID, tag.c_str()); - if (!bcf_hdr_idinfo_exists(m_header.get(), BCF_HL_INFO, id)) - return SharedField{}; - if (bcf_hdr_id2type(m_header.get(), BCF_HL_INFO,id) != BCF_HT_REAL) - throw runtime_error("shared field requested is not a float"); - return shared_field_as_float(tag); // @todo: move this to an indexed based lookup API + return float_shared_field(get_field_index(tag)); } SharedField Variant::string_shared_field(const std::string& tag) const { - const auto id = bcf_hdr_id2int(m_header.get(), BCF_DT_ID, tag.c_str()); - if (!bcf_hdr_idinfo_exists(m_header.get(), BCF_HL_INFO, id)) - return SharedField{}; - if (bcf_hdr_id2type(m_header.get(), BCF_HL_INFO,id) != BCF_HT_STR) - throw runtime_error("shared field requested is not a 
string"); - return shared_field_as_string(tag); // @todo: move this to an indexed based lookup API + return string_shared_field(get_field_index(tag)); } SharedField Variant::shared_field_as_integer(const std::string& tag) const { - const auto info = find_shared_field_by_tag(tag); - if (info == nullptr) - return SharedField{}; - return SharedField{m_body, info}; + return shared_field_as(tag); } SharedField Variant::shared_field_as_float(const std::string& tag) const { - const auto info = find_shared_field_by_tag(tag); - if (info == nullptr) - return SharedField{}; - return SharedField{m_body, info}; + return shared_field_as(tag); } SharedField Variant::shared_field_as_string(const std::string& tag) const { - const auto info = find_shared_field_by_tag(tag); - if (info == nullptr) - return SharedField{}; - return SharedField{m_body, info}; + return shared_field_as(tag); +} + +SharedField Variant::integer_shared_field(const int32_t index) const { + if (check_field(BCF_HL_INFO, BCF_HT_INT, index)) + return shared_field_as(index); + return SharedField{}; +} + +SharedField Variant::float_shared_field(const int32_t index) const { + if (check_field(BCF_HL_INFO, BCF_HT_REAL, index)) + return shared_field_as(index); + return SharedField{}; +} + +SharedField Variant::string_shared_field(const int32_t index) const { + if (check_field(BCF_HL_INFO, BCF_HT_STR, index)) + return shared_field_as(index); + return SharedField{}; +} + +SharedField Variant::shared_field_as_integer(const int32_t index) const { + return shared_field_as(index); +} + +SharedField Variant::shared_field_as_float(const int32_t index) const { + return shared_field_as(index); +} + +SharedField Variant::shared_field_as_string(const int32_t index) const { + return shared_field_as(index); } IndividualField Variant::genotypes() const { diff --git a/gamgee/variant.h b/gamgee/variant.h index 41ea7440b..f52cadf98 100644 --- a/gamgee/variant.h +++ b/gamgee/variant.h @@ -29,9 +29,9 @@ class Variant { Variant() = default; 
///< initializes a null Variant @note this is only used internally by the iterators @warning if you need to create a Variant from scratch, use the builder instead explicit Variant(const std::shared_ptr& header, const std::shared_ptr& body) noexcept; ///< creates a Variant given htslib objects. @note used by all iterators Variant(const Variant& other); ///< makes a deep copy of a Variant and it's header. Shared pointers maintain state to all other associated objects correctly. - Variant(Variant&& other) noexcept; ///< moves Variant and it's header accordingly. Shared pointers maintain state to all other associated objects correctly. Variant& operator=(const Variant& other); ///< deep copy assignment of a Variant and it's header. Shared pointers maintain state to all other associated objects correctly. - Variant& operator=(Variant&& other) noexcept; ///< move assignment of a Variant and it's header. Shared pointers maintain state to all other associated objects correctly. + Variant(Variant&& other) = default; ///< moves Variant and it's header accordingly. Shared pointers maintain state to all other associated objects correctly. + Variant& operator=(Variant&& other) = default; ///< move assignment of a Variant and it's header. Shared pointers maintain state to all other associated objects correctly. VariantHeader header() const { return VariantHeader{m_header}; } @@ -52,13 +52,19 @@ class Variant { // individual field getters (a.k.a "format fields") - IndividualField> integer_individual_field(const std::string& tag) const; ///< returns a random access object with all the values in a given individual field tag in integer format for all samples contiguous in memory. @warning Only int8_t GT fields have been tested. @warning Missing GT fields are untested. @warning creates a new object but makes no copies of the underlying values. 
- IndividualField> float_individual_field(const std::string& tag) const; ///< returns a random access object with all the values in a given individual field tag in float format for all samples contiguous in memory. @warning Only int8_t GT fields have been tested. @warning Missing GT fields are untested. @warning creates a new object but makes no copies of the underlying values. - IndividualField> string_individual_field(const std::string& tag) const; ///< returns a random access object with all the values in a given individual field tag in string format for all samples contiguous in memory. @warning Only int8_t GT fields have been tested. @warning Missing GT fields are untested. @warning creates a new object but makes no copies of the underlying values. - IndividualField> individual_field_as_integer(const std::string& tag) const; ///< same as integer_format_field but will attempt to convert underlying data to integer if possible. @warning Only int8_t GT fields have been tested. @warning Missing GT fields are untested. @warning creates a new object but makes no copies of the underlying values. - IndividualField> individual_field_as_float(const std::string& tag) const; ///< same as float_format_field but will attempt to convert underlying data to float if possible. @warning Only int8_t GT fields have been tested. @warning Missing GT fields are untested. @warning creates a new object but makes no copies of the underlying values. - IndividualField> individual_field_as_string(const std::string& tag) const; ///< same as string_format_field but will attempt to convert underlying data to string if possible. @warning Only int8_t GT fields have been tested. @warning Missing GT fields are untested. @warning creates a new object but makes no copies of the underlying values. IndividualField genotypes() const; ///< special getter for the Genotype (GT) field. Returns a random access object with all the values in a given GT tag for all samples contiguous in memory. 
@warning Only int8_t GT fields have been tested. @warning Missing GT fields are untested. @warning creates a new object but makes no copies of the underlying values. + IndividualField> integer_individual_field(const std::string& tag) const; ///< returns a random access object with all the values in a given individual field tag in integer format for all samples contiguous in memory. @warning creates a new object but makes no copies of the underlying values. + IndividualField> float_individual_field(const std::string& tag) const; ///< returns a random access object with all the values in a given individual field tag in float format for all samples contiguous in memory. @warning creates a new object but makes no copies of the underlying values. + IndividualField> string_individual_field(const std::string& tag) const; ///< returns a random access object with all the values in a given individual field tag in string format for all samples contiguous in memory. @warning creates a new object but makes no copies of the underlying values. + IndividualField> individual_field_as_integer(const std::string& tag) const; ///< same as integer_individual_field but will attempt to convert underlying data to integer if possible. @warning creates a new object but makes no copies of the underlying values. + IndividualField> individual_field_as_float(const std::string& tag) const; ///< same as float_individual_field but will attempt to convert underlying data to float if possible. @warning creates a new object but makes no copies of the underlying values. + IndividualField> individual_field_as_string(const std::string& tag) const; ///< same as string_individual_field but will attempt to convert underlying data to string if possible. @warning creates a new object but makes no copies of the underlying values. 
+ IndividualField> integer_individual_field(const int32_t index) const; ///< returns a random access object with all the values in a given individual field tag index in integer format for all samples contiguous in memory. @warning creates a new object but makes no copies of the underlying values. + IndividualField> float_individual_field(const int32_t index) const; ///< returns a random access object with all the values in a given individual field tag index in float format for all samples contiguous in memory. @warning creates a new object but makes no copies of the underlying values. + IndividualField> string_individual_field(const int32_t index) const; ///< returns a random access object with all the values in a given individual field tag index in string format for all samples contiguous in memory. @warning creates a new object but makes no copies of the underlying values. + IndividualField> individual_field_as_integer(const int32_t index) const; ///< same as integer_individual_field but will attempt to convert underlying data to integer if possible. @warning creates a new object but makes no copies of the underlying values. + IndividualField> individual_field_as_float(const int32_t index) const; ///< same as float_individual_field but will attempt to convert underlying data to float if possible. @warning creates a new object but makes no copies of the underlying values. + IndividualField> individual_field_as_string(const int32_t index) const; ///< same as string_individual_field but will attempt to convert underlying data to string if possible. @warning creates a new object but makes no copies of the underlying values. 
// shared field getters (a.k.a "info fields") bool boolean_shared_field(const std::string& tag) const; ///< whether or not the tag is present @note bools are treated specially as vector is impossible given the spec @@ -68,6 +74,14 @@ class Variant { SharedField shared_field_as_integer(const std::string& tag) const; ///< same as integer_shared_field but will attempt to convert underlying data to integer if possible. @warning creates a new object but makes no copies of the underlying values. SharedField shared_field_as_float(const std::string& tag) const; ///< same as float_shared_field but will attempt to convert underlying data to float if possible. @warning creates a new object but makes no copies of the underlying values. SharedField shared_field_as_string(const std::string& tag) const; ///< same as string_shared_field but will attempt to convert underlying data to string if possible. @warning creates a new object but makes no copies of the underlying values. + bool boolean_shared_field(const int32_t index) const; ///< whether or not the tag with this index is present @note bools are treated specially as vector is impossible given the spec + SharedField integer_shared_field(const int32_t index) const; ///< returns a random access object with all the values in a given shared field tag index in integer format. Throws std::runtime_error if the field is not of integer type (no conversion is attempted). @warning creates a new object but makes no copies of the underlying values. + SharedField float_shared_field(const int32_t index) const; ///< returns a random access object with all the values in a given shared field tag index in float format. Throws std::runtime_error if the field is not of float type (no conversion is attempted). @warning creates a new object but makes no copies of the underlying values. + SharedField string_shared_field(const int32_t index) const; ///< returns a random access object with all the values in a given shared field tag index in string format. Throws std::runtime_error if the field is not of string type (no conversion is attempted). @warning creates a new object but makes no copies of the underlying values. 
+ SharedField shared_field_as_integer(const int32_t index) const; ///< same as integer_shared_field but will attempt to convert underlying data to integer if possible. @warning creates a new object but makes no copies of the underlying values. + SharedField shared_field_as_float(const int32_t index) const; ///< same as float_shared_field but will attempt to convert underlying data to float if possible. @warning creates a new object but makes no copies of the underlying values. + SharedField shared_field_as_string(const int32_t index) const; ///< same as string_shared_field but will attempt to convert underlying data to string if possible. @warning creates a new object but makes no copies of the underlying values. + /** * @brief functional-stlye set logic operations for variant field vectors * @@ -137,9 +151,17 @@ class Variant { std::shared_ptr m_header; ///< htslib variant header pointer std::shared_ptr m_body; ///< htslib variant body pointer - inline bcf_fmt_t* find_individual_field_by_tag(const std::string& tag) const; - inline bcf_info_t* find_shared_field_by_tag(const std::string& tag) const; - template inline std::vector shared_field(const std::string& tag, const int type) const; + uint32_t get_field_index(const std::string& tag) const { return bcf_hdr_id2int(m_header.get(), BCF_DT_ID, tag.c_str()); } + bool check_field_exists(const int type_field, const int index) const { return index >= 0 && bcf_hdr_idinfo_exists(m_header.get(), type_field, index); } + bool check_field_type(const int type_field, const uint32_t type_value, const int index) const { return bcf_hdr_id2type(m_header.get(), type_field, index) == type_value; } + bcf_fmt_t* find_individual_field(const std::string& tag) const { return bcf_get_fmt(m_header.get(), m_body.get(), tag.c_str()); } + bcf_info_t* find_shared_field(const std::string& tag) const { return bcf_get_info(m_header.get(), m_body.get(), tag.c_str()); } + bcf_fmt_t* find_individual_field(const uint32_t index) const { return 
bcf_get_fmt_idx(m_body.get(), index); } + bcf_info_t* find_shared_field(const uint32_t index) const { return bcf_get_info_idx(m_body.get(), index); } + bool check_field(const int32_t type_field, const int32_t type_value, const int32_t index) const; + + template SharedField shared_field_as(const INDEX_OR_TAG& p) const; + template IndividualField> individual_field_as(const INDEX_OR_TAG& p) const; friend class VariantWriter; }; diff --git a/gamgee/variant_header.cpp b/gamgee/variant_header.cpp index b025e7140..906f9df47 100644 --- a/gamgee/variant_header.cpp +++ b/gamgee/variant_header.cpp @@ -1,6 +1,7 @@ #include "variant_header.h" #include "utils/hts_memory.h" #include "utils/utils.h" +#include "missing.h" #include #include @@ -79,4 +80,9 @@ bool VariantHeader::has_individual_field(const string field) const { return find(fields.begin(), fields.end(), field) != fields.end(); } +int32_t VariantHeader::field_index(const string& tag) const { + const auto index = bcf_hdr_id2int(m_header.get(), BCF_DT_ID, tag.c_str()); + return index >= 0 ? index : missing_values::int32; +} + } diff --git a/gamgee/variant_header.h b/gamgee/variant_header.h index e13c3d6da..765f3af2d 100644 --- a/gamgee/variant_header.h +++ b/gamgee/variant_header.h @@ -40,6 +40,14 @@ class VariantHeader { void advanced_merge_header(const VariantHeader& other) { bcf_hdr_combine(other.m_header.get(), m_header.get()); } + /** + * @brief looks up the index of a particular filter, shared or individual field tag, enabling subsequent O(1) random-access lookups for that field throughout the iteration. + * @return the index of the tag, or missing_values::int32 if the tag is not present in the header (you can use missing() on the return value to check) + * @note prefer this to looking up tag names during the iteration if you are looking for shared fields multiple times. + * @note if multiple fields (e.g. shared and individual) have the same tag (e.g. 
"DP"), they will also have the same index internally, so this function will do the right thing. The accessors for individual and shared field will know how to use the index to retrieve the correct field. + */ + int32_t field_index(const std::string& tag) const; + private: std::shared_ptr m_header; diff --git a/gamgee/variant_reader.h b/gamgee/variant_reader.h index 69152d57b..616f70001 100644 --- a/gamgee/variant_reader.h +++ b/gamgee/variant_reader.h @@ -97,7 +97,7 @@ class VariantReader { * * @param filenames a vector containing a single element: the name of the variant file * @param samples the list of samples you want included/excluded from your iteration - * @param whether you want these samples to be included or excluded from your iteration. + * @param include whether you want these samples to be included or excluded from your iteration. */ VariantReader(const std::vector& filenames, const std::vector& samples, const bool include = true) : m_variant_file_ptr {}, diff --git a/lib/htslib b/lib/htslib index 949d26736..4c3406f05 160000 --- a/lib/htslib +++ b/lib/htslib @@ -1 +1 @@ -Subproject commit 949d267369ecadca6a2e9a1312450f9eeb4cd9b4 +Subproject commit 4c3406f05d9911d0dbd7c360d90db6d78800f1b5 diff --git a/test/variant_header_test.cpp b/test/variant_header_test.cpp index 3f5891a0e..c6eea416b 100644 --- a/test/variant_header_test.cpp +++ b/test/variant_header_test.cpp @@ -1,5 +1,6 @@ #include #include "variant_header_builder.h" +#include "missing.h" using namespace std; using namespace gamgee; @@ -17,7 +18,9 @@ BOOST_AUTO_TEST_CASE( variant_header_builder_simple_building ) { const auto chromosomes = vector{"chr1", "chr2", "chr3", "chr4"}; const auto filters = vector{"LOW_QUAL", "PASS", "VQSR_FAILED"}; const auto shareds = vector{"DP", "MQ", "RankSum"}; + const auto shareds_indices = vector{3,4,5}; // looks arbitrary but these are the indices of the shared fields because the filters get 0, 1 and 2. 
const auto individuals = vector{"GQ", "PL", "DP"}; + const auto individuals_indices = vector{6,7,3}; // the last index gets the same number as the info index. Weird, but that's how htslib deals with this. auto builder = VariantHeaderBuilder{}; builder.add_source("Gamgee api test"); builder.advanced_add_arbitrary_line("##unused="); @@ -40,5 +43,12 @@ BOOST_AUTO_TEST_CASE( variant_header_builder_simple_building ) { BOOST_CHECK(vh.has_individual_field("GQ") == true); BOOST_CHECK(vh.has_individual_field("DP") == true); BOOST_CHECK(vh.has_individual_field("BLAH") == false); + for (auto i = 0u; i != shareds.size(); ++i) + BOOST_CHECK_EQUAL(shareds_indices[i], vh.field_index(shareds[i])); + for (auto i = 0u; i != individuals.size(); ++i) + BOOST_CHECK_EQUAL(individuals_indices[i], vh.field_index(individuals[i])); + BOOST_CHECK(missing(vh.field_index("MISSING"))); + BOOST_CHECK(missing(vh.field_index("MISSING"))); + BOOST_CHECK_EQUAL(vh.field_index("PASS"), 0); } diff --git a/test/variant_reader_test.cpp b/test/variant_reader_test.cpp index 1ad076511..57c63981d 100644 --- a/test/variant_reader_test.cpp +++ b/test/variant_reader_test.cpp @@ -126,6 +126,26 @@ void check_individual_field_api(const Variant& record, const uint32_t truth_inde const auto as_int = record.individual_field_as_integer("AS"); // this is a string field, we should be able to create the object but not access it's elements due to lazy initialization const auto as_float = record.individual_field_as_float("AS"); // this is a string field, we should be able to create the object but not access it's elements due to lazy initialization const auto as_string = record.individual_field_as_string("AS"); + + // index based API + const auto header = record.header(); + const auto gq_idx = header.field_index("GQ"); + const auto af_idx = header.field_index("AF"); + const auto pl_idx = header.field_index("PL"); + const auto as_idx = header.field_index("AS"); + const auto gq_int_idx = 
record.individual_field_as_integer(gq_idx); + const auto gq_float_idx = record.individual_field_as_float(gq_idx); + const auto gq_string_idx = record.individual_field_as_string(gq_idx); + const auto af_int_idx = record.individual_field_as_integer(af_idx); + const auto af_float_idx = record.individual_field_as_float(af_idx); + const auto af_string_idx = record.individual_field_as_string(af_idx); + const auto pl_int_idx = record.individual_field_as_integer(pl_idx); + const auto pl_float_idx = record.individual_field_as_float(pl_idx); + const auto pl_string_idx = record.individual_field_as_string(pl_idx); + const auto as_int_idx = record.individual_field_as_integer(as_idx); // this is a string field, we should be able to create the object but not access it's elements due to lazy initialization + const auto as_float_idx = record.individual_field_as_float(as_idx); // this is a string field, we should be able to create the object but not access it's elements due to lazy initialization + const auto as_string_idx = record.individual_field_as_string(as_idx); + // test the conversions using unforgiving API BOOST_CHECK_THROW(record.float_individual_field("GQ"), runtime_error); @@ -136,6 +156,14 @@ void check_individual_field_api(const Variant& record, const uint32_t truth_inde BOOST_CHECK_THROW(record.string_individual_field("PL"), runtime_error); BOOST_CHECK_THROW(record.integer_individual_field("AS"), runtime_error); BOOST_CHECK_THROW(record.float_individual_field("AS"), runtime_error); + BOOST_CHECK_THROW(record.float_individual_field(gq_idx), runtime_error); + BOOST_CHECK_THROW(record.string_individual_field(gq_idx), runtime_error); + BOOST_CHECK_THROW(record.integer_individual_field(af_idx), runtime_error); + BOOST_CHECK_THROW(record.string_individual_field(af_idx), runtime_error); + BOOST_CHECK_THROW(record.float_individual_field(pl_idx), runtime_error); + BOOST_CHECK_THROW(record.string_individual_field(pl_idx), runtime_error); + 
BOOST_CHECK_THROW(record.integer_individual_field(as_idx), runtime_error); + BOOST_CHECK_THROW(record.float_individual_field(as_idx), runtime_error); // need operator== to make these easy to write. // @@ -146,43 +174,72 @@ void check_individual_field_api(const Variant& record, const uint32_t truth_inde for(auto i=0u; i != record.n_samples(); ++i) { BOOST_CHECK_EQUAL(gq_int[i][0], truth_gq[truth_index][i]); + BOOST_CHECK_EQUAL(gq_int_idx[i][0], truth_gq[truth_index][i]); BOOST_CHECK_CLOSE(gq_float[i][0], float(truth_gq[truth_index][i]), FLOAT_COMPARISON_THRESHOLD); + BOOST_CHECK_CLOSE(gq_float_idx[i][0], float(truth_gq[truth_index][i]), FLOAT_COMPARISON_THRESHOLD); BOOST_CHECK_EQUAL(gq_string[i][0], to_string(truth_gq[truth_index][i])); + BOOST_CHECK_EQUAL(gq_string_idx[i][0], to_string(truth_gq[truth_index][i])); BOOST_REQUIRE_EQUAL(af_int[i].size(), truth_af.size()); // require otherwise next line may segfault + BOOST_REQUIRE_EQUAL(af_int_idx[i].size(), truth_af.size()); // require otherwise next line may segfault for (auto j=0u; j!= af_int[i].size(); ++j) { BOOST_CHECK_EQUAL(af_int[i][j], int32_t(truth_af[j])); + BOOST_CHECK_EQUAL(af_int_idx[i][j], int32_t(truth_af[j])); BOOST_CHECK_CLOSE(af_float[i][j], truth_af[j], FLOAT_COMPARISON_THRESHOLD); + BOOST_CHECK_CLOSE(af_float_idx[i][j], truth_af[j], FLOAT_COMPARISON_THRESHOLD); BOOST_CHECK_EQUAL(af_string[i][j], to_string(truth_af[j])); + BOOST_CHECK_EQUAL(af_string_idx[i][j], to_string(truth_af[j])); } BOOST_REQUIRE_EQUAL(pl_int[i].size(), truth_pl[truth_index][i].size()); // require otherwise next line may segfault + BOOST_REQUIRE_EQUAL(pl_int_idx[i].size(), truth_pl[truth_index][i].size()); // require otherwise next line may segfault for (auto j=0u; j!= pl_int[i].size(); ++j) { BOOST_CHECK_EQUAL(pl_int[i][j], truth_pl[truth_index][i][j]); + BOOST_CHECK_EQUAL(pl_int_idx[i][j], truth_pl[truth_index][i][j]); BOOST_CHECK_CLOSE(pl_float[i][j], float(truth_pl[truth_index][i][j]), FLOAT_COMPARISON_THRESHOLD); + 
BOOST_CHECK_CLOSE(pl_float_idx[i][j], float(truth_pl[truth_index][i][j]), FLOAT_COMPARISON_THRESHOLD); BOOST_CHECK_EQUAL(pl_string[i][j], to_string(truth_pl[truth_index][i][j])); + BOOST_CHECK_EQUAL(pl_string_idx[i][j], to_string(truth_pl[truth_index][i][j])); } BOOST_CHECK_EQUAL(as_string[i][0], truth_as[truth_index][i]); + BOOST_CHECK_EQUAL(as_string_idx[i][0], truth_as[truth_index][i]); } BOOST_CHECK_THROW(as_float[0][0], invalid_argument); + BOOST_CHECK_THROW(as_float_idx[0][0], invalid_argument); BOOST_CHECK_THROW(as_int[0][0], invalid_argument); + BOOST_CHECK_THROW(as_int_idx[0][0], invalid_argument); BOOST_CHECK(missing(record.integer_individual_field("NON_EXISTING"))); + BOOST_CHECK(missing(record.integer_individual_field(-1))); BOOST_CHECK(missing(record.float_individual_field("NON_EXISTING"))); + BOOST_CHECK(missing(record.float_individual_field(-1))); BOOST_CHECK(missing(record.string_individual_field("NON_EXISTING"))); + BOOST_CHECK(missing(record.string_individual_field(-1))); BOOST_CHECK_THROW(record.float_individual_field("NON_EXISTING")[0], out_of_range); + BOOST_CHECK_THROW(record.float_individual_field(-1)[0], out_of_range); } void check_shared_field_api(const Variant& record, const uint32_t truth_index) { + const auto header = record.header(); BOOST_CHECK_EQUAL(record.boolean_shared_field("VALIDATED"), truth_shared_validated[truth_index]); - const auto an = record.integer_shared_field("AN"); + const auto an = record.integer_shared_field("AN"); // test the string based api BOOST_CHECK_EQUAL_COLLECTIONS(an.begin(), an.end(), truth_shared_an[truth_index].begin(), truth_shared_an[truth_index].end()); + const auto an_idx = record.integer_shared_field(header.field_index("AN")); // test the index based api + BOOST_CHECK_EQUAL_COLLECTIONS(an_idx.begin(), an_idx.end(), truth_shared_an[truth_index].begin(), truth_shared_an[truth_index].end()); const auto af = record.float_shared_field("AF"); BOOST_CHECK_EQUAL_COLLECTIONS(af.begin(), af.end(), 
truth_shared_af[truth_index].begin(), truth_shared_af[truth_index].end()); + const auto af_idx = record.float_shared_field(header.field_index("AF")); // test the index based api + BOOST_CHECK_EQUAL_COLLECTIONS(af_idx.begin(), af_idx.end(), truth_shared_af[truth_index].begin(), truth_shared_af[truth_index].end()); const auto desc = record.string_shared_field("DESC"); BOOST_CHECK_EQUAL_COLLECTIONS(desc.begin(), desc.end(), truth_shared_desc[truth_index].begin(), truth_shared_desc[truth_index].end()); + const auto desc_idx = record.string_shared_field(header.field_index("DESC")); // test the index based api + BOOST_CHECK_EQUAL_COLLECTIONS(desc_idx.begin(), desc_idx.end(), truth_shared_desc[truth_index].begin(), truth_shared_desc[truth_index].end()); // check non-existing missing values BOOST_CHECK(missing(record.boolean_shared_field("NON_EXISTING"))); BOOST_CHECK(missing(record.integer_shared_field("NON_EXISTING"))); BOOST_CHECK(missing(record.float_shared_field("NON_EXISTING"))); BOOST_CHECK(missing(record.string_shared_field("NON_EXISTING"))); + BOOST_CHECK(missing(record.boolean_shared_field(-1))); + BOOST_CHECK(missing(record.integer_shared_field(-1))); + BOOST_CHECK(missing(record.float_shared_field(-1))); + BOOST_CHECK(missing(record.string_shared_field(-1))); // check type conversions in the unforgiving API BOOST_CHECK_THROW(record.float_shared_field("AN"), runtime_error); BOOST_CHECK_THROW(record.string_shared_field("AN"), runtime_error); @@ -190,6 +247,12 @@ void check_shared_field_api(const Variant& record, const uint32_t truth_index) { BOOST_CHECK_THROW(record.string_shared_field("AF"), runtime_error); BOOST_CHECK_THROW(record.integer_shared_field("DESC"), runtime_error); BOOST_CHECK_THROW(record.float_shared_field("DESC"), runtime_error); + BOOST_CHECK_THROW(record.float_shared_field(header.field_index("AN")), runtime_error); + BOOST_CHECK_THROW(record.string_shared_field(header.field_index("AN")), runtime_error); + 
BOOST_CHECK_THROW(record.integer_shared_field(header.field_index("AF")), runtime_error); + BOOST_CHECK_THROW(record.string_shared_field(header.field_index("AF")), runtime_error); + BOOST_CHECK_THROW(record.integer_shared_field(header.field_index("DESC")), runtime_error); + BOOST_CHECK_THROW(record.float_shared_field(header.field_index("DESC")), runtime_error); // check conversions on the nice API const auto an_float = record.shared_field_as_float("AN"); const auto an_string = record.shared_field_as_string("AN"); @@ -197,30 +260,56 @@ void check_shared_field_api(const Variant& record, const uint32_t truth_index) { BOOST_CHECK_EQUAL(an_float[i], float(truth_shared_an[truth_index][i])); BOOST_CHECK_EQUAL(an_string[i], to_string(truth_shared_an[truth_index][i])); } + const auto an_float_idx = record.shared_field_as_float(header.field_index("AN")); + const auto an_string_idx = record.shared_field_as_string(header.field_index("AN")); + for (auto i=0u; i != an_float.size(); ++i) { + BOOST_CHECK_EQUAL(an_float_idx[i], float(truth_shared_an[truth_index][i])); + BOOST_CHECK_EQUAL(an_string_idx[i], to_string(truth_shared_an[truth_index][i])); + } const auto af_integer = record.shared_field_as_integer("AF"); const auto af_string = record.shared_field_as_string("AF"); for (auto i=0u; i != af_integer.size(); ++i) { BOOST_CHECK_EQUAL(af_integer[i], int32_t(truth_shared_af[truth_index][i])); BOOST_CHECK_EQUAL(af_string[i], to_string(truth_shared_af[truth_index][i])); } + const auto af_integer_idx = record.shared_field_as_integer(header.field_index("AF")); + const auto af_string_idx = record.shared_field_as_string(header.field_index("AF")); + for (auto i=0u; i != af_integer.size(); ++i) { + BOOST_CHECK_EQUAL(af_integer_idx[i], int32_t(truth_shared_af[truth_index][i])); + BOOST_CHECK_EQUAL(af_string_idx[i], to_string(truth_shared_af[truth_index][i])); + } + const auto desc_index = header.field_index("DESC"); const auto desc_bool = record.boolean_shared_field("DESC"); const auto 
desc_integer = record.shared_field_as_integer("DESC"); const auto desc_float = record.shared_field_as_float("DESC"); + const auto desc_bool_idx = record.boolean_shared_field(desc_index); + const auto desc_integer_idx = record.shared_field_as_integer(desc_index); + const auto desc_float_idx = record.shared_field_as_float(desc_index); if (truth_shared_desc[truth_index].empty()) { BOOST_CHECK(!desc_bool); BOOST_CHECK(missing(desc_integer)); BOOST_CHECK(missing(desc_float)); + BOOST_CHECK(!desc_bool_idx); + BOOST_CHECK(missing(desc_integer_idx)); + BOOST_CHECK(missing(desc_float_idx)); BOOST_CHECK(missing(record.shared_field_as_string("DESC"))); // check that an existing tag in the header can be missing + BOOST_CHECK(missing(record.shared_field_as_string(desc_index))); // check that an existing tag in the header can be missing BOOST_CHECK_THROW(desc_float[0], out_of_range); } else { BOOST_CHECK(desc_bool); BOOST_CHECK_THROW(desc_float[0], invalid_argument); BOOST_CHECK_THROW(desc_integer[0], invalid_argument); + BOOST_CHECK(desc_bool_idx); + BOOST_CHECK_THROW(desc_float_idx[0], invalid_argument); + BOOST_CHECK_THROW(desc_integer_idx[0], invalid_argument); // BOOST_CHECK_EQUAL(desc, record.shared_field_as_string("DESC")); // needs operator == on shared fields + // BOOST_CHECK_EQUAL(desc_idx, record.shared_field_as_string(desc_index)); // needs operator == on shared fields } // BOOST_CHECK_EQUAL(an, record.shared_field_as_integer("AN")); // needs operator == on shared fields - // BOOST_CHECK_EQUAL(af, record.shared_field_as_float("AN")); // needs operator == on shared fields + // BOOST_CHECK_EQUAL(an, record.shared_field_as_integer(header.field_index("AN"))); // needs operator == on shared fields + // BOOST_CHECK_EQUAL(af, record.shared_field_as_float("AF")); // needs operator == on shared fields + // BOOST_CHECK_EQUAL(af, record.shared_field_as_float(header.field_index("AF"))); // needs operator == on shared fields } void check_genotype_api(const Variant& record, const 
uint32_t truth_index) {