Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
1. Modified operator == in IndividualFieldValue to use iterators which
Browse files Browse the repository at this point in the history
take care of bcf_*_vector_end values implicitly
A more serious bug in the previous implementation of the == operator was
in the way string fields were handled. Points to remember for string
fields:
a. The size() function returns the number of characters allocated for the string.
b. The variable m_num_bytes also stores the number of characters
allocated for the string (same as size())
c. When operator[i] is called for string fields, the address that is
accessed is m_format_ptr->p + i*m_num_bytes (not m_format_ptr->p + i).
The address accessed is outside the original allocated memory.

Moral of the story: use iterators always, or remember the intricacies of
string v/s other types of fields.

Similar fix for SharedField also

2. Use references to avoid copies where possible as per David's
suggestions
3. Specialized operator[] for IndividualFieldValue<string> and
SharedField<string> so that if the original VCF field type is of type
String, the index argument for operator [] cannot be greater than 0. If
it is greater than 0, then an out_of_range exception is thrown.

3. Added extra out of bounds test cases (by catching exceptions)

4. Fixed a memory leak in file_utils.h
  • Loading branch information
kgururaj committed Oct 23, 2014
1 parent faadd94 commit 37d7964
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 19 deletions.
35 changes: 30 additions & 5 deletions gamgee/individual_field_value.h
Expand Up @@ -110,12 +110,20 @@ class IndividualFieldValue {
bool operator==(const IndividualFieldValue& other) const {
if (this == &other)
return true;
if (size() != other.size())
return false;
for (auto i=0u; i != size(); ++i) {
if (!utils::bcf_check_equal_primitive(operator[](i), other[i]))
return false;
//Use iterators where possible as they take care of field sizes, bcf_*_vector_end
auto other_iter = other.begin();
auto other_end = other.end();
for(const auto& curr_val : *this)
{
if(other_iter == other_end) //different length, this is longer (more valid values) than other
return false;
if(!utils::bcf_check_equal_element(curr_val, *other_iter))
return false;
++other_iter;
}
//Check if other still has more valid values
if(other_iter != other_end)
return false;
return true;
}

Expand Down Expand Up @@ -202,6 +210,23 @@ std::string IndividualFieldValue<std::string>::convert_from_byte_array(int index
return utils::convert_data_to_string(m_data_ptr, index, m_num_bytes, static_cast<utils::VariantFieldType>(m_format_ptr->type));
}

/**
* @brief specialization for operator[] for strings
* String fields have 1 string (at most), it is wrong to have a non-0 index
*/
template<> inline
std::string IndividualFieldValue<std::string>::operator[](const uint32_t index) const {
auto is_string_type = utils::is_string_type(m_format_ptr->type);
auto limit = size();
auto prefix_msg = "";
if(is_string_type)
{
limit = 1u;
prefix_msg = "FORMAT fields of type string in VCFs have only 1 element per sample :: ";
}
utils::check_max_boundary(index, limit, prefix_msg);
return convert_from_byte_array(index);
}

}

Expand Down
37 changes: 32 additions & 5 deletions gamgee/shared_field.h
Expand Up @@ -87,12 +87,20 @@ class SharedField {
bool operator==(const SharedField& other) const {
if (this == &other)
return true;
if (size() != other.size())
return false;
for (auto i=0u; i != size(); ++i) {
if (!utils::bcf_check_equal_primitive(operator[](i), other[i]))
return false;
//Use iterators where possible as they take care of field sizes, bcf_*_vector_end
auto other_iter = other.begin();
auto other_end = other.end();
for(const auto& curr_val : *this)
{
if(other_iter == other_end) //different length, this is longer (more valid values) than other
return false;
if(!utils::bcf_check_equal_element(curr_val, *other_iter))
return false;
++other_iter;
}
//Check if other still has more valid values
if(other_iter != other_end)
return false;
return true;
}

Expand Down Expand Up @@ -173,6 +181,25 @@ std::string SharedField<std::string>::convert_from_byte_array(int index) const {
return utils::convert_data_to_string(m_info_ptr->vptr, index, m_bytes_per_value, static_cast<utils::VariantFieldType>(m_info_ptr->type));
}

/**
* @brief specialization for operator[] for strings
* String fields have 1 string (at most), it is wrong to have a non-0 index
*/
template<> inline
std::string SharedField<std::string>::operator[](const uint32_t index) const {
if (empty())
throw std::out_of_range("Tried to index a shared field that is missing with operator[]");
auto is_string_type = utils::is_string_type(m_info_ptr->type);
auto limit = m_info_ptr->len; //cannot use size here as size<string> is specialized to return 1
auto prefix_msg = "";
if(is_string_type)
{
limit = 1u;
prefix_msg = "INFO fields of type string in VCFs have only 1 element per sample :: ";
}
utils::check_max_boundary(index, limit, prefix_msg);
return convert_from_byte_array(index);
}

} // end of namespace gamgee

Expand Down
5 changes: 4 additions & 1 deletion gamgee/utils/file_utils.h
Expand Up @@ -12,7 +12,10 @@ namespace utils {
* @brief a functor object to delete an ifstream
*/
struct IFStreamDeleter {
void operator()(std::ifstream* p) const { p->close(); }
void operator()(std::ifstream* p) const {
p->close();
delete p;
}
};

/**
Expand Down
24 changes: 17 additions & 7 deletions gamgee/utils/utils.h
Expand Up @@ -57,22 +57,32 @@ std::vector<std::string> hts_string_array_to_vector(const char * const * const s
* @brief checks that an index is greater than or equal to size
* @param index the index between 0 and size to check
* @param size one past the maximum valid index
* @param prefix_msg additional string to prefix error message
* @exception throws an out_of_bounds exception if index is out of limits
*/
inline void check_max_boundary(const uint32_t index, const uint32_t size) {
inline void check_max_boundary(const uint32_t index, const uint32_t size, const std::string& prefix_msg) {
if (index >= size) {
std::stringstream error_message {}; ///< @todo update this to auto when gcc-4.9 is available on travis-ci
error_message << "Index: " << index << " must be less than " << size << std::endl;
error_message << prefix_msg << "Index: " << index << " must be less than " << size << std::endl;
throw std::out_of_range(error_message.str());
}
}

/**
* @brief checks that an index is greater than or equal to size
* @param index the index between 0 and size to check
* @param size one past the maximum valid index
* @exception throws an out_of_bounds exception if index is out of limits
*/
inline void check_max_boundary(const uint32_t index, const uint32_t size) {
check_max_boundary(index, size, "");
}
/**
* @brief - Check whether two values from VCF fields of primitive types (for which the == operator is defined) * are equal
* The function template is specialized for float fields
*/
template<class TYPE>
inline bool bcf_check_equal_primitive(const TYPE x, const TYPE y) {
inline bool bcf_check_equal_element(const TYPE& x, const TYPE& y) {
return (x == y);
}
/**
Expand All @@ -81,7 +91,7 @@ inline bool bcf_check_equal_primitive(const TYPE x, const TYPE y) {
* a special check needs to be done for checking equality of float values in VCFs
*/
template<>
inline bool bcf_check_equal_primitive<float>(const float x, const float y) {
inline bool bcf_check_equal_element<float>(const float& x, const float& y) {
return ((x == y) || (bcf_float_is_missing(x) && bcf_float_is_missing(y))
|| (bcf_float_is_vector_end(x) && bcf_float_is_vector_end(y)));
}
Expand All @@ -91,23 +101,23 @@ inline bool bcf_check_equal_primitive<float>(const float x, const float y) {
* @note: for non numeric types returns false, as vector end is undefined
*/
template<class TYPE> inline
bool bcf_is_vector_end_value(TYPE value) {
bool bcf_is_vector_end_value(const TYPE& value) {
return false;
}

/*
* @brief specialization of the bcf_is_vector_end_value function for int32_t
*/
template<> inline
bool bcf_is_vector_end_value<int32_t>(int32_t value) {
bool bcf_is_vector_end_value<int32_t>(const int32_t& value) {
return (value == bcf_int32_vector_end);
}

/*
* @brief specialization of the bcf_is_vector_end_value function for float
*/
template<> inline
bool bcf_is_vector_end_value<float>(float value) {
bool bcf_is_vector_end_value<float>(const float& value) {
return bcf_float_is_vector_end(value);
}

Expand Down
1 change: 0 additions & 1 deletion gamgee/utils/variant_field_type.cpp
Expand Up @@ -131,7 +131,6 @@ uint8_t size_for_type(const VariantFieldType& type, const bcf_info_t* const info
}
}


} // end namespace utils
} // end namespace gamgee

7 changes: 7 additions & 0 deletions gamgee/utils/variant_field_type.h
Expand Up @@ -53,6 +53,13 @@ uint8_t size_for_type(const VariantFieldType& type, const bcf_fmt_t* const forma
*/
uint8_t size_for_type(const VariantFieldType& type, const bcf_info_t* const info_ptr);

/**
* @brief - check if type is of type string
*/
inline bool is_string_type(const int32_t& type) {
return (static_cast<utils::VariantFieldType>(type) == VariantFieldType::STRING);
}

} // end namespace utils
} // end namespace gamgee

Expand Down
46 changes: 46 additions & 0 deletions test/variant_reader_test.cpp
Expand Up @@ -436,6 +436,51 @@ void check_operator_index_for_iterators(const Variant& record)
}
}

void check_out_of_bound_exceptions(const Variant& record) {
const auto vlint_shared = record.integer_shared_field("VLINT");
const auto af_float_shared = record.shared_field_as_float("AF");
if(!vlint_shared.empty())
BOOST_CHECK_THROW(vlint_shared[vlint_shared.size()], out_of_range);
if(!af_float_shared.empty())
BOOST_CHECK_THROW(af_float_shared[record.n_alleles()], out_of_range);

const auto as_string = record.individual_field_as_string("AS");
const auto pl_int = record.individual_field_as_integer("PL");
const auto vlfloat = record.individual_field_as_float("VLFLOAT");
auto n_alleles = record.n_alleles();
auto num_gts = (n_alleles*(n_alleles+1))/2;
auto n_samples = record.n_samples();
for(auto i=0u; i != record.n_samples(); ++i) {
if(as_string.empty())
{
BOOST_CHECK_THROW(as_string[i][0], out_of_range);
}
else
{
BOOST_CHECK_THROW(as_string[n_samples][0], out_of_range);
BOOST_CHECK_THROW(as_string[i][1], out_of_range);
}
if(pl_int.empty())
{
BOOST_CHECK_THROW(pl_int[i][0], out_of_range);
}
else
{
BOOST_CHECK_THROW(pl_int[n_samples][0], out_of_range);
BOOST_CHECK_THROW(pl_int[i][num_gts], out_of_range);
}
if(vlfloat.empty())
{
BOOST_CHECK_THROW(vlfloat[i][0], out_of_range);
}
else
{
BOOST_CHECK_THROW(vlfloat[n_samples][0], out_of_range);
BOOST_CHECK_THROW(vlfloat[i][vlfloat[i].size()], out_of_range);
}
}
}

void check_individual_field_api(const Variant& record, const uint32_t truth_index) {
const auto gq_int = record.individual_field_as_integer("GQ");
const auto gq_float = record.individual_field_as_float("GQ");
Expand Down Expand Up @@ -560,6 +605,7 @@ void check_individual_field_api(const Variant& record, const uint32_t truth_inde
BOOST_CHECK_THROW(record.float_individual_field(-1)[0], out_of_range);
check_variable_length_field_api(record, truth_index);
check_operator_index_for_iterators(record);
check_out_of_bound_exceptions(record);
}

void check_shared_field_api(const Variant& record, const uint32_t truth_index) {
Expand Down

0 comments on commit 37d7964

Please sign in to comment.