Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Merge pull request #383 from broadinstitute/dr_vb_auto_encode_genotypes
Browse files Browse the repository at this point in the history
Encode genotypes automatically in VariantBuilder instead of requiring the client to call Genotype::encode_genotype()
  • Loading branch information
jmthibault79 committed Nov 18, 2014
2 parents f53b18e + 6537829 commit a25b637
Show file tree
Hide file tree
Showing 5 changed files with 204 additions and 120 deletions.
40 changes: 26 additions & 14 deletions gamgee/genotype.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,23 +278,29 @@ class Genotype{

/**
* @brief Converts a vector of allele indices representing a genotype into BCF-encoded
* format suitable for passing to htslib. No phasing is added.
*
* Example: if you want to BCF-encode the genotype 0/1, create a vector with {0, 1}
* and then pass it to this function
*
* @note Do not call this function yourself before passing genotypes into VariantBuilder -- the builder
* will call it for you as necessary. Unless you are working with low-level BCF data you
* probably do not ever need to call this function.
*/
static inline void encode_genotype(std::vector<int32_t>& alleles) {
  // Delegate to the two-argument overload with phasing switched off
  const bool phase_all_alleles = false;
  encode_genotype(alleles, phase_all_alleles);
}

/**
* @brief Converts a vector of allele indices representing a genotype into BCF-encoded
* format suitable for passing to htslib, and also allows you to phase all alleles
*
* Example: if you want to BCF-encode the genotype 0|1, create a vector with {0, 1}
* and then pass it to this function with phase_all_alleles set to true
*
* @note Do not call this function yourself before passing genotypes into VariantBuilder -- the builder
* will call it for you as necessary. Unless you are working with low-level BCF data you
* probably do not ever need to call this function.
*/
static inline void encode_genotype(std::vector<int32_t>& alleles, bool phase_all_alleles) {
for ( auto allele_index = 0u; allele_index < alleles.size(); ++allele_index ) {
Expand All @@ -311,11 +317,14 @@ class Genotype{

/**
* @brief Converts multiple vectors of allele indices representing genotypes into
* BCF-encoded format suitable for passing to htslib. No phasing is added.
*
* Example: if you want to BCF-encode the genotypes 0/1 and 1/1, create a vector
* with { {0, 1}, {1, 1} } and pass it to this function
*
* @note Do not call this function yourself before passing genotypes into VariantBuilder -- the builder
* will call it for you as necessary. Unless you are working with low-level BCF data you
* probably do not ever need to call this function.
*/
static inline void encode_genotypes(std::vector<std::vector<int32_t>>& multiple_genotypes) {
for ( auto& genotype : multiple_genotypes ) {
Expand All @@ -325,8 +334,11 @@ class Genotype{

/**
* @brief Converts multiple genotypes stored in a VariantBuilderMultiSampleVector into
* BCF-encoded format suitable for passing to htslib. No phasing is added.
*
* @note Do not call this function yourself before passing genotypes into VariantBuilder -- the builder
* will call it for you as necessary. Unless you are working with low-level BCF data you
* probably do not ever need to call this function.
*/
static inline void encode_genotypes(VariantBuilderMultiSampleVector<int32_t>& multiple_genotypes) {
auto& genotypes_vector = const_cast<std::vector<int32_t>&>(multiple_genotypes.get_vector());
Expand Down
46 changes: 42 additions & 4 deletions gamgee/variant_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,23 +219,37 @@ VariantBuilder& VariantBuilder::remove_shared_fields(const std::vector<uint32_t>
******************************************************************************/

// Sets the genotype field (gt_index()) for all samples from a VariantBuilderMultiSampleVector
// passed by const lvalue reference. The caller's object is not modified: a copy is BCF-encoded
// via Genotype::encode_genotypes() and handed to the individual-region storage.
// Returns *this so calls can be chained.
VariantBuilder& VariantBuilder::set_genotypes(const VariantBuilderMultiSampleVector<int32_t>& genotypes_for_all_samples) {
  // The caller passed an lvalue, so encode a private copy and leave their data untouched.
  // Only the encoded copy is stored; the raw allele indices are never written to storage.
  auto encoded_genotypes = genotypes_for_all_samples;
  Genotype::encode_genotypes(encoded_genotypes);

  // get_vector() appears to expose only a const reference (Genotype::encode_genotypes() has to
  // const_cast it as well), so moving its result directly would yield a const rvalue and silently
  // fall back to a copy. encoded_genotypes is our own non-const local, so casting away the
  // const to really move the buffer is well-defined.
  auto& encoded_vector = const_cast<std::vector<int32_t>&>(encoded_genotypes.get_vector());
  m_individual_region.bulk_set_integer_field(m_individual_region.gt_index(), std::move(encoded_vector));
  return *this;
}

// Sets the genotype field (gt_index()) for all samples from a VariantBuilderMultiSampleVector
// that the caller has moved in. The argument is BCF-encoded in place and its storage is handed
// to the individual region, so the caller must not rely on its contents afterwards.
// Returns *this so calls can be chained.
VariantBuilder& VariantBuilder::set_genotypes(VariantBuilderMultiSampleVector<int32_t>&& genotypes_for_all_samples) {
  // The container was moved in to us, so it can be encoded directly without a defensive copy
  Genotype::encode_genotypes(genotypes_for_all_samples);

  // get_vector() appears to expose only a const reference (Genotype::encode_genotypes() has to
  // const_cast it as well), so moving its result directly would yield a const rvalue and silently
  // fall back to a copy. The caller gave up ownership by passing an rvalue, so cast away the
  // const and genuinely move the buffer into storage.
  auto& encoded_vector = const_cast<std::vector<int32_t>&>(genotypes_for_all_samples.get_vector());
  m_individual_region.bulk_set_integer_field(m_individual_region.gt_index(), std::move(encoded_vector));
  return *this;
}

// Sets the genotype field (gt_index()) for all samples from a vector holding one vector of
// allele indices per sample, passed by const lvalue reference. The caller's data is not
// modified: a copy is BCF-encoded via Genotype::encode_genotypes() and moved into storage.
// Returns *this so calls can be chained.
VariantBuilder& VariantBuilder::set_genotypes(const std::vector<std::vector<int32_t>>& genotypes_for_all_samples) {
  // The caller passed an lvalue, so encode a private copy and leave their data untouched.
  // Only the encoded copy is stored; the raw allele indices are never written to storage.
  auto encoded_genotypes = genotypes_for_all_samples;
  Genotype::encode_genotypes(encoded_genotypes);

  // The copy is ours, so it can be moved into the storage layer
  m_individual_region.bulk_set_integer_field(m_individual_region.gt_index(), std::move(encoded_genotypes));
  return *this;
}

// Sets the genotype field (gt_index()) for all samples from a vector holding one vector of
// allele indices per sample that the caller has moved in. The argument is BCF-encoded in place
// via Genotype::encode_genotypes() and then moved into storage.
// Returns *this so calls can be chained.
VariantBuilder& VariantBuilder::set_genotypes(std::vector<std::vector<int32_t>>&& genotypes_for_all_samples) {
  // The vector was moved in to us, so it can be encoded directly without a defensive copy
  Genotype::encode_genotypes(genotypes_for_all_samples);

  // Qualified std::move: do not depend on argument-dependent lookup to find the cast
  m_individual_region.bulk_set_integer_field(m_individual_region.gt_index(), std::move(genotypes_for_all_samples));
  return *this;
}
Expand Down Expand Up @@ -355,11 +369,35 @@ VariantBuilder& VariantBuilder::set_string_individual_field(const uint32_t field
******************************************************************************/

VariantBuilder& VariantBuilder::set_genotype(const std::string& sample, const std::vector<int32_t>& genotype) {
  // The argument is a const lvalue, so BCF-encode a private copy rather than the caller's vector
  std::vector<int32_t> bcf_alleles(genotype);
  Genotype::encode_genotype(bcf_alleles);

  // An empty genotype is passed down as a null pointer together with a count of zero
  auto* const first_allele = bcf_alleles.empty() ? nullptr : bcf_alleles.data();
  m_individual_region.set_integer_field_by_sample(m_individual_region.gt_index(), sample, first_allele, bcf_alleles.size());
  return *this;
}

VariantBuilder& VariantBuilder::set_genotype(const std::string& sample, std::vector<int32_t>&& genotype) {
  // The vector was moved in to us, so it is BCF-encoded in place with no copy
  Genotype::encode_genotype(genotype);

  // An empty genotype is passed down as a null pointer together with a count of zero
  auto* const first_allele = genotype.empty() ? nullptr : genotype.data();
  m_individual_region.set_integer_field_by_sample(m_individual_region.gt_index(), sample, first_allele, genotype.size());
  return *this;
}

VariantBuilder& VariantBuilder::set_genotype(const uint32_t sample_index, const std::vector<int32_t>& genotype) {
  // The argument is a const lvalue, so BCF-encode a private copy rather than the caller's vector
  std::vector<int32_t> bcf_alleles(genotype);
  Genotype::encode_genotype(bcf_alleles);

  // An empty genotype is passed down as a null pointer together with a count of zero
  auto* const first_allele = bcf_alleles.empty() ? nullptr : bcf_alleles.data();
  m_individual_region.set_integer_field_by_sample(m_individual_region.gt_index(), sample_index, first_allele, bcf_alleles.size());
  return *this;
}

VariantBuilder& VariantBuilder::set_genotype(const uint32_t sample_index, std::vector<int32_t>&& genotype) {
  // The vector was moved in to us, so it is BCF-encoded in place with no copy
  Genotype::encode_genotype(genotype);

  // An empty genotype is passed down as a null pointer together with a count of zero
  auto* const first_allele = genotype.empty() ? nullptr : genotype.data();
  m_individual_region.set_integer_field_by_sample(m_individual_region.gt_index(), sample_index, first_allele, genotype.size());
  return *this;
}
Expand Down

0 comments on commit a25b637

Please sign in to comment.