This repository has been archived by the owner on Dec 16, 2022. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
652 additions
and
273 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
#include "genotype.h" | ||
#include "genotype_encoding.h" | ||
|
||
namespace gamgee { | ||
|
||
using namespace std; | ||
|
||
Genotype::Genotype(const std::shared_ptr<bcf1_t>& body, const bcf_fmt_t* const format_ptr, const uint8_t* data_ptr) : | ||
m_body {body}, | ||
m_format_ptr {format_ptr}, | ||
m_data_ptr {data_ptr} { | ||
} | ||
|
||
Genotype::Genotype(Genotype&& other) noexcept : | ||
m_body {move(other.m_body)}, | ||
m_format_ptr {other.m_format_ptr}, | ||
m_data_ptr {other.m_data_ptr} | ||
{} | ||
|
||
bool Genotype::is_het() const { | ||
const auto allele_1 = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 0); | ||
const auto allele_2 = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 1); | ||
return (allele_1 != allele_2); | ||
} | ||
|
||
bool Genotype::is_hom_var() const { | ||
const auto allele_1 = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 0); | ||
if (allele_1 == 0) | ||
return false; | ||
const auto allele_2 = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 1); | ||
return allele_1 == allele_2; | ||
} | ||
|
||
bool Genotype::is_hom_ref() const { | ||
const auto allele_1 = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 0); | ||
if (allele_1 != 0) | ||
return false; | ||
const auto allele_2 = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 1); | ||
return allele_2 == 0; | ||
} | ||
|
||
vector<string> Genotype::alleles() const { | ||
return GenotypeUtils::allele_strings(m_body, m_format_ptr, m_data_ptr); | ||
} | ||
|
||
GenotypeEncoding Genotype::value() const { | ||
return GenotypeEncoding{m_body, m_format_ptr, m_data_ptr}; | ||
} | ||
|
||
GenotypeEncodingValue Genotype::operator [](const uint32_t index) const { | ||
return GenotypeEncodingValue{m_body, m_format_ptr, m_data_ptr, index}; | ||
} | ||
|
||
uint32_t Genotype::size() const { | ||
return GenotypeUtils::allele_count(m_format_ptr); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
#ifndef __gamgee__genotype__ | ||
#define __gamgee__genotype__ | ||
|
||
#include "utils/hts_memory.h" | ||
#include "utils/genotype_utils.h" | ||
#include "genotype_encoding.h" | ||
#include "genotype_encoding_value.h" | ||
#include "variant_field_value.h" | ||
|
||
#include "utils/utils.h" | ||
#include "utils/format_field_type.h" | ||
|
||
#include "htslib/vcf.h" | ||
|
||
#include <memory> | ||
|
||
namespace gamgee { | ||
|
||
/** | ||
* @brief Encodes a genotype. | ||
*/ | ||
class Genotype { | ||
public: | ||
/** | ||
* @brief Creates a genotype. | ||
* @param body The shared memory variant "line" from a vcf, or bcf. | ||
* @param format_ptr The GT field from the line. | ||
* @param data_ptr The GT for this sample. | ||
*/ | ||
Genotype(const std::shared_ptr<bcf1_t>& body, const bcf_fmt_t* const format_ptr, const uint8_t* data_ptr); | ||
|
||
/** | ||
* @brief Moves a genotype. | ||
* @param other The other genotype object to move. | ||
*/ | ||
Genotype(Genotype&& other) noexcept; | ||
|
||
// only for diploids | ||
/** | ||
* @brief Checks if this genotype is any type of heterozygous call. | ||
* @return True if this GT is a het. | ||
* @note only for diploids. | ||
*/ | ||
bool is_het() const; | ||
/** | ||
* @brief Checks if this genotype is a homozygous call that is non-reference. | ||
* @return True if this GT is a hom var. | ||
* @note only for diploids. | ||
*/ | ||
bool is_hom_var() const; | ||
/** | ||
* @brief Checks if this genotype is a homozygous call that is reference. | ||
* @return True if this GT is a hom ref. | ||
* @note only for diploids. | ||
*/ | ||
bool is_hom_ref() const; | ||
|
||
/** | ||
* @brief Returns a bit encoding of the alleles. | ||
* @return A bit encoding of the alleles. | ||
*/ | ||
GenotypeEncoding value() const; // returns the number of the genotype. => 0/1 = 1 | 0/0 = 0 | 1/1 = 2 ... | 0/2 | ||
|
||
/** | ||
* @brief Returns a genotype encoding value of each allele. | ||
* @param index From [0,size()) | ||
* @return A genotype encoding value of the allele at index. | ||
*/ | ||
GenotypeEncodingValue operator[](const uint32_t index) const; // getter | ||
|
||
/** | ||
* @brief Number of alleles in this genotype. | ||
* @return Number of alleles in this genotype. | ||
*/ | ||
uint32_t size() const; | ||
|
||
/** | ||
* @brief The alleles in this genotype. | ||
* @return The alleles in this genotype as strings. | ||
*/ | ||
vector<string> alleles() const; // returns the actual alleles (e.g. A/T or TG/C or T/T/C or T/T/T/T/T ... ) | ||
|
||
private: | ||
const std::shared_ptr<bcf1_t> m_body; | ||
const bcf_fmt_t* const m_format_ptr; | ||
const uint8_t* m_data_ptr; | ||
}; | ||
|
||
} | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#ifndef __gamgee__genotype_encoding__ | ||
#define __gamgee__genotype_encoding__ | ||
|
||
#include "utils/hts_memory.h" | ||
#include "utils/utils.h" | ||
#include "utils/format_field_type.h" | ||
#include "utils/genotype_utils.h" | ||
|
||
#include "htslib/vcf.h" | ||
|
||
#include <memory> | ||
#include <utility> | ||
|
||
namespace gamgee { | ||
|
||
/** | ||
* @brief Encodes a diploid genotype as bits. | ||
* @note Missing genotypes are not implemented yet. | ||
*/ | ||
class GenotypeEncoding { | ||
|
||
public: | ||
/** | ||
* @brief Constructs a genotype encoding. | ||
* @param body The shared memory variant "line" from a vcf, or bcf. | ||
* @param format_ptr The GT field from the line. | ||
* @param data_ptr The GT for this sample. | ||
*/ | ||
GenotypeEncoding(const std::shared_ptr<bcf1_t>& body, const bcf_fmt_t* const format_ptr, const uint8_t* data_ptr) : | ||
m_body {body}, | ||
m_format_ptr {format_ptr}, | ||
m_data_ptr {data_ptr} { | ||
} | ||
|
||
/** | ||
* @brief A bit encoding for the first two alleles. | ||
* @return A bit encoding for the first two alleles. | ||
* @note Diploid only. | ||
*/ | ||
uint32_t bits() const { | ||
const auto allele_1 = static_cast<uint32_t>(GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 0)); | ||
const auto allele_2 = static_cast<uint32_t>(GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 1)); | ||
return (allele_1 << 16) | allele_2; | ||
} | ||
|
||
/** | ||
* @brief Checks if another genotype encoding does not equal this encoding. | ||
* @param other The other genotype encoding to compare. | ||
* @return False if other genotype encoding equals this encoding. | ||
*/ | ||
bool operator!=(const GenotypeEncoding& other) const { | ||
const auto count = GenotypeUtils::allele_count(m_format_ptr); | ||
if (count != GenotypeUtils::allele_count(other.m_format_ptr)) | ||
return true; | ||
for (auto i = 0; i < count; i++) { | ||
if (GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, i) != GenotypeUtils::allele_integer(other.m_format_ptr, other.m_data_ptr, i)) { | ||
return true; | ||
} | ||
} | ||
return false; | ||
} | ||
|
||
/** | ||
* @brief Checks if another genotype encoding equals this encoding. | ||
* @param other The other genotype encoding to compare. | ||
* @return True if other genotype encoding equals this encoding. | ||
*/ | ||
bool operator==(const GenotypeEncoding& other) const { | ||
const auto count = GenotypeUtils::allele_count(m_format_ptr); | ||
if (count != GenotypeUtils::allele_count(other.m_format_ptr)) | ||
return false; | ||
for (auto i = 0; i < count; i++) { | ||
if (GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, i) != GenotypeUtils::allele_integer(other.m_format_ptr, other.m_data_ptr, i)) { | ||
return false; | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
/** | ||
* @brief Checks if the first allele is missing. | ||
* @return True if first allele is missing. | ||
* @warning this function assumes that if the first allele is missing, then all alleles are missing. | ||
*/ | ||
bool is_missing() const { | ||
return GenotypeUtils::allele_missing(m_format_ptr, m_data_ptr, 0); | ||
} | ||
|
||
private: | ||
const std::shared_ptr<bcf1_t> m_body; | ||
const bcf_fmt_t* const m_format_ptr; | ||
const uint8_t* m_data_ptr; | ||
}; | ||
|
||
} | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
#ifndef __gamgee__genotype_encoding_value__ | ||
#define __gamgee__genotype_encoding_value__ | ||
|
||
#include "utils/hts_memory.h" | ||
#include "utils/utils.h" | ||
#include "utils/format_field_type.h" | ||
#include "utils/genotype_utils.h" | ||
|
||
#include "htslib/vcf.h" | ||
|
||
#include <memory> | ||
#include <string> | ||
|
||
namespace gamgee { | ||
|
||
using namespace std; | ||
|
||
/** | ||
* @brief Decodes a genotype allele. | ||
*/ | ||
class GenotypeEncodingValue { | ||
public: | ||
/** | ||
* @brief Constructs a genotype encoding value. | ||
* @param body The shared memory variant "line" from a vcf, or bcf. | ||
* @param format_ptr The GT field from the line. | ||
* @param data_ptr The GT for this sample. | ||
*/ | ||
GenotypeEncodingValue(const std::shared_ptr<bcf1_t>& body, const bcf_fmt_t* const format_ptr, | ||
const uint8_t* data_ptr, const uint32_t allele_index) : | ||
m_body {body}, | ||
m_format_ptr {format_ptr}, | ||
m_data_ptr {data_ptr}, | ||
m_allele_index {allele_index} { | ||
} | ||
|
||
/** | ||
* @brief Checks if another genotype encoding value does not equal this encoding value. | ||
* @param other The other genotype encoding value to compare. | ||
* @return False if other genotype encoding value equals this encoding value. | ||
*/ | ||
bool operator!=(const GenotypeEncodingValue& other) const { | ||
if (m_data_ptr == other.m_data_ptr) { | ||
return m_allele_index != other.m_allele_index; | ||
} | ||
return allele() != other.allele(); | ||
} | ||
|
||
/** | ||
* @brief Checks if another genotype encoding value equals this encoding value. | ||
* @param other The other genotype encoding value to compare. | ||
* @return True if other genotype encoding value equals this encoding value. | ||
*/ | ||
bool operator==(const GenotypeEncodingValue& other) const { | ||
if (m_data_ptr == other.m_data_ptr) { | ||
return m_allele_index == other.m_allele_index; | ||
} | ||
return allele() == other.allele(); | ||
} | ||
|
||
/** | ||
* @brief A single allele from a genotype. | ||
* @return A single allele from a genotype. | ||
*/ | ||
string allele() const // returns the actual allele (e.g. A or T or AT or ATCAG...) returns empty string when is missing. | ||
{ | ||
const auto allele_int = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, m_allele_index); | ||
return GenotypeUtils::allele_integer_to_string(m_body, allele_int); | ||
} | ||
|
||
/** | ||
* @brief Checks if the allele at this index is missing. | ||
* @return True if the allele at this index is missing. | ||
*/ | ||
bool is_missing() const { | ||
return GenotypeUtils::allele_missing(m_format_ptr, m_data_ptr, m_allele_index); | ||
} | ||
|
||
private: | ||
const std::shared_ptr<bcf1_t> m_body; | ||
const bcf_fmt_t* const m_format_ptr; | ||
const uint8_t* m_data_ptr; | ||
const uint32_t m_allele_index; | ||
}; | ||
|
||
} | ||
|
||
#endif |
Oops, something went wrong.