Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Commit for Jul 23
Browse files Browse the repository at this point in the history
  • Loading branch information
kshakir committed Jul 23, 2014
1 parent 04b03aa commit ea3d8fb
Show file tree
Hide file tree
Showing 15 changed files with 652 additions and 273 deletions.
12 changes: 0 additions & 12 deletions gamgee/diploid_pl_genotype.h

This file was deleted.

58 changes: 58 additions & 0 deletions gamgee/genotype.cpp
@@ -0,0 +1,58 @@
#include "genotype.h"
#include "genotype_encoding.h"

namespace gamgee {

using namespace std;

Genotype::Genotype(const std::shared_ptr<bcf1_t>& body, const bcf_fmt_t* const format_ptr, const uint8_t* data_ptr) :
m_body {body},
m_format_ptr {format_ptr},
m_data_ptr {data_ptr} {
}

Genotype::Genotype(Genotype&& other) noexcept :
m_body {move(other.m_body)},
m_format_ptr {other.m_format_ptr},
m_data_ptr {other.m_data_ptr}
{}

bool Genotype::is_het() const {
const auto allele_1 = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 0);
const auto allele_2 = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 1);
return (allele_1 != allele_2);
}

bool Genotype::is_hom_var() const {
const auto allele_1 = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 0);
if (allele_1 == 0)
return false;
const auto allele_2 = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 1);
return allele_1 == allele_2;
}

bool Genotype::is_hom_ref() const {
const auto allele_1 = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 0);
if (allele_1 != 0)
return false;
const auto allele_2 = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 1);
return allele_2 == 0;
}

vector<string> Genotype::alleles() const {
return GenotypeUtils::allele_strings(m_body, m_format_ptr, m_data_ptr);
}

GenotypeEncoding Genotype::value() const {
return GenotypeEncoding{m_body, m_format_ptr, m_data_ptr};
}

GenotypeEncodingValue Genotype::operator [](const uint32_t index) const {
return GenotypeEncodingValue{m_body, m_format_ptr, m_data_ptr, index};
}

uint32_t Genotype::size() const {
return GenotypeUtils::allele_count(m_format_ptr);
}

}
91 changes: 91 additions & 0 deletions gamgee/genotype.h
@@ -0,0 +1,91 @@
#ifndef __gamgee__genotype__
#define __gamgee__genotype__

#include "utils/hts_memory.h"
#include "utils/genotype_utils.h"
#include "genotype_encoding.h"
#include "genotype_encoding_value.h"
#include "variant_field_value.h"

#include "utils/utils.h"
#include "utils/format_field_type.h"

#include "htslib/vcf.h"

#include <memory>

namespace gamgee {

/**
* @brief Encodes a genotype.
*/
class Genotype {
public:
/**
* @brief Creates a genotype.
* @param body The shared memory variant "line" from a vcf, or bcf.
* @param format_ptr The GT field from the line.
* @param data_ptr The GT for this sample.
*/
Genotype(const std::shared_ptr<bcf1_t>& body, const bcf_fmt_t* const format_ptr, const uint8_t* data_ptr);

/**
* @brief Moves a genotype.
* @param other The other genotype object to move.
*/
Genotype(Genotype&& other) noexcept;

// only for diploids
/**
* @brief Checks if this genotype is any type of heterozygous call.
* @return True if this GT is a het.
* @note only for diploids.
*/
bool is_het() const;
/**
* @brief Checks if this genotype is a homozygous call that is non-reference.
* @return True if this GT is a hom var.
* @note only for diploids.
*/
bool is_hom_var() const;
/**
* @brief Checks if this genotype is a homozygous call that is reference.
* @return True if this GT is a hom ref.
* @note only for diploids.
*/
bool is_hom_ref() const;

/**
* @brief Returns a bit encoding of the alleles.
* @return A bit encoding of the alleles.
*/
GenotypeEncoding value() const; // returns the number of the genotype. => 0/1 = 1 | 0/0 = 0 | 1/1 = 2 ... | 0/2

/**
* @brief Returns a genotype encoding value of each allele.
* @param index From [0,size())
* @return A genotype encoding value of the allele at index.
*/
GenotypeEncodingValue operator[](const uint32_t index) const; // getter

/**
* @brief Number of alleles in this genotype.
* @return Number of alleles in this genotype.
*/
uint32_t size() const;

/**
* @brief The alleles in this genotype.
* @return The alleles in this genotype as strings.
*/
vector<string> alleles() const; // returns the actual alleles (e.g. A/T or TG/C or T/T/C or T/T/T/T/T ... )

private:
const std::shared_ptr<bcf1_t> m_body;
const bcf_fmt_t* const m_format_ptr;
const uint8_t* m_data_ptr;
};

}

#endif
97 changes: 97 additions & 0 deletions gamgee/genotype_encoding.h
@@ -0,0 +1,97 @@
#ifndef __gamgee__genotype_encoding__
#define __gamgee__genotype_encoding__

#include "utils/hts_memory.h"
#include "utils/utils.h"
#include "utils/format_field_type.h"
#include "utils/genotype_utils.h"

#include "htslib/vcf.h"

#include <memory>
#include <utility>

namespace gamgee {

/**
* @brief Encodes a diploid genotype as bits.
* @note Missing genotypes are not implemented yet.
*/
class GenotypeEncoding {

public:
/**
* @brief Constructs a genotype encoding.
* @param body The shared memory variant "line" from a vcf, or bcf.
* @param format_ptr The GT field from the line.
* @param data_ptr The GT for this sample.
*/
GenotypeEncoding(const std::shared_ptr<bcf1_t>& body, const bcf_fmt_t* const format_ptr, const uint8_t* data_ptr) :
m_body {body},
m_format_ptr {format_ptr},
m_data_ptr {data_ptr} {
}

/**
* @brief A bit encoding for the first two alleles.
* @return A bit encoding for the first two alleles.
* @note Diploid only.
*/
uint32_t bits() const {
const auto allele_1 = static_cast<uint32_t>(GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 0));
const auto allele_2 = static_cast<uint32_t>(GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, 1));
return (allele_1 << 16) | allele_2;
}

/**
* @brief Checks if another genotype encoding does not equal this encoding.
* @param other The other genotype encoding to compare.
* @return False if other genotype encoding equals this encoding.
*/
bool operator!=(const GenotypeEncoding& other) const {
const auto count = GenotypeUtils::allele_count(m_format_ptr);
if (count != GenotypeUtils::allele_count(other.m_format_ptr))
return true;
for (auto i = 0; i < count; i++) {
if (GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, i) != GenotypeUtils::allele_integer(other.m_format_ptr, other.m_data_ptr, i)) {
return true;
}
}
return false;
}

/**
* @brief Checks if another genotype encoding equals this encoding.
* @param other The other genotype encoding to compare.
* @return True if other genotype encoding equals this encoding.
*/
bool operator==(const GenotypeEncoding& other) const {
const auto count = GenotypeUtils::allele_count(m_format_ptr);
if (count != GenotypeUtils::allele_count(other.m_format_ptr))
return false;
for (auto i = 0; i < count; i++) {
if (GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, i) != GenotypeUtils::allele_integer(other.m_format_ptr, other.m_data_ptr, i)) {
return false;
}
}
return true;
}

/**
* @brief Checks if the first allele is missing.
* @return True if first allele is missing.
* @warning this function assumes that if the first allele is missing, then all alleles are missing.
*/
bool is_missing() const {
return GenotypeUtils::allele_missing(m_format_ptr, m_data_ptr, 0);
}

private:
const std::shared_ptr<bcf1_t> m_body;
const bcf_fmt_t* const m_format_ptr;
const uint8_t* m_data_ptr;
};

}

#endif
88 changes: 88 additions & 0 deletions gamgee/genotype_encoding_value.h
@@ -0,0 +1,88 @@
#ifndef __gamgee__genotype_encoding_value__
#define __gamgee__genotype_encoding_value__

#include "utils/hts_memory.h"
#include "utils/utils.h"
#include "utils/format_field_type.h"
#include "utils/genotype_utils.h"

#include "htslib/vcf.h"

#include <memory>
#include <string>

namespace gamgee {

using namespace std;

/**
* @brief Decodes a genotype allele.
*/
class GenotypeEncodingValue {
public:
/**
* @brief Constructs a genotype encoding value.
* @param body The shared memory variant "line" from a vcf, or bcf.
* @param format_ptr The GT field from the line.
* @param data_ptr The GT for this sample.
*/
GenotypeEncodingValue(const std::shared_ptr<bcf1_t>& body, const bcf_fmt_t* const format_ptr,
const uint8_t* data_ptr, const uint32_t allele_index) :
m_body {body},
m_format_ptr {format_ptr},
m_data_ptr {data_ptr},
m_allele_index {allele_index} {
}

/**
* @brief Checks if another genotype encoding value does not equal this encoding value.
* @param other The other genotype encoding value to compare.
* @return False if other genotype encoding value equals this encoding value.
*/
bool operator!=(const GenotypeEncodingValue& other) const {
if (m_data_ptr == other.m_data_ptr) {
return m_allele_index != other.m_allele_index;
}
return allele() != other.allele();
}

/**
* @brief Checks if another genotype encoding value equals this encoding value.
* @param other The other genotype encoding value to compare.
* @return True if other genotype encoding value equals this encoding value.
*/
bool operator==(const GenotypeEncodingValue& other) const {
if (m_data_ptr == other.m_data_ptr) {
return m_allele_index == other.m_allele_index;
}
return allele() == other.allele();
}

/**
* @brief A single allele from a genotype.
* @return A single allele from a genotype.
*/
string allele() const // returns the actual allele (e.g. A or T or AT or ATCAG...) returns empty string when is missing.
{
const auto allele_int = GenotypeUtils::allele_integer(m_format_ptr, m_data_ptr, m_allele_index);
return GenotypeUtils::allele_integer_to_string(m_body, allele_int);
}

/**
* @brief Checks if the allele at this index is missing.
* @return True if the allele at this index is missing.
*/
bool is_missing() const {
return GenotypeUtils::allele_missing(m_format_ptr, m_data_ptr, m_allele_index);
}

private:
const std::shared_ptr<bcf1_t> m_body;
const bcf_fmt_t* const m_format_ptr;
const uint8_t* m_data_ptr;
const uint32_t m_allele_index;
};

}

#endif

0 comments on commit ea3d8fb

Please sign in to comment.