Skip to content

Commit

Permalink
Bugfix #151
Browse files Browse the repository at this point in the history
  • Loading branch information
bricoletc committed Sep 25, 2019
1 parent 8ff5ca3 commit 73e4835
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 6 deletions.
7 changes: 7 additions & 0 deletions libgramtools/include/prg/prg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@

namespace gram {

using marker_map = std::unordered_map<Marker,int>;
/**
* The key data structure holding all of the information used for vBWT backward search.
*/
struct PRG_Info {
FM_Index fm_index; /**< FM_index as a `sdsl::csa_wt` from the `sdsl` library. @note Accessing this data structure ([]) accesses the suffix array. */
sdsl::int_vector<> encoded_prg;
marker_map last_allele_positions;

sdsl::int_vector<> sites_mask; /**< Stores the site number at each allele position. Variant markers and outside variant sites get 0.*/
sdsl::int_vector<> allele_mask; /**< Stores the allele index at each allele position. Variant markers and outside variant sites get 0. */
Expand All @@ -46,6 +48,11 @@ namespace gram {
uint64_t max_alphabet_num;
};

/**
* Maps the end positions of sites, producing a map with keys being the even `Marker` for that site.
*/
marker_map map_site_ends(sdsl::int_vector<> const& encoded_prg);

/**
* Performs a rank query on the BWT. given a `dna_base` and a `upper_index`.
* @param upper_index the index into the suffix array/BWT.
Expand Down
2 changes: 2 additions & 0 deletions libgramtools/src/build/build.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ void commands::build::run(const Parameters &parameters) {
<< prg_info.encoded_prg.size()
<< std::endl;

prg_info.last_allele_positions = map_site_ends(prg_info.encoded_prg);

prg_info.max_alphabet_num = get_max_alphabet_num(prg_info.encoded_prg);
std::cout << "Maximum alphabet character: " << prg_info.max_alphabet_num << std::endl;
if (prg_info.max_alphabet_num <= 4) { // No personalised reference to infer; exit
Expand Down
16 changes: 16 additions & 0 deletions libgramtools/src/prg/prg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@

using namespace gram;

marker_map gram::map_site_ends(sdsl::int_vector<> const& encoded_prg) {
marker_map last_allele_indices;
int pos{-1};
for (auto const& s : encoded_prg) {
pos++;
if (s <= 4) continue;
if (s % 2 == 1) continue;
if (last_allele_indices.find(s) != last_allele_indices.end()) {
last_allele_indices.erase(s);
}
last_allele_indices.insert({s, pos});
}
return last_allele_indices;
}

uint64_t gram::dna_bwt_rank(const uint64_t &upper_index,
const Marker &dna_base,
const PRG_Info &prg_info) {
Expand Down Expand Up @@ -150,6 +165,7 @@ PRG_Info gram::load_prg_info(const Parameters &parameters) {
PRG_Info prg_info = {};

prg_info.encoded_prg = parse_raw_prg_file(parameters.linear_prg_fpath);
prg_info.last_allele_positions = map_site_ends(prg_info.encoded_prg);
prg_info.max_alphabet_num = get_max_alphabet_num(prg_info.encoded_prg);

prg_info.fm_index = load_fm_index(parameters);
Expand Down
13 changes: 7 additions & 6 deletions libgramtools/src/quasimap/coverage/allele_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,13 @@ uint64_t gram::set_site_base_coverage(Coverage &coverage,
std::pair<uint64_t, uint64_t> gram::site_marker_prg_indexes(const uint64_t &site_marker, const PRG_Info &prg_info) {
auto alphabet_rank = prg_info.fm_index.char2comp[site_marker];
auto first_sa_index = prg_info.fm_index.C[alphabet_rank];
auto second_sa_index = first_sa_index + 1;

auto first_prg_index = prg_info.fm_index[first_sa_index];
auto second_prg_index = prg_info.fm_index[second_sa_index];
// Need to be sure we are dealing with a site marker so that we know we can look for its even counterpart in map
assert(site_marker % 2 == 1);
auto second_prg_index = prg_info.last_allele_positions.at(site_marker + 1);

if (first_prg_index < second_prg_index)
return std::make_pair(first_prg_index, second_prg_index);
else
return std::make_pair(second_prg_index, first_prg_index);
return std::make_pair(first_prg_index, second_prg_index);
}


Expand Down Expand Up @@ -165,6 +163,9 @@ void sa_index_allele_base_coverage(Coverage &coverage,
if (last_site_prg_start_end.first != 0) {
site_prg_start_end = site_marker_prg_indexes(site_marker, prg_info);
read_bases_consumed += site_prg_start_end.first - last_site_prg_start_end.second - 1;
if (read_bases_consumed > read_length) throw std::out_of_range(
"ERROR:Consumed more bases than the read length."
"Check that the site boundaries are properly detected.");
}
last_site_prg_start_end = site_prg_start_end;

Expand Down
1 change: 1 addition & 0 deletions libgramtools/submods/src_common/generate_prg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ PRG_Info generate_prg_info(const std::string &prg_raw) {
PRG_Info prg_info;
prg_info.fm_index = generate_fm_index(parameters);
prg_info.encoded_prg = encoded_prg;
prg_info.last_allele_positions = map_site_ends(prg_info.encoded_prg);
prg_info.sites_mask = generate_sites_mask(encoded_prg);
prg_info.allele_mask = generate_allele_mask(encoded_prg);

Expand Down

0 comments on commit 73e4835

Please sign in to comment.