Skip to content

Commit

Permalink
add serialisation support
Browse files Browse the repository at this point in the history
  • Loading branch information
alexbowe committed Mar 7, 2016
1 parent 05e79ae commit 5d53513
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 30 deletions.
49 changes: 49 additions & 0 deletions dna_bv_rs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#ifndef DNA_BV_RS_H
#define DNA_BV_RS_H

#include <iostream>
#include <string>
#include <sdsl/bit_vectors.hpp>

#include "utility.hpp"
Expand All @@ -15,10 +16,12 @@ using std::endl;
using std::vector;
using std::array;
using std::bitset;
using std::string;

using sdsl::sd_vector;
using sdsl::hyb_vector;
using sdsl::bit_vector;
using sdsl::structure_tree_node;
}

// TODO: make t_symbol_bv, t_minus_bv, t_terminal_bv all parameters
Expand All @@ -40,6 +43,10 @@ class dna_bv_rs {
array<select_1_type, dna_sigma+1> m_select_supports;

public:
typedef size_t size_type;

// Default constructor with an empty body; it exists so that an object can be
// created first and then populated via load().
// NOTE(review): m_size is not explicitly set here - confirm it has a
// well-defined value before size() is called on an object that was not load()ed.
dna_bv_rs() {}

template<typename InputIterator>
dna_bv_rs(InputIterator in, InputIterator end) : m_size(std::distance(in, end)) {
array<bit_vector, dna_sigma+1> temp;
Expand Down Expand Up @@ -92,6 +99,48 @@ class dna_bv_rs {
size_t size() const {
return m_size;
}

/**
 * Write this structure to `out` and record its layout in the structure tree.
 *
 * Members are written in this order: m_size, then every bit vector, then every
 * rank support, then every select support. load() reads them back in the same
 * order.
 *
 * @param out   Stream the members are written to.
 * @param v     Parent node in the structure tree (may be NULL).
 * @param name  Name under which this object is registered in the tree.
 * @return      Sum of the byte counts reported by the individual writes.
 */
size_type serialize(std::ostream& out, structure_tree_node* v=NULL, string name="") const {
    using namespace sdsl;

    structure_tree_node* child = structure_tree::add_child(v, name, util::class_name(*this));
    size_type written_bytes = 0;

    written_bytes += write_member(m_size, out, child, "m_size");

    // The child names are built with std::to_string: adding an int directly to a
    // string literal is pointer arithmetic and would drop the first i characters
    // of the name instead of appending the index.
    for (int i = 0; i < dna_sigma+1; i++) {
        written_bytes += m_bit_vectors[i].serialize(out, child, "m_bit_vectors" + std::to_string(i));
    }
    for (int i = 0; i < dna_sigma+1; i++) {
        written_bytes += m_rank_supports[i].serialize(out, child, "m_rank_supports" + std::to_string(i));
    }
    for (int i = 0; i < dna_sigma+1; i++) {
        written_bytes += m_select_supports[i].serialize(out, child, "m_select_supports" + std::to_string(i));
    }

    structure_tree::add_size(child, written_bytes);
    return written_bytes;
}

void load(std::istream& in) {
using namespace sdsl;
read_member(m_size, in);

for (int i = 0; i < dna_sigma+1; i++) {
m_bit_vectors[i].load(in);
}

for (int i = 0; i < dna_sigma+1; i++) {
m_rank_supports[i].load(in);
m_rank_supports[i].set_vector(&m_bit_vectors[i]);
}

for (int i = 0; i < dna_sigma+1; i++) {
m_select_supports[i].load(in);
m_select_supports[i].set_vector(&m_bit_vectors[i]);
}
}


};

} // namespace index
Expand Down
83 changes: 53 additions & 30 deletions dna_rs_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
#include "dna_bv_rs.hpp"
#include "utility.hpp"
#include <chrono>
#include <cstdio>
#include <sdsl/wavelet_trees.hpp>
#include <sdsl/vectors.hpp>
#include <sdsl/bit_vectors.hpp>
#include <boost/filesystem.hpp>
#include <boost/mpl/list.hpp>
#include <boost/mpl/begin_end.hpp>
#include <boost/mpl/next_prior.hpp>
Expand Down Expand Up @@ -122,10 +124,11 @@ template<typename ... types> using type_list = boost::mpl::list<types...>;
template <typename T> void COSMO_##__LINE__::call()


// TODO: move to benchmark program instead
TEST_CASE("Large Query", "[benchmark]") {
using namespace sdsl;
size_t n = 10e5;
size_t m = 10e5;
size_t n = 50e3;
size_t m = 50e3;
auto input = cosmo::random_string("$acgtACGT", n);
int_vector<8> temp(input.size());
for (size_t i = 0; i < input.size(); ++i) temp[i] = input[i];
Expand Down Expand Up @@ -200,48 +203,68 @@ TEST_CASE("Large Query", "[benchmark]") {
}
end = std::chrono::steady_clock::now();
std::cout << "WT Select average time per element: " << std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count()/double(m) << " ns" << std::endl;

std::cout << "RS average bits per element : " << bits_per_element(rs) << std::endl;
std::cout << "WT average bits per element : " << bits_per_element(wt) << std::endl;
}

// NOTE: Add index types here in type_list template parameter
typedef cosmo::type_list<dna_bv_rs<>> rank_types;
TYPED_TEST_CASE("DNA index queries are answered", "[index][dna_rs]", rank_types) {
TYPED_TEST_CASE("DNA index", "[index][dna_rs]", rank_types) {
using namespace sdsl;

const std::string input = "acgt$acgt$ACGTacgt";
int_vector<8> temp(input.size());
for (size_t i = 0; i < input.size(); ++i) temp[i] = input[i];
wt_blcd<> wt;
construct_im(wt, temp);
vector<T> test_objects;

T a(input);
test_objects.push_back(a);

// Serialization
std::string temp_file = boost::filesystem::unique_path().native();
T b;
store_to_file(a, temp_file);
load_from_file(b, temp_file);
boost::filesystem::remove(temp_file);
test_objects.push_back(b);

string storage = "when stored in memory";
for (auto & x : test_objects) {
SECTION(storage) {
REQUIRE(x.size() == input.length());

// Access
SECTION("original elements are accessible", "[access]") {
for (size_t i = 0; i < x.size(); ++i) {
REQUIRE(x[i] == input[i]);
}
}

T x(input);

REQUIRE(x.size() == input.length());

// Access
SECTION("original elements are accessible", "[access]") {
for (size_t i = 0; i < x.size(); ++i) {
REQUIRE(x[i] == input[i]);
}
}

// Rank
SECTION("ranks are computed for each symbol over [0, i)", "[rank]") {
size_t c_i = 0;
for (char c : std::string("$acgtACGT")) {
for (size_t i = 0; i <= x.size(); ++i) {
REQUIRE(x.rank(i,c) == wt.rank(i,c));
// Rank
SECTION("ranks are computed for each symbol over [0, i)", "[rank]") {
size_t c_i = 0;
for (char c : std::string("$acgtACGT")) {
for (size_t i = 0; i <= x.size(); ++i) {
REQUIRE(x.rank(i,c) == wt.rank(i,c));
}
++c_i;
}
}
++c_i;
}
}

// Select
SECTION("Select is computed for each symbol over [1, m]", "[select]") {
size_t c_i = 0;
for (char c : std::string("$acgtACGT")) {
for (size_t i = 1; i <= x.rank(x.size(), c); ++i) {
REQUIRE(x.select(i,c) == wt.select(i,c));
// Select
SECTION("Select is computed for each symbol over [1, m]", "[select]") {
size_t c_i = 0;
for (char c : std::string("$acgtACGT")) {
for (size_t i = 1; i <= x.rank(x.size(), c); ++i) {
REQUIRE(x.select(i,c) == wt.select(i,c));
}
++c_i;
}
}
++c_i;
}
storage = "when loaded from disk";
}
}
6 changes: 6 additions & 0 deletions utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,15 @@
#include <boost/filesystem.hpp>
//#include <gmp.h>
#include <boost/random.hpp>
#include <sdsl/bit_vectors.hpp> // for size_in_bytes

#include "debug.hpp"

/**
 * Average number of bits used per stored element.
 *
 * Takes the byte size reported by sdsl::size_in_bytes(c), converts it to bits
 * and divides by c.size(). There is no guard for c.size() == 0; that case is a
 * floating-point division by zero.
 *
 * @param c  Container to measure; must provide size() and be accepted by
 *           sdsl::size_in_bytes.
 * @return   Bits per element as a double.
 */
template <typename Container>
double bits_per_element(const Container & c) {
    const double total_bits = sdsl::size_in_bytes(c) * 8.0;
    return total_bits / c.size();
}

namespace cosmo {

namespace fs = boost::filesystem;
Expand Down

0 comments on commit 5d53513

Please sign in to comment.