Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add FNV1a hash function #1760

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions include/oxli/hashtable.hh
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,118 @@ public:
}
};


// Walks a NUL-terminated sequence one position at a time and returns the
// FNV-based hash (via _hash_fnv) of each window of `k` characters.
class FNVKmerHashIterator : public KmerHashIterator
{
    const char * _seq;
    // Stored unsigned to match the constructor argument: plain `char` may be
    // signed, which would turn k >= 128 into a negative window length and
    // break done(), the substring extraction and get_end_pos().
    const unsigned char _ksize;
    unsigned int index;      // start offset of the next k-mer to hash
    unsigned int length;     // strlen() of the sequence, taken at construction
    bool _initialized;       // set once first() or next() has been called
public:
    // seq: NUL-terminated sequence; the pointer is kept, not copied.
    // k:   number of characters in each hashed window.
    FNVKmerHashIterator(const char * seq, unsigned char k) :
        _seq(seq), _ksize(k), index(0), length(strlen(seq)),
        _initialized(false)
    {
    }

    // Marks the iterator as started and returns the hash of the next k-mer.
    HashIntoType first()
    {
        _initialized = true;
        return next();
    }

    // Returns the hash of the k-mer at the current offset and advances by one.
    // Throws oxli_exception when no complete k-mer remains.
    HashIntoType next()
    {
        _initialized = true;

        if (done()) {
            throw oxli_exception("past end of iterator");
        }

        const std::string kmer(_seq + index, _ksize);
        index += 1;
        return _hash_fnv(kmer, _ksize);
    }

    // True when fewer than k characters remain from the current offset.
    bool done() const
    {
        return (index + _ksize > length);
    }

    // Start offset of the most recently returned k-mer (0 before first use).
    unsigned int get_start_pos() const
    {
        if (!_initialized) {
            return 0;
        }
        return index - 1;
    }

    // One past the last offset of the most recently returned k-mer
    // (k before first use).
    unsigned int get_end_pos() const
    {
        if (!_initialized) {
            return _ksize;
        }
        return index + _ksize - 1;
    }
};


// Hashtable whose k-mer hash is computed with _hash_fnv. Strand-specific
// hashing and unhashing are not provided and throw oxli_value_exception.
class FNVHashtable : public oxli::Hashtable
{
public:
    explicit FNVHashtable(WordLength ksize, Storage * s)
        : Hashtable(ksize, s) { };

    // Hashes the first _ksize characters of `kmer`.
    // Throws oxli_value_exception when `kmer` is shorter than _ksize.
    inline
    virtual
    HashIntoType
    hash_dna(const char * kmer) const
    {
        if (!(strlen(kmer) >= _ksize)) {
            throw oxli_value_exception("Supplied kmer string doesn't match the underlying k-size.");
        }
        // The length check above admits strings longer than _ksize, but
        // _hash_fnv asserts an exact length and hashes up to the NUL, so pass
        // it exactly the leading _ksize characters.
        return _hash_fnv(std::string(kmer, _ksize), _ksize);
    }

    // Not supported by this table; always throws oxli_value_exception.
    inline virtual HashIntoType
    hash_dna_top_strand(const char * kmer) const
    {
        throw oxli_value_exception("not implemented");
    }

    // Not supported by this table; always throws oxli_value_exception.
    inline virtual HashIntoType
    hash_dna_bottom_strand(const char * kmer) const
    {
        throw oxli_value_exception("not implemented");
    }

    // Not supported by this table; always throws oxli_value_exception.
    inline virtual std::string
    unhash_dna(HashIntoType hashval) const
    {
        throw oxli_value_exception("not implemented");
    }

    // Returns an owning pointer to a new FNVKmerHashIterator over `sp`
    // using this table's k-size.
    virtual KmerHashIteratorPtr new_kmer_iterator(const char * sp) const
    {
        KmerHashIterator * ki = new FNVKmerHashIterator(sp, _ksize);
        return unique_ptr<KmerHashIterator>(ki);
    }

    // Delegates to the storage object, passing the file name and k-size.
    virtual void save(std::string filename)
    {
        store->save(filename, _ksize);
    }

    // Delegates loading to the storage object, then calls _init_bitstuff().
    virtual void load(std::string filename)
    {
        store->load(filename, _ksize);
        _init_bitstuff();
    }
};


// Hashtable-derived class with ByteStorage.
class Counttable : public oxli::MurmurHashtable
{
Expand Down
8 changes: 8 additions & 0 deletions include/oxli/kmer_hash.hh
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,14 @@ HashIntoType _hash_murmur(const std::string& kmer, const WordLength k,
HashIntoType _hash_murmur_forward(const std::string& kmer,
const WordLength k);


// FNV-1a based k-mer hashing.
//
// Two-argument form: returns the combined hash of `kmer` and its reverse
// complement (see the four-argument form); the per-strand hashes are discarded.
HashIntoType _hash_fnv(const std::string& kmer, const WordLength k);
// Four-argument form: stores the hash of `kmer` in `h` and the hash of its
// reverse complement in `r`, and returns `h ^ r`. When `kmer` equals its own
// reverse complement, `r` is set to `h` and `h` is returned instead.
// The definition asserts that kmer.length() == k.
HashIntoType _hash_fnv(const std::string& kmer, const WordLength k,
HashIntoType& h, HashIntoType& r);
// Returns only the hash of `kmer` itself (the `h` output of the
// four-argument _hash_fnv).
HashIntoType _hash_fnv_forward(const std::string& kmer,
const WordLength k);


// Function to support k-mer banding.
std::pair<uint64_t, uint64_t> compute_band_interval(unsigned int num_bands,
unsigned int band);
Expand Down
56 changes: 56 additions & 0 deletions src/oxli/kmer_hash.cc
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,62 @@ HashIntoType _hash_murmur_forward(const std::string& kmer, const WordLength k)
return h;
}


// 64-bit FNV-1a hash of the `len` bytes starting at `data`.
//
// Starts from the offset basis 0xcbf29ce484222325 and, for each byte, XORs the
// byte into the hash and then multiplies by the prime 0x100000001b3.
// Returns the offset basis unchanged when `len` is 0.
inline const uint64_t hash_64_fnv1a(const char* data, const uint64_t len) {
    uint64_t hash = 0xcbf29ce484222325;
    const uint64_t prime = 0x100000001b3;

    for (uint64_t i = 0; i < len; ++i) {
        // Go through unsigned char: where plain char is signed, a byte >= 0x80
        // would otherwise be sign-extended and flip the upper 56 bits.
        hash ^= static_cast<unsigned char>(data[i]);
        hash *= prime;
    }

    return hash;
}

// FNV-1a 64-bit offset basis and prime.
constexpr uint64_t val_64_const = 0xcbf29ce484222325;
constexpr uint64_t prime_64_const = 0x100000001b3;

// 64-bit FNV-1a hash of a NUL-terminated string, usable in constant
// expressions. `value` is the running hash; callers normally leave it at the
// default offset basis. Written as single-return recursion so it remains a
// valid C++11 constexpr function.
inline constexpr uint64_t hash_64_fnv1a_const(const char* const str, const uint64_t value = val_64_const) noexcept {
    // Convert through unsigned char so bytes >= 0x80 are not sign-extended
    // on platforms where plain char is signed.
    return (str[0] == '\0')
        ? value
        : hash_64_fnv1a_const(
              &str[1],
              (value ^ static_cast<uint64_t>(static_cast<unsigned char>(str[0]))) * prime_64_const);
}

// Convenience overload: returns the combined FNV hash of `kmer` without
// exposing the individual strand hashes.
HashIntoType _hash_fnv(const std::string& kmer, const WordLength k)
{
    // Scratch outputs required by the four-argument overload; discarded here.
    HashIntoType forward_hash = 0, revcomp_hash = 0;
    return oxli::_hash_fnv(kmer, k, forward_hash, revcomp_hash);
}

// Computes the FNV-1a hash of `kmer` into `h` and of its reverse complement
// into `r`, and returns the strand-independent combination `h ^ r`.
// A k-mer that is its own reverse complement yields r == h and returns h.
HashIntoType _hash_fnv(const std::string& kmer, const WordLength k,
                       HashIntoType& h, HashIntoType& r)
{
    h = hash_64_fnv1a_const(kmer.c_str());

    assert(kmer.length() == k); // an assumption of the below code
    const std::string revcomp = oxli::_revcomp(kmer);
    const bool self_complement = (revcomp == kmer);

    // XOR-ing a hash with itself would collapse every self-complementary
    // k-mer to 0, so in that case the forward hash is used on its own.
    r = self_complement ? h : hash_64_fnv1a_const(revcomp.c_str());
    return self_complement ? h : (h ^ r);
}

// Returns the FNV hash of `kmer` as given, ignoring its reverse complement.
// Delegates to the four-argument _hash_fnv and keeps only its `h` output.
HashIntoType _hash_fnv_forward(const std::string& kmer, const WordLength k)
{
    HashIntoType forward_hash = 0;
    HashIntoType revcomp_hash = 0;   // filled in by the callee, unused here

    oxli::_hash_fnv(kmer, k, forward_hash, revcomp_hash);
    return forward_hash;
}


std::pair<uint64_t, uint64_t> compute_band_interval(unsigned int num_bands,
unsigned int band)
{
Expand Down