Skip to content

Commit

Permalink
Add FNV1a hash function
Browse files Browse the repository at this point in the history
  • Loading branch information
betatim committed Aug 22, 2017
1 parent acdacef commit 6cc0d0b
Show file tree
Hide file tree
Showing 3 changed files with 172 additions and 2 deletions.
116 changes: 114 additions & 2 deletions include/oxli/hashtable.hh
Original file line number Diff line number Diff line change
Expand Up @@ -540,12 +540,124 @@ public:
}
};


/**
 * Iterator over the k-mers of a sequence that yields, for each successive
 * window of `k` characters, the value returned by _hash_fnv().
 *
 * The sequence pointer is stored, not copied, so the caller must keep the
 * buffer alive (and NUL-terminated, since its length is taken with strlen)
 * for as long as the iterator is in use.
 */
class FNVKmerHashIterator : public KmerHashIterator
{
    const char * _seq;
    // Kept unsigned to match the constructor argument: a plain `char` may be
    // signed, which would turn any k >= 128 into a negative window size.
    const unsigned char _ksize;
    unsigned int index;      // start offset of the NEXT k-mer to be hashed
    unsigned int length;     // strlen() of the sequence, cached at construction
    bool _initialized;       // true once first()/next() has been called
public:
    /**
     * @param seq NUL-terminated sequence to iterate over (borrowed).
     * @param k   k-mer size, i.e. the width of the sliding window.
     */
    FNVKmerHashIterator(const char * seq, unsigned char k) :
        _seq(seq), _ksize(k), index(0), length(strlen(seq)),
        _initialized(false)
    {
    }

    /// Marks the iterator as started and returns the hash of the first k-mer.
    HashIntoType first()
    {
        _initialized = true;
        return next();
    }

    /**
     * Hashes the k-mer at the current offset and advances by one position.
     *
     * @return the _hash_fnv() value of the k-mer that was just consumed.
     * @throws oxli_exception if no full k-mer is left (see done()).
     */
    HashIntoType next()
    {
        _initialized = true;

        if (done()) {
            throw oxli_exception("past end of iterator");
        }

        const std::string kmer(_seq + index, _ksize);
        index += 1;
        return _hash_fnv(kmer, _ksize);
    }

    /// True when fewer than `k` characters remain from the current offset.
    bool done() const
    {
        return (index + _ksize > length);
    }

    /// Start offset of the most recently returned k-mer (0 before iteration).
    unsigned int get_start_pos() const
    {
        if (!_initialized) {
            return 0;
        }
        // `index` was already advanced past the k-mer that next() returned.
        return index - 1;
    }

    /// One-past-the-end offset of the most recently returned k-mer
    /// (`k` before iteration has started).
    unsigned int get_end_pos() const
    {
        if (!_initialized) {
            return _ksize;
        }
        return index + _ksize - 1;
    }
};


/**
 * Hashtable whose k-mer hashing is delegated to _hash_fnv().
 *
 * Strand-specific hashing and un-hashing are not provided: those members
 * always throw oxli_value_exception("not implemented").
 */
class FNVHashtable : public oxli::Hashtable
{
public:
    explicit FNVHashtable(WordLength ksize, Storage * s)
        : Hashtable(ksize, s) { }

    /**
     * Hash the first `_ksize` characters of `kmer`.
     *
     * @param kmer NUL-terminated string holding at least `_ksize` characters.
     * @throws oxli_value_exception if the string is shorter than `_ksize`.
     */
    inline
    virtual
    HashIntoType
    hash_dna(const char * kmer) const
    {
        if (strlen(kmer) < _ksize) {
            throw oxli_value_exception("Supplied kmer string doesn't match the underlying k-size.");
        }
        // Longer strings are accepted by the check above, so cut the input
        // down to exactly k characters: _hash_fnv() requires
        // kmer.length() == k and reverse-complements the string it is given.
        return _hash_fnv(std::string(kmer, _ksize), _ksize);
    }

    /// Not supported by this table; always throws oxli_value_exception.
    inline virtual HashIntoType
    hash_dna_top_strand(const char * /* kmer */) const
    {
        throw oxli_value_exception("not implemented");
    }

    /// Not supported by this table; always throws oxli_value_exception.
    inline virtual HashIntoType
    hash_dna_bottom_strand(const char * /* kmer */) const
    {
        throw oxli_value_exception("not implemented");
    }

    /// Not supported by this table; always throws oxli_value_exception.
    inline virtual std::string
    unhash_dna(HashIntoType /* hashval */) const
    {
        throw oxli_value_exception("not implemented");
    }

    /// Creates a new FNVKmerHashIterator over `sp` using this table's k-size;
    /// ownership of the iterator passes to the returned smart pointer.
    virtual KmerHashIteratorPtr new_kmer_iterator(const char * sp) const
    {
        KmerHashIterator * ki = new FNVKmerHashIterator(sp, _ksize);
        return unique_ptr<KmerHashIterator>(ki);
    }

    /// Forwards to the storage's save() with this table's k-size.
    virtual void save(std::string filename)
    {
        store->save(filename, _ksize);
    }

    /// Forwards to the storage's load(), then calls _init_bitstuff().
    virtual void load(std::string filename)
    {
        store->load(filename, _ksize);
        _init_bitstuff();
    }
};


// Hashtable-derived class with ByteStorage.
class Counttable : public oxli::MurmurHashtable
class Counttable : public oxli::FNVHashtable
{
public:
explicit Counttable(WordLength ksize, std::vector<uint64_t> sizes)
: MurmurHashtable(ksize, new ByteStorage(sizes)) { } ;
: FNVHashtable(ksize, new ByteStorage(sizes)) { } ;
};

// Hashtable-derived class with NibbleStorage.
Expand Down
8 changes: 8 additions & 0 deletions include/oxli/kmer_hash.hh
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,14 @@ HashIntoType _hash_murmur(const std::string& kmer, const WordLength k,
HashIntoType _hash_murmur_forward(const std::string& kmer,
const WordLength k);


// FNV-1a based k-mer hashing.
//
// Two-argument form: returns the combined hash of `kmer` (the forward hash
// XORed with the hash of its reverse complement, or just the forward hash
// when the k-mer equals its own reverse complement).
HashIntoType _hash_fnv(const std::string& kmer, const WordLength k);
// Four-argument form: same return value, and additionally stores the forward
// hash in `h` and the reverse-complement hash in `r`. The implementation
// asserts that kmer.length() == k.
HashIntoType _hash_fnv(const std::string& kmer, const WordLength k,
HashIntoType& h, HashIntoType& r);
// Returns only the forward hash of `kmer` (the `h` output of the
// four-argument _hash_fnv).
HashIntoType _hash_fnv_forward(const std::string& kmer,
const WordLength k);


// Function to support k-mer banding.
std::pair<uint64_t, uint64_t> compute_band_interval(unsigned int num_bands,
unsigned int band);
Expand Down
50 changes: 50 additions & 0 deletions src/oxli/kmer_hash.cc
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,56 @@ HashIntoType _hash_murmur_forward(const std::string& kmer, const WordLength k)
return h;
}


/**
 * 64-bit FNV-1a hash of the first `len` bytes at `data`.
 *
 * Starting from the offset basis, each byte is XORed into the running hash,
 * which is then multiplied by the FNV prime (modulo 2^64 via unsigned
 * wrap-around).
 *
 * @param data buffer to hash; must hold at least `len` readable bytes.
 * @param len  number of bytes to consume (0 yields the offset basis).
 * @return the FNV-1a hash value.
 */
inline const uint64_t hash_64_fnv1a(const char* data, const uint64_t len) {
    constexpr uint64_t kOffsetBasis = 0xcbf29ce484222325ULL;
    constexpr uint64_t kPrime = 0x100000001b3ULL;

    uint64_t hash = kOffsetBasis;
    // The index has the same unsigned 64-bit type as `len`, so the comparison
    // is not a signed/unsigned mix and cannot overflow for long inputs.
    for (uint64_t i = 0; i < len; ++i) {
        // Go through uint8_t so bytes >= 0x80 are not sign-extended.
        hash ^= static_cast<uint8_t>(data[i]);
        hash *= kPrime;
    }

    return hash;
}

// Convenience overload: returns the combined hash of `kmer` and discards the
// per-strand values produced by the four-argument _hash_fnv.
HashIntoType _hash_fnv(const std::string& kmer, const WordLength k)
{
    // Throw-away receivers for the forward / reverse-complement hashes.
    HashIntoType forward_hash = 0;
    HashIntoType revcomp_hash = 0;

    const HashIntoType combined =
        oxli::_hash_fnv(kmer, k, forward_hash, revcomp_hash);
    return combined;
}

// Hashes `kmer` and its reverse complement with 64-bit FNV-1a.
//
// On return `h` holds the hash of `kmer` and `r` the hash of its reverse
// complement (equal to `h` when the k-mer is its own reverse complement).
// The return value is `h ^ r`, or just `h` in the self-complementary case.
//
// Precondition (asserted): kmer.length() == k.
HashIntoType _hash_fnv(const std::string& kmer, const WordLength k,
                       HashIntoType& h, HashIntoType& r)
{
    // Check the precondition BEFORE hashing: hash_64_fnv1a reads exactly k
    // bytes, so a shorter string must be rejected before it is over-read.
    assert(kmer.length() == k);

    h = hash_64_fnv1a(kmer.c_str(), k);

    std::string rev = oxli::_revcomp(kmer);
    if (rev == kmer) {
        // self complement kmer, can't use bitwise XOR:
        // h ^ h would be 0 for every such k-mer.
        r = h;
        return h;
    }
    r = hash_64_fnv1a(rev.c_str(), k);

    return h ^ r;
}

// Returns the hash of `kmer` as given (the `h` output of the four-argument
// _hash_fnv); the reverse-complement hash is computed but not returned.
HashIntoType _hash_fnv_forward(const std::string& kmer, const WordLength k)
{
    HashIntoType forward_hash = 0;
    HashIntoType revcomp_hash = 0;

    // Return value (the combined hash) is intentionally ignored.
    oxli::_hash_fnv(kmer, k, forward_hash, revcomp_hash);

    return forward_hash;
}


std::pair<uint64_t, uint64_t> compute_band_interval(unsigned int num_bands,
unsigned int band)
{
Expand Down

0 comments on commit 6cc0d0b

Please sign in to comment.