Navigation Menu

Skip to content

Commit

Permalink
Adapt the Map to allow for larger pointers (up to 256GB of memory)
Browse files Browse the repository at this point in the history
  • Loading branch information
gangeli committed Aug 5, 2014
1 parent ece7594 commit 166fb25
Show file tree
Hide file tree
Showing 6 changed files with 144 additions and 57 deletions.
1 change: 1 addition & 0 deletions autogen.sh
Expand Up @@ -2,6 +2,7 @@

libtoolize --force
aclocal --force
autoheader
autoconf
automake --add-missing
chmod +x configure
51 changes: 28 additions & 23 deletions src/Map.cc
Expand Up @@ -10,16 +10,16 @@
// HashIntMap::HashIntMap
//
// Creates a map with `size` buckets, all of them zero-filled. A zero-filled
// bucket has its occupied flag clear, so the new map starts out empty.
//
// @param size  Number of map_entry buckets to allocate.
//
// If the buckets cannot be allocated, an error is printed and the process
// exits with status 1, like the other fatal conditions in this file.
HashIntMap::HashIntMap(const uint64_t& size) : dataLength(size) {
  // calloc zero-fills the memory and fails cleanly when
  // size * sizeof(map_entry) would overflow; malloc + memset would instead
  // write through an unchecked (possibly NULL) pointer.
  this->data = (map_entry*) calloc(size, sizeof(map_entry));
  // calloc may legitimately return NULL for a zero-sized request.
  if (this->data == NULL && size > 0) {
    printf("Could not allocate HashIntMap buckets!\n");
    exit(1);
  }
}

//
// HashIntMap::HashIntMap
//
// Copy constructor: allocates other.buckets() buckets and copies the raw
// bucket contents of `other` into them.
//
// @param other  The map to duplicate.
//
// If the buckets cannot be allocated, an error is printed and the process
// exits with status 1, like the other fatal conditions in this file.
HashIntMap::HashIntMap(const HashIntMap& other) : dataLength(other.buckets()) {
  const size_t numBytes = dataLength * sizeof(map_entry);
  this->data = (map_entry*) malloc(numBytes);
  if (this->data == NULL) {
    // malloc may return NULL for a zero-byte request; only a failed non-empty
    // allocation is fatal.
    if (numBytes > 0) {
      printf("Could not allocate HashIntMap buckets!\n");
      exit(1);
    }
    return;  // nothing to copy, and memcpy must not be handed a NULL pointer
  }
  memcpy(this->data, other.data, numBytes);
}

//
Expand All @@ -34,63 +34,68 @@ HashIntMap::~HashIntMap() {
//
// Stores `value` for the key identified by (hash, secondHash).
// Probing starts at bucket hash % dataLength and moves forward one bucket at
// a time, wrapping at the end of the table, until the loop condition accepts
// a bucket for secondHash. If the probe arrives back at its starting bucket,
// an error is printed and the process exits with status 1.
// NOTE(review): this listing carries two variants of some lines back to back
// (the `value` parameter, the loop condition, the final write): a
// uint32_t / checkChecksum / setChecksum+setDatum variant and a
// uint64_t / canOccupy / occupy variant. Confirm which one is live.
void HashIntMap::put(const uint32_t& hash,
const uint32_t& secondHash,
const uint32_t& value) {
const uint64_t& value) {
uint64_t index = hash % this->dataLength;
uint64_t originalIndex = index;
while (!checkChecksum(this->data[index], secondHash)) {
// Find a free bucket
while (!canOccupy(this->data[index], secondHash)) {
index = (index + 1) % this->dataLength;
if (index == originalIndex) {
printf("Overflowed allocated HashIntMap buckets [1]!\n");
exit(1);
}
}
setChecksum(&(this->data[index]), secondHash);
setDatum(&(this->data[index]), value);
// Occupy it
occupy(&(this->data[index]), secondHash, value);
}

//
// HashIntMap::increment
//
// Adds `incr` to the value stored for (hash, secondHash), capping the result
// at `limit`, and writes the result back to the bucket.
// The bucket is located with the same forward, wrapping probe as put(); if
// the probe arrives back at its starting bucket, an error is printed and the
// process exits with status 1.
// NOTE(review): this listing carries two variants of some lines back to back
// (the incr/limit parameters, the loop condition, the value computation, the
// final write): a uint32_t / checkChecksum / setDatum variant and a
// uint64_t / canOccupy / occupy variant. Confirm which one is live.
void HashIntMap::increment(const uint32_t& hash,
const uint32_t& secondHash,
const uint32_t& incr,
const uint32_t& limit) {
const uint64_t& incr,
const uint64_t& limit) {
uint64_t index = hash % this->dataLength;
uint64_t originalIndex = index;
while (!checkChecksum(this->data[index], secondHash)) {
// Find a free bucket
while (!canOccupy(this->data[index], secondHash)) {
index = (index + 1) % this->dataLength;
if (index == originalIndex) {
printf("Overflowed allocated HashIntMap buckets [2]!\n");
exit(1);
}
}
setChecksum(&(this->data[index]), secondHash);
uint32_t value = getDatum(this->data[index] + incr);
// Increment the value
uint64_t value = getDatum(this->data[index]) + incr;
// Clamp so the stored value never exceeds the caller's limit
if (value > limit) { value = limit; }
setDatum(&(this->data[index]), value);
// Occupy the bucket
occupy(&(this->data[index]), secondHash, value);
}

//
// HashIntMap::get
//
bool HashIntMap::get(const uint32_t& hash,
const uint32_t& secondHash,
uint32_t* toSet) const {
uint64_t* toSet) const {
uint64_t index = hash % this->dataLength;
uint64_t originalIndex = index;
uint64_t checksum = 0;
const uint32_t checksum = (secondHash << __MAP_CHECKSUM_LEFTSHIFT) >> __MAP_CHECKSUM_LEFTSHIFT;

// If this cell isn't occupied, we're out of luck
if (!occupied(this->data[index])) { return false; }
// If it is, let's see if it's the right checksum
do {
checksum = getChecksum(this->data[index]);
if (checksum == secondHash) {
// We're at index i; check the checksum
if (getChecksum(this->data[index]) == checksum) {
// Hooray, it matches!
*toSet = getDatum(this->data[index]);
return true;
}
// Doesn't match -- continue looking
index = (index + 1) % this->dataLength;
if (checksum != 0 && index == originalIndex) {
printf("Overflowed allocated HashIntMap buckets [3]!\n");
exit(1);
}
} while (checksum != 0);
} while (occupied(this->data[index]));
return false;
}

Expand Down
70 changes: 51 additions & 19 deletions src/Map.h
Expand Up @@ -3,6 +3,23 @@

#include <stdint.h>
#include <limits>
#include <assert.h>

// Number of high bits dropped from a 32-bit checksum so that it fits the
// 25-bit `checksum` bit-field below (32 - 25 = 7). Shifting a uint32_t left
// and then right by this amount keeps only its low 25 bits.
#define __MAP_CHECKSUM_LEFTSHIFT 7

// One bucket of the hash map. The three bit-fields add up to
// 25 + 38 + 1 = 64 bits:
//   checksum - truncated secondary hash identifying the key in this bucket
//   pointer  - the stored datum; 38 bits, i.e. values below 2^38
//   occupied - set once the bucket holds an entry
// Under g++ (__GNUG__) the struct is declared packed; otherwise it is
// declared with 8-byte alignment instead.
// NOTE(review): presumably the intent is sizeof(map_entry) == 8 on every
// compiler -- confirm, e.g. with a static_assert.
#ifdef __GNUG__
typedef struct {
#else
typedef struct alignas(8) {
#endif
uint64_t checksum:25,
pointer:38;
bool occupied:1;
#ifdef __GNUG__
} __attribute__ ((__packed__)) map_entry;
#else
} map_entry;
#endif

/**
* A really funky open-address hash map that's intended to be used by
Expand All @@ -16,22 +33,22 @@ class HashIntMap {

void put(const uint32_t& hash,
const uint32_t& secondHash,
const uint32_t& value);
const uint64_t& value);

void increment(const uint32_t& hash,
const uint32_t& secondHash,
const uint32_t& incr,
const uint32_t& limit);
const uint64_t& incr,
const uint64_t& limit);

/**
 * Convenience overload of increment() that forwards to the four-argument
 * form with the limit set to the largest uint32_t value.
 *
 * NOTE(review): this listing shows two `incr` parameter lines back to back
 * (uint32_t and uint64_t) -- confirm which one is live. Also, the default
 * cap is still the 32-bit maximum even though the four-argument form shown
 * in this header takes a 64-bit limit -- confirm that is intended.
 */
inline void increment(const uint32_t& hash,
const uint32_t& secondHash,
const uint32_t& incr) {
const uint64_t& incr) {
increment(hash, secondHash, incr, std::numeric_limits<uint32_t>::max());
}

bool get(const uint32_t& hash,
const uint32_t& secondHash,
uint32_t* toSet) const;
uint64_t* toSet) const;



Expand Down Expand Up @@ -63,27 +80,42 @@ class HashIntMap {
}

private:
uint64_t* data;
map_entry* data;
uint64_t dataLength;

inline uint32_t getChecksum(const uint64_t& cell) const {
return cell >> 32;
/** Returns the cell's `occupied` flag, widened to uint32_t (0 or 1). */
inline uint32_t occupied(const map_entry& cell) const {
return cell.occupied;
}
inline uint32_t checkChecksum(const uint64_t& cell, const uint32_t& checksum) const {
uint32_t empiricalChecksum = cell >> 32;
return empiricalChecksum == 0 || empiricalChecksum == checksum;

/** Returns the value of the cell's `checksum` bit-field. */
inline uint32_t getChecksum(const map_entry& cell) const {
return cell.checksum;
}
inline void setChecksum(uint64_t* cell, const uint32_t& checksum) {
uint64_t checksumLong = checksum;
*cell = (*cell) | (checksumLong << 32);
/**
 * Writes `checksum` into the cell's `checksum` bit-field.
 * The argument is 32 bits wide but the field is 25 bits wide, so only the
 * low 25 bits of `checksum` are retained by the assignment.
 */
inline void setChecksum(map_entry* cell, const uint32_t& checksum) {
cell->checksum = checksum;
}

inline uint32_t getDatum(const uint64_t& cell) const {
return (cell << 32) >> 32;
/** Returns the value of the cell's `pointer` bit-field as a uint64_t. */
inline uint64_t getDatum(const map_entry& cell) const {
return cell.pointer;
}
inline void setDatum(uint64_t* cell, const uint32_t& datum) {
uint64_t datumLong = datum;
*cell = (((*cell) >> 32) << 32) | datumLong;
/**
 * Writes `datum` into the cell's `pointer` bit-field.
 *
 * @param cell   The bucket to modify.
 * @param datum  The value to store; must be below 2^38.
 */
inline void setDatum(map_entry* cell, const uint64_t& datum) {
  // `pointer` is a 38-bit field, so all of the top 64 - 38 = 26 bits must be
  // clear. A value with any of them set would be silently truncated by the
  // bit-field assignment below.
  assert( (datum >> 38) == 0 );
  cell->pointer = datum;
}

/**
 * Tells whether an entry with the given checksum may be written to `cell`.
 * That is the case when the cell is unoccupied, or when the checksum it
 * already stores equals `checksum` truncated to the bits that survive the
 * __MAP_CHECKSUM_LEFTSHIFT shift.
 *
 * @return nonzero if the cell may be written, 0 otherwise.
 */
inline uint32_t canOccupy(const map_entry& cell, const uint32_t& checksum) const {
  // Drop the high bits of the incoming checksum before comparing.
  const uint32_t truncated = (checksum << __MAP_CHECKSUM_LEFTSHIFT) >> __MAP_CHECKSUM_LEFTSHIFT;
  const uint32_t stored = cell.checksum;
  return !cell.occupied || stored == truncated;
}

/**
 * Claims `cell` for an entry: records the checksum and the datum, then marks
 * the cell as occupied. The cell must be one that canOccupy() accepts for
 * `checksum`; this is asserted.
 */
inline void occupy(map_entry* cell,
                   const uint32_t& checksum,
                   const uint64_t& datum) {
  map_entry& bucket = *cell;
  assert( canOccupy(bucket, checksum) );
  setChecksum(&bucket, checksum);
  setDatum(&bucket, datum);
  bucket.occupied = true;
}
};

Expand Down
6 changes: 3 additions & 3 deletions src/Trie.cc
Expand Up @@ -336,7 +336,7 @@ bool LossyTrie::addCompletion(const uint32_t* fact,
uint32_t mainHash = fnv_32a_buf((uint8_t*) fact, factLength * sizeof(uint32_t), FNV1_32_INIT);
uint32_t auxHash = fnv_32a_buf((uint8_t*) fact, factLength * sizeof(uint32_t), 1154);
// Do the lookup
uint32_t pointer;
uint64_t pointer;
if (!completions.get(mainHash, auxHash, &pointer)) {
printf("No pointer was allocated for completion!\n");
std::exit(1);
Expand Down Expand Up @@ -393,7 +393,7 @@ void LossyTrie::addFact(const uint32_t* fact,
uint32_t mainHash = fnv_32a_buf((uint8_t*) fact, factLength * sizeof(uint32_t), FNV1_32_INIT);
uint32_t auxHash = fnv_32a_buf((uint8_t*) fact, factLength * sizeof(uint32_t), 1154);
// Do the lookup
uint32_t pointer;
uint64_t pointer;
if (!completions.get(mainHash, auxHash, &pointer)) {
printf("No pointer was allocated for completion!\n");
std::exit(1);
Expand All @@ -419,7 +419,7 @@ const bool LossyTrie::contains(const tagged_word* taggedFact,

// Look up the containment info
bool contains = false;
uint32_t pointer;
uint64_t pointer;
if (completions.get(mainHash, auxHash, &pointer)) {
contains = (completionData[pointer - 1] & 0x1) != 0;
}
Expand Down
2 changes: 1 addition & 1 deletion test/src/ITest.cc
Expand Up @@ -24,7 +24,7 @@ uint64_t rdtsc(){

using namespace std;
#define NUM_FACTS_TO_CHECK 1000000
#define NUM_WORDS_TO_CONSIDER 1000000
#define NUM_WORDS_TO_CONSIDER 100000

/**
* Make sure that we are reading the fact database in a repeatable order.
Expand Down

0 comments on commit 166fb25

Please sign in to comment.