Navigation Menu

Skip to content

Commit

Permalink
Implemented 4-xxhash as a faster hash option.
Browse files Browse the repository at this point in the history
This is a simplified adaptation of the xxhash where we just run 4 xxhashes in
parallel to create a 128-bit hash. The resulting hashes are endian specific,
but so are all the memory mapped files we're saving so it doesn't matter that
much.

Enabled this hash function by default.
  • Loading branch information
deplinenoise committed Apr 28, 2013
1 parent 8289b16 commit 2e7d00b
Show file tree
Hide file tree
Showing 20 changed files with 452 additions and 184 deletions.
2 changes: 1 addition & 1 deletion Makefile
Expand Up @@ -58,7 +58,7 @@ LIBTUNDRA_SOURCES = \
MemAllocLinear.cpp MemoryMappedFile.cpp PathUtil.cpp \
ScanCache.cpp Scanner.cpp SignalHandler.cpp StatCache.cpp \
TargetSelect.cpp Thread.cpp dlmalloc.c TerminalIo.cpp \
ExecUnix.cpp DigestCache.cpp FileSign.cpp
ExecUnix.cpp DigestCache.cpp FileSign.cpp HashSha1.cpp HashFast.cpp

T2LUA_SOURCES = LuaMain.cpp LuaInterface.cpp LuaInterpolate.cpp LuaJsonWriter.cpp \
LuaPath.cpp
Expand Down
20 changes: 20 additions & 0 deletions src/Common.hpp
Expand Up @@ -124,6 +124,8 @@ int GetCpuCount();

int CountTrailingZeroes(uint32_t word);

#if ENABLED(USE_LITTLE_ENDIAN)

inline uint32_t LoadBigEndian32(uint32_t v)
{
#if defined(__GNUC__)
Expand All @@ -146,6 +148,24 @@ inline uint64_t LoadBigEndian64(uint64_t v)
#endif
}

// Read a 32-bit little-endian value; on a little-endian host this is
// simply the identity transform.
inline uint32_t LoadLittleEndian32(uint32_t v)
{
  return v;
}

// Read a 64-bit little-endian value; on a little-endian host this is
// simply the identity transform.
inline uint64_t LoadLittleEndian64(uint64_t v)
{
  return v;
}

#else

#error implement me

#endif

//-----------------------------------------------------------------------------

struct FileAndHash
{
const char* m_Filename;
Expand Down
12 changes: 12 additions & 0 deletions src/Config.hpp
Expand Up @@ -9,6 +9,8 @@

// Set up build features

#define USE_SHA1_HASH NO
#define USE_FAST_HASH YES

#if defined(_DEBUG)
#define USE_DLMALLOC YES
Expand All @@ -23,14 +25,24 @@
#define RESTRICT __restrict
#define NORETURN __attribute__((noreturn))
#define ALIGNOF(t) __alignof(t)
#define ALIGN(n) __attribute__((aligned(n)))
#elif defined(_MSC_VER)
#define RESTRICT __restrict
#define NORETURN __declspec(noreturn)
#define ALIGNOF(t) __alignof(t)
#define ALIGN(n) __declspec(align(n))
#else
#error unsupported compiler
#endif

#if defined(__powerpc__)
#define USE_LITTLE_ENDIAN NO
#elif defined(_WIN32) || defined(__x86__) || defined(__x86_64__)
#define USE_LITTLE_ENDIAN YES
#else
#error add endian detection here
#endif

#if defined(__APPLE__)
#define TUNDRA_UNIX 1
#define TUNDRA_APPLE 1
Expand Down
4 changes: 2 additions & 2 deletions src/DagData.hpp
Expand Up @@ -76,7 +76,7 @@ struct DagGlobSignature
FrozenString m_Path;
HashDigest m_Digest;
};
static_assert(sizeof(DagGlobSignature) == 24, "struct layout");
static_assert(sizeof(HashDigest) + sizeof(FrozenString) == sizeof(DagGlobSignature), "struct layout");

struct EnvVarData
{
Expand Down Expand Up @@ -117,7 +117,7 @@ struct PassData

struct DagData
{
static const uint32_t MagicNumber = 0x1589010b;
static const uint32_t MagicNumber = 0x1589010b ^ kTundraHashMagic;

uint32_t m_MagicNumber;

Expand Down
4 changes: 3 additions & 1 deletion src/DagGenerator.cpp
Expand Up @@ -559,7 +559,9 @@ bool ComputeNodeGuids(const JsonArrayValue* nodes, int32_t* remap_table, TempNod
const JsonObjectValue* o1 = nodes->m_Values[i1]->AsObject();
const char* anno0 = FindStringValue(o0, "Annotation");
const char* anno1 = FindStringValue(o1, "Annotation");
Log(kError, "duplicate node guids: %s and %s share common GUID", anno0, anno1);
char digest[kDigestStringSize];
DigestToString(digest, guid_table[i].m_Digest);
Log(kError, "duplicate node guids: %s and %s share common GUID (%s)", anno0, anno1, digest);
return false;
}
}
Expand Down
3 changes: 3 additions & 0 deletions src/DigestCache.cpp
Expand Up @@ -99,6 +99,9 @@ bool DigestCacheSave(DigestCache* self, MemAllocHeap* serialization_heap)
BinarySegmentWritePointer(array_seg, BinarySegmentPosition(string_seg));
BinarySegmentWriteStringData(string_seg, r->m_String);
BinarySegmentWriteUint32(array_seg, 0); // m_Padding
#if ENABLED(USE_FAST_HASH)
BinarySegmentWriteUint32(array_seg, 0); // m_Padding
#endif
};

HashTableWalk(&self->m_Table, save_record);
Expand Down
6 changes: 5 additions & 1 deletion src/DigestCache.hpp
Expand Up @@ -23,13 +23,17 @@ namespace t2
uint32_t m_FilenameHash;
HashDigest m_ContentDigest;
FrozenString m_Filename;
#if ENABLED(USE_SHA1_HASH)
uint32_t m_Padding;
#elif ENABLED(USE_FAST_HASH)
uint32_t m_Padding[2];
#endif
};
static_assert(sizeof(FrozenDigestRecord) == 48, "struct size");

struct DigestCacheState
{
static const uint32_t MagicNumber = 0x12781fa3;
static const uint32_t MagicNumber = 0x12781fa6 ^ kTundraHashMagic;

uint32_t m_MagicNumber;
FrozenArray<FrozenDigestRecord> m_Records;
Expand Down
26 changes: 15 additions & 11 deletions src/Driver.cpp
Expand Up @@ -182,6 +182,15 @@ static bool DriverCheckDagSignatures(Driver* self)
{
const DagData* dag_data = self->m_DagData;

#if ENABLED(CHECKED_BUILD)
// Paranoia - make sure the data is sorted.
for (int i = 1, count = dag_data->m_NodeCount; i < count; ++i)
{
if (dag_data->m_NodeGuids[i] < dag_data->m_NodeGuids[i - 1])
Croak("DAG data is not sorted by guid");
}
#endif

Log(kDebug, "checking file signatures for DAG data");

if (dag_data->m_Passes.GetCount() > Driver::kMaxPasses)
Expand Down Expand Up @@ -287,7 +296,7 @@ static bool DriverCheckDagSignatures(Driver* self)
// Compare digest with the one stored in the signature block
if (0 != memcmp(&digest, &sig.m_Digest, sizeof digest))
{
char stored[41], actual[41];
char stored[kDigestStringSize], actual[kDigestStringSize];
DigestToString(stored, sig.m_Digest);
DigestToString(actual, digest);
Log(kInfo, "DAG out of date: file glob change for %s (%s => %s)", sig.m_Path.Get(), stored, actual);
Expand Down Expand Up @@ -586,19 +595,14 @@ bool DriverPrepareNodes(Driver* self, const char** targets, int target_count)
}
}

struct SortHelper
{
static int CompareNodeState(const void* l, const void* r)
{
int32_t pass_l = static_cast<const NodeState*>(l)->m_PassIndex;
int32_t pass_r = static_cast<const NodeState*>(r)->m_PassIndex;

return pass_l - pass_r;
}
// Sort the node state array based on which pass the nodes are in.
auto compare_node_passes = [](const NodeState& l, const NodeState& r) -> bool
{
return l.m_PassIndex < r.m_PassIndex;
};

// Sort the node state array based on which pass the nodes are in.
qsort(out_nodes, node_count, sizeof(out_nodes[0]), SortHelper::CompareNodeState);
std::sort(out_nodes, out_nodes + node_count, compare_node_passes);

// Now that our local order is established (sorted for pass purposes),
// initialize a remapping table from global (dag) index to local (state)
Expand Down
152 changes: 48 additions & 104 deletions src/Hash.cpp
Expand Up @@ -5,74 +5,16 @@
namespace t2
{

static inline uint32_t SHA1Rotate(uint32_t value, uint32_t bits)
{
return ((value) << bits) | (value >> (32 - bits));
}

static void HashBlock(const uint8_t* block, uint32_t* state)
{
uint32_t w[80];

// Prepare message schedule
for (int i = 0; i < 16; ++i)
{
w[i] =
(((uint32_t)block[(i*4)+0]) << 24) |
(((uint32_t)block[(i*4)+1]) << 16) |
(((uint32_t)block[(i*4)+2]) << 8) |
(((uint32_t)block[(i*4)+3]) << 0);
}

for (int i = 16; i < 80; ++i)
{
w[i] = SHA1Rotate(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1);
}

// Initialize working variables
uint32_t a = state[0];
uint32_t b = state[1];
uint32_t c = state[2];
uint32_t d = state[3];
uint32_t e = state[4];

// This is the core loop for each 20-word span.
#define SHA1_LOOP(start, end, func, constant) \
for (int i = (start); i < (end); ++i) \
{ \
uint32_t t = SHA1Rotate(a, 5) + (func) + e + (constant) + w[i]; \
e = d; d = c; c = SHA1Rotate(b, 30); b = a; a = t; \
}

SHA1_LOOP( 0, 20, ((b & c) ^ (~b & d)), 0x5a827999)
SHA1_LOOP(20, 40, (b ^ c ^ d), 0x6ed9eba1)
SHA1_LOOP(40, 60, ((b & c) ^ (b & d) ^ (c & d)), 0x8f1bbcdc)
SHA1_LOOP(60, 80, (b ^ c ^ d), 0xca62c1d6)

#undef SHA1_LOOP

// Update state
state[0] += a;
state[1] += b;
state[2] += c;
state[3] += d;
state[4] += e;
}

void HashInit(HashState* self)
{
self->m_MsgSize = 0;
self->m_BufUsed = 0;
static const uint32_t init_state[5] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 };
memcpy(self->m_State, init_state, sizeof init_state);
}
void HashInitImpl(HashStateImpl* impl);
void HashBlock(const uint8_t* data, HashStateImpl* state);
void HashFinalizeImpl(HashStateImpl* self, HashDigest* digest);

void HashUpdate(HashState* self, const void *data_in, size_t size)
{
const uint8_t* data = static_cast<const uint8_t*>(data_in);
size_t remain = size;
uint8_t* buffer = self->m_Buffer;
uint32_t* state = self->m_State;
HashStateImpl* state = &self->m_StateImpl;
size_t used = self->m_BufUsed;

while (remain > 0)
Expand Down Expand Up @@ -105,6 +47,42 @@ void HashUpdate(HashState* self, const void *data_in, size_t size)
self->m_MsgSize += size * 8;
}

// Quickie to generate a hash digest from a single string
void HashSingleString(HashDigest* digest_out, const char* string)
{
HashState h;
HashInit(&h);
HashUpdate(&h, string, strlen(string));
HashFinalize(&h, digest_out);
}

// Mix a 64-bit integer into the hash. The value is serialized as eight
// big-endian bytes so the resulting digest does not depend on host
// byte order.
void HashAddInteger(HashState* self, uint64_t value)
{
  uint8_t bytes[8];
  for (int i = 0; i < 8; ++i)
    bytes[i] = uint8_t(value >> (56 - 8 * i));
  HashUpdate(self, bytes, sizeof bytes);
}

// Mix a single zero byte into the hash, delimiting adjacent fields so
// e.g. ("ab","c") and ("a","bc") hash differently.
void HashAddSeparator(HashState* self)
{
  const uint8_t separator = 0;
  HashUpdate(self, &separator, 1);
}

// Reset a streaming hash state: algorithm-specific state to its initial
// values, empty block buffer, and zero message bytes seen so far.
void HashInit(HashState* self)
{
  HashInitImpl(&self->m_StateImpl);
  self->m_BufUsed = 0;
  self->m_MsgSize = 0;
}

void HashFinalize(HashState* self, HashDigest* digest)
{
uint8_t one_bit = 0x80;
Expand All @@ -119,75 +97,41 @@ void HashFinalize(HashState* self, HashDigest* digest)
// Set trailing one-bit
HashUpdate(self, &one_bit, 1);

// Emit null padding to to make room for 64 bits of size info in the last 512 bit block
// Emit null padding to make room for 64 bits of size info in the last block
static const uint8_t zeroes[128] = { 0 };

int diff = int(self->m_BufUsed) - 56;
int diff = int(self->m_BufUsed) - (sizeof(self->m_Buffer) - 8);

if (diff < 0)
HashUpdate(self, zeroes, -diff);
else
HashUpdate(self, zeroes, 64 - diff);
HashUpdate(self, zeroes, sizeof(self->m_Buffer) - diff);

CHECK(self->m_BufUsed == 56);
CHECK(self->m_BufUsed == sizeof(self->m_Buffer) - 8);

// Write size in bits as last 64-bits
HashUpdate(self, count_data, 8);

// Make sure we actually finalized our last block
CHECK(0 == self->m_BufUsed);

// Copy digest bytes out (our 5 state words)
for (int i = 0; i < 20; ++i)
{
uint32_t word = self->m_State[i >> 2];
int byte = 3 - (i & 3);
digest->m_Data[i] = (uint8_t) (word >> (byte * 8));
}
}

void HashAddInteger(HashState* self, uint64_t value)
{
uint8_t bytes[8];
bytes[0] = uint8_t(value >> 56);
bytes[1] = uint8_t(value >> 48);
bytes[2] = uint8_t(value >> 40);
bytes[3] = uint8_t(value >> 32);
bytes[4] = uint8_t(value >> 24);
bytes[5] = uint8_t(value >> 16);
bytes[6] = uint8_t(value >> 8);
bytes[7] = uint8_t(value >> 0);
HashUpdate(self, bytes, sizeof bytes);
}

void HashAddSeparator(HashState* self)
{
uint8_t zero = 0;
HashUpdate(self, &zero, 1);
HashFinalizeImpl(&self->m_StateImpl, digest);
}

void
DigestToString(char (&buffer)[41], const HashDigest& digest)
DigestToString(char (&buffer)[kDigestStringSize], const HashDigest& digest)
{
static const char hex[] = "0123456789abcdef";

for (int i = 0; i < 20; ++i)
for (int i = 0; i < sizeof(digest.m_Data); ++i)
{
uint8_t byte = digest.m_Data[i];
buffer[2 * i + 0] = hex[byte>>4];
buffer[2 * i + 1] = hex[byte & 0xf];
}

buffer[40] = '\0';
buffer[kDigestStringSize - 1] = '\0';
}

// Quickie to generate a hash digest from a single string
void HashSingleString(HashDigest* digest_out, const char* string)
{
HashState h;
HashInit(&h);
HashUpdate(&h, string, strlen(string));
HashFinalize(&h, digest_out);
}

}

0 comments on commit 2e7d00b

Please sign in to comment.