Skip to content

Commit

Permalink
file_integrity: use xxh64
Browse files Browse the repository at this point in the history
  • Loading branch information
enkore committed Jun 1, 2017
1 parent 23d591c commit 0221e31
Show file tree
Hide file tree
Showing 7 changed files with 997 additions and 32 deletions.
6 changes: 6 additions & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,9 @@ Folding CRC32

Borg includes an extremely fast folding implementation of CRC32, Copyright 2013 Intel Corporation,
licensed under the terms of the zlib license.

xxHash
------

XXH64, a fast non-cryptographic hash algorithm. Copyright 2012-2016 Yann Collet,
licensed under a BSD 2-clause license.
65 changes: 65 additions & 0 deletions src/borg/algorithms/crc32.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from ..helpers import bin_to_hex

from libc.stdint cimport uint32_t
from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release
from cpython.bytes cimport PyBytes_FromStringAndSize


cdef extern from "crc32_dispatch.c":
Expand All @@ -10,6 +12,29 @@ cdef extern from "crc32_dispatch.c":
int _have_clmul "have_clmul"()


cdef extern from "xxh64/xxhash.c":
# C-level declarations for the XXH64 API used by xxh64() and StreamingXXH64
# below. Note that the C source file itself (not a header) is named here.

# Fixed-size 8 byte buffer that receives a hash value in its canonical
# (byte string) form, see XXH64_canonicalFromHash().
ctypedef struct XXH64_canonical_t:
char digest[8]

# State of a streaming hash computation. No fields are declared, Cython
# code only ever passes pointers to it into the XXH64_* functions.
ctypedef struct XXH64_state_t:
pass # opaque

# Integer type of a computed hash value.
ctypedef unsigned long long XXH64_hash_t

# Status code returned by XXH64_reset() / XXH64_update(); callers in this
# file compare against XXH_OK to detect failure.
ctypedef enum XXH_errorcode:
XXH_OK,
XXH_ERROR

# One-shot API: hash `length` bytes starting at `input` using `seed`.
XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);

# Streaming API: reset the state with a seed, feed data in pieces via
# update, then obtain the hash value via digest (state is taken as const).
XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);

# Conversion between the integer hash value and its 8 byte canonical form.
# NOTE(review): byte order of the canonical form is not visible here -
# confirm against the xxHash documentation before relying on it.
void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);


cdef Py_buffer ro_buffer(object data) except *:
cdef Py_buffer view
PyObject_GetBuffer(data, &view, PyBUF_SIMPLE)
Expand Down Expand Up @@ -39,3 +64,43 @@ if have_clmul:
crc32 = crc32_clmul
else:
crc32 = crc32_slice_by_8


def xxh64(data, seed=0):
    """
    Compute the XXH64 hash of *data* in one shot.

    *data* is accessed through ro_buffer(); *seed* is coerced to a C
    unsigned long long. Returns the hash in canonical form as a bytes
    object of length 8.
    """
    # Coerce the seed first: if this raises, no buffer has been acquired yet.
    cdef unsigned long long seed_value = seed
    cdef XXH64_hash_t hash_value
    cdef XXH64_canonical_t canonical
    cdef Py_buffer view = ro_buffer(data)
    try:
        hash_value = XXH64(view.buf, view.len, seed_value)
    finally:
        # The buffer is only needed while hashing, always give it back.
        PyBuffer_Release(&view)
    XXH64_canonicalFromHash(&canonical, hash_value)
    return PyBytes_FromStringAndSize(<const char*> canonical.digest, 8)


cdef class StreamingXXH64:
    """
    Incremental XXH64 hasher.

    Feed data piece by piece with update(), then read the result with
    digest() (8 bytes, canonical form) or hexdigest() (the digest passed
    through bin_to_hex()).
    """
    cdef XXH64_state_t state

    def __cinit__(self, seed=0):
        # Coerce the Python-level seed to the C type XXH64_reset() expects.
        cdef unsigned long long seed_value = seed
        cdef XXH_errorcode status = XXH64_reset(&self.state, seed_value)
        if status != XXH_OK:
            raise Exception('XXH64_reset failed')

    def update(self, data):
        """Feed *data* (accessed through ro_buffer()) into the running hash."""
        cdef XXH_errorcode status
        cdef Py_buffer view = ro_buffer(data)
        try:
            status = XXH64_update(&self.state, view.buf, view.len)
        finally:
            # Release the buffer before reporting any failure.
            PyBuffer_Release(&view)
        if status != XXH_OK:
            raise Exception('XXH64_update failed')

    def digest(self):
        """Return the hash of the data fed so far as a bytes object of length 8."""
        cdef XXH64_canonical_t canonical
        XXH64_canonicalFromHash(&canonical, XXH64_digest(&self.state))
        return PyBytes_FromStringAndSize(<const char*> canonical.digest, 8)

    def hexdigest(self):
        """Return digest() converted by bin_to_hex()."""
        raw_digest = self.digest()
        return bin_to_hex(raw_digest)
Loading

0 comments on commit 0221e31

Please sign in to comment.