Skip to content

Commit

Permalink
Back out "Improve large-buffer crc32c performance by 50-85% [3/3]"
Browse files: browse the repository at this point in the history
Summary:
Original commit changeset: 5ee59d858cec

Original Phabricator Diff: D52679940

Reviewed By: udippant

Differential Revision: D52915808

fbshipit-source-id: 9aecf413abcd017467511ca16289fba019f71283
  • Loading branch information
Bin Liu authored and facebook-github-bot committed Jan 19, 2024
1 parent 4a2f1aa commit b930cba
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 118 deletions.
23 changes: 2 additions & 21 deletions folly/hash/Checksum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
#include <boost/crc.hpp>

#include <folly/CpuId.h>
#include <folly/external/fast-crc32/avx512_crc32c_v8s3x4.h>
#include <folly/external/fast-crc32/sse_crc32c_v8s3x3.h>
#include <folly/hash/detail/ChecksumDetail.h>

#if FOLLY_SSE_PREREQ(4, 2)
Expand Down Expand Up @@ -74,11 +72,6 @@ bool crc32c_hw_supported() {
return id.sse42();
}

bool crc32c_hw_supported_avx512() {
static folly::CpuId id;
return id.avx512vl();
}

bool crc32_hw_supported() {
static folly::CpuId id;
return id.sse42();
Expand All @@ -97,10 +90,6 @@ bool crc32c_hw_supported() {
return false;
}

bool crc32c_hw_supported_avx512() {
return false;
}

bool crc32_hw_supported() {
return false;
}
Expand Down Expand Up @@ -143,16 +132,8 @@ uint32_t crc32_sw(
} // namespace detail

uint32_t crc32c(const uint8_t* data, size_t nbytes, uint32_t startingChecksum) {
if (detail::crc32c_hw_supported_avx512()) {
if (nbytes <= 4096) {
return detail::crc32c_hw(data, nbytes, startingChecksum);
}
return detail::avx512_crc32c_v8s3x4(data, nbytes, startingChecksum);
} else if (detail::crc32c_hw_supported()) {
if (nbytes <= 4096) {
return detail::crc32c_hw(data, nbytes, startingChecksum);
}
return detail::sse_crc32c_v8s3x3(data, nbytes, startingChecksum);
if (detail::crc32c_hw_supported()) {
return detail::crc32c_hw(data, nbytes, startingChecksum);
} else {
return detail::crc32c_sw(data, nbytes, startingChecksum);
}
Expand Down
8 changes: 1 addition & 7 deletions folly/hash/detail/ChecksumDetail.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,11 @@ uint32_t crc32c_hw(
const uint8_t* data, size_t nbytes, uint32_t startingChecksum = ~0U);

/**
* Check whether a SSE4.2 hardware-accelerated CRC-32C implementation is
* Check whether a hardware-accelerated CRC-32C implementation is
* supported on the current CPU.
*/
bool crc32c_hw_supported();

/**
* Check whether an AVX512VL hardware-accelerated CRC-32C implementation is
* supported on the current CPU.
*/
bool crc32c_hw_supported_avx512();

/**
* Compute a CRC-32C checksum of a buffer using a portable,
* software-only implementation.
Expand Down
91 changes: 1 addition & 90 deletions folly/hash/test/ChecksumTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@

#include <folly/Benchmark.h>
#include <folly/Random.h>
#include <folly/external/fast-crc32/avx512_crc32c_v8s3x4.h>
#include <folly/external/fast-crc32/sse_crc32c_v8s3x3.h>
#include <folly/hash/Hash.h>
#include <folly/hash/detail/ChecksumDetail.h>
#include <folly/portability/GFlags.h>
Expand Down Expand Up @@ -146,93 +144,6 @@ TEST(Checksum, crc32cContinuationHardware) {
}
}

TEST(Checksum, crc32cHardwareSse42) {
if (folly::detail::crc32c_hw_supported()) {
testCRC32C(folly::detail::sse_crc32c_v8s3x3);
} else {
LOG(WARNING) << "skipping SSE4.2 hardware-accelerated CRC-32C tests"
<< " (not supported on this CPU)";
}
}

TEST(Checksum, crc32cHardwareEqSse42) {
if (folly::detail::crc32c_hw_supported()) {
for (size_t i = 0; i < 1000; i++) {
auto sw = folly::detail::crc32c_sw(buffer, i, 0);
auto hw = folly::detail::sse_crc32c_v8s3x3(buffer, i, 0);
ASSERT_EQ(sw, hw);
}
} else {
LOG(WARNING) << "skipping SSE4.2 hardware-accelerated CRC-32C tests"
<< " (not supported on this CPU)";
}
}

TEST(Checksum, crc32cContinuationHardwareSse42) {
if (folly::detail::crc32c_hw_supported()) {
testCRC32CContinuation(folly::detail::sse_crc32c_v8s3x3);
} else {
LOG(WARNING) << "skipping SSE4.2 hardware-accelerated CRC-32C tests"
<< " (not supported on this CPU)";
}
}

TEST(Checksum, crc32cHardwareAvx512) {
if (folly::detail::crc32c_hw_supported_avx512()) {
testCRC32C(folly::detail::avx512_crc32c_v8s3x4);
} else {
LOG(WARNING) << "skipping AVX512 hardware-accelerated CRC-32C tests"
<< " (not supported on this CPU)";
}
}

TEST(Checksum, crc32cHardwareEqAvx512) {
if (folly::detail::crc32c_hw_supported_avx512()) {
for (size_t i = 0; i < 1000; i++) {
auto sw = folly::detail::crc32c_sw(buffer, i, 0);
auto hw = folly::detail::avx512_crc32c_v8s3x4(buffer, i, 0);
ASSERT_EQ(sw, hw);
}
} else {
LOG(WARNING) << "skipping AVX512 hardware-accelerated CRC-32C tests"
<< " (not supported on this CPU)";
}
}

TEST(Checksum, crc32cContinuationHardwareAvx512) {
if (folly::detail::crc32c_hw_supported_avx512()) {
testCRC32CContinuation(folly::detail::avx512_crc32c_v8s3x4);
} else {
LOG(WARNING) << "skipping AVX512 hardware-accelerated CRC-32C tests"
<< " (not supported on this CPU)";
}
}

// Test on very large buffer inputs to attempt to sanity check 32-bit
// overflow problems on 64-bit platforms.
#ifdef __LP64__
TEST(Checksum, crc32clargeBuffers) {
constexpr size_t kLargeBufSz = 5ull * 1024 * 1024 * 1024; // 5GiB
auto buf = std::make_unique<uint8_t[]>(kLargeBufSz); // 5GiB
auto* bufp = buf.get();
// Fill with non-zero pattern.
memset(bufp, 0x2e, kLargeBufSz);

constexpr uint32_t kCrc = 2860399007;

if (folly::detail::crc32c_hw_supported()) {
auto crcSse42 = folly::detail::sse_crc32c_v8s3x3(bufp, kLargeBufSz, ~0);
ASSERT_EQ(kCrc, crcSse42);
auto crcHw = folly::detail::crc32c_hw(bufp, kLargeBufSz, ~0);
ASSERT_EQ(kCrc, crcHw);
}
if (folly::detail::crc32c_hw_supported_avx512()) {
auto crcAvx = folly::detail::avx512_crc32c_v8s3x4(bufp, kLargeBufSz, ~0);
ASSERT_EQ(kCrc, crcAvx);
}
}
#endif

TEST(Checksum, crc32cAutodetect) {
testCRC32C(folly::crc32c);
}
Expand Down Expand Up @@ -468,7 +379,7 @@ int main(int argc, char** argv) {
// on which to compute checksums
const uint8_t* src = buffer;
uint64_t* dst = (uint64_t*)buffer;
const uint64_t* end = (const uint64_t*)(buffer + sizeof(buffer));
const uint64_t* end = (const uint64_t*)(buffer + BUFFER_SIZE);
*dst++ = 0;
while (dst < end) {
*dst++ = folly::hash::fnv64_buf((const char*)src, sizeof(uint64_t));
Expand Down

0 comments on commit b930cba

Please sign in to comment.