Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prefetch the backreference hashtable bucket. #1156

Merged
merged 1 commit into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions c/common/platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,21 @@ BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) {
#if BROTLI_ENABLE_DUMP
BROTLI_UNUSED(&BrotliDump);
#endif

#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
#elif BROTLI_GNUC_HAS_BUILTIN(__builtin_prefetch, 3, 1, 0)
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
#elif defined(__aarch64__)
# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
#else
# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
#endif
}

#endif /* BROTLI_COMMON_PLATFORM_H_ */
7 changes: 5 additions & 2 deletions c/enc/hash_longest_match64_inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,11 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
score_t best_score = out->score;
size_t best_len = out->len;
size_t i;
/* Precalculate the hash key and prefetch the bucket. */
const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
PREFETCH_L1(bucket);
if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);
out->len = 0;
out->len_code_delta = 0;

Expand Down Expand Up @@ -220,8 +225,6 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
best_len = 3;
}
{
const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
const size_t down =
(num[key] > self->block_size_) ?
(num[key] - self->block_size_) : 0u;
Expand Down
13 changes: 8 additions & 5 deletions c/enc/hash_longest_match_inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,17 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
score_t best_score = out->score;
size_t best_len = out->len;
size_t i;
/* Precalculate the hash key and prefetch the bucket. */
const uint32_t key =
FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
PREFETCH_L1(bucket);
if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);
out->len = 0;
out->len_code_delta = 0;

BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);

out->len = 0;
out->len_code_delta = 0;
/* Try last distance first. */
for (i = 0; i < (size_t)self->num_last_distances_to_check_; ++i) {
const size_t backward = (size_t)distance_cache[i];
Expand Down Expand Up @@ -219,9 +225,6 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
best_len = 3;
}
{
const uint32_t key =
FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
const size_t down =
(num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
for (i = num[key]; i > down;) {
Expand Down
Loading