Skip to content

Commit

Permalink
Prefetch the backreference hashtable bucket.
Browse files Browse the repository at this point in the history
Place the prefetch before the last distance checks, to give the prefetch enough time to work.

PiperOrigin-RevId: 609192361
  • Loading branch information
Brotli authored and Copybara-Service committed Apr 18, 2024
1 parent 443af10 commit c681363
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 6 deletions.
15 changes: 15 additions & 0 deletions c/common/platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,21 @@ BROTLI_UNUSED_FUNCTION void BrotliSuppressUnusedFunctions(void) {
#if BROTLI_ENABLE_DUMP
BROTLI_UNUSED(&BrotliDump);
#endif

/* Cache prefetch hints.

   PREFETCH_L1(ptr) and PREFETCH_L2(ptr) issue a read-prefetch hint for the
   memory at `ptr`, using whichever mechanism the toolchain offers:
     - MSVC on x86/x64:   _mm_prefetch() with _MM_HINT_T0 / _MM_HINT_T1;
     - GCC-like compilers: __builtin_prefetch() with locality 3 / 2;
     - other AArch64:      inline "prfm pldl1keep" / "prfm pldl2keep";
     - anything else:      a no-op that only evaluates `ptr`.

   The macros are hints only and produce no value. Note that the AArch64
   inline-asm variant names `*(ptr)` as a memory operand, so `ptr` must be a
   pointer to a complete object type there. */
#if defined(_MSC_VER) && \
    (defined(_M_X64) || defined(_M_I86) || \
     (defined(_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 1))) && \
    !defined(_M_ARM64EC)
/* _mm_prefetch() is not defined outside of x86/x64.
   32-bit x86 MSVC defines _M_IX86 (not _M_I86, which is the 16-bit macro);
   it is accepted only when SSE code generation is enabled (_M_IX86_FP >= 1),
   because _mm_prefetch() is an SSE intrinsic. */
# include <xmmintrin.h> /* declares _mm_prefetch() and _MM_HINT_T0/_MM_HINT_T1; https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
#elif BROTLI_GNUC_HAS_BUILTIN(__builtin_prefetch, 3, 1, 0)
# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
#elif defined(__aarch64__)
# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
#else
/* No known prefetch primitive: evaluate the argument to avoid unused-value
   warnings and otherwise do nothing. */
# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
#endif
}

#endif /* BROTLI_COMMON_PLATFORM_H_ */
8 changes: 6 additions & 2 deletions c/enc/hash_longest_match64_inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,12 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
score_t best_score = out->score;
size_t best_len = out->len;
size_t i;
BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
/* Precalculate the hash key and prefetch the bucket. */
const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
PREFETCH_L1(bucket);
if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);
out->len = 0;
out->len_code_delta = 0;

Expand Down Expand Up @@ -220,8 +226,6 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
best_len = 3;
}
{
const size_t key = FN(HashBytes)(&data[cur_ix_masked], self->hash_mul_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
const size_t down =
(num[key] > self->block_size_) ?
(num[key] - self->block_size_) : 0u;
Expand Down
11 changes: 7 additions & 4 deletions c/enc/hash_longest_match_inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,14 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
size_t best_len = out->len;
size_t i;

BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
/* Precalculate the hash key and prefetch the bucket. */
const uint32_t key =
FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
PREFETCH_L1(bucket);
if (self->block_bits_ > 4) PREFETCH_L1(bucket + 16);

BROTLI_DCHECK(cur_ix_masked + max_length <= ring_buffer_mask);
out->len = 0;
out->len_code_delta = 0;
/* Try last distance first. */
Expand Down Expand Up @@ -219,9 +225,6 @@ static BROTLI_INLINE void FN(FindLongestMatch)(
best_len = 3;
}
{
const uint32_t key =
FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
uint32_t* BROTLI_RESTRICT bucket = &buckets[key << self->block_bits_];
const size_t down =
(num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
for (i = num[key]; i > down;) {
Expand Down

0 comments on commit c681363

Please sign in to comment.