[JSC] Use 32-byte stride for ARM64 gcSafe ops
https://bugs.webkit.org/show_bug.cgi?id=259226
rdar://112284761

Reviewed by Mark Lam.

This patch extends the stride of the gcSafe memory ops from 16 to 32 bytes.
We do not use SIMD v128 etc. here for now, though it could be beneficial
for larger-sized copies. We also clean up these ops to use pre-index /
post-index addressing on ARM64.
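
Concretely, the new forward path copies 32 bytes per iteration and finishes with an 8-byte tail. Below is a minimal portable sketch of that structure with a hypothetical name; it is illustration only, since the real gcSafeMemcpy must stay in inline assembly so the compiler cannot alter the access widths the concurrent GC depends on.

#include <cstddef>
#include <cstdint>
#include <cstring>

// Hypothetical helper, for illustration only. Assumes non-overlapping ranges
// and that bytes is a multiple of 8, as the JSC callers guarantee.
inline void copyWith32ByteStride(void* dst, const void* src, size_t bytes)
{
    auto* d = static_cast<uint8_t*>(dst);
    auto* s = static_cast<const uint8_t*>(src);
    size_t alignedBytes = (bytes / 32) * 32;

    size_t offset = 0;
    // Main loop: 32 bytes per iteration (the ldp/stp q0, q1 pair in the asm).
    for (; offset < alignedBytes; offset += 32)
        std::memcpy(d + offset, s + offset, 32);
    // Tail loop: remaining 8-byte words (the ldr/str d0 pair in the asm).
    for (; offset < bytes; offset += 8)
        std::memcpy(d + offset, s + offset, 8);
}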

* Source/JavaScriptCore/heap/GCMemoryOperations.h:
(JSC::gcSafeMemcpy):
(JSC::gcSafeMemmove):

Canonical link: https://commits.webkit.org/266079@main
Constellation committed Jul 15, 2023
1 parent 525b38f commit 8e00fd0
Showing 1 changed file with 23 additions and 23 deletions: Source/JavaScriptCore/heap/GCMemoryOperations.h
@@ -92,32 +92,31 @@ ALWAYS_INLINE void gcSafeMemcpy(T* dst, T* src, size_t bytes)
         : "xmm0", "xmm1", "xmm2", "xmm3", "memory", "cc"
         );
 #elif CPU(ARM64)
-    uint64_t alignedBytes = (static_cast<uint64_t>(bytes) / 16) * 16;
-    size_t offset = 0;
+    uint64_t alignedBytes = (static_cast<uint64_t>(bytes) / 32) * 32;
 
     uint64_t dstPtr = static_cast<uint64_t>(bitwise_cast<uintptr_t>(dst));
     uint64_t srcPtr = static_cast<uint64_t>(bitwise_cast<uintptr_t>(src));
+    uint64_t end = dstPtr + bytes;
+    uint64_t alignedEnd = dstPtr + alignedBytes;
 
     asm volatile(
         "1:\t\n"
-        "cmp %x[offset], %x[alignedBytes]\t\n"
+        "cmp %x[dstPtr], %x[alignedEnd]\t\n"
         "b.eq 2f\t\n"
-        "ldr q0, [%x[srcPtr], %x[offset]]\t\n"
-        "str q0, [%x[dstPtr], %x[offset]]\t\n"
-        "add %x[offset], %x[offset], #0x10\t\n"
+        "ldp q0, q1, [%x[srcPtr]], #0x20\t\n"
+        "stp q0, q1, [%x[dstPtr]], #0x20\t\n"
         "b 1b\t\n"
 
         "2:\t\n"
-        "cmp %x[offset], %x[bytes]\t\n"
+        "cmp %x[dstPtr], %x[end]\t\n"
         "b.eq 3f\t\n"
-        "ldr d0, [%x[srcPtr], %x[offset]]\t\n"
-        "str d0, [%x[dstPtr], %x[offset]]\t\n"
-        "add %x[offset], %x[offset], #0x8\t\n"
+        "ldr d0, [%x[srcPtr]], #0x8\t\n"
+        "str d0, [%x[dstPtr]], #0x8\t\n"
         "b 2b\t\n"
 
         "3:\t\n"
 
-        : [alignedBytes] "+r" (alignedBytes), [bytes] "+r" (bytes), [offset] "+r" (offset), [dstPtr] "+r" (dstPtr), [srcPtr] "+r" (srcPtr)
+        : [end] "+r" (end), [alignedEnd] "+r" (alignedEnd), [dstPtr] "+r" (dstPtr), [srcPtr] "+r" (srcPtr)
         :
         : "d0", "d1", "memory"
         );
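
The new memcpy loop above replaces the old base-plus-offset addressing, which needed a separate add to bump the offset, with ARM64 post-index addressing: "ldp q0, q1, [%x[srcPtr]], #0x20" loads 32 bytes and then advances srcPtr by 32 in the same instruction, so the exit tests compare the moving dstPtr against precomputed end pointers. For experimentation outside JSC, the loop can be lifted into a standalone function like the hedged sketch below; the name and the __aarch64__ guard are assumptions, not part of the patch.

#include <cstddef>
#include <cstdint>

#if defined(__aarch64__)
// Hypothetical standalone version of the new forward-copy loop, lifted out of
// gcSafeMemcpy for experimentation. Requires bytes to be a multiple of 8.
inline void copyForward32(void* dst, const void* src, size_t bytes)
{
    uint64_t dstPtr = reinterpret_cast<uintptr_t>(dst);
    uint64_t srcPtr = reinterpret_cast<uintptr_t>(src);
    uint64_t alignedBytes = (static_cast<uint64_t>(bytes) / 32) * 32;
    uint64_t end = dstPtr + bytes;
    uint64_t alignedEnd = dstPtr + alignedBytes;

    asm volatile(
        "1:\t\n"
        "cmp %x[dstPtr], %x[alignedEnd]\t\n"
        "b.eq 2f\t\n"
        "ldp q0, q1, [%x[srcPtr]], #0x20\t\n" // load 32 bytes, then srcPtr += 32 (post-index)
        "stp q0, q1, [%x[dstPtr]], #0x20\t\n" // store 32 bytes, then dstPtr += 32
        "b 1b\t\n"
        "2:\t\n"
        "cmp %x[dstPtr], %x[end]\t\n"
        "b.eq 3f\t\n"
        "ldr d0, [%x[srcPtr]], #0x8\t\n" // 8-byte tail, also post-indexed
        "str d0, [%x[dstPtr]], #0x8\t\n"
        "b 2b\t\n"
        "3:\t\n"
        : [end] "+r" (end), [alignedEnd] "+r" (alignedEnd), [dstPtr] "+r" (dstPtr), [srcPtr] "+r" (srcPtr)
        :
        : "d0", "d1", "memory");
}
#endif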
@@ -209,29 +208,30 @@ ALWAYS_INLINE void gcSafeMemmove(T* dst, T* src, size_t bytes)
         : "xmm0", "xmm1", "xmm2", "xmm3", "memory", "cc"
         );
 #elif CPU(ARM64)
-    uint64_t alignedBytes = (static_cast<uint64_t>(bytes) / 16) * 16;
-    uint64_t dstPtr = static_cast<uint64_t>(bitwise_cast<uintptr_t>(dst));
-    uint64_t srcPtr = static_cast<uint64_t>(bitwise_cast<uintptr_t>(src));
+    uint64_t alignedBytes = (static_cast<uint64_t>(bytes) / 32) * 32;
+    uint64_t dstPtr = static_cast<uint64_t>(bitwise_cast<uintptr_t>(dst) + static_cast<uint64_t>(bytes));
+    uint64_t srcPtr = static_cast<uint64_t>(bitwise_cast<uintptr_t>(src) + static_cast<uint64_t>(bytes));
+    uint64_t alignedEnd = bitwise_cast<uintptr_t>(dst) + alignedBytes;
+    uint64_t end = bitwise_cast<uintptr_t>(dst);
 
     asm volatile(
         "1:\t\n"
-        "cmp %x[alignedBytes], %x[bytes]\t\n"
+        "cmp %x[dstPtr], %x[alignedEnd]\t\n"
         "b.eq 2f\t\n"
-        "sub %x[bytes], %x[bytes], #0x8\t\n"
-        "ldr d0, [%x[srcPtr], %x[bytes]]\t\n"
-        "str d0, [%x[dstPtr], %x[bytes]]\t\n"
+        "ldr d0, [%x[srcPtr], #-0x8]!\t\n"
+        "str d0, [%x[dstPtr], #-0x8]!\t\n"
         "b 1b\t\n"
 
         "2:\t\n"
-        "cbz %x[alignedBytes], 3f\t\n"
-        "sub %x[alignedBytes], %x[alignedBytes], #0x10\t\n"
-        "ldr q0, [%x[srcPtr], %x[alignedBytes]]\t\n"
-        "str q0, [%x[dstPtr], %x[alignedBytes]]\t\n"
+        "cmp %x[dstPtr], %x[end]\t\n"
+        "b.eq 3f\t\n"
+        "ldp q0, q1, [%x[srcPtr], #-0x20]!\t\n"
+        "stp q0, q1, [%x[dstPtr], #-0x20]!\t\n"
         "b 2b\t\n"
 
         "3:\t\n"
 
-        : [alignedBytes] "+r" (alignedBytes), [bytes] "+r" (bytes), [dstPtr] "+r" (dstPtr), [srcPtr] "+r" (srcPtr)
+        : [alignedEnd] "+r" (alignedEnd), [end] "+r" (end), [dstPtr] "+r" (dstPtr), [srcPtr] "+r" (srcPtr)
        :
        : "d0", "d1", "memory"
        );
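
The memmove variant walks backwards so that overlapping ranges where dst sits above src are handled safely: the pointers start at the top of the buffers, the unaligned tail is copied first in 8-byte pre-index steps down to the 32-byte boundary, and the aligned bulk then follows in 32-byte steps down to the start. A minimal portable sketch of that structure, with a hypothetical name and again assuming bytes is a multiple of 8:

#include <cstddef>
#include <cstdint>
#include <cstring>

// Hypothetical illustration of the backward (memmove) loop structure; the real
// gcSafeMemmove must stay in inline asm so the GC-visible access widths are fixed.
inline void moveBackwardWith32ByteStride(void* dst, const void* src, size_t bytes)
{
    auto* d = static_cast<uint8_t*>(dst);
    auto* s = static_cast<const uint8_t*>(src);
    size_t alignedBytes = (bytes / 32) * 32;

    size_t offset = bytes;
    // Unaligned tail first: 8-byte words from the top of the buffer down to the
    // 32-byte boundary (the pre-indexed ldr/str d0 loop in the asm).
    while (offset > alignedBytes) {
        offset -= 8;
        uint64_t word;
        std::memcpy(&word, s + offset, 8); // load the word first...
        std::memcpy(d + offset, &word, 8); // ...then store it, as ldr/str does
    }
    // Then the aligned bulk, 32 bytes per step, down to the start
    // (the pre-indexed ldp/stp q0, q1 loop in the asm).
    while (offset > 0) {
        offset -= 32;
        uint8_t block[32];
        std::memcpy(block, s + offset, 32); // load the whole block...
        std::memcpy(d + offset, block, 32); // ...then store it, as ldp/stp does
    }
}

Loading each block before storing it mirrors the asm and keeps the sketch correct even when the source and destination chunks overlap within a single 32-byte step.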