Skip to content

Commit 68674f9

Browse files
committed
x86: don't use REP_GOOD or ERMS for small memory copies
The modern target to use is FSRM (Fast Short REP MOVS), and the other cases should only be used for bigger areas (i.e. mainly things like page copying and clearing). Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 6a8f57a commit 68674f9

File tree

1 file changed

+10
-24
lines changed

1 file changed

+10
-24
lines changed

arch/x86/lib/memcpy_64.S

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,6 @@
1010

1111
.section .noinstr.text, "ax"
1212

13-
/*
14-
* We build a jump to memcpy_orig by default which gets NOPped out on
15-
* the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
16-
* have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
17-
* to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
18-
*/
19-
2013
/*
2114
* memcpy - Copy a memory block.
2215
*
@@ -27,17 +20,21 @@
2720
*
2821
* Output:
2922
* rax original destination
23+
*
24+
* The FSRM alternative should be done inline (avoiding the call and
25+
* the disgusting return handling), but that would require some help
26+
* from the compiler for better calling conventions.
27+
*
28+
* The 'rep movsb' itself is small enough to replace the call, but the
29+
* two register moves blow up the code. And one of them is "needed"
30+
only for the return value that is the same as the destination input,
31+
* which the compiler could/should do much better anyway.
3032
*/
3133
SYM_TYPED_FUNC_START(__memcpy)
32-
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
33-
"jmp memcpy_erms", X86_FEATURE_ERMS
34+
ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
3435

3536
movq %rdi, %rax
3637
movq %rdx, %rcx
37-
shrq $3, %rcx
38-
andl $7, %edx
39-
rep movsq
40-
movl %edx, %ecx
4138
rep movsb
4239
RET
4340
SYM_FUNC_END(__memcpy)
@@ -46,17 +43,6 @@ EXPORT_SYMBOL(__memcpy)
4643
SYM_FUNC_ALIAS(memcpy, __memcpy)
4744
EXPORT_SYMBOL(memcpy)
4845

49-
/*
50-
* memcpy_erms() - enhanced fast string memcpy. This is faster and
51-
* simpler than memcpy. Use memcpy_erms when possible.
52-
*/
53-
SYM_FUNC_START_LOCAL(memcpy_erms)
54-
movq %rdi, %rax
55-
movq %rdx, %rcx
56-
rep movsb
57-
RET
58-
SYM_FUNC_END(memcpy_erms)
59-
6046
SYM_FUNC_START_LOCAL(memcpy_orig)
6147
movq %rdi, %rax
6248

0 commit comments

Comments
 (0)