17 | 17 | #include <asm/export.h> |
18 | 18 | #include <asm/trapnr.h> |
19 | 19 |
20 | | -.macro ALIGN_DESTINATION |
21 | | - /* check for bad alignment of destination */ |
22 | | - movl %edi,%ecx |
23 | | - andl $7,%ecx |
24 | | - jz 102f /* already aligned */ |
25 | | - subl $8,%ecx |
26 | | - negl %ecx |
27 | | - subl %ecx,%edx |
28 | | -100: movb (%rsi),%al |
29 | | -101: movb %al,(%rdi) |
30 | | - incq %rsi |
31 | | - incq %rdi |
32 | | - decl %ecx |
33 | | - jnz 100b |
34 | | -102: |
35 | | - |
36 | | - _ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align) |
37 | | - _ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align) |
38 | | -.endm |
39 | | - |
40 | 20 | /* |
41 | | - * copy_user_generic_unrolled - memory copy with exception handling. |
42 | | - * This version is for CPUs like P4 that don't have efficient micro |
43 | | - * code for rep movsq |
| 21 | + * rep_movs_alternative - memory copy with exception handling. |
| 22 | + * This version is for CPUs that don't have FSRM (Fast Short Rep Movs) |
44 | 23 | * |
45 | 24 | * Input: |
46 | 25 | * rdi destination |
52 | 31 | * |
53 | 32 | * NOTE! The calling convention is very intentionally the same as |
54 | 33 | * for 'rep movs', so that we can rewrite the function call with |
55 | | - * just a plain 'rep movs' on machines that have FSRM. |
56 | | - * |
57 | | - * HOWEVER! This function ends up having a lot of the code common |
58 | | - * with __copy_user_nocache(), which is a normal C function, and |
59 | | - * has a similar calling convention, but gets the 'count' in %rdx, |
60 | | - * and returns the result in %rax. |
61 | | - * |
62 | | - * To share as much code as possible, we end up returning the |
63 | | - * result in *both* %rcx/%rax, and we also move the initial count |
64 | | - * into %rdx. |
65 | | - * |
66 | | - * We can clobber rdx/rsi/rdi and r8-r11 |
| 34 | + * just a plain 'rep movs' on machines that have FSRM. But to make |
| 35 | + * it simpler for us, we can clobber rsi/rdi and rax/r8-r11 freely. |
67 | 36 | */ |
68 | | -SYM_FUNC_START(copy_user_generic_unrolled) |
69 | | - movl %ecx,%edx |
70 | | - cmpl $8,%ecx |
71 | | - jb .Lcopy_user_short_string_bytes |
72 | | - ALIGN_DESTINATION |
73 | | - movl %edx,%ecx |
74 | | - andl $63,%edx |
75 | | - shrl $6,%ecx |
76 | | - jz copy_user_short_string |
77 | | -1: movq (%rsi),%r8 |
78 | | -2: movq 1*8(%rsi),%r9 |
79 | | -3: movq 2*8(%rsi),%r10 |
80 | | -4: movq 3*8(%rsi),%r11 |
81 | | -5: movq %r8,(%rdi) |
82 | | -6: movq %r9,1*8(%rdi) |
83 | | -7: movq %r10,2*8(%rdi) |
84 | | -8: movq %r11,3*8(%rdi) |
85 | | -9: movq 4*8(%rsi),%r8 |
86 | | -10: movq 5*8(%rsi),%r9 |
87 | | -11: movq 6*8(%rsi),%r10 |
88 | | -12: movq 7*8(%rsi),%r11 |
89 | | -13: movq %r8,4*8(%rdi) |
90 | | -14: movq %r9,5*8(%rdi) |
91 | | -15: movq %r10,6*8(%rdi) |
92 | | -16: movq %r11,7*8(%rdi) |
93 | | - leaq 64(%rsi),%rsi |
94 | | - leaq 64(%rdi),%rdi |
95 | | - decl %ecx |
96 | | - jnz 1b |
97 | | - jmp copy_user_short_string |
| 37 | +SYM_FUNC_START(rep_movs_alternative) |
| 38 | + cmpq $64,%rcx |
| 39 | + jae .Lunrolled |
98 | 40 |
99 | | -30: shll $6,%ecx |
100 | | - addl %ecx,%edx |
101 | | - jmp .Lcopy_user_handle_tail |
| 41 | + cmp $8,%ecx |
| 42 | + jae .Lword |
102 | 43 |
103 | | - _ASM_EXTABLE_CPY(1b, 30b) |
104 | | - _ASM_EXTABLE_CPY(2b, 30b) |
105 | | - _ASM_EXTABLE_CPY(3b, 30b) |
106 | | - _ASM_EXTABLE_CPY(4b, 30b) |
107 | | - _ASM_EXTABLE_CPY(5b, 30b) |
108 | | - _ASM_EXTABLE_CPY(6b, 30b) |
109 | | - _ASM_EXTABLE_CPY(7b, 30b) |
110 | | - _ASM_EXTABLE_CPY(8b, 30b) |
111 | | - _ASM_EXTABLE_CPY(9b, 30b) |
112 | | - _ASM_EXTABLE_CPY(10b, 30b) |
113 | | - _ASM_EXTABLE_CPY(11b, 30b) |
114 | | - _ASM_EXTABLE_CPY(12b, 30b) |
115 | | - _ASM_EXTABLE_CPY(13b, 30b) |
116 | | - _ASM_EXTABLE_CPY(14b, 30b) |
117 | | - _ASM_EXTABLE_CPY(15b, 30b) |
118 | | - _ASM_EXTABLE_CPY(16b, 30b) |
119 | | -SYM_FUNC_END(copy_user_generic_unrolled) |
120 | | -EXPORT_SYMBOL(copy_user_generic_unrolled) |
| 44 | + testl %ecx,%ecx |
| 45 | + je .Lexit |
121 | 46 |
122 | | -/* |
123 | | - * Try to copy last bytes and clear the rest if needed. |
124 | | - * Since protection fault in copy_from/to_user is not a normal situation, |
125 | | - * it is not necessary to optimize tail handling. |
126 | | - * Don't try to copy the tail if machine check happened |
127 | | - * |
128 | | - * Input: |
129 | | - * eax trap number written by ex_handler_copy() |
130 | | - * rdi destination |
131 | | - * rsi source |
132 | | - * rdx count |
133 | | - * |
134 | | - * Output: |
135 | | - * eax uncopied bytes or 0 if successful. |
136 | | - */ |
137 | | -SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail) |
138 | | - cmp $X86_TRAP_MC,%eax |
139 | | - je 3f |
140 | | - |
141 | | - movl %edx,%ecx |
142 | | -1: rep movsb |
143 | | -2: mov %ecx,%eax |
| 47 | +.Lcopy_user_tail: |
| 48 | +0: movb (%rsi),%al |
| 49 | +1: movb %al,(%rdi) |
| 50 | + inc %rdi |
| 51 | + inc %rsi |
| 52 | + dec %rcx |
| 53 | + jne .Lcopy_user_tail |
| 54 | +.Lexit: |
144 | 55 | RET |
145 | 56 |
146 | | -3: |
147 | | - movl %edx,%eax |
148 | | - movl %edx,%ecx |
149 | | - RET |
| 57 | + _ASM_EXTABLE_UA( 0b, .Lexit) |
| 58 | + _ASM_EXTABLE_UA( 1b, .Lexit) |
150 | 59 |
151 | | - _ASM_EXTABLE_CPY(1b, 2b) |
| 60 | + .p2align 4 |
| 61 | +.Lword: |
| 62 | +2: movq (%rsi),%rax |
| 63 | +3: movq %rax,(%rdi) |
| 64 | + addq $8,%rsi |
| 65 | + addq $8,%rdi |
| 66 | + sub $8,%ecx |
| 67 | + je .Lexit |
| 68 | + cmp $8,%ecx |
| 69 | + jae .Lword |
| 70 | + jmp .Lcopy_user_tail |
152 | 71 |
153 | | -.Lcopy_user_handle_align: |
154 | | - addl %ecx,%edx /* ecx is zerorest also */ |
155 | | - jmp .Lcopy_user_handle_tail |
| 72 | + _ASM_EXTABLE_UA( 2b, .Lcopy_user_tail) |
| 73 | + _ASM_EXTABLE_UA( 3b, .Lcopy_user_tail) |
156 | 74 |
157 | | -SYM_CODE_END(.Lcopy_user_handle_tail) |
| 75 | + .p2align 4 |
| 76 | +.Lunrolled: |
| 77 | +10: movq (%rsi),%r8 |
| 78 | +11: movq 8(%rsi),%r9 |
| 79 | +12: movq 16(%rsi),%r10 |
| 80 | +13: movq 24(%rsi),%r11 |
| 81 | +14: movq %r8,(%rdi) |
| 82 | +15: movq %r9,8(%rdi) |
| 83 | +16: movq %r10,16(%rdi) |
| 84 | +17: movq %r11,24(%rdi) |
| 85 | +20: movq 32(%rsi),%r8 |
| 86 | +21: movq 40(%rsi),%r9 |
| 87 | +22: movq 48(%rsi),%r10 |
| 88 | +23: movq 56(%rsi),%r11 |
| 89 | +24: movq %r8,32(%rdi) |
| 90 | +25: movq %r9,40(%rdi) |
| 91 | +26: movq %r10,48(%rdi) |
| 92 | +27: movq %r11,56(%rdi) |
| 93 | + addq $64,%rsi |
| 94 | + addq $64,%rdi |
| 95 | + subq $64,%rcx |
| 96 | + cmpq $64,%rcx |
| 97 | + jae .Lunrolled |
| 98 | + cmpl $8,%ecx |
| 99 | + jae .Lword |
| 100 | + testl %ecx,%ecx |
| 101 | + jne .Lcopy_user_tail |
| 102 | + RET |
| 103 | + |
| 104 | + _ASM_EXTABLE_UA(10b, .Lcopy_user_tail) |
| 105 | + _ASM_EXTABLE_UA(11b, .Lcopy_user_tail) |
| 106 | + _ASM_EXTABLE_UA(12b, .Lcopy_user_tail) |
| 107 | + _ASM_EXTABLE_UA(13b, .Lcopy_user_tail) |
| 108 | + _ASM_EXTABLE_UA(14b, .Lcopy_user_tail) |
| 109 | + _ASM_EXTABLE_UA(15b, .Lcopy_user_tail) |
| 110 | + _ASM_EXTABLE_UA(16b, .Lcopy_user_tail) |
| 111 | + _ASM_EXTABLE_UA(17b, .Lcopy_user_tail) |
| 112 | + _ASM_EXTABLE_UA(20b, .Lcopy_user_tail) |
| 113 | + _ASM_EXTABLE_UA(21b, .Lcopy_user_tail) |
| 114 | + _ASM_EXTABLE_UA(22b, .Lcopy_user_tail) |
| 115 | + _ASM_EXTABLE_UA(23b, .Lcopy_user_tail) |
| 116 | + _ASM_EXTABLE_UA(24b, .Lcopy_user_tail) |
| 117 | + _ASM_EXTABLE_UA(25b, .Lcopy_user_tail) |
| 118 | + _ASM_EXTABLE_UA(26b, .Lcopy_user_tail) |
| 119 | + _ASM_EXTABLE_UA(27b, .Lcopy_user_tail) |
| 120 | +SYM_FUNC_END(rep_movs_alternative) |
| 121 | +EXPORT_SYMBOL(rep_movs_alternative) |
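The control flow above bounces between .Lunrolled, .Lword, .Lcopy_user_tail and .Lexit, but it reduces to a plain three-tier loop. A minimal C restatement, as a sketch only (the function name is made up, and the fault paths provided by the _ASM_EXTABLE_UA entries are left out):

	#include <string.h>

	/*
	 * Shape of rep_movs_alternative: 64-byte unrolled chunks, then
	 * 8-byte words, then a byte tail.  The real routine reports the
	 * count still left in %rcx if a load or store faults.
	 */
	static unsigned long copy_shape_sketch(char *dst, const char *src,
					       unsigned long len)
	{
		while (len >= 64) {		/* .Lunrolled */
			memcpy(dst, src, 64);
			dst += 64; src += 64; len -= 64;
		}
		while (len >= 8) {		/* .Lword */
			memcpy(dst, src, 8);
			dst += 8; src += 8; len -= 8;
		}
		while (len) {			/* .Lcopy_user_tail */
			*dst++ = *src++;
			len--;
		}
		/* .Lexit: always 0 here; the asm version returns the leftover count after a fault */
		return len;
	}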
158 | 122 |
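The NOTE in the header comment is about how this function gets called: the call site is an alternative that FSRM machines collapse to a bare 'rep movsb'. A sketch of such a caller, modelled on the copy_user_generic() wrapper in arch/x86/include/asm/uaccess_64.h (the wrapper name and exact details below are an approximation, not verbatim kernel source; ALTERNATIVE, ALT_NOT, _ASM_EXTABLE_UA, ASM_CALL_CONSTRAINT and stac()/clac() are the usual kernel-internal macros):

	#include <asm/alternative.h>	/* ALTERNATIVE, ALT_NOT */
	#include <asm/cpufeatures.h>	/* X86_FEATURE_FSRM */
	#include <asm/asm.h>		/* _ASM_EXTABLE_UA, ASM_CALL_CONSTRAINT */
	#include <asm/smap.h>		/* stac(), clac() */

	static inline unsigned long
	copy_user_fsrm_sketch(void *to, const void *from, unsigned long len)
	{
		stac();			/* open the SMAP user-access window */
		/*
		 * FSRM CPUs execute the bare 'rep movsb'; everyone else has
		 * the site patched into 'call rep_movs_alternative'.  Both
		 * variants leave the uncopied byte count in %rcx.
		 */
		asm volatile(
			"1:\n\t"
			ALTERNATIVE("rep movsb",
				    "call rep_movs_alternative",
				    ALT_NOT(X86_FEATURE_FSRM))
			"2:\n"
			_ASM_EXTABLE_UA(1b, 2b)
			: "+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
			: : "memory", "rax", "r8", "r9", "r10", "r11");
		clac();			/* close the user-access window */
		return len;		/* 0 on success, else bytes not copied */
	}

Because rcx/rdi/rsi are inputs and outputs of both forms, no register shuffling is needed around the patched instruction; that is also why the routine above allows itself to clobber only rax/r8-r11 beyond the 'rep movs' registers.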
159 | 123 | /* |
160 | | - * Finish memcpy of less than 64 bytes. #AC should already be set. |
161 | | - * |
162 | | - * Input: |
163 | | - * rdi destination |
164 | | - * rsi source |
165 | | - * rdx count (< 64) |
166 | | - * |
167 | | - * Output: |
168 | | - * eax uncopied bytes or 0 if successful. |
| 124 | + * The uncached copy needs to align the destination for |
| 125 | + * movnti and friends. |
169 | 126 | */ |
170 | | -SYM_CODE_START_LOCAL(copy_user_short_string) |
171 | | - movl %edx,%ecx |
172 | | - andl $7,%edx |
173 | | - shrl $3,%ecx |
174 | | - jz .Lcopy_user_short_string_bytes |
175 | | -18: movq (%rsi),%r8 |
176 | | -19: movq %r8,(%rdi) |
177 | | - leaq 8(%rsi),%rsi |
178 | | - leaq 8(%rdi),%rdi |
179 | | - decl %ecx |
180 | | - jnz 18b |
181 | | -.Lcopy_user_short_string_bytes: |
182 | | - andl %edx,%edx |
183 | | - jz 23f |
184 | | - movl %edx,%ecx |
185 | | -21: movb (%rsi),%al |
186 | | -22: movb %al,(%rdi) |
| 127 | +.macro ALIGN_DESTINATION |
| 128 | + /* check for bad alignment of destination */ |
| 129 | + movl %edi,%ecx |
| 130 | + andl $7,%ecx |
| 131 | + jz 102f /* already aligned */ |
| 132 | + subl $8,%ecx |
| 133 | + negl %ecx |
| 134 | + subl %ecx,%edx |
| 135 | +100: movb (%rsi),%al |
| 136 | +101: movb %al,(%rdi) |
187 | 137 | incq %rsi |
188 | 138 | incq %rdi |
189 | 139 | decl %ecx |
190 | | - jnz 21b |
191 | | -23: xor %eax,%eax |
192 | | - xor %ecx,%ecx |
193 | | - RET |
| 140 | + jnz 100b |
| 141 | +102: |
194 | 142 |
195 | | -40: leal (%rdx,%rcx,8),%edx |
196 | | - jmp 60f |
197 | | -50: movl %ecx,%edx /* ecx is zerorest also */ |
198 | | -60: jmp .Lcopy_user_handle_tail |
| 143 | + _ASM_EXTABLE_CPY(100b, .Lcopy_user_handle_align) |
| 144 | + _ASM_EXTABLE_CPY(101b, .Lcopy_user_handle_align) |
| 145 | +.endm |
199 | 146 |
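ALIGN_DESTINATION turns the low three bits of the destination into a head count: the subl $8 / negl pair computes 8 - (edi & 7), which is then copied byte by byte until %rdi is 8-byte aligned for the movnti stores. The same arithmetic in C, for illustration only (the helper name is invented):

	/*
	 * Head bytes to copy before 'dst' reaches the next 8-byte boundary,
	 * 0 if it is already aligned -- what ALIGN_DESTINATION computes in %ecx.
	 */
	static inline unsigned long nocache_align_head(unsigned long dst)
	{
		unsigned long misalign = dst & 7;

		return misalign ? 8 - misalign : 0;
	}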
200 | | - _ASM_EXTABLE_CPY(18b, 40b) |
201 | | - _ASM_EXTABLE_CPY(19b, 40b) |
202 | | - _ASM_EXTABLE_CPY(21b, 50b) |
203 | | - _ASM_EXTABLE_CPY(22b, 50b) |
204 | | -SYM_CODE_END(copy_user_short_string) |
205 | 147 |
206 | 148 | /* |
207 | 149 | * copy_user_nocache - Uncached memory copy with exception handling |
@@ -346,5 +288,40 @@ SYM_FUNC_START(__copy_user_nocache) |
346 | 288 | _ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy) |
347 | 289 | _ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy) |
348 | 290 | _ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy) |
| 291 | + |
| 292 | +/* |
| 293 | + * Try to copy last bytes and clear the rest if needed. |
| 294 | + * Since protection fault in copy_from/to_user is not a normal situation, |
| 295 | + * it is not necessary to optimize tail handling. |
| 296 | + * Don't try to copy the tail if machine check happened |
| 297 | + * |
| 298 | + * Input: |
| 299 | + * eax trap number written by ex_handler_copy() |
| 300 | + * rdi destination |
| 301 | + * rsi source |
| 302 | + * rdx count |
| 303 | + * |
| 304 | + * Output: |
| 305 | + * eax uncopied bytes or 0 if successful. |
| 306 | + */ |
| 307 | +.Lcopy_user_handle_tail: |
| 308 | + cmp $X86_TRAP_MC,%eax |
| 309 | + je 3f |
| 310 | + |
| 311 | + movl %edx,%ecx |
| 312 | +1: rep movsb |
| 313 | +2: mov %ecx,%eax |
| 314 | + RET |
| 315 | + |
| 316 | +3: |
| 317 | + movl %edx,%eax |
| 318 | + RET |
| 319 | + |
| 320 | + _ASM_EXTABLE_CPY(1b, 2b) |
| 321 | + |
| 322 | +.Lcopy_user_handle_align: |
| 323 | + addl %ecx,%edx /* ecx is zerorest also */ |
| 324 | + jmp .Lcopy_user_handle_tail |
| 325 | + |
349 | 326 | SYM_FUNC_END(__copy_user_nocache) |
350 | 327 | EXPORT_SYMBOL(__copy_user_nocache) |
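The tail handler relocated into __copy_user_nocache above reports the uncopied byte count in %eax and deliberately gives up, rather than retrying byte by byte, after a machine check. A hypothetical caller shows how that return value is typically consumed on the copy_from_user() side, where the uncopied part of the kernel buffer is zeroed so no uninitialized data is left behind (the names below are placeholders, not kernel API):

	#include <string.h>

	/*
	 * raw_copy() stands in for the asm routine: it returns how many of
	 * the 'len' bytes it could NOT copy, 0 on complete success.
	 */
	static unsigned long checked_copy(char *dst, const char *src, unsigned long len,
					  unsigned long (*raw_copy)(char *, const char *,
								    unsigned long))
	{
		unsigned long left = raw_copy(dst, src, len);

		if (left)	/* partial copy: clear the tail that was never written */
			memset(dst + (len - left), 0, left);
		return left;
	}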