Skip to content

Commit 9a462b9

Browse files
anadav authored and Ingo Molnar committed
x86/percpu: Use compiler segment prefix qualifier
Using a segment prefix qualifier is cleaner than using a segment prefix in the inline assembly, and provides the compiler with more information, telling it that __seg_gs:[addr] is different than [addr] when it analyzes data dependencies. It also enables various optimizations that will be implemented in the next patches. Use segment prefix qualifiers when they are supported. Unfortunately, gcc does not provide a way to remove segment qualifiers, which is needed to use typeof() to create local instances of the per-CPU variable. For this reason, do not use the segment qualifier for per-CPU variables, and do casting using the segment qualifier instead. Uros: Improve compiler support detection and update the patch to the current mainline. Signed-off-by: Nadav Amit <namit@vmware.com> Signed-off-by: Uros Bizjak <ubizjak@gmail.com> Signed-off-by: Ingo Molnar <mingo@kernel.org> Cc: Andy Lutomirski <luto@kernel.org> Cc: Brian Gerst <brgerst@gmail.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Link: https://lore.kernel.org/r/20231004145137.86537-4-ubizjak@gmail.com
1 parent 1ca3683 commit 9a462b9

File tree

2 files changed

+47
-23
lines changed

2 files changed

+47
-23
lines changed

arch/x86/include/asm/percpu.h

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -28,26 +28,50 @@
2828
#include <linux/stringify.h>
2929

3030
#ifdef CONFIG_SMP
31+
32+
#ifdef CONFIG_CC_HAS_NAMED_AS
33+
34+
#ifdef CONFIG_X86_64
35+
#define __percpu_seg_override __seg_gs
36+
#else
37+
#define __percpu_seg_override __seg_fs
38+
#endif
39+
40+
#define __percpu_prefix ""
41+
42+
#else /* CONFIG_CC_HAS_NAMED_AS */
43+
44+
#define __percpu_seg_override
3145
#define __percpu_prefix "%%"__stringify(__percpu_seg)":"
46+
47+
#endif /* CONFIG_CC_HAS_NAMED_AS */
48+
49+
#define __force_percpu_prefix "%%"__stringify(__percpu_seg)":"
3250
#define __my_cpu_offset this_cpu_read(this_cpu_off)
3351

3452
/*
3553
* Compared to the generic __my_cpu_offset version, the following
3654
* saves one instruction and avoids clobbering a temp register.
3755
*/
38-
#define arch_raw_cpu_ptr(ptr) \
39-
({ \
40-
unsigned long tcp_ptr__; \
41-
asm ("add " __percpu_arg(1) ", %0" \
42-
: "=r" (tcp_ptr__) \
43-
: "m" (this_cpu_off), "0" (ptr)); \
44-
(typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
56+
#define arch_raw_cpu_ptr(ptr) \
57+
({ \
58+
unsigned long tcp_ptr__; \
59+
asm ("add " __percpu_arg(1) ", %0" \
60+
: "=r" (tcp_ptr__) \
61+
: "m" (__my_cpu_var(this_cpu_off)), "0" (ptr)); \
62+
(typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
4563
})
46-
#else
64+
#else /* CONFIG_SMP */
65+
#define __percpu_seg_override
4766
#define __percpu_prefix ""
48-
#endif
67+
#define __force_percpu_prefix ""
68+
#endif /* CONFIG_SMP */
4969

70+
#define __my_cpu_type(var) typeof(var) __percpu_seg_override
71+
#define __my_cpu_ptr(ptr) (__my_cpu_type(*ptr) *)(uintptr_t)(ptr)
72+
#define __my_cpu_var(var) (*__my_cpu_ptr(&var))
5073
#define __percpu_arg(x) __percpu_prefix "%" #x
74+
#define __force_percpu_arg(x) __force_percpu_prefix "%" #x
5175

5276
/*
5377
* Initialized pointers to per-cpu variables needed for the boot
@@ -107,14 +131,14 @@ do { \
107131
(void)pto_tmp__; \
108132
} \
109133
asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var])) \
110-
: [var] "+m" (_var) \
134+
: [var] "+m" (__my_cpu_var(_var)) \
111135
: [val] __pcpu_reg_imm_##size(pto_val__)); \
112136
} while (0)
113137

114138
#define percpu_unary_op(size, qual, op, _var) \
115139
({ \
116140
asm qual (__pcpu_op1_##size(op, __percpu_arg([var])) \
117-
: [var] "+m" (_var)); \
141+
: [var] "+m" (__my_cpu_var(_var))); \
118142
})
119143

120144
/*
@@ -144,14 +168,14 @@ do { \
144168
__pcpu_type_##size pfo_val__; \
145169
asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]") \
146170
: [val] __pcpu_reg_##size("=", pfo_val__) \
147-
: [var] "m" (_var)); \
171+
: [var] "m" (__my_cpu_var(_var))); \
148172
(typeof(_var))(unsigned long) pfo_val__; \
149173
})
150174

151175
#define percpu_stable_op(size, op, _var) \
152176
({ \
153177
__pcpu_type_##size pfo_val__; \
154-
asm(__pcpu_op2_##size(op, __percpu_arg(P[var]), "%[val]") \
178+
asm(__pcpu_op2_##size(op, __force_percpu_arg(P[var]), "%[val]") \
155179
: [val] __pcpu_reg_##size("=", pfo_val__) \
156180
: [var] "p" (&(_var))); \
157181
(typeof(_var))(unsigned long) pfo_val__; \
@@ -166,7 +190,7 @@ do { \
166190
asm qual (__pcpu_op2_##size("xadd", "%[tmp]", \
167191
__percpu_arg([var])) \
168192
: [tmp] __pcpu_reg_##size("+", paro_tmp__), \
169-
[var] "+m" (_var) \
193+
[var] "+m" (__my_cpu_var(_var)) \
170194
: : "memory"); \
171195
(typeof(_var))(unsigned long) (paro_tmp__ + _val); \
172196
})
@@ -187,7 +211,7 @@ do { \
187211
__percpu_arg([var])) \
188212
"\n\tjnz 1b" \
189213
: [oval] "=&a" (pxo_old__), \
190-
[var] "+m" (_var) \
214+
[var] "+m" (__my_cpu_var(_var)) \
191215
: [nval] __pcpu_reg_##size(, pxo_new__) \
192216
: "memory"); \
193217
(typeof(_var))(unsigned long) pxo_old__; \
@@ -204,7 +228,7 @@ do { \
204228
asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \
205229
__percpu_arg([var])) \
206230
: [oval] "+a" (pco_old__), \
207-
[var] "+m" (_var) \
231+
[var] "+m" (__my_cpu_var(_var)) \
208232
: [nval] __pcpu_reg_##size(, pco_new__) \
209233
: "memory"); \
210234
(typeof(_var))(unsigned long) pco_old__; \
@@ -221,7 +245,7 @@ do { \
221245
CC_SET(z) \
222246
: CC_OUT(z) (success), \
223247
[oval] "+a" (pco_old__), \
224-
[var] "+m" (_var) \
248+
[var] "+m" (__my_cpu_var(_var)) \
225249
: [nval] __pcpu_reg_##size(, pco_new__) \
226250
: "memory"); \
227251
if (unlikely(!success)) \
@@ -244,7 +268,7 @@ do { \
244268
\
245269
asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
246270
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
247-
: [var] "+m" (_var), \
271+
: [var] "+m" (__my_cpu_var(_var)), \
248272
"+a" (old__.low), \
249273
"+d" (old__.high) \
250274
: "b" (new__.low), \
@@ -276,7 +300,7 @@ do { \
276300
"cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
277301
CC_SET(z) \
278302
: CC_OUT(z) (success), \
279-
[var] "+m" (_var), \
303+
[var] "+m" (__my_cpu_var(_var)), \
280304
"+a" (old__.low), \
281305
"+d" (old__.high) \
282306
: "b" (new__.low), \
@@ -313,7 +337,7 @@ do { \
313337
\
314338
asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
315339
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
316-
: [var] "+m" (_var), \
340+
: [var] "+m" (__my_cpu_var(_var)), \
317341
"+a" (old__.low), \
318342
"+d" (old__.high) \
319343
: "b" (new__.low), \
@@ -345,7 +369,7 @@ do { \
345369
"cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
346370
CC_SET(z) \
347371
: CC_OUT(z) (success), \
348-
[var] "+m" (_var), \
372+
[var] "+m" (__my_cpu_var(_var)), \
349373
"+a" (old__.low), \
350374
"+d" (old__.high) \
351375
: "b" (new__.low), \
@@ -494,7 +518,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
494518
asm volatile("btl "__percpu_arg(2)",%1"
495519
CC_SET(c)
496520
: CC_OUT(c) (oldbit)
497-
: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
521+
: "m" (*__my_cpu_ptr((unsigned long __percpu *)(addr))), "Ir" (nr));
498522

499523
return oldbit;
500524
}

arch/x86/include/asm/preempt.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ static __always_inline void __preempt_count_sub(int val)
9292
*/
9393
static __always_inline bool __preempt_count_dec_and_test(void)
9494
{
95-
return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e,
95+
return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.preempt_count), e,
9696
__percpu_arg([var]));
9797
}
9898

0 commit comments

Comments (0)