From 3fef9c7e6b415249c8a71bb6a2d2b1f87c20df0e Mon Sep 17 00:00:00 2001
From: Greg Rose
Date: Wed, 10 Jul 2024 11:35:01 -0700
Subject: [PATCH 1/3] locking/atomic: Make test_and_*_bit() ordered on failure

jira LE-1482
commit 415d832497098030241605c52ea83d4e2cfa7879
upstream-diff - Conflicts around use of arch_atomic vs atomic - there
are no arch_atomic* ops in Rocky 8.x

These operations are documented as always ordered in
include/asm-generic/bitops/instrumented-atomic.h, and producer-consumer
type use cases where one side needs to ensure a flag is left pending
after some shared data was updated rely on this ordering, even in the
failure case. This is the case with the workqueue code, which currently
suffers from a reproducible ordering violation on Apple M1 platforms
(which are notoriously out-of-order) that ends up causing the TTY layer
to fail to deliver data to userspace properly under the right
conditions. This change fixes that bug.

Change the documentation to restrict the "no order on failure" story to
the _lock() variant (for which it makes sense), and remove the
early-exit from the generic implementation, which is what causes the
missing barrier semantics in that case. Without this, the remaining
atomic op is fully ordered (including on ARM64 LSE, as of recent
versions of the architecture spec).

Suggested-by: Linus Torvalds
Cc: stable@vger.kernel.org
Fixes: e986a0d6cb36 ("locking/atomics, asm-generic/bitops/atomic.h: Rewrite using atomic_*() APIs")
Fixes: 61e02392d3c7 ("locking/atomic/bitops: Document and clarify ordering semantics for failed test_and_{}_bit()")
Signed-off-by: Hector Martin
Acked-by: Will Deacon
Reviewed-by: Arnd Bergmann
Signed-off-by: Linus Torvalds
(cherry picked from commit 415d832497098030241605c52ea83d4e2cfa7879)
Signed-off-by: Greg Rose
Signed-off-by: Jonathan Maple
---
 Documentation/atomic_bitops.txt     | 2 +-
 include/asm-generic/bitops/atomic.h | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt
index be70b32c95d91..bc3fac8e1db3a 100644
--- a/Documentation/atomic_bitops.txt
+++ b/Documentation/atomic_bitops.txt
@@ -59,7 +59,7 @@ Like with atomic_t, the rule of thumb is:
  - RMW operations that have a return value are fully ordered.
 
  - RMW operations that are conditional are unordered on FAILURE,
-   otherwise the above rules apply. In the case of test_and_{}_bit() operations,
+   otherwise the above rules apply. In the case of test_and_set_bit_lock(),
    if the bit in memory is unchanged by the operation then it is deemed to have
    failed.

diff --git a/include/asm-generic/bitops/atomic.h b/include/asm-generic/bitops/atomic.h
index dd90c9792909d..18399e3759a24 100644
--- a/include/asm-generic/bitops/atomic.h
+++ b/include/asm-generic/bitops/atomic.h
@@ -35,9 +35,6 @@ static inline int test_and_set_bit(unsigned int nr, volatile unsigned long *p)
 	unsigned long mask = BIT_MASK(nr);
 
 	p += BIT_WORD(nr);
-	if (READ_ONCE(*p) & mask)
-		return 1;
-
 	old = atomic_long_fetch_or(mask, (atomic_long_t *)p);
 	return !!(old & mask);
 }
@@ -48,9 +45,6 @@ static inline int test_and_clear_bit(unsigned int nr, volatile unsigned long *p)
 	unsigned long mask = BIT_MASK(nr);
 
 	p += BIT_WORD(nr);
-	if (!(READ_ONCE(*p) & mask))
-		return 0;
-
 	old = atomic_long_fetch_andnot(mask, (atomic_long_t *)p);
 	return !!(old & mask);
 }
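A note for reviewers without the tree handy: the ordering requirement the
commit message describes can be modeled in userspace. The following is a
minimal sketch, not kernel code; my_test_and_set_bit is a hypothetical
stand-in for the generic test_and_set_bit(), using GCC's __atomic builtins
in place of the kernel's READ_ONCE() and atomic_long_fetch_or():

#include <stdio.h>

#define BITS_PER_LONG	(8 * sizeof(unsigned long))
#define BIT_MASK(nr)	(1UL << ((nr) % BITS_PER_LONG))
#define BIT_WORD(nr)	((nr) / BITS_PER_LONG)

/* Post-fix behavior: unconditionally perform the fully ordered RMW. */
static int my_test_and_set_bit(unsigned int nr, unsigned long *p)
{
	unsigned long mask = BIT_MASK(nr);
	unsigned long old;

	p += BIT_WORD(nr);
	/*
	 * The removed early-exit looked roughly like this; when the bit
	 * was already set it returned without any barrier, so a
	 * producer's earlier stores could be observed out of order:
	 *
	 *	if (__atomic_load_n(p, __ATOMIC_RELAXED) & mask)
	 *		return 1;
	 */
	old = __atomic_fetch_or(p, mask, __ATOMIC_SEQ_CST);
	return !!(old & mask);
}

int main(void)
{
	unsigned long bitmap[1] = { 0 };

	printf("first call:  %d\n", my_test_and_set_bit(3, bitmap)); /* 0 */
	printf("second call: %d\n", my_test_and_set_bit(3, bitmap)); /* 1 */
	return 0;
}

The producer-consumer pattern at stake is: the producer updates shared data
and then calls test_and_set_bit() on a pending flag; even when that call
"fails" because the bit was already set, the consumer that later clears the
bit must still observe the data update, which only holds if the failed call
is ordered.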
From a1626400c731913a5d6b5871da4c80dff75b51d0 Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg
Date: Wed, 11 Oct 2023 04:01:10 +0000
Subject: [PATCH 2/3] x86/sev-es: Set x86_virt_bits to the correct value
 straight away, instead of a two-phase approach

jira roc-2673
commit fbf6449f84bf5e4ad09f2c09ee70ed7d629b5ff6

Instead of setting x86_virt_bits to a possibly-correct value and then
correcting it later, do all the necessary checks before setting it.

At this point, the #VC handler references boot_cpu_data.x86_virt_bits,
and in the previous version, it would be triggered by the CPUIDs between
the point at which it is set to 48 and when it is set to the correct
value.

Suggested-by: Dave Hansen
Signed-off-by: Adam Dunlap
Signed-off-by: Ingo Molnar
Tested-by: Jacob Xu
Link: https://lore.kernel.org/r/20230912002703.3924521-3-acdunlap@google.com
Signed-off-by: Ronnie Sahlberg
Signed-off-by: Jonathan Maple
---
 arch/x86/kernel/cpu/common.c | 37 +++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dbb5dee28ca27..40b4861cf9c79 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1020,17 +1020,32 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
 static void get_cpu_address_sizes(struct cpuinfo_x86 *c)
 {
 	u32 eax, ebx, ecx, edx;
+	bool vp_bits_from_cpuid = true;
 
-	if (c->extended_cpuid_level >= 0x80000008) {
+	if (!cpu_has(c, X86_FEATURE_CPUID) ||
+	    (c->extended_cpuid_level < 0x80000008))
+		vp_bits_from_cpuid = false;
+
+	if (vp_bits_from_cpuid) {
 		cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
 
 		c->x86_virt_bits = (eax >> 8) & 0xff;
 		c->x86_phys_bits = eax & 0xff;
+	} else {
+		if (IS_ENABLED(CONFIG_X86_64)) {
+			c->x86_clflush_size = 64;
+			c->x86_phys_bits = 36;
+			c->x86_virt_bits = 48;
+		} else {
+			c->x86_clflush_size = 32;
+			c->x86_virt_bits = 32;
+			c->x86_phys_bits = 32;
+
+			if (cpu_has(c, X86_FEATURE_PAE) ||
+			    cpu_has(c, X86_FEATURE_PSE36))
+				c->x86_phys_bits = 36;
+		}
 	}
-#ifdef CONFIG_X86_32
-	else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
-		c->x86_phys_bits = 36;
-#endif
 	c->x86_cache_bits = c->x86_phys_bits;
 }
 
@@ -1468,15 +1483,6 @@ static void __init cpu_parse_early_param(void)
  */
 static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 {
-#ifdef CONFIG_X86_64
-	c->x86_clflush_size = 64;
-	c->x86_phys_bits = 36;
-	c->x86_virt_bits = 48;
-#else
-	c->x86_clflush_size = 32;
-	c->x86_phys_bits = 32;
-	c->x86_virt_bits = 32;
-#endif
 	c->x86_cache_alignment = c->x86_clflush_size;
 
 	memset(&c->x86_capability, 0, sizeof(c->x86_capability));
@@ -1488,7 +1494,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 		get_cpu_vendor(c);
 		get_cpu_cap(c);
 		get_model_name(c); /* RHEL8: get model name for unsupported check */
-		get_cpu_address_sizes(c);
 
 		setup_force_cpu_cap(X86_FEATURE_CPUID);
 		cpu_parse_early_param();
@@ -1505,6 +1510,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 		setup_clear_cpu_cap(X86_FEATURE_CPUID);
 	}
 
+	get_cpu_address_sizes(c);
+
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
 
 	cpu_set_bug_bits(c);
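For context on the decoding that get_cpu_address_sizes() now performs up
front: CPUID leaf 0x80000008 reports physical address bits in EAX[7:0] and
virtual address bits in EAX[15:8]. The same values can be read from
userspace on x86. A minimal sketch, assuming an x86 machine and GCC/Clang's
<cpuid.h>; the fallback message mirrors the patch's x86-64 defaults:

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/*
	 * Check that the extended leaf exists before consuming it, just
	 * as the patch checks c->extended_cpuid_level.
	 */
	if (__get_cpuid_max(0x80000000, NULL) < 0x80000008) {
		puts("no leaf 0x80000008; kernel falls back to phys=36 virt=48");
		return 0;
	}

	__get_cpuid(0x80000008, &eax, &ebx, &ecx, &edx);

	/* EAX[7:0] = physical address bits, EAX[15:8] = virtual address bits. */
	printf("x86_phys_bits=%u x86_virt_bits=%u\n",
	       eax & 0xff, (eax >> 8) & 0xff);
	return 0;
}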
From db599ef0953c2675b1c8610490e1dc277be4ccea Mon Sep 17 00:00:00 2001
From: Ronnie Sahlberg
Date: Wed, 11 Oct 2023 04:04:08 +0000
Subject: [PATCH 3/3] x86/boot: Move x86_cache_alignment initialization to
 correct spot

jira roc-2673
commit 3e32552652917f10c0aa8ac75cdc8f0b8d257dec

c->x86_cache_alignment is initialized from c->x86_clflush_size.
However, commit fbf6449f84bf moved c->x86_clflush_size initialization
to later in boot without moving the c->x86_cache_alignment assignment:

  fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value
  straight away, instead of a two-phase approach")

This presumably left c->x86_cache_alignment set to zero for longer than
it should be. The result was an oops on 32-bit kernels while accessing
a pointer at 0x20. The 0x20 came from accessing a structure member at
offset 0x10 (buffer->cpumask) from a ZERO_SIZE_PTR=0x10. kmalloc() can
evidently return ZERO_SIZE_PTR when it's given 0 as its alignment
requirement.

Move the c->x86_cache_alignment initialization to be after
c->x86_clflush_size has an actual value.

Fixes: fbf6449f84bf ("x86/sev-es: Set x86_virt_bits to the correct value straight away, instead of a two-phase approach")
Signed-off-by: Dave Hansen
Signed-off-by: Ingo Molnar
Tested-by: Nathan Chancellor
Link: https://lore.kernel.org/r/20231002220045.1014760-1-dave.hansen@linux.intel.com
(cherry picked from commit 3e32552652917f10c0aa8ac75cdc8f0b8d257dec)
Signed-off-by: Ronnie Sahlberg
Signed-off-by: Jonathan Maple
---
 arch/x86/kernel/cpu/common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 40b4861cf9c79..2ad285f913633 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1047,6 +1047,7 @@ static void get_cpu_address_sizes(struct cpuinfo_x86 *c)
 	}
 	c->x86_cache_bits = c->x86_phys_bits;
+	c->x86_cache_alignment = c->x86_clflush_size;
 }
 
 static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
@@ -1483,8 +1484,6 @@ static void __init cpu_parse_early_param(void)
  */
 static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 {
-	c->x86_cache_alignment = c->x86_clflush_size;
-
 	memset(&c->x86_capability, 0, sizeof(c->x86_capability));
 	c->extended_cpuid_level = 0;
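The fault-address arithmetic in the commit message is worth spelling out:
0x10 (ZERO_SIZE_PTR) plus 0x10 (the member offset) gives the faulting 0x20.
A minimal sketch; struct buffer below is hypothetical (only the 0x10 member
offset matters) and ZERO_SIZE_PTR is redefined locally with the kernel's
value of 16:

#include <stdio.h>
#include <stddef.h>

/* The kernel returns this marker for zero-byte allocations. */
#define ZERO_SIZE_PTR	((unsigned long)16)

/* Hypothetical layout; padding forces the member to offset 0x10. */
struct buffer {
	char pad[0x10];
	void *cpumask;
};

int main(void)
{
	/* 0x10 (ZERO_SIZE_PTR) + 0x10 (member offset) = 0x20. */
	printf("fault address: %#lx\n",
	       ZERO_SIZE_PTR + offsetof(struct buffer, cpumask));
	return 0;
}

The fix keeps c->x86_cache_alignment in lockstep with c->x86_clflush_size
by assigning it in the same function that now establishes the clflush size.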