Skip to content

Commit

Permalink
x86/smpboot: Support parallel startup of secondary CPUs
Browse files Browse the repository at this point in the history
To allow for parallel AP bringup, we need to avoid the use of global
variables for passing information to the APs, as well as preventing them
from all trying to use the same real-mode stack simultaneously.

So, introduce a 'lock' field in struct trampoline_header to use as a
simple bit-spinlock for the real-mode stack. That lock also protects
the global variables initial_gs, initial_stack and early_gdt_descr,
which can now be calculated...

So how do we calculate those addresses? Well, they can all be found
from the per_cpu data for this CPU. Simples! Except... how does it know
what its CPU# is? OK, we export the cpuid_to_apicid[] array and it can
search it to find its APIC ID in there.

But now you whine at me that it doesn't even know its APIC ID? Well, if
it's a relatively modern CPU then the APIC ID is in CPUID leaf 0x0B so
we can use that. Otherwise... erm... OK, otherwise it can't have parallel
CPU bringup for now. We'll still use a global variable for those CPUs and
bring them up one at a time.

So add a global 'smpboot_control' field which either contains the APIC
ID, or a flag indicating that it can be found in CPUID.

This adds the 'do_parallel_bringup' flag in preparation but doesn't
actually enable parallel bringup yet.

[ dwmw2: Minor tweaks, write a commit message ]
[ seanc: Fix stray override of initial_gs in common_cpu_up() ]
[ Usama Arif: Disable parallel bringup for AMD CPUs ]
Not-signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Usama Arif <usama.arif@bytedance.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
  • Loading branch information
Thomas Gleixner authored and paulmckrcu committed Feb 1, 2023
1 parent 9d41880 commit 42e667c
Show file tree
Hide file tree
Showing 9 changed files with 135 additions and 7 deletions.
3 changes: 3 additions & 0 deletions arch/x86/include/asm/realmode.h
Expand Up @@ -52,6 +52,7 @@ struct trampoline_header {
u64 efer;
u32 cr4;
u32 flags;
u32 lock;
#endif
};

Expand All @@ -65,6 +66,8 @@ extern unsigned long initial_stack;
extern unsigned long initial_vc_handler;
#endif

extern u32 *trampoline_lock;

extern unsigned char real_mode_blob[];
extern unsigned char real_mode_relocs[];

Expand Down
9 changes: 8 additions & 1 deletion arch/x86/include/asm/smp.h
Expand Up @@ -201,5 +201,12 @@ extern void nmi_selftest(void);
#define nmi_selftest() do { } while (0)
#endif

#endif /* __ASSEMBLY__ */
extern unsigned int smpboot_control;

#endif /* !__ASSEMBLY__ */

/* Control bits for startup_64 */
#define STARTUP_PARALLEL 0x80000000
#define STARTUP_SECONDARY 0x40000000

#endif /* _ASM_X86_SMP_H */
1 change: 1 addition & 0 deletions arch/x86/kernel/acpi/sleep.c
Expand Up @@ -115,6 +115,7 @@ int x86_acpi_suspend_lowlevel(void)
early_gdt_descr.address =
(unsigned long)get_cpu_gdt_rw(smp_processor_id());
initial_gs = per_cpu_offset(smp_processor_id());
smpboot_control = 0;
#endif
initial_code = (unsigned long)wakeup_long64;
saved_magic = 0x123456789abcdef0L;
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kernel/apic/apic.c
Expand Up @@ -2374,7 +2374,7 @@ static int nr_logical_cpuids = 1;
/*
 * Used to store mapping between logical CPU IDs and APIC IDs.
 *
 * Deliberately non-static: the 64-bit secondary-CPU startup code in
 * head_64.S (.Lsetup_AP / .Lfind_cpunr) scans this array to translate
 * the AP's APIC ID into its logical CPU number, from which the per-CPU
 * offset, GDT and idle stack are derived.
 *
 * Entries default to -1 ("no APIC ID assigned"); slots are filled in
 * as logical CPU IDs are allocated.
 */
int cpuid_to_apicid[] = {
	[0 ... NR_CPUS - 1] = -1,
};

Expand Down
73 changes: 73 additions & 0 deletions arch/x86/kernel/head_64.S
Expand Up @@ -25,6 +25,7 @@
#include <asm/export.h>
#include <asm/nospec-branch.h>
#include <asm/fixmap.h>
#include <asm/smp.h>

/*
* We are not able to switch in one step to the final KERNEL ADDRESS SPACE
Expand Down Expand Up @@ -241,6 +242,66 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR // above

/*
* Is this the boot CPU coming up? If so everything is available
* in initial_gs, initial_stack and early_gdt_descr.
*/
movl smpboot_control(%rip), %eax
testl %eax, %eax
jz .Lsetup_cpu

/*
* Secondary CPUs find out the offsets via the APIC ID. For parallel
* boot the APIC ID is retrieved from CPUID, otherwise it's encoded
* in smpboot_control:
* Bit 0-29 APIC ID if STARTUP_PARALLEL flag is not set
* Bit 30 STARTUP_SECONDARY flag
* Bit 31 STARTUP_PARALLEL flag (use CPUID 0x0b for APIC ID)
*/
testl $STARTUP_PARALLEL, %eax
jnz .Luse_cpuid_0b
andl $0x0FFFFFFF, %eax
jmp .Lsetup_AP

.Luse_cpuid_0b:
mov $0x0B, %eax
xorl %ecx, %ecx
cpuid
mov %edx, %eax

.Lsetup_AP:
/* EAX contains the APICID of the current CPU */
xorl %ecx, %ecx
leaq cpuid_to_apicid(%rip), %rbx

.Lfind_cpunr:
cmpl (%rbx), %eax
jz .Linit_cpu_data
addq $4, %rbx
addq $8, %rcx
jmp .Lfind_cpunr

.Linit_cpu_data:
/* Get the per cpu offset */
leaq __per_cpu_offset(%rip), %rbx
addq %rcx, %rbx
movq (%rbx), %rbx
/* Save it for GS BASE setup */
movq %rbx, initial_gs(%rip)

/* Calculate the GDT address */
movq $gdt_page, %rcx
addq %rbx, %rcx
movq %rcx, early_gdt_descr_base(%rip)

/* Find the idle task stack */
movq $idle_threads, %rcx
addq %rbx, %rcx
movq (%rcx), %rcx
movq TASK_threadsp(%rcx), %rcx
movq %rcx, initial_stack(%rip)

.Lsetup_cpu:
/*
* We must switch to a new descriptor in kernel space for the GDT
* because soon the kernel won't have access anymore to the userspace
Expand Down Expand Up @@ -281,6 +342,14 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
*/
movq initial_stack(%rip), %rsp

/* Drop the realmode protection. For the boot CPU the pointer is NULL! */
movq trampoline_lock(%rip), %rax
testq %rax, %rax
jz .Lsetup_idt
lock
btrl $0, (%rax)

.Lsetup_idt:
/* Setup and Load IDT */
pushq %rsi
call early_setup_idt
Expand Down Expand Up @@ -425,6 +494,7 @@ SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
* reliably detect the end of the stack.
*/
SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE)
SYM_DATA(trampoline_lock, .quad 0);
__FINITDATA

__INIT
Expand Down Expand Up @@ -659,6 +729,9 @@ SYM_DATA_END(level1_fixmap_pgt)
SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1)
SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page))

.align 16
SYM_DATA(smpboot_control, .long 0)

.align 16
/* This must match the first entry in level2_kernel_pgt */
SYM_DATA(phys_base, .quad 0x0)
Expand Down
35 changes: 31 additions & 4 deletions arch/x86/kernel/smpboot.c
Expand Up @@ -798,6 +798,16 @@ static int __init cpu_init_udelay(char *str)
}
early_param("cpu_init_udelay", cpu_init_udelay);

/*
 * Whether secondary CPUs may be brought up in parallel. Defaults to
 * enabled; cleared either by the "no_parallel_bringup" command line
 * parameter below, or by native_smp_prepare_cpus() when parallel
 * bringup is not usable (32-bit, no CPUID leaf 0x0B, SEV-ES guests,
 * or AMD CPUs).
 */
static bool do_parallel_bringup = true;

/*
 * early_param handler: "no_parallel_bringup" forces serial,
 * one-at-a-time AP startup. The argument string is ignored; the
 * parameter's mere presence disables parallel bringup.
 */
static int __init no_parallel_bringup(char *str)
{
	do_parallel_bringup = false;

	return 0;
}
early_param("no_parallel_bringup", no_parallel_bringup);

static void __init smp_quirk_init_udelay(void)
{
/* if cmdline changed it from default, leave it alone */
Expand Down Expand Up @@ -1085,8 +1095,6 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
#else
initial_gs = per_cpu_offset(cpu);
#endif
return 0;
}
Expand All @@ -1111,9 +1119,16 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
start_ip = real_mode_header->trampoline_start64;
#endif
idle->thread.sp = (unsigned long)task_pt_regs(idle);
early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
initial_code = (unsigned long)start_secondary;
initial_stack = idle->thread.sp;

if (IS_ENABLED(CONFIG_X86_32)) {
early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
initial_stack = idle->thread.sp;
} else if (do_parallel_bringup) {
smpboot_control = STARTUP_SECONDARY | STARTUP_PARALLEL;
} else {
smpboot_control = STARTUP_SECONDARY | apicid;
}

/* Enable the espfix hack for this CPU */
init_espfix_ap(cpu);
Expand Down Expand Up @@ -1515,6 +1530,18 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)

speculative_store_bypass_ht_init();

/*
* We can do 64-bit AP bringup in parallel if the CPU reports its
* APIC ID in CPUID leaf 0x0B. Otherwise it's too hard. And not
* for SEV-ES guests because they can't use CPUID that early.
* Also, some AMD CPUs crash when doing parallel cpu bringup, disable
* it for all AMD CPUs to be on the safe side.
*/
if (IS_ENABLED(CONFIG_X86_32) || boot_cpu_data.cpuid_level < 0x0B ||
cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT) ||
boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
do_parallel_bringup = false;

snp_set_wakeup_secondary_cpu();
}

Expand Down
3 changes: 3 additions & 0 deletions arch/x86/realmode/init.c
Expand Up @@ -154,6 +154,9 @@ static void __init setup_real_mode(void)

trampoline_header->flags = 0;

trampoline_lock = &trampoline_header->lock;
*trampoline_lock = 0;

trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);

/* Map the real mode stub as virtual == physical */
Expand Down
14 changes: 14 additions & 0 deletions arch/x86/realmode/rm/trampoline_64.S
Expand Up @@ -49,6 +49,19 @@ SYM_CODE_START(trampoline_start)
mov %ax, %es
mov %ax, %ss

/*
* Make sure only one CPU fiddles with the realmode stack
*/
.Llock_rm:
btl $0, tr_lock
jnc 2f
pause
jmp .Llock_rm
2:
lock
btsl $0, tr_lock
jc .Llock_rm

# Setup stack
movl $rm_stack_end, %esp

Expand Down Expand Up @@ -241,6 +254,7 @@ SYM_DATA_START(trampoline_header)
SYM_DATA(tr_efer, .space 8)
SYM_DATA(tr_cr4, .space 4)
SYM_DATA(tr_flags, .space 4)
SYM_DATA(tr_lock, .space 4)
SYM_DATA_END(trampoline_header)

#include "trampoline_common.S"
2 changes: 1 addition & 1 deletion kernel/smpboot.c
Expand Up @@ -25,7 +25,7 @@
* For the hotplug case we keep the task structs around and reuse
* them.
*/
static DEFINE_PER_CPU(struct task_struct *, idle_threads);
DEFINE_PER_CPU(struct task_struct *, idle_threads);

struct task_struct *idle_thread_get(unsigned int cpu)
{
Expand Down

0 comments on commit 42e667c

Please sign in to comment.