Skip to content
Permalink
Browse files
x86/tdx: Add HLT support for TDX guests
The HLT instruction is a privileged instruction; executing it stops
instruction execution and places the processor in a HALT state. It
is used in the kernel for cases like reboot, the idle loop and exception
fixup handlers. For the idle case, interrupts will be enabled (using STI)
before the HLT instruction (this is also called safe_halt()).

To support the HLT instruction in TDX guests, it needs to be emulated
using TDVMCALL (hypercall to VMM). More details about it can be found
in Intel Trust Domain Extensions (Intel TDX) Guest-Host-Communication
Interface (GHCI) specification, section TDVMCALL[Instruction.HLT].

Any of the following three approaches can be used to emulate the HLT
instruction:

1. Using PV ops.
2. Using #VE exception handler (In TDX guest, executing HLT will lead
   to #VE exception).
3. Direct substitution of TDVMCALLs in places where emulation is
   required.

Regarding option #1, since halt() and safe_halt() have to be emulated
differently, and PV ops provides separate hooks for the halt() and
safe_halt() variants, it is the simplest solution available. However,
the halt and safe_halt hooks currently exist only under the
CONFIG_PARAVIRT_XXL option, and enabling it for TDX guests would bring
in a lot more than just the halt hooks. Hence, although this option is
the simplest, it is not cost effective.

Option #2 is also not preferred because, in the exception handler
safe_halt() and normal halt() use cases cannot be differentiated. This
differentiation is needed to add STI before the hypercall for the
safe_halt() use case.

In option #3, *halt() and *safe_halt() use cases will be substituted
with TDX variants (like tdx_halt() or tdx_safe_halt()).  In the kernel,
TDX guest cares about only around 6 references of halt calls
(specifically in reboot, exception support and smpboot code handlers).
Direct replacement of these 6 *halt* calls with corresponding TDX
variants (using alternative_call) is the best option because it is
simpler and it will also make the code faster in both TDX and non TDX
cases.

Reviewed-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
  • Loading branch information
Kuppuswamy Sathyanarayanan committed Nov 23, 2021
1 parent 3d380b9 commit e565a4318c87e3d1243bb31b89c53aff081472c5
Show file tree
Hide file tree
Showing 8 changed files with 115 additions and 6 deletions.
@@ -5,6 +5,9 @@

#include <linux/init.h>
#include <asm/ptrace.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/irqflags.h>

#define TDX_CPUID_LEAF_ID 0x21
#define TDX_HYPERCALL_STANDARD 0
@@ -72,10 +75,15 @@ bool tdx_get_ve_info(struct ve_info *ve);

bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);

void __cpuidle tdx_safe_halt(void);
void __cpuidle tdx_halt(void);

#else

/*
 * Stubs used when CONFIG_INTEL_TDX_GUEST is not enabled: the guest check
 * always reports false and the init/halt helpers do nothing.
 */
static inline void tdx_early_init(void) { }
static inline bool is_tdx_guest(void) { return false; }
static inline void __cpuidle tdx_halt(void) { }
static inline void __cpuidle tdx_safe_halt(void) { }

#endif /* CONFIG_INTEL_TDX_GUEST */

@@ -46,6 +46,7 @@
#include <asm/proto.h>
#include <asm/frame.h>
#include <asm/unwind.h>
#include <asm/tdx.h>

#include "process.h"

@@ -730,7 +731,8 @@ void arch_cpu_idle(void)
*/
void __cpuidle default_idle(void)
{
/*
 * NOTE(review): this hunk appears to be rendered without +/- markers.
 * The raw_safe_halt() line below is presumably the pre-patch call that
 * the following alternative_call() replaces, not a second halt --
 * confirm against the original diff.
 */
raw_safe_halt();
/*
 * Idle halt with a patchable call target: arch_safe_halt is the default
 * and tdx_safe_halt is named as the replacement tied to
 * X86_FEATURE_TDX_GUEST. No outputs or inputs are passed; "memory" is
 * listed as a clobber.
 */
alternative_call(arch_safe_halt, tdx_safe_halt, X86_FEATURE_TDX_GUEST,
/* No outputs. */, ASM_NO_INPUT_CLOBBER("memory"));
}
#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
EXPORT_SYMBOL(default_idle);
@@ -774,7 +776,8 @@ void stop_this_cpu(void *dummy)
* (stack usage and variables) after possibly issuing the
* native_wbinvd() above.
*/
native_halt();
alternative_call(native_halt, tdx_halt, X86_FEATURE_TDX_GUEST,
/* No outputs. */, ASM_NO_INPUT_CLOBBER("memory"));
}
}

@@ -32,6 +32,7 @@
#include <asm/realmode.h>
#include <asm/x86_init.h>
#include <asm/efi.h>
#include <asm/tdx.h>

/*
* Power off function, if any
@@ -829,7 +830,8 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)

atomic_dec(&waiting_for_crash_ipi);
/* Assume hlt works */
halt();
alternative_call(halt, tdx_halt, X86_FEATURE_TDX_GUEST,
/* No outputs. */, ASM_NO_INPUT_CLOBBER("memory"));
for (;;)
cpu_relax();

@@ -82,6 +82,7 @@
#include <asm/spec-ctrl.h>
#include <asm/hw_irq.h>
#include <asm/stackprotector.h>
#include <asm/tdx.h>

#ifdef CONFIG_ACPI_CPPC_LIB
#include <acpi/cppc_acpi.h>
@@ -1805,7 +1806,8 @@ void hlt_play_dead(void)
wbinvd();

while (1) {
native_halt();
alternative_call(native_halt, tdx_halt, X86_FEATURE_TDX_GUEST,
/* No outputs. */, ASM_NO_INPUT_CLOBBER("memory"));

cond_wakeup_cpu0();
}
@@ -30,6 +30,7 @@
#include <asm/setup.h>
#include <asm/e820/api.h>
#include <asm/io.h>
#include <asm/tdx.h>

#include "../realmode/rm/wakeup.h"

@@ -255,7 +256,9 @@ void tboot_shutdown(u32 shutdown_type)

/* should not reach here */
while (1)
halt();
alternative_call(halt, tdx_halt, X86_FEATURE_TDX_GUEST,
/* No outputs. */,
ASM_NO_INPUT_CLOBBER("memory"));
}

static void tboot_copy_fadt(const struct acpi_table_fadt *fadt)
@@ -3,6 +3,7 @@
#include <asm/asm.h>
#include <asm/frame.h>
#include <asm/unwind_hints.h>
#include <uapi/asm/vmx.h>

#include <linux/linkage.h>
#include <linux/bits.h>
@@ -39,6 +40,13 @@
*/
#define tdcall .byte 0x66,0x0f,0x01,0xcc

/*
* Used in the __tdx_hypercall() function to test R15 register content
* and optionally include the STI instruction before the TDCALL
* instruction (for EXIT_REASON_HLT case).
*/
#define do_sti 0x01

/*
* __tdx_module_call() - Used by TDX guests to request services from
* the TDX module (does not include VMM services).
@@ -231,6 +239,30 @@ SYM_FUNC_START(__tdx_hypercall)

movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx

/*
* For the idle loop STI needs to be called directly before
* the TDCALL that enters idle (EXIT_REASON_HLT case). STI
* instruction enables interrupts only one instruction later.
* If there is a window between STI and the instruction that
* emulates the HALT state, there is a chance for interrupts to
* happen in this window, which can delay the HLT operation
* indefinitely. Since this is not the desired result, add
* support to conditionally call STI before TDCALL.
*
* Since STI instruction is only required for the idle case
* (a special case of EXIT_REASON_HLT), use the r15 register
* value to identify it. Since the R15 register is not used
* by the VMM as per EXIT_REASON_HLT ABI, re-use it in
* software to identify the STI case.
*/
cmpl $EXIT_REASON_HLT, %r11d
jne skip_sti
cmpl $do_sti, %r15d
jne skip_sti
/* Set R15 register to 0, it is unused in EXIT_REASON_HLT case */
xor %r15, %r15
sti
skip_sti:
tdcall

/* Restore output pointer to R9 */
@@ -6,6 +6,7 @@

#include <linux/cpufeature.h>
#include <asm/tdx.h>
#include <asm/vmx.h>

/* TDX Module Call Leaf IDs */
#define TDX_GET_VEINFO 3
@@ -36,6 +37,61 @@ static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14,
return out->r10;
}

static __cpuidle void _tdx_halt(const bool irq_disabled, const bool do_sti)
{
	/*
	 * HLT is emulated by asking the VMM to halt the vCPU through a
	 * hypercall; the ABI is described in the TDX Guest-Host-Communication
	 * Interface (GHCI), sec 3.8 TDG.VP.VMCALL<Instruction.HLT>.
	 *
	 * @irq_disabled tells the VMM the guest's interrupt-enable state
	 * (RFLAGS.IF). With it the VMM decides whether a pending IRQ must
	 * get the halted vCPU scheduled again; when IRQs are disabled it
	 * may leave the vCPU in virtual HLT despite a pending IRQ without
	 * hanging or breaking the guest.
	 *
	 * @do_sti is handed to __tdx_hypercall(), which uses it to choose
	 * whether an STI instruction is executed just before the TDCALL.
	 */
	const u64 err = _tdx_hypercall(EXIT_REASON_HLT, irq_disabled, 0, 0,
				       do_sti, NULL);

	/* A non-zero hypercall result means the emulation failed; warn once. */
	WARN_ONCE(err, "HLT instruction emulation failed\n");
}

void __cpuidle tdx_halt(void)
{
	/*
	 * The plain (non-safe) halt is mainly used for CPU offlining, where
	 * the guest stays halted for good, so STI is never requested here
	 * (do_sti is false). The current IRQ-disabled state is sampled and
	 * forwarded as-is.
	 */
	_tdx_halt(irqs_disabled(), /* do_sti */ false);
}

void __cpuidle tdx_safe_halt(void)
{
	/*
	 * With do_sti set, __tdx_hypercall() executes STI right before the
	 * TDCALL, so interrupts are enabled by the time the halt is
	 * requested; report irq_disabled as false accordingly.
	 */
	_tdx_halt(/* irq_disabled */ false, /* do_sti */ true);
}

bool tdx_get_ve_info(struct ve_info *ve)
{
struct tdx_module_output out;
@@ -8,6 +8,7 @@
#include <asm/sev.h>
#include <asm/traps.h>
#include <asm/kdebug.h>
#include <asm/tdx.h>

static inline unsigned long
ex_fixup_addr(const struct exception_table_entry *x)
@@ -221,5 +222,7 @@ void __init early_fixup_exception(struct pt_regs *regs, int trapnr)

halt_loop:
while (true)
halt();
alternative_call(halt, tdx_halt, X86_FEATURE_TDX_GUEST,
/* No outputs. */,
ASM_NO_INPUT_CLOBBER("memory"));
}

0 comments on commit e565a43

Please sign in to comment.