Skip to content

Commit

Permalink
[PAL/Linux-SGX] Cross-verify SW signals vs HW exceptions
Browse files Browse the repository at this point in the history
Previously, our trusted exception handler was lax about checking
reported-by-host SW signals against reported-by-SGX HW exceptions. The
old code did not verify that the (untrusted) SW signal corresponds to the
(trusted) HW exception. This could lead to cases where the host e.g.
injects SIGILL (PAL_EVENT_ILLEGAL) even though no corresponding #UD
happened in the enclave, and this malicious SIGILL is delivered to the app.

This commit hardens cross-verification of SW signals vs HW exceptions.
To add such functionality, the SGX asm code is modified to forward both
the trusted EXITINFO value and the untrusted external-event value to the
`_PalExceptionHandler()` function.

As part of this commit, two additional bugs are fixed:

- When enabled, the SGX EXINFO feature forces the CPU core to report #PF
  exceptions to the SGX enclave (in the EXITINFO/EXINFO SSA fields)
  whenever #PFs occur in the hardware, even if these #PFs are benign. By
  benign page faults we mean the ones that are handled completely by the
  host Linux kernel (more specifically, by the Linux SGX kernel driver).
  Such benign #PF exceptions should be considered spurious -- they are
  reported to the SGX enclave (when `sgx.use_exinfo = true`), but they
  are completely resolved by the host Linux kernel and must be ignored
  by Gramine.

- Previously, EXINFO information and `has_hw_fault_address` applied only
  to the #PF hardware exception. However, this info must be set on #GP
  exceptions too. Otherwise Gramine may fail with the "Tried to handle a
  memory fault with no faulting address ..." message on a #GP (when the
  manifest contains `sgx.use_exinfo = true`).

The vulnerability of mismatching SW signals vs HW exceptions was also
independently found and reported to the Gramine team on 29. Nov 2023 by
a team from ETH Zürich: Supraja Sridhara, Benedict Schlueter, Mark
Kuhne, Andrin Bertschi and Shweta Shinde (emails:
firstname.lastname@inf.ethz.ch).

Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii@intel.com>
  • Loading branch information
dimakuv committed Dec 1, 2023
1 parent 24c8e9b commit a390e33
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 66 deletions.
56 changes: 22 additions & 34 deletions pal/src/host/linux-sgx/enclave_entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -209,24 +209,13 @@ enclave_entry:
# SGX_GPR is a base pointer to the SSA[0].GPRSGX region
movq %gs:SGX_GPR, %rbx

# first check SSA[0].GPRSGX.EXITINFO -- if VALID bit (0x80000000) is set, then use trusted
# SSA[0].GPRSGX.EXITINFO.VECTOR (first 8 bits of EXITINFO) instead of possibly-malicious
# "external event" value in RDI
movq %rdi, %rsi
xorq %rdi, %rdi
movl SGX_GPR_EXITINFO(%rbx), %edi
testl $0x80000000, %edi
jnz .Lcssa1_exception_determine_when
# memoize SSA[0].GPRSGX.EXITINFO in r14, we'll forward it to _PalExceptionHandler() as 1st arg
movl SGX_GPR_EXITINFO(%rbx), %r14d

# VALID bit in SSA[0].GPRSGX.EXITINFO is clear, some unknown-to-SGX exception occured, use the
# possibly-malicious "external event" value in RDI (only the first 8 bits count)
movl %esi, %edi
# memoize the possibly-malicious "external event" in r15, we'll forward it to
# _PalExceptionHandler() as 2nd arg
andl $0xff, %edi
cmpl $0, %edi
jne .Lcssa1_exception_determine_when

# TODO: we shouldn't ignore definitely-malicious exception, but we do it now
jmp .Lcssa1_exception_eexit
movl %edi, %r15d

.Lcssa1_exception_determine_when:
# If this enclave thread has not been initialized yet, we should not try to call an event
Expand Down Expand Up @@ -453,15 +442,7 @@ enclave_entry:
andq $~(PAL_XSTATE_ALIGN - 1), %rsi
subq $SGX_CPU_CONTEXT_XSTATE_ALIGN_SUB, %rsi

# Rewire SSA0: pass 1st arg to _PalExceptionHandler():
# - exit info (in RDI, either trusted SSA[0].GPRSGX.EXITINFO or possibly-malicious
# "external event")
#
# Also copy SSA[0].GPRSGX.RDI to the CPU context on the stack
xchgq %rdi, SGX_GPR_RDI(%rbx)
movq %rdi, SGX_CPU_CONTEXT_RDI(%rsi)

# Copy the rest of SSA[0].GPRSGX to the CPU context on the stack
# Copy SSA[0].GPRSGX to the CPU context on the stack
movq SGX_GPR_RAX(%rbx), %rdi
movq %rdi, SGX_CPU_CONTEXT_RAX(%rsi)
movq SGX_GPR_RCX(%rbx), %rdi
Expand All @@ -476,7 +457,8 @@ enclave_entry:
movq %rdi, SGX_CPU_CONTEXT_RBP(%rsi)
movq SGX_GPR_RSI(%rbx), %rdi
movq %rdi, SGX_CPU_CONTEXT_RSI(%rsi)
/* RDI was saved above */
movq SGX_GPR_RDI(%rbx), %rdi
movq %rdi, SGX_CPU_CONTEXT_RDI(%rsi)
movq SGX_GPR_R8(%rbx), %rdi
movq %rdi, SGX_CPU_CONTEXT_R8(%rsi)
movq SGX_GPR_R9(%rbx), %rdi
Expand All @@ -498,17 +480,23 @@ enclave_entry:
movq SGX_GPR_RIP(%rbx), %rdi
movq %rdi, SGX_CPU_CONTEXT_RIP(%rsi)

# Rewire SSA0: pass more args to _PalExceptionHandler():
# - pointer to sgx_cpu_context_t (SSA[0].GPRSGX.RSI, 2nd arg)
# - pointer to PAL_XREGS_STATE (SSA[0].GPRSGX.RDX, 3rd arg)
movq %rsi, SGX_GPR_RSI(%rbx)
# Rewire SSA0 (args to _PalExceptionHandler()):
# - trusted EXITINFO (SSA[0].GPRSGX.EXITINFO, 1st arg)
# - untrusted external event (host's RDI, 2nd arg)
movq %r14, SGX_GPR_RDI(%rbx)
movq %r15, SGX_GPR_RSI(%rbx)

# Continue rewiring SSA0 (args to _PalExceptionHandler()):
# - pointer to sgx_cpu_context_t (3rd arg)
# - pointer to PAL_XREGS_STATE (4th arg)
movq %rsi, SGX_GPR_RDX(%rbx)
addq $SGX_CPU_CONTEXT_SIZE, SGX_GPR_RDX(%rbx)
movq %rsi, SGX_GPR_RCX(%rbx)
addq $SGX_CPU_CONTEXT_SIZE, SGX_GPR_RCX(%rbx)

# Rewire SSA0 (args to _PalExceptionHandler()):
# - pointer to EXINFO (SSA[0].GPRSGX.RCX, 4rd arg)
# Continue rewiring SSA0 (args to _PalExceptionHandler()):
# - pointer to EXINFO (5th arg)
sub $SSA_MISC_EXINFO_SIZE, %rsi
mov %rsi, SGX_GPR_RCX(%rbx)
mov %rsi, SGX_GPR_R8(%rbx)

# Save EXINFO - it's always immediately before GPR in SSA.
# If EXINFO MISC component is not enabled, it will contain padding with all 0.
Expand Down
154 changes: 123 additions & 31 deletions pal/src/host/linux-sgx/pal_exception.c
Original file line number Diff line number Diff line change
Expand Up @@ -233,54 +233,148 @@ static bool handle_ud(sgx_cpu_context_t* uc) {
}

/* perform exception handling inside the enclave */
void _PalExceptionHandler(unsigned int exit_info, sgx_cpu_context_t* uc,
void _PalExceptionHandler(uint32_t trusted_exit_info_,
uint32_t untrusted_external_event, sgx_cpu_context_t* uc,
PAL_XREGS_STATE* xregs_state, sgx_arch_exinfo_t* exinfo) {
assert(IS_ALIGNED_PTR(xregs_state, PAL_XSTATE_ALIGN));

union {
sgx_arch_exit_info_t info;
unsigned int intval;
} ei = {.intval = exit_info};
sgx_arch_exit_info_t trusted_exit_info;
static_assert(sizeof(trusted_exit_info) == sizeof(trusted_exit_info_), "invalid size");
memcpy(&trusted_exit_info, &trusted_exit_info_, sizeof(trusted_exit_info));

/*
* Intel SGX hardware exposes information on a HW exception in the EXITINFO struct.
* Host OS + Gramine's untrusted part of PAL deliver a SW signal. The SW signal can be a
* reaction to HW exception (synchronous signal) or a reaction to software events (asynchronous
* signal). For security, it is important to cross-check HW exception state vs SW signal state.
*
* The below table shows the cross checks. "yes" means allowed combination, "no" means
* prohibited combination (Gramine terminates). "yes*" means a special case of #PF, see comments
* below on #PF handling.
*
* +-----------------------------+-----+-----+-----+-----+------------------+------------+
* | HW exceptions (trusted) -> | | #DE | | | | |
* | --------------------------- | | #MF | | #GP | others | none |
* | SW signals (untrusted) | | #UD | #XM | #PF | #AC | (#BR,#DB,#BP,#CP)| (valid=0) |
* | v | | | | | | |
* --+-----------------------------+-----+-----+-----+-----+------------------+------------+
* s | | | | | | | |
* y | PAL_EVENT_ILLEGAL | yes | no | no | no | | |
* n | | | | | | | |
* c +-----------------------------+-----+-----+-----+-----+ no | no |
* h | | | | | | (exceptions | (malicious |
* r | PAL_EVENT_ARITHMETIC_ERROR | no | yes | no | no | unsupported | host |
* o | | | | | | by Gramine) | injected |
* n +-----------------------------+-----+-----+-----+-----+ | SW signal)|
* o | | | | | | | |
* u | PAL_EVENT_MEMFAULT | no | no |yes* | yes | | |
* s | | | | | | | |
* --+-----------------------------+-----+-----+-----+-----+------------------+------------+
* | | | |
* a | PAL_EVENT_QUIT | | yes |
* s | | no, except #PF case* | |
* y +-----------------------------+ (malicious host ignored HW exception) +------------+
* n | | | |
* c | PAL_EVENT_INTERRUPTED | | yes |
* | | | |
* --+-----------------------------+------------------------------------------+------------+
*/

int event_num;
uint32_t event_num = 0; /* illegal event */

if (!ei.info.valid) {
event_num = exit_info;
if (event_num <= 0 || event_num >= PAL_EVENT_NUM_BOUND) {
log_error("Illegal exception reported by untrusted PAL: %d", event_num);
if (!trusted_exit_info.valid) {
/* corresponds to last column in the table above */
if (untrusted_external_event != PAL_EVENT_QUIT
&& untrusted_external_event != PAL_EVENT_INTERRUPTED) {
log_error("Host injected malicious signal %u", untrusted_external_event);
_PalProcessExit(1);
}
event_num = untrusted_external_event;
} else {
switch (ei.info.vector) {
case SGX_EXCEPTION_VECTOR_BR:
log_error("Handling #BR exceptions is currently unsupported by Gramine");
_PalProcessExit(1);
break;
/* corresponds to all but last columns in the table above */
const char* exception_name = NULL;
switch (trusted_exit_info.vector) {
case SGX_EXCEPTION_VECTOR_UD:
if (untrusted_external_event != PAL_EVENT_ILLEGAL) {
log_error("Host reported mismatching signal (expected %u, got %u)",
PAL_EVENT_ILLEGAL, untrusted_external_event);
_PalProcessExit(1);
}
if (handle_ud(uc)) {
restore_sgx_context(uc, xregs_state);
/* NOTREACHED */
/* UNREACHABLE */
}
event_num = PAL_EVENT_ILLEGAL;
break;
case SGX_EXCEPTION_VECTOR_DE:
case SGX_EXCEPTION_VECTOR_MF:
case SGX_EXCEPTION_VECTOR_XM:
if (untrusted_external_event != PAL_EVENT_ARITHMETIC_ERROR) {
log_error("Host reported mismatching signal (expected %u, got %u)",
PAL_EVENT_ARITHMETIC_ERROR, untrusted_external_event);
_PalProcessExit(1);
}
event_num = PAL_EVENT_ARITHMETIC_ERROR;
break;
case SGX_EXCEPTION_VECTOR_GP:
case SGX_EXCEPTION_VECTOR_PF:
if (untrusted_external_event == PAL_EVENT_QUIT
|| untrusted_external_event == PAL_EVENT_INTERRUPTED) {
/*
* The host delivered an asynchronous signal, so the reported-by-SGX #PF must be
* benign (resolved completely by the host kernel), otherwise the host would
* deliver PAL_EVENT_MEMFAULT (to signify a #PF which should be acted upon by
* Gramine).
*
* The SGX hardware always reports such benign #PFs though they can be
* considered spurious and should be ignored. So the event must be a
* host-induced external event, so in the following we handle this external
* event and ignore the #PF info.
*
* Note that the host could modify a real memory fault (a valid #PF) to e.g. a
* PAL_EVENT_INTERRUPTED signal. Then we end up in this special case and the app
* will not handle a real memory fault but a dummy PAL_EVENT_INTERRUPTED. This
* will lead to the app getting stuck on #PF. Since this is a DoS, and Intel SGX
* and Gramine don't care about DoSes, this special case is benign.
*/
memset(&trusted_exit_info, 0, sizeof(trusted_exit_info));
event_num = untrusted_external_event;
break;
}
/* fallthrough */
case SGX_EXCEPTION_VECTOR_GP:
case SGX_EXCEPTION_VECTOR_AC:
if (untrusted_external_event != PAL_EVENT_MEMFAULT) {
log_error("Host reported mismatching signal (expected %u, got %u)",
PAL_EVENT_MEMFAULT, untrusted_external_event);
_PalProcessExit(1);
}
event_num = PAL_EVENT_MEMFAULT;
break;
case SGX_EXCEPTION_VECTOR_BR:
exception_name = exception_name ? : "#BR";
/* fallthrough */
case SGX_EXCEPTION_VECTOR_DB:
exception_name = exception_name ? : "#DB";
/* fallthrough */
case SGX_EXCEPTION_VECTOR_BP:
exception_name = exception_name ? : "#BP";
/* fallthrough */
case SGX_EXCEPTION_VECTOR_CP:
exception_name = exception_name ? : "#CP";
/* fallthrough */
default:
restore_sgx_context(uc, xregs_state);
/* NOTREACHED */
log_error("Handling %s exceptions is currently unsupported by Gramine",
exception_name ? : "[unknown]");
_PalProcessExit(1);
/* UNREACHABLE */
}
}

if (event_num == 0 || event_num >= PAL_EVENT_NUM_BOUND) {
log_error("Illegal exception reported: %d", event_num);
_PalProcessExit(1);
}

/* in PAL, and event isn't asynchronous (i.e., synchronous exception) */
if (ADDR_IN_PAL(uc->rip) && event_num != PAL_EVENT_QUIT && event_num != PAL_EVENT_INTERRUPTED) {
char buf[LOCATION_BUF_SIZE];
Expand All @@ -289,12 +383,12 @@ void _PalExceptionHandler(unsigned int exit_info, sgx_cpu_context_t* uc,
const char* event_name = pal_event_name(event_num);
log_error("Unexpected %s occurred inside PAL (%s)", event_name, buf);

if (ei.info.valid) {
if (trusted_exit_info.valid) {
/* EXITINFO field: vector = exception number, exit_type = 0x3 for HW / 0x6 for SW */
log_debug("(SGX HW reported AEX vector 0x%x with exit_type = 0x%x)", ei.info.vector,
ei.info.exit_type);
log_debug("(SGX HW reported AEX vector 0x%x with exit_type = 0x%x)",
trusted_exit_info.vector, trusted_exit_info.exit_type);
} else {
log_debug("(untrusted PAL sent PAL event 0x%x)", ei.intval);
log_debug("(untrusted PAL sent PAL event 0x%x)", untrusted_external_event);
}

_PalProcessExit(1);
Expand All @@ -305,15 +399,13 @@ void _PalExceptionHandler(unsigned int exit_info, sgx_cpu_context_t* uc,

bool has_hw_fault_address = false;

if (ei.info.valid) {
ctx.trapno = ei.info.vector;
if (trusted_exit_info.valid) {
ctx.trapno = trusted_exit_info.vector;
/* Only these two exceptions save information in EXINFO. */
if (ei.info.vector == SGX_EXCEPTION_VECTOR_GP
|| ei.info.vector == SGX_EXCEPTION_VECTOR_PF) {
ctx.err = exinfo->error_code_val;
}
if (ei.info.vector == SGX_EXCEPTION_VECTOR_PF) {
ctx.cr2 = exinfo->maddr;
if (trusted_exit_info.vector == SGX_EXCEPTION_VECTOR_GP
|| trusted_exit_info.vector == SGX_EXCEPTION_VECTOR_PF) {
ctx.err = exinfo->error_code_val; /* bits: Present, Write/Read, User/Kernel, etc. */
ctx.cr2 = exinfo->maddr; /* NOTE: on #GP, maddr = 0 */
has_hw_fault_address = true;
}
}
Expand Down
3 changes: 2 additions & 1 deletion pal/src/host/linux-sgx/pal_linux.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ void save_xregs(PAL_XREGS_STATE* xsave_area);
void restore_xregs(const PAL_XREGS_STATE* xsave_area);
noreturn void _restore_sgx_context(sgx_cpu_context_t* uc, PAL_XREGS_STATE* xsave_area);

void _PalExceptionHandler(unsigned int exit_info, sgx_cpu_context_t* uc,
void _PalExceptionHandler(uint32_t trusted_exit_info_,
uint32_t untrusted_external_event, sgx_cpu_context_t* uc,
PAL_XREGS_STATE* xregs_state, sgx_arch_exinfo_t* exinfo);
/* `event_` is actually of `enum pal_event` type, but we call it from assembly, so we need to know
* its underlying type. */
Expand Down

0 comments on commit a390e33

Please sign in to comment.