diff --git a/doc/stack.qbk b/doc/stack.qbk index b5d3bdc3..4445a807 100644 --- a/doc/stack.qbk +++ b/doc/stack.qbk @@ -372,5 +372,13 @@ Boost.Context headers if stack protection is enabled. [endsect] +[section:shadow_stack Support for shadow stack protection] + +Shadow stack is part of Intel's Control-Flow Enforcement Technology. Users must +check that the syscall `map_shadow_stack` (no. 451) exists and then define +`SHADOW_STACK_SYSCALL` before including any Boost.Context headers +if shadow stack protection is enabled. + +[endsect] [endsect] diff --git a/include/boost/context/continuation_fcontext.hpp b/include/boost/context/continuation_fcontext.hpp index 881e6f26..32c8d156 100644 --- a/include/boost/context/continuation_fcontext.hpp +++ b/include/boost/context/continuation_fcontext.hpp @@ -44,6 +44,17 @@ # include BOOST_ABI_PREFIX #endif +#if defined __CET__ +# include +# include +# define SHSTK_ENABLED (__CET__ & 0x2) +# define BOOST_CONTEXT_SHADOW_STACK (SHSTK_ENABLED && SHADOW_STACK_SYSCALL) +# define __NR_map_shadow_stack 451 +#ifndef SHADOW_STACK_SET_TOKEN +# define SHADOW_STACK_SET_TOKEN 0x1 +#endif +#endif + #if defined(BOOST_MSVC) # pragma warning(push) # pragma warning(disable: 4702) @@ -62,6 +73,12 @@ transfer_t context_unwind( transfer_t t) { template< typename Rec > transfer_t context_exit( transfer_t t) noexcept { Rec * rec = static_cast< Rec * >( t.data); +#if BOOST_CONTEXT_SHADOW_STACK + // destroy shadow stack + std::size_t ss_size = *((unsigned long*)(reinterpret_cast< uintptr_t >( rec)- 16)); + long unsigned int ss_base = *((unsigned long*)(reinterpret_cast< uintptr_t >( rec)- 8)); + munmap((void *)ss_base, ss_size); +#endif // destroy context stack rec->deallocate(); return { nullptr, nullptr }; @@ -168,6 +185,25 @@ fcontext_t create_context1( StackAlloc && salloc, Fn && fn) { reinterpret_cast< uintptr_t >( sctx.sp) - static_cast< uintptr_t >( sctx.size) ); // create fast-context const std::size_t size = reinterpret_cast< uintptr_t >( 
stack_top) - reinterpret_cast< uintptr_t >( stack_bottom); + +#if BOOST_CONTEXT_SHADOW_STACK + std::size_t ss_size = size >> 5; + // align shadow stack to 8 bytes. + ss_size = (ss_size + 7) & ~7; + // shadow stack occupies at least 4KB; a larger computed size is kept + ss_size = (ss_size > 4096) ? ss_size : 4096; + // create shadow stack; the raw syscall result is -1 on failure + void *ss_base = (void *)syscall(__NR_map_shadow_stack, 0, ss_size, SHADOW_STACK_SET_TOKEN); + BOOST_ASSERT(ss_base != (void *)-1); + unsigned long ss_sp = (unsigned long)ss_base + ss_size; + /* pass the shadow stack pointer to make_fcontext + i.e., link the new shadow stack with the new fcontext + TODO should be a better way? */ + *((unsigned long*)(reinterpret_cast< uintptr_t >( stack_top)- 8)) = ss_sp; + /* Todo: place shadow stack info in 64byte gap */ + *((unsigned long*)(reinterpret_cast< uintptr_t >( storage)- 8)) = (unsigned long) ss_base; + *((unsigned long*)(reinterpret_cast< uintptr_t >( storage)- 16)) = ss_size; +#endif const fcontext_t fctx = make_fcontext( stack_top, size, & context_entry< Record >); BOOST_ASSERT( nullptr != fctx); // transfer control structure to context-stack @@ -190,6 +226,25 @@ fcontext_t create_context2( preallocated palloc, StackAlloc && salloc, Fn && fn) reinterpret_cast< uintptr_t >( palloc.sctx.sp) - static_cast< uintptr_t >( palloc.sctx.size) ); // create fast-context const std::size_t size = reinterpret_cast< uintptr_t >( stack_top) - reinterpret_cast< uintptr_t >( stack_bottom); + +#if BOOST_CONTEXT_SHADOW_STACK + std::size_t ss_size = size >> 5; + // align shadow stack to 8 bytes. + ss_size = (ss_size + 7) & ~7; + // Todo: shadow stack occupies at least 4KB + ss_size = (ss_size > 4096) ? 
size : 4096; + // create shadow stack + void *ss_base = (void *)syscall(__NR_map_shadow_stack, 0, ss_size, SHADOW_STACK_SET_TOKEN); + BOOST_ASSERT(ss_base != -1); + unsigned long ss_sp = (unsigned long)ss_base + ss_size; + /* pass the shadow stack pointer to make_fcontext + i.e., link the new shadow stack with the new fcontext + TODO should be a better way? */ + *((unsigned long*)(reinterpret_cast< uintptr_t >( stack_top)- 8)) = ss_sp; + /* Todo: place shadow stack info in 64byte gap */ + *((unsigned long*)(reinterpret_cast< uintptr_t >( storage)- 8)) = (unsigned long) ss_base; + *((unsigned long*)(reinterpret_cast< uintptr_t >( storage)- 16)) = ss_size; +#endif const fcontext_t fctx = make_fcontext( stack_top, size, & context_entry< Record >); BOOST_ASSERT( nullptr != fctx); // transfer control structure to context-stack diff --git a/include/boost/context/fiber_fcontext.hpp b/include/boost/context/fiber_fcontext.hpp index 47a98c89..4f6e5d75 100644 --- a/include/boost/context/fiber_fcontext.hpp +++ b/include/boost/context/fiber_fcontext.hpp @@ -44,6 +44,17 @@ # include BOOST_ABI_PREFIX #endif +#if defined __CET__ +# include +# include +# define SHSTK_ENABLED (__CET__ & 0x2) +# define BOOST_CONTEXT_SHADOW_STACK (SHSTK_ENABLED && SHADOW_STACK_SYSCALL) +# define __NR_map_shadow_stack 451 +#ifndef SHADOW_STACK_SET_TOKEN +# define SHADOW_STACK_SET_TOKEN 0x1 +#endif +#endif + #if defined(BOOST_MSVC) # pragma warning(push) # pragma warning(disable: 4702) @@ -62,6 +73,12 @@ transfer_t fiber_unwind( transfer_t t) { template< typename Rec > transfer_t fiber_exit( transfer_t t) noexcept { Rec * rec = static_cast< Rec * >( t.data); +#if BOOST_CONTEXT_SHADOW_STACK + // destory shadow stack + std::size_t ss_size = *((unsigned long*)(reinterpret_cast< uintptr_t >( rec)- 16)); + long unsigned int ss_base = *((unsigned long*)(reinterpret_cast< uintptr_t >( rec)- 8)); + munmap((void *)ss_base, ss_size); +#endif // destroy context stack rec->deallocate(); return { nullptr, nullptr }; 
@@ -165,6 +182,25 @@ fcontext_t create_fiber1( StackAlloc && salloc, Fn && fn) { reinterpret_cast< uintptr_t >( sctx.sp) - static_cast< uintptr_t >( sctx.size) ); // create fast-context const std::size_t size = reinterpret_cast< uintptr_t >( stack_top) - reinterpret_cast< uintptr_t >( stack_bottom); + +#if BOOST_CONTEXT_SHADOW_STACK + std::size_t ss_size = size >> 5; + // align shadow stack to 8 bytes. + ss_size = (ss_size + 7) & ~7; + // shadow stack occupies at least 4KB; a larger computed size is kept + ss_size = (ss_size > 4096) ? ss_size : 4096; + // create shadow stack; the raw syscall result is -1 on failure + void *ss_base = (void *)syscall(__NR_map_shadow_stack, 0, ss_size, SHADOW_STACK_SET_TOKEN); + BOOST_ASSERT(ss_base != (void *)-1); + unsigned long ss_sp = (unsigned long)ss_base + ss_size; + /* pass the shadow stack pointer to make_fcontext + i.e., link the new shadow stack with the new fcontext + TODO should be a better way? */ + *((unsigned long*)(reinterpret_cast< uintptr_t >( stack_top)- 8)) = ss_sp; + /* Todo: place shadow stack info in 64byte gap */ + *((unsigned long*)(reinterpret_cast< uintptr_t >( storage)- 8)) = (unsigned long) ss_base; + *((unsigned long*)(reinterpret_cast< uintptr_t >( storage)- 16)) = ss_size; +#endif const fcontext_t fctx = make_fcontext( stack_top, size, & fiber_entry< Record >); BOOST_ASSERT( nullptr != fctx); // transfer control structure to context-stack @@ -187,6 +223,25 @@ fcontext_t create_fiber2( preallocated palloc, StackAlloc && salloc, Fn && fn) { reinterpret_cast< uintptr_t >( palloc.sctx.sp) - static_cast< uintptr_t >( palloc.sctx.size) ); // create fast-context const std::size_t size = reinterpret_cast< uintptr_t >( stack_top) - reinterpret_cast< uintptr_t >( stack_bottom); + +#if BOOST_CONTEXT_SHADOW_STACK + std::size_t ss_size = size >> 5; + // align shadow stack to 8 bytes. + ss_size = (ss_size + 7) & ~7; + // Todo: shadow stack occupies at least 4KB + ss_size = (ss_size > 4096) ? 
size : 4096; + // create shadow stack + void *ss_base = (void *)syscall(__NR_map_shadow_stack, 0, ss_size, SHADOW_STACK_SET_TOKEN); + BOOST_ASSERT(ss_base != -1); + unsigned long ss_sp = (unsigned long)ss_base + ss_size; + /* pass the shadow stack pointer to make_fcontext + i.e., link the new shadow stack with the new fcontext + TODO should be a better way? */ + *((unsigned long*)(reinterpret_cast< uintptr_t >( stack_top)- 8)) = ss_sp; + /* Todo: place shadow stack info in 64byte gap */ + *((unsigned long*)(reinterpret_cast< uintptr_t >( storage)- 8)) = (unsigned long) ss_base; + *((unsigned long*)(reinterpret_cast< uintptr_t >( storage)- 16)) = ss_size; +#endif const fcontext_t fctx = make_fcontext( stack_top, size, & fiber_entry< Record >); BOOST_ASSERT( nullptr != fctx); // transfer control structure to context-stack diff --git a/src/asm/jump_x86_64_sysv_elf_gas.S b/src/asm/jump_x86_64_sysv_elf_gas.S index 2505219c..58f0e241 100644 --- a/src/asm/jump_x86_64_sysv_elf_gas.S +++ b/src/asm/jump_x86_64_sysv_elf_gas.S @@ -30,8 +30,11 @@ * ---------------------------------------------------------------------------------- * * * ****************************************************************************************/ + # if defined __CET__ # include +# define SHSTK_ENABLED (__CET__ & 0x2) +# define BOOST_CONTEXT_SHADOW_STACK (SHSTK_ENABLED && SHADOW_STACK_SYSCALL) # else # define _CET_ENDBR # endif @@ -61,12 +64,38 @@ jump_fcontext: movq %rbx, 0x30(%rsp) /* save RBX */ movq %rbp, 0x38(%rsp) /* save RBP */ +#if BOOST_CONTEXT_SHADOW_STACK + /* grow the stack to reserve space for shadow stack pointer(SSP) */ + leaq -0x8(%rsp), %rsp + /* read the current SSP and store it */ + rdsspq %rcx + movq %rcx, (%rsp) +#endif + /* store RSP (pointing to context-data) in RAX */ movq %rsp, %rax /* restore RSP (pointing to context-data) from RDI */ movq %rdi, %rsp +#if BOOST_CONTEXT_SHADOW_STACK + /* first 8 bytes are SSP */ + movq (%rsp), %rcx + leaq 0x8(%rsp), %rsp + + /* Restore 
target(new) shadow stack */ + rstorssp -8(%rcx) + /* restore token for previous shadow stack is pushed */ + /* on previous shadow stack after saveprevssp */ + saveprevssp + + /* on return, jump_fcontext jumps to the restored return address */ + /* (r8) instead of executing RET. Because no RET pops the shadow */ + /* stack, unwind it by one entry here; otherwise a mismatch occurs */ + movq $1, %rcx + incsspq %rcx +#endif + movq 0x40(%rsp), %r8 /* restore return-address */ #if !defined(BOOST_USE_TSX) diff --git a/src/asm/make_x86_64_sysv_elf_gas.S b/src/asm/make_x86_64_sysv_elf_gas.S index 0890fcfb..4294398a 100644 --- a/src/asm/make_x86_64_sysv_elf_gas.S +++ b/src/asm/make_x86_64_sysv_elf_gas.S @@ -30,8 +30,11 @@ * ---------------------------------------------------------------------------------- * * * ****************************************************************************************/ + # if defined __CET__ # include +# define SHSTK_ENABLED (__CET__ & 0x2) +# define BOOST_CONTEXT_SHADOW_STACK (SHSTK_ENABLED && SHADOW_STACK_SYSCALL) # else # define _CET_ENDBR # endif @@ -42,6 +45,11 @@ .align 16 make_fcontext: _CET_ENDBR +#if BOOST_CONTEXT_SHADOW_STACK + /* the new shadow stack pointer (SSP) */ + movq -0x8(%rdi), %r9 +#endif + /* first arg of make_fcontext() == top of context-stack */ movq %rdi, %rax @@ -79,13 +87,50 @@ make_fcontext: /* will be entered after context-function returns */ movq %rcx, 0x38(%rax) +#if BOOST_CONTEXT_SHADOW_STACK + /* Populate the shadow stack and normal stack */ + /* get original SSP */ + rdsspq %r8 + /* restore new shadow stack */ + rstorssp -0x8(%r9) + /* save the restore token on the original shadow stack */ + saveprevssp + /* push the address of "jmp trampoline" to the new shadow stack */ + /* as well as the stack */ + call 1f + jmp trampoline +1: + /* save address of "jmp trampoline" as return-address */ + /* for context-function */ + pop 0x38(%rax) + /* Get the new SSP. 
*/ + rdsspq %r9 + /* restore original shadow stack */ + rstorssp -0x8(%r8) + /* save the restore token on the new shadow stack. */ + saveprevssp + + /* reserve space for the new SSP */ + leaq -0x8(%rax), %rax + /* save the new SSP to this fcontext */ + movq %r9, (%rax) +#endif + ret /* return pointer to context-data */ trampoline: _CET_ENDBR /* store return address on stack */ /* fix stack alignment */ +#if BOOST_CONTEXT_SHADOW_STACK + /* save address of "jmp *%rbp" as return-address */ + /* on stack and shadow stack */ + call 2f + jmp *%rbp +2: +#else push %rbp +#endif /* jump to context-function */ jmp *%rbx diff --git a/src/asm/ontop_x86_64_sysv_elf_gas.S b/src/asm/ontop_x86_64_sysv_elf_gas.S index 6b35a7c9..c3892b8b 100644 --- a/src/asm/ontop_x86_64_sysv_elf_gas.S +++ b/src/asm/ontop_x86_64_sysv_elf_gas.S @@ -32,6 +32,8 @@ ****************************************************************************************/ # if defined __CET__ # include +# define SHSTK_ENABLED (__CET__ & 0x2) +# define BOOST_CONTEXT_SHADOW_STACK (SHSTK_ENABLED && SHADOW_STACK_SYSCALL) # else # define _CET_ENDBR # endif @@ -64,12 +66,32 @@ ontop_fcontext: movq %rbx, 0x30(%rsp) /* save RBX */ movq %rbp, 0x38(%rsp) /* save RBP */ +#if BOOST_CONTEXT_SHADOW_STACK + /* grow the stack to reserve space for shadow stack pointer(SSP) */ + leaq -0x8(%rsp), %rsp + /* read the current SSP and store it */ + rdsspq %rcx + movq %rcx, (%rsp) +#endif + /* store RSP (pointing to context-data) in RAX */ movq %rsp, %rax /* restore RSP (pointing to context-data) from RDI */ movq %rdi, %rsp +#if BOOST_CONTEXT_SHADOW_STACK + /* first 8 bytes are SSP */ + movq (%rsp), %rcx + leaq 0x8(%rsp), %rsp + + /* Restore target(new) shadow stack */ + rstorssp -8(%rcx) + /* restore token for previous shadow stack is pushed */ + /* on previous shadow stack after saveprevssp */ + saveprevssp +#endif + #if !defined(BOOST_USE_TSX) ldmxcsr (%rsp) /* restore MMX control- and status-word */ fldcw 0x4(%rsp) /* restore x87 
control-word */