From fd48daf278ed3aebdd217d11dfa721ae0f34b2a6 Mon Sep 17 00:00:00 2001 From: Darian Leung Date: Sat, 3 Dec 2022 00:14:17 +0800 Subject: [PATCH 1/6] freertos(SMP): Fix SMP FreeRTOS Xtensa port FPU/Coproccessor bugs This commit fixes the following FPU/Coprocessor bugs in the Xtensa port of Amazon SMP FreeRTOS: - vPortCleanUpCoprocArea() does not calculate the correct pointer to the task's CPSA (located on the task's stack). This can result in - _xt_coproc_release() not releasing the task's CP ownership - The next coprocessor exception will write the current CP owner (i.e., the deleted task's CPSA) leading to memory corruption - _xt_coproc_release() writes xCoreID instead of 0 when clearing a CP owner. This results in the next CP exception trying to load the CP owner's CPSA at the address of "xCoreID", leading to a double exception. --- .../FreeRTOS-Kernel-SMP/portable/xtensa/port.c | 16 ++++++++-------- .../portable/xtensa/xtensa_context.S | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c index 181c48dc1b9..cd62e8e598e 100644 --- a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c +++ b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c @@ -721,21 +721,21 @@ void _xt_coproc_release(volatile void *coproc_sa_base, BaseType_t xCoreID); void vPortCleanUpCoprocArea( void *pxTCB ) { - StackType_t *coproc_area; + UBaseType_t uxCoprocArea; BaseType_t xCoreID; - /* Calculate the coproc save area in the stack from the TCB base */ - coproc_area = ( StackType_t * ) ( ( uint32_t ) ( pxTCB + offset_pxEndOfStack )); - coproc_area = ( StackType_t * ) ( ( ( portPOINTER_SIZE_TYPE ) coproc_area ) & ( ~( ( portPOINTER_SIZE_TYPE ) portBYTE_ALIGNMENT_MASK ) ) ); - coproc_area = ( StackType_t * ) ( ( ( uint32_t ) coproc_area - XT_CP_SIZE ) & ~0xf ); + /* Get pointer to the task's coprocessor save area from TCB->pxEndOfStack. See uxInitialiseStackCPSA() */ + uxCoprocArea = ( UBaseType_t ) ( ( ( StaticTask_t * ) pxTCB )->pxDummy8 ); /* Get TCB_t.pxEndOfStack */ + uxCoprocArea = STACKPTR_ALIGN_DOWN(16, uxCoprocArea - XT_CP_SIZE); /* Extract core ID from the affinity mask */ - xCoreID = __builtin_ffs( * ( UBaseType_t * ) ( pxTCB + offset_uxCoreAffinityMask ) ); - assert( xCoreID >= 1 ); + xCoreID = ( ( StaticTask_t * ) pxTCB )->uxDummy25 ; + xCoreID = ( BaseType_t ) __builtin_ffs( ( int ) xCoreID ); + assert( xCoreID >= 1 ); // __builtin_ffs always returns first set index + 1 xCoreID -= 1; /* If task has live floating point registers somewhere, release them */ - _xt_coproc_release( coproc_area, xCoreID ); + _xt_coproc_release( (void *)uxCoprocArea, xCoreID ); } #endif // ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 ) diff --git a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_context.S b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_context.S index 044a4c76770..546fbd073e7 100644 --- a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_context.S +++ b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_context.S @@ -449,7 +449,7 @@ _xt_coproc_release: 1: l32i a8, a4, 0 /* a8 = owner at a4 */ bne a2, a8, 2f /* if (coproc_sa_base == owner) */ - s32i a3, a4, 0 /* owner = unowned */ + s32i a6, a4, 0 /* owner = unowned */ 2: addi a4, a4, 1<<2 /* a4 = next entry in owner array */ bltu a4, a5, 1b /* repeat until end of array */ From 9300bef9b8fef57f4b5fcba7407e5bc716dfa165 Mon Sep 17 00:00:00 2001 From: Darian Leung Date: Wed, 14 Dec 2022 20:17:51 +0800 Subject: [PATCH 2/6] freertos(SMP): Refactor FPU handling on the Xtensa port of Amaazon SMP FreeRTOS This commit refactors the FPU handling code on the Xtensa port of Amazon SMP FreeRTOS in the following ways: Auto-pinning via XT_RTOS_CP_EXC_HOOK ------------------------------------ The "_xt_coproc_exc" exception would previously automatically pin a task that uses the FPU to the current core (to ensure that we can lazy save the task's FPU context). However, this would mean that "xtensa_vectors.S" would need to be OS-aware (to read the task's TCB structure). This is now refactored so that "_xt_coproc_exc" calls a CP exception hook function ("XT_RTOS_CP_EXC_HOOK") implemented in "portasm.S", thus allowing "xtensa_vectors.S" to remain OS agnostic. Using macros to acquire owner spinlock -------------------------------------- The taking and relasing of the "_xt_coproc_owner_sa_lock" is now mostly abstracted as the "spinlock_take" and "spinlock_release" macro. As a result, "_xt_coproc_release" and "_xt_coproc_exc" are refactored so that: - They are closer to their upstream (original) versions - The spinlock is only taken when building for multicore - The spinlock held region is shortened (now only protects the instructions that access the "_xt_coproc_owner_sa" array Other Changes ------------- - Updated placing and comments of various "offset_..." constants used by portasm.S - Update description of "get_cpsa_from_tcb" assembly macro - Tidied up some typos in the ".S" files --- .../xtensa/include/freertos/xtensa_rtos.h | 9 ++ .../portable/xtensa/port.c | 46 ++++--- .../portable/xtensa/portasm.S | 84 ++++++++++-- .../portable/xtensa/xt_asm_utils.h | 96 +++++++++++--- .../portable/xtensa/xtensa_context.S | 72 +++++------ .../portable/xtensa/xtensa_vectors.S | 120 +++++------------- 6 files changed, 255 insertions(+), 172 deletions(-) diff --git a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/include/freertos/xtensa_rtos.h b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/include/freertos/xtensa_rtos.h index f2e2f99cccf..42399b5c68f 100644 --- a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/include/freertos/xtensa_rtos.h +++ b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/include/freertos/xtensa_rtos.h @@ -170,6 +170,15 @@ The implementation may use only a2-4, a15 (all other regs must be preserved). // void* XT_RTOS_CP_STATE(void) #define XT_RTOS_CP_STATE _frxt_task_coproc_state +/* +RTOS provided hook function that is called on every coprocessor exception. May +only be called from assembly code and by the 'call0' instruction. +The implementation may use only a2-4, a15 (all other regs must be preserved). +*/ +// void XT_RTOS_CP_EXC_HOOK(void) +#if XCHAL_CP_NUM > 0 +#define XT_RTOS_CP_EXC_HOOK _frxt_coproc_exc_hook +#endif /******************************************************************************* diff --git a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c index cd62e8e598e..16ab7437e99 100644 --- a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c +++ b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c @@ -41,9 +41,25 @@ _Static_assert(portBYTE_ALIGNMENT == 16, "portBYTE_ALIGNMENT must be set to 16"); -/* -OS state variables -*/ +/* ---------------------------------------------------- Variables ------------------------------------------------------ + * - Various variables used to maintain the FreeRTOS port's state. Used from both port.c and various .S files + * - Constant offsets are used by assembly to jump to particular TCB members or a stack area (such as the CPSA). We use + * C constants instead of preprocessor macros due to assembly lacking "offsetof()". + * ------------------------------------------------------------------------------------------------------------------ */ + +#if XCHAL_CP_NUM > 0 +/* Offsets used to navigate to a task's CPSA on the stack */ +const DRAM_ATTR uint32_t offset_pxEndOfStack = offsetof(StaticTask_t, pxDummy8); +const DRAM_ATTR uint32_t offset_cpsa = XT_CP_SIZE; /* Offset to start of the CPSA area on the stack. See uxInitialiseStackCPSA(). */ +#if configNUM_CORES > 1 +/* Offset to TCB_t.uxCoreAffinityMask member. Used to pin unpinned tasks that use the FPU. */ +const DRAM_ATTR uint32_t offset_uxCoreAffinityMask = offsetof(StaticTask_t, uxDummy25); +#if configUSE_CORE_AFFINITY != 1 +#error "configUSE_CORE_AFFINITY must be 1 on multicore targets with coprocessor support" +#endif +#endif /* configNUM_CORES > 1 */ +#endif /* XCHAL_CP_NUM > 0 */ + volatile unsigned port_xSchedulerRunning[portNUM_PROCESSORS] = {0}; // Indicates whether scheduler is running on a per-core basis unsigned int port_interruptNesting[portNUM_PROCESSORS] = {0}; // Interrupt nesting level. Increased/decreased in portasm.c, _frxt_int_enter/_frxt_int_exit //FreeRTOS SMP Locks @@ -423,12 +439,6 @@ static void vPortTaskWrapper(TaskFunction_t pxCode, void *pvParameters) } #endif -const DRAM_ATTR uint32_t offset_pxEndOfStack = offsetof(StaticTask_t, pxDummy8); -#if ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 ) -const DRAM_ATTR uint32_t offset_uxCoreAffinityMask = offsetof(StaticTask_t, uxDummy25); -#endif // ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 ) -const DRAM_ATTR uint32_t offset_cpsa = XT_CP_SIZE; - /** * @brief Align stack pointer in a downward growing stack * @@ -677,7 +687,7 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack, /* HIGH ADDRESS |---------------------------| <- pxTopOfStack on entry - | Coproc Save Area | + | Coproc Save Area | (CPSA MUST BE FIRST) | ------------------------- | | TLS Variables | | ------------------------- | <- Start of useable stack @@ -697,7 +707,7 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack, configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0); #if XCHAL_CP_NUM > 0 - // Initialize the coprocessor save area + // Initialize the coprocessor save area. THIS MUST BE THE FIRST AREA due to access from _frxt_task_coproc_state() uxStackPointer = uxInitialiseStackCPSA(uxStackPointer); configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0); #endif /* XCHAL_CP_NUM > 0 */ @@ -717,25 +727,25 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack, // -------------------- Co-Processor ----------------------- #if ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 ) -void _xt_coproc_release(volatile void *coproc_sa_base, BaseType_t xCoreID); +void _xt_coproc_release(volatile void *coproc_sa_base, BaseType_t xTargetCoreID); void vPortCleanUpCoprocArea( void *pxTCB ) { UBaseType_t uxCoprocArea; - BaseType_t xCoreID; + BaseType_t xTargetCoreID; /* Get pointer to the task's coprocessor save area from TCB->pxEndOfStack. See uxInitialiseStackCPSA() */ uxCoprocArea = ( UBaseType_t ) ( ( ( StaticTask_t * ) pxTCB )->pxDummy8 ); /* Get TCB_t.pxEndOfStack */ uxCoprocArea = STACKPTR_ALIGN_DOWN(16, uxCoprocArea - XT_CP_SIZE); /* Extract core ID from the affinity mask */ - xCoreID = ( ( StaticTask_t * ) pxTCB )->uxDummy25 ; - xCoreID = ( BaseType_t ) __builtin_ffs( ( int ) xCoreID ); - assert( xCoreID >= 1 ); // __builtin_ffs always returns first set index + 1 - xCoreID -= 1; + xTargetCoreID = ( ( StaticTask_t * ) pxTCB )->uxDummy25 ; + xTargetCoreID = ( BaseType_t ) __builtin_ffs( ( int ) xTargetCoreID ); + assert( xTargetCoreID >= 1 ); // __builtin_ffs always returns first set index + 1 + xTargetCoreID -= 1; /* If task has live floating point registers somewhere, release them */ - _xt_coproc_release( (void *)uxCoprocArea, xCoreID ); + _xt_coproc_release( (void *)uxCoprocArea, xTargetCoreID ); } #endif // ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 ) diff --git a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/portasm.S b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/portasm.S index f51ba0924b1..b472be73055 100644 --- a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/portasm.S +++ b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/portasm.S @@ -34,32 +34,45 @@ #define TOPOFSTACK_OFFS 0x00 /* StackType_t *pxTopOfStack */ .extern pxCurrentTCBs +#if XCHAL_CP_NUM > 0 +/* Offsets used to get a task's coprocessor save area (CPSA) from its TCB */ .extern offset_pxEndOfStack .extern offset_cpsa +#if configNUM_CORES > 1 +/* Offset to TCB_t.uxCoreAffinityMask member. Used to pin unpinned tasks that use the FPU. */ +.extern offset_uxCoreAffinityMask +#endif /* configNUM_CORES > 1 */ +#endif /* XCHAL_CP_NUM > 0 */ /* -Macro to get a task's coprocessor save area (CPSA) from its TCB +-------------------------------------------------------------------------------- + Macro get_cpsa_from_tcb - get the pointer to a task's CPSA form its TCB + + Entry: + - "reg_A" contains a pointer to the task's TCB + Exit: + - "reg_A" contains pointer the the task's CPSA + - "reg_B" clobbered -Entry: -- reg_A contains a pointer to the TCB -Exit: -- reg_A contains a pointer to the CPSA -- reg_B destroyed + The two arguments must be different AR registers. +-------------------------------------------------------------------------------- */ +#if XCHAL_CP_NUM > 0 .macro get_cpsa_from_tcb reg_A reg_B - // Get TCB.pxEndOfStack from reg_A + /* Get TCB.pxEndOfStack from reg_A */ movi \reg_B, offset_pxEndOfStack /* Move &offset_pxEndOfStack into reg_B */ l32i \reg_B, \reg_B, 0 /* Load offset_pxEndOfStack into reg_B */ add \reg_A, \reg_A, \reg_B /* Calculate &pxEndOfStack to reg_A (&TCB + offset_pxEndOfStack) */ l32i \reg_A, \reg_A, 0 /* Load TCB.pxEndOfStack into reg_A */ - //Offset to start of coproc save area + /* Offset to start of CP save area */ movi \reg_B, offset_cpsa /* Move &offset_cpsa into reg_B */ l32i \reg_B, \reg_B, 0 /* Load offset_cpsa into reg_B */ sub \reg_A, \reg_A, \reg_B /* Subtract offset_cpsa from pxEndOfStack to get to start of CP save area (unaligned) */ - //Align down start of CP save area to 16 byte boundary + /* Align down start of CP save area to 16 byte boundary */ movi \reg_B, ~(0xF) - and \reg_A, \reg_A, \reg_B /* Align CPSA pointer to 16 bytes */ + and \reg_A, \reg_A, \reg_B /* Align CP save area pointer to 16 bytes */ .endm +#endif /* XCHAL_CP_NUM > 0 */ .global port_IntStack .global port_switch_flag //Required by sysview_tracing build @@ -692,3 +705,54 @@ _frxt_task_coproc_state: 2: ret #endif /* XCHAL_CP_NUM > 0 */ + +/* +********************************************************************************************************** +* _frxt_coproc_exc_hook +* void _frxt_coproc_exc_hook(void) +* +* Implements the Xtensa RTOS porting layer's XT_RTOS_CP_EXC_HOOK function for FreeRTOS. +* +* May only be called from assembly code by the 'call0' instruction. Does NOT obey ABI conventions. +* May only only use a2-4, a15 (all other regs must be preserved). +* See the detailed description of the XT_RTOS_ENTER macro in xtensa_rtos.h. +* +********************************************************************************************************** +*/ +#if XCHAL_CP_NUM > 0 + + .globl _frxt_coproc_exc_hook + .type _frxt_coproc_exc_hook,@function + .align 4 +_frxt_coproc_exc_hook: + + #if configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 + getcoreid a2 /* a2 = xCurCoreID */ + /* if (port_xSchedulerRunning[xCurCoreID] == 0) */ + movi a3, port_xSchedulerRunning + addx4 a3, a2, a3 + l32i a3, a3, 0 + beqz a3, 1f /* Scheduler hasn't started yet. Return. */ + /* if (port_interruptNesting[xCurCoreID] != 0) */ + movi a3, port_interruptNesting + addx4 a3, a2, a3 + l32i a3, a3, 0 + bnez a3, 1f /* We are in an interrupt. Return*/ + /* CP operations are incompatible with unpinned tasks. Thus we pin the task + to the current running core by updating its TCB.uxCoreAffinityMask field. */ + movi a3, pxCurrentTCBs + addx4 a3, a2, a3 + l32i a3, a3, 0 /* a3 = pxCurrentTCBs[xCurCoreID] */ + movi a4, offset_uxCoreAffinityMask + l32i a4, a4, 0 /* a4 = offset_uxCoreAffinityMask */ + add a3, a3, a4 /* a3 = &TCB.uxCoreAffinityMask */ + ssl a2 /* Use xCurCoreID as left shift amount */ + movi a4, 1 + sll a4, a4 /* a4 = (1 << xCurCoreID) */ + s32i a4, a3, 0 /* TCB.uxCoreAffinityMask = a4 = (1 << xCurCoreID) */ +1: + #endif /* configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 */ + + ret + +#endif /* XCHAL_CP_NUM > 0 */ diff --git a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xt_asm_utils.h b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xt_asm_utils.h index d4f25b79f7e..a21b02a687f 100644 --- a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xt_asm_utils.h +++ b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xt_asm_utils.h @@ -50,26 +50,88 @@ .macro SPILL_ALL_WINDOWS #if XCHAL_NUM_AREGS == 64 - and a12, a12, a12 - rotw 3 - and a12, a12, a12 - rotw 3 - and a12, a12, a12 - rotw 3 - and a12, a12, a12 - rotw 3 - and a12, a12, a12 - rotw 4 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 4 #elif XCHAL_NUM_AREGS == 32 - and a12, a12, a12 - rotw 3 - and a12, a12, a12 - rotw 3 - and a4, a4, a4 - rotw 2 + and a12, a12, a12 + rotw 3 + and a12, a12, a12 + rotw 3 + and a4, a4, a4 + rotw 2 #else #error Unrecognized XCHAL_NUM_AREGS #endif .endm -#endif +/* +-------------------------------------------------------------------------------- + Macro spinlock_take + + This macro will repeatedley attempt to atomically set a spinlock variable + using the s32c1i instruciton. A spinlock is considered free if its value is 0. + + Entry: + - "reg_A/B" as scratch registers + - "lock_var" spinlock variable's symbol + - Interrupts must already be disabled by caller + Exit: + - Spinlock set to current core's ID (PRID) + - "reg_A/B" clobbered +-------------------------------------------------------------------------------- +*/ + +#if portNUM_PROCESSORS > 1 + + .macro spinlock_take reg_A reg_B lock_var + + movi \reg_A, \lock_var /* reg_A = &lock_var */ +.L_spinlock_loop: + movi \reg_B, 0 /* Load spinlock free value (0) into SCOMPARE1 */ + wsr \reg_B, SCOMPARE1 + rsync /* Ensure that SCOMPARE1 is set before s32c1i executes */ + rsr \reg_B, PRID /* Load the current core's ID into reg_B */ + s32c1i \reg_B, \reg_A, 0 /* Attempt *lock_var = reg_B */ + bnez \reg_B, .L_spinlock_loop /* If the write was successful (i.e., lock was free), 0 will have been written back to reg_B */ + + .endm + +#endif /* portNUM_PROCESSORS > 1 */ + +/* +-------------------------------------------------------------------------------- + Macro spinlock_release + + This macro will release a spinlock variable previously taken by the + spinlock_take macro. + + Entry: + - "reg_A/B" as scratch registers + - "lock_var" spinlock variable's symbol + - Interrupts must already be disabled by caller + Exit: + - "reg_A/B" clobbered +-------------------------------------------------------------------------------- +*/ + +#if portNUM_PROCESSORS > 1 + + .macro spinlock_release reg_A reg_B lock_var + + movi \reg_A, \lock_var /* reg_A = &lock_var */ + movi \reg_B, 0 + s32i \reg_B, \reg_A, 0 /* Release the spinlock (*reg_A = 0) */ + + .endm + +#endif /* portNUM_PROCESSORS > 1 */ + +#endif /* __XT_ASM_UTILS_H */ diff --git a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_context.S b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_context.S index 546fbd073e7..cde52ea40e5 100644 --- a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_context.S +++ b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_context.S @@ -397,18 +397,16 @@ May be called when a thread terminates or completes but does not delete the co-proc save area, to avoid the exception handler having to save the thread's co-proc state before another thread can use it (optimization). -Needs to be called on the processor the thread was running on. Unpinned threads -won't have an entry here because they get pinned as soon they use a coprocessor. - Entry Conditions: A2 = Pointer to base of co-processor state save area. - A3 = Core ID of the pinned task + A3 = Core ID of the task (must be pinned) who's coproc ownership we are + releasing. Exit conditions: None. Obeys ABI conventions per prototype: - void _xt_coproc_release(void * coproc_sa_base, BaseType_t xCoreID) + void _xt_coproc_release(void * coproc_sa_base, BaseType_t xTargetCoreID) *******************************************************************************/ @@ -421,43 +419,33 @@ Obeys ABI conventions per prototype: .align 4 _xt_coproc_release: ENTRY0 /* a2 = base of save area */ - /* a3 = core ID */ - - rsil a7, XCHAL_EXCM_LEVEL /* lock interrupts */ - - /* Aquire spinlock before proceeding with the routine. - * Refer _xt_coproc_exc for details on the puspose of - * the _xt_coproc_owner_sa_lock lock and its intended use. - */ -.L_spinlock_loop: - mov a8, a3 /* Save a copy of the core ID in a8 */ - movi a10, _xt_coproc_owner_sa_lock /* a10 = base address of lock variable */ - addx4 a10, a8, a10 /* Use core ID in a8 to calculate the offset to the lock variable for the core */ - movi a11, 0 /* a11 = 0 */ - wsr a11, scompare1 /* scompare1 = a11 :- Expect the spinlock to be free (value = 0) */ - movi a11, 1 /* a11 = 1 :- Write 1 to take the spinlock */ - s32c1i a11, a10, 0 /* if (lock == scompare1) {tmp = lock; lock = a11; a11 = tmp} else {a11 = lock} */ - bnez a11, .L_spinlock_loop /* if (a11 != 0) {loop} :- Keep spinning until the spinlock is available */ - - movi a4, XCHAL_CP_MAX << 2 - mull a3, a3, a4 - movi a4, _xt_coproc_owner_sa /* a4 = base of owner array */ - add a4, a4, a3 - - addi a5, a4, XCHAL_CP_MAX << 2 /* a5 = top+1 of owner array */ - movi a6, 0 /* a6 = 0 (unowned) */ - -1: l32i a8, a4, 0 /* a8 = owner at a4 */ - bne a2, a8, 2f /* if (coproc_sa_base == owner) */ - s32i a6, a4, 0 /* owner = unowned */ -2: addi a4, a4, 1<<2 /* a4 = next entry in owner array */ - bltu a4, a5, 1b /* repeat until end of array */ - -3: wsr a7, PS /* restore interrupts */ - - /* Release spinlock */ - movi a11, 0 /* a11 = 0 */ - s32ri a11, a10, 0 /* a10 = base address of lock variable. Write 0 to release the lock */ + /* a3 = xTargetCoreID */ + + movi a4, XCHAL_CP_MAX << 2 /* a4 = size of an owner array */ + mull a4, a3, a4 /* a4 = offset to the owner array of the target core */ + movi a3, _xt_coproc_owner_sa /* a3 = base of all owner arrays */ + add a3, a3, a4 /* a3 = base of owner array of the target core */ + addi a4, a3, XCHAL_CP_MAX << 2 /* a4 = top+1 of owner array of the target core */ + movi a5, 0 /* a5 = 0 (unowned) */ + + rsil a6, XCHAL_EXCM_LEVEL /* lock interrupts */ +#if portNUM_PROCESSORS > 1 + /* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock + * to ensure thread safe access of _xt_coproc_owner_sa between cores. */ + spinlock_take a7 a8 _xt_coproc_owner_sa_lock +#endif /* portNUM_PROCESSORS > 1 */ + +1: l32i a7, a3, 0 /* a7 = owner at a3 */ + bne a2, a7, 2f /* if (coproc_sa_base == owner) */ + s32i a5, a3, 0 /* owner = unowned */ +2: addi a3, a3, 1<<2 /* a3 = next entry in owner array */ + bltu a3, a4, 1b /* repeat until end of array */ + +#if portNUM_PROCESSORS > 1 + /* Release previously taken spinlock */ + spinlock_release a7 a8 _xt_coproc_owner_sa_lock +#endif /* portNUM_PROCESSORS > 1 */ + wsr a6, PS /* restore interrupts */ RET0 diff --git a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_vectors.S b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_vectors.S index 9f96dafb496..61e1c799688 100644 --- a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_vectors.S +++ b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/xtensa_vectors.S @@ -102,15 +102,8 @@ #include "esp_private/panic_reason.h" #include "sdkconfig.h" #include "soc/soc.h" +#include "xt_asm_utils.h" -/* - Define for workaround: pin no-cpu-affinity tasks to a cpu when fpu is used. - Please change this when the tcb structure is changed -*/ -.extern pxCurrentTCBs -#if ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 ) -.extern offset_uxCoreAffinityMask -#endif // ( configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 ) /* -------------------------------------------------------------------------------- @@ -859,22 +852,22 @@ _xt_coproc_owner_sa: /* Spinlock per core for accessing _xt_coproc_owner_sa array * * 0 = Spinlock available - * 1 = Spinlock taken + * PRID = Spinlock taken * * The lock provides mutual exclusion for accessing the _xt_coproc_owner_sa array. - * This array can be modified by both _xt_coproc_exc and _xt_coproc_release routines - * simultaneously owing to the fact that the FreeRTOS SMP Kernel allows cross-core - * task deletion. Therefore, the same memory location in the owner save-area array - * could be modified at the same time. + * The array can be modified by multiple cores simultaneously (via _xt_coproc_exc + * and _xt_coproc_release). Therefore, this spinlock is defined to ensure thread + * safe access of the _xt_coproc_owner_sa array. */ +#if portNUM_PROCESSORS > 1 .global _xt_coproc_owner_sa_lock .type _xt_coproc_owner_sa_lock,@object .align 16 /* minimize crossing cache boundaries */ _xt_coproc_owner_sa_lock: - .space (portNUM_PROCESSORS) << 2 + .space 4 +#endif /* portNUM_PROCESSORS > 1 */ .section .iram1,"ax" - .align 4 .L_goto_invalid: j .L_xt_coproc_invalid /* not in a thread (invalid) */ @@ -924,51 +917,15 @@ _xt_coproc_exc: s32i a4, sp, XT_STK_A4 s32i a15, sp, XT_STK_A15 - /* Aquire spinlock before proceeding with the exception handler. - * (Refer _xt_coproc_release for competing routine for the lock.) - * - * [refactor-todo]: The spinlock aquire/release routine can be - * refactored in to a macro later if the need arises to use it - * at more than one place in the port assembler files. - */ -.L_spinlock_loop: - movi a2, _xt_coproc_owner_sa_lock /* a2 = base address of lock variable */ - getcoreid a0 /* get the core ID in a0 to calculate the offset of the lock variable */ - addx4 a2, a0, a2 /* a2 = address of desired lock variable */ - movi a0, 0 /* a0 = 0 */ - wsr a0, scompare1 /* scompare1 = a0 :- Expect the spinlock to be free (value = 0) */ - movi a0, 1 /* a0 = 1 :- Write 1 to take the spinlock */ - s32c1i a0, a2, 0 /* if (lock == scompare1) {tmp = lock; lock = a0; a0 = tmp} else {a0 = lock} */ - bnez a0, .L_spinlock_loop /* if (a0 != 0) {loop} :- Keep spinning until the spinlock is available */ + /* Call the RTOS coprocessor exception hook */ + call0 XT_RTOS_CP_EXC_HOOK /* Get co-processor state save area of new owner thread. */ call0 XT_RTOS_CP_STATE /* a15 = new owner's save area */ - #if CONFIG_FREERTOS_FPU_IN_ISR - beqz a15, .L_skip_core_pin /* CP used in ISR, skip task pinning */ - #else + #if !CONFIG_FREERTOS_FPU_IN_ISR beqz a15, .L_goto_invalid /* not in a thread (invalid) */ #endif -#if ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 ) - /* CP operations are incompatible with unpinned tasks. Thus we pin the task - to the current running core. */ - movi a2, pxCurrentTCBs - getcoreid a3 /* a3 = current core ID */ - addx4 a2, a3, a2 - l32i a2, a2, 0 /* a2 = start of pxCurrentTCBs[cpuid] */ - movi a4, offset_uxCoreAffinityMask - l32i a4, a4, 0 /* a4 = offset_uxCoreAffinityMask */ - add a2, a2, a4 /* a2 = &TCB.uxCoreAffinityMask */ - ssl a3 /* Use core ID as shift amount */ - movi a4, 1 - sll a4, a4 /* a4 = uxCoreAffinityMask = (1 << core ID) */ - s32i a4, a2, 0 /* Store affinity mask to TCB.uxCoreAffinityMask */ -#endif // ( XCHAL_CP_NUM > 0 && configUSE_CORE_AFFINITY == 1 && configNUM_CORES > 1 ) - -#if CONFIG_FREERTOS_FPU_IN_ISR -.L_skip_core_pin: -#endif - /* Enable the co-processor's bit in CPENABLE. */ movi a0, _xt_coproc_mask rsr a4, CPENABLE /* a4 = CPENABLE */ @@ -978,17 +935,18 @@ _xt_coproc_exc: or a4, a4, a2 /* a4 = CPENABLE | (1 << n) */ wsr a4, CPENABLE -/* -Keep loading _xt_coproc_owner_sa[n] atomic (=load once, then use that value -everywhere): _xt_coproc_release assumes it works like this in order not to need -locking. -*/ - /* Grab correct xt_coproc_owner_sa for this core */ + /* Grab the xt_coproc_owner_sa owner array for current core */ getcoreid a3 /* a3 = current core ID */ - movi a2, XCHAL_CP_MAX << 2 - mull a2, a2, a3 /* multiply by current processor id */ - movi a3, _xt_coproc_owner_sa /* a3 = base of owner array */ - add a3, a3, a2 /* a3 = owner area needed for this processor */ + movi a2, XCHAL_CP_MAX << 2 /* a2 = size of an owner array */ + mull a2, a2, a3 /* a2 = offset to the owner array of the current core*/ + movi a3, _xt_coproc_owner_sa /* a3 = base of all owner arrays */ + add a3, a3, a2 /* a3 = base of owner array of the current core */ + +#if portNUM_PROCESSORS > 1 + /* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock + * to ensure thread safe access of _xt_coproc_owner_sa between cores. */ + spinlock_take a0 a2 _xt_coproc_owner_sa_lock +#endif /* portNUM_PROCESSORS > 1 */ /* Get old coprocessor owner thread (save area ptr) and assign new one. */ addx4 a3, a5, a3 /* a3 = &_xt_coproc_owner_sa[n] */ @@ -996,13 +954,21 @@ locking. s32i a15, a3, 0 /* _xt_coproc_owner_sa[n] = new */ rsync /* ensure wsr.CPENABLE is complete */ +#if portNUM_PROCESSORS > 1 + /* Release previously taken spinlock */ + spinlock_release a0 a2 _xt_coproc_owner_sa_lock +#endif /* portNUM_PROCESSORS > 1 */ + /* Only need to context switch if new owner != old owner. */ /* If float is necessary on ISR, we need to remove this check */ /* below, because on restoring from ISR we may have new == old condition used * to force cp restore to next thread + * Todo: IDF-6418 */ - #ifndef CONFIG_FREERTOS_FPU_IN_ISR - beq a15, a2, .L_goto_done /* new owner == old, we're done */ + #if !CONFIG_FREERTOS_FPU_IN_ISR + bne a15, a2, .L_switch_context + j .L_goto_done /* new owner == old, we're done */ +.L_switch_context: #endif /* If no old owner then nothing to save. */ @@ -1072,14 +1038,6 @@ locking. /* Restore interruptee's saved registers. */ /* Can omit rsync for wsr.CPENABLE here because _xt_user_exit does it. */ .L_xt_coproc_done: - - /* Release spinlock */ - movi a2, _xt_coproc_owner_sa_lock /* a2 = base address of the lock variable */ - getcoreid a0 /* a0 = core ID to calculate the offset of the lock variable */ - addx4 a2, a0, a2 /* a2 = address of the lock variable */ - movi a0, 0 /* a0 = 0 */ - s32ri a0, a2, 0 /* a2 = a0 :- Write 0 to release the lock */ - l32i a15, sp, XT_STK_A15 l32i a5, sp, XT_STK_A5 l32i a4, sp, XT_STK_A4 @@ -1107,14 +1065,6 @@ locking. /* Co-processor exception occurred outside a thread (not supported). */ .L_xt_coproc_invalid: - - /* Release spinlock */ - movi a2, _xt_coproc_owner_sa_lock /* a2 = base address of the lock variable */ - getcoreid a0 /* a0 = core ID to calculate the offset of the lock variable */ - addx4 a2, a0, a2 /* a2 = address of the lock variable */ - movi a0, 0 /* a0 = 0 */ - s32ri a0, a2, 0 /* a2 = a0 :- Write 0 to release the lock */ - movi a0,PANIC_RSN_COPROCEXCEPTION wsr a0,EXCCAUSE call0 _xt_panic /* not in a thread (invalid) */ @@ -1735,7 +1685,7 @@ _Level6Vector: .global xt_nmi .align 4 _NMIExceptionVector: - wsr a0, EXCSAVE + XCHAL_NMILEVEL _ /* preserve a0 */ + wsr a0, EXCSAVE + XCHAL_NMILEVEL /* preserve a0 */ call0 xt_nmi /* load interrupt handler */ /* never returns here - call0 is used as a jump (see note at top) */ @@ -1856,9 +1806,9 @@ _xt_alloca_exc: wsr a2, PS /* update PS.OWB to new window base */ rsync - _bbci.l a4, 31, _WindowUnderflow4 + bbci.l a4, 31, _WindowUnderflow4 rotw -1 /* original a0 goes to a8 */ - _bbci.l a8, 30, _WindowUnderflow8 + bbci.l a8, 30, _WindowUnderflow8 rotw -1 j _WindowUnderflow12 From c318c89453b3140956682285bc46fa4884135728 Mon Sep 17 00:00:00 2001 From: Darian Leung Date: Wed, 14 Dec 2022 20:44:13 +0800 Subject: [PATCH 3/6] freertos(IDF): Remove dependency on portUSING_MPU_WRAPPERS This commit removes the dependency on portUSING_MPU_WRAPPERS on the Xtensa port of IDF FreeRTOS. This dependency was added due to a hack implemented in the upstream port that required the usage of the "xMPUSettings" member of the TCB. The "xMPUSettings" would be used as a pointer to the task's coprocessor save area on the stack, even though FreeRTOS MPU support was not available. The hack has now been removed, and the CPSA pointer is now calculated using a combination of constant offsets values and the pxEndOfStack member of the TCB. Note: This impelemtation was copied from the Xtensa port of Amazon SMP FreeRTOS. --- .../xtensa/include/freertos/portmacro.h | 39 +----- .../xtensa/include/freertos/xtensa_rtos.h | 9 ++ .../FreeRTOS-Kernel/portable/xtensa/port.c | 61 ++++----- .../FreeRTOS-Kernel/portable/xtensa/portasm.S | 124 ++++++++++++++---- .../portable/xtensa/xtensa_vectors.S | 32 +---- components/freertos/FreeRTOS-Kernel/tasks.c | 4 + .../include/freertos/FreeRTOSConfig.h | 2 +- components/freertos/linker.lf | 5 +- docs/doxygen-known-warnings.txt | 2 + docs/doxygen/Doxyfile | 1 - tools/test_idf_monitor/Makefile | 4 +- 11 files changed, 160 insertions(+), 123 deletions(-) diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/include/freertos/portmacro.h b/components/freertos/FreeRTOS-Kernel/portable/xtensa/include/freertos/portmacro.h index a08ae69d309..4dfa19fb628 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/include/freertos/portmacro.h +++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/include/freertos/portmacro.h @@ -642,48 +642,13 @@ FORCE_INLINE_ATTR BaseType_t xPortGetCoreID(void) /* ------------------------------------------------------ Misc --------------------------------------------------------- * - Miscellaneous porting macros * - These are not port of the FreeRTOS porting interface, but are used by other FreeRTOS dependent components - * - [refactor-todo] Remove dependency on MPU wrappers by modifying TCB * ------------------------------------------------------------------------------------------------------------------ */ // -------------------- Co-Processor ----------------------- -// When coprocessors are defined, we maintain a pointer to coprocessors area. -// We currently use a hack: redefine field xMPU_SETTINGS in TCB block as a structure that can hold: -// MPU wrappers, coprocessor area pointer, trace code structure, and more if needed. -// The field is normally used for memory protection. FreeRTOS should create another general purpose field. -typedef struct { #if XCHAL_CP_NUM > 0 - volatile StackType_t *coproc_area; // Pointer to coprocessor save area; MUST BE FIRST -#endif - -#if portUSING_MPU_WRAPPERS - // Define here mpu_settings, which is port dependent - int mpu_setting; // Just a dummy example here; MPU not ported to Xtensa yet -#endif -} xMPU_SETTINGS; - -// Main hack to use MPU_wrappers even when no MPU is defined (warning: mpu_setting should not be accessed; otherwise move this above xMPU_SETTINGS) -#if (XCHAL_CP_NUM > 0) && !portUSING_MPU_WRAPPERS // If MPU wrappers not used, we still need to allocate coproc area -#undef portUSING_MPU_WRAPPERS -#define portUSING_MPU_WRAPPERS 1 // Enable it to allocate coproc area -#define MPU_WRAPPERS_H // Override mpu_wrapper.h to disable unwanted code -#define PRIVILEGED_FUNCTION -#define PRIVILEGED_DATA -#endif - -void _xt_coproc_release(volatile void *coproc_sa_base); - -/* - * The structures and methods of manipulating the MPU are contained within the - * port layer. - * - * Fills the xMPUSettings structure with the memory region information - * contained in xRegions. - */ -#if( portUSING_MPU_WRAPPERS == 1 ) -struct xMEMORY_REGION; -void vPortStoreTaskMPUSettings( xMPU_SETTINGS *xMPUSettings, const struct xMEMORY_REGION *const xRegions, StackType_t *pxBottomOfStack, uint32_t usStackDepth ) PRIVILEGED_FUNCTION; -void vPortReleaseTaskMPUSettings( xMPU_SETTINGS *xMPUSettings ); +void vPortCleanUpCoprocArea(void *pvTCB); +#define portCLEAN_UP_COPROC(pvTCB) vPortCleanUpCoprocArea(pvTCB) #endif // -------------------- Heap Related ----------------------- diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/include/freertos/xtensa_rtos.h b/components/freertos/FreeRTOS-Kernel/portable/xtensa/include/freertos/xtensa_rtos.h index f2e2f99cccf..42399b5c68f 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/include/freertos/xtensa_rtos.h +++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/include/freertos/xtensa_rtos.h @@ -170,6 +170,15 @@ The implementation may use only a2-4, a15 (all other regs must be preserved). // void* XT_RTOS_CP_STATE(void) #define XT_RTOS_CP_STATE _frxt_task_coproc_state +/* +RTOS provided hook function that is called on every coprocessor exception. May +only be called from assembly code and by the 'call0' instruction. +The implementation may use only a2-4, a15 (all other regs must be preserved). +*/ +// void XT_RTOS_CP_EXC_HOOK(void) +#if XCHAL_CP_NUM > 0 +#define XT_RTOS_CP_EXC_HOOK _frxt_coproc_exc_hook +#endif /******************************************************************************* diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c b/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c index dad25e9ed22..430308ac91e 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c +++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c @@ -81,9 +81,21 @@ _Static_assert(tskNO_AFFINITY == CONFIG_FREERTOS_NO_AFFINITY, "incorrect tskNO_A /* ---------------------------------------------------- Variables ------------------------------------------------------ - * + * - Various variables used to maintain the FreeRTOS port's state. Used from both port.c and various .S files + * - Constant offsets are used by assembly to jump to particular TCB members or a stack area (such as the CPSA). We use + * C constants instead of preprocessor macros due to assembly lacking "offsetof()". * ------------------------------------------------------------------------------------------------------------------ */ +#if XCHAL_CP_NUM > 0 +/* Offsets used to navigate to a task's CPSA on the stack */ +const DRAM_ATTR uint32_t offset_pxEndOfStack = offsetof(StaticTask_t, pxDummy8); +const DRAM_ATTR uint32_t offset_cpsa = XT_CP_SIZE; /* Offset to start of the CPSA area on the stack. See uxInitialiseStackCPSA(). */ +#if configNUM_CORES > 1 +/* Offset to TCB_t.xCoreID member. Used to pin unpinned tasks that use the FPU. */ +const DRAM_ATTR uint32_t offset_xCoreID = offsetof(StaticTask_t, xDummyCoreID); +#endif /* configNUM_CORES > 1 */ +#endif /* XCHAL_CP_NUM > 0 */ + volatile unsigned port_xSchedulerRunning[portNUM_PROCESSORS] = {0}; // Indicates whether scheduler is running on a per-core basis unsigned port_interruptNesting[portNUM_PROCESSORS] = {0}; // Interrupt nesting level. Increased/decreased in portasm.c, _frxt_int_enter/_frxt_int_exit BaseType_t port_uxCriticalNesting[portNUM_PROCESSORS] = {0}; @@ -389,16 +401,21 @@ FORCE_INLINE_ATTR UBaseType_t uxInitialiseStackFrame(UBaseType_t uxStackPointer, return uxStackPointer; } -#if portUSING_MPU_WRAPPERS -StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxCode, void *pvParameters, BaseType_t xRunPrivileged) +#if ( portHAS_STACK_OVERFLOW_CHECKING == 1 ) +StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack, + StackType_t * pxEndOfStack, + TaskFunction_t pxCode, + void * pvParameters ) #else -StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxCode, void *pvParameters ) +StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack, + TaskFunction_t pxCode, + void * pvParameters ) #endif { /* HIGH ADDRESS |---------------------------| <- pxTopOfStack on entry - | Coproc Save Area | + | Coproc Save Area | (CPSA MUST BE FIRST) | ------------------------- | | TLS Variables | | ------------------------- | <- Start of useable stack @@ -417,7 +434,7 @@ StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxC // Make sure the incoming stack pointer is aligned on 16 configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0); #if XCHAL_CP_NUM > 0 - // Initialize the coprocessor save area + // Initialize the coprocessor save area. THIS MUST BE THE FIRST AREA due to access from _frxt_task_coproc_state() uxStackPointer = uxInitialiseStackCPSA(uxStackPointer); // Each allocated section on the stack must have a size aligned on 16 configASSERT((uxStackPointer & portBYTE_ALIGNMENT_MASK) == 0); @@ -583,33 +600,17 @@ void vPortSetStackWatchpoint( void *pxStackStart ) esp_cpu_set_watchpoint(STACK_WATCH_POINT_NUMBER, (char *)addr, 32, ESP_CPU_WATCHPOINT_STORE); } -/* ---------------------------------------------- Misc Implementations ------------------------------------------------- - * - * ------------------------------------------------------------------------------------------------------------------ */ - // -------------------- Co-Processor ----------------------- -/* - * Used to set coprocessor area in stack. Current hack is to reuse MPU pointer for coprocessor area. - */ -#if portUSING_MPU_WRAPPERS -void vPortStoreTaskMPUSettings( xMPU_SETTINGS *xMPUSettings, const struct xMEMORY_REGION *const xRegions, StackType_t *pxBottomOfStack, uint32_t usStackDepth ) -{ #if XCHAL_CP_NUM > 0 - xMPUSettings->coproc_area = ( StackType_t * ) ( ( uint32_t ) ( pxBottomOfStack + usStackDepth - 1 )); - xMPUSettings->coproc_area = ( StackType_t * ) ( ( ( portPOINTER_SIZE_TYPE ) xMPUSettings->coproc_area ) & ( ~( ( portPOINTER_SIZE_TYPE ) portBYTE_ALIGNMENT_MASK ) ) ); - xMPUSettings->coproc_area = ( StackType_t * ) ( ( ( uint32_t ) xMPUSettings->coproc_area - XT_CP_SIZE ) & ~0xf ); - - - /* NOTE: we cannot initialize the coprocessor save area here because FreeRTOS is going to - * clear the stack area after we return. This is done in pxPortInitialiseStack(). - */ -#endif -} +void _xt_coproc_release(volatile void *coproc_sa_base); -void vPortReleaseTaskMPUSettings( xMPU_SETTINGS *xMPUSettings ) +void vPortCleanUpCoprocArea(void *pvTCB) { - /* If task has live floating point registers somewhere, release them */ - _xt_coproc_release( xMPUSettings->coproc_area ); + /* Get a pointer to the task's coprocessor save area */ + UBaseType_t uxCoprocArea; + uxCoprocArea = ( UBaseType_t ) ( ( ( StaticTask_t * ) pvTCB )->pxDummy8 ); /* Get TCB_t.pxEndOfStack */ + uxCoprocArea = STACKPTR_ALIGN_DOWN(16, uxCoprocArea - XT_CP_SIZE); + _xt_coproc_release( ( void * ) uxCoprocArea ); } -#endif /* portUSING_MPU_WRAPPERS */ +#endif /* XCHAL_CP_NUM > 0 */ diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/portasm.S b/components/freertos/FreeRTOS-Kernel/portable/xtensa/portasm.S index 303db0171f8..b7fe0f36a7c 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/portasm.S +++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/portasm.S @@ -32,9 +32,47 @@ #include "sdkconfig.h" #define TOPOFSTACK_OFFS 0x00 /* StackType_t *pxTopOfStack */ -#define CP_TOPOFSTACK_OFFS 0x04 /* xMPU_SETTINGS.coproc_area */ .extern pxCurrentTCB +#if XCHAL_CP_NUM > 0 +/* Offsets used to get a task's coprocessor save area (CPSA) from its TCB */ +.extern offset_pxEndOfStack +.extern offset_cpsa +#if configNUM_CORES > 1 +/* Offset to TCB_t.xCoreID member. Used to pin unpinned tasks that use the FPU. */ +.extern offset_xCoreID +#endif /* configNUM_CORES > 1 */ +#endif /* XCHAL_CP_NUM > 0 */ + +/* +-------------------------------------------------------------------------------- + Macro get_cpsa_from_tcb - get the pointer to a task's CPSA form its TCB + + Entry: + - "reg_A" contains a pointer to the task's TCB + Exit: + - "reg_A" contains pointer the the task's CPSA + - "reg_B" clobbered + + The two arguments must be different AR registers. +-------------------------------------------------------------------------------- +*/ +#if XCHAL_CP_NUM > 0 + .macro get_cpsa_from_tcb reg_A reg_B + /* Get TCB.pxEndOfStack from reg_A */ + movi \reg_B, offset_pxEndOfStack /* Move &offset_pxEndOfStack into reg_B */ + l32i \reg_B, \reg_B, 0 /* Load offset_pxEndOfStack into reg_B */ + add \reg_A, \reg_A, \reg_B /* Calculate &pxEndOfStack to reg_A (&TCB + offset_pxEndOfStack) */ + l32i \reg_A, \reg_A, 0 /* Load TCB.pxEndOfStack into reg_A */ + /* Offset to start of CP save area */ + movi \reg_B, offset_cpsa /* Move &offset_cpsa into reg_B */ + l32i \reg_B, \reg_B, 0 /* Load offset_cpsa into reg_B */ + sub \reg_A, \reg_A, \reg_B /* Subtract offset_cpsa from pxEndOfStack to get to start of CP save area (unaligned) */ + /* Align down start of CP save area to 16 byte boundary */ + movi \reg_B, ~(0xF) + and \reg_A, \reg_A, \reg_B /* Align CP save area pointer to 16 bytes */ + .endm +#endif /* XCHAL_CP_NUM > 0 */ .global port_IntStack .global port_switch_flag //Required by sysview_tracing build @@ -120,23 +158,19 @@ _frxt_int_enter: mull a2, a4, a2 add a1, a1, a2 /* for current proc */ - #ifdef CONFIG_FREERTOS_FPU_IN_ISR - #if XCHAL_CP_NUM > 0 + #if CONFIG_FREERTOS_FPU_IN_ISR && XCHAL_CP_NUM > 0 rsr a3, CPENABLE /* Restore thread scope CPENABLE */ addi sp, sp,-4 /* ISR will manage FPU coprocessor by forcing */ s32i a3, a1, 0 /* its trigger */ #endif - #endif .Lnested: 1: - #ifdef CONFIG_FREERTOS_FPU_IN_ISR - #if XCHAL_CP_NUM > 0 + #if CONFIG_FREERTOS_FPU_IN_ISR && XCHAL_CP_NUM > 0 movi a3, 0 /* whilst ISRs pending keep CPENABLE exception active */ wsr a3, CPENABLE rsync #endif - #endif mov a0, a12 /* restore return addr and return */ ret @@ -174,14 +208,12 @@ _frxt_int_exit: s32i a2, a3, 0 /* save nesting count */ bnez a2, .Lnesting /* !=0 after decr so still nested */ - #ifdef CONFIG_FREERTOS_FPU_IN_ISR - #if XCHAL_CP_NUM > 0 + #if CONFIG_FREERTOS_FPU_IN_ISR && XCHAL_CP_NUM > 0 l32i a3, sp, 0 /* Grab last CPENABLE before leave ISR */ addi sp, sp, 4 wsr a3, CPENABLE rsync /* ensure CPENABLE was modified */ #endif - #endif movi a2, pxCurrentTCB addx4 a2, a4, a2 @@ -460,11 +492,11 @@ _frxt_dispatch: #if XCHAL_CP_NUM > 0 /* Restore CPENABLE from task's co-processor save area. */ - movi a3, pxCurrentTCB /* cp_state = */ - getcoreid a2 - addx4 a3, a2, a3 - l32i a3, a3, 0 - l32i a2, a3, CP_TOPOFSTACK_OFFS /* StackType_t *pxStack; */ + movi a2, pxCurrentTCB /* cp_state = */ + getcoreid a3 + addx4 a2, a3, a2 + l32i a2, a2, 0 + get_cpsa_from_tcb a2, a3 /* After this, pointer to CP save area is in a2, a3 is destroyed */ l16ui a3, a2, XT_CPENABLE /* CPENABLE = cp_state->cpenable; */ wsr a3, CPENABLE #endif @@ -563,7 +595,7 @@ vPortYield: #if XCHAL_CP_NUM > 0 /* Clear CPENABLE, also in task's co-processor state save area. */ - l32i a2, a2, CP_TOPOFSTACK_OFFS /* a2 = pxCurrentTCB->cp_state */ + get_cpsa_from_tcb a2, a3 /* After this, pointer to CP save area is in a2, a3 is destroyed */ movi a3, 0 wsr a3, CPENABLE beqz a2, 1f @@ -604,12 +636,12 @@ vPortYieldFromInt: #if XCHAL_CP_NUM > 0 /* Save CPENABLE in task's co-processor save area, and clear CPENABLE. */ - movi a3, pxCurrentTCB /* cp_state = */ - getcoreid a2 - addx4 a3, a2, a3 - l32i a3, a3, 0 + movi a2, pxCurrentTCB /* cp_state = */ + getcoreid a3 + addx4 a2, a3, a2 + l32i a2, a2, 0 - l32i a2, a3, CP_TOPOFSTACK_OFFS + get_cpsa_from_tcb a2, a3 /* After this, pointer to CP save area is in a2, a3 is destroyed */ rsr a3, CPENABLE s16i a3, a2, XT_CPENABLE /* cp_state->cpenable = CPENABLE; */ @@ -663,10 +695,58 @@ _frxt_task_coproc_state: l32i a15, a15, 0 /* && pxCurrentTCB != 0) { */ beqz a15, 2f - l32i a15, a15, CP_TOPOFSTACK_OFFS + get_cpsa_from_tcb a15, a3 /* After this, pointer to CP save area is in a15, a3 is destroyed */ ret 1: movi a15, 0 2: ret #endif /* XCHAL_CP_NUM > 0 */ + +/* +********************************************************************************************************** +* _frxt_coproc_exc_hook +* void _frxt_coproc_exc_hook(void) +* +* Implements the Xtensa RTOS porting layer's XT_RTOS_CP_EXC_HOOK function for FreeRTOS. +* +* May only be called from assembly code by the 'call0' instruction. Does NOT obey ABI conventions. +* May only only use a2-4, a15 (all other regs must be preserved). +* See the detailed description of the XT_RTOS_ENTER macro in xtensa_rtos.h. +* +********************************************************************************************************** +*/ +#if XCHAL_CP_NUM > 0 + + .globl _frxt_coproc_exc_hook + .type _frxt_coproc_exc_hook,@function + .align 4 +_frxt_coproc_exc_hook: + + #if configNUM_CORES > 1 + getcoreid a2 /* a2 = xCurCoreID */ + /* if (port_xSchedulerRunning[xCurCoreID] == 0) */ + movi a3, port_xSchedulerRunning + addx4 a3, a2, a3 + l32i a3, a3, 0 + beqz a3, 1f /* Scheduler hasn't started yet. Return. */ + /* if (port_interruptNesting[xCurCoreID] != 0) */ + movi a3, port_interruptNesting + addx4 a3, a2, a3 + l32i a3, a3, 0 + bnez a3, 1f /* We are in an interrupt. Return*/ + /* CP operations are incompatible with unpinned tasks. Thus we pin the task + to the current running core. */ + movi a3, pxCurrentTCB + addx4 a3, a2, a3 + l32i a3, a3, 0 /* a3 = pxCurrentTCB[xCurCoreID] */ + movi a4, offset_xCoreID + l32i a4, a4, 0 /* a4 = offset_xCoreID */ + add a3, a3, a4 /* a3 = &TCB.xCoreID */ + s32i a2, a3, 0 /* TCB.xCoreID = xCurCoreID */ +1: + #endif /* configNUM_CORES > 1 */ + + ret + +#endif /* XCHAL_CP_NUM > 0 */ diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S index 85657163fa3..a6571c6113d 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S +++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S @@ -103,12 +103,6 @@ #include "sdkconfig.h" #include "soc/soc.h" -/* - Define for workaround: pin no-cpu-affinity tasks to a cpu when fpu is used. - Please change this when the tcb structure is changed -*/ -#define TASKTCB_XCOREID_OFFSET (0x38+configMAX_TASK_NAME_LEN+3)&~3 -.extern pxCurrentTCB /* -------------------------------------------------------------------------------- @@ -907,29 +901,15 @@ _xt_coproc_exc: s32i a4, sp, XT_STK_A4 s32i a15, sp, XT_STK_A15 + /* Call the RTOS coprocessor exception hook */ + call0 XT_RTOS_CP_EXC_HOOK + /* Get co-processor state save area of new owner thread. */ call0 XT_RTOS_CP_STATE /* a15 = new owner's save area */ - #if CONFIG_FREERTOS_FPU_IN_ISR - beqz a15, .L_skip_core_pin /* CP used in ISR, skip task pinning */ - #else + #if !CONFIG_FREERTOS_FPU_IN_ISR beqz a15, .L_goto_invalid /* not in a thread (invalid) */ #endif -#if configNUM_CORES > 1 - /* CP operations are incompatible with unpinned tasks. Thus we pin the task - to the current running core. */ - movi a2, pxCurrentTCB - getcoreid a3 /* a3 = current core ID */ - addx4 a2, a3, a2 - l32i a2, a2, 0 /* a2 = start of pxCurrentTCB[cpuid] */ - addi a2, a2, TASKTCB_XCOREID_OFFSET /* a2 = &TCB.xCoreID */ - s32i a3, a2, 0 /* TCB.xCoreID = current core ID */ -#endif // configNUM_CORES > 1 - -#if CONFIG_FREERTOS_FPU_IN_ISR -.L_skip_core_pin: -#endif - /* Enable the co-processor's bit in CPENABLE. */ movi a0, _xt_coproc_mask rsr a4, CPENABLE /* a4 = CPENABLE */ @@ -945,6 +925,7 @@ everywhere): _xt_coproc_release assumes it works like this in order not to need locking. */ /* Grab correct xt_coproc_owner_sa for this core */ + getcoreid a3 /* a3 = current core ID */ movi a2, XCHAL_CP_MAX << 2 mull a2, a2, a3 /* multiply by current processor id */ movi a3, _xt_coproc_owner_sa /* a3 = base of owner array */ @@ -960,8 +941,9 @@ locking. /* If float is necessary on ISR, we need to remove this check */ /* below, because on restoring from ISR we may have new == old condition used * to force cp restore to next thread + * Todo: IDF-6418 */ - #ifndef CONFIG_FREERTOS_FPU_IN_ISR + #if !CONFIG_FREERTOS_FPU_IN_ISR bne a15, a2, .L_switch_context j .L_goto_done /* new owner == old, we're done */ .L_switch_context: diff --git a/components/freertos/FreeRTOS-Kernel/tasks.c b/components/freertos/FreeRTOS-Kernel/tasks.c index 590d4858db0..d4a1b48e223 100644 --- a/components/freertos/FreeRTOS-Kernel/tasks.c +++ b/components/freertos/FreeRTOS-Kernel/tasks.c @@ -4848,6 +4848,10 @@ BaseType_t xTaskGetAffinity( TaskHandle_t xTask ) vPortReleaseTaskMPUSettings( &( pxTCB->xMPUSettings ) ); #endif + #ifdef portCLEAN_UP_COPROC + portCLEAN_UP_COPROC( ( void * ) pxTCB ); + #endif + #if ( ( configSUPPORT_DYNAMIC_ALLOCATION == 1 ) && ( configSUPPORT_STATIC_ALLOCATION == 0 ) && ( portUSING_MPU_WRAPPERS == 0 ) ) { /* The task can only have been allocated dynamically - free both diff --git a/components/freertos/esp_additions/include/freertos/FreeRTOSConfig.h b/components/freertos/esp_additions/include/freertos/FreeRTOSConfig.h index 4239d4bb1eb..d6582c4b36c 100644 --- a/components/freertos/esp_additions/include/freertos/FreeRTOSConfig.h +++ b/components/freertos/esp_additions/include/freertos/FreeRTOSConfig.h @@ -171,7 +171,7 @@ This file get's pulled into assembly sources. Therefore, some includes need to b #elif CONFIG_FREERTOS_CHECK_STACKOVERFLOW_CANARY #define configCHECK_FOR_STACK_OVERFLOW 2 #endif -#define configRECORD_STACK_HIGH_ADDRESS 1 +#define configRECORD_STACK_HIGH_ADDRESS 1 // This must be set as the port requires TCB.pxEndOfStack // ------------------- Run-time Stats ---------------------- diff --git a/components/freertos/linker.lf b/components/freertos/linker.lf index 294e301f4c3..9d63d868b41 100644 --- a/components/freertos/linker.lf +++ b/components/freertos/linker.lf @@ -13,10 +13,7 @@ entries: port: pxPortInitialiseStack (default) port: xPortStartScheduler (default) if IDF_TARGET_ESP32 = y || IDF_TARGET_ESP32S3 = y : - port: vPortReleaseTaskMPUSettings (default) - tasks: xTaskCreateRestricted (default) - port: vPortStoreTaskMPUSettings (default) - tasks: vTaskAllocateMPURegions (default) + port: vPortCleanUpCoprocArea (default) tasks: prvTaskCheckFreeStackSpace (default) tasks: prvInitialiseNewTask (default) tasks: prvInitialiseTaskLists (default) diff --git a/docs/doxygen-known-warnings.txt b/docs/doxygen-known-warnings.txt index 68658e34590..24789964de5 100644 --- a/docs/doxygen-known-warnings.txt +++ b/docs/doxygen-known-warnings.txt @@ -1,3 +1,5 @@ semphr.h:line: warning: argument 'pxStaticSemaphore' of command @param is not found in the argument list of xSemaphoreCreateCounting(uxMaxCount, uxInitialCount) task.h:line: warning: argument 'pxTaskDefinition' of command @param is not found in the argument list of vTaskAllocateMPURegions(TaskHandle_t xTask, const MemoryRegion_t *const pxRegions) task.h:line: warning: argument 'pxCreatedTask' of command @param is not found in the argument list of vTaskAllocateMPURegions(TaskHandle_t xTask, const MemoryRegion_t *const pxRegions) +task.h:line: warning: argument 'pxTaskDefinition' of command @param is not found in the argument list of vTaskAllocateMPURegions(TaskHandle_t xTask, const MemoryRegion_t *const pxRegions) +task.h:line: warning: argument 'pxCreatedTask' of command @param is not found in the argument list of vTaskAllocateMPURegions(TaskHandle_t xTask, const MemoryRegion_t *const pxRegions) diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile index d7a47864e0c..74140455f4f 100644 --- a/docs/doxygen/Doxyfile +++ b/docs/doxygen/Doxyfile @@ -303,7 +303,6 @@ PREDEFINED = \ configNUM_THREAD_LOCAL_STORAGE_POINTERS=1 \ configUSE_APPLICATION_TASK_TAG=1 \ configTASKLIST_INCLUDE_COREID=1 \ - portUSING_MPU_WRAPPERS=1 \ PRIVILEGED_FUNCTION= \ "ESP_EVENT_DECLARE_BASE(x)=extern esp_event_base_t x" diff --git a/tools/test_idf_monitor/Makefile b/tools/test_idf_monitor/Makefile index 14434123c42..ec455486df6 100644 --- a/tools/test_idf_monitor/Makefile +++ b/tools/test_idf_monitor/Makefile @@ -7,9 +7,7 @@ PREFIX_RISCV ?= riscv32-esp-elf- PROG_XTENSA := dummy_xtensa.elf PROG_RISCV := dummy_riscv.elf -# This actually depends on the value of portUSING_MPU_WRAPPERS. -# I.e. ESP32-S2 would also have TASK_NAME_OFFSET=52 since portUSING_MPU_WRAPPERS is 0. -CPPFLAGS_XTENSA := -DTASK_NAME_OFFSET=56 +CPPFLAGS_XTENSA := -DTASK_NAME_OFFSET=52 CPPFLAGS_RISCV := -DTASK_NAME_OFFSET=52 all: $(PROG_XTENSA) $(PROG_RISCV) From 45badf864fa9ac9106b65a8c24b3964b8f0eb3b5 Mon Sep 17 00:00:00 2001 From: Darian Leung Date: Wed, 14 Dec 2022 21:07:48 +0800 Subject: [PATCH 4/6] freertos(IDF): Allow cross-core freeing of task memory when deleting tasks Previously, IDF FreeRTOS would restrict the clean up of task memory (done by vTaskDelete() or the Idle task) to only tasks pinned to the current core or unpinned tasks. This was due to the need to clear the task's coprocessor ownership on the other core (i.e., "_xt_coproc_owner_sa"). But this restriction can be lifted by simply protecting access of "_xt_coproc_owner_sa" with a spinlock. This commit implements a "_xt_coproc_owner_sa_lock" to protect the access of "_xt_coproc_owner_sa", thus vTaskDelete() and prvDeleteTCB() can now delete tasks pinned to the other core so long as that task is not currently running. Note: This fix was copied from the Xtensa port of Amazon SMP FreeRTOS --- .../FreeRTOS-Kernel/portable/xtensa/port.c | 13 ++- .../portable/xtensa/xt_asm_utils.h | 64 ++++++++++- .../portable/xtensa/xtensa_context.S | 31 ++++-- .../portable/xtensa/xtensa_vectors.S | 47 ++++++-- components/freertos/FreeRTOS-Kernel/tasks.c | 104 ++++++++---------- 5 files changed, 175 insertions(+), 84 deletions(-) diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c b/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c index 430308ac91e..b32af0935dc 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c +++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c @@ -603,14 +603,21 @@ void vPortSetStackWatchpoint( void *pxStackStart ) // -------------------- Co-Processor ----------------------- #if XCHAL_CP_NUM > 0 -void _xt_coproc_release(volatile void *coproc_sa_base); +void _xt_coproc_release(volatile void *coproc_sa_base, BaseType_t xTargetCoreID); void vPortCleanUpCoprocArea(void *pvTCB) { - /* Get a pointer to the task's coprocessor save area */ UBaseType_t uxCoprocArea; + BaseType_t xTargetCoreID; + + /* Get a pointer to the task's coprocessor save area */ uxCoprocArea = ( UBaseType_t ) ( ( ( StaticTask_t * ) pvTCB )->pxDummy8 ); /* Get TCB_t.pxEndOfStack */ uxCoprocArea = STACKPTR_ALIGN_DOWN(16, uxCoprocArea - XT_CP_SIZE); - _xt_coproc_release( ( void * ) uxCoprocArea ); + + /* Get xTargetCoreID from the TCB.xCoreID */ + xTargetCoreID = ( ( StaticTask_t * ) pvTCB )->xDummyCoreID; + + /* If task has live floating point registers somewhere, release them */ + _xt_coproc_release( (void *)uxCoprocArea, xTargetCoreID ); } #endif /* XCHAL_CP_NUM > 0 */ diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xt_asm_utils.h b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xt_asm_utils.h index d4f25b79f7e..42a0af6db2b 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xt_asm_utils.h +++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xt_asm_utils.h @@ -72,4 +72,66 @@ #endif .endm -#endif +/* +-------------------------------------------------------------------------------- + Macro spinlock_take + + This macro will repeatedley attempt to atomically set a spinlock variable + using the s32c1i instruciton. A spinlock is considered free if its value is 0. + + Entry: + - "reg_A/B" as scratch registers + - "lock_var" spinlock variable's symbol + - Interrupts must already be disabled by caller + Exit: + - Spinlock set to current core's ID (PRID) + - "reg_A/B" clobbered +-------------------------------------------------------------------------------- +*/ + +#if portNUM_PROCESSORS > 1 + + .macro spinlock_take reg_A reg_B lock_var + + movi \reg_A, \lock_var /* reg_A = &lock_var */ +.L_spinlock_loop: + movi \reg_B, 0 /* Load spinlock free value (0) into SCOMPARE1 */ + wsr \reg_B, SCOMPARE1 + rsync /* Ensure that SCOMPARE1 is set before s32c1i executes */ + rsr \reg_B, PRID /* Load the current core's ID into reg_B */ + s32c1i \reg_B, \reg_A, 0 /* Attempt *lock_var = reg_B */ + bnez \reg_B, .L_spinlock_loop /* If the write was successful (i.e., lock was free), 0 will have been written back to reg_B */ + + .endm + +#endif /* portNUM_PROCESSORS > 1 */ + +/* +-------------------------------------------------------------------------------- + Macro spinlock_release + + This macro will release a spinlock variable previously taken by the + spinlock_take macro. + + Entry: + - "reg_A/B" as scratch registers + - "lock_var" spinlock variable's symbol + - Interrupts must already be disabled by caller + Exit: + - "reg_A/B" clobbered +-------------------------------------------------------------------------------- +*/ + +#if portNUM_PROCESSORS > 1 + + .macro spinlock_release reg_A reg_B lock_var + + movi \reg_A, \lock_var /* reg_A = &lock_var */ + movi \reg_B, 0 + s32i \reg_B, \reg_A, 0 /* Release the spinlock (*reg_A = 0) */ + + .endm + +#endif /* portNUM_PROCESSORS > 1 */ + +#endif /* __XT_ASM_UTILS_H */ diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_context.S b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_context.S index df0b2324d2d..cde52ea40e5 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_context.S +++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_context.S @@ -397,17 +397,16 @@ May be called when a thread terminates or completes but does not delete the co-proc save area, to avoid the exception handler having to save the thread's co-proc state before another thread can use it (optimization). -Needs to be called on the processor the thread was running on. Unpinned threads -won't have an entry here because they get pinned as soon they use a coprocessor. - Entry Conditions: A2 = Pointer to base of co-processor state save area. + A3 = Core ID of the task (must be pinned) who's coproc ownership we are + releasing. Exit conditions: None. Obeys ABI conventions per prototype: - void _xt_coproc_release(void * coproc_sa_base) + void _xt_coproc_release(void * coproc_sa_base, BaseType_t xTargetCoreID) *******************************************************************************/ @@ -420,17 +419,21 @@ Obeys ABI conventions per prototype: .align 4 _xt_coproc_release: ENTRY0 /* a2 = base of save area */ + /* a3 = xTargetCoreID */ - getcoreid a5 - movi a3, XCHAL_CP_MAX << 2 - mull a5, a5, a3 - movi a3, _xt_coproc_owner_sa /* a3 = base of owner array */ - add a3, a3, a5 - - addi a4, a3, XCHAL_CP_MAX << 2 /* a4 = top+1 of owner array */ + movi a4, XCHAL_CP_MAX << 2 /* a4 = size of an owner array */ + mull a4, a3, a4 /* a4 = offset to the owner array of the target core */ + movi a3, _xt_coproc_owner_sa /* a3 = base of all owner arrays */ + add a3, a3, a4 /* a3 = base of owner array of the target core */ + addi a4, a3, XCHAL_CP_MAX << 2 /* a4 = top+1 of owner array of the target core */ movi a5, 0 /* a5 = 0 (unowned) */ rsil a6, XCHAL_EXCM_LEVEL /* lock interrupts */ +#if portNUM_PROCESSORS > 1 + /* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock + * to ensure thread safe access of _xt_coproc_owner_sa between cores. */ + spinlock_take a7 a8 _xt_coproc_owner_sa_lock +#endif /* portNUM_PROCESSORS > 1 */ 1: l32i a7, a3, 0 /* a7 = owner at a3 */ bne a2, a7, 2f /* if (coproc_sa_base == owner) */ @@ -438,7 +441,11 @@ _xt_coproc_release: 2: addi a3, a3, 1<<2 /* a3 = next entry in owner array */ bltu a3, a4, 1b /* repeat until end of array */ -3: wsr a6, PS /* restore interrupts */ +#if portNUM_PROCESSORS > 1 + /* Release previously taken spinlock */ + spinlock_release a7 a8 _xt_coproc_owner_sa_lock +#endif /* portNUM_PROCESSORS > 1 */ + wsr a6, PS /* restore interrupts */ RET0 diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S index a6571c6113d..2b42dec014f 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S +++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/xtensa_vectors.S @@ -102,6 +102,7 @@ #include "esp_private/panic_reason.h" #include "sdkconfig.h" #include "soc/soc.h" +#include "xt_asm_utils.h" /* @@ -849,9 +850,25 @@ _xt_coproc_mask: _xt_coproc_owner_sa: .space (XCHAL_CP_MAX * portNUM_PROCESSORS) << 2 - .section .iram1,"ax" - +/* Spinlock per core for accessing _xt_coproc_owner_sa array + * + * 0 = Spinlock available + * PRID = Spinlock taken + * + * The lock provides mutual exclusion for accessing the _xt_coproc_owner_sa array. + * The array can be modified by multiple cores simultaneously (via _xt_coproc_exc + * and _xt_coproc_release). Therefore, this spinlock is defined to ensure thread + * safe access of the _xt_coproc_owner_sa array. + */ +#if portNUM_PROCESSORS > 1 + .global _xt_coproc_owner_sa_lock + .type _xt_coproc_owner_sa_lock,@object + .align 16 /* minimize crossing cache boundaries */ +_xt_coproc_owner_sa_lock: + .space 4 +#endif /* portNUM_PROCESSORS > 1 */ + .section .iram1,"ax" .align 4 .L_goto_invalid: j .L_xt_coproc_invalid /* not in a thread (invalid) */ @@ -919,17 +936,18 @@ _xt_coproc_exc: or a4, a4, a2 /* a4 = CPENABLE | (1 << n) */ wsr a4, CPENABLE -/* -Keep loading _xt_coproc_owner_sa[n] atomic (=load once, then use that value -everywhere): _xt_coproc_release assumes it works like this in order not to need -locking. -*/ - /* Grab correct xt_coproc_owner_sa for this core */ + /* Grab the xt_coproc_owner_sa owner array for current core */ getcoreid a3 /* a3 = current core ID */ - movi a2, XCHAL_CP_MAX << 2 - mull a2, a2, a3 /* multiply by current processor id */ - movi a3, _xt_coproc_owner_sa /* a3 = base of owner array */ - add a3, a3, a2 /* a3 = owner area needed for this processor */ + movi a2, XCHAL_CP_MAX << 2 /* a2 = size of an owner array */ + mull a2, a2, a3 /* a2 = offset to the owner array of the current core*/ + movi a3, _xt_coproc_owner_sa /* a3 = base of all owner arrays */ + add a3, a3, a2 /* a3 = base of owner array of the current core */ + +#if portNUM_PROCESSORS > 1 + /* If multicore, we must also acquire the _xt_coproc_owner_sa_lock spinlock + * to ensure thread safe access of _xt_coproc_owner_sa between cores. */ + spinlock_take a0 a2 _xt_coproc_owner_sa_lock +#endif /* portNUM_PROCESSORS > 1 */ /* Get old coprocessor owner thread (save area ptr) and assign new one. */ addx4 a3, a5, a3 /* a3 = &_xt_coproc_owner_sa[n] */ @@ -937,6 +955,11 @@ locking. s32i a15, a3, 0 /* _xt_coproc_owner_sa[n] = new */ rsync /* ensure wsr.CPENABLE is complete */ +#if portNUM_PROCESSORS > 1 + /* Release previously taken spinlock */ + spinlock_release a0 a2 _xt_coproc_owner_sa_lock +#endif /* portNUM_PROCESSORS > 1 */ + /* Only need to context switch if new owner != old owner. */ /* If float is necessary on ISR, we need to remove this check */ /* below, because on restoring from ISR we may have new == old condition used diff --git a/components/freertos/FreeRTOS-Kernel/tasks.c b/components/freertos/FreeRTOS-Kernel/tasks.c index d4a1b48e223..c86cc3b0d43 100644 --- a/components/freertos/FreeRTOS-Kernel/tasks.c +++ b/components/freertos/FreeRTOS-Kernel/tasks.c @@ -1415,18 +1415,8 @@ static void prvAddNewTaskToReadyList( TCB_t * pxNewTCB ) * not return. */ uxTaskNumber++; - /* - * We cannot immediately a task that is - * - Currently running on either core - * - If the task is not currently running but is pinned to the other (due to FPU cleanup) - * Todo: Allow deletion of tasks pinned to other core (IDF-5803) - */ - #if ( configNUM_CORES > 1 ) - xFreeNow = ( taskIS_CURRENTLY_RUNNING( pxTCB ) || ( pxTCB->xCoreID == !xCurCoreID ) ) ? pdFALSE : pdTRUE; - #else - xFreeNow = ( taskIS_CURRENTLY_RUNNING( pxTCB ) ) ? pdFALSE : pdTRUE; - #endif /* configNUM_CORES > 1 */ - + /* We cannot free the task immediately if it is currently running (on either core) */ + xFreeNow = ( taskIS_CURRENTLY_RUNNING( pxTCB ) ) ? pdFALSE : pdTRUE; if( xFreeNow == pdFALSE ) { /* A task is deleting itself. This cannot complete within the @@ -1455,7 +1445,7 @@ static void prvAddNewTaskToReadyList( TCB_t * pxNewTCB ) #if ( configNUM_CORES > 1 ) if( taskIS_CURRENTLY_RUNNING_ON_CORE( pxTCB, !xCurCoreID ) ) { - /* SMP case of deleting a task running on a different core. Same issue + /* SMP case of deleting a task currently running on a different core. Same issue * as a task deleting itself, but we need to send a yield to this task now * before we release xKernelLock. * @@ -4505,71 +4495,73 @@ static void prvInitialiseTaskLists( void ) } /*-----------------------------------------------------------*/ + static void prvCheckTasksWaitingTermination( void ) { /** THIS FUNCTION IS CALLED FROM THE RTOS IDLE TASK **/ #if ( INCLUDE_vTaskDelete == 1 ) { - BaseType_t xListIsEmpty; - BaseType_t core = xPortGetCoreID(); + TCB_t * pxTCB; - /* uxDeletedTasksWaitingCleanUp is used to prevent taskENTER_CRITICAL( &xKernelLock ) + /* uxDeletedTasksWaitingCleanUp is used to prevent taskENTER_CRITICAL() * being called too often in the idle task. */ while( uxDeletedTasksWaitingCleanUp > ( UBaseType_t ) 0U ) { - TCB_t * pxTCB = NULL; - - taskENTER_CRITICAL( &xKernelLock ); - { - xListIsEmpty = listLIST_IS_EMPTY( &xTasksWaitingTermination ); - - if( xListIsEmpty == pdFALSE ) + #if ( configNUM_CORES > 1 ) + pxTCB = NULL; + taskENTER_CRITICAL( &xKernelLock ); { - /* We only want to kill tasks that ran on this core because e.g. _xt_coproc_release needs to - * be called on the core the process is pinned on, if any */ - ListItem_t * target = listGET_HEAD_ENTRY( &xTasksWaitingTermination ); - - for( ; target != listGET_END_MARKER( &xTasksWaitingTermination ); target = listGET_NEXT( target ) ) /*Walk the list */ + /* List may have already been cleared by the other core. Check again */ + if ( listLIST_IS_EMPTY( &xTasksWaitingTermination ) == pdFALSE ) { - TCB_t * tgt_tcb = ( TCB_t * ) listGET_LIST_ITEM_OWNER( target ); - int affinity = tgt_tcb->xCoreID; - - /*Self deleting tasks are added to Termination List before they switch context. Ensure they aren't still currently running */ - if( ( pxCurrentTCB[ core ] == tgt_tcb ) || ( ( configNUM_CORES > 1 ) && ( pxCurrentTCB[ !core ] == tgt_tcb ) ) ) - { - continue; /*Can't free memory of task that is still running */ - } - - if( ( affinity == core ) || ( affinity == tskNO_AFFINITY ) ) /*Find first item not pinned to other core */ + /* We can't delete a task if it is still running on + * the other core. Keep walking the list until we + * find a task we can free, or until we walk the + * entire list. */ + ListItem_t *xEntry; + for ( xEntry = listGET_HEAD_ENTRY( &xTasksWaitingTermination ); xEntry != listGET_END_MARKER( &xTasksWaitingTermination ); xEntry = listGET_NEXT( xEntry ) ) { - pxTCB = tgt_tcb; - break; + if ( !taskIS_CURRENTLY_RUNNING( ( ( TCB_t * ) listGET_LIST_ITEM_OWNER( xEntry ) ) ) ) + { + pxTCB = ( TCB_t * ) listGET_LIST_ITEM_OWNER( xEntry ); + ( void ) uxListRemove( &( pxTCB->xStateListItem ) ); + --uxCurrentNumberOfTasks; + --uxDeletedTasksWaitingCleanUp; + break; + } } } + } + taskEXIT_CRITICAL( &xKernelLock ); - if( pxTCB != NULL ) - { - ( void ) uxListRemove( target ); /*Remove list item from list */ - --uxCurrentNumberOfTasks; - --uxDeletedTasksWaitingCleanUp; - } + if ( pxTCB != NULL ) + { + #if ( configNUM_THREAD_LOCAL_STORAGE_POINTERS > 0 ) && ( configTHREAD_LOCAL_STORAGE_DELETE_CALLBACKS == 1 ) + prvDeleteTLS( pxTCB ); + #endif + prvDeleteTCB( pxTCB ); } - } - taskEXIT_CRITICAL( &xKernelLock ); /*Need to call deletion callbacks outside critical section */ + else + { + /* No task found to delete, break out of loop */ + break; + } + #else + taskENTER_CRITICAL( &xKernelLock ); + { + pxTCB = listGET_OWNER_OF_HEAD_ENTRY( ( &xTasksWaitingTermination ) ); /*lint !e9079 void * is used as this macro is used with timers and co-routines too. Alignment is known to be fine as the type of the pointer stored and retrieved is the same. */ + ( void ) uxListRemove( &( pxTCB->xStateListItem ) ); + --uxCurrentNumberOfTasks; + --uxDeletedTasksWaitingCleanUp; + } + taskEXIT_CRITICAL( &xKernelLock ); - if( pxTCB != NULL ) /*Call deletion callbacks and free TCB memory */ - { #if ( configNUM_THREAD_LOCAL_STORAGE_POINTERS > 0 ) && ( configTHREAD_LOCAL_STORAGE_DELETE_CALLBACKS == 1 ) prvDeleteTLS( pxTCB ); #endif prvDeleteTCB( pxTCB ); - } - else - { - mtCOVERAGE_TEST_MARKER(); - break; /*No TCB found that could be freed by this core, break out of loop */ - } + #endif /* configNUM_CORES > 1 */ } } #endif /* INCLUDE_vTaskDelete */ From 82f28a3e09abd5e3c92f243cdaafa4ea208f8ea8 Mon Sep 17 00:00:00 2001 From: Darian Leung Date: Sat, 3 Dec 2022 00:13:31 +0800 Subject: [PATCH 5/6] freertos: Update FPU unit tests to run for multiple iterations This commit updates the FreeRTOS port FPU unit tests so that they run for multiple iterations, thus checking that a task's FPU context is properly cleaned up on deletion. --- .../freertos/port/test_fpu_in_task.c | 66 ++++++++++++------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/components/freertos/test_apps/freertos/port/test_fpu_in_task.c b/components/freertos/test_apps/freertos/port/test_fpu_in_task.c index 6c5c0657ee2..bc160b40adc 100644 --- a/components/freertos/test_apps/freertos/port/test_fpu_in_task.c +++ b/components/freertos/test_apps/freertos/port/test_fpu_in_task.c @@ -23,16 +23,21 @@ Test FPU usage from a task context Purpose: - Test that the FPU can be used from a task context - Test that FPU context is properly saved and restored + - Test that FPU context is cleaned up on task deletion by running multiple iterations Procedure: - Create TEST_PINNED_NUM_TASKS tasks pinned to each core - Start each task - Each task updates a float variable and then blocks (to allow other tasks to run thus forcing the an FPU context save and restore). + - Delete each task + - Repeat test for TEST_PINNED_NUM_ITERS iterations Expected: - Correct float value calculated by each task + - Each task cleans up its FPU context on deletion */ #define TEST_PINNED_NUM_TASKS 3 +#define TEST_PINNED_NUM_ITERS 5 static void pinned_task(void *arg) { @@ -62,35 +67,38 @@ TEST_CASE("FPU: Usage in task", "[freertos]") SemaphoreHandle_t done_sem = xSemaphoreCreateCounting(configNUM_CORES * TEST_PINNED_NUM_TASKS, 0); TEST_ASSERT_NOT_EQUAL(NULL, done_sem); - TaskHandle_t task_handles[configNUM_CORES][TEST_PINNED_NUM_TASKS]; + for (int iter = 0; iter < TEST_PINNED_NUM_ITERS; iter++) { + TaskHandle_t task_handles[configNUM_CORES][TEST_PINNED_NUM_TASKS]; - // Create test tasks for each core - for (int i = 0; i < configNUM_CORES; i++) { - for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) { - TEST_ASSERT_EQUAL(pdTRUE, xTaskCreatePinnedToCore(pinned_task, "task", 4096, (void *)done_sem, UNITY_FREERTOS_PRIORITY + 1, &task_handles[i][j], i)); + // Create test tasks for each core + for (int i = 0; i < configNUM_CORES; i++) { + for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) { + TEST_ASSERT_EQUAL(pdTRUE, xTaskCreatePinnedToCore(pinned_task, "task", 4096, (void *)done_sem, UNITY_FREERTOS_PRIORITY + 1, &task_handles[i][j], i)); + } } - } - // Start the created tasks simultaneously - for (int i = 0; i < configNUM_CORES; i++) { - for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) { - xTaskNotifyGive(task_handles[i][j]); + // Start the created tasks simultaneously + for (int i = 0; i < configNUM_CORES; i++) { + for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) { + xTaskNotifyGive(task_handles[i][j]); + } } - } - // Wait for the tasks to complete - for (int i = 0; i < configNUM_CORES * TEST_PINNED_NUM_TASKS; i++) { - xSemaphoreTake(done_sem, portMAX_DELAY); - } + // Wait for the tasks to complete + for (int i = 0; i < configNUM_CORES * TEST_PINNED_NUM_TASKS; i++) { + xSemaphoreTake(done_sem, portMAX_DELAY); + } - // Delete the tasks - for (int i = 0; i < configNUM_CORES; i++) { - for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) { - vTaskDelete(task_handles[i][j]); + // Delete the tasks + for (int i = 0; i < configNUM_CORES; i++) { + for (int j = 0; j < TEST_PINNED_NUM_TASKS; j++) { + vTaskDelete(task_handles[i][j]); + } } + + vTaskDelay(10); // Short delay to allow idle task to be free task memory and FPU contexts } - vTaskDelay(10); // Short delay to allow idle task to be free task memory and FPU contexts vSemaphoreDelete(done_sem); } @@ -101,18 +109,24 @@ Test FPU usage will pin an unpinned task Purpose: - Test that unpinned tasks are automatically pinned to the current core on the task's first use of the FPU + - Test that FPU context is cleaned up on task deletion by running multiple iterations Procedure: - Create an unpinned task - Task disables scheduling/preemption to ensure that it does not switch cores - Task uses the FPU - Task checks its core affinity after FPU usage + - Task deletes itself + - Repeat test for TEST_UNPINNED_NUM_ITERS iterations Expected: - Task remains unpinned until its first usage of the FPU - The task becomes pinned to the current core after first use of the FPU + - Each task cleans up its FPU context on deletion */ #if configNUM_CORES > 1 +#define TEST_UNPINNED_NUM_ITERS 5 + static void unpinned_task(void *arg) { // Disable scheduling/preemption to make sure current core ID doesn't change @@ -162,11 +176,13 @@ static void unpinned_task(void *arg) TEST_CASE("FPU: Usage in unpinned task", "[freertos]") { TaskHandle_t unity_task_handle = xTaskGetCurrentTaskHandle(); - // Create unpinned task - xTaskCreate(unpinned_task, "unpin", 4096, (void *)unity_task_handle, UNITY_FREERTOS_PRIORITY + 1, NULL); - // Wait for task to complete - ulTaskNotifyTake(pdTRUE, portMAX_DELAY); - vTaskDelay(10); // Short delay to allow task memory to be freed + for (int iter = 0; iter < TEST_UNPINNED_NUM_ITERS; iter++) { + // Create unpinned task + xTaskCreate(unpinned_task, "unpin", 4096, (void *)unity_task_handle, UNITY_FREERTOS_PRIORITY + 1, NULL); + // Wait for task to complete + ulTaskNotifyTake(pdTRUE, portMAX_DELAY); + vTaskDelay(10); // Short delay to allow task memory to be freed + } } #endif // configNUM_CORES > 1 From 92bbf8535053cd5155ebfcf6cbe6bad5f5b566ea Mon Sep 17 00:00:00 2001 From: Darian Leung Date: Tue, 13 Dec 2022 19:02:42 +0800 Subject: [PATCH 6/6] freertos: Fix clang-tidy warning on pxPortInitialiseStack() --- .../freertos/FreeRTOS-Kernel-SMP/portable/riscv/port.c | 5 +++++ .../freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c | 5 +++++ components/freertos/FreeRTOS-Kernel/portable/riscv/port.c | 5 +++++ components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c | 5 +++++ 4 files changed, 20 insertions(+) diff --git a/components/freertos/FreeRTOS-Kernel-SMP/portable/riscv/port.c b/components/freertos/FreeRTOS-Kernel-SMP/portable/riscv/port.c index 147edec4891..a0354a37991 100644 --- a/components/freertos/FreeRTOS-Kernel-SMP/portable/riscv/port.c +++ b/components/freertos/FreeRTOS-Kernel-SMP/portable/riscv/port.c @@ -537,6 +537,11 @@ FORCE_INLINE_ATTR UBaseType_t uxInitialiseStackFrame(UBaseType_t uxStackPointer, StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxCode, void *pvParameters) { +#ifdef __clang_analyzer__ + // Teach clang-tidy that pxTopOfStack cannot be a pointer to const + volatile StackType_t * pxTemp = pxTopOfStack; + pxTopOfStack = pxTemp; +#endif /*__clang_analyzer__ */ /* HIGH ADDRESS |---------------------------| <- pxTopOfStack on entry diff --git a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c index 16ab7437e99..ec6fe289156 100644 --- a/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c +++ b/components/freertos/FreeRTOS-Kernel-SMP/portable/xtensa/port.c @@ -684,6 +684,11 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack, void * pvParameters ) #endif { +#ifdef __clang_analyzer__ + // Teach clang-tidy that pxTopOfStack cannot be a pointer to const + volatile StackType_t * pxTemp = pxTopOfStack; + pxTopOfStack = pxTemp; +#endif /*__clang_analyzer__ */ /* HIGH ADDRESS |---------------------------| <- pxTopOfStack on entry diff --git a/components/freertos/FreeRTOS-Kernel/portable/riscv/port.c b/components/freertos/FreeRTOS-Kernel/portable/riscv/port.c index e86466beed2..1326334b8c2 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/riscv/port.c +++ b/components/freertos/FreeRTOS-Kernel/portable/riscv/port.c @@ -274,6 +274,11 @@ FORCE_INLINE_ATTR UBaseType_t uxInitialiseStackFrame(UBaseType_t uxStackPointer, StackType_t *pxPortInitialiseStack(StackType_t *pxTopOfStack, TaskFunction_t pxCode, void *pvParameters) { +#ifdef __clang_analyzer__ + // Teach clang-tidy that pxTopOfStack cannot be a pointer to const + volatile StackType_t * pxTemp = pxTopOfStack; + pxTopOfStack = pxTemp; +#endif /*__clang_analyzer__ */ /* HIGH ADDRESS |---------------------------| <- pxTopOfStack on entry diff --git a/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c b/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c index b32af0935dc..7304ceaebd6 100644 --- a/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c +++ b/components/freertos/FreeRTOS-Kernel/portable/xtensa/port.c @@ -412,6 +412,11 @@ StackType_t * pxPortInitialiseStack( StackType_t * pxTopOfStack, void * pvParameters ) #endif { +#ifdef __clang_analyzer__ + // Teach clang-tidy that pxTopOfStack cannot be a pointer to const + volatile StackType_t * pxTemp = pxTopOfStack; + pxTopOfStack = pxTemp; +#endif /*__clang_analyzer__ */ /* HIGH ADDRESS |---------------------------| <- pxTopOfStack on entry