Skip to content

Commit

Permalink
[FPU] update how FPU registers are saved/loaded in the kernel (WiP)
Browse files Browse the repository at this point in the history
  • Loading branch information
Mikaël BRIDAY committed May 28, 2024
1 parent 91d8327 commit 917f5cc
Show file tree
Hide file tree
Showing 16 changed files with 95 additions and 227 deletions.
19 changes: 5 additions & 14 deletions goil/templates/code/cortex-m/handler_body.goilTemplate
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,11 @@ tpl_primary_irq_handler_% !handlerSource %:
*/
bl % !handlerIRQ %

/*-------------------------------------------------------------------------
* 3b - retrieve the link register from the kernel stack
* if there is no context switch (otherwise it will be overwritten).
*/
pop {r0}

/*-------------------------------------------------------------------------
* 4 - Check the context switch condition in tpl_kern
*/
ldr r2,=tpl_kern
ldrb r1,[r2,#TPL_KERN_OFFSET_NEED_SWITCH]
ldr r0,=tpl_kern
ldrb r1,[r0,#TPL_KERN_OFFSET_NEED_SWITCH]
cmp r1,#NO_NEED_SWITCH
beq tpl_% !handlerSource %_no_context_switch

Expand All @@ -46,7 +40,7 @@ tpl_primary_irq_handler_% !handlerSource %:
* Load in r0 the pointer to the static descriptor of the running task
* r0 already points to tpl_kern (from stage 4)
*/
ldr r0,[r2,#TPL_KERN_OFFSET_S_RUNNING]
ldr r0,[r0,#TPL_KERN_OFFSET_S_RUNNING]
bl tpl_save_context_under_it

/*-------------------------------------------------------------------------
Expand All @@ -70,11 +64,8 @@ tpl_primary_irq_handler_% !handlerSource %:
tpl_% !handlerSource %_no_context_switch:

/*-------------------------------------------------------------------------
* 8 - get lr from r0:
* * from step 3b if there is no context switch
* * from tpl_load_context_under_it in other case.
* 8 - pop LR that was saved at stage 1 into PC and return from the handler
*/
mov lr, r0
bx lr
pop {pc}

/*---------------------------------------------------------------------------*/
16 changes: 5 additions & 11 deletions goil/templates/code/cortex-m/tpl_primary_irq.goilTemplate
Original file line number Diff line number Diff line change
Expand Up @@ -132,17 +132,11 @@ end foreach
* | Return Address | <- PSP+24
* +------------------+
* | xPSR (bit 9 = 1) | <- PSP+28
* +------------------+---------------------\
* | s0 (FPU) | <- PSP+32 - 0x20 |
* +------------------+ |
* | .. | <- PSP+.. - |
* +----------------- + |
* | s15 (FPU) | <- PSP+92 - 0x5C |- only if FPU is available
* +------------------+ | and process is using FPU
* | FPSCR (FPU) | <- PSP+96 - 0x60 | (USEFLOAT = TRUE in .oil)
* +------------------+ |
* | reserved (align) | <- PSP+100- 0x64 |
* +------------------+---------------------/
* +------------------+
* If the FPU is enabled, the FPU context is not saved by hardware,
* as the kernel does not use FPU instructions. The FPU registers should
* be saved/restored in the save/load context functions
* (ARMv7-M Architecture Reference Manual (DDI 0403E.e), sec. B.3.2.21)
*
*/
%
Expand Down
5 changes: 0 additions & 5 deletions machines/cortex-m/armv6m/tpl_ctx_switch.S
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,6 @@ tpl_save_context:
*
* r0 contains a pointer to the static descriptor of the running task.
* r1-r3 are working registers
*
* ** it SHOULD return LR in r0 **
*
* values to be loaded into r4 and r5 are put in the MSP.
*/

Expand Down Expand Up @@ -198,8 +195,6 @@ tpl_load_context:
ldr r2,[r1,#CTX_PSP]
msr psp,r2

/* set LR value in return argument (r0) */
ldr r0, =0xFFFFFFFD
bx lr

#define OS_STOP_SEC_CODE
Expand Down
7 changes: 1 addition & 6 deletions machines/cortex-m/armv6m/tpl_ctx_switch_under_it.S
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,6 @@ tpl_save_context_under_it:
*
* r0 contains a pointer to the static descriptor of the running task.
* r1-r3 are working registers
*
* ** it SHOULD return LR final value in r0 **
* LR should be 0xFFFFFFFD, as the context does not use any FPU
*/

.global tpl_load_context_under_it
Expand Down Expand Up @@ -181,9 +178,7 @@ tpl_load_context_under_it:
ldr r2,[r1,#CTX_PSP]
msr psp,r2

ldr r0, =0xFFFFFFFD

bx lr
bx lr

#define OS_STOP_SEC_CODE
#include "tpl_as_memmap.h"
15 changes: 15 additions & 0 deletions machines/cortex-m/armv7em/stm32h743/startup_stm32h743.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#include <sys/types.h>
#include "stm32l4xx.h" //FPU related
#include "core_cm4.h" //FPU related

void __attribute__((weak)) tpl_continue_reset_handler(void);

Expand Down Expand Up @@ -119,6 +121,19 @@ void __attribute__ ((section(".after_vectors"))) tpl_continue_reset_handler(void
// Zero fill the bss segment
bss_init(&__bss_start__, &__bss_end__);

// start FPU
#if (__FPU_PRESENT == 1) && (__FPU_USED == 1) && (WITH_FLOAT==YES)
/* FP registers are not stacked automatically on interrupt entry.
* Trampoline saves/restores them manually when required.
*
* The ASPEN and LSPEN bits of FPCCR should be cleared BEFORE enabling CP10/CP11
* ARMv7-M Architecture Reference Manual (DDI 0403E.e), sec. B.3.2.21
*/
FPU->FPCCR &= ~(1 << FPU_FPCCR_ASPEN_Pos | 1 << FPU_FPCCR_LSPEN_Pos);
/* set CP10 and CP11 Full Access */
SCB->CPACR |= ((3UL << 10*2)|(3UL << 11*2));
#endif

// Call the standard library initialisation (mandatory, SystemInit()
// and C++ static constructors are called from here).
__libc_init_array();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* @file tpl_machine_stm32l432.c
* @file tpl_machine_stm32h743.c
*
* @section descr File description
*
Expand Down
2 changes: 1 addition & 1 deletion machines/cortex-m/armv7em/stm32l432/handlers_stm32l432.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/**
* @section descr File description
*
* STM32F3 platform specific routines and variables
* STM32 platform specific routines and variables
*
* @section copyright Copyright
*
Expand Down
10 changes: 9 additions & 1 deletion machines/cortex-m/armv7em/stm32l432/startup_stm32l432.c
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,15 @@ void __attribute__ ((section(".after_vectors"))) tpl_continue_reset_handler(void

// start FPU
#if (__FPU_PRESENT == 1) && (__FPU_USED == 1) && (WITH_FLOAT==YES)
SCB->CPACR |= ((3UL << 10*2)|(3UL << 11*2)); /* set CP10 and CP11 Full Access */
/* FP registers are not stacked automatically on interrupt entry.
* Trampoline saves/restores them manually when required.
*
* The ASPEN and LSPEN bits of FPCCR should be cleared BEFORE enabling CP10/CP11
* ARMv7-M Architecture Reference Manual (DDI 0403E.e), sec. B.3.2.21
*/
FPU->FPCCR &= ~(1 << FPU_FPCCR_ASPEN_Pos | 1 << FPU_FPCCR_LSPEN_Pos);
/* set CP10 and CP11 Full Access */
SCB->CPACR |= ((3UL << 10*2)|(3UL << 11*2));
#endif

// Call the standard library initialisation (mandatory, SystemInit()
Expand Down
50 changes: 17 additions & 33 deletions machines/cortex-m/armv7em/tpl_ctx_switch.S
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,11 @@
* | Return Address (saved PC/R15) | <- PSP+24 *
* +-------------------------------+ *
* | xPSR (bit 9 = 1) | <- PSP+28 *
* +------------------------+---------------------\ *
* | s0 (FPU) | <- PSP+32 - 0x20 | *
* +------------------------+ | *
* | .. | <- PSP+.. - | *
* +------------------------+ | *
* | s15 (FPU) | <- PSP+92 - 0x5C |- only if FPU is available *
* +------------------------+ | and process is using FPU *
* | FPSCR (FPU) | <- PSP+96 - 0x60 | (USEFLOAT = TRUE in .oil)*
* +------------------------+ | *
* | reserved (align) | <- PSP+100- 0x64 | *
* +------------------------+---------------------/ *
* +-------------------------------+ *
* If the FPU is enabled, the FPU context is not saved by hardware, *
* as the kernel does not use FPU instructions. The FPU registers should *
* be saved/restored in the save/load context functions *
* (ARMv7-M Architecture Reference Manual (DDI 0403E.e), sec. B.3.2.21) *
* *
* The second part is stored in a structure defined in tpl_machine_cortex.h *
* with a pointer from the static task descriptor *
Expand All @@ -85,6 +79,10 @@
* +------------------+ *
* | PSP (R13) | <- CTX_PSP (32) *
* +------------------+ *
* *
* If the FPU is enabled for the task (USEFLOAT = TRUE; in .oil), the FP *
* context (pointed to from the static task descriptor) contains all the FP *
* registers (s0-s31 and FPSCR), see arm_float_context *
*----------------------------------------------------------------------------*
*/

Expand Down Expand Up @@ -147,16 +145,13 @@ tpl_save_context:
ldr r1,[r0,#FLOAT_CONTEXT]
cmp r1, #0 /* r1 is NULL if there is no float context for this process */
beq no_save_fp
/* save all s16 to 31
* In Lazy Context Switch (LSPEN bit in FPU->FPCCR, enabled by default),
* s0-s15 may have not been pushed on the stack (PSP) yet. This
* FPU related instruction force the push (on PSP).
* So, this instruction leads to write 17 words on PSP and 16 words on r1!
*/
vstm r1!, {s16-s31}
/* save all s0 to s31 */
vstm r1!, {s0-s31}
/* save fpscr */
vmrs r0,fpscr
str r0,[r1]
no_save_fp:
#endif

bx lr

/*=============================================================================
Expand All @@ -165,9 +160,6 @@ no_save_fp:
*
* r0 contains a pointer to the static descriptor of the running task.
* r1-r3 are working registers
*
* ** it SHOULD return LR in r0 **
*
* values to be loaded into r4 and r5 are put in the MSP.
*/

Expand Down Expand Up @@ -219,19 +211,11 @@ tpl_load_context:
ldr r1,[r0,#FLOAT_CONTEXT]
cmp r1, #0 /* r1 is NULL if there is no float context for this process */
beq no_load_fp
vldm r1!, {s16-s31} /* load s[16..31] */
/* now update LR to use the FPU */
mov r0, #0xFFED /* low 16 bits of LR, FPU */
b end_fp
vldm r1!, {s0-s31} /* load s[0..31] */
ldr r0,[r1]
vmsr fpscr, r0 /* load fpscr */
no_load_fp:
ldr r0, =0xE000EF34 /* load FPCCR address */
ldr r1,[r0] /* load FPCCR */
bic.w r1,r1,#1 /* clear FPCCR.LSPACT */
str r1, [r0]
#endif // WITH_FLOAT
mov r0, #0xFFFD /* low 16 bits of LR, NO FPU */
end_fp:
movt r0, #0xFFFF /* high 16 bits of LR */
bx lr

#define OS_STOP_SEC_CODE
Expand Down
49 changes: 14 additions & 35 deletions machines/cortex-m/armv7em/tpl_ctx_switch_under_it.S
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,11 @@
* | Return Address (saved PC/R15) | <- PSP+24 *
* +-------------------------------+ *
* | xPSR (bit 9 = 1) | <- PSP+28 *
* +------------------------+---------------------\ *
* | s0 (FPU) | <- PSP+32 - 0x20 | *
* +------------------------+ | *
* | .. | <- PSP+.. - | *
* +------------------------+ | *
* | s15 (FPU) | <- PSP+92 - 0x5C |- only if FPU is available *
* +------------------------+ | and process is using FPU *
* | FPSCR (FPU) | <- PSP+96 - 0x60 | (USEFLOAT = TRUE in .oil)*
* +------------------------+ | *
* | reserved (align) | <- PSP+100- 0x64 | *
* +------------------------+---------------------/ *
* +-------------------------------+ *
* If the FPU is enabled, the FPU context is not saved by hardware, *
* as the kernel does not use FPU instructions. The FPU registers should *
* be saved/restored in the save/load context functions *
* (ARMv7-M Architecture Reference Manual (DDI 0403E.e), sec. B.3.2.21) *
* *
* The second part is stored in a structure defined in tpl_machine_cortex.h *
* with a pointer from the static task descriptor *
Expand Down Expand Up @@ -100,7 +94,6 @@
*
* r0 contains a pointer to the static descriptor of the running task.
* r1-r3 are working registers
*
*/

.global tpl_save_context_under_it
Expand Down Expand Up @@ -139,13 +132,11 @@ tpl_save_context_under_it:
ldr r1,[r0,#FLOAT_CONTEXT]
cmp r1, #0 /* r1 is NULL if there is no float context for this process */
beq no_save_fp
/* save all s16 to 31
* In Lazy Context Switch (LSPEN bit in FPU->FPCCR, enabled by default),
* s0-s15 may have not been pushed on the stack (PSP) yet. This
* FPU related instruction force the push (on PSP).
* So, this instruction leads to write 17 words on PSP and 16 words on r1!
*/
vstm r1!, {s16-s31}
/* save all s0 to s31 */
vstm r1!, {s0-s31}
/* save fpscr */
vmrs r0,fpscr
str r0,[r1]
no_save_fp:
#endif

Expand All @@ -159,11 +150,6 @@ no_save_fp:
*
* r0 contains a pointer to the static descriptor of the running task.
* r1-r3 are working registers
*
* ** it SHOULD return LR final value in r0 **
* LR can be
* * 0xFFFFFFED if the context uses FPU
* * 0xFFFFFFFD if the context uses no FPU
*/

.global tpl_load_context_under_it
Expand Down Expand Up @@ -202,20 +188,13 @@ tpl_load_context_under_it:
ldr r1,[r0,#FLOAT_CONTEXT]
cmp r1, #0 /* r1 is NULL if there is no float context for this process */
beq no_load_fp
vldm r1!, {s16-s31} /* load spr[16..31] */
/* now update LR to use the FPU */
mov r0, #0xFFED /* low 16 bits of LR, FPU */
b end_fp
vldm r1!, {s0-s31} /* load s[0..31] */
ldr r0,[r1]
vmsr fpscr, r0 /* load fpscr */
no_load_fp:
ldr r0, =0xE000EF34 /* load FPCCR */
ldr r1,[r0]
bic.w r1,r1,#1 /* clear FPCCR.LSPACT */
str r1, [r0]
#endif // WITH_FLOAT
mov r0, #0xFFFD /* low 16 bits of LR, NO FPU */
end_fp:
movt r0, #0xFFFF /* high 16 bits of LR */
bx lr
bx lr

#define OS_STOP_SEC_CODE
#include "tpl_as_memmap.h"
5 changes: 0 additions & 5 deletions machines/cortex-m/armv7m/tpl_ctx_switch.S
Original file line number Diff line number Diff line change
Expand Up @@ -136,9 +136,6 @@ tpl_save_context:
*
* r0 contains a pointer to the static descriptor of the running task.
* r1-r3 are working registers
*
* ** it SHOULD return LR in r0 **
*
* values to be loaded into r4 and r5 are put in the MSP.
*/

Expand Down Expand Up @@ -182,8 +179,6 @@ tpl_load_context:
ldr r2,[r1,#CTX_PSP]
msr psp,r2

/* set LR value in return argument (r0) */
ldr r0, =0xFFFFFFFD
bx lr

#define OS_STOP_SEC_CODE
Expand Down
8 changes: 1 addition & 7 deletions machines/cortex-m/armv7m/tpl_ctx_switch_under_it.S
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@
*
* r0 contains a pointer to the static descriptor of the running task.
* r1-r3 are working registers
*
*/

.global tpl_save_context_under_it
Expand Down Expand Up @@ -130,9 +129,6 @@ tpl_save_context_under_it:
*
* r0 contains a pointer to the static descriptor of the running task.
* r1-r3 are working registers
*
* ** it SHOULD return LR final value in r0 **
* LR should be 0xFFFFFFFD, as the context does not use any FPU
*/

.global tpl_load_context_under_it
Expand Down Expand Up @@ -163,9 +159,7 @@ tpl_load_context_under_it:
ldr r2,[r1,#CTX_PSP]
msr psp,r2

ldr r0, =0xFFFFFFFD

bx lr
bx lr

#define OS_STOP_SEC_CODE
#include "tpl_as_memmap.h"
Loading

0 comments on commit 917f5cc

Please sign in to comment.