Skip to content

Commit 24b8a23

Browse files
torvalds authored and Ingo Molnar committed
x86/fpu: Clean up FPU switching in the middle of task switching
It happens to work, but it's very very wrong, because our 'current' macro is magic that is supposedly loading a stable value. It just happens to be not quite stable enough and the compilers re-load the value enough for this code to work. But it's wrong. The whole struct fpu *prev_fpu = &prev->fpu; thing in __switch_to() is pretty ugly. There's no reason why we should look at that 'prev_fpu' pointer there, or pass it down. And it only generates worse code, in how it loads 'current' when __switch_to() has the right task pointers. The attached patch not only cleans this up, it actually generates better code too: (a) it removes one push/pop pair at entry/exit because there's one less register used (no 'current') (b) it removes that pointless load of 'current' because it just uses the right argument: - movq %gs:pcpu_hot(%rip), %r12 - testq $16384, (%r12) + testq $16384, (%rdi) Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Uros Bizjak <ubizjak@gmail.com> Signed-off-by: Ingo Molnar <mingo@kernel.org> Link: https://lore.kernel.org/r/20231018184227.446318-1-ubizjak@gmail.com
1 parent e39828d commit 24b8a23

File tree

3 files changed

+12
-12
lines changed

3 files changed

+12
-12
lines changed

arch/x86/include/asm/fpu/sched.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,12 @@ extern void fpu_flush_thread(void);
3737
* The FPU context is only stored/restored for a user task and
3838
* PF_KTHREAD is used to distinguish between kernel and user threads.
3939
*/
40-
static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
40+
static inline void switch_fpu_prepare(struct task_struct *old, int cpu)
4141
{
4242
if (cpu_feature_enabled(X86_FEATURE_FPU) &&
43-
!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) {
43+
!(old->flags & (PF_KTHREAD | PF_USER_WORKER))) {
44+
struct fpu *old_fpu = &old->thread.fpu;
45+
4446
save_fpregs_to_fpstate(old_fpu);
4547
/*
4648
* The save operation preserved register state, so the
@@ -60,10 +62,10 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
6062
* Delay loading of the complete FPU state until the return to userland.
6163
* PKRU is handled separately.
6264
*/
63-
static inline void switch_fpu_finish(void)
65+
static inline void switch_fpu_finish(struct task_struct *new)
6466
{
6567
if (cpu_feature_enabled(X86_FEATURE_FPU))
66-
set_thread_flag(TIF_NEED_FPU_LOAD);
68+
set_tsk_thread_flag(new, TIF_NEED_FPU_LOAD);
6769
}
6870

6971
#endif /* _ASM_X86_FPU_SCHED_H */

arch/x86/kernel/process_32.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -156,13 +156,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
156156
{
157157
struct thread_struct *prev = &prev_p->thread,
158158
*next = &next_p->thread;
159-
struct fpu *prev_fpu = &prev->fpu;
160159
int cpu = smp_processor_id();
161160

162161
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
163162

164-
if (!test_thread_flag(TIF_NEED_FPU_LOAD))
165-
switch_fpu_prepare(prev_fpu, cpu);
163+
if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
164+
switch_fpu_prepare(prev_p, cpu);
166165

167166
/*
168167
* Save away %gs. No need to save %fs, as it was saved on the
@@ -209,7 +208,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
209208

210209
raw_cpu_write(pcpu_hot.current_task, next_p);
211210

212-
switch_fpu_finish();
211+
switch_fpu_finish(next_p);
213212

214213
/* Load the Intel cache allocation PQR MSR. */
215214
resctrl_sched_in(next_p);

arch/x86/kernel/process_64.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -562,14 +562,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
562562
{
563563
struct thread_struct *prev = &prev_p->thread;
564564
struct thread_struct *next = &next_p->thread;
565-
struct fpu *prev_fpu = &prev->fpu;
566565
int cpu = smp_processor_id();
567566

568567
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
569568
this_cpu_read(pcpu_hot.hardirq_stack_inuse));
570569

571-
if (!test_thread_flag(TIF_NEED_FPU_LOAD))
572-
switch_fpu_prepare(prev_fpu, cpu);
570+
if (!test_tsk_thread_flag(prev_p, TIF_NEED_FPU_LOAD))
571+
switch_fpu_prepare(prev_p, cpu);
573572

574573
/* We must save %fs and %gs before load_TLS() because
575574
* %fs and %gs may be cleared by load_TLS().
@@ -623,7 +622,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
623622
raw_cpu_write(pcpu_hot.current_task, next_p);
624623
raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p));
625624

626-
switch_fpu_finish();
625+
switch_fpu_finish(next_p);
627626

628627
/* Reload sp0. */
629628
update_task_stack(next_p);

0 commit comments

Comments (0)