@@ -532,6 +532,12 @@ enum scx_kick_flags {
 	 * task expires and the dispatch path is invoked.
 	 */
 	SCX_KICK_PREEMPT	= 1LLU << 1,
+
+	/*
+	 * Wait for the CPU to be rescheduled. The scx_bpf_kick_cpu() call will
+	 * return after the target CPU finishes picking the next task.
+	 */
+	SCX_KICK_WAIT		= 1LLU << 2,
 };
 
 enum scx_ops_enable_state {
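
As a rough illustration of the new flag (not part of the patch): a BPF scheduler that wants to force a CPU through the scheduling path and not continue until that CPU has picked its next task could combine SCX_KICK_PREEMPT with SCX_KICK_WAIT. The header name and the kick_and_wait() helper below are assumptions made for this sketch; only scx_bpf_kick_cpu() and the SCX_KICK_* flags come from this change.

#include "scx_common.bpf.h"	/* assumed example header from the scx BPF tooling */

/* Hypothetical helper: preempt @cpu and only return once it has rescheduled. */
static void kick_and_wait(s32 cpu)
{
	/*
	 * SCX_KICK_PREEMPT makes the current task on @cpu give up its slice;
	 * SCX_KICK_WAIT makes scx_bpf_kick_cpu() return only after @cpu has
	 * finished picking its next task. Neither flag may be combined with
	 * SCX_KICK_IDLE (rejected via scx_ops_error() further down).
	 */
	scx_bpf_kick_cpu(cpu, SCX_KICK_PREEMPT | SCX_KICK_WAIT);
}
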
@@ -661,6 +667,9 @@ static struct {
 
 #endif	/* CONFIG_SMP */
 
+/* for %SCX_KICK_WAIT */
+static unsigned long __percpu *scx_kick_cpus_pnt_seqs;
+
 /*
  * Direct dispatch marker.
  *
@@ -2288,6 +2297,23 @@ static struct task_struct *pick_next_task_scx(struct rq *rq)
 	return p;
 }
 
+void scx_next_task_picked(struct rq *rq, struct task_struct *p,
+			  const struct sched_class *active)
+{
+	lockdep_assert_rq_held(rq);
+
+	if (!scx_enabled())
+		return;
+#ifdef CONFIG_SMP
+	/*
+	 * Pairs with the smp_load_acquire() issued by a CPU in
+	 * kick_cpus_irq_workfn() who is waiting for this CPU to perform a
+	 * resched.
+	 */
+	smp_store_release(&rq->scx.pnt_seq, rq->scx.pnt_seq + 1);
+#endif
+}
+
 #ifdef CONFIG_SMP
 
 static bool test_and_clear_cpu_idle(int cpu)
@@ -3673,9 +3699,9 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
 		seq_buf_init(&ns, buf, avail);
 
 		dump_newline(&ns);
-		dump_line(&ns, "CPU %-4d: nr_run=%u flags=0x%x ops_qseq=%lu",
+		dump_line(&ns, "CPU %-4d: nr_run=%u flags=0x%x ops_qseq=%lu pnt_seq=%lu",
 			  cpu, rq->scx.nr_running, rq->scx.flags,
-			  rq->scx.ops_qseq);
+			  rq->scx.ops_qseq, rq->scx.pnt_seq);
 		dump_line(&ns, " curr=%s[%d] class=%ps",
 			  rq->curr->comm, rq->curr->pid,
 			  rq->curr->sched_class);
@@ -3688,6 +3714,9 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
 		if (!cpumask_empty(rq->scx.cpus_to_preempt))
 			dump_line(&ns, "  cpus_to_preempt: %*pb",
 				  cpumask_pr_args(rq->scx.cpus_to_preempt));
+		if (!cpumask_empty(rq->scx.cpus_to_wait))
+			dump_line(&ns, "  cpus_to_wait   : %*pb",
+				  cpumask_pr_args(rq->scx.cpus_to_wait));
 
 		used = seq_buf_used(&ns);
 		if (SCX_HAS_OP(dump_cpu)) {
@@ -4383,10 +4412,11 @@ static bool can_skip_idle_kick(struct rq *rq)
 	return !is_idle_task(rq->curr) && !(rq->scx.flags & SCX_RQ_BALANCING);
 }
 
-static void kick_one_cpu(s32 cpu, struct rq *this_rq)
+static bool kick_one_cpu(s32 cpu, struct rq *this_rq, unsigned long *pseqs)
 {
 	struct rq *rq = cpu_rq(cpu);
 	struct scx_rq *this_scx = &this_rq->scx;
+	bool should_wait = false;
 	unsigned long flags;
 
 	raw_spin_rq_lock_irqsave(rq, flags);
@@ -4402,12 +4432,20 @@ static void kick_one_cpu(s32 cpu, struct rq *this_rq)
 			cpumask_clear_cpu(cpu, this_scx->cpus_to_preempt);
 		}
 
+		if (cpumask_test_cpu(cpu, this_scx->cpus_to_wait)) {
+			pseqs[cpu] = rq->scx.pnt_seq;
+			should_wait = true;
+		}
+
 		resched_curr(rq);
 	} else {
 		cpumask_clear_cpu(cpu, this_scx->cpus_to_preempt);
+		cpumask_clear_cpu(cpu, this_scx->cpus_to_wait);
 	}
 
 	raw_spin_rq_unlock_irqrestore(rq, flags);
+
+	return should_wait;
 }
 
 static void kick_one_cpu_if_idle(s32 cpu, struct rq *this_rq)
@@ -4428,10 +4466,12 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work)
 {
 	struct rq *this_rq = this_rq();
 	struct scx_rq *this_scx = &this_rq->scx;
+	unsigned long *pseqs = this_cpu_ptr(scx_kick_cpus_pnt_seqs);
+	bool should_wait = false;
 	s32 cpu;
 
 	for_each_cpu(cpu, this_scx->cpus_to_kick) {
-		kick_one_cpu(cpu, this_rq);
+		should_wait |= kick_one_cpu(cpu, this_rq, pseqs);
 		cpumask_clear_cpu(cpu, this_scx->cpus_to_kick);
 		cpumask_clear_cpu(cpu, this_scx->cpus_to_kick_if_idle);
 	}
@@ -4440,6 +4480,28 @@ static void kick_cpus_irq_workfn(struct irq_work *irq_work)
 		kick_one_cpu_if_idle(cpu, this_rq);
 		cpumask_clear_cpu(cpu, this_scx->cpus_to_kick_if_idle);
 	}
+
+	if (!should_wait)
+		return;
+
+	for_each_cpu(cpu, this_scx->cpus_to_wait) {
+		unsigned long *wait_pnt_seq = &cpu_rq(cpu)->scx.pnt_seq;
+
+		if (cpu != cpu_of(this_rq)) {
+			/*
+			 * Pairs with smp_store_release() issued by this CPU in
+			 * scx_next_task_picked() on the resched path.
+			 *
+			 * We busy-wait here to guarantee that no other task can
+			 * be scheduled on our core before the target CPU has
+			 * entered the resched path.
+			 */
+			while (smp_load_acquire(wait_pnt_seq) == pseqs[cpu])
+				cpu_relax();
+		}
+
+		cpumask_clear_cpu(cpu, this_scx->cpus_to_wait);
+	}
 }
 
 /**
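
To make the wait protocol above concrete, here is a small userspace analogue (a sketch, not kernel code): the kicker snapshots the target's sequence number before triggering the resched, as kick_one_cpu() does under the target rq lock, and then spins with an acquire load until the target publishes a new value with a release store, as scx_next_task_picked() does, i.e. until the target has demonstrably gone through a pick after the kick. The pthread/stdatomic names are stand-ins and not anything from the patch.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong pnt_seq;	/* stand-in for the target's rq->scx.pnt_seq */
static atomic_int kicked;	/* stand-in for the resched request */

static void *target_cpu(void *arg)
{
	(void)arg;

	/* Wait for the "kick", then go through a "pick next task". */
	while (!atomic_load_explicit(&kicked, memory_order_acquire))
		;

	/* scx_next_task_picked(): publish completion with a release store. */
	atomic_fetch_add_explicit(&pnt_seq, 1, memory_order_release);
	return NULL;
}

int main(void)
{
	pthread_t thr;
	unsigned long seq_snapshot;

	pthread_create(&thr, NULL, target_cpu, NULL);

	/* kick_one_cpu(): snapshot the sequence, then trigger the resched. */
	seq_snapshot = atomic_load_explicit(&pnt_seq, memory_order_relaxed);
	atomic_store_explicit(&kicked, 1, memory_order_release);

	/* kick_cpus_irq_workfn(): busy-wait until the target moves past it. */
	while (atomic_load_explicit(&pnt_seq, memory_order_acquire) == seq_snapshot)
		;	/* cpu_relax() equivalent */

	printf("target CPU has finished picking its next task\n");
	pthread_join(thr, NULL);
	return 0;
}

In the kernel version the snapshots live in the per-CPU scx_kick_cpus_pnt_seqs array so a single irq_work invocation can wait on several CPUs, and a CPU never waits on itself (the cpu != cpu_of(this_rq) check) since it is the one executing the wait loop.
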
@@ -4504,6 +4566,11 @@ void __init init_sched_ext_class(void)
 	BUG_ON(!alloc_cpumask_var(&idle_masks.cpu, GFP_KERNEL));
 	BUG_ON(!alloc_cpumask_var(&idle_masks.smt, GFP_KERNEL));
 #endif
+	scx_kick_cpus_pnt_seqs =
+		__alloc_percpu(sizeof(scx_kick_cpus_pnt_seqs[0]) * nr_cpu_ids,
+			       __alignof__(scx_kick_cpus_pnt_seqs[0]));
+	BUG_ON(!scx_kick_cpus_pnt_seqs);
+
 	for_each_possible_cpu(cpu) {
 		struct rq *rq = cpu_rq(cpu);
 
@@ -4513,6 +4580,7 @@ void __init init_sched_ext_class(void)
 		BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_kick, GFP_KERNEL));
 		BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_kick_if_idle, GFP_KERNEL));
 		BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_preempt, GFP_KERNEL));
+		BUG_ON(!zalloc_cpumask_var(&rq->scx.cpus_to_wait, GFP_KERNEL));
 		init_irq_work(&rq->scx.kick_cpus_irq_work, kick_cpus_irq_workfn);
 	}
 
@@ -4840,8 +4908,8 @@ __bpf_kfunc void scx_bpf_kick_cpu(s32 cpu, u64 flags)
 	if (flags & SCX_KICK_IDLE) {
 		struct rq *target_rq = cpu_rq(cpu);
 
-		if (unlikely(flags & SCX_KICK_PREEMPT))
-			scx_ops_error("PREEMPT cannot be used with SCX_KICK_IDLE");
+		if (unlikely(flags & (SCX_KICK_PREEMPT | SCX_KICK_WAIT)))
+			scx_ops_error("PREEMPT/WAIT cannot be used with SCX_KICK_IDLE");
 
 		if (raw_spin_rq_trylock(target_rq)) {
 			if (can_skip_idle_kick(target_rq)) {
@@ -4856,6 +4924,8 @@ __bpf_kfunc void scx_bpf_kick_cpu(s32 cpu, u64 flags)
 
 		if (flags & SCX_KICK_PREEMPT)
 			cpumask_set_cpu(cpu, this_rq->scx.cpus_to_preempt);
+		if (flags & SCX_KICK_WAIT)
+			cpumask_set_cpu(cpu, this_rq->scx.cpus_to_wait);
 	}
 
 	irq_work_queue(&this_rq->scx.kick_cpus_irq_work);