
Commit 0df340c

Merge branch 'sched/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into for-6.12
Pull tip/sched/core to resolve the following four conflicts. While 2-4 are
simple context conflicts, 1 is a bit subtle and easy to resolve incorrectly.

1. 2c8d046 ("sched: Add normal_policy()")
   vs.
   faa42d2 ("sched/fair: Make SCHED_IDLE entity be preempted in strict hierarchy")

   The former converts the direct test on p->policy to use the helper
   normal_policy(). The latter moves the p->policy test to a different
   location. Resolve by converting the test on p->policy in the new
   location to use normal_policy().

2. a7a9fc5 ("sched_ext: Add boilerplate for extensible scheduler class")
   vs.
   a110a81 ("sched/deadline: Deferrable dl server")

   Both add calls to put_prev_task_idle() and set_next_task_idle(). Simple
   context conflict. Resolve by taking changes from both.

3. a7a9fc5 ("sched_ext: Add boilerplate for extensible scheduler class")
   vs.
   c245910 ("sched/core: Add clearing of ->dl_server in put_prev_task_balance()")

   The former changes the for_each_class() iteration to use
   for_each_active_class(). The latter moves away the adjacent dl_server
   handling code. Simple context conflict. Resolve by taking changes from
   both.

4. 60c27fb ("sched_ext: Implement sched_ext_ops.cpu_online/offline()")
   vs.
   31b164e ("sched/smt: Introduce sched_smt_present_inc/dec() helper")
   2f02735 ("sched/core: Introduce sched_set_rq_on/offline() helper")

   The former adds a scx_rq_deactivate() call. The latter two change the
   code around it. Simple context conflict. Resolve by taking changes from
   both.

Signed-off-by: Tejun Heo <tj@kernel.org>
2 parents e99129e + cea5a34 commit 0df340c
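To make the subtle resolution (1) concrete, the sketch below shows the shape of the conversion: the policy test that faa42d2 relocated inside check_preempt_wakeup_fair() ends up expressed through the normal_policy() helper from 2c8d046 instead of comparing p->policy directly. This is a standalone, hedged illustration with stand-in definitions (the SCHED_NORMAL value and the helper body are assumptions of this sketch), not the verbatim merged kernel code.

#define SCHED_NORMAL    0       /* stand-in value, for illustration only */

/* Assumed shape of the helper added by 2c8d046. */
static inline int normal_policy(int policy)
{
        return policy == SCHED_NORMAL;
}

/*
 * Before the merge, the relocated test read roughly:
 *      if (unlikely(p->policy != SCHED_NORMAL))
 *              return;
 * After the resolution it goes through the helper instead:
 */
static int skips_wakeup_preemption(int policy)
{
        return !normal_policy(policy);
}

int main(void)
{
        /* A SCHED_NORMAL task must not be reported as skipping preemption. */
        return skips_wakeup_preemption(SCHED_NORMAL);
}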

11 files changed: +895 −282 lines

include/linux/sched.h

Lines changed: 16 additions & 1 deletion
@@ -641,12 +641,26 @@ struct sched_dl_entity {
          *
          * @dl_overrun tells if the task asked to be informed about runtime
          * overruns.
+         *
+         * @dl_server tells if this is a server entity.
+         *
+         * @dl_defer tells if this is a deferred or regular server. For
+         * now only defer server exists.
+         *
+         * @dl_defer_armed tells if the deferrable server is waiting
+         * for the replenishment timer to activate it.
+         *
+         * @dl_defer_running tells if the deferrable server is actually
+         * running, skipping the defer phase.
          */
         unsigned int dl_throttled : 1;
         unsigned int dl_yielded : 1;
         unsigned int dl_non_contending : 1;
         unsigned int dl_overrun : 1;
         unsigned int dl_server : 1;
+        unsigned int dl_defer : 1;
+        unsigned int dl_defer_armed : 1;
+        unsigned int dl_defer_running : 1;
 
         /*
          * Bandwidth enforcement timer. Each -deadline task has its
@@ -674,7 +688,8 @@ struct sched_dl_entity {
          */
         struct rq *rq;
         dl_server_has_tasks_f server_has_tasks;
-        dl_server_pick_f server_pick;
+        dl_server_pick_f server_pick_next;
+        dl_server_pick_f server_pick_task;
 
 #ifdef CONFIG_RT_MUTEXES
         /*

kernel/sched/core.c

Lines changed: 107 additions & 39 deletions
@@ -163,6 +163,9 @@ static inline int __task_prio(const struct task_struct *p)
         if (p->sched_class == &stop_sched_class) /* trumps deadline */
                 return -2;
 
+        if (p->dl_server)
+                return -1; /* deadline */
+
         if (rt_prio(p->prio)) /* includes deadline */
                 return p->prio; /* [-1, 99] */
 
@@ -195,8 +198,24 @@ static inline bool prio_less(const struct task_struct *a,
         if (-pb < -pa)
                 return false;
 
-        if (pa == -1) /* dl_prio() doesn't work because of stop_class above */
-                return !dl_time_before(a->dl.deadline, b->dl.deadline);
+        if (pa == -1) { /* dl_prio() doesn't work because of stop_class above */
+                const struct sched_dl_entity *a_dl, *b_dl;
+
+                a_dl = &a->dl;
+                /*
+                 * Since,'a' and 'b' can be CFS tasks served by DL server,
+                 * __task_prio() can return -1 (for DL) even for those. In that
+                 * case, get to the dl_server's DL entity.
+                 */
+                if (a->dl_server)
+                        a_dl = a->dl_server;
+
+                b_dl = &b->dl;
+                if (b->dl_server)
+                        b_dl = b->dl_server;
+
+                return !dl_time_before(a_dl->deadline, b_dl->deadline);
+        }
 
         if (pa == MAX_RT_PRIO + MAX_NICE) /* fair */
                 return cfs_prio_less(a, b, in_fi);
@@ -1280,7 +1299,7 @@ bool sched_can_stop_tick(struct rq *rq)
          * dequeued by migrating while the constrained task continues to run.
          * E.g. going from 2->1 without going through pick_next_task().
          */
-        if (sched_feat(HZ_BW) && __need_bw_check(rq, rq->curr)) {
+        if (__need_bw_check(rq, rq->curr)) {
                 if (cfs_task_bw_constrained(rq->curr))
                         return false;
         }
@@ -2255,6 +2274,12 @@ void migrate_disable(void)
         struct task_struct *p = current;
 
         if (p->migration_disabled) {
+#ifdef CONFIG_DEBUG_PREEMPT
+                /*
+                 *Warn about overflow half-way through the range.
+                 */
+                WARN_ON_ONCE((s16)p->migration_disabled < 0);
+#endif
                 p->migration_disabled++;
                 return;
         }
@@ -2273,14 +2298,20 @@ void migrate_enable(void)
                 .flags = SCA_MIGRATE_ENABLE,
         };
 
+#ifdef CONFIG_DEBUG_PREEMPT
+        /*
+         * Check both overflow from migrate_disable() and superfluous
+         * migrate_enable().
+         */
+        if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
+                return;
+#endif
+
         if (p->migration_disabled > 1) {
                 p->migration_disabled--;
                 return;
         }
 
-        if (WARN_ON_ONCE(!p->migration_disabled))
-                return;
-
         /*
          * Ensure stop_task runs either before or after this, and that
          * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
@@ -4737,7 +4768,7 @@ void wake_up_new_task(struct task_struct *p)
         update_rq_clock(rq);
         post_init_entity_util_avg(p);
 
-        activate_task(rq, p, ENQUEUE_NOCLOCK);
+        activate_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_INITIAL);
         trace_sched_wakeup_new(p);
         wakeup_preempt(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
@@ -5855,6 +5886,14 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev,
 #endif
 
         put_prev_task(rq, prev);
+
+        /*
+         * We've updated @prev and no longer need the server link, clear it.
+         * Must be done before ->pick_next_task() because that can (re)set
+         * ->dl_server.
+         */
+        if (prev->dl_server)
+                prev->dl_server = NULL;
 }
 
 /*
@@ -5888,6 +5927,13 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
                         p = pick_next_task_idle(rq);
                 }
 
+                /*
+                 * This is a normal CFS pick, but the previous could be a DL pick.
+                 * Clear it as previous is no longer picked.
+                 */
+                if (prev->dl_server)
+                        prev->dl_server = NULL;
+
                 /*
                  * This is the fast path; it cannot be a DL server pick;
                  * therefore even if @p == @prev, ->dl_server must be NULL.
@@ -5901,14 +5947,6 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 restart:
         put_prev_task_balance(rq, prev, rf);
 
-        /*
-         * We've updated @prev and no longer need the server link, clear it.
-         * Must be done before ->pick_next_task() because that can (re)set
-         * ->dl_server.
-         */
-        if (prev->dl_server)
-                prev->dl_server = NULL;
-
         for_each_active_class(class) {
                 p = class->pick_next_task(rq);
                 if (p) {
@@ -7925,6 +7963,30 @@ void set_rq_offline(struct rq *rq)
         }
 }
 
+static inline void sched_set_rq_online(struct rq *rq, int cpu)
+{
+        struct rq_flags rf;
+
+        rq_lock_irqsave(rq, &rf);
+        if (rq->rd) {
+                BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+                set_rq_online(rq);
+        }
+        rq_unlock_irqrestore(rq, &rf);
+}
+
+static inline void sched_set_rq_offline(struct rq *rq, int cpu)
+{
+        struct rq_flags rf;
+
+        rq_lock_irqsave(rq, &rf);
+        if (rq->rd) {
+                BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
+                set_rq_offline(rq);
+        }
+        rq_unlock_irqrestore(rq, &rf);
+}
+
 /*
  * used to mark begin/end of suspend/resume:
  */
@@ -7975,24 +8037,36 @@ static int cpuset_cpu_inactive(unsigned int cpu)
         return 0;
 }
 
+static inline void sched_smt_present_inc(int cpu)
+{
+#ifdef CONFIG_SCHED_SMT
+        if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+                static_branch_inc_cpuslocked(&sched_smt_present);
+#endif
+}
+
+static inline void sched_smt_present_dec(int cpu)
+{
+#ifdef CONFIG_SCHED_SMT
+        if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
+                static_branch_dec_cpuslocked(&sched_smt_present);
+#endif
+}
+
 int sched_cpu_activate(unsigned int cpu)
 {
         struct rq *rq = cpu_rq(cpu);
-        struct rq_flags rf;
 
         /*
          * Clear the balance_push callback and prepare to schedule
          * regular tasks.
          */
         balance_push_set(cpu, false);
 
-#ifdef CONFIG_SCHED_SMT
         /*
          * When going up, increment the number of cores with SMT present.
          */
-        if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
-                static_branch_inc_cpuslocked(&sched_smt_present);
-#endif
+        sched_smt_present_inc(cpu);
         set_cpu_active(cpu, true);
 
         if (sched_smp_initialized) {
@@ -8012,20 +8086,14 @@ int sched_cpu_activate(unsigned int cpu)
          * 2) At runtime, if cpuset_cpu_active() fails to rebuild the
          *    domains.
          */
-        rq_lock_irqsave(rq, &rf);
-        if (rq->rd) {
-                BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
-                set_rq_online(rq);
-        }
-        rq_unlock_irqrestore(rq, &rf);
+        sched_set_rq_online(rq, cpu);
 
         return 0;
 }
 
 int sched_cpu_deactivate(unsigned int cpu)
 {
         struct rq *rq = cpu_rq(cpu);
-        struct rq_flags rf;
         int ret;
 
         /*
@@ -8056,22 +8124,16 @@ int sched_cpu_deactivate(unsigned int cpu)
          */
         synchronize_rcu();
 
-        rq_lock_irqsave(rq, &rf);
-        if (rq->rd) {
-                BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
-                set_rq_offline(rq);
-        }
-        rq_unlock_irqrestore(rq, &rf);
+        sched_set_rq_offline(rq, cpu);
 
         scx_rq_deactivate(rq);
 
-#ifdef CONFIG_SCHED_SMT
         /*
          * When going down, decrement the number of cores with SMT present.
          */
-        if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
-                static_branch_dec_cpuslocked(&sched_smt_present);
+        sched_smt_present_dec(cpu);
 
+#ifdef CONFIG_SCHED_SMT
         sched_core_cpu_deactivate(cpu);
 #endif
 
@@ -8081,6 +8143,8 @@ int sched_cpu_deactivate(unsigned int cpu)
         sched_update_numa(cpu, false);
         ret = cpuset_cpu_inactive(cpu);
         if (ret) {
+                sched_smt_present_inc(cpu);
+                sched_set_rq_online(rq, cpu);
                 balance_push_set(cpu, false);
                 set_cpu_active(cpu, true);
                 sched_update_numa(cpu, true);
@@ -8290,8 +8354,6 @@ void __init sched_init(void)
 #endif /* CONFIG_RT_GROUP_SCHED */
         }
 
-        init_rt_bandwidth(&def_rt_bandwidth, global_rt_period(), global_rt_runtime());
-
 #ifdef CONFIG_SMP
         init_defrootdomain();
 #endif
@@ -8346,8 +8408,13 @@ void __init sched_init(void)
                 init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
-                rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
 #ifdef CONFIG_RT_GROUP_SCHED
+                /*
+                 * This is required for init cpu because rt.c:__enable_runtime()
+                 * starts working after scheduler_running, which is not the case
+                 * yet.
+                 */
+                rq->rt.rt_runtime = global_rt_runtime();
                 init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, NULL);
 #endif
 #ifdef CONFIG_SMP
@@ -8379,6 +8446,7 @@ void __init sched_init(void)
 #endif /* CONFIG_SMP */
                 hrtick_rq_init(rq);
                 atomic_set(&rq->nr_iowait, 0);
+                fair_server_init(rq);
 
 #ifdef CONFIG_SCHED_CORE
                 rq->core = rq;
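The CONFIG_DEBUG_PREEMPT checks added to migrate_disable() and migrate_enable() above work by casting the nesting counter to a signed 16-bit value: once nesting passes half of a 16-bit range the cast goes negative, and a value of zero or less at enable time flags an unbalanced migrate_enable(). A small standalone illustration follows; the 16-bit counter width is an assumption inferred from the (s16) cast, not something the diff itself states.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint16_t migration_disabled = 0x8000;   /* nesting depth past half the range */

        /* migrate_disable(): warn about overflow half-way through the range */
        if ((int16_t)migration_disabled < 0)
                printf("would WARN_ON_ONCE: migrate_disable() depth overflow\n");

        /* migrate_enable(): catch both overflow and a superfluous enable */
        migration_disabled = 0;
        if ((int16_t)migration_disabled <= 0)
                printf("would WARN_ON_ONCE: unbalanced migrate_enable()\n");

        return 0;
}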

kernel/sched/cputime.c

Lines changed: 6 additions & 0 deletions
@@ -582,6 +582,12 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
         }
 
         stime = mul_u64_u64_div_u64(stime, rtime, stime + utime);
+        /*
+         * Because mul_u64_u64_div_u64() can approximate on some
+         * achitectures; enforce the constraint that: a*b/(b+c) <= a.
+         */
+        if (unlikely(stime > rtime))
+                stime = rtime;
 
 update:
         /*
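The clamp added to cputime_adjust() above enforces an invariant that holds exactly in wide arithmetic: splitting rtime proportionally to stime/(stime + utime) can never produce more than rtime (a*b/(b+c) <= a), but an approximating 64x64/64 division helper can overshoot on some architectures. The standalone sketch below mirrors that guard; it uses unsigned __int128 (a GCC/Clang extension) as a stand-in for the exact wide division, which is an assumption of the illustration rather than the kernel's implementation.

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the scaling step in cputime_adjust(). */
static uint64_t scale_stime(uint64_t stime, uint64_t utime, uint64_t rtime)
{
        /* Exact in 128-bit math; the kernel helper may approximate. */
        uint64_t scaled = (uint64_t)((unsigned __int128)stime * rtime / (stime + utime));

        /* Mirror the new guard: enforce scaled <= rtime. */
        if (scaled > rtime)
                scaled = rtime;
        return scaled;
}

int main(void)
{
        /* 3 units of stime out of 10 total, scaled into 100 units of rtime. */
        printf("%llu\n", (unsigned long long)scale_stime(3, 7, 100));   /* prints 30 */
        return 0;
}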
