@@ -68,14 +68,6 @@ static unsigned int sched_nr_latency = 8;
 */
unsigned int sysctl_sched_child_runs_first __read_mostly;

-/*
- * sys_sched_yield() compat mode
- *
- * This option switches the agressive yield implementation of the
- * old scheduler back on.
- */
-unsigned int __read_mostly sysctl_sched_compat_yield;
-
/*
 * SCHED_OTHER wake-up granularity.
 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
@@ -419,7 +411,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
}

-static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
{
	struct rb_node *left = cfs_rq->rb_leftmost;

@@ -429,6 +421,17 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
	return rb_entry(left, struct sched_entity, run_node);
}

+static struct sched_entity *__pick_next_entity(struct sched_entity *se)
+{
+	struct rb_node *next = rb_next(&se->run_node);
+
+	if (!next)
+		return NULL;
+
+	return rb_entry(next, struct sched_entity, run_node);
+}
+
+#ifdef CONFIG_SCHED_DEBUG
static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
@@ -443,7 +446,6 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 * Scheduling class statistics methods:
 */

-#ifdef CONFIG_SCHED_DEBUG
int sched_proc_update_handler(struct ctl_table *table, int write,
		void __user *buffer, size_t *lenp,
		loff_t *ppos)
@@ -1017,13 +1019,27 @@ static void __clear_buddies_next(struct sched_entity *se)
	}
}

+static void __clear_buddies_skip(struct sched_entity *se)
+{
+	for_each_sched_entity(se) {
+		struct cfs_rq *cfs_rq = cfs_rq_of(se);
+		if (cfs_rq->skip == se)
+			cfs_rq->skip = NULL;
+		else
+			break;
+	}
+}
+
static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	if (cfs_rq->last == se)
		__clear_buddies_last(se);

	if (cfs_rq->next == se)
		__clear_buddies_next(se);
+
+	if (cfs_rq->skip == se)
+		__clear_buddies_skip(se);
}

static void
@@ -1099,7 +1115,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
		return;

	if (cfs_rq->nr_running > 1) {
-		struct sched_entity *se = __pick_next_entity(cfs_rq);
+		struct sched_entity *se = __pick_first_entity(cfs_rq);
		s64 delta = curr->vruntime - se->vruntime;

		if (delta < 0)
@@ -1143,20 +1159,40 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
static int
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);

+/*
+ * Pick the next process, keeping these things in mind, in this order:
+ * 1) keep things fair between processes/task groups
+ * 2) pick the "next" process, since someone really wants that to run
+ * 3) pick the "last" process, for cache locality
+ * 4) do not run the "skip" process, if something else is available
+ */
static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
-	struct sched_entity *se = __pick_next_entity(cfs_rq);
+	struct sched_entity *se = __pick_first_entity(cfs_rq);
	struct sched_entity *left = se;

-	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
-		se = cfs_rq->next;
+	/*
+	 * Avoid running the skip buddy, if running something else can
+	 * be done without getting too unfair.
+	 */
+	if (cfs_rq->skip == se) {
+		struct sched_entity *second = __pick_next_entity(se);
+		if (second && wakeup_preempt_entity(second, left) < 1)
+			se = second;
+	}

	/*
	 * Prefer last buddy, try to return the CPU to a preempted task.
	 */
	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1)
		se = cfs_rq->last;

+	/*
+	 * Someone really wants this to run. If it's not unfair, run it.
+	 */
+	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1)
+		se = cfs_rq->next;
+
	clear_buddies(cfs_rq, se);

	return se;
@@ -1333,52 +1369,6 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	hrtick_update(rq);
}

-/*
- * sched_yield() support is very simple - we dequeue and enqueue.
- *
- * If compat_yield is turned on then we requeue to the end of the tree.
- */
-static void yield_task_fair(struct rq *rq)
-{
-	struct task_struct *curr = rq->curr;
-	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
-	struct sched_entity *rightmost, *se = &curr->se;
-
-	/*
-	 * Are we the only task in the tree?
-	 */
-	if (unlikely(rq->nr_running == 1))
-		return;
-
-	clear_buddies(cfs_rq, se);
-
-	if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
-		update_rq_clock(rq);
-		/*
-		 * Update run-time statistics of the 'current'.
-		 */
-		update_curr(cfs_rq);
-
-		return;
-	}
-	/*
-	 * Find the rightmost entry in the rbtree:
-	 */
-	rightmost = __pick_last_entity(cfs_rq);
-	/*
-	 * Already in the rightmost position?
-	 */
-	if (unlikely(!rightmost || entity_before(rightmost, se)))
-		return;
-
-	/*
-	 * Minimally necessary key value to be last in the tree:
-	 * Upon rescheduling, sched_class::put_prev_task() will place
-	 * 'current' within the tree based on its new key value.
-	 */
-	se->vruntime = rightmost->vruntime + 1;
-}
-
#ifdef CONFIG_SMP

static void task_waking_fair(struct rq *rq, struct task_struct *p)
@@ -1849,6 +1839,14 @@ static void set_next_buddy(struct sched_entity *se)
	}
}

+static void set_skip_buddy(struct sched_entity *se)
+{
+	if (likely(task_of(se)->policy != SCHED_IDLE)) {
+		for_each_sched_entity(se)
+			cfs_rq_of(se)->skip = se;
+	}
+}
+
/*
 * Preempt the current task with a newly woken task if needed:
 */
@@ -1947,6 +1945,36 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
	}
}

+/*
+ * sched_yield() is very simple
+ *
+ * The magic of dealing with the ->skip buddy is in pick_next_entity.
+ */
+static void yield_task_fair(struct rq *rq)
+{
+	struct task_struct *curr = rq->curr;
+	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+	struct sched_entity *se = &curr->se;
+
+	/*
+	 * Are we the only task in the tree?
+	 */
+	if (unlikely(rq->nr_running == 1))
+		return;
+
+	clear_buddies(cfs_rq, se);
+
+	if (curr->policy != SCHED_BATCH) {
+		update_rq_clock(rq);
+		/*
+		 * Update run-time statistics of the 'current'.
+		 */
+		update_curr(cfs_rq);
+	}
+
+	set_skip_buddy(se);
+}
+
#ifdef CONFIG_SMP
/**************************************************
 * Fair scheduling class load-balancing methods:
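
For illustration only, and not part of the patch: a minimal standalone sketch of the buddy order that pick_next_entity() follows after this change, and of why merely setting the "skip" buddy is enough for yield_task_fair(). The struct, the pick() and too_unfair() helpers, and the fixed granularity below are hypothetical stand-ins for the kernel's sched_entity and wakeup_preempt_entity().

/*
 * Hypothetical standalone model (not kernel code): the leftmost entity
 * runs by default; the "skip" buddy (set by a yielding task) is bypassed
 * when the runner-up is not too far behind; the "last" and "next" buddies
 * then override the choice in turn, again only if they are fair enough.
 */
#include <stdio.h>
#include <stddef.h>

struct entity {
	const char *name;
	long long vruntime;	/* smaller = has run less, more deserving */
};

/* Crude stand-in for wakeup_preempt_entity(): a buddy is acceptable only
 * if it has not fallen more than one "granularity" behind the leftmost. */
static int too_unfair(const struct entity *buddy, const struct entity *left)
{
	const long long gran = 1000000;	/* pretend wakeup granularity, ns */
	return buddy->vruntime - left->vruntime > gran;
}

static const struct entity *pick(const struct entity *left,
				 const struct entity *second,
				 const struct entity *skip,
				 const struct entity *last,
				 const struct entity *next)
{
	const struct entity *se = left;

	/* Avoid the skip buddy if the runner-up can run without unfairness. */
	if (skip == se && second && !too_unfair(second, left))
		se = second;

	/* Prefer the last buddy, returning the CPU to a preempted task... */
	if (last && !too_unfair(last, left))
		se = last;

	/* ...but the next buddy wins if someone really wants it to run. */
	if (next && !too_unfair(next, left))
		se = next;

	return se;
}

int main(void)
{
	struct entity a = { "A (leftmost, called sched_yield)", 100 };
	struct entity b = { "B (runner-up)", 200 };

	/* A yielded, so it became the skip buddy; B is picked instead of A. */
	printf("picked: %s\n", pick(&a, &b, &a, NULL, NULL)->name);
	return 0;
}

The old code either only updated runtime statistics or, with sysctl_sched_compat_yield set, requeued the yielding task to the rightmost position; the skip buddy replaces both behaviours, keeping the task's vruntime intact while deprioritizing it for the next pick, which is why the compat knob can be removed.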