@@ -3028,9 +3028,11 @@ enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static inline void
 dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	u32 divider = get_pelt_divider(&se->avg);
 	sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
-	cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
+	sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
+				     cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
 }
 #else
 static inline void
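
The clamp added here relies on how the PELT divider is built. For context (not part of this diff), the relevant definitions in kernel/sched/pelt.h at the time of this series look like the sketch below; because the divider is never smaller than PELT_MIN_DIVIDER, any coherent pair must satisfy *_sum >= *_avg * PELT_MIN_DIVIDER, which is exactly the floor the max_t() above restores.

#define PELT_MIN_DIVIDER	(LOAD_AVG_MAX - 1024)

static inline u32 get_pelt_divider(struct sched_avg *avg)
{
	/* period_contrib is the partially elapsed 1024us period, in [0, 1023] */
	return PELT_MIN_DIVIDER + avg->period_contrib;
}
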
@@ -3381,7 +3383,6 @@ void set_task_rq_fair(struct sched_entity *se,
 	se->avg.last_update_time = n_last_update_time;
 }
 
-
 /*
  * When on migration a sched_entity joins/leaves the PELT hierarchy, we need to
  * propagate its contribution. The key to this propagation is the invariant
@@ -3449,15 +3450,14 @@ void set_task_rq_fair(struct sched_entity *se,
  * XXX: only do this for the part of runnable > running ?
  *
  */
-
 static inline void
 update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
-	u32 divider;
+	long delta_sum, delta_avg = gcfs_rq->avg.util_avg - se->avg.util_avg;
+	u32 new_sum, divider;
 
 	/* Nothing to update */
-	if (!delta)
+	if (!delta_avg)
 		return;
 
 	/*
@@ -3466,23 +3466,30 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	 */
 	divider = get_pelt_divider(&cfs_rq->avg);
 
+
 	/* Set new sched_entity's utilization */
 	se->avg.util_avg = gcfs_rq->avg.util_avg;
-	se->avg.util_sum = se->avg.util_avg * divider;
+	new_sum = se->avg.util_avg * divider;
+	delta_sum = (long)new_sum - (long)se->avg.util_sum;
+	se->avg.util_sum = new_sum;
 
 	/* Update parent cfs_rq utilization */
-	add_positive(&cfs_rq->avg.util_avg, delta);
-	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
+	add_positive(&cfs_rq->avg.util_avg, delta_avg);
+	add_positive(&cfs_rq->avg.util_sum, delta_sum);
+
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
+				     cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);
 }
 
 static inline void
 update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
-	u32 divider;
+	long delta_sum, delta_avg = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
+	u32 new_sum, divider;
 
 	/* Nothing to update */
-	if (!delta)
+	if (!delta_avg)
 		return;
 
 	/*
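
The same shape repeats for runnable and load below: rather than rewriting the parent's *_sum from *_avg * divider, the patch applies the child's exact signed delta to both *_avg and *_sum, then keeps the sum above its theoretical floor. A hypothetical helper (illustrative only; propagate_avg_sum and its parameters are not in the kernel) makes the pattern explicit:

static inline void propagate_avg_sum(unsigned long *avg, u32 *sum,
				     long delta_avg, s64 delta_sum)
{
	add_positive(avg, delta_avg);	/* apply the avg delta, clamped at 0 */
	add_positive(sum, delta_sum);	/* apply the matching sum delta */
	/* a sum can never be smaller than its avg times the minimum divider */
	*sum = max_t(u32, *sum, *avg * PELT_MIN_DIVIDER);
}
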
@@ -3493,19 +3500,25 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
 
 	/* Set new sched_entity's runnable */
 	se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
-	se->avg.runnable_sum = se->avg.runnable_avg * divider;
+	new_sum = se->avg.runnable_avg * divider;
+	delta_sum = (long)new_sum - (long)se->avg.runnable_sum;
+	se->avg.runnable_sum = new_sum;
 
 	/* Update parent cfs_rq runnable */
-	add_positive(&cfs_rq->avg.runnable_avg, delta);
-	cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
+	add_positive(&cfs_rq->avg.runnable_avg, delta_avg);
+	add_positive(&cfs_rq->avg.runnable_sum, delta_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
+					 cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);
 }
 
 static inline void
 update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
 {
-	long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+	long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
 	unsigned long load_avg;
 	u64 load_sum = 0;
+	s64 delta_sum;
 	u32 divider;
 
 	if (!runnable_sum)
@@ -3532,7 +3545,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	 * assuming all tasks are equally runnable.
 	 */
 	if (scale_load_down(gcfs_rq->load.weight)) {
-		load_sum = div_s64(gcfs_rq->avg.load_sum,
+		load_sum = div_u64(gcfs_rq->avg.load_sum,
 			  scale_load_down(gcfs_rq->load.weight));
 	}
 
@@ -3549,19 +3562,22 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT;
 	runnable_sum = max(runnable_sum, running_sum);
 
-	load_sum = (s64)se_weight(se) * runnable_sum;
-	load_avg = div_s64(load_sum, divider);
-
-	se->avg.load_sum = runnable_sum;
+	load_sum = se_weight(se) * runnable_sum;
+	load_avg = div_u64(load_sum, divider);
 
-	delta = load_avg - se->avg.load_avg;
-	if (!delta)
+	delta_avg = load_avg - se->avg.load_avg;
+	if (!delta_avg)
 		return;
 
-	se->avg.load_avg = load_avg;
+	delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
 
-	add_positive(&cfs_rq->avg.load_avg, delta);
-	cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
+	se->avg.load_sum = runnable_sum;
+	se->avg.load_avg = load_avg;
+	add_positive(&cfs_rq->avg.load_avg, delta_avg);
+	add_positive(&cfs_rq->avg.load_sum, delta_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
+				     cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
 }
 
 static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
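
One subtlety in the load case compared with util and runnable: a group entity contributes its weighted sum to the parent, so the delta has to be taken against the old weighted contribution before se->avg.load_sum is overwritten. Restated for clarity (illustration only, not additional patch code):

	s64 old_contrib = (s64)se_weight(se) * se->avg.load_sum;	/* what se used to contribute */
	s64 new_contrib = (s64)se_weight(se) * runnable_sum;		/* == load_sum computed above */
	s64 delta_sum   = new_contrib - old_contrib;			/* applied to the parent's load_sum */
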
@@ -3652,7 +3668,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum
  *
  * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
  *
- * Returns true if the load decayed or we removed load.
+ * Return: true if the load decayed or we removed load.
  *
  * Since both these conditions indicate a changed cfs_rq->avg.load we should
  * call update_tg_load_avg() when this function returns true.
@@ -3677,15 +3693,32 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 
 		r = removed_load;
 		sub_positive(&sa->load_avg, r);
-		sa->load_sum = sa->load_avg * divider;
+		sub_positive(&sa->load_sum, r * divider);
+		/* See sa->util_sum below */
+		sa->load_sum = max_t(u32, sa->load_sum, sa->load_avg * PELT_MIN_DIVIDER);
 
 		r = removed_util;
 		sub_positive(&sa->util_avg, r);
-		sa->util_sum = sa->util_avg * divider;
+		sub_positive(&sa->util_sum, r * divider);
+		/*
+		 * Because of rounding, se->util_sum might end up being +1 more than
+		 * cfs->util_sum. Although this is not a problem by itself, detaching
+		 * a lot of tasks with this rounding between two updates of util_avg
+		 * (~1ms) can drive cfs->util_sum to zero while cfs->util_avg is still
+		 * non-zero.
+		 * Check that util_sum is still above its lower bound for the new
+		 * util_avg. Given that period_contrib might have moved since the last
+		 * sync, we can only be sure that util_sum is greater than or equal to
+		 *   util_avg * minimum possible divider
+		 */
+		sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER);
 
 		r = removed_runnable;
 		sub_positive(&sa->runnable_avg, r);
-		sa->runnable_sum = sa->runnable_avg * divider;
+		sub_positive(&sa->runnable_sum, r * divider);
+		/* See sa->util_sum above */
+		sa->runnable_sum = max_t(u32, sa->runnable_sum,
+					 sa->runnable_avg * PELT_MIN_DIVIDER);
 
 		/*
 		 * removed_runnable is the unweighted version of removed_load so we
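
To make the underflow described in the comment concrete, here is a minimal standalone sketch (not kernel code; it assumes PELT_MIN_DIVIDER == LOAD_AVG_MAX - 1024 == 46718 and a period_contrib that has grown to 1023 since the removed utilization was accumulated):

#include <stdio.h>

#define PELT_MIN_DIVIDER	46718	/* LOAD_AVG_MAX - 1024 */

int main(void)
{
	/* cfs_rq holds the minimum coherent pair for util_avg == 100 */
	unsigned long util_avg = 100;
	unsigned long util_sum = util_avg * PELT_MIN_DIVIDER;	/* 4671800 */

	unsigned long divider = PELT_MIN_DIVIDER + 1023;	/* current divider: 47741 */
	unsigned long r = 99;					/* removed_util */

	util_avg -= r;						/* 1 */
	/* sub_positive(): r * divider == 4726359 exceeds util_sum, so clamp to 0 */
	util_sum = (r * divider > util_sum) ? 0 : util_sum - r * divider;
	printf("after sub:   util_avg=%lu util_sum=%lu\n", util_avg, util_sum);

	/* the new lower bound restores a coherent pair: 1 * 46718 */
	if (util_sum < util_avg * PELT_MIN_DIVIDER)
		util_sum = util_avg * PELT_MIN_DIVIDER;
	printf("after clamp: util_avg=%lu util_sum=%lu\n", util_avg, util_sum);

	return 0;
}
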
@@ -3772,17 +3805,18 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
  */
 static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	/*
-	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
-	 * See ___update_load_avg() for details.
-	 */
-	u32 divider = get_pelt_divider(&cfs_rq->avg);
-
 	dequeue_load_avg(cfs_rq, se);
 	sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
-	cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
+	sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum,
+				     cfs_rq->avg.util_avg * PELT_MIN_DIVIDER);
+
 	sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg);
-	cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
+	sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum,
+					 cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER);
 
 	add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
 
@@ -8539,6 +8573,8 @@ group_type group_classify(unsigned int imbalance_pct,
  *
  * If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings
  * of @dst_cpu are idle and @sg has lower priority.
+ *
+ * Return: true if @dst_cpu can pull tasks, false otherwise.
  */
 static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds,
 				    struct sg_lb_stats *sgs,
@@ -8614,6 +8650,7 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs
 /**
  * update_sg_lb_stats - Update sched_group's statistics for load balancing.
  * @env: The load balancing environment.
+ * @sds: Load-balancing data with statistics of the local group.
  * @group: sched_group whose statistics are to be updated.
  * @sgs: variable to hold the statistics for this group.
  * @sg_status: Holds flag indicating the status of the sched_group
@@ -9421,12 +9458,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 /**
  * find_busiest_group - Returns the busiest group within the sched_domain
  * if there is an imbalance.
+ * @env: The load balancing environment.
  *
  * Also calculates the amount of runnable load which should be moved
  * to restore balance.
  *
- * @env: The load balancing environment.
- *
  * Return: - The busiest group if imbalance exists.
  */
 static struct sched_group *find_busiest_group(struct lb_env *env)