Skip to content

Commit

Permalink
sched/fair: Scan cluster before scanning LLC in wake-up path
Browse files Browse the repository at this point in the history
For platforms having clusters like Kunpeng920, CPUs within the same cluster
have lower latency when synchronizing and accessing shared resources like
cache. Thus, this patch tries to find an idle cpu within the cluster of the
target CPU before scanning the whole LLC to gain lower latency.

Note that neither Kunpeng920 nor x86 Jacobsville supports SMT, so this
patch does not consider SMT for the moment.

Testing has been done on Kunpeng920 by pinning tasks to one NUMA node and
to two NUMA nodes. On Kunpeng920, each NUMA node has 8 clusters and each
cluster has 4 CPUs.

With this patch, we observed tbench improvements both within one NUMA node
and across two NUMA nodes.

On numa 0:
                            5.19-rc1                patched
Hmean     1        350.27 (   0.00%)      406.88 *  16.16%*
Hmean     2        702.01 (   0.00%)      808.22 *  15.13%*
Hmean     4       1405.14 (   0.00%)     1614.34 *  14.89%*
Hmean     8       2830.53 (   0.00%)     3169.02 *  11.96%*
Hmean     16      5597.95 (   0.00%)     6224.20 *  11.19%*
Hmean     32     10537.38 (   0.00%)    10524.97 *  -0.12%*
Hmean     64      8366.04 (   0.00%)     8437.41 *   0.85%*
Hmean     128     7060.87 (   0.00%)     7150.25 *   1.27%*

On numa 0-1:
                            5.19-rc1                patched
Hmean     1        346.11 (   0.00%)      408.47 *  18.02%*
Hmean     2        693.34 (   0.00%)      805.78 *  16.22%*
Hmean     4       1384.96 (   0.00%)     1602.49 *  15.71%*
Hmean     8       2699.45 (   0.00%)     3069.98 *  13.73%*
Hmean     16      5327.11 (   0.00%)     5688.19 *   6.78%*
Hmean     32     10019.10 (   0.00%)    11862.56 *  18.40%*
Hmean     64     13850.57 (   0.00%)    17748.54 *  28.14%*
Hmean     128    12498.25 (   0.00%)    15541.59 *  24.35%*
Hmean     256    11195.77 (   0.00%)    13854.06 *  23.74%*

Tested-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
  • Loading branch information
Barry Song authored and intel-lab-lkp committed Jun 8, 2022
1 parent f0e3011 commit f2b15e8
Showing 1 changed file with 40 additions and 3 deletions.
43 changes: 40 additions & 3 deletions kernel/sched/fair.c
Expand Up @@ -6297,6 +6297,39 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd

#endif /* CONFIG_SCHED_SMT */

#ifdef CONFIG_SCHED_CLUSTER
/*
 * Look for an idle CPU among the candidates in @cpus that also belong to the
 * cluster domain of @target.
 *
 * @p:      task being placed; handed to __select_idle_cpu() for each candidate
 * @cpus:   candidate mask; when a scan is attempted and finds nothing, the
 *          cluster's CPUs are removed from it so the caller does not visit
 *          them again
 * @target: CPU whose per-CPU sd_cluster domain is scanned
 * @nr:     remaining scan budget shared with the caller; decremented once per
 *          candidate, and the scan stops as soon as it reaches zero
 *
 * Returns the CPU reported by __select_idle_cpu() when that value is a valid
 * CPU number (below nr_cpumask_bits), otherwise -1. Nothing is scanned, and
 * @cpus and @nr are left untouched, when SMT is active or @target has no
 * cluster domain.
 */
static inline int scan_cluster(struct task_struct *p, struct cpumask *cpus,
			       int target, int *nr)
{
	struct sched_domain *cluster_sd = rcu_dereference(per_cpu(sd_cluster, target));
	struct cpumask *cluster_span;
	int candidate;

	/* TODO: Support SMT system with cluster topology */
	if (sched_smt_active() || !cluster_sd)
		return -1;

	cluster_span = sched_domain_span(cluster_sd);

	for_each_cpu_and(candidate, cpus, cluster_span) {
		int found;

		/* Budget exhausted: give up on the remaining cluster CPUs. */
		if (--*nr == 0)
			break;

		found = __select_idle_cpu(candidate, p);
		if ((unsigned int)found < nr_cpumask_bits)
			return found;
	}

	/* No idle CPU here: drop the cluster from the caller's candidate mask. */
	cpumask_andnot(cpus, cpus, cluster_span);

	return -1;
}
#else
/*
 * Stub used when CONFIG_SCHED_CLUSTER is not set: there is no cluster domain
 * to scan, so report "no idle CPU found" (-1) and leave @cpus and @nr
 * untouched.
 *
 * The parameter list mirrors the CONFIG_SCHED_CLUSTER variant (a cpumask
 * pointer as second argument, not an int) so that the call
 * scan_cluster(p, cpus, target, &nr) is type-correct in both configurations.
 */
static inline int scan_cluster(struct task_struct *p, struct cpumask *cpus,
			       int target, int *nr)
{
	return -1;
}
#endif

/*
* Scan the LLC domain for idle CPUs; this is dynamically regulated by
* comparing the average scan cost (tracked in sd->avg_scan_cost) against the
Expand Down Expand Up @@ -6345,14 +6378,18 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
time = cpu_clock(this);
}

idle_cpu = scan_cluster(p, cpus, target, &nr);
if ((unsigned int)idle_cpu < nr_cpumask_bits)
return idle_cpu;

for_each_cpu_wrap(cpu, cpus, target + 1) {
if (has_idle_core) {
i = select_idle_core(p, cpu, cpus, &idle_cpu);
if ((unsigned int)i < nr_cpumask_bits)
return i;

} else {
if (!--nr)
if (--nr <= 0)
return -1;
idle_cpu = __select_idle_cpu(cpu, p);
if ((unsigned int)idle_cpu < nr_cpumask_bits)
Expand Down Expand Up @@ -6451,7 +6488,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
/*
* If the previous CPU is cache affine and idle, don't be stupid:
*/
if (prev != target && cpus_share_cache(prev, target) &&
if (prev != target && cpus_share_resources(prev, target) &&
(available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
asym_fits_capacity(task_util, prev))
return prev;
Expand All @@ -6477,7 +6514,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
p->recent_used_cpu = prev;
if (recent_used_cpu != prev &&
recent_used_cpu != target &&
cpus_share_cache(recent_used_cpu, target) &&
cpus_share_resources(recent_used_cpu, target) &&
(available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
asym_fits_capacity(task_util, recent_used_cpu)) {
Expand Down

0 comments on commit f2b15e8

Please sign in to comment.