Skip to content

Commit 6a010a4

Browse files
committed
cgroup: Make !percpu threadgroup_rwsem operations optional
3942a9b ("locking, rcu, cgroup: Avoid synchronize_sched() in __cgroup_procs_write()") disabled percpu operations on threadgroup_rwsem because the implied synchronize_rcu() on write locking was pushing up the latencies too much for Android, which constantly moves processes between cgroups. This makes the hotter paths - fork and exit - slower as they're always forced into the slow path. There is no reason to force this on everyone, especially given that the more common static usage pattern can now completely avoid write-locking the rwsem. Write-locking is elided when turning on and off controllers on empty sub-trees and CLONE_INTO_CGROUP enables seeding a cgroup without grabbing the rwsem. Restore the default percpu operations and introduce the mount option "favordynmods" and config option CGROUP_FAVOR_DYNMODS for users who need lower latencies for the dynamic operations. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Christian Brauner <brauner@kernel.org> Cc: Michal Koutný <mkoutny@suse.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: John Stultz <john.stultz@linaro.org> Cc: Dmitry Shmidt <dimitrysh@google.com> Cc: Oleg Nesterov <oleg@redhat.com>
1 parent 3031273 commit 6a010a4

File tree

6 files changed

+87
-11
lines changed

6 files changed

+87
-11
lines changed

Documentation/admin-guide/cgroup-v2.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,14 @@ cgroup v2 currently supports the following mount options.
184184
ignored on non-init namespace mounts. Please refer to the
185185
Delegation section for details.
186186

187+
[no]favordynmods
188+
Reduce the latencies of dynamic cgroup modifications such as
189+
task migrations and controller on/offs at the cost of making
190+
hot path operations such as forks and exits more expensive.
191+
The static usage pattern of creating a cgroup, enabling
192+
controllers, and then seeding it with CLONE_INTO_CGROUP is
193+
not affected by this option.
194+
187195
memory_[no]localevents
188196
Only populate memory.events with data for the current cgroup,
189197
and not any subtrees. This is legacy behaviour, the default

include/linux/cgroup-defs.h

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,20 +88,33 @@ enum {
8888
*/
8989
CGRP_ROOT_NS_DELEGATE = (1 << 3),
9090

91+
/*
92+
* Reduce latencies on dynamic cgroup modifications such as task
93+
* migrations and controller on/offs by disabling percpu operation on
94+
* cgroup_threadgroup_rwsem. This makes hot path operations such as
95+
* forks and exits take the slow path and become more expensive.
96+
*
97+
* The static usage pattern of creating a cgroup, enabling controllers,
98+
* and then seeding it with CLONE_INTO_CGROUP doesn't require write
99+
* locking cgroup_threadgroup_rwsem and thus doesn't benefit from
100+
* favordynmods.
101+
*/
102+
CGRP_ROOT_FAVOR_DYNMODS = (1 << 4),
103+
91104
/*
92105
* Enable cpuset controller in v1 cgroup to use v2 behavior.
93106
*/
94-
CGRP_ROOT_CPUSET_V2_MODE = (1 << 4),
107+
CGRP_ROOT_CPUSET_V2_MODE = (1 << 16),
95108

96109
/*
97110
* Enable legacy local memory.events.
98111
*/
99-
CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5),
112+
CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 17),
100113

101114
/*
102115
* Enable recursive subtree protection
103116
*/
104-
CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 6),
117+
CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18),
105118
};
106119

107120
/* cftype->flags */

init/Kconfig

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -936,6 +936,16 @@ if CGROUPS
936936
config PAGE_COUNTER
937937
bool
938938

939+
config CGROUP_FAVOR_DYNMODS
940+
bool "Favor dynamic modification latency reduction by default"
941+
help
942+
This option enables the "favordynmods" mount option by default
943+
which reduces the latencies of dynamic cgroup modifications such
944+
as task migrations and controller on/offs at the cost of making
945+
hot path operations such as forks and exits more expensive.
946+
947+
Say N if unsure.
948+
939949
config MEMCG
940950
bool "Memory controller"
941951
select PAGE_COUNTER

kernel/cgroup/cgroup-internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ void cgroup_kn_unlock(struct kernfs_node *kn);
233233
int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
234234
struct cgroup_namespace *ns);
235235

236+
void cgroup_favor_dynmods(struct cgroup_root *root, bool favor);
236237
void cgroup_free_root(struct cgroup_root *root);
237238
void init_cgroup_root(struct cgroup_fs_context *ctx);
238239
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);

kernel/cgroup/cgroup-v1.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -875,6 +875,8 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
875875
seq_puts(seq, ",xattr");
876876
if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
877877
seq_puts(seq, ",cpuset_v2_mode");
878+
if (root->flags & CGRP_ROOT_FAVOR_DYNMODS)
879+
seq_puts(seq, ",favordynmods");
878880

879881
spin_lock(&release_agent_path_lock);
880882
if (strlen(root->release_agent_path))
@@ -898,6 +900,8 @@ enum cgroup1_param {
898900
Opt_noprefix,
899901
Opt_release_agent,
900902
Opt_xattr,
903+
Opt_favordynmods,
904+
Opt_nofavordynmods,
901905
};
902906

903907
const struct fs_parameter_spec cgroup1_fs_parameters[] = {
@@ -909,6 +913,8 @@ const struct fs_parameter_spec cgroup1_fs_parameters[] = {
909913
fsparam_flag ("noprefix", Opt_noprefix),
910914
fsparam_string("release_agent", Opt_release_agent),
911915
fsparam_flag ("xattr", Opt_xattr),
916+
fsparam_flag ("favordynmods", Opt_favordynmods),
917+
fsparam_flag ("nofavordynmods", Opt_nofavordynmods),
912918
{}
913919
};
914920

@@ -960,6 +966,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
960966
case Opt_xattr:
961967
ctx->flags |= CGRP_ROOT_XATTR;
962968
break;
969+
case Opt_favordynmods:
970+
ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
971+
break;
972+
case Opt_nofavordynmods:
973+
ctx->flags &= ~CGRP_ROOT_FAVOR_DYNMODS;
974+
break;
963975
case Opt_release_agent:
964976
/* Specifying two release agents is forbidden */
965977
if (ctx->release_agent)
@@ -1211,8 +1223,11 @@ static int cgroup1_root_to_use(struct fs_context *fc)
12111223
init_cgroup_root(ctx);
12121224

12131225
ret = cgroup_setup_root(root, ctx->subsys_mask);
1214-
if (ret)
1226+
if (!ret)
1227+
cgroup_favor_dynmods(root, ctx->flags & CGRP_ROOT_FAVOR_DYNMODS);
1228+
else
12151229
cgroup_free_root(root);
1230+
12161231
return ret;
12171232
}
12181233

kernel/cgroup/cgroup.c

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1305,6 +1305,20 @@ struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
13051305
return root_cgrp->root;
13061306
}
13071307

1308+
void cgroup_favor_dynmods(struct cgroup_root *root, bool favor)
1309+
{
1310+
bool favoring = root->flags & CGRP_ROOT_FAVOR_DYNMODS;
1311+
1312+
/* see the comment above CGRP_ROOT_FAVOR_DYNMODS definition */
1313+
if (favor && !favoring) {
1314+
rcu_sync_enter(&cgroup_threadgroup_rwsem.rss);
1315+
root->flags |= CGRP_ROOT_FAVOR_DYNMODS;
1316+
} else if (!favor && favoring) {
1317+
rcu_sync_exit(&cgroup_threadgroup_rwsem.rss);
1318+
root->flags &= ~CGRP_ROOT_FAVOR_DYNMODS;
1319+
}
1320+
}
1321+
13081322
static int cgroup_init_root_id(struct cgroup_root *root)
13091323
{
13101324
int id;
@@ -1365,6 +1379,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
13651379
cgroup_root_count--;
13661380
}
13671381

1382+
cgroup_favor_dynmods(root, false);
13681383
cgroup_exit_root_id(root);
13691384

13701385
mutex_unlock(&cgroup_mutex);
@@ -1858,6 +1873,7 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
18581873

18591874
enum cgroup2_param {
18601875
Opt_nsdelegate, Opt_nonsdelegate,
1876+
Opt_favordynmods, Opt_nofavordynmods,
18611877
Opt_memory_localevents, Opt_memory_nolocalevents,
18621878
Opt_memory_recursiveprot, Opt_memory_norecursiveprot,
18631879
nr__cgroup2_params
@@ -1866,6 +1882,8 @@ enum cgroup2_param {
18661882
static const struct fs_parameter_spec cgroup2_fs_parameters[] = {
18671883
fsparam_flag("nsdelegate", Opt_nsdelegate),
18681884
fsparam_flag("nonsdelegate", Opt_nonsdelegate),
1885+
fsparam_flag("favordynmods", Opt_favordynmods),
1886+
fsparam_flag("nofavordynmods", Opt_nofavordynmods),
18691887
fsparam_flag("memory_localevents", Opt_memory_localevents),
18701888
fsparam_flag("memory_nolocalevents", Opt_memory_nolocalevents),
18711889
fsparam_flag("memory_recursiveprot", Opt_memory_recursiveprot),
@@ -1890,6 +1908,12 @@ static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param
18901908
case Opt_nonsdelegate:
18911909
ctx->flags &= ~CGRP_ROOT_NS_DELEGATE;
18921910
return 0;
1911+
case Opt_favordynmods:
1912+
ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
1913+
return 0;
1914+
case Opt_nofavordynmods:
1915+
ctx->flags &= ~CGRP_ROOT_FAVOR_DYNMODS;
1916+
return 0;
18931917
case Opt_memory_localevents:
18941918
ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
18951919
return 0;
@@ -1914,6 +1938,9 @@ static void apply_cgroup_root_flags(unsigned int root_flags)
19141938
else
19151939
cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
19161940

1941+
cgroup_favor_dynmods(&cgrp_dfl_root,
1942+
root_flags & CGRP_ROOT_FAVOR_DYNMODS);
1943+
19171944
if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
19181945
cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
19191946
else
@@ -1930,6 +1957,8 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root
19301957
{
19311958
if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
19321959
seq_puts(seq, ",nsdelegate");
1960+
if (cgrp_dfl_root.flags & CGRP_ROOT_FAVOR_DYNMODS)
1961+
seq_puts(seq, ",favordynmods");
19331962
if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
19341963
seq_puts(seq, ",memory_localevents");
19351964
if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT)
@@ -1980,7 +2009,8 @@ void init_cgroup_root(struct cgroup_fs_context *ctx)
19802009
cgrp->root = root;
19812010
init_cgroup_housekeeping(cgrp);
19822011

1983-
root->flags = ctx->flags;
2012+
/* DYNMODS must be modified through cgroup_favor_dynmods() */
2013+
root->flags = ctx->flags & ~CGRP_ROOT_FAVOR_DYNMODS;
19842014
if (ctx->release_agent)
19852015
strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
19862016
if (ctx->name)
@@ -2202,6 +2232,10 @@ static int cgroup_init_fs_context(struct fs_context *fc)
22022232
put_user_ns(fc->user_ns);
22032233
fc->user_ns = get_user_ns(ctx->ns->user_ns);
22042234
fc->global = true;
2235+
2236+
#ifdef CONFIG_CGROUP_FAVOR_DYNMODS
2237+
ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
2238+
#endif
22052239
return 0;
22062240
}
22072241

@@ -5854,12 +5888,6 @@ int __init cgroup_init(void)
58545888

58555889
cgroup_rstat_boot();
58565890

5857-
/*
5858-
* The latency of the synchronize_rcu() is too high for cgroups,
5859-
* avoid it at the cost of forcing all readers into the slow path.
5860-
*/
5861-
rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
5862-
58635891
get_user_ns(init_cgroup_ns.user_ns);
58645892

58655893
mutex_lock(&cgroup_mutex);
@@ -6771,6 +6799,7 @@ static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
67716799
{
67726800
return snprintf(buf, PAGE_SIZE,
67736801
"nsdelegate\n"
6802+
"favordynmods\n"
67746803
"memory_localevents\n"
67756804
"memory_recursiveprot\n");
67766805
}

0 commit comments

Comments
 (0)