Skip to content

Commit b6bb70f

Browse files
committed
Merge tag 'cgroup-for-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup updates from Tejun Heo:
 "Several core optimizations:

   - threadgroup_rwsem write locking is skipped when configuring controllers in empty subtrees. Combined with CLONE_INTO_CGROUP, this allows the common static usage pattern to not grab threadgroup_rwsem at all (glibc still doesn't seem ready for CLONE_INTO_CGROUP unfortunately).

   - threadgroup_rwsem used to be put into non-percpu mode by default due to latency concerns in specific use cases. There's no reason for everyone else to pay for it. Make the behavior optional.

   - psi no longer allocates memory when disabled.

  ... along with some code cleanups"

* tag 'cgroup-for-5.20' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: Skip subtree root in cgroup_update_dfl_csses()
  cgroup: remove "no" prefixed mount options
  cgroup: Make !percpu threadgroup_rwsem operations optional
  cgroup: Add "no" prefixed mount options
  cgroup: Elide write-locking threadgroup_rwsem when updating csses on an empty subtree
  cgroup.c: remove redundant check for mixable cgroup in cgroup_migrate_vet_dst
  cgroup.c: add helper __cset_cgroup_from_root to cleanup duplicated codes
  psi: dont alloc memory for psi by default
2 parents e2b5421 + 265792d commit b6bb70f

File tree

8 files changed

+151
-63
lines changed

8 files changed

+151
-63
lines changed

Documentation/admin-guide/cgroup-v2.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,14 @@ cgroup v2 currently supports the following mount options.
184184
ignored on non-init namespace mounts. Please refer to the
185185
Delegation section for details.
186186

187+
favordynmods
188+
Reduce the latencies of dynamic cgroup modifications such as
189+
task migrations and controller on/offs at the cost of making
190+
hot path operations such as forks and exits more expensive.
191+
The static usage pattern of creating a cgroup, enabling
192+
controllers, and then seeding it with CLONE_INTO_CGROUP is
193+
not affected by this option.
194+
187195
memory_localevents
188196
Only populate memory.events with data for the current cgroup,
189197
and not any subtrees. This is legacy behaviour, the default

include/linux/cgroup-defs.h

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,20 +88,33 @@ enum {
8888
*/
8989
CGRP_ROOT_NS_DELEGATE = (1 << 3),
9090

91+
/*
92+
* Reduce latencies on dynamic cgroup modifications such as task
93+
* migrations and controller on/offs by disabling percpu operation on
94+
* cgroup_threadgroup_rwsem. This makes hot path operations such as
95+
* forks and exits take the slow path and become more expensive.
96+
*
97+
* The static usage pattern of creating a cgroup, enabling controllers,
98+
* and then seeding it with CLONE_INTO_CGROUP doesn't require write
99+
* locking cgroup_threadgroup_rwsem and thus doesn't benefit from
100+
* favordynmods.
101+
*/
102+
CGRP_ROOT_FAVOR_DYNMODS = (1 << 4),
103+
91104
/*
92105
* Enable cpuset controller in v1 cgroup to use v2 behavior.
93106
*/
94-
CGRP_ROOT_CPUSET_V2_MODE = (1 << 4),
107+
CGRP_ROOT_CPUSET_V2_MODE = (1 << 16),
95108

96109
/*
97110
* Enable legacy local memory.events.
98111
*/
99-
CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 5),
112+
CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 17),
100113

101114
/*
102115
* Enable recursive subtree protection
103116
*/
104-
CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 6),
117+
CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18),
105118
};
106119

107120
/* cftype->flags */
@@ -480,7 +493,7 @@ struct cgroup {
480493
struct work_struct release_agent_work;
481494

482495
/* used to track pressure stalls */
483-
struct psi_group psi;
496+
struct psi_group *psi;
484497

485498
/* used to store eBPF programs */
486499
struct cgroup_bpf bpf;

include/linux/cgroup.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -674,7 +674,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
674674

675675
static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
676676
{
677-
return &cgrp->psi;
677+
return cgrp->psi;
678678
}
679679

680680
bool cgroup_psi_enabled(void);

init/Kconfig

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -945,6 +945,16 @@ if CGROUPS
945945
config PAGE_COUNTER
946946
bool
947947

948+
config CGROUP_FAVOR_DYNMODS
949+
bool "Favor dynamic modification latency reduction by default"
950+
help
951+
This option enables the "favordynmods" mount option by default
952+
which reduces the latencies of dynamic cgroup modifications such
953+
as task migrations and controller on/offs at the cost of making
954+
hot path operations such as forks and exits more expensive.
955+
956+
Say N if unsure.
957+
948958
config MEMCG
949959
bool "Memory controller"
950960
select PAGE_COUNTER

kernel/cgroup/cgroup-internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ void cgroup_kn_unlock(struct kernfs_node *kn);
233233
int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
234234
struct cgroup_namespace *ns);
235235

236+
void cgroup_favor_dynmods(struct cgroup_root *root, bool favor);
236237
void cgroup_free_root(struct cgroup_root *root);
237238
void init_cgroup_root(struct cgroup_fs_context *ctx);
238239
int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);

kernel/cgroup/cgroup-v1.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -875,6 +875,8 @@ static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_roo
875875
seq_puts(seq, ",xattr");
876876
if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
877877
seq_puts(seq, ",cpuset_v2_mode");
878+
if (root->flags & CGRP_ROOT_FAVOR_DYNMODS)
879+
seq_puts(seq, ",favordynmods");
878880

879881
spin_lock(&release_agent_path_lock);
880882
if (strlen(root->release_agent_path))
@@ -898,6 +900,8 @@ enum cgroup1_param {
898900
Opt_noprefix,
899901
Opt_release_agent,
900902
Opt_xattr,
903+
Opt_favordynmods,
904+
Opt_nofavordynmods,
901905
};
902906

903907
const struct fs_parameter_spec cgroup1_fs_parameters[] = {
@@ -909,6 +913,8 @@ const struct fs_parameter_spec cgroup1_fs_parameters[] = {
909913
fsparam_flag ("noprefix", Opt_noprefix),
910914
fsparam_string("release_agent", Opt_release_agent),
911915
fsparam_flag ("xattr", Opt_xattr),
916+
fsparam_flag ("favordynmods", Opt_favordynmods),
917+
fsparam_flag ("nofavordynmods", Opt_nofavordynmods),
912918
{}
913919
};
914920

@@ -960,6 +966,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
960966
case Opt_xattr:
961967
ctx->flags |= CGRP_ROOT_XATTR;
962968
break;
969+
case Opt_favordynmods:
970+
ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS;
971+
break;
972+
case Opt_nofavordynmods:
973+
ctx->flags &= ~CGRP_ROOT_FAVOR_DYNMODS;
974+
break;
963975
case Opt_release_agent:
964976
/* Specifying two release agents is forbidden */
965977
if (ctx->release_agent)
@@ -1211,8 +1223,11 @@ static int cgroup1_root_to_use(struct fs_context *fc)
12111223
init_cgroup_root(ctx);
12121224

12131225
ret = cgroup_setup_root(root, ctx->subsys_mask);
1214-
if (ret)
1226+
if (!ret)
1227+
cgroup_favor_dynmods(root, ctx->flags & CGRP_ROOT_FAVOR_DYNMODS);
1228+
else
12151229
cgroup_free_root(root);
1230+
12161231
return ret;
12171232
}
12181233

0 commit comments

Comments
 (0)