Skip to content

Commit 838f13b

Browse files
htejunaxboe
authored andcommitted
blkcg: allow blkcg_pol_mutex to be grabbed from cgroup [file] methods
blkcg_pol_mutex primarily protects the blkcg_policy array. It also protects cgroup file type [un]registration during policy addition / removal. This puts blkcg_pol_mutex outside cgroup internal synchronization and in turn makes it impossible to grab from blkcg's cgroup methods as that leads to cyclic dependency. Another problematic dependency arising from this is through cgroup interface file deactivation. Removing a cftype requires removing all files of the type which in turn involves draining all on-going invocations of the file methods. This means that an interface file implementation can't grab blkcg_pol_mutex as draining can lead to AA deadlock. blkcg_reset_stats() is already in this situation. It currently trylocks blkcg_pol_mutex and then unwinds and retries the whole operation on failure, which is cumbersome at best. It has a lengthy comment explaining how cgroup internal synchronization is involved and expected to be updated but as explained above this doesn't need cgroup internal locking to deadlock. It's a self-contained AA deadlock. The described circular dependencies can be easily broken by moving cftype [un]registration out of blkcg_pol_mutex and protect them with an outer mutex. This patch introduces blkcg_pol_register_mutex which wraps entire policy [un]registration including cftype operations and shrinks blkcg_pol_mutex critical section. This also makes the trylock dancing in blkcg_reset_stats() unnecessary. Removed. This patch is necessary for the following blkcg_policy_data allocation bug fixes. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Arianna Avanzini <avanzini.arianna@gmail.com> Signed-off-by: Jens Axboe <axboe@fb.com>
1 parent a322baa commit 838f13b

File tree

1 file changed

+21
-19
lines changed

1 file changed

+21
-19
lines changed

block/blk-cgroup.c

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@
2929

3030
#define MAX_KEY_LEN 100
3131

32+
/*
33+
* blkcg_pol_mutex protects blkcg_policy[] and policy [de]activation.
34+
* blkcg_pol_register_mutex nests outside of it and synchronizes entire
35+
* policy [un]register operations including cgroup file additions /
36+
* removals. Putting cgroup file registration outside blkcg_pol_mutex
37+
* allows grabbing it from cgroup callbacks.
38+
*/
39+
static DEFINE_MUTEX(blkcg_pol_register_mutex);
3240
static DEFINE_MUTEX(blkcg_pol_mutex);
3341

3442
struct blkcg blkcg_root;
@@ -453,20 +461,7 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css,
453461
struct blkcg_gq *blkg;
454462
int i;
455463

456-
/*
457-
* XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex
458-
* which ends up putting cgroup's internal cgroup_tree_mutex under
459-
* it; however, cgroup_tree_mutex is nested above cgroup file
460-
* active protection and grabbing blkcg_pol_mutex from a cgroup
461-
* file operation creates a possible circular dependency. cgroup
462-
* internal locking is planned to go through further simplification
463-
* and this issue should go away soon. For now, let's trylock
464-
* blkcg_pol_mutex and restart the write on failure.
465-
*
466-
* http://lkml.kernel.org/g/5363C04B.4010400@oracle.com
467-
*/
468-
if (!mutex_trylock(&blkcg_pol_mutex))
469-
return restart_syscall();
464+
mutex_lock(&blkcg_pol_mutex);
470465
spin_lock_irq(&blkcg->lock);
471466

472467
/*
@@ -1190,6 +1185,7 @@ int blkcg_policy_register(struct blkcg_policy *pol)
11901185
if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data)))
11911186
return -EINVAL;
11921187

1188+
mutex_lock(&blkcg_pol_register_mutex);
11931189
mutex_lock(&blkcg_pol_mutex);
11941190

11951191
/* find an empty slot */
@@ -1198,19 +1194,23 @@ int blkcg_policy_register(struct blkcg_policy *pol)
11981194
if (!blkcg_policy[i])
11991195
break;
12001196
if (i >= BLKCG_MAX_POLS)
1201-
goto out_unlock;
1197+
goto err_unlock;
12021198

12031199
/* register and update blkgs */
12041200
pol->plid = i;
12051201
blkcg_policy[i] = pol;
1202+
mutex_unlock(&blkcg_pol_mutex);
12061203

12071204
/* everything is in place, add intf files for the new policy */
12081205
if (pol->cftypes)
12091206
WARN_ON(cgroup_add_legacy_cftypes(&blkio_cgrp_subsys,
12101207
pol->cftypes));
1211-
ret = 0;
1212-
out_unlock:
1208+
mutex_unlock(&blkcg_pol_register_mutex);
1209+
return 0;
1210+
1211+
err_unlock:
12131212
mutex_unlock(&blkcg_pol_mutex);
1213+
mutex_unlock(&blkcg_pol_register_mutex);
12141214
return ret;
12151215
}
12161216
EXPORT_SYMBOL_GPL(blkcg_policy_register);
@@ -1223,7 +1223,7 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register);
12231223
*/
12241224
void blkcg_policy_unregister(struct blkcg_policy *pol)
12251225
{
1226-
mutex_lock(&blkcg_pol_mutex);
1226+
mutex_lock(&blkcg_pol_register_mutex);
12271227

12281228
if (WARN_ON(blkcg_policy[pol->plid] != pol))
12291229
goto out_unlock;
@@ -1233,8 +1233,10 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
12331233
cgroup_rm_cftypes(pol->cftypes);
12341234

12351235
/* unregister and update blkgs */
1236+
mutex_lock(&blkcg_pol_mutex);
12361237
blkcg_policy[pol->plid] = NULL;
1237-
out_unlock:
12381238
mutex_unlock(&blkcg_pol_mutex);
1239+
out_unlock:
1240+
mutex_unlock(&blkcg_pol_register_mutex);
12391241
}
12401242
EXPORT_SYMBOL_GPL(blkcg_policy_unregister);

0 commit comments

Comments
 (0)