
Commit 06b285b

htejun authored and axboe committed
blkcg: fix blkcg_policy_data allocation bug
e48453c ("block, cgroup: implement policy-specific per-blkcg data") updated per-blkcg policy data to be dynamically allocated. When a policy is registered, its policy data isn't created. Instead, when the policy is activated on a queue, the policy data is allocated if there are blkg's (blkcg_gq's) attached to a given blkcg. This is buggy. Consider the following scenario.

1. A blkcg is created. No blkg's attached yet.

2. The policy is registered. No policy data is allocated.

3. The policy is activated on a queue. As the above blkcg doesn't have any blkg's, it won't allocate the matching blkcg_policy_data.

4. An IO is issued from the blkcg and a blkg is created, but the blkcg still doesn't have the matching policy data allocated.

With cfq-iosched, this leads to an oops.

It also doesn't free policy data on policy unregistration, assuming that freeing all policy data on blkcg destruction should take care of it; however, this also is incorrect.

1. A blkcg has policy data.

2. The policy gets unregistered but the policy data remains.

3. Another policy gets registered on the same slot.

4. Later, the new policy tries to allocate policy data on the previous blkcg but the slot is already occupied and gets skipped. The policy ends up operating on the policy data of the previous policy.

There's no reason to manage blkcg_policy_data lazily. The reason we do lazy allocation of blkg's is that the number of all possible blkg's is the product of cgroups and block devices, which can reach a surprising level. blkcg_policy_data is constrained by the number of cgroups and shouldn't be a problem.

This patch makes blkcg_policy_data be allocated for all existing blkcgs on policy registration and freed on unregistration, and removes blkcg_policy_data handling from the policy [de]activation paths. This ensures that blkcg_policy_data is created and removed with the policy it belongs to and fixes the above-described problems.

Signed-off-by: Tejun Heo <tj@kernel.org>
Fixes: e48453c ("block, cgroup: implement policy-specific per-blkcg data")
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Arianna Avanzini <avanzini.arianna@gmail.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
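The lifecycle the patch enforces -- per-blkcg policy data allocated for every existing blkcg when a policy is registered, and freed when it is unregistered -- can be modelled with a small userspace C analogue. This is only an illustrative sketch, not kernel code; every name in it (group, register_policy, unregister_policy) is invented for the example.

/*
 * Userspace analogue of the lifecycle this patch enforces: per-group policy
 * data is allocated for every existing group when a policy is registered and
 * freed when it is unregistered, rather than lazily when the first blkg
 * appears. All identifiers here are made up for the sketch.
 */
#include <stdio.h>
#include <stdlib.h>

#define MAX_POLS	4
#define MAX_GROUPS	8

struct group {
	void *pd[MAX_POLS];	/* per-policy per-group data, like blkcg->pd[] */
};

static struct group *groups[MAX_GROUPS];
static int nr_groups;

static int register_policy(int plid, size_t pd_size)
{
	int i;

	/* eager allocation: every existing group gets its data now */
	for (i = 0; i < nr_groups; i++) {
		groups[i]->pd[plid] = calloc(1, pd_size);
		if (!groups[i]->pd[plid])
			goto fail;
	}
	return 0;
fail:
	while (i--) {
		free(groups[i]->pd[plid]);
		groups[i]->pd[plid] = NULL;
	}
	return -1;
}

static void unregister_policy(int plid)
{
	int i;

	/*
	 * Data is torn down with the policy, so a later policy reusing this
	 * slot never inherits a stale pointer.
	 */
	for (i = 0; i < nr_groups; i++) {
		free(groups[i]->pd[plid]);
		groups[i]->pd[plid] = NULL;
	}
}

int main(void)
{
	groups[nr_groups++] = calloc(1, sizeof(struct group));

	register_policy(0, 64);
	printf("pd[0] after register:   %p\n", groups[0]->pd[0]);
	unregister_policy(0);
	printf("pd[0] after unregister: %p\n", groups[0]->pd[0]);

	free(groups[0]);
	return 0;
}

The point of the model is the invariant the patch restores: a group's pd[plid] slot is populated exactly while the policy occupying that slot is registered, which rules out both failure modes described above.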
1 parent 7876f93 commit 06b285b

File tree

2 files changed: +44 additions, -44 deletions


block/blk-cgroup.c

Lines changed: 42 additions & 36 deletions
@@ -1048,10 +1048,8 @@ int blkcg_activate_policy(struct request_queue *q,
			  const struct blkcg_policy *pol)
 {
	LIST_HEAD(pds);
-	LIST_HEAD(cpds);
	struct blkcg_gq *blkg;
	struct blkg_policy_data *pd, *nd;
-	struct blkcg_policy_data *cpd, *cnd;
	int cnt = 0, ret;
 
	if (blkcg_policy_enabled(q, pol))
@@ -1064,26 +1062,14 @@ int blkcg_activate_policy(struct request_queue *q,
		cnt++;
	spin_unlock_irq(q->queue_lock);
 
-	/*
-	 * Allocate per-blkg and per-blkcg policy data
-	 * for all existing blkgs.
-	 */
+	/* allocate per-blkg policy data for all existing blkgs */
	while (cnt--) {
		pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
		if (!pd) {
			ret = -ENOMEM;
			goto out_free;
		}
		list_add_tail(&pd->alloc_node, &pds);
-
-		if (!pol->cpd_size)
-			continue;
-		cpd = kzalloc_node(pol->cpd_size, GFP_KERNEL, q->node);
-		if (!cpd) {
-			ret = -ENOMEM;
-			goto out_free;
-		}
-		list_add_tail(&cpd->alloc_node, &cpds);
	}
 
	/*
@@ -1093,32 +1079,17 @@ int blkcg_activate_policy(struct request_queue *q,
	spin_lock_irq(q->queue_lock);
 
	list_for_each_entry(blkg, &q->blkg_list, q_node) {
-		if (WARN_ON(list_empty(&pds)) ||
-		    WARN_ON(pol->cpd_size && list_empty(&cpds))) {
+		if (WARN_ON(list_empty(&pds))) {
			/* umm... this shouldn't happen, just abort */
			ret = -ENOMEM;
			goto out_unlock;
		}
-		cpd = list_first_entry(&cpds, struct blkcg_policy_data,
-				       alloc_node);
-		list_del_init(&cpd->alloc_node);
		pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
		list_del_init(&pd->alloc_node);
 
		/* grab blkcg lock too while installing @pd on @blkg */
		spin_lock(&blkg->blkcg->lock);
 
-		if (!pol->cpd_size)
-			goto no_cpd;
-		if (!blkg->blkcg->pd[pol->plid]) {
-			/* Per-policy per-blkcg data */
-			blkg->blkcg->pd[pol->plid] = cpd;
-			cpd->plid = pol->plid;
-			pol->cpd_init_fn(blkg->blkcg);
-		} else { /* must free it as it has already been extracted */
-			kfree(cpd);
-		}
-no_cpd:
		blkg->pd[pol->plid] = pd;
		pd->blkg = blkg;
		pd->plid = pol->plid;
@@ -1135,8 +1106,6 @@ int blkcg_activate_policy(struct request_queue *q,
	blk_queue_bypass_end(q);
	list_for_each_entry_safe(pd, nd, &pds, alloc_node)
		kfree(pd);
-	list_for_each_entry_safe(cpd, cnd, &cpds, alloc_node)
-		kfree(cpd);
	return ret;
 }
 EXPORT_SYMBOL_GPL(blkcg_activate_policy);
@@ -1191,6 +1160,7 @@ EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
  */
 int blkcg_policy_register(struct blkcg_policy *pol)
 {
+	struct blkcg *blkcg;
	int i, ret;
 
	if (WARN_ON(pol->pd_size < sizeof(struct blkg_policy_data)))
@@ -1207,9 +1177,27 @@ int blkcg_policy_register(struct blkcg_policy *pol)
	if (i >= BLKCG_MAX_POLS)
		goto err_unlock;
 
-	/* register and update blkgs */
+	/* register @pol */
	pol->plid = i;
-	blkcg_policy[i] = pol;
+	blkcg_policy[pol->plid] = pol;
+
+	/* allocate and install cpd's */
+	if (pol->cpd_size) {
+		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
+			struct blkcg_policy_data *cpd;
+
+			cpd = kzalloc(pol->cpd_size, GFP_KERNEL);
+			if (!cpd) {
+				mutex_unlock(&blkcg_pol_mutex);
+				goto err_free_cpds;
+			}
+
+			blkcg->pd[pol->plid] = cpd;
+			cpd->plid = pol->plid;
+			pol->cpd_init_fn(blkcg);
+		}
+	}
+
	mutex_unlock(&blkcg_pol_mutex);
 
	/* everything is in place, add intf files for the new policy */
@@ -1219,6 +1207,14 @@ int blkcg_policy_register(struct blkcg_policy *pol)
	mutex_unlock(&blkcg_pol_register_mutex);
	return 0;
 
+err_free_cpds:
+	if (pol->cpd_size) {
+		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
+			kfree(blkcg->pd[pol->plid]);
+			blkcg->pd[pol->plid] = NULL;
+		}
+	}
+	blkcg_policy[pol->plid] = NULL;
 err_unlock:
	mutex_unlock(&blkcg_pol_mutex);
	mutex_unlock(&blkcg_pol_register_mutex);
@@ -1234,6 +1230,8 @@ EXPORT_SYMBOL_GPL(blkcg_policy_register);
  */
 void blkcg_policy_unregister(struct blkcg_policy *pol)
 {
+	struct blkcg *blkcg;
+
	mutex_lock(&blkcg_pol_register_mutex);
 
	if (WARN_ON(blkcg_policy[pol->plid] != pol))
@@ -1243,9 +1241,17 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
	if (pol->cftypes)
		cgroup_rm_cftypes(pol->cftypes);
 
-	/* unregister and update blkgs */
+	/* remove cpds and unregister */
	mutex_lock(&blkcg_pol_mutex);
+
+	if (pol->cpd_size) {
+		list_for_each_entry(blkcg, &all_blkcgs, all_blkcgs_node) {
+			kfree(blkcg->pd[pol->plid]);
+			blkcg->pd[pol->plid] = NULL;
+		}
+	}
	blkcg_policy[pol->plid] = NULL;
+
	mutex_unlock(&blkcg_pol_mutex);
 out_unlock:
	mutex_unlock(&blkcg_pol_register_mutex);

include/linux/blk-cgroup.h

Lines changed: 2 additions & 8 deletions
@@ -89,18 +89,12 @@ struct blkg_policy_data {
  * Policies that need to keep per-blkcg data which is independent
  * from any request_queue associated to it must specify its size
  * with the cpd_size field of the blkcg_policy structure and
- * embed a blkcg_policy_data in it. blkcg core allocates
- * policy-specific per-blkcg structures lazily the first time
- * they are actually needed, so it handles them together with
- * blkgs. cpd_init() is invoked to let each policy handle
- * per-blkcg data.
+ * embed a blkcg_policy_data in it. cpd_init() is invoked to let
+ * each policy handle per-blkcg data.
  */
 struct blkcg_policy_data {
	/* the policy id this per-policy data belongs to */
	int plid;
-
-	/* used during policy activation */
-	struct list_head alloc_node;
 };
 
 /* association between a blk cgroup and a request queue */
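To make the convention in the comment above concrete, here is a hedged, kernel-context sketch of how a hypothetical policy might declare per-blkcg data under this scheme. All example_* names are invented; the fields used (pd_size, cpd_size, cpd_init_fn, plid, blkcg->pd[]) are the ones visible in the diffs above, and the cpd_init_fn prototype is assumed from the pol->cpd_init_fn(blkcg) call site in blk-cgroup.c, so treat the exact signature as an assumption rather than a quote of the header.

/*
 * Hypothetical policy-side usage, not part of this patch. The embedded
 * blkcg_policy_data must be the first field: blkcg core allocates cpd_size
 * bytes and treats the start of that allocation as a struct
 * blkcg_policy_data (see blkcg_policy_register() above). A real policy
 * would also provide its per-(blkcg, request_queue) callbacks such as
 * pd_init_fn; they are omitted from this sketch.
 */
#include <linux/module.h>
#include <linux/blk-cgroup.h>

struct example_group_data {
	struct blkcg_policy_data cpd;	/* must come first */
	unsigned int weight;		/* policy-private per-blkcg state */
};

static struct blkcg_policy example_policy;

/* prototype assumed from the pol->cpd_init_fn(blkcg) call site above */
static void example_cpd_init(struct blkcg *blkcg)
{
	struct example_group_data *egd =
		container_of(blkcg->pd[example_policy.plid],
			     struct example_group_data, cpd);

	egd->weight = 100;		/* arbitrary default for the sketch */
}

static struct blkcg_policy example_policy = {
	.pd_size	= sizeof(struct blkg_policy_data),
	.cpd_size	= sizeof(struct example_group_data),
	.cpd_init_fn	= example_cpd_init,
};

static int __init example_policy_init(void)
{
	/* now also allocates and initializes a cpd for every existing blkcg */
	return blkcg_policy_register(&example_policy);
}

static void __exit example_policy_exit(void)
{
	/* now also frees every cpd this policy installed */
	blkcg_policy_unregister(&example_policy);
}

module_init(example_policy_init);
module_exit(example_policy_exit);
MODULE_LICENSE("GPL");

With this patch applied, blkcg_policy_register(&example_policy) immediately allocates one example_group_data per existing blkcg and runs example_cpd_init() on each, and blkcg_policy_unregister() frees them again, so the policy never observes a blkcg without its per-blkcg data and never inherits another policy's leftovers in the same slot.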
