Skip to content

Commit 0fe061b

Browse files
dennisszhou and axboe
authored and committed
blkcg: fix ref count issue with bio_blkcg() using task_css
The bio_blkcg() function turns out to be inconsistent and consequently dangerous to use. The first part returns a blkcg where a reference is owned by the bio meaning it does not need to be rcu protected. However, the third case, the last line, is problematic: return css_to_blkcg(task_css(current, io_cgrp_id)); This can race against task migration and the cgroup dying. It is also semantically different as it must be called rcu protected and is susceptible to failure when trying to get a reference to it. This patch adds association ahead of calling bio_blkcg() rather than after. This makes association a required and explicit step along the code paths for calling bio_blkcg(). In blk-iolatency, association is moved above the bio_blkcg() call to ensure it will not return %NULL. BFQ uses the old bio_blkcg() function, but I do not want to address it in this series due to the complexity. I have created a private version documenting the inconsistency and noting not to use it. Signed-off-by: Dennis Zhou <dennis@kernel.org> Acked-by: Tejun Heo <tj@kernel.org> Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent 6e0de61 commit 0fe061b

File tree

5 files changed

+102
-14
lines changed

5 files changed

+102
-14
lines changed

block/bfq-cgroup.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
642642
uint64_t serial_nr;
643643

644644
rcu_read_lock();
645-
serial_nr = bio_blkcg(bio)->css.serial_nr;
645+
serial_nr = __bio_blkcg(bio)->css.serial_nr;
646646

647647
/*
648648
* Check whether blkcg has changed. The condition may trigger
@@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
651651
if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
652652
goto out;
653653

654-
bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
654+
bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
655655
/*
656656
* Update blkg_path for bfq_log_* functions. We cache this
657657
* path, and update it here, for the following

block/bfq-iosched.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
43844384

43854385
rcu_read_lock();
43864386

4387-
bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
4387+
bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
43884388
if (!bfqg) {
43894389
bfqq = &bfqd->oom_bfqq;
43904390
goto out;

block/bio.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1990,13 +1990,19 @@ int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
19901990
*
19911991
* This function takes an extra reference of @blkcg_css which will be put
19921992
* when @bio is released. The caller must own @bio and is responsible for
1993-
* synchronizing calls to this function.
1993+
* synchronizing calls to this function. If @blkcg_css is %NULL, a call to
1994+
* blkcg_get_css() finds the current css from the kthread or task.
19941995
*/
19951996
int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
19961997
{
19971998
if (unlikely(bio->bi_css))
19981999
return -EBUSY;
1999-
css_get(blkcg_css);
2000+
2001+
if (blkcg_css)
2002+
css_get(blkcg_css);
2003+
else
2004+
blkcg_css = blkcg_get_css();
2005+
20002006
bio->bi_css = blkcg_css;
20012007
return 0;
20022008
}

block/blk-iolatency.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -481,8 +481,8 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio)
481481
return;
482482

483483
rcu_read_lock();
484+
bio_associate_blkcg(bio, NULL);
484485
blkcg = bio_blkcg(bio);
485-
bio_associate_blkcg(bio, &blkcg->css);
486486
blkg = blkg_lookup(blkcg, q);
487487
if (unlikely(!blkg)) {
488488
spin_lock_irq(&q->queue_lock);

include/linux/blk-cgroup.h

Lines changed: 90 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -227,22 +227,103 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
227227
char *input, struct blkg_conf_ctx *ctx);
228228
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
229229

230+
/**
231+
* blkcg_css - find the current css
232+
*
233+
* Find the css associated with either the kthread or the current task.
234+
* This may return a dying css, so it is up to the caller to use tryget logic
235+
* to confirm it is alive and well.
236+
*/
237+
static inline struct cgroup_subsys_state *blkcg_css(void)
238+
{
239+
struct cgroup_subsys_state *css;
240+
241+
css = kthread_blkcg();
242+
if (css)
243+
return css;
244+
return task_css(current, io_cgrp_id);
245+
}
246+
247+
/**
248+
* blkcg_get_css - find and get a reference to the css
249+
*
250+
* Find the css associated with either the kthread or the current task.
251+
* This takes a reference on the blkcg which will need to be managed by the
252+
* caller.
253+
*/
254+
static inline struct cgroup_subsys_state *blkcg_get_css(void)
255+
{
256+
struct cgroup_subsys_state *css;
257+
258+
rcu_read_lock();
259+
260+
css = kthread_blkcg();
261+
if (css) {
262+
css_get(css);
263+
} else {
264+
/*
265+
* This is a bit complicated. It is possible task_css() is
266+
* seeing an old css pointer here. This is caused by the
267+
* current thread migrating away from this cgroup and this
268+
* cgroup dying. css_tryget() will fail when trying to take a
269+
* ref on a cgroup that's ref count has hit 0.
270+
*
271+
* Therefore, if it does fail, this means current must have
272+
* been swapped away already and this is waiting for it to
273+
* propagate on the polling cpu. Hence the use of cpu_relax().
274+
*/
275+
while (true) {
276+
css = task_css(current, io_cgrp_id);
277+
if (likely(css_tryget(css)))
278+
break;
279+
cpu_relax();
280+
}
281+
}
282+
283+
rcu_read_unlock();
284+
285+
return css;
286+
}
230287

231288
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
232289
{
233290
return css ? container_of(css, struct blkcg, css) : NULL;
234291
}
235292

236-
static inline struct blkcg *bio_blkcg(struct bio *bio)
293+
/**
294+
* __bio_blkcg - internal, inconsistent version to get blkcg
295+
*
296+
* DO NOT USE.
297+
* This function is inconsistent and consequently is dangerous to use. The
298+
* first part of the function returns a blkcg where a reference is owned by the
299+
* bio. This means it does not need to be rcu protected as it cannot go away
300+
* with the bio owning a reference to it. However, the latter potentially gets
301+
* it from task_css(). This can race against task migration and the cgroup
302+
* dying. It is also semantically different as it must be called rcu protected
303+
* and is susceptible to failure when trying to get a reference to it.
304+
* Therefore, it is not ok to assume that *_get() will always succeed on the
305+
* blkcg returned here.
306+
*/
307+
static inline struct blkcg *__bio_blkcg(struct bio *bio)
237308
{
238-
struct cgroup_subsys_state *css;
309+
if (bio && bio->bi_css)
310+
return css_to_blkcg(bio->bi_css);
311+
return css_to_blkcg(blkcg_css());
312+
}
239313

314+
/**
315+
* bio_blkcg - grab the blkcg associated with a bio
316+
* @bio: target bio
317+
*
318+
* This returns the blkcg associated with a bio, %NULL if not associated.
319+
* Callers are expected to either handle %NULL or know association has been
320+
* done prior to calling this.
321+
*/
322+
static inline struct blkcg *bio_blkcg(struct bio *bio)
323+
{
240324
if (bio && bio->bi_css)
241325
return css_to_blkcg(bio->bi_css);
242-
css = kthread_blkcg();
243-
if (css)
244-
return css_to_blkcg(css);
245-
return css_to_blkcg(task_css(current, io_cgrp_id));
326+
return NULL;
246327
}
247328

248329
static inline bool blk_cgroup_congested(void)
@@ -710,10 +791,10 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
710791
bool throtl = false;
711792

712793
rcu_read_lock();
713-
blkcg = bio_blkcg(bio);
714794

715795
/* associate blkcg if bio hasn't attached one */
716-
bio_associate_blkcg(bio, &blkcg->css);
796+
bio_associate_blkcg(bio, NULL);
797+
blkcg = bio_blkcg(bio);
717798

718799
blkg = blkg_lookup(blkcg, q);
719800
if (unlikely(!blkg)) {
@@ -835,6 +916,7 @@ static inline int blkcg_activate_policy(struct request_queue *q,
835916
static inline void blkcg_deactivate_policy(struct request_queue *q,
836917
const struct blkcg_policy *pol) { }
837918

919+
static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
838920
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
839921

840922
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,

0 commit comments

Comments
 (0)