io_uring: fix REQ_F_COMP_LOCKED by killing it
ANBZ: #501

commit 216578e upstream.

REQ_F_COMP_LOCKED is used and implemented in a buggy way. The problem is
that the flag is set before io_put_req() but not cleared afterwards, so if
that put was not the final reference, the request will later be freed, with
the flag still set, from some other context that may not hold the spinlock.
That opens up races with linked-timeout removal and unsynchronised
completion (e.g. access to the CQ).
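
For illustration, a condensed sketch of the buggy pattern, pieced together
from the hunks removed below (io_kill_timeout() and the old
io_kill_linked_timeout()); this is a simplified sketch, not complete code:

	/* Caller holds ->completion_lock and marks the request accordingly. */
	req->flags |= REQ_F_COMP_LOCKED;
	io_cqring_fill_event(req, 0);
	io_put_req(req);		/* may not drop the final reference */

	/*
	 * The final put can then happen from another context that does not
	 * hold the lock, and paths such as the old io_kill_linked_timeout()
	 * trust the stale flag and skip locking entirely:
	 */
	if (!(req->flags & REQ_F_COMP_LOCKED)) {
		spin_lock_irqsave(&ctx->completion_lock, flags);
		wake_ev = __io_kill_linked_timeout(req);
		spin_unlock_irqrestore(&ctx->completion_lock, flags);
	} else {
		/* lock assumed held -> racy timeout removal and CQ access */
		wake_ev = __io_kill_linked_timeout(req);
	}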

Instead of fixing REQ_F_COMP_LOCKED, kill the flag and use task_work_add()
to move such requests to a fresh context and free them from there, as was
already done for __io_free_req_finish().
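
A condensed sketch of the replacement pattern, matching the new helpers
added in the diff below: the final put is deferred to task context via
task_work, falling back to the io-wq manager task if task_work_add() fails
(e.g. the task is exiting):

	static void io_put_req_deferred_cb(struct callback_head *cb)
	{
		struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);

		/* Runs in task context, with no spinlock held. */
		io_free_req(req);
	}

	static void io_free_req_deferred(struct io_kiocb *req)
	{
		int ret;

		init_task_work(&req->task_work, io_put_req_deferred_cb);
		ret = task_work_add(req->task, &req->task_work, TWA_SIGNAL);
		if (unlikely(ret)) {
			struct task_struct *tsk = io_wq_get_task(req->ctx->io_wq);

			task_work_add(tsk, &req->task_work, TWA_NONE);
			wake_up_process(tsk);
		}
	}

	static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
	{
		/* Drop @refs references; defer the free if they were the last. */
		if (refcount_sub_and_test(refs, &req->refs))
			io_free_req_deferred(req);
	}

Call sites that used to set REQ_F_COMP_LOCKED before io_put_req() now call
io_put_req_deferred(req, 1) (or pass 2 to drop both the submit and complete
references), as seen in io_kill_timeout(), __io_queue_deferred(),
io_poll_remove_one() and io_timeout_cancel().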

[ANCK backport notes]
The core idea is to defer the request put into a task work item, so the
code conflicts are resolved with that in mind. In addition, __io_free_req()
is adjusted slightly.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Reviewed-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
isilence authored and josephhz committed Feb 25, 2022
1 parent be4dadc commit ed8ec78
Showing 1 changed file with 58 additions and 75 deletions.
133 changes: 58 additions & 75 deletions fs/io_uring.c
@@ -569,7 +569,6 @@ enum {
REQ_F_TIMEOUT_BIT,
REQ_F_ISREG_BIT,
REQ_F_TIMEOUT_NOSEQ_BIT,
REQ_F_COMP_LOCKED_BIT,
REQ_F_NEED_CLEANUP_BIT,
REQ_F_OVERFLOW_BIT,
REQ_F_POLLED_BIT,
@@ -617,8 +616,6 @@ enum {
REQ_F_ISREG = BIT(REQ_F_ISREG_BIT),
/* no timeout sequence */
REQ_F_TIMEOUT_NOSEQ = BIT(REQ_F_TIMEOUT_NOSEQ_BIT),
/* completion under lock */
REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT),
/* needs cleanup */
REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
/* in overflow list */
@@ -921,7 +918,8 @@ static const struct io_op_def io_op_defs[] = {

static void io_cqring_fill_event(struct io_kiocb *req, long res);
static void io_put_req(struct io_kiocb *req);
static void __io_double_put_req(struct io_kiocb *req);
static void io_put_req_deferred(struct io_kiocb *req, int nr);
static void io_double_put_req(struct io_kiocb *req);
static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req);
static void __io_queue_linked_timeout(struct io_kiocb *req);
static void io_queue_linked_timeout(struct io_kiocb *req);
@@ -1197,9 +1195,8 @@ static void io_kill_timeout(struct io_kiocb *req)
if (ret != -1) {
atomic_inc(&req->ctx->cq_timeouts);
list_del_init(&req->list);
req->flags |= REQ_F_COMP_LOCKED;
io_cqring_fill_event(req, 0);
io_put_req(req);
io_put_req_deferred(req, 1);
}
}

@@ -1228,8 +1225,7 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
if (link) {
__io_queue_linked_timeout(link);
/* drop submission reference */
link->flags |= REQ_F_COMP_LOCKED;
io_put_req(link);
io_put_req_deferred(link, 1);
}
} while (!list_empty(&ctx->defer_list));
}
@@ -1522,14 +1518,17 @@ static void io_dismantle_req(struct io_kiocb *req)
io_req_clean_work(req);
}

static void __io_free_req_finish(struct io_kiocb *req)
static void __io_free_req(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;

io_dismantle_req(req);
__io_put_req_task(req);
percpu_ref_put(&req->ctx->refs);
if (likely(!io_is_fallback_req(req)))
kmem_cache_free(req_cachep, req);
else
clear_bit_unlock(0, (unsigned long *) &req->ctx->fallback_req);
percpu_ref_put(&ctx->refs);
}

struct req_batch {
@@ -1553,45 +1552,17 @@ static void io_free_req_many(struct io_ring_ctx *ctx, struct req_batch *rb)
rb->to_free = rb->need_iter = 0;
}

static void io_req_task_file_table_put(struct callback_head *cb)
{
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);

io_dismantle_req(req);
__io_free_req_finish(req);
}

static void __io_free_req(struct io_kiocb *req)
{
if (!(req->flags & REQ_F_COMP_LOCKED)) {
io_dismantle_req(req);
__io_free_req_finish(req);
} else {
int ret;

init_task_work(&req->task_work, io_req_task_file_table_put);
ret = task_work_add(req->task, &req->task_work, TWA_SIGNAL);
if (unlikely(ret)) {
struct task_struct *tsk;

tsk = io_wq_get_task(req->ctx->io_wq);
task_work_add(tsk, &req->task_work, TWA_NONE);
}
}
}

static bool io_link_cancel_timeout(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
int ret;

ret = hrtimer_try_to_cancel(&req->io->timeout.timer);
if (ret != -1) {
req->flags |= REQ_F_COMP_LOCKED;
io_cqring_fill_event(req, -ECANCELED);
io_commit_cqring(ctx);
req->flags &= ~REQ_F_LINK_HEAD;
io_put_req(req);
io_put_req_deferred(req, 1);
return true;
}

@@ -1618,17 +1589,12 @@ static bool __io_kill_linked_timeout(struct io_kiocb *req)
static void io_kill_linked_timeout(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
bool wake_ev;

if (!(req->flags & REQ_F_COMP_LOCKED)) {
unsigned long flags;

spin_lock_irqsave(&ctx->completion_lock, flags);
wake_ev = __io_kill_linked_timeout(req);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
} else {
wake_ev = __io_kill_linked_timeout(req);
}
spin_lock_irqsave(&ctx->completion_lock, flags);
wake_ev = __io_kill_linked_timeout(req);
spin_unlock_irqrestore(&ctx->completion_lock, flags);

if (wake_ev)
io_cqring_ev_posted(ctx);
@@ -1673,27 +1639,29 @@ static void __io_fail_links(struct io_kiocb *req)
trace_io_uring_fail_link(req, link);

io_cqring_fill_event(link, -ECANCELED);
link->flags |= REQ_F_COMP_LOCKED;
__io_double_put_req(link);

/*
* It's ok to free under spinlock as they're not linked anymore,
* but avoid REQ_F_WORK_INITIALIZED because it may deadlock on
* work.fs->lock.
*/
if (link->flags & REQ_F_WORK_INITIALIZED)
io_put_req_deferred(link, 2);
else
io_double_put_req(link);
}

io_commit_cqring(ctx);
io_cqring_ev_posted(ctx);
}

static void io_fail_links(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;

if (!(req->flags & REQ_F_COMP_LOCKED)) {
unsigned long flags;

spin_lock_irqsave(&ctx->completion_lock, flags);
__io_fail_links(req);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
} else {
__io_fail_links(req);
}
spin_lock_irqsave(&ctx->completion_lock, flags);
__io_fail_links(req);
spin_unlock_irqrestore(&ctx->completion_lock, flags);

io_cqring_ev_posted(ctx);
}
@@ -1748,6 +1716,34 @@ static void io_put_req(struct io_kiocb *req)
io_free_req(req);
}

static void io_put_req_deferred_cb(struct callback_head *cb)
{
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);

io_free_req(req);
}

static void io_free_req_deferred(struct io_kiocb *req)
{
int ret;

init_task_work(&req->task_work, io_put_req_deferred_cb);
ret = task_work_add(req->task, &req->task_work, TWA_SIGNAL);
if (unlikely(ret)) {
struct task_struct *tsk;

tsk = io_wq_get_task(req->ctx->io_wq);
task_work_add(tsk, &req->task_work, TWA_NONE);
wake_up_process(tsk);
}
}

static inline void io_put_req_deferred(struct io_kiocb *req, int refs)
{
if (refcount_sub_and_test(refs, &req->refs))
io_free_req_deferred(req);
}

static struct io_wq_work *io_steal_work(struct io_kiocb *req)
{
struct io_kiocb *link, *nxt = NULL;
@@ -1773,17 +1769,6 @@ static struct io_wq_work *io_steal_work(struct io_kiocb *req)
return &nxt->work;
}

/*
* Must only be used if we don't need to care about links, usually from
* within the completion handling itself.
*/
static void __io_double_put_req(struct io_kiocb *req)
{
/* drop both submit and complete references */
if (refcount_sub_and_test(2, &req->refs))
__io_free_req(req);
}

static void io_double_put_req(struct io_kiocb *req)
{
/* drop both submit and complete references */
@@ -5052,8 +5037,7 @@ static bool io_poll_remove_one(struct io_kiocb *req)
if (do_complete) {
io_cqring_fill_event(req, -ECANCELED);
io_commit_cqring(req->ctx);
req->flags |= REQ_F_COMP_LOCKED;
io_put_req(req);
io_put_req_deferred(req, 1);
}

return do_complete;
@@ -5250,9 +5234,8 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
return -EALREADY;

req_set_fail_links(req);
req->flags |= REQ_F_COMP_LOCKED;
io_cqring_fill_event(req, -ECANCELED);
io_put_req(req);
io_put_req_deferred(req, 1);
return 0;
}
