Skip to content

Commit 6e3da40

Browse files
committed
Merge branch 'for-6.15/io_uring-epoll-wait' into for-6.15/io_uring-reg-vec
* for-6.15/io_uring-epoll-wait:
  io_uring/epoll: add support for IORING_OP_EPOLL_WAIT
  io_uring/epoll: remove CONFIG_EPOLL guards
  eventpoll: add epoll_sendevents() helper
  eventpoll: abstract out ep_try_send_events() helper
  eventpoll: abstract out parameter sanity checking
2 parents 78b6f6e + 19f7e94 commit 6e3da40

File tree

7 files changed

+122
-30
lines changed

7 files changed

+122
-30
lines changed

fs/eventpoll.c

Lines changed: 63 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1980,6 +1980,22 @@ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry,
19801980
return ret;
19811981
}
19821982

1983+
static int ep_try_send_events(struct eventpoll *ep,
1984+
struct epoll_event __user *events, int maxevents)
1985+
{
1986+
int res;
1987+
1988+
/*
1989+
* Try to transfer events to user space. In case we get 0 events and
1990+
* there's still timeout left over, we go trying again in search of
1991+
* more luck.
1992+
*/
1993+
res = ep_send_events(ep, events, maxevents);
1994+
if (res > 0)
1995+
ep_suspend_napi_irqs(ep);
1996+
return res;
1997+
}
1998+
19831999
/**
19842000
* ep_poll - Retrieves ready events, and delivers them to the caller-supplied
19852001
* event buffer.
@@ -2031,17 +2047,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
20312047

20322048
while (1) {
20332049
if (eavail) {
2034-
/*
2035-
* Try to transfer events to user space. In case we get
2036-
* 0 events and there's still timeout left over, we go
2037-
* trying again in search of more luck.
2038-
*/
2039-
res = ep_send_events(ep, events, maxevents);
2040-
if (res) {
2041-
if (res > 0)
2042-
ep_suspend_napi_irqs(ep);
2050+
res = ep_try_send_events(ep, events, maxevents);
2051+
if (res)
20432052
return res;
2044-
}
20452053
}
20462054

20472055
if (timed_out)
@@ -2445,6 +2453,47 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
24452453
return do_epoll_ctl(epfd, op, fd, &epds, false);
24462454
}
24472455

2456+
/*
 * Common argument validation for the epoll wait paths.
 *
 * Returns 0 when everything checks out, -EINVAL when @maxevents is outside
 * 1..EP_MAX_EVENTS or @file is not an eventpoll file, and -EFAULT when the
 * user buffer fails access_ok() for @maxevents entries. The checks run in
 * that order: count, buffer, file type.
 */
static int ep_check_params(struct file *file, struct epoll_event __user *evs,
			   int maxevents)
{
	/* The event count must be positive and no larger than the cap. */
	if (maxevents < 1 || maxevents > EP_MAX_EVENTS)
		return -EINVAL;

	/* The user-supplied area must be writeable for all requested slots. */
	if (!access_ok(evs, maxevents * sizeof(struct epoll_event)))
		return -EFAULT;

	/* The file behind the caller's fd has to be an eventpoll file. */
	return is_file_epoll(file) ? 0 : -EINVAL;
}
2476+
2477+
/*
 * Copy already-ready events from an epoll file to userspace.
 *
 * Parameters are validated with ep_check_params() and its error is returned
 * as-is. Returns 0 when ep_events_available() reports nothing pending,
 * otherwise the result of ep_try_send_events().
 */
int epoll_sendevents(struct file *file, struct epoll_event __user *events,
		     int maxevents)
{
	struct eventpoll *ep;
	int err = ep_check_params(file, events, maxevents);

	if (unlikely(err))
		return err;

	ep = file->private_data;

	/*
	 * This availability check is racy, which is acceptable: the request
	 * should get retried based on poll readiness anyway.
	 */
	if (!ep_events_available(ep))
		return 0;

	return ep_try_send_events(ep, events, maxevents);
}
2496+
24482497
/*
24492498
* Implement the event wait interface for the eventpoll file. It is the kernel
24502499
* part of the user space epoll_wait(2).
@@ -2453,26 +2502,16 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events,
24532502
int maxevents, struct timespec64 *to)
24542503
{
24552504
struct eventpoll *ep;
2456-
2457-
/* The maximum number of event must be greater than zero */
2458-
if (maxevents <= 0 || maxevents > EP_MAX_EVENTS)
2459-
return -EINVAL;
2460-
2461-
/* Verify that the area passed by the user is writeable */
2462-
if (!access_ok(events, maxevents * sizeof(struct epoll_event)))
2463-
return -EFAULT;
2505+
int ret;
24642506

24652507
/* Get the "struct file *" for the eventpoll file */
24662508
CLASS(fd, f)(epfd);
24672509
if (fd_empty(f))
24682510
return -EBADF;
24692511

2470-
/*
2471-
* We have to check that the file structure underneath the fd
2472-
* the user passed to us _is_ an eventpoll file.
2473-
*/
2474-
if (!is_file_epoll(fd_file(f)))
2475-
return -EINVAL;
2512+
ret = ep_check_params(fd_file(f), events, maxevents);
2513+
if (unlikely(ret))
2514+
return ret;
24762515

24772516
/*
24782517
* At this point it is safe to assume that the "private_data" contains

include/linux/eventpoll.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long t
2525
/* Used to release the epoll bits inside the "struct file" */
2626
void eventpoll_release_file(struct file *file);
2727

28+
/*
 * Copy ready events to userspace.
 *
 * Validates @file, @events and @maxevents via ep_check_params() and returns
 * its negative error (-EINVAL or -EFAULT) on failure. Returns 0 when no
 * events are available, otherwise whatever ep_try_send_events() returns.
 */
29+
int epoll_sendevents(struct file *file, struct epoll_event __user *events,
30+
int maxevents);
31+
2832
/*
2933
* This is called from inside fs/file_table.c:__fput() to unlink files
3034
* from the eventpoll interface. We need to have this facility to cleanup

include/uapi/linux/io_uring.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ enum io_uring_op {
280280
IORING_OP_BIND,
281281
IORING_OP_LISTEN,
282282
IORING_OP_RECV_ZC,
283+
IORING_OP_EPOLL_WAIT,
283284

284285
/* this goes last, obviously */
285286
IORING_OP_LAST,

io_uring/Makefile

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \
1111
eventfd.o uring_cmd.o openclose.o \
1212
sqpoll.o xattr.o nop.o fs.o splice.o \
1313
sync.o msg_ring.o advise.o openclose.o \
14-
epoll.o statx.o timeout.o fdinfo.o \
15-
cancel.o waitid.o register.o \
16-
truncate.o memmap.o alloc_cache.o
14+
statx.o timeout.o fdinfo.o cancel.o \
15+
waitid.o register.o truncate.o \
16+
memmap.o alloc_cache.o
1717
obj-$(CONFIG_IO_URING_ZCRX) += zcrx.o
1818
obj-$(CONFIG_IO_WQ) += io-wq.o
1919
obj-$(CONFIG_FUTEX) += futex.o
20-
obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
20+
obj-$(CONFIG_EPOLL) += epoll.o
21+
obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o

io_uring/epoll.c

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
#include "io_uring.h"
1313
#include "epoll.h"
1414

15-
#if defined(CONFIG_EPOLL)
1615
struct io_epoll {
1716
struct file *file;
1817
int epfd;
@@ -21,6 +20,12 @@ struct io_epoll {
2120
struct epoll_event event;
2221
};
2322

23+
/*
 * Per-request command data for IORING_OP_EPOLL_WAIT, obtained through
 * io_kiocb_to_cmd().
 *
 * file:      not accessed by the code visible here (io_epoll_wait() passes
 *            req->file). NOTE(review): presumably mirrors the leading file
 *            pointer of struct io_epoll - confirm.
 * maxevents: filled from sqe->len in io_epoll_wait_prep().
 * events:    user-space event buffer, filled from sqe->addr in
 *            io_epoll_wait_prep().
 */
struct io_epoll_wait {
24+
struct file *file;
25+
int maxevents;
26+
struct epoll_event __user *events;
27+
};
28+
2429
int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
2530
{
2631
struct io_epoll *epoll = io_kiocb_to_cmd(req, struct io_epoll);
@@ -58,4 +63,30 @@ int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags)
5863
io_req_set_res(req, ret, 0);
5964
return IOU_OK;
6065
}
61-
#endif
66+
67+
int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
68+
{
69+
struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
70+
71+
if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in)
72+
return -EINVAL;
73+
74+
iew->maxevents = READ_ONCE(sqe->len);
75+
iew->events = u64_to_user_ptr(READ_ONCE(sqe->addr));
76+
return 0;
77+
}
78+
79+
int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags)
80+
{
81+
struct io_epoll_wait *iew = io_kiocb_to_cmd(req, struct io_epoll_wait);
82+
int ret;
83+
84+
ret = epoll_sendevents(req->file, iew->events, iew->maxevents);
85+
if (ret == 0)
86+
return -EAGAIN;
87+
if (ret < 0)
88+
req_set_fail(req);
89+
90+
io_req_set_res(req, ret, 0);
91+
return IOU_OK;
92+
}

io_uring/epoll.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,6 @@
33
#if defined(CONFIG_EPOLL)
44
int io_epoll_ctl_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
55
int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags);
6+
int io_epoll_wait_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
7+
int io_epoll_wait(struct io_kiocb *req, unsigned int issue_flags);
68
#endif

io_uring/opdef.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,17 @@ const struct io_issue_def io_issue_defs[] = {
527527
.issue = io_recvzc,
528528
#else
529529
.prep = io_eopnotsupp_prep,
530+
#endif
531+
},
532+
[IORING_OP_EPOLL_WAIT] = {
533+
.needs_file = 1,
534+
.audit_skip = 1,
535+
.pollin = 1,
536+
#if defined(CONFIG_EPOLL)
537+
.prep = io_epoll_wait_prep,
538+
.issue = io_epoll_wait,
539+
#else
540+
.prep = io_eopnotsupp_prep,
530541
#endif
531542
},
532543
};
@@ -761,6 +772,9 @@ const struct io_cold_def io_cold_defs[] = {
761772
[IORING_OP_RECV_ZC] = {
762773
.name = "RECV_ZC",
763774
},
775+
[IORING_OP_EPOLL_WAIT] = {
776+
.name = "EPOLL_WAIT",
777+
},
764778
};
765779

766780
const char *io_uring_get_opcode(u8 opcode)

0 commit comments

Comments
 (0)