Skip to content
/ linux Public

Commit acaf1ea

Browse files
mrpreSasha Levin
authored andcommitted
bpf, sockmap: Fix incorrect copied_seq calculation
[ Upstream commit b40cc5a ] A socket using sockmap has its own independent receive queue: ingress_msg. This queue may contain data from its own protocol stack or from other sockets. The issue is that when reading from ingress_msg, we update tp->copied_seq by default. However, if the data is not from its own protocol stack, tcp->rcv_nxt is not increased. Later, if we convert this socket to a native socket, reading from this socket may fail because copied_seq might be significantly larger than rcv_nxt. This fix also addresses the syzkaller-reported bug referenced in the Closes tag. This patch marks the skmsg objects in ingress_msg. When reading, we update copied_seq only if the data is from its own protocol stack. FD1:read() -- FD1->copied_seq++ | [read data] | [enqueue data] v [sockmap] -> ingress to self -> ingress_msg queue FD1 native stack ------> ^ -- FD1->rcv_nxt++ -> redirect to other | [enqueue data] | | | ingress to FD1 v ^ ... | [sockmap] FD2 native stack Closes: https://syzkaller.appspot.com/bug?extid=06dbd397158ec0ea4983 Fixes: 04919be ("tcp: Introduce tcp_read_skb()") Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com> Reviewed-by: John Fastabend <john.fastabend@gmail.com> Signed-off-by: Jiayuan Chen <jiayuan.chen@linux.dev> Link: https://lore.kernel.org/r/20260124113314.113584-2-jiayuan.chen@linux.dev Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent 7111701 commit acaf1ea

File tree

3 files changed

+29
-5
lines changed

3 files changed

+29
-5
lines changed

include/linux/skmsg.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
132132
struct sk_msg *msg, u32 bytes);
133133
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
134134
int len, int flags);
135+
int __sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
136+
int len, int flags, int *copied_from_self);
135137
bool sk_msg_is_readable(struct sock *sk);
136138

137139
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)

net/core/skmsg.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -408,22 +408,26 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
408408
}
409409
EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
410410

411-
/* Receive sk_msg from psock->ingress_msg to @msg. */
412-
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
413-
int len, int flags)
411+
int __sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
412+
int len, int flags, int *copied_from_self)
414413
{
415414
struct iov_iter *iter = &msg->msg_iter;
416415
int peek = flags & MSG_PEEK;
417416
struct sk_msg *msg_rx;
418417
int i, copied = 0;
418+
bool from_self;
419419

420420
msg_rx = sk_psock_peek_msg(psock);
421+
if (copied_from_self)
422+
*copied_from_self = 0;
423+
421424
while (copied != len) {
422425
struct scatterlist *sge;
423426

424427
if (unlikely(!msg_rx))
425428
break;
426429

430+
from_self = msg_rx->sk == sk;
427431
i = msg_rx->sg.start;
428432
do {
429433
struct page *page;
@@ -442,6 +446,9 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
442446
}
443447

444448
copied += copy;
449+
if (from_self && copied_from_self)
450+
*copied_from_self += copy;
451+
445452
if (likely(!peek)) {
446453
sge->offset += copy;
447454
sge->length -= copy;
@@ -486,6 +493,13 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
486493
out:
487494
return copied;
488495
}
496+
497+
/* Receive sk_msg from psock->ingress_msg to @msg. */
498+
int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
499+
int len, int flags)
500+
{
501+
return __sk_msg_recvmsg(sk, psock, msg, len, flags, NULL);
502+
}
489503
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
490504

491505
bool sk_msg_is_readable(struct sock *sk)
@@ -615,6 +629,12 @@ static int sk_psock_skb_ingress_self(struct sk_psock *psock, struct sk_buff *skb
615629
if (unlikely(!msg))
616630
return -EAGAIN;
617631
skb_set_owner_r(skb, sk);
632+
633+
/* This is used in tcp_bpf_recvmsg_parser() to determine whether the
634+
* data originates from the socket's own protocol stack. No need to
635+
* refcount sk because msg's lifetime is bound to sk via the ingress_msg.
636+
*/
637+
msg->sk = sk;
618638
err = sk_psock_skb_ingress_enqueue(skb, off, len, psock, sk, msg, take_ref);
619639
if (err < 0)
620640
kfree(msg);
@@ -908,6 +928,7 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
908928
sk_msg_compute_data_pointers(msg);
909929
msg->sk = sk;
910930
ret = bpf_prog_run_pin_on_cpu(prog, msg);
931+
msg->sk = NULL;
911932
ret = sk_psock_map_verd(ret, msg->sk_redir);
912933
psock->apply_bytes = msg->apply_bytes;
913934
if (ret == __SK_REDIRECT) {

net/ipv4/tcp_bpf.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
226226
int peek = flags & MSG_PEEK;
227227
struct sk_psock *psock;
228228
struct tcp_sock *tcp;
229+
int copied_from_self = 0;
229230
int copied = 0;
230231
u32 seq;
231232

@@ -262,7 +263,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
262263
}
263264

264265
msg_bytes_ready:
265-
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
266+
copied = __sk_msg_recvmsg(sk, psock, msg, len, flags, &copied_from_self);
266267
/* The typical case for EFAULT is the socket was gracefully
267268
* shutdown with a FIN pkt. So check here the other case is
268269
* some error on copy_page_to_iter which would be unexpected.
@@ -277,7 +278,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
277278
goto out;
278279
}
279280
}
280-
seq += copied;
281+
seq += copied_from_self;
281282
if (!copied) {
282283
long timeo;
283284
int data;

0 commit comments

Comments
 (0)