
Commit 0608c69

jrfastab authored and borkmann committed
bpf: sk_msg, sock{map|hash} redirect through ULP
A sockmap program that redirects through a kTLS ULP enabled socket
will not work correctly because the ULP layer is skipped. This fixes
the behavior to call through the ULP layer on redirect to ensure any
operations required on the data stream at the ULP layer continue to
be applied.

To do this we add an internal flag MSG_SENDPAGE_NOPOLICY to avoid
calling the BPF layer on a redirected message. This is required to
avoid calling the BPF layer multiple times (possibly recursively),
which is not the current/expected behavior without ULPs. In the
future we may add a redirect flag if users _do_ want the policy
applied again, but this would need to work for both ULP and non-ULP
sockets and be opt-in to avoid breaking existing programs.

Also, to avoid polluting the flag space with an internal flag we
reuse the flag space, overlapping MSG_SENDPAGE_NOPOLICY with
MSG_WAITFORONE. Here WAITFORONE is specific to the recv path and
SENDPAGE_NOPOLICY is only used for sendpage hooks. The last thing to
verify is that the user space API is masked correctly to ensure the
flag can not be set by the user. (Note this needs to be true
regardless, because we have internal flags already in use that user
space should not be able to set.) But for completeness we have two
UAPI paths into sendpage: sendfile and splice.

In the sendfile case the function do_sendfile() zeroes flags,

./fs/read_write.c:
 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
			    size_t count, loff_t max)
 {
   ...
   fl = 0;
 #if 0
   /*
    * We need to debate whether we can enable this or not. The
    * man page documents EAGAIN return for the output at least,
    * and the application is arguably buggy if it doesn't expect
    * EAGAIN on a non-blocking file descriptor.
    */
   if (in.file->f_flags & O_NONBLOCK)
	fl = SPLICE_F_NONBLOCK;
 #endif
   file_start_write(out.file);
   retval = do_splice_direct(in.file, &pos, out.file, &out_pos,
			     count, fl);
 }

In the splice case the pipe_to_sendpage "actor" is used, which masks
flags with SPLICE_F_MORE.

./fs/splice.c:
 static int pipe_to_sendpage(struct pipe_inode_info *pipe,
			     struct pipe_buffer *buf,
			     struct splice_desc *sd)
 {
   ...
   more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
   ...
 }

This confirms what we expect: the internal flags are in fact internal
to the socket side.

Fixes: d3b18ad ("tls: add bpf support to sk_msg handling")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
1 parent a136678 commit 0608c69

File tree

4 files changed: +52 -14 lines changed


include/linux/socket.h

Lines changed: 1 addition & 0 deletions
@@ -286,6 +286,7 @@ struct ucred {
 #define MSG_NOSIGNAL	0x4000	/* Do not generate SIGPIPE */
 #define MSG_MORE	0x8000	/* Sender will send more */
 #define MSG_WAITFORONE	0x10000	/* recvmmsg(): block until 1+ packets avail */
+#define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do no apply policy */
 #define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
 #define MSG_BATCH	0x40000 /* sendmmsg(): more messages coming */
 #define MSG_EOF         MSG_FIN
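
The new define reuses MSG_WAITFORONE's bit, as the commit message explains: WAITFORONE is only consumed on the recvmmsg() path and SENDPAGE_NOPOLICY only by the sendpage hooks, so the same bit never carries two meanings in one call. As a quick standalone sanity check of that reuse, a minimal userspace C sketch (illustration only, not kernel code):

 /* Illustration only, not kernel code: the two flags deliberately share
  * bit 0x10000; the internal sendpage flags stay distinct from each other. */
 #include <assert.h>

 #define MSG_WAITFORONE		0x10000	/* recv path only */
 #define MSG_SENDPAGE_NOPOLICY	0x10000	/* sendpage internal only */
 #define MSG_SENDPAGE_NOTLAST	0x20000	/* existing internal sendpage flag */

 int main(void)
 {
	/* Same bit, distinct meanings on disjoint paths. */
	assert(MSG_WAITFORONE == MSG_SENDPAGE_NOPOLICY);
	/* No collision among the sendpage-internal flags themselves. */
	assert((MSG_SENDPAGE_NOPOLICY & MSG_SENDPAGE_NOTLAST) == 0);
	return 0;
 }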

include/net/tls.h

Lines changed: 9 additions & 0 deletions
@@ -454,6 +454,15 @@ tls_offload_ctx_tx(const struct tls_context *tls_ctx)
 	return (struct tls_offload_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
+static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+
+	if (!ctx)
+		return false;
+	return !!tls_sw_ctx_tx(ctx);
+}
+
 static inline struct tls_offload_context_rx *
 tls_offload_ctx_rx(const struct tls_context *tls_ctx)
 {

net/ipv4/tcp_bpf.c

Lines changed: 12 additions & 1 deletion
@@ -8,6 +8,7 @@
 #include <linux/wait.h>
 
 #include <net/inet_common.h>
+#include <net/tls.h>
 
 static bool tcp_bpf_stream_read(const struct sock *sk)
 {
@@ -218,6 +219,8 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
 	u32 off;
 
 	while (1) {
+		bool has_tx_ulp;
+
 		sge = sk_msg_elem(msg, msg->sg.start);
 		size = (apply && apply_bytes < sge->length) ?
 			apply_bytes : sge->length;
@@ -226,7 +229,15 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
 
 		tcp_rate_check_app_limited(sk);
 retry:
-		ret = do_tcp_sendpages(sk, page, off, size, flags);
+		has_tx_ulp = tls_sw_has_ctx_tx(sk);
+		if (has_tx_ulp) {
+			flags |= MSG_SENDPAGE_NOPOLICY;
+			ret = kernel_sendpage_locked(sk,
+						     page, off, size, flags);
+		} else {
+			ret = do_tcp_sendpages(sk, page, off, size, flags);
+		}
+
 		if (ret <= 0)
 			return ret;
 		if (apply)
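
The hunk above uses tls_sw_has_ctx_tx() from the include/net/tls.h change to detect a software kTLS TX context on the destination socket and, if one is present, pushes the redirected data through the locked sendpage path with MSG_SENDPAGE_NOPOLICY set so the BPF verdict is not applied a second time. A minimal userspace model of that branch follows; the function names are illustrative stand-ins, not the kernel APIs:

 /* Userspace model of the branch added to tcp_bpf_push(): route through the
  * ULP-aware (locked) path with the internal "no policy" flag when a kTLS
  * software TX context exists, otherwise keep the plain TCP path. */
 #include <stdbool.h>
 #include <stdio.h>

 #define MSG_SENDPAGE_NOPOLICY 0x10000

 static int plain_tcp_send(int flags)		/* stands in for do_tcp_sendpages() */
 {
	printf("plain TCP send, flags=0x%x\n", flags);
	return 0;
 }

 static int ulp_send_locked(int flags)		/* stands in for kernel_sendpage_locked() */
 {
	printf("ULP (kTLS) send, flags=0x%x, policy %s\n", flags,
	       (flags & MSG_SENDPAGE_NOPOLICY) ? "skipped" : "applied");
	return 0;
 }

 static int redirect_push(bool has_tx_ulp, int flags)
 {
	if (has_tx_ulp) {
		flags |= MSG_SENDPAGE_NOPOLICY;	/* suppress a second verdict */
		return ulp_send_locked(flags);
	}
	return plain_tcp_send(flags);		/* non-ULP path is unchanged */
 }

 int main(void)
 {
	redirect_push(true, 0);		/* kTLS socket: goes through the ULP */
	redirect_push(false, 0);	/* plain socket: behavior as before */
	return 0;
 }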

net/tls/tls_sw.c

Lines changed: 30 additions & 13 deletions
@@ -686,12 +686,13 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
 	struct sk_psock *psock;
 	struct sock *sk_redir;
 	struct tls_rec *rec;
+	bool enospc, policy;
 	int err = 0, send;
 	u32 delta = 0;
-	bool enospc;
 
+	policy = !(flags & MSG_SENDPAGE_NOPOLICY);
 	psock = sk_psock_get(sk);
-	if (!psock)
+	if (!psock || !policy)
 		return tls_push_record(sk, flags, record_type);
 more_data:
 	enospc = sk_msg_full(msg);
@@ -1017,8 +1018,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	return copied ? copied : ret;
 }
 
-int tls_sw_sendpage(struct sock *sk, struct page *page,
-		    int offset, size_t size, int flags)
+int tls_sw_do_sendpage(struct sock *sk, struct page *page,
+		       int offset, size_t size, int flags)
 {
 	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -1033,15 +1034,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 	int ret = 0;
 	bool eor;
 
-	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
-		      MSG_SENDPAGE_NOTLAST))
-		return -ENOTSUPP;
-
-	/* No MSG_EOR from splice, only look at MSG_MORE */
 	eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
-
-	lock_sock(sk);
-
 	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	/* Wait till there is any pending write on socket */
@@ -1145,10 +1138,34 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 	}
 sendpage_end:
 	ret = sk_stream_error(sk, flags, ret);
-	release_sock(sk);
 	return copied ? copied : ret;
 }
 
+int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
+			   int offset, size_t size, int flags)
+{
+	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+		      MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
+		return -ENOTSUPP;
+
+	return tls_sw_do_sendpage(sk, page, offset, size, flags);
+}
+
+int tls_sw_sendpage(struct sock *sk, struct page *page,
+		    int offset, size_t size, int flags)
+{
+	int ret;
+
+	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+		      MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
+		return -ENOTSUPP;
+
+	lock_sock(sk);
+	ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
+	release_sock(sk);
+	return ret;
+}
+
 static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
 				     int flags, long timeo, int *err)
 {
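
The tls_sw.c change factors the body of tls_sw_sendpage() into tls_sw_do_sendpage() so it can be entered either with the socket lock already held (the new tls_sw_sendpage_locked(), used by the redirect path in tcp_bpf.c) or via the regular tls_sw_sendpage() that takes and releases the lock itself; both entry points also accept the new internal MSG_SENDPAGE_NOPOLICY flag. A generic sketch of this locked/unlocked wrapper pattern, with hypothetical names and a pthread mutex standing in for the socket lock, only the shape mirrors the kernel code:

 /* Generic sketch: the real work lives in one helper, and two thin
  * wrappers differ only in who owns the lock. */
 #include <pthread.h>
 #include <stdio.h>

 static pthread_mutex_t sock_lock = PTHREAD_MUTEX_INITIALIZER;

 static int do_sendpage(int flags)
 {
	/* ...record building and transmit would happen here... */
	printf("sending, flags=0x%x\n", flags);
	return 0;
 }

 /* Caller already holds the lock (e.g. a redirect from another send path). */
 static int sendpage_locked(int flags)
 {
	return do_sendpage(flags);
 }

 /* Regular entry point: take and release the lock around the same helper. */
 static int sendpage(int flags)
 {
	int ret;

	pthread_mutex_lock(&sock_lock);
	ret = do_sendpage(flags);
	pthread_mutex_unlock(&sock_lock);
	return ret;
 }

 int main(void)
 {
	sendpage(0);			/* unlocked entry */
	pthread_mutex_lock(&sock_lock);
	sendpage_locked(0);		/* already-locked entry */
	pthread_mutex_unlock(&sock_lock);
	return 0;
 }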
