Skip to content

Commit

Permalink
bpf: Add skb dynptrs
Browse files Browse the repository at this point in the history
Add skb dynptrs, which are dynptrs whose underlying pointer points
to an skb. The dynptr acts on skb data. skb dynptrs have two main
benefits. One is that they allow operations on sizes that are not
statically known at compile time (e.g. variable-sized accesses).
Another is that parsing the packet data through dynptrs (instead of
through direct access of skb->data and skb->data_end) can be more
ergonomic and less brittle (e.g. it does not need manual bounds checks
against data_end).

For bpf prog types that don't support writes on skb data, the dynptr is
read-only. For reads and writes through the bpf_dynptr_read() and
bpf_dynptr_write() interfaces, this supports reading and writing into
data in the non-linear paged buffers. For data slices (through the
bpf_dynptr_data() interface), if the data is in a paged buffer, the user
must first call bpf_skb_pull_data() to pull the data into the linear
portion. The returned data slice from a call to bpf_dynptr_data() is of
reg type PTR_TO_PACKET | PTR_MAYBE_NULL.

Any bpf_dynptr_write() automatically invalidates any prior data slices
to the skb dynptr. This is because a bpf_dynptr_write() may be writing
to data in a paged buffer, so it will need to pull the buffer first into
the head. The reason it needs to be pulled instead of writing directly to
the paged buffers is because they may be cloned (only the head of the skb
is by default uncloned). As such, any bpf_dynptr_write() will
automatically have its prior data slices invalidated, even if the write
is to data in the skb head (the verifier has no way of differentiating
whether the write is to the head or paged buffers during program load
time). Please note that any other helper call that changes the
underlying packet buffer (e.g. bpf_skb_pull_data()) invalidates any data
slices of the skb dynptr as well. Whenever such a helper call is made,
the verifier marks any PTR_TO_PACKET reg type (which includes skb dynptr
slices since they are PTR_TO_PACKETs) as unknown. The call chain for
this is check_helper_call() -> clear_all_pkt_pointers() ->
__clear_all_pkt_pointers() -> mark_reg_unknown().

For examples of how skb dynptrs can be used, please see the attached
selftests.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
  • Loading branch information
joannekoong authored and intel-lab-lkp committed Aug 23, 2022
1 parent b979f00 commit a2c8a74
Show file tree
Hide file tree
Showing 7 changed files with 335 additions and 65 deletions.
83 changes: 53 additions & 30 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -407,11 +407,14 @@ enum bpf_type_flag {
/* Size is known at compile time. */
MEM_FIXED_SIZE = BIT(10 + BPF_BASE_TYPE_BITS),

/* DYNPTR points to sk_buff */
DYNPTR_TYPE_SKB = BIT(11 + BPF_BASE_TYPE_BITS),

__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};

#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF)
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB)

/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
Expand Down Expand Up @@ -903,6 +906,36 @@ static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
return bpf_func(ctx, insnsi);
}

/* the implementation of the opaque uapi struct bpf_dynptr */
struct bpf_dynptr_kern {
/* Underlying object the dynptr refers to. The local/ringbuf paths in the
* dynptr helpers use it as a plain base address, while the skb path treats
* it as a struct sk_buff pointer. A NULL value is rejected by the helpers
* as an invalid dynptr.
*/
void *data;
/* Size represents the number of usable bytes of dynptr data.
* If for example the offset is at 4 for a local dynptr whose data is
* of type u64, the number of usable bytes is 4.
*
* The upper 8 bits are reserved for metadata. The layout is as follows:
* Bits 0 - 23 = size
* Bits 24 - 30 = dynptr type
* Bit 31 = whether dynptr is read-only
*/
u32 size;
/* Byte offset into the underlying data at which the dynptr starts; the
* helpers add it to the caller-supplied offset on every access.
*/
u32 offset;
} __aligned(8);

/* Kind of memory a dynptr refers to. The value is stored in the type bits of
* bpf_dynptr_kern.size, and the dynptr helpers switch on it to pick the
* access path.
*/
enum bpf_dynptr_type {
/* Zero value; presumably what a cleared or uninitialized dynptr carries
* (TODO confirm against bpf_dynptr_set_null()).
*/
BPF_DYNPTR_TYPE_INVALID,
/* Points to memory that is local to the bpf program */
BPF_DYNPTR_TYPE_LOCAL,
/* Underlying data is a ringbuf record */
BPF_DYNPTR_TYPE_RINGBUF,
/* Underlying data is a sk_buff */
BPF_DYNPTR_TYPE_SKB,
/* Underlying data is a xdp_buff.
* NOTE(review): bpf_dynptr_read/write/data in this change have no case for
* this type, so it would hit their WARN() default - confirm that the xdp
* support lands in a follow-up.
*/
BPF_DYNPTR_TYPE_XDP,
};

int bpf_dynptr_check_size(u32 size);

#ifdef CONFIG_BPF_JIT
int bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr);
Expand Down Expand Up @@ -1975,6 +2008,12 @@ static inline bool has_current_bpf_ctx(void)
{
return !!current->bpf_ctx;
}

void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size);
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr);

#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
Expand Down Expand Up @@ -2188,6 +2227,19 @@ static inline bool has_current_bpf_ctx(void)
{
return false;
}

/* No-op stub for bpf_dynptr_init() in the section closed by
* "#endif CONFIG_BPF_SYSCALL" below, so that callers still compile when the
* real implementation is not available. It leaves *ptr untouched.
*/
static inline void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size)
{
}

/* No-op stub for bpf_dynptr_set_null(); see the CONFIG_BPF_SYSCALL guard that
* closes this section. It does not modify *ptr.
*/
static inline void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
{
}

/* No-op stub for bpf_dynptr_set_rdonly(); see the CONFIG_BPF_SYSCALL guard
* that closes this section. It does not modify *ptr, so no read-only flag is
* recorded in this configuration.
*/
static inline void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
{
}
#endif /* CONFIG_BPF_SYSCALL */

void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
Expand Down Expand Up @@ -2548,35 +2600,6 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
u32 **bin_buf, u32 num_args);
void bpf_bprintf_cleanup(void);

/* the implementation of the opaque uapi struct bpf_dynptr */
struct bpf_dynptr_kern {
void *data;
/* Size represents the number of usable bytes of dynptr data.
* If for example the offset is at 4 for a local dynptr whose data is
* of type u64, the number of usable bytes is 4.
*
* The upper 8 bits are reserved. It is as follows:
* Bits 0 - 23 = size
* Bits 24 - 30 = dynptr type
* Bit 31 = whether dynptr is read-only
*/
u32 size;
u32 offset;
} __aligned(8);

enum bpf_dynptr_type {
BPF_DYNPTR_TYPE_INVALID,
/* Points to memory that is local to the bpf program */
BPF_DYNPTR_TYPE_LOCAL,
/* Underlying data is a ringbuf record */
BPF_DYNPTR_TYPE_RINGBUF,
};

void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
enum bpf_dynptr_type type, u32 offset, u32 size);
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
int bpf_dynptr_check_size(u32 size);

#ifdef CONFIG_BPF_LSM
void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype);
void bpf_cgroup_atype_put(int cgroup_atype);
Expand Down
4 changes: 4 additions & 0 deletions include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -1532,4 +1532,8 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
return XDP_REDIRECT;
}

int __bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len);
int __bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from,
u32 len, u64 flags);

#endif /* __LINUX_FILTER_H__ */
40 changes: 37 additions & 3 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -5253,22 +5253,43 @@ union bpf_attr {
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
* *flags* is currently unused.
*
* *flags* must be 0 except for skb-type dynptrs.
*
* For skb-type dynptrs:
* * All data slices of the dynptr are automatically
* invalidated after **bpf_dynptr_write**\ (). If you wish to
* avoid this, please perform the write using direct data slices
* instead.
*
* * For *flags*, please see the flags accepted by
* **bpf_skb_store_bytes**\ ().
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
* is a read-only dynptr or if *flags* is not 0.
* is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
* other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
* void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
* Get a pointer to the underlying dynptr data.
*
* *len* must be a statically known value. The returned data slice
* is invalidated whenever the dynptr is invalidated.
*
* For skb-type dynptrs:
* * If *offset* + *len* extends into the skb's paged buffers,
* the user should manually pull the skb with **bpf_skb_pull_data**\ ()
* and try again.
*
* * The data slice is automatically invalidated anytime
* **bpf_dynptr_write**\ () or a helper call that changes
* the underlying packet buffer (eg **bpf_skb_pull_data**\ ())
* is called.
* Return
* Pointer to the underlying dynptr data, NULL if the dynptr is
* read-only (except for skb-type dynptrs, which may return a
* read-only data slice), if the dynptr is invalid, or if the offset and length
* is out of bounds.
* is out of bounds or in a paged buffer for skb-type dynptrs.
*
* s64 bpf_tcp_raw_gen_syncookie_ipv4(struct iphdr *iph, struct tcphdr *th, u32 th_len)
* Description
Expand Down Expand Up @@ -5355,6 +5376,18 @@ union bpf_attr {
* Return
* Current *ktime*.
*
* long bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, struct bpf_dynptr *ptr)
* Description
* Get a dynptr to the data in *skb*. *skb* must be the BPF program
* context. Depending on program type, the dynptr may be read-only.
*
* Calls that change the *skb*'s underlying packet buffer
* (eg **bpf_skb_pull_data**\ ()) do not invalidate the dynptr, but
* they do invalidate any data slices associated with the dynptr.
*
* *flags* is currently unused and must be 0.
* Return
* 0 on success or -EINVAL if flags is not 0.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
Expand Down Expand Up @@ -5566,6 +5599,7 @@ union bpf_attr {
FN(tcp_raw_check_syncookie_ipv4), \
FN(tcp_raw_check_syncookie_ipv6), \
FN(ktime_get_tai_ns), \
FN(dynptr_from_skb), \
/* */

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
Expand Down
81 changes: 73 additions & 8 deletions kernel/bpf/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -1437,11 +1437,21 @@ static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
return ptr->size & DYNPTR_RDONLY_BIT;
}

/* Mark @ptr as read-only.
 *
 * The flag lives in the same u32 as the dynptr size, so it is OR-ed into
 * ptr->size and every other bit of that word is left as it was.
 */
void bpf_dynptr_set_rdonly(struct bpf_dynptr_kern *ptr)
{
	ptr->size = ptr->size | DYNPTR_RDONLY_BIT;
}

/* Record the dynptr @type inside the size word of @ptr.
 *
 * The type is shifted up by DYNPTR_TYPE_SHIFT and OR-ed in. Existing bits
 * are not cleared first, so this only yields the requested type when the
 * type field of ptr->size is still zero.
 */
static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
{
	ptr->size = ptr->size | (type << DYNPTR_TYPE_SHIFT);
}

static enum bpf_dynptr_type bpf_dynptr_get_type(const struct bpf_dynptr_kern *ptr)
{
return (ptr->size & ~(DYNPTR_RDONLY_BIT)) >> DYNPTR_TYPE_SHIFT;
}

static u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr)
{
return ptr->size & DYNPTR_SIZE_MASK;
Expand Down Expand Up @@ -1512,6 +1522,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src,
u32, offset, u64, flags)
{
enum bpf_dynptr_type type;
int err;

if (!src->data || flags)
Expand All @@ -1521,9 +1532,19 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src
if (err)
return err;

memcpy(dst, src->data + src->offset + offset, len);
type = bpf_dynptr_get_type(src);

return 0;
switch (type) {
case BPF_DYNPTR_TYPE_LOCAL:
case BPF_DYNPTR_TYPE_RINGBUF:
memcpy(dst, src->data + src->offset + offset, len);
return 0;
case BPF_DYNPTR_TYPE_SKB:
return __bpf_skb_load_bytes(src->data, src->offset + offset, dst, len);
default:
WARN(true, "bpf_dynptr_read: unknown dynptr type %d\n", type);
return -EFAULT;
}
}

static const struct bpf_func_proto bpf_dynptr_read_proto = {
Expand All @@ -1540,18 +1561,32 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
u32, len, u64, flags)
{
enum bpf_dynptr_type type;
int err;

if (!dst->data || flags || bpf_dynptr_is_rdonly(dst))
if (!dst->data || bpf_dynptr_is_rdonly(dst))
return -EINVAL;

err = bpf_dynptr_check_off_len(dst, offset, len);
if (err)
return err;

memcpy(dst->data + dst->offset + offset, src, len);
type = bpf_dynptr_get_type(dst);

return 0;
switch (type) {
case BPF_DYNPTR_TYPE_LOCAL:
case BPF_DYNPTR_TYPE_RINGBUF:
if (flags)
return -EINVAL;
memcpy(dst->data + dst->offset + offset, src, len);
return 0;
case BPF_DYNPTR_TYPE_SKB:
return __bpf_skb_store_bytes(dst->data, dst->offset + offset, src, len,
flags);
default:
WARN(true, "bpf_dynptr_write: unknown dynptr type %d\n", type);
return -EFAULT;
}
}

static const struct bpf_func_proto bpf_dynptr_write_proto = {
Expand All @@ -1567,6 +1602,9 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {

BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
{
enum bpf_dynptr_type type;
bool is_rdonly;
void *data;
int err;

if (!ptr->data)
Expand All @@ -1576,10 +1614,37 @@ BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len
if (err)
return 0;

if (bpf_dynptr_is_rdonly(ptr))
return 0;
type = bpf_dynptr_get_type(ptr);

/* Only skb dynptrs can get read-only data slices, because the
* verifier enforces PTR_TO_PACKET accesses
*/
is_rdonly = bpf_dynptr_is_rdonly(ptr);

switch (type) {
case BPF_DYNPTR_TYPE_LOCAL:
case BPF_DYNPTR_TYPE_RINGBUF:
if (is_rdonly)
return 0;

data = ptr->data;
break;
case BPF_DYNPTR_TYPE_SKB:
{
struct sk_buff *skb = ptr->data;

return (unsigned long)(ptr->data + ptr->offset + offset);
/* if the data is paged, the caller needs to pull it first */
if (ptr->offset + offset + len > skb->len - skb->data_len)
return 0;

data = skb->data;
break;
}
default:
WARN(true, "bpf_dynptr_data: unknown dynptr type %d\n", type);
return 0;
}
return (unsigned long)(data + ptr->offset + offset);
}

static const struct bpf_func_proto bpf_dynptr_data_proto = {
Expand Down

0 comments on commit a2c8a74

Please sign in to comment.