Skip to content

Commit 9bb00b2

Browse files
yonghong-song authored and
Alexei Starovoitov committed
bpf: Add kfunc bpf_rcu_read_lock/unlock()
Add two kfunc's bpf_rcu_read_lock() and bpf_rcu_read_unlock(). These two
kfunc's can be used for all program types. The following is an example about
how rcu pointers are used w.r.t. bpf_rcu_read_lock()/bpf_rcu_read_unlock().

    struct task_struct {
        ...
        struct task_struct              *last_wakee;
        struct task_struct __rcu        *real_parent;
        ...
    };

Let us say prog does 'task = bpf_get_current_task_btf()' to get a 'task'
pointer. The basic rules are:

- 'real_parent = task->real_parent' should be inside bpf_rcu_read_lock
  region. This is to simulate rcu_dereference() operation. The 'real_parent'
  is marked as MEM_RCU only if (1) task->real_parent is inside the
  bpf_rcu_read_lock region, and (2) task is a trusted ptr. So a MEM_RCU
  marked ptr can be 'trusted' inside the bpf_rcu_read_lock region.

- 'last_wakee = real_parent->last_wakee' should be inside the
  bpf_rcu_read_lock region since it tries to access rcu protected memory.

- the ptr 'last_wakee' will be marked as PTR_UNTRUSTED since in general it
  is not clear whether the object pointed to by 'last_wakee' is valid or not,
  even inside the bpf_rcu_read_lock region.

The verifier will reset all rcu pointer register states to untrusted at the
bpf_rcu_read_unlock() kfunc call site, so any such rcu pointer won't be
trusted any more outside the bpf_rcu_read_lock() region.

The current implementation does not support nested rcu read lock regions in
the prog.

Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20221124053217.2373910-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
1 parent 01685c5 commit 9bb00b2

File tree

5 files changed

+155
-30
lines changed

5 files changed

+155
-30
lines changed

include/linux/bpf.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,9 @@ enum bpf_type_flag {
572572
*/
573573
PTR_TRUSTED = BIT(12 + BPF_BASE_TYPE_BITS),
574574

575+
/* MEM is tagged with rcu and memory access needs rcu_read_lock protection. */
576+
MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS),
577+
575578
__BPF_TYPE_FLAG_MAX,
576579
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
577580
};

include/linux/bpf_verifier.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,7 @@ struct bpf_verifier_state {
344344
u32 id;
345345
} active_lock;
346346
bool speculative;
347+
bool active_rcu_lock;
347348

348349
/* first and last insn idx of this verifier state */
349350
u32 first_insn_idx;
@@ -445,6 +446,7 @@ struct bpf_insn_aux_data {
445446
u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
446447
bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
447448
bool zext_dst; /* this insn zero extends dst reg */
449+
bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
448450
u8 alu_state; /* used in combination with alu_limit */
449451

450452
/* below fields are initialized once */
@@ -534,6 +536,7 @@ struct bpf_verifier_env {
534536
bool bypass_spec_v1;
535537
bool bypass_spec_v4;
536538
bool seen_direct_write;
539+
bool rcu_tag_supported;
537540
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
538541
const struct bpf_line_info *prev_linfo;
539542
struct bpf_verifier_log log;
@@ -680,7 +683,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
680683
}
681684
}
682685

683-
#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED)
686+
#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED)
684687

685688
static inline bool bpf_type_has_unsafe_modifiers(u32 type)
686689
{

kernel/bpf/btf.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6238,6 +6238,9 @@ static int btf_struct_walk(struct bpf_verifier_log *log, const struct btf *btf,
62386238
/* check __percpu tag */
62396239
if (strcmp(tag_value, "percpu") == 0)
62406240
tmp_flag = MEM_PERCPU;
6241+
/* check __rcu tag */
6242+
if (strcmp(tag_value, "rcu") == 0)
6243+
tmp_flag = MEM_RCU;
62416244
}
62426245

62436246
stype = btf_type_skip_modifiers(btf, mtype->type, &id);

kernel/bpf/helpers.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1990,6 +1990,16 @@ void *bpf_rdonly_cast(void *obj__ign, u32 btf_id__k)
19901990
return obj__ign;
19911991
}
19921992

1993+
void bpf_rcu_read_lock(void)
1994+
{
1995+
rcu_read_lock();
1996+
}
1997+
1998+
void bpf_rcu_read_unlock(void)
1999+
{
2000+
rcu_read_unlock();
2001+
}
2002+
19932003
__diag_pop();
19942004

19952005
BTF_SET8_START(generic_btf_ids)
@@ -2031,6 +2041,8 @@ BTF_ID(func, bpf_cgroup_release)
20312041
BTF_SET8_START(common_btf_ids)
20322042
BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx)
20332043
BTF_ID_FLAGS(func, bpf_rdonly_cast)
2044+
BTF_ID_FLAGS(func, bpf_rcu_read_lock)
2045+
BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
20342046
BTF_SET8_END(common_btf_ids)
20352047

20362048
static const struct btf_kfunc_id_set common_kfunc_set = {

kernel/bpf/verifier.c

Lines changed: 133 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,14 @@ static bool is_callback_calling_function(enum bpf_func_id func_id)
527527
func_id == BPF_FUNC_user_ringbuf_drain;
528528
}
529529

530+
static bool is_storage_get_function(enum bpf_func_id func_id)
531+
{
532+
return func_id == BPF_FUNC_sk_storage_get ||
533+
func_id == BPF_FUNC_inode_storage_get ||
534+
func_id == BPF_FUNC_task_storage_get ||
535+
func_id == BPF_FUNC_cgrp_storage_get;
536+
}
537+
530538
static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
531539
const struct bpf_map *map)
532540
{
@@ -589,11 +597,12 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
589597
strncpy(postfix, "_or_null", 16);
590598
}
591599

592-
snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s",
600+
snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
593601
type & MEM_RDONLY ? "rdonly_" : "",
594602
type & MEM_RINGBUF ? "ringbuf_" : "",
595603
type & MEM_USER ? "user_" : "",
596604
type & MEM_PERCPU ? "percpu_" : "",
605+
type & MEM_RCU ? "rcu_" : "",
597606
type & PTR_UNTRUSTED ? "untrusted_" : "",
598607
type & PTR_TRUSTED ? "trusted_" : ""
599608
);
@@ -1220,6 +1229,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
12201229
dst_state->frame[i] = NULL;
12211230
}
12221231
dst_state->speculative = src->speculative;
1232+
dst_state->active_rcu_lock = src->active_rcu_lock;
12231233
dst_state->curframe = src->curframe;
12241234
dst_state->active_lock.ptr = src->active_lock.ptr;
12251235
dst_state->active_lock.id = src->active_lock.id;
@@ -4258,6 +4268,25 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
42584268
return reg->type == PTR_TO_FLOW_KEYS;
42594269
}
42604270

4271+
static bool is_trusted_reg(const struct bpf_reg_state *reg)
4272+
{
4273+
/* A referenced register is always trusted. */
4274+
if (reg->ref_obj_id)
4275+
return true;
4276+
4277+
/* If a register is not referenced, it is trusted if it has the
4278+
* MEM_ALLOC, MEM_RCU or PTR_TRUSTED type modifiers, and no others. Some of the
4279+
* other type modifiers may be safe, but we elect to take an opt-in
4280+
* approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
4281+
* not.
4282+
*
4283+
* Eventually, we should make PTR_TRUSTED the single source of truth
4284+
* for whether a register is trusted.
4285+
*/
4286+
return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
4287+
!bpf_type_has_unsafe_modifiers(reg->type);
4288+
}
4289+
42614290
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
42624291
const struct bpf_reg_state *reg,
42634292
int off, int size, bool strict)
@@ -4737,9 +4766,28 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
47374766
if (type_flag(reg->type) & PTR_UNTRUSTED)
47384767
flag |= PTR_UNTRUSTED;
47394768

4740-
/* Any pointer obtained from walking a trusted pointer is no longer trusted. */
4769+
/* By default any pointer obtained from walking a trusted pointer is
4770+
* no longer trusted except the rcu case below.
4771+
*/
47414772
flag &= ~PTR_TRUSTED;
47424773

4774+
if (flag & MEM_RCU) {
4775+
/* Mark value register as MEM_RCU only if it is protected by
4776+
* bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU
4777+
* itself can already indicate trustedness inside the rcu
4778+
* read lock region. Also mark it as PTR_TRUSTED.
4779+
*/
4780+
if (!env->cur_state->active_rcu_lock || !is_trusted_reg(reg))
4781+
flag &= ~MEM_RCU;
4782+
else
4783+
flag |= PTR_TRUSTED;
4784+
} else if (reg->type & MEM_RCU) {
4785+
/* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
4786+
* with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
4787+
*/
4788+
flag |= PTR_UNTRUSTED;
4789+
}
4790+
47434791
if (atype == BPF_READ && value_regno >= 0)
47444792
mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
47454793

@@ -5897,6 +5945,7 @@ static const struct bpf_reg_types btf_ptr_types = {
58975945
.types = {
58985946
PTR_TO_BTF_ID,
58995947
PTR_TO_BTF_ID | PTR_TRUSTED,
5948+
PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED,
59005949
},
59015950
};
59025951
static const struct bpf_reg_types percpu_btf_ptr_types = {
@@ -6075,6 +6124,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
60756124
case PTR_TO_BTF_ID:
60766125
case PTR_TO_BTF_ID | MEM_ALLOC:
60776126
case PTR_TO_BTF_ID | PTR_TRUSTED:
6127+
case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED:
60786128
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
60796129
/* When referenced PTR_TO_BTF_ID is passed to release function,
60806130
* it's fixed offset must be 0. In the other cases, fixed offset
@@ -7539,6 +7589,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
75397589
return err;
75407590
}
75417591

7592+
if (env->cur_state->active_rcu_lock) {
7593+
if (fn->might_sleep) {
7594+
verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
7595+
func_id_name(func_id), func_id);
7596+
return -EINVAL;
7597+
}
7598+
7599+
if (env->prog->aux->sleepable && is_storage_get_function(func_id))
7600+
env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
7601+
}
7602+
75427603
meta.func_id = func_id;
75437604
/* check args */
75447605
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
@@ -7966,25 +8027,6 @@ static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
79668027
return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
79678028
}
79688029

7969-
static bool is_trusted_reg(const struct bpf_reg_state *reg)
7970-
{
7971-
/* A referenced register is always trusted. */
7972-
if (reg->ref_obj_id)
7973-
return true;
7974-
7975-
/* If a register is not referenced, it is trusted if it has either the
7976-
* MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
7977-
* other type modifiers may be safe, but we elect to take an opt-in
7978-
* approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
7979-
* not.
7980-
*
7981-
* Eventually, we should make PTR_TRUSTED the single source of truth
7982-
* for whether a register is trusted.
7983-
*/
7984-
return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
7985-
!bpf_type_has_unsafe_modifiers(reg->type);
7986-
}
7987-
79888030
static bool __kfunc_param_match_suffix(const struct btf *btf,
79898031
const struct btf_param *arg,
79908032
const char *suffix)
@@ -8163,6 +8205,8 @@ enum special_kfunc_type {
81638205
KF_bpf_list_pop_back,
81648206
KF_bpf_cast_to_kern_ctx,
81658207
KF_bpf_rdonly_cast,
8208+
KF_bpf_rcu_read_lock,
8209+
KF_bpf_rcu_read_unlock,
81668210
};
81678211

81688212
BTF_SET_START(special_kfunc_set)
@@ -8185,6 +8229,18 @@ BTF_ID(func, bpf_list_pop_front)
81858229
BTF_ID(func, bpf_list_pop_back)
81868230
BTF_ID(func, bpf_cast_to_kern_ctx)
81878231
BTF_ID(func, bpf_rdonly_cast)
8232+
BTF_ID(func, bpf_rcu_read_lock)
8233+
BTF_ID(func, bpf_rcu_read_unlock)
8234+
8235+
static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
8236+
{
8237+
return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
8238+
}
8239+
8240+
static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
8241+
{
8242+
return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
8243+
}
81888244

81898245
static enum kfunc_ptr_arg_type
81908246
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
@@ -8817,6 +8873,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
88178873
const struct btf_type *t, *func, *func_proto, *ptr_type;
88188874
struct bpf_reg_state *regs = cur_regs(env);
88198875
const char *func_name, *ptr_type_name;
8876+
bool sleepable, rcu_lock, rcu_unlock;
88208877
struct bpf_kfunc_call_arg_meta meta;
88218878
u32 i, nargs, func_id, ptr_type_id;
88228879
int err, insn_idx = *insn_idx_p;
@@ -8858,11 +8915,45 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
88588915
return -EACCES;
88598916
}
88608917

8861-
if (is_kfunc_sleepable(&meta) && !env->prog->aux->sleepable) {
8918+
sleepable = is_kfunc_sleepable(&meta);
8919+
if (sleepable && !env->prog->aux->sleepable) {
88628920
verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
88638921
return -EACCES;
88648922
}
88658923

8924+
rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
8925+
rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
8926+
if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) {
8927+
verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name);
8928+
return -EACCES;
8929+
}
8930+
8931+
if (env->cur_state->active_rcu_lock) {
8932+
struct bpf_func_state *state;
8933+
struct bpf_reg_state *reg;
8934+
8935+
if (rcu_lock) {
8936+
verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
8937+
return -EINVAL;
8938+
} else if (rcu_unlock) {
8939+
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
8940+
if (reg->type & MEM_RCU) {
8941+
reg->type &= ~(MEM_RCU | PTR_TRUSTED);
8942+
reg->type |= PTR_UNTRUSTED;
8943+
}
8944+
}));
8945+
env->cur_state->active_rcu_lock = false;
8946+
} else if (sleepable) {
8947+
verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
8948+
return -EACCES;
8949+
}
8950+
} else if (rcu_lock) {
8951+
env->cur_state->active_rcu_lock = true;
8952+
} else if (rcu_unlock) {
8953+
verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
8954+
return -EINVAL;
8955+
}
8956+
88668957
/* Check the arguments */
88678958
err = check_kfunc_args(env, &meta);
88688959
if (err < 0)
@@ -11754,6 +11845,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
1175411845
return -EINVAL;
1175511846
}
1175611847

11848+
if (env->cur_state->active_rcu_lock) {
11849+
verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
11850+
return -EINVAL;
11851+
}
11852+
1175711853
if (regs[ctx_reg].type != PTR_TO_CTX) {
1175811854
verbose(env,
1175911855
"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
@@ -13019,6 +13115,9 @@ static bool states_equal(struct bpf_verifier_env *env,
1301913115
old->active_lock.id != cur->active_lock.id)
1302013116
return false;
1302113117

13118+
if (old->active_rcu_lock != cur->active_rcu_lock)
13119+
return false;
13120+
1302213121
/* for states to be equal callsites have to be the same
1302313122
* and all frame states need to be equivalent
1302413123
*/
@@ -13706,6 +13805,11 @@ static int do_check(struct bpf_verifier_env *env)
1370613805
return -EINVAL;
1370713806
}
1370813807

13808+
if (env->cur_state->active_rcu_lock) {
13809+
verbose(env, "bpf_rcu_read_unlock is missing\n");
13810+
return -EINVAL;
13811+
}
13812+
1370913813
/* We must do check_reference_leak here before
1371013814
* prepare_func_exit to handle the case when
1371113815
* state->curframe > 0, it may be a callback
@@ -15494,14 +15598,12 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
1549415598
goto patch_call_imm;
1549515599
}
1549615600

15497-
if (insn->imm == BPF_FUNC_task_storage_get ||
15498-
insn->imm == BPF_FUNC_sk_storage_get ||
15499-
insn->imm == BPF_FUNC_inode_storage_get ||
15500-
insn->imm == BPF_FUNC_cgrp_storage_get) {
15501-
if (env->prog->aux->sleepable)
15502-
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
15503-
else
15601+
if (is_storage_get_function(insn->imm)) {
15602+
if (!env->prog->aux->sleepable ||
15603+
env->insn_aux_data[i + delta].storage_get_func_atomic)
1550415604
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
15605+
else
15606+
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
1550515607
insn_buf[1] = *insn;
1550615608
cnt = 2;
1550715609

@@ -16580,6 +16682,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
1658016682
env->bypass_spec_v1 = bpf_bypass_spec_v1();
1658116683
env->bypass_spec_v4 = bpf_bypass_spec_v4();
1658216684
env->bpf_capable = bpf_capable();
16685+
env->rcu_tag_supported = btf_vmlinux &&
16686+
btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0;
1658316687

1658416688
if (is_priv)
1658516689
env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;

0 commit comments

Comments (0)