@@ -527,6 +527,14 @@ static bool is_callback_calling_function(enum bpf_func_id func_id)
527527 func_id == BPF_FUNC_user_ringbuf_drain ;
528528}
529529
530+ static bool is_storage_get_function (enum bpf_func_id func_id )
531+ {
532+ return func_id == BPF_FUNC_sk_storage_get ||
533+ func_id == BPF_FUNC_inode_storage_get ||
534+ func_id == BPF_FUNC_task_storage_get ||
535+ func_id == BPF_FUNC_cgrp_storage_get ;
536+ }
537+
530538static bool helper_multiple_ref_obj_use (enum bpf_func_id func_id ,
531539 const struct bpf_map * map )
532540{
@@ -589,11 +597,12 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
589597 strncpy (postfix , "_or_null" , 16 );
590598 }
591599
592- snprintf (prefix , sizeof (prefix ), "%s%s%s%s%s%s" ,
600+ snprintf (prefix , sizeof (prefix ), "%s%s%s%s%s%s%s " ,
593601 type & MEM_RDONLY ? "rdonly_" : "" ,
594602 type & MEM_RINGBUF ? "ringbuf_" : "" ,
595603 type & MEM_USER ? "user_" : "" ,
596604 type & MEM_PERCPU ? "percpu_" : "" ,
605+ type & MEM_RCU ? "rcu_" : "" ,
597606 type & PTR_UNTRUSTED ? "untrusted_" : "" ,
598607 type & PTR_TRUSTED ? "trusted_" : ""
599608 );
@@ -1220,6 +1229,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
12201229 dst_state -> frame [i ] = NULL ;
12211230 }
12221231 dst_state -> speculative = src -> speculative ;
1232+ dst_state -> active_rcu_lock = src -> active_rcu_lock ;
12231233 dst_state -> curframe = src -> curframe ;
12241234 dst_state -> active_lock .ptr = src -> active_lock .ptr ;
12251235 dst_state -> active_lock .id = src -> active_lock .id ;
@@ -4258,6 +4268,25 @@ static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
42584268 return reg -> type == PTR_TO_FLOW_KEYS ;
42594269}
42604270
4271+ static bool is_trusted_reg (const struct bpf_reg_state * reg )
4272+ {
4273+ /* A referenced register is always trusted. */
4274+ if (reg -> ref_obj_id )
4275+ return true;
4276+
4277+ /* If a register is not referenced, it is trusted if it has the
4278+ * MEM_ALLOC, MEM_RCU or PTR_TRUSTED type modifiers, and no others. Some of the
4279+ * other type modifiers may be safe, but we elect to take an opt-in
4280+ * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
4281+ * not.
4282+ *
4283+ * Eventually, we should make PTR_TRUSTED the single source of truth
4284+ * for whether a register is trusted.
4285+ */
4286+ return type_flag (reg -> type ) & BPF_REG_TRUSTED_MODIFIERS &&
4287+ !bpf_type_has_unsafe_modifiers (reg -> type );
4288+ }
4289+
42614290static int check_pkt_ptr_alignment (struct bpf_verifier_env * env ,
42624291 const struct bpf_reg_state * reg ,
42634292 int off , int size , bool strict )
@@ -4737,9 +4766,28 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
47374766 if (type_flag (reg -> type ) & PTR_UNTRUSTED )
47384767 flag |= PTR_UNTRUSTED ;
47394768
4740- /* Any pointer obtained from walking a trusted pointer is no longer trusted. */
4769+ /* By default any pointer obtained from walking a trusted pointer is
4770+ * no longer trusted except the rcu case below.
4771+ */
47414772 flag &= ~PTR_TRUSTED ;
47424773
4774+ if (flag & MEM_RCU ) {
4775+ /* Mark value register as MEM_RCU only if it is protected by
4776+ * bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU
4777+ * itself can already indicate trustedness inside the rcu
4778+ * read lock region. Also mark it as PTR_TRUSTED.
4779+ */
4780+ if (!env -> cur_state -> active_rcu_lock || !is_trusted_reg (reg ))
4781+ flag &= ~MEM_RCU ;
4782+ else
4783+ flag |= PTR_TRUSTED ;
4784+ } else if (reg -> type & MEM_RCU ) {
4785+ /* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
4786+ * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
4787+ */
4788+ flag |= PTR_UNTRUSTED ;
4789+ }
4790+
47434791 if (atype == BPF_READ && value_regno >= 0 )
47444792 mark_btf_ld_reg (env , regs , value_regno , ret , reg -> btf , btf_id , flag );
47454793
@@ -5897,6 +5945,7 @@ static const struct bpf_reg_types btf_ptr_types = {
58975945 .types = {
58985946 PTR_TO_BTF_ID ,
58995947 PTR_TO_BTF_ID | PTR_TRUSTED ,
5948+ PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED ,
59005949 },
59015950};
59025951static const struct bpf_reg_types percpu_btf_ptr_types = {
@@ -6075,6 +6124,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
60756124 case PTR_TO_BTF_ID :
60766125 case PTR_TO_BTF_ID | MEM_ALLOC :
60776126 case PTR_TO_BTF_ID | PTR_TRUSTED :
6127+ case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED :
60786128 case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED :
60796129 /* When referenced PTR_TO_BTF_ID is passed to release function,
60806130 * it's fixed offset must be 0. In the other cases, fixed offset
@@ -7539,6 +7589,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
75397589 return err ;
75407590 }
75417591
7592+ if (env -> cur_state -> active_rcu_lock ) {
7593+ if (fn -> might_sleep ) {
7594+ verbose (env , "sleepable helper %s#%d in rcu_read_lock region\n" ,
7595+ func_id_name (func_id ), func_id );
7596+ return - EINVAL ;
7597+ }
7598+
7599+ if (env -> prog -> aux -> sleepable && is_storage_get_function (func_id ))
7600+ env -> insn_aux_data [insn_idx ].storage_get_func_atomic = true;
7601+ }
7602+
75427603 meta .func_id = func_id ;
75437604 /* check args */
75447605 for (i = 0 ; i < MAX_BPF_FUNC_REG_ARGS ; i ++ ) {
@@ -7966,25 +8027,6 @@ static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
79668027 return arg == 0 && (meta -> kfunc_flags & KF_KPTR_GET );
79678028}
79688029
7969- static bool is_trusted_reg (const struct bpf_reg_state * reg )
7970- {
7971- /* A referenced register is always trusted. */
7972- if (reg -> ref_obj_id )
7973- return true;
7974-
7975- /* If a register is not referenced, it is trusted if it has either the
7976- * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
7977- * other type modifiers may be safe, but we elect to take an opt-in
7978- * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
7979- * not.
7980- *
7981- * Eventually, we should make PTR_TRUSTED the single source of truth
7982- * for whether a register is trusted.
7983- */
7984- return type_flag (reg -> type ) & BPF_REG_TRUSTED_MODIFIERS &&
7985- !bpf_type_has_unsafe_modifiers (reg -> type );
7986- }
7987-
79888030static bool __kfunc_param_match_suffix (const struct btf * btf ,
79898031 const struct btf_param * arg ,
79908032 const char * suffix )
@@ -8163,6 +8205,8 @@ enum special_kfunc_type {
81638205 KF_bpf_list_pop_back ,
81648206 KF_bpf_cast_to_kern_ctx ,
81658207 KF_bpf_rdonly_cast ,
8208+ KF_bpf_rcu_read_lock ,
8209+ KF_bpf_rcu_read_unlock ,
81668210};
81678211
81688212BTF_SET_START (special_kfunc_set )
@@ -8185,6 +8229,18 @@ BTF_ID(func, bpf_list_pop_front)
81858229BTF_ID (func , bpf_list_pop_back )
81868230BTF_ID (func , bpf_cast_to_kern_ctx )
81878231BTF_ID (func , bpf_rdonly_cast )
8232+ BTF_ID (func , bpf_rcu_read_lock )
8233+ BTF_ID (func , bpf_rcu_read_unlock )
8234+
8235+ static bool is_kfunc_bpf_rcu_read_lock (struct bpf_kfunc_call_arg_meta * meta )
8236+ {
8237+ return meta -> func_id == special_kfunc_list [KF_bpf_rcu_read_lock ];
8238+ }
8239+
8240+ static bool is_kfunc_bpf_rcu_read_unlock (struct bpf_kfunc_call_arg_meta * meta )
8241+ {
8242+ return meta -> func_id == special_kfunc_list [KF_bpf_rcu_read_unlock ];
8243+ }
81888244
81898245static enum kfunc_ptr_arg_type
81908246get_kfunc_ptr_arg_type (struct bpf_verifier_env * env ,
@@ -8817,6 +8873,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
88178873 const struct btf_type * t , * func , * func_proto , * ptr_type ;
88188874 struct bpf_reg_state * regs = cur_regs (env );
88198875 const char * func_name , * ptr_type_name ;
8876+ bool sleepable , rcu_lock , rcu_unlock ;
88208877 struct bpf_kfunc_call_arg_meta meta ;
88218878 u32 i , nargs , func_id , ptr_type_id ;
88228879 int err , insn_idx = * insn_idx_p ;
@@ -8858,11 +8915,45 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
88588915 return - EACCES ;
88598916 }
88608917
8861- if (is_kfunc_sleepable (& meta ) && !env -> prog -> aux -> sleepable ) {
8918+ sleepable = is_kfunc_sleepable (& meta );
8919+ if (sleepable && !env -> prog -> aux -> sleepable ) {
88628920 verbose (env , "program must be sleepable to call sleepable kfunc %s\n" , func_name );
88638921 return - EACCES ;
88648922 }
88658923
8924+ rcu_lock = is_kfunc_bpf_rcu_read_lock (& meta );
8925+ rcu_unlock = is_kfunc_bpf_rcu_read_unlock (& meta );
8926+ if ((rcu_lock || rcu_unlock ) && !env -> rcu_tag_supported ) {
8927+ verbose (env , "no vmlinux btf rcu tag support for kfunc %s\n" , func_name );
8928+ return - EACCES ;
8929+ }
8930+
8931+ if (env -> cur_state -> active_rcu_lock ) {
8932+ struct bpf_func_state * state ;
8933+ struct bpf_reg_state * reg ;
8934+
8935+ if (rcu_lock ) {
8936+ verbose (env , "nested rcu read lock (kernel function %s)\n" , func_name );
8937+ return - EINVAL ;
8938+ } else if (rcu_unlock ) {
8939+ bpf_for_each_reg_in_vstate (env -> cur_state , state , reg , ({
8940+ if (reg -> type & MEM_RCU ) {
8941+ reg -> type &= ~(MEM_RCU | PTR_TRUSTED );
8942+ reg -> type |= PTR_UNTRUSTED ;
8943+ }
8944+ }));
8945+ env -> cur_state -> active_rcu_lock = false;
8946+ } else if (sleepable ) {
8947+ verbose (env , "kernel func %s is sleepable within rcu_read_lock region\n" , func_name );
8948+ return - EACCES ;
8949+ }
8950+ } else if (rcu_lock ) {
8951+ env -> cur_state -> active_rcu_lock = true;
8952+ } else if (rcu_unlock ) {
8953+ verbose (env , "unmatched rcu read unlock (kernel function %s)\n" , func_name );
8954+ return - EINVAL ;
8955+ }
8956+
88668957 /* Check the arguments */
88678958 err = check_kfunc_args (env , & meta );
88688959 if (err < 0 )
@@ -11754,6 +11845,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
1175411845 return - EINVAL ;
1175511846 }
1175611847
11848+ if (env -> cur_state -> active_rcu_lock ) {
11849+ verbose (env , "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n" );
11850+ return - EINVAL ;
11851+ }
11852+
1175711853 if (regs [ctx_reg ].type != PTR_TO_CTX ) {
1175811854 verbose (env ,
1175911855 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n" );
@@ -13019,6 +13115,9 @@ static bool states_equal(struct bpf_verifier_env *env,
1301913115 old -> active_lock .id != cur -> active_lock .id )
1302013116 return false;
1302113117
13118+ if (old -> active_rcu_lock != cur -> active_rcu_lock )
13119+ return false;
13120+
1302213121 /* for states to be equal callsites have to be the same
1302313122 * and all frame states need to be equivalent
1302413123 */
@@ -13706,6 +13805,11 @@ static int do_check(struct bpf_verifier_env *env)
1370613805 return - EINVAL ;
1370713806 }
1370813807
13808+ if (env -> cur_state -> active_rcu_lock ) {
13809+ verbose (env , "bpf_rcu_read_unlock is missing\n" );
13810+ return - EINVAL ;
13811+ }
13812+
1370913813 /* We must do check_reference_leak here before
1371013814 * prepare_func_exit to handle the case when
1371113815 * state->curframe > 0, it may be a callback
@@ -15494,14 +15598,12 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
1549415598 goto patch_call_imm ;
1549515599 }
1549615600
15497- if (insn -> imm == BPF_FUNC_task_storage_get ||
15498- insn -> imm == BPF_FUNC_sk_storage_get ||
15499- insn -> imm == BPF_FUNC_inode_storage_get ||
15500- insn -> imm == BPF_FUNC_cgrp_storage_get ) {
15501- if (env -> prog -> aux -> sleepable )
15502- insn_buf [0 ] = BPF_MOV64_IMM (BPF_REG_5 , (__force __s32 )GFP_KERNEL );
15503- else
15601+ if (is_storage_get_function (insn -> imm )) {
15602+ if (!env -> prog -> aux -> sleepable ||
15603+ env -> insn_aux_data [i + delta ].storage_get_func_atomic )
1550415604 insn_buf [0 ] = BPF_MOV64_IMM (BPF_REG_5 , (__force __s32 )GFP_ATOMIC );
15605+ else
15606+ insn_buf [0 ] = BPF_MOV64_IMM (BPF_REG_5 , (__force __s32 )GFP_KERNEL );
1550515607 insn_buf [1 ] = * insn ;
1550615608 cnt = 2 ;
1550715609
@@ -16580,6 +16682,8 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
1658016682 env -> bypass_spec_v1 = bpf_bypass_spec_v1 ();
1658116683 env -> bypass_spec_v4 = bpf_bypass_spec_v4 ();
1658216684 env -> bpf_capable = bpf_capable ();
16685+ env -> rcu_tag_supported = btf_vmlinux &&
16686+ btf_find_by_name_kind (btf_vmlinux , "rcu" , BTF_KIND_TYPE_TAG ) > 0 ;
1658316687
1658416688 if (is_priv )
1658516689 env -> test_state_freq = attr -> prog_flags & BPF_F_TEST_STATE_FREQ ;
0 commit comments