@@ -3155,6 +3155,143 @@ struct bpf_iter__udp {
 	int bucket __aligned(8);
 };
 
+struct bpf_udp_iter_state {
+	struct udp_iter_state state;
+	unsigned int cur_sk;
+	unsigned int end_sk;
+	unsigned int max_sk;
+	int offset;
+	struct sock **batch;
+	bool st_bucket_done;
+};
+
+static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
+				      unsigned int new_batch_sz);
+static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
+{
+	struct bpf_udp_iter_state *iter = seq->private;
+	struct udp_iter_state *state = &iter->state;
+	struct net *net = seq_file_net(seq);
+	struct udp_table *udptable;
+	unsigned int batch_sks = 0;
+	bool resized = false;
+	struct sock *sk;
+
+	/* The current batch is done, so advance the bucket. */
+	if (iter->st_bucket_done) {
+		state->bucket++;
+		iter->offset = 0;
+	}
+
+	udptable = udp_get_table_seq(seq, net);
+
+again:
+	/* New batch for the next bucket.
+	 * Iterate over the hash table to find a bucket with sockets matching
+	 * the iterator attributes, and return the first matching socket from
+	 * the bucket. The remaining matched sockets from the bucket are
+	 * batched before releasing the bucket lock. This allows BPF programs
+	 * that are called in seq_show to acquire the bucket lock if needed.
+	 */
+	iter->cur_sk = 0;
+	iter->end_sk = 0;
+	iter->st_bucket_done = false;
+	batch_sks = 0;
+
+	for (; state->bucket <= udptable->mask; state->bucket++) {
+		struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];
+
+		if (hlist_empty(&hslot2->head)) {
+			iter->offset = 0;
+			continue;
+		}
+
+		spin_lock_bh(&hslot2->lock);
+		udp_portaddr_for_each_entry(sk, &hslot2->head) {
+			if (seq_sk_match(seq, sk)) {
+				/* Resume from the last iterated socket at the
+				 * offset in the bucket before the iterator
+				 * was stopped.
+				 */
+				if (iter->offset) {
+					--iter->offset;
+					continue;
+				}
+				if (iter->end_sk < iter->max_sk) {
+					sock_hold(sk);
+					iter->batch[iter->end_sk++] = sk;
+				}
+				batch_sks++;
+			}
+		}
+		spin_unlock_bh(&hslot2->lock);
+
+		if (iter->end_sk)
+			break;
+
+		/* Reset the current bucket's offset before moving to the
+		 * next bucket.
+		 */
+		iter->offset = 0;
+	}
+
+	/* All done: no batch made. */
+	if (!iter->end_sk)
+		return NULL;
+
+	if (iter->end_sk == batch_sks) {
+		/* Batching is done for the current bucket; return the first
+		 * socket to be iterated from the batch.
+		 */
+		iter->st_bucket_done = true;
+		goto done;
+	}
+	if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) {
+		resized = true;
+		/* After allocating a larger batch, retry one more time to
+		 * grab the whole bucket.
+		 */
+		state->bucket--;
+		goto again;
+	}
+done:
+	return iter->batch[0];
+}
+
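When a bucket holds more matching sockets than iter->batch can take, end_sk stops at max_sk while batch_sks keeps counting, so the two diverge and the function grows the array and retries the bucket once. A minimal stand-alone sketch of that arithmetic (the values are hypothetical; only the 3/2 growth factor and the initial capacity of 16, INIT_BATCH_SZ below, come from the patch):

#include <assert.h>

int main(void)
{
	unsigned int max_sk = 16;	/* batch capacity, INIT_BATCH_SZ below */
	unsigned int batch_sks = 24;	/* matching sockets counted in the bucket */
	unsigned int end_sk = batch_sks < max_sk ? batch_sks : max_sk;

	assert(end_sk != batch_sks);	/* partial batch: grow and retry */
	max_sk = batch_sks * 3 / 2;	/* 36, the growth used at the call site */
	assert(batch_sks <= max_sk);	/* the retry now holds the whole bucket */
	return 0;
}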
+static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct bpf_udp_iter_state *iter = seq->private;
+	struct sock *sk;
+
+	/* Whenever seq_next() is called, the socket at iter->cur_sk is
+	 * done with seq_show(), so release its reference.
+	 */
+	if (iter->cur_sk < iter->end_sk) {
+		sock_put(iter->batch[iter->cur_sk++]);
+		++iter->offset;
+	}
+
+	/* After updating iter->cur_sk, check if there are more sockets
+	 * available in the current bucket batch.
+	 */
+	if (iter->cur_sk < iter->end_sk)
+		sk = iter->batch[iter->cur_sk];
+	else
+		/* Prepare a new batch. */
+		sk = bpf_iter_udp_batch(seq);
+
+	++*pos;
+	return sk;
+}
+
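The offset bookkeeping above is what lets a traversal survive a stop(): next() counts how many matches of the current bucket have already been shown, and a later re-batch of the same bucket skips exactly that many before handing out the next socket. A small stand-alone model of the skip logic (hypothetical data; it mirrors the resume branch in bpf_iter_udp_batch()):

#include <assert.h>

int main(void)
{
	int bucket[4] = { 101, 102, 103, 104 };	/* stand-ins for sockets */
	int offset = 2;				/* two already shown before stop() */
	int next = -1;

	for (int i = 0; i < 4; i++) {		/* udp_portaddr_for_each_entry() */
		if (offset) {			/* resume branch: skip shown entries */
			--offset;
			continue;
		}
		next = bucket[i];
		break;
	}
	assert(next == 103);			/* traversal resumes at the third match */
	return 0;
}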
+static void *bpf_iter_udp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	/* bpf iter does not support lseek, so it always
+	 * continues from where it was stop()-ped.
+	 */
+	if (*pos)
+		return bpf_iter_udp_batch(seq);
+
+	return SEQ_START_TOKEN;
+}
+
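A user-space reader that drains the iterator fd in small chunks exercises this resume path: each time the seq_file buffer fills, stop() runs, and the next start() re-batches from the saved bucket and offset instead of rewinding. A sketch, assuming iter_fd comes from bpf_iter_create() (see the libbpf sketch near the end):

#include <stdio.h>
#include <unistd.h>

static void drain_iter(int iter_fd)
{
	char buf[64];	/* deliberately tiny to force many stop()/start() cycles */
	ssize_t n;

	while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
}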
 static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
 			     struct udp_sock *udp_sk, uid_t uid, int bucket)
 {
@@ -3175,18 +3312,37 @@ static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
 	struct bpf_prog *prog;
 	struct sock *sk = v;
 	uid_t uid;
+	int ret;
 
 	if (v == SEQ_START_TOKEN)
 		return 0;
 
+	lock_sock(sk);
+
+	if (unlikely(sk_unhashed(sk))) {
+		ret = SEQ_SKIP;
+		goto unlock;
+	}
+
 	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
 	meta.seq = seq;
 	prog = bpf_iter_get_info(&meta, false);
-	return udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
+	ret = udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
+
+unlock:
+	release_sock(sk);
+	return ret;
+}
+
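Since bpf_iter_udp_seq_show() now runs with the socket locked, the attached program may use helpers that require the sock lock. A minimal BPF-side consumer, modeled on the kernel selftests rather than taken from this patch; the context fields follow struct bpf_iter__udp and DEFINE_BPF_ITER_FUNC(udp, ...) in this file:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("iter/udp")
int dump_udp(struct bpf_iter__udp *ctx)
{
	struct udp_sock *udp_sk = ctx->udp_sk;
	struct seq_file *seq = ctx->meta->seq;

	if (!udp_sk)	/* end-of-iteration pass from seq_stop() */
		return 0;

	BPF_SEQ_PRINTF(seq, "bucket=%d uid=%u\n", ctx->bucket, ctx->uid);
	return 0;
}

char _license[] SEC("license") = "GPL";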
+static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter)
+{
+	while (iter->cur_sk < iter->end_sk)
+		sock_put(iter->batch[iter->cur_sk++]);
 }
 
 static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
 {
+	struct bpf_udp_iter_state *iter = seq->private;
 	struct bpf_iter_meta meta;
 	struct bpf_prog *prog;
 
@@ -3197,12 +3353,15 @@ static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
 		(void)udp_prog_seq_show(prog, &meta, v, 0, 0);
 	}
 
-	udp_seq_stop(seq, v);
+	if (iter->cur_sk < iter->end_sk) {
+		bpf_iter_udp_put_batch(iter);
+		iter->st_bucket_done = false;
+	}
 }
 
 static const struct seq_operations bpf_iter_udp_seq_ops = {
-	.start		= udp_seq_start,
-	.next		= udp_seq_next,
+	.start		= bpf_iter_udp_seq_start,
+	.next		= bpf_iter_udp_seq_next,
 	.stop		= bpf_iter_udp_seq_stop,
 	.show		= bpf_iter_udp_seq_show,
 };
@@ -3431,21 +3590,55 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = {
 DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta,
 		     struct udp_sock *udp_sk, uid_t uid, int bucket)
 
+static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
+				      unsigned int new_batch_sz)
+{
+	struct sock **new_batch;
+
+	new_batch = kvmalloc_array(new_batch_sz, sizeof(*new_batch),
+				   GFP_USER | __GFP_NOWARN);
+	if (!new_batch)
+		return -ENOMEM;
+
+	bpf_iter_udp_put_batch(iter);
+	kvfree(iter->batch);
+	iter->batch = new_batch;
+	iter->max_sk = new_batch_sz;
+
+	return 0;
+}
+
+#define INIT_BATCH_SZ 16
+
 static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
 {
-	return bpf_iter_init_seq_net(priv_data, aux);
+	struct bpf_udp_iter_state *iter = priv_data;
+	int ret;
+
+	ret = bpf_iter_init_seq_net(priv_data, aux);
+	if (ret)
+		return ret;
+
+	ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ);
+	if (ret)
+		bpf_iter_fini_seq_net(priv_data);
+
+	return ret;
 }
 
 static void bpf_iter_fini_udp(void *priv_data)
 {
+	struct bpf_udp_iter_state *iter = priv_data;
+
 	bpf_iter_fini_seq_net(priv_data);
+	kvfree(iter->batch);
 }
 
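On the user-space side, each traversal is one bpf_iter_create() fd over an attached iterator link. A hedged libbpf sketch (run_udp_iter is a hypothetical helper name; drain_iter is the chunked reader sketched earlier):

#include <errno.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

void drain_iter(int iter_fd);	/* the chunked reader sketched earlier */

int run_udp_iter(struct bpf_program *prog)
{
	struct bpf_link *link;
	int iter_fd, err = 0;

	link = bpf_program__attach_iter(prog, NULL);
	if (!link)
		return -errno;

	iter_fd = bpf_iter_create(bpf_link__fd(link));	/* one fd per traversal */
	if (iter_fd < 0) {
		err = iter_fd;
	} else {
		drain_iter(iter_fd);
		close(iter_fd);
	}

	bpf_link__destroy(link);
	return err;
}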
 static const struct bpf_iter_seq_info udp_seq_info = {
 	.seq_ops		= &bpf_iter_udp_seq_ops,
 	.init_seq_private	= bpf_iter_init_udp,
 	.fini_seq_private	= bpf_iter_fini_udp,
-	.seq_priv_size		= sizeof(struct udp_iter_state),
+	.seq_priv_size		= sizeof(struct bpf_udp_iter_state),
 };
 
 static struct bpf_iter_reg udp_reg_info = {