@@ -113,9 +113,9 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
113113 * The caller must ensure selem->smap is still valid to be
114114 * dereferenced for its smap->elem_size and smap->cache_idx.
115115 */
116- bool bpf_selem_unlink_storage_nolock (struct bpf_local_storage * local_storage ,
117- struct bpf_local_storage_elem * selem ,
118- bool uncharge_mem , bool use_trace_rcu )
116+ static bool bpf_selem_unlink_storage_nolock (struct bpf_local_storage * local_storage ,
117+ struct bpf_local_storage_elem * selem ,
118+ bool uncharge_mem , bool use_trace_rcu )
119119{
120120 struct bpf_local_storage_map * smap ;
121121 bool free_local_storage ;
@@ -501,7 +501,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
501501 return ERR_PTR (err );
502502}
503503
504- u16 bpf_local_storage_cache_idx_get (struct bpf_local_storage_cache * cache )
504+ static u16 bpf_local_storage_cache_idx_get (struct bpf_local_storage_cache * cache )
505505{
506506 u64 min_usage = U64_MAX ;
507507 u16 i , res = 0 ;
@@ -525,76 +525,14 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
525525 return res ;
526526}
527527
528- void bpf_local_storage_cache_idx_free (struct bpf_local_storage_cache * cache ,
529- u16 idx )
528+ static void bpf_local_storage_cache_idx_free (struct bpf_local_storage_cache * cache ,
529+ u16 idx )
530530{
531531 spin_lock (& cache -> idx_lock );
532532 cache -> idx_usage_counts [idx ]-- ;
533533 spin_unlock (& cache -> idx_lock );
534534}
535535
536- void bpf_local_storage_map_free (struct bpf_local_storage_map * smap ,
537- int __percpu * busy_counter )
538- {
539- struct bpf_local_storage_elem * selem ;
540- struct bpf_local_storage_map_bucket * b ;
541- unsigned int i ;
542-
543- /* Note that this map might be concurrently cloned from
544- * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
545- * RCU read section to finish before proceeding. New RCU
546- * read sections should be prevented via bpf_map_inc_not_zero.
547- */
548- synchronize_rcu ();
549-
550- /* bpf prog and the userspace can no longer access this map
551- * now. No new selem (of this map) can be added
552- * to the owner->storage or to the map bucket's list.
553- *
554- * The elem of this map can be cleaned up here
555- * or when the storage is freed e.g.
556- * by bpf_sk_storage_free() during __sk_destruct().
557- */
558- for (i = 0 ; i < (1U << smap -> bucket_log ); i ++ ) {
559- b = & smap -> buckets [i ];
560-
561- rcu_read_lock ();
562- /* No one is adding to b->list now */
563- while ((selem = hlist_entry_safe (
564- rcu_dereference_raw (hlist_first_rcu (& b -> list )),
565- struct bpf_local_storage_elem , map_node ))) {
566- if (busy_counter ) {
567- migrate_disable ();
568- this_cpu_inc (* busy_counter );
569- }
570- bpf_selem_unlink (selem , false);
571- if (busy_counter ) {
572- this_cpu_dec (* busy_counter );
573- migrate_enable ();
574- }
575- cond_resched_rcu ();
576- }
577- rcu_read_unlock ();
578- }
579-
580- /* While freeing the storage we may still need to access the map.
581- *
582- * e.g. when bpf_sk_storage_free() has unlinked selem from the map
583- * which then made the above while((selem = ...)) loop
584- * exit immediately.
585- *
586- * However, while freeing the storage one still needs to access the
587- * smap->elem_size to do the uncharging in
588- * bpf_selem_unlink_storage_nolock().
589- *
590- * Hence, wait another rcu grace period for the storage to be freed.
591- */
592- synchronize_rcu ();
593-
594- kvfree (smap -> buckets );
595- bpf_map_area_free (smap );
596- }
597-
598536int bpf_local_storage_map_alloc_check (union bpf_attr * attr )
599537{
600538 if (attr -> map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
@@ -614,7 +552,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
614552 return 0 ;
615553}
616554
617- struct bpf_local_storage_map * bpf_local_storage_map_alloc (union bpf_attr * attr )
555+ static struct bpf_local_storage_map * __bpf_local_storage_map_alloc (union bpf_attr * attr )
618556{
619557 struct bpf_local_storage_map * smap ;
620558 unsigned int i ;
@@ -664,3 +602,117 @@ int bpf_local_storage_map_check_btf(const struct bpf_map *map,
664602
665603 return 0 ;
666604}
605+
606+ bool bpf_local_storage_unlink_nolock (struct bpf_local_storage * local_storage )
607+ {
608+ struct bpf_local_storage_elem * selem ;
609+ bool free_storage = false;
610+ struct hlist_node * n ;
611+
612+ /* Neither the bpf_prog nor the bpf_map's syscall
613+ * could be modifying the local_storage->list now.
614+ * Thus, no elem can be added to or deleted from the
615+ * local_storage->list by the bpf_prog or by the bpf_map's syscall.
616+ *
617+ * It is racing with bpf_local_storage_map_free() alone
618+ * when unlinking elem from the local_storage->list and
619+ * the map's bucket->list.
620+ */
621+ hlist_for_each_entry_safe (selem , n , & local_storage -> list , snode ) {
622+ /* Always unlink from map before unlinking from
623+ * local_storage.
624+ */
625+ bpf_selem_unlink_map (selem );
626+ /* If local_storage list has only one element, the
627+ * bpf_selem_unlink_storage_nolock() will return true.
628+ * Otherwise, it will return false. The current loop iteration
629+ * intends to remove all local storage. So the last iteration
 630+ * of the loop will set free_storage to true.
631+ */
632+ free_storage = bpf_selem_unlink_storage_nolock (
633+ local_storage , selem , false, false);
634+ }
635+
636+ return free_storage ;
637+ }
638+
639+ struct bpf_map *
640+ bpf_local_storage_map_alloc (union bpf_attr * attr ,
641+ struct bpf_local_storage_cache * cache )
642+ {
643+ struct bpf_local_storage_map * smap ;
644+
645+ smap = __bpf_local_storage_map_alloc (attr );
646+ if (IS_ERR (smap ))
647+ return ERR_CAST (smap );
648+
649+ smap -> cache_idx = bpf_local_storage_cache_idx_get (cache );
650+ return & smap -> map ;
651+ }
652+
653+ void bpf_local_storage_map_free (struct bpf_map * map ,
654+ struct bpf_local_storage_cache * cache ,
655+ int __percpu * busy_counter )
656+ {
657+ struct bpf_local_storage_map_bucket * b ;
658+ struct bpf_local_storage_elem * selem ;
659+ struct bpf_local_storage_map * smap ;
660+ unsigned int i ;
661+
662+ smap = (struct bpf_local_storage_map * )map ;
663+ bpf_local_storage_cache_idx_free (cache , smap -> cache_idx );
664+
665+ /* Note that this map might be concurrently cloned from
666+ * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
667+ * RCU read section to finish before proceeding. New RCU
668+ * read sections should be prevented via bpf_map_inc_not_zero.
669+ */
670+ synchronize_rcu ();
671+
672+ /* bpf prog and the userspace can no longer access this map
673+ * now. No new selem (of this map) can be added
674+ * to the owner->storage or to the map bucket's list.
675+ *
676+ * The elem of this map can be cleaned up here
677+ * or when the storage is freed e.g.
678+ * by bpf_sk_storage_free() during __sk_destruct().
679+ */
680+ for (i = 0 ; i < (1U << smap -> bucket_log ); i ++ ) {
681+ b = & smap -> buckets [i ];
682+
683+ rcu_read_lock ();
684+ /* No one is adding to b->list now */
685+ while ((selem = hlist_entry_safe (
686+ rcu_dereference_raw (hlist_first_rcu (& b -> list )),
687+ struct bpf_local_storage_elem , map_node ))) {
688+ if (busy_counter ) {
689+ migrate_disable ();
690+ this_cpu_inc (* busy_counter );
691+ }
692+ bpf_selem_unlink (selem , false);
693+ if (busy_counter ) {
694+ this_cpu_dec (* busy_counter );
695+ migrate_enable ();
696+ }
697+ cond_resched_rcu ();
698+ }
699+ rcu_read_unlock ();
700+ }
701+
702+ /* While freeing the storage we may still need to access the map.
703+ *
704+ * e.g. when bpf_sk_storage_free() has unlinked selem from the map
705+ * which then made the above while((selem = ...)) loop
706+ * exit immediately.
707+ *
708+ * However, while freeing the storage one still needs to access the
709+ * smap->elem_size to do the uncharging in
710+ * bpf_selem_unlink_storage_nolock().
711+ *
712+ * Hence, wait another rcu grace period for the storage to be freed.
713+ */
714+ synchronize_rcu ();
715+
716+ kvfree (smap -> buckets );
717+ bpf_map_area_free (smap );
718+ }
0 commit comments