forked from torvalds/linux
Permalink
Show file tree
Hide file tree
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
bpf: introduce priority queue based map
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
- Loading branch information
Showing
2 changed files
with
206 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,205 @@ | ||
| // SPDX-License-Identifier: GPL-2.0 | ||
| /* | ||
| * priority_queue_map.c: BPF priority queue map | ||
| * | ||
| * Copyright (C) 2021, Bytedance, Cong Wang <cong.wang@bytedance.com> | ||
| */ | ||
| #include <linux/bpf.h> | ||
| #include <linux/slab.h> | ||
| #include <linux/capability.h> | ||
| #include <linux/priority_queue.h> | ||
|
|
||
| #define PRIORITY_QUEUE_CREATE_FLAG_MASK \ | ||
| (BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK) | ||
|
|
||
| struct bpf_priority_queue { | ||
| struct bpf_map map; | ||
| raw_spinlock_t lock; | ||
| struct pq_root root; | ||
| }; | ||
|
|
||
/*
 * One heap element.  The flexible array holds the key bytes followed by the
 * value bytes; priority_queue_map_peek()/pop() read the value at offset
 * map->key_size into this array.
 */
struct bpf_priority_queue_node {
	struct pq_node node;
	/* read by bpf_priority_queue_cmp(); NOTE(review): nothing in this
	 * patch ever initializes it — confirm the intended writer.
	 */
	u32 key_size;
	char key[] __aligned(8);
};
|
|
||
/* Recover the containing bpf_priority_queue from its embedded bpf_map. */
static struct bpf_priority_queue *bpf_priority_queue(struct bpf_map *map)
{
	return container_of(map, struct bpf_priority_queue, map);
}
|
|
||
| /* Called from syscall */ | ||
| static int priority_queue_map_alloc_check(union bpf_attr *attr) | ||
| { | ||
| if (!bpf_capable()) | ||
| return -EPERM; | ||
|
|
||
| /* check sanity of attributes */ | ||
| if (attr->max_entries == 0 || attr->key_size == 0 || | ||
| attr->value_size == 0 || | ||
| attr->map_flags & ~PRIORITY_QUEUE_CREATE_FLAG_MASK || | ||
| !bpf_map_flags_access_ok(attr->map_flags)) | ||
| return -EINVAL; | ||
|
|
||
| if (attr->value_size > KMALLOC_MAX_SIZE) | ||
| /* if value_size is bigger, the user space won't be able to | ||
| * access the elements. | ||
| */ | ||
| return -E2BIG; | ||
|
|
||
| return 0; | ||
| } | ||
|
|
||
| static bool bpf_priority_queue_cmp(struct pq_node *l, struct pq_node *r) | ||
| { | ||
| struct bpf_priority_queue_node *lnode, *rnode; | ||
|
|
||
| lnode = container_of(l, struct bpf_priority_queue_node, node); | ||
| rnode = container_of(r, struct bpf_priority_queue_node, node); | ||
|
|
||
| return memcmp(lnode->key, rnode->key, lnode->key_size); | ||
| } | ||
|
|
||
| static struct bpf_map *priority_queue_map_alloc(union bpf_attr *attr) | ||
| { | ||
| int numa_node = bpf_map_attr_numa_node(attr); | ||
| struct bpf_priority_queue *pq; | ||
| u64 size, queue_size; | ||
|
|
||
| queue_size = sizeof(*pq) + size * attr->value_size; | ||
| pq = bpf_map_area_alloc(queue_size, numa_node); | ||
| if (!pq) | ||
| return ERR_PTR(-ENOMEM); | ||
|
|
||
| memset(pq, 0, sizeof(*pq)); | ||
| bpf_map_init_from_attr(&pq->map, attr); | ||
| pq->size = (u64) attr->max_entries + 1; | ||
| raw_spin_lock_init(&pq->lock); | ||
| pq_root_init(&pq->root, bpf_priority_queue_cmp); | ||
| pq->elem_size = sizeof(struct bpf_priority_queue_node ) + | ||
| round_up(pq->map.key_size, 8) + | ||
| round_up(pq->map.value_size, 8); | ||
| return &pq->map; | ||
| } | ||
|
|
||
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void priority_queue_map_free(struct bpf_map *map)
{
	struct bpf_priority_queue *pq = bpf_priority_queue(map);

	/*
	 * NOTE(review): pq_flush() is handed a NULL callback; if that
	 * argument is the per-node destructor, every node allocated by
	 * alloc_priority_queue_node() leaks here.  Confirm against the
	 * pq_flush() contract in <linux/priority_queue.h>.
	 */
	pq_flush(&pq->root, NULL);
	bpf_map_area_free(pq);
}
|
|
||
| /* Called from syscall or from eBPF program */ | ||
| static int priority_queue_map_peek(struct bpf_map *map, void *value) | ||
| { | ||
| struct bpf_priority_queue *pq = bpf_priority_queue(map); | ||
| struct bpf_priority_queue_node *n; | ||
| u32 key_size = map->key_size; | ||
| struct pq_node *node; | ||
| unsigned long flags; | ||
|
|
||
| raw_spin_lock_irqsave(&pq->lock, flags); | ||
| node = pq_top(&pq->root); | ||
| if (!node) { | ||
| raw_spin_unlock_irqrestore(&pq->lock, flags); | ||
| return -ENOENT; | ||
| } | ||
| n = container_of(node, struct bpf_priority_queue_node, node); | ||
| ptr = n->key + pq->map.key_size; | ||
| memcpy(value, ptr, pq->map.value_size); | ||
| raw_spin_unlock_irqrestore(&pq->lock, flags); | ||
| return 0; | ||
| } | ||
|
|
||
| /* Called from syscall or from eBPF program */ | ||
| static int priority_queue_map_pop_elem(struct bpf_map *map, void *value) | ||
| { | ||
| struct bpf_priority_queue *pq = bpf_priority_queue(map); | ||
| struct bpf_priority_queue_node *n; | ||
| u32 key_size = map->key_size; | ||
| struct pq_node *node; | ||
| unsigned long flags; | ||
|
|
||
| raw_spin_lock_irqsave(&pq->lock, flags); | ||
| node = pq_pop(&pq->root); | ||
| if (!node) { | ||
| raw_spin_unlock_irqrestore(&pq->lock, flags); | ||
| return -ENOENT; | ||
| } | ||
| n = container_of(node, struct bpf_priority_queue_node, node); | ||
| ptr = n->key + pq->map.key_size; | ||
| memcpy(value, ptr, pq->map.value_size); | ||
| raw_spin_unlock_irqrestore(&pq->lock, flags); | ||
| return 0; | ||
| } | ||
|
|
||
/* Called from syscall or from eBPF program */
/*
 * push_elem is deliberately unsupported: it carries no key to order the
 * heap by.  Insertion goes through priority_queue_map_update_elem().
 */
static int priority_queue_map_push_elem(struct bpf_map *map, void *value,
					u64 flags)
{
	return -EINVAL;
}
|
|
||
/* Called from syscall or from eBPF program */
/*
 * Keyed lookup is unsupported; a heap only exposes its top element
 * (see priority_queue_map_peek()).  NULL tells the caller no value exists.
 */
static void *priority_queue_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}
|
|
||
| struct bpf_priority_queue_node *alloc_priority_queue_node(struct bpf_priority_queue *pq) | ||
| { | ||
| return bpf_map_kmalloc_node(&pq->map, pq->elem_size, | ||
| GFP_ATOMIC | __GFP_NOWARN, | ||
| pq->map.numa_node); | ||
| } | ||
|
|
||
| /* Called from syscall or from eBPF program */ | ||
| static int priority_queue_map_update_elem(struct bpf_map *map, void *key, | ||
| void *value, u64 flags) | ||
| { | ||
| struct bpf_priority_queue *pq = bpf_priority_queue(map); | ||
| struct bpf_priority_queue_node *n; | ||
| u32 key_size = map->key_size; | ||
| unsigned long flags; | ||
|
|
||
| n = alloc_priority_queue_node(pq); | ||
| if (!n) | ||
| return -ENOMEM; | ||
| raw_spin_lock_irqsave(&pq->lock, flags); | ||
| pq_push(&pq->root, &n->node); | ||
| raw_spin_unlock_irqrestore(&pq->lock, flags); | ||
| return 0; | ||
| } | ||
|
|
||
/* Called from syscall or from eBPF program */
/*
 * Keyed deletion is unsupported; elements leave the queue only via
 * priority_queue_map_pop_elem().
 */
static int priority_queue_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}
|
|
||
/* Called from syscall */
/*
 * Key iteration is unsupported: the heap has no key-ordered traversal to
 * expose to userspace.
 */
static int priority_queue_map_get_next_key(struct bpf_map *map, void *key,
					   void *next_key)
{
	return -EINVAL;
}
|
|
||
static int queue_map_btf_id;
/*
 * Map-ops vtable wiring this file's handlers into the BPF map core.
 * NOTE(review): the names queue_map_btf_id / queue_map_ops presumably
 * collide with the existing queue map in kernel/bpf/queue_stack_maps.c —
 * these likely should be priority_queue_map_{btf_id,ops}; confirm at link
 * time before merging.
 */
const struct bpf_map_ops queue_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = priority_queue_map_alloc_check,
	.map_alloc = priority_queue_map_alloc,
	.map_free = priority_queue_map_free,
	.map_lookup_elem = priority_queue_map_lookup_elem,
	.map_update_elem = priority_queue_map_update_elem,
	.map_delete_elem = priority_queue_map_delete_elem,
	.map_push_elem = priority_queue_map_push_elem,
	.map_pop_elem = priority_queue_map_pop_elem,
	.map_peek_elem = priority_queue_map_peek,
	.map_get_next_key = priority_queue_map_get_next_key,
	.map_btf_name = "bpf_priority_queue",
	.map_btf_id = &queue_map_btf_id,
};