Skip to content

Commit

Permalink
bpf: Implement bpf namespace
Browse files Browse the repository at this point in the history
It is similar to the pid namespace. When we create a new bpf object in a
child BPF namespace, an ID is allocated for it in the current BPF namespace
and in each of its ancestor BPF namespaces. The hierarchy is as follows,

    init_bpf_ns                  : level = 0
    /        \
  child_a   child_b              : level = 1
            /    \
       child_b_a  child_b_b      : level = 2

When we create a bpf object in child_b_b, it will allocate IDs for this
object in child_b_b, child_b and the init_bpf_ns.

We will allocate the id for bpf_map, bpf_prog and bpf_link in bpf
namespace.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
  • Loading branch information
laoar authored and intel-lab-lkp committed Mar 26, 2023
1 parent f408244 commit 97c1724
Show file tree
Hide file tree
Showing 9 changed files with 294 additions and 2 deletions.
4 changes: 4 additions & 0 deletions fs/proc/namespaces.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <linux/ipc_namespace.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include <linux/bpf_namespace.h>
#include "internal.h"


Expand Down Expand Up @@ -37,6 +38,9 @@ static const struct proc_ns_operations *ns_entries[] = {
&timens_operations,
&timens_for_children_operations,
#endif
#ifdef CONFIG_BPF
&bpfns_operations,
#endif
};

static const char *proc_ns_get_link(struct dentry *dentry,
Expand Down
46 changes: 46 additions & 0 deletions include/linux/bpf_namespace.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_BPF_ID_NS_H
#define _LINUX_BPF_ID_NS_H
#include <linux/types.h>
#include <linux/idr.h>
#include <linux/ns_common.h>
#include <linux/user_namespace.h>

/*
 * One (number, namespace) pair.
 *
 * NOTE(review): presumably @nr is the ID under which an object is visible
 * inside @ns; the code that fills these fields in is not part of this
 * header - confirm against the allocator.
 */
struct ubpf_obj_id {
int nr;
struct bpf_namespace *ns;
};

/*
 * Per-object ID record.
 *
 * The per-level slab caches in kernel/bpf/bpf_namespace.c are created with
 * an object size of
 *   sizeof(struct bpf_obj_id) + level * sizeof(struct ubpf_obj_id),
 * so @numbers is effectively a trailing variable-length array with
 * level + 1 entries. Do not turn numbers[1] into a flexible array member
 * without adjusting that size computation.
 *
 * NOTE(review): presumably numbers[i] holds the ID for the namespace at
 * level i and @level is the level of the namespace the object was created
 * in - confirm against the allocator.
 */
struct bpf_obj_id {
refcount_t count;
unsigned int level;
struct rcu_head rcu;
struct ubpf_obj_id numbers[1];
};

/*
 * Object classes that get a separate ID space in every BPF namespace.
 * The values index bpf_namespace::idr[]; OBJ_ID_NUM is the number of
 * classes and must stay last.
 */
enum {
MAP_OBJ_ID = 0,
PROG_OBJ_ID,
LINK_OBJ_ID,
OBJ_ID_NUM,
};

/*
 * struct bpf_namespace - one node in the BPF namespace hierarchy
 * @idr:           one IDR per object class, indexed by the *_OBJ_ID values
 * @rcu:           RCU head used to defer freeing of the namespace
 * @level:         nesting depth; 0 for init_bpf_ns, parent's level + 1 for
 *                 every namespace created afterwards
 * @ns:            common namespace part (inode number, ops, reference count)
 * @user_ns:       user namespace the namespace was created with; a
 *                 reference is held for namespaces other than init_bpf_ns
 * @obj_id_cachep: slab cache that ID records for this level come from
 * @parent:        parent namespace (a reference is held); left unset for
 *                 init_bpf_ns
 * @ucounts:       UCOUNT_BPF_NAMESPACES charge taken at creation time and
 *                 dropped when the namespace is freed
 */
struct bpf_namespace {
struct idr idr[OBJ_ID_NUM];
struct rcu_head rcu;
int level;
struct ns_common ns;
struct user_namespace *user_ns;
struct kmem_cache *obj_id_cachep;
struct bpf_namespace *parent;
struct ucounts *ucounts;
};

/* The initial (level 0) BPF namespace and the proc namespace operations. */
extern struct bpf_namespace init_bpf_ns;
extern struct proc_ns_operations bpfns_operations;

/*
 * copy_bpfns - pick the BPF namespace for a new nsproxy.
 * Without CLONE_NEWBPF in @flags a reference to @old_ns is taken and
 * @old_ns is returned; with it, a new child of @old_ns owned by @user_ns
 * is created. Returns the namespace or an ERR_PTR() on failure.
 *
 * NOTE(review): no static inline stubs are provided here, while
 * kernel/nsproxy.c calls copy_bpfns()/put_bpfns() unconditionally and the
 * implementation is only built with CONFIG_BPF_SYSCALL - confirm that
 * configurations without it still link.
 */
struct bpf_namespace *copy_bpfns(unsigned long flags,
struct user_namespace *user_ns,
struct bpf_namespace *old_ns);
/*
 * put_bpfns - drop a reference on @ns; when the count reaches zero the
 * namespace is destroyed and the reference it held on its parent is
 * dropped in turn. init_bpf_ns is never released.
 */
void put_bpfns(struct bpf_namespace *ns);
#endif /* _LINUX_BPF_ID_NS_H */
4 changes: 4 additions & 0 deletions include/linux/nsproxy.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ struct uts_namespace;
struct ipc_namespace;
struct pid_namespace;
struct cgroup_namespace;
#ifdef CONFIG_BPF
struct bpf_namespace;
#endif
struct fs_struct;

/*
Expand Down Expand Up @@ -38,6 +41,7 @@ struct nsproxy {
struct time_namespace *time_ns;
struct time_namespace *time_ns_for_children;
struct cgroup_namespace *cgroup_ns;
struct bpf_namespace *bpf_ns;
};
extern struct nsproxy init_nsproxy;

Expand Down
1 change: 1 addition & 0 deletions include/linux/proc_ns.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ enum {
PROC_PID_INIT_INO = 0xEFFFFFFCU,
PROC_CGROUP_INIT_INO = 0xEFFFFFFBU,
PROC_TIME_INIT_INO = 0xEFFFFFFAU,
PROC_BPF_INIT_INO = 0xEFFFFFF9U,
};

#ifdef CONFIG_PROC_FS
Expand Down
1 change: 1 addition & 0 deletions include/linux/user_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ enum ucount_type {
UCOUNT_FANOTIFY_GROUPS,
UCOUNT_FANOTIFY_MARKS,
#endif
UCOUNT_BPF_NAMESPACES,
UCOUNT_COUNTS,
};

Expand Down
1 change: 1 addition & 0 deletions kernel/bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,4 @@ obj-$(CONFIG_BPF_PRELOAD) += preload/
obj-$(CONFIG_BPF_SYSCALL) += relo_core.o
$(obj)/relo_core.o: $(srctree)/tools/lib/bpf/relo_core.c FORCE
$(call if_changed_rule,cc_o_c)
obj-$(CONFIG_BPF_SYSCALL) += bpf_namespace.o
219 changes: 219 additions & 0 deletions kernel/bpf/bpf_namespace.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/ns_common.h>
#include <linux/syscalls.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/proc_ns.h>
#include <linux/sched/task.h>
#include <linux/sched/signal.h>
#include <linux/idr.h>
#include <linux/user_namespace.h>
#include <linux/bpf_namespace.h>

/* Deepest level a BPF namespace may have; init_bpf_ns is level 0. */
#define MAX_BPF_NS_LEVEL 32
/* Slab cache that every dynamically created struct bpf_namespace comes from. */
static struct kmem_cache *bpfns_cachep;
/*
 * Per-level slab caches for ID records, indexed by level - 1 (level 0 uses
 * init_bpf_ns.obj_id_cachep). The array is bounded by this file's own
 * MAX_BPF_NS_LEVEL, which is the limit create_bpf_namespace() enforces,
 * rather than by the unrelated pid-namespace constant.
 */
static struct kmem_cache *obj_id_cache[MAX_BPF_NS_LEVEL];
static struct ns_common *bpfns_get(struct task_struct *task);
static void bpfns_put(struct ns_common *ns);
static struct kmem_cache *create_bpf_cachep(unsigned int level);
/* Serializes creation of the entries in obj_id_cache[]. */
static DEFINE_MUTEX(obj_id_caches_mutex);

/*
 * ->install() hook of bpfns_operations.
 *
 * Joining an existing BPF namespace is not supported: the attempt is
 * logged and -EOPNOTSUPP is returned. @nsset and @ns are unused.
 */
static int bpfns_install(struct nsset *nsset, struct ns_common *ns)
{
	/* Terminate the message so it is not merged with a later printk. */
	pr_info("setns not supported for bpf namespace\n");
	return -EOPNOTSUPP;
}

/*
 * proc namespace operations for the "bpf" namespace type.
 * ->install always fails with -EOPNOTSUPP (see bpfns_install()).
 */
struct proc_ns_operations bpfns_operations = {
.name = "bpf",
.type = CLONE_NEWBPF,
.get = bpfns_get,
.put = bpfns_put,
.install = bpfns_install,
};

/*
 * The initial BPF namespace (level 0), owned by init_user_ns.
 *
 * Its reference count is left at zero: get_bpfns() and put_bpfns() both
 * special-case &init_bpf_ns and never touch the count. Its IDRs and
 * obj_id_cachep are set up later by bpfns_idr_init().
 */
struct bpf_namespace init_bpf_ns = {
.level = 0,
.user_ns = &init_user_ns,
.ns.ops = &bpfns_operations,
.ns.inum = PROC_BPF_INIT_INO,
};

/*
 * Take a reference on @ns and hand the same pointer back.
 * init_bpf_ns is not reference counted, so it is returned untouched.
 */
static struct bpf_namespace *get_bpfns(struct bpf_namespace *ns)
{
	if (ns == &init_bpf_ns)
		return ns;

	refcount_inc(&ns->ns.count);
	return ns;
}

/*
 * ->get() hook of bpfns_operations.
 *
 * Returns the ns_common of @task's BPF namespace with a reference taken
 * (via get_bpfns()), or NULL if @task no longer has an nsproxy.
 */
static struct ns_common *bpfns_get(struct task_struct *task)
{
	struct ns_common *ns = NULL;
	struct nsproxy *nsproxy;

	/*
	 * Another task's ->nsproxy is stabilized by task_lock(), not by RCU:
	 * holding the lock keeps the nsproxy, and therefore its bpf_ns, alive
	 * until our own reference has been taken.
	 */
	task_lock(task);
	nsproxy = task->nsproxy;
	if (nsproxy)
		ns = &get_bpfns(nsproxy->bpf_ns)->ns;
	task_unlock(task);

	return ns;
}

static struct ucounts *inc_bpf_namespaces(struct user_namespace *ns)
{
return inc_ucount(ns, current_euid(), UCOUNT_BPF_NAMESPACES);
}

/* Undo inc_bpf_namespaces(): release the UCOUNT_BPF_NAMESPACES charge. */
static void dec_bpf_namespaces(struct ucounts *ucounts)
{
dec_ucount(ucounts, UCOUNT_BPF_NAMESPACES);
}

/*
 * RCU callback queued by destroy_bpf_namespace(): drops the ucount charge
 * and the user namespace reference, then returns the namespace to its
 * slab cache.
 */
static void delayed_free_bpfns(struct rcu_head *p)
{
	struct bpf_namespace *bpf_ns;

	bpf_ns = container_of(p, struct bpf_namespace, rcu);

	dec_bpf_namespaces(bpf_ns->ucounts);
	put_user_ns(bpf_ns->user_ns);
	kmem_cache_free(bpfns_cachep, bpf_ns);
}

static void destroy_bpf_namespace(struct bpf_namespace *ns)
{
int i;

ns_free_inum(&ns->ns);
for (i = 0; i < OBJ_ID_NUM; i++)
idr_destroy(&ns->idr[i]);
call_rcu(&ns->rcu, delayed_free_bpfns);
}

/*
 * Drop one reference on @ns. If that was the last one, destroy the
 * namespace and drop the reference it held on its parent, continuing up
 * the hierarchy. init_bpf_ns is never released.
 */
void put_bpfns(struct bpf_namespace *ns)
{
	while (ns != &init_bpf_ns) {
		/* Fetch the parent first: @ns may be gone after the final put. */
		struct bpf_namespace *up = ns->parent;

		if (!refcount_dec_and_test(&ns->ns.count))
			return;

		destroy_bpf_namespace(ns);
		ns = up;
	}
}

/* ->put() hook of bpfns_operations: release the namespace embedding @ns. */
static void bpfns_put(struct ns_common *ns)
{
	put_bpfns(container_of(ns, struct bpf_namespace, ns));
}

/*
 * Create a child of @parent_bpfns owned by @user_ns.
 *
 * Returns the new namespace with its reference count set to 1, holding
 * references on the parent and on @user_ns, or an ERR_PTR():
 *   -EINVAL  the parent's user namespace is not in_userns() of @user_ns
 *   -ENOSPC  the nesting limit or the ucount limit was hit
 *   -ENOMEM  the namespace or its ID-record cache could not be allocated
 *   or the error returned by ns_alloc_inum()
 */
static struct bpf_namespace *
create_bpf_namespace(struct user_namespace *user_ns,
		     struct bpf_namespace *parent_bpfns)
{
	unsigned int level = parent_bpfns->level + 1;
	struct bpf_namespace *new_ns;
	struct ucounts *ucounts;
	int type;
	int err;

	/* Nothing has been acquired yet, so these can return directly. */
	if (!in_userns(parent_bpfns->user_ns, user_ns))
		return ERR_PTR(-EINVAL);

	if (level > MAX_BPF_NS_LEVEL)
		return ERR_PTR(-ENOSPC);

	ucounts = inc_bpf_namespaces(user_ns);
	if (!ucounts)
		return ERR_PTR(-ENOSPC);

	new_ns = kmem_cache_zalloc(bpfns_cachep, GFP_KERNEL);
	if (!new_ns) {
		err = -ENOMEM;
		goto drop_ucount;
	}

	for (type = 0; type < OBJ_ID_NUM; type++)
		idr_init(&new_ns->idr[type]);

	new_ns->obj_id_cachep = create_bpf_cachep(level);
	if (!new_ns->obj_id_cachep) {
		err = -ENOMEM;
		goto free_ns;
	}

	err = ns_alloc_inum(&new_ns->ns);
	if (err)
		goto free_ns;

	new_ns->ns.ops = &bpfns_operations;
	refcount_set(&new_ns->ns.count, 1);
	new_ns->level = level;
	new_ns->parent = get_bpfns(parent_bpfns);
	new_ns->user_ns = get_user_ns(user_ns);
	new_ns->ucounts = ucounts;
	return new_ns;

free_ns:
	/* The per-level cache is shared and deliberately left in place. */
	for (type = 0; type < OBJ_ID_NUM; type++)
		idr_destroy(&new_ns->idr[type]);
	kmem_cache_free(bpfns_cachep, new_ns);
drop_ucount:
	dec_bpf_namespaces(ucounts);
	return ERR_PTR(err);
}

/*
 * Choose the BPF namespace for a new nsproxy: a fresh child of @old_ns
 * when CLONE_NEWBPF is set in @flags, otherwise @old_ns itself with an
 * extra reference. May return an ERR_PTR() from create_bpf_namespace().
 */
struct bpf_namespace *copy_bpfns(unsigned long flags,
				 struct user_namespace *user_ns,
				 struct bpf_namespace *old_ns)
{
	if (flags & CLONE_NEWBPF)
		return create_bpf_namespace(user_ns, old_ns);

	return get_bpfns(old_ns);
}

/*
 * Return the slab cache for ID records of namespaces at @level, creating
 * it on first use. @level must be at least 1: obj_id_cache[] starts at
 * level 1, level 0 uses init_bpf_ns.obj_id_cachep instead.
 *
 * Returns NULL if the cache does not exist and could not be created.
 */
static struct kmem_cache *create_bpf_cachep(unsigned int level)
{
	struct kmem_cache **slot = &obj_id_cache[level - 1];
	struct kmem_cache *cached = READ_ONCE(*slot);
	char name[4 + 10 + 1];
	unsigned int len;

	if (cached)
		return cached;

	snprintf(name, sizeof(name), "bpf_%u", level + 1);
	/* struct bpf_obj_id already holds one slot; add one per extra level. */
	len = sizeof(struct bpf_obj_id) + level * sizeof(struct ubpf_obj_id);

	/* Creation is serialized so the same-named cache is never made twice. */
	mutex_lock(&obj_id_caches_mutex);
	if (*slot == NULL)
		*slot = kmem_cache_create(name, len, 0,
					  SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
					  NULL);
	mutex_unlock(&obj_id_caches_mutex);

	/* Our attempt may have failed while a concurrent one succeeded. */
	return READ_ONCE(*slot);
}

/*
 * Boot-time setup of init_bpf_ns: create the slab cache for level-0 ID
 * records and initialise one IDR per object class.
 */
static void __init bpfns_idr_init(void)
{
	int i;

	/*
	 * The cache must be built from struct bpf_obj_id. KMEM_CACHE(pid, ...)
	 * would create a second cache named "pid", sized and aligned for the
	 * unrelated struct pid.
	 */
	init_bpf_ns.obj_id_cachep =
		KMEM_CACHE(bpf_obj_id,
			   SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);

	for (i = 0; i < OBJ_ID_NUM; i++)
		idr_init(&init_bpf_ns.idr[i]);
}

/*
 * Create the slab cache for struct bpf_namespace and finish setting up
 * init_bpf_ns. SLAB_PANIC means a failed cache creation panics instead of
 * returning, so this always returns 0.
 *
 * NOTE(review): this runs as a late_initcall; until then bpfns_cachep is
 * NULL and init_bpf_ns's IDRs are uninitialised - confirm nothing creates
 * a BPF namespace or allocates an ID earlier in boot.
 */
static __init int bpf_namespaces_init(void)
{
bpfns_cachep = KMEM_CACHE(bpf_namespace, SLAB_PANIC | SLAB_ACCOUNT);
bpfns_idr_init();
return 0;
}

late_initcall(bpf_namespaces_init);
19 changes: 17 additions & 2 deletions kernel/nsproxy.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
#include <linux/time_namespace.h>
#include <linux/bpf_namespace.h>
#include <linux/fs_struct.h>
#include <linux/proc_fs.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/syscalls.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>
#include <linux/bpf_namespace.h>

static struct kmem_cache *nsproxy_cachep;

Expand All @@ -47,6 +49,9 @@ struct nsproxy init_nsproxy = {
.time_ns = &init_time_ns,
.time_ns_for_children = &init_time_ns,
#endif
#ifdef CONFIG_BPF
.bpf_ns = &init_bpf_ns,
#endif
};

static inline struct nsproxy *create_nsproxy(void)
Expand Down Expand Up @@ -121,8 +126,16 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
}
new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns);

new_nsp->bpf_ns = copy_bpfns(flags, user_ns, tsk->nsproxy->bpf_ns);
if (IS_ERR(new_nsp->bpf_ns)) {
err = PTR_ERR(new_nsp->bpf_ns);
goto out_bpf;
}
return new_nsp;

out_bpf:
put_time_ns(new_nsp->time_ns);
put_time_ns(new_nsp->time_ns_for_children);
out_time:
put_net(new_nsp->net_ns);
out_net:
Expand Down Expand Up @@ -156,7 +169,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)

if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET |
CLONE_NEWCGROUP | CLONE_NEWTIME)))) {
CLONE_NEWCGROUP | CLONE_NEWTIME | CLONE_NEWBPF)))) {
if ((flags & CLONE_VM) ||
likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
get_nsproxy(old_ns);
Expand Down Expand Up @@ -203,6 +216,8 @@ void free_nsproxy(struct nsproxy *ns)
put_time_ns(ns->time_ns_for_children);
put_cgroup_ns(ns->cgroup_ns);
put_net(ns->net_ns);
if (ns->bpf_ns)
put_bpfns(ns->bpf_ns);
kmem_cache_free(nsproxy_cachep, ns);
}

Expand All @@ -218,7 +233,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,

if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
CLONE_NEWTIME)))
CLONE_NEWTIME | CLONE_NEWBPF)))
return 0;

user_ns = new_cred ? new_cred->user_ns : current_user_ns();
Expand Down

0 comments on commit 97c1724

Please sign in to comment.