Skip to content

Commit

Permalink
bpf: Move BPF sysctls from kernel/sysctl.c to BPF core
Browse files Browse the repository at this point in the history
We're moving sysctls out of kernel/sysctl.c as it is a mess. We
already moved all filesystem sysctls out. And with time the goal
is to move all sysctls out to their own subsystem/actual user.

kernel/sysctl.c has grown to an insane mess and its easy to run
into conflicts with it. The effort to move them out into various
subsystems is part of this.

Signed-off-by: Yan Zhu <zhuyan34@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Link: https://lore.kernel.org/bpf/20220407070759.29506-1-zhuyan34@huawei.com
  • Loading branch information
juyin authored and borkmann committed Apr 13, 2022
1 parent 3123109 commit 2900005
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 79 deletions.
87 changes: 87 additions & 0 deletions kernel/bpf/syscall.c
Expand Up @@ -4908,3 +4908,90 @@ const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
const struct bpf_prog_ops bpf_syscall_prog_ops = {
.test_run = bpf_prog_test_run_syscall,
};

#ifdef CONFIG_SYSCTL
static int bpf_stats_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct static_key *key = (struct static_key *)table->data;
static int saved_val;
int val, ret;
struct ctl_table tmp = {
.data = &val,
.maxlen = sizeof(val),
.mode = table->mode,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
};

if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;

mutex_lock(&bpf_stats_enabled_mutex);
val = saved_val;
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && !ret && val != saved_val) {
if (val)
static_key_slow_inc(key);
else
static_key_slow_dec(key);
saved_val = val;
}
mutex_unlock(&bpf_stats_enabled_mutex);
return ret;
}

void __weak unpriv_ebpf_notify(int new_state)
{
}

static int bpf_unpriv_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret, unpriv_enable = *(int *)table->data;
bool locked_state = unpriv_enable == 1;
struct ctl_table tmp = *table;

if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;

tmp.data = &unpriv_enable;
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && !ret) {
if (locked_state && unpriv_enable != 1)
return -EPERM;
*(int *)table->data = unpriv_enable;
}

unpriv_ebpf_notify(unpriv_enable);

return ret;
}

static struct ctl_table bpf_syscall_table[] = {
{
.procname = "unprivileged_bpf_disabled",
.data = &sysctl_unprivileged_bpf_disabled,
.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
.mode = 0644,
.proc_handler = bpf_unpriv_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
{
.procname = "bpf_stats_enabled",
.data = &bpf_stats_enabled_key.key,
.maxlen = sizeof(bpf_stats_enabled_key),
.mode = 0644,
.proc_handler = bpf_stats_handler,
},
{ }
};

static int __init bpf_syscall_sysctl_init(void)
{
register_sysctl_init("kernel", bpf_syscall_table);
return 0;
}
late_initcall(bpf_syscall_sysctl_init);
#endif /* CONFIG_SYSCTL */
79 changes: 0 additions & 79 deletions kernel/sysctl.c
Expand Up @@ -62,7 +62,6 @@
#include <linux/binfmts.h>
#include <linux/sched/sysctl.h>
#include <linux/kexec.h>
#include <linux/bpf.h>
#include <linux/mount.h>
#include <linux/userfaultfd_k.h>
#include <linux/latencytop.h>
Expand Down Expand Up @@ -148,66 +147,6 @@ static const int max_extfrag_threshold = 1000;

#endif /* CONFIG_SYSCTL */

#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
static int bpf_stats_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct static_key *key = (struct static_key *)table->data;
static int saved_val;
int val, ret;
struct ctl_table tmp = {
.data = &val,
.maxlen = sizeof(val),
.mode = table->mode,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
};

if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;

mutex_lock(&bpf_stats_enabled_mutex);
val = saved_val;
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && !ret && val != saved_val) {
if (val)
static_key_slow_inc(key);
else
static_key_slow_dec(key);
saved_val = val;
}
mutex_unlock(&bpf_stats_enabled_mutex);
return ret;
}

void __weak unpriv_ebpf_notify(int new_state)
{
}

static int bpf_unpriv_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret, unpriv_enable = *(int *)table->data;
bool locked_state = unpriv_enable == 1;
struct ctl_table tmp = *table;

if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;

tmp.data = &unpriv_enable;
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && !ret) {
if (locked_state && unpriv_enable != 1)
return -EPERM;
*(int *)table->data = unpriv_enable;
}

unpriv_ebpf_notify(unpriv_enable);

return ret;
}
#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */

/*
* /proc/sys support
*/
Expand Down Expand Up @@ -2299,24 +2238,6 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_BPF_SYSCALL
{
.procname = "unprivileged_bpf_disabled",
.data = &sysctl_unprivileged_bpf_disabled,
.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
.mode = 0644,
.proc_handler = bpf_unpriv_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
{
.procname = "bpf_stats_enabled",
.data = &bpf_stats_enabled_key.key,
.maxlen = sizeof(bpf_stats_enabled_key),
.mode = 0644,
.proc_handler = bpf_stats_handler,
},
#endif
#if defined(CONFIG_TREE_RCU)
{
.procname = "panic_on_rcu_stall",
Expand Down

0 comments on commit 2900005

Please sign in to comment.