Skip to content

Commit

Permalink
netfilter: ipset: Fix race between namespace cleanup and gc in the li…
Browse files Browse the repository at this point in the history
…st:set type

[ Upstream commit 4e7aaa6 ]

Lion Ackermann reported that there is a race condition between namespace cleanup
in ipset and the garbage collection of the list:set type. The namespace
cleanup can destroy the list:set type of sets while the gc of the set type is
waiting to run in rcu cleanup. The latter uses data from the destroyed set which
thus leads use after free. The patch contains the following parts:

- When destroying all sets, first remove the garbage collectors, then wait
  if needed and then destroy the sets.
- Fix the badly ordered "wait then remove gc" for the destroy a single set
  case.
- Fix the missing rcu locking in the list:set type in the userspace test
  case.
- Use proper RCU list handlings in the list:set type.

The patch depends on c1193d9 (netfilter: ipset: Add list flush to cancel_gc).

Fixes: 97f7cf1 (netfilter: ipset: fix performance regression in swap operation)
Reported-by: Lion Ackermann <nnamrec@gmail.com>
Tested-by: Lion Ackermann <nnamrec@gmail.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
  • Loading branch information
Jozsef Kadlecsik authored and gregkh committed Jul 5, 2024
1 parent ea1a98c commit 93b53c2
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 51 deletions.
81 changes: 46 additions & 35 deletions net/netfilter/ipset/ip_set_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1176,23 +1176,50 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = {
.len = IPSET_MAXNAMELEN - 1 },
};

/* In order to return quickly when destroying a single set, it is split
* into two stages:
* - Cancel garbage collector
* - Destroy the set itself via call_rcu()
*/

static void
ip_set_destroy_set(struct ip_set *set)
ip_set_destroy_set_rcu(struct rcu_head *head)
{
pr_debug("set: %s\n", set->name);
struct ip_set *set = container_of(head, struct ip_set, rcu);

/* Must call it without holding any lock */
set->variant->destroy(set);
module_put(set->type->me);
kfree(set);
}

static void
ip_set_destroy_set_rcu(struct rcu_head *head)
_destroy_all_sets(struct ip_set_net *inst)
{
struct ip_set *set = container_of(head, struct ip_set, rcu);
struct ip_set *set;
ip_set_id_t i;
bool need_wait = false;

ip_set_destroy_set(set);
/* First cancel gc's: set:list sets are flushed as well */
for (i = 0; i < inst->ip_set_max; i++) {
set = ip_set(inst, i);
if (set) {
set->variant->cancel_gc(set);
if (set->type->features & IPSET_TYPE_NAME)
need_wait = true;
}
}
/* Must wait for flush to be really finished */
if (need_wait)
rcu_barrier();
for (i = 0; i < inst->ip_set_max; i++) {
set = ip_set(inst, i);
if (set) {
ip_set(inst, i) = NULL;
set->variant->destroy(set);
module_put(set->type->me);
kfree(set);
}
}
}

static int ip_set_destroy(struct net *net, struct sock *ctnl,
Expand All @@ -1208,20 +1235,17 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;


/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
* External systems (i.e. xt_set) must call
* ip_set_put|get_nfnl_* functions, that way we
* ip_set_nfnl_get_* functions, that way we
* can safely check references here.
*
* list:set timer can only decrement the reference
* counter, so if it's already zero, we can proceed
* without holding the lock.
*/
if (!attr[IPSET_ATTR_SETNAME]) {
/* Must wait for flush to be really finished in list:set */
rcu_barrier();
read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
Expand All @@ -1232,15 +1256,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
}
inst->is_destroyed = true;
read_unlock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
if (s) {
ip_set(inst, i) = NULL;
/* Must cancel garbage collectors */
s->variant->cancel_gc(s);
ip_set_destroy_set(s);
}
}
_destroy_all_sets(inst);
/* Modified by ip_set_destroy() only, which is serialized */
inst->is_destroyed = false;
} else {
Expand All @@ -1259,12 +1275,12 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
/* Must cancel garbage collectors */
s->variant->cancel_gc(s);
if (features & IPSET_TYPE_NAME) {
/* Must wait for flush to be really finished */
rcu_barrier();
}
/* Must cancel garbage collectors */
s->variant->cancel_gc(s);
call_rcu(&s->rcu, ip_set_destroy_set_rcu);
}
return 0;
Expand Down Expand Up @@ -2400,30 +2416,25 @@ ip_set_net_init(struct net *net)
}

static void __net_exit
ip_set_net_exit(struct net *net)
ip_set_net_pre_exit(struct net *net)
{
struct ip_set_net *inst = ip_set_pernet(net);

struct ip_set *set = NULL;
ip_set_id_t i;

inst->is_deleted = true; /* flag for ip_set_nfnl_put */
}

nfnl_lock(NFNL_SUBSYS_IPSET);
for (i = 0; i < inst->ip_set_max; i++) {
set = ip_set(inst, i);
if (set) {
ip_set(inst, i) = NULL;
set->variant->cancel_gc(set);
ip_set_destroy_set(set);
}
}
nfnl_unlock(NFNL_SUBSYS_IPSET);
static void __net_exit
ip_set_net_exit(struct net *net)
{
struct ip_set_net *inst = ip_set_pernet(net);

_destroy_all_sets(inst);
kvfree(rcu_dereference_protected(inst->ip_set_list, 1));
}

static struct pernet_operations ip_set_net_ops = {
.init = ip_set_net_init,
.pre_exit = ip_set_net_pre_exit,
.exit = ip_set_net_exit,
.id = &ip_set_net_id,
.size = sizeof(struct ip_set_net),
Expand Down
30 changes: 14 additions & 16 deletions net/netfilter/ipset/ip_set_list_set.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb,
struct set_elem *e;
int ret;

list_for_each_entry(e, &map->members, list) {
list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
Expand All @@ -99,7 +99,7 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb,
struct set_elem *e;
int ret;

list_for_each_entry(e, &map->members, list) {
list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
Expand Down Expand Up @@ -188,9 +188,10 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct list_set *map = set->data;
struct set_adt_elem *d = value;
struct set_elem *e, *next, *prev = NULL;
int ret;
int ret = 0;

list_for_each_entry(e, &map->members, list) {
rcu_read_lock();
list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
Expand All @@ -201,16 +202,19 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,

if (d->before == 0) {
ret = 1;
goto out;
} else if (d->before > 0) {
next = list_next_entry(e, list);
ret = !list_is_last(&e->list, &map->members) &&
next->id == d->refid;
} else {
ret = prev && prev->id == d->refid;
}
return ret;
goto out;
}
return 0;
out:
rcu_read_unlock();
return ret;
}

static void
Expand Down Expand Up @@ -239,7 +243,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,

/* Find where to add the new entry */
n = prev = next = NULL;
list_for_each_entry(e, &map->members, list) {
list_for_each_entry_rcu(e, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
Expand Down Expand Up @@ -316,9 +320,9 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext,
{
struct list_set *map = set->data;
struct set_adt_elem *d = value;
struct set_elem *e, *next, *prev = NULL;
struct set_elem *e, *n, *next, *prev = NULL;

list_for_each_entry(e, &map->members, list) {
list_for_each_entry_safe(e, n, &map->members, list) {
if (SET_WITH_TIMEOUT(set) &&
ip_set_timeout_expired(ext_timeout(e, set)))
continue;
Expand Down Expand Up @@ -424,14 +428,8 @@ static void
list_set_destroy(struct ip_set *set)
{
struct list_set *map = set->data;
struct set_elem *e, *n;

list_for_each_entry_safe(e, n, &map->members, list) {
list_del(&e->list);
ip_set_put_byindex(map->net, e->id);
ip_set_ext_destroy(set, e);
kfree(e);
}
WARN_ON_ONCE(!list_empty(&map->members));
kfree(map);

set->data = NULL;
Expand Down

0 comments on commit 93b53c2

Please sign in to comment.