Skip to content

Commit 09cf85e

Browse files
committed
Merge branch 'ipv4-namespacify-ipv4-address-hash-table'
Kuniyuki Iwashima says:

====================
ipv4: Namespacify IPv4 address hash table.

This is a prep of per-net RTNL conversion for RTM_(NEW|DEL|SET)ADDR.

Currently, each IPv4 address is linked to the global hash table, and
this needs to be protected by another global lock or namespacified to
support per-net RTNL.

Adding a global lock will cause deadlock in the rtnetlink path and GC,

  rtnetlink                      check_lifetime
  |- rtnl_net_lock(net)          |- acquire the global lock
  |- acquire the global lock     |- check ifa's netns
  `- put ifa into hash table     `- rtnl_net_lock(net)

so we need to namespacify the hash table.

The IPv6 one is already namespacified, let's follow that.

v2: https://lore.kernel.org/netdev/20241004195958.64396-1-kuniyu@amazon.com/
v1: https://lore.kernel.org/netdev/20241001024837.96425-1-kuniyu@amazon.com/
====================

Link: https://patch.msgid.link/20241008172906.1326-1-kuniyu@amazon.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2 parents 22ee378 + 99ee348 commit 09cf85e

File tree

3 files changed

+42
-31
lines changed

3 files changed

+42
-31
lines changed

include/linux/inetdevice.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
141141
ARP_EVICT_NOCARRIER)
142142

143143
struct in_ifaddr {
144-
struct hlist_node hash;
144+
struct hlist_node addr_lst;
145145
struct in_ifaddr __rcu *ifa_next;
146146
struct in_device *ifa_dev;
147147
struct rcu_head rcu_head;

include/net/netns/ipv4.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -270,5 +270,7 @@ struct netns_ipv4 {
270270

271271
atomic_t rt_genid;
272272
siphash_key_t ip_id_key;
273+
struct hlist_head *inet_addr_lst;
274+
struct delayed_work addr_chk_work;
273275
};
274276
#endif

net/ipv4/devinet.c

Lines changed: 39 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,6 @@ struct inet_fill_args {
119119
#define IN4_ADDR_HSIZE_SHIFT 8
120120
#define IN4_ADDR_HSIZE (1U << IN4_ADDR_HSIZE_SHIFT)
121121

122-
static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
123-
124122
static u32 inet_addr_hash(const struct net *net, __be32 addr)
125123
{
126124
u32 val = (__force u32) addr ^ net_hash_mix(net);
@@ -133,13 +131,13 @@ static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
133131
u32 hash = inet_addr_hash(net, ifa->ifa_local);
134132

135133
ASSERT_RTNL();
136-
hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
134+
hlist_add_head_rcu(&ifa->addr_lst, &net->ipv4.inet_addr_lst[hash]);
137135
}
138136

139137
static void inet_hash_remove(struct in_ifaddr *ifa)
140138
{
141139
ASSERT_RTNL();
142-
hlist_del_init_rcu(&ifa->hash);
140+
hlist_del_init_rcu(&ifa->addr_lst);
143141
}
144142

145143
/**
@@ -186,9 +184,8 @@ struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
186184
u32 hash = inet_addr_hash(net, addr);
187185
struct in_ifaddr *ifa;
188186

189-
hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
190-
if (ifa->ifa_local == addr &&
191-
net_eq(dev_net(ifa->ifa_dev->dev), net))
187+
hlist_for_each_entry_rcu(ifa, &net->ipv4.inet_addr_lst[hash], addr_lst)
188+
if (ifa->ifa_local == addr)
192189
return ifa;
193190

194191
return NULL;
@@ -227,7 +224,7 @@ static struct in_ifaddr *inet_alloc_ifa(struct in_device *in_dev)
227224
in_dev_hold(in_dev);
228225
ifa->ifa_dev = in_dev;
229226

230-
INIT_HLIST_NODE(&ifa->hash);
227+
INIT_HLIST_NODE(&ifa->addr_lst);
231228

232229
return ifa;
233230
}
@@ -484,15 +481,12 @@ static void inet_del_ifa(struct in_device *in_dev,
484481
__inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
485482
}
486483

487-
static void check_lifetime(struct work_struct *work);
488-
489-
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
490-
491484
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
492485
u32 portid, struct netlink_ext_ack *extack)
493486
{
494487
struct in_ifaddr __rcu **last_primary, **ifap;
495488
struct in_device *in_dev = ifa->ifa_dev;
489+
struct net *net = dev_net(in_dev->dev);
496490
struct in_validator_info ivi;
497491
struct in_ifaddr *ifa1;
498492
int ret;
@@ -561,8 +555,8 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
561555

562556
inet_hash_insert(dev_net(in_dev->dev), ifa);
563557

564-
cancel_delayed_work(&check_lifetime_work);
565-
queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
558+
cancel_delayed_work(&net->ipv4.addr_chk_work);
559+
queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work, 0);
566560

567561
/* Send message first, then call notifier.
568562
Notifier will trigger FIB update, so that
@@ -708,16 +702,19 @@ static void check_lifetime(struct work_struct *work)
708702
unsigned long now, next, next_sec, next_sched;
709703
struct in_ifaddr *ifa;
710704
struct hlist_node *n;
705+
struct net *net;
711706
int i;
712707

708+
net = container_of(to_delayed_work(work), struct net, ipv4.addr_chk_work);
713709
now = jiffies;
714710
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
715711

716712
for (i = 0; i < IN4_ADDR_HSIZE; i++) {
713+
struct hlist_head *head = &net->ipv4.inet_addr_lst[i];
717714
bool change_needed = false;
718715

719716
rcu_read_lock();
720-
hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) {
717+
hlist_for_each_entry_rcu(ifa, head, addr_lst) {
721718
unsigned long age, tstamp;
722719
u32 preferred_lft;
723720
u32 valid_lft;
@@ -755,7 +752,7 @@ static void check_lifetime(struct work_struct *work)
755752
if (!change_needed)
756753
continue;
757754
rtnl_lock();
758-
hlist_for_each_entry_safe(ifa, n, &inet_addr_lst[i], hash) {
755+
hlist_for_each_entry_safe(ifa, n, head, addr_lst) {
759756
unsigned long age;
760757

761758
if (ifa->ifa_flags & IFA_F_PERMANENT)
@@ -804,8 +801,8 @@ static void check_lifetime(struct work_struct *work)
804801
if (time_before(next_sched, now + ADDRCONF_TIMER_FUZZ_MAX))
805802
next_sched = now + ADDRCONF_TIMER_FUZZ_MAX;
806803

807-
queue_delayed_work(system_power_efficient_wq, &check_lifetime_work,
808-
next_sched - now);
804+
queue_delayed_work(system_power_efficient_wq, &net->ipv4.addr_chk_work,
805+
next_sched - now);
809806
}
810807

811808
static void set_ifa_lifetime(struct in_ifaddr *ifa, __u32 valid_lft,
@@ -1002,9 +999,9 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
1002999
ifa->ifa_proto = new_proto;
10031000

10041001
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
1005-
cancel_delayed_work(&check_lifetime_work);
1002+
cancel_delayed_work(&net->ipv4.addr_chk_work);
10061003
queue_delayed_work(system_power_efficient_wq,
1007-
&check_lifetime_work, 0);
1004+
&net->ipv4.addr_chk_work, 0);
10081005
rtmsg_ifa(RTM_NEWADDR, ifa, nlh, NETLINK_CB(skb).portid);
10091006
}
10101007
return 0;
@@ -2663,14 +2660,21 @@ static struct ctl_table ctl_forward_entry[] = {
26632660

26642661
static __net_init int devinet_init_net(struct net *net)
26652662
{
2666-
int err;
2667-
struct ipv4_devconf *all, *dflt;
26682663
#ifdef CONFIG_SYSCTL
2669-
struct ctl_table *tbl;
26702664
struct ctl_table_header *forw_hdr;
2665+
struct ctl_table *tbl;
26712666
#endif
2667+
struct ipv4_devconf *all, *dflt;
2668+
int err;
2669+
int i;
26722670

26732671
err = -ENOMEM;
2672+
net->ipv4.inet_addr_lst = kmalloc_array(IN4_ADDR_HSIZE,
2673+
sizeof(struct hlist_head),
2674+
GFP_KERNEL);
2675+
if (!net->ipv4.inet_addr_lst)
2676+
goto err_alloc_hash;
2677+
26742678
all = kmemdup(&ipv4_devconf, sizeof(ipv4_devconf), GFP_KERNEL);
26752679
if (!all)
26762680
goto err_alloc_all;
@@ -2731,6 +2735,11 @@ static __net_init int devinet_init_net(struct net *net)
27312735
net->ipv4.forw_hdr = forw_hdr;
27322736
#endif
27332737

2738+
for (i = 0; i < IN4_ADDR_HSIZE; i++)
2739+
INIT_HLIST_HEAD(&net->ipv4.inet_addr_lst[i]);
2740+
2741+
INIT_DEFERRABLE_WORK(&net->ipv4.addr_chk_work, check_lifetime);
2742+
27342743
net->ipv4.devconf_all = all;
27352744
net->ipv4.devconf_dflt = dflt;
27362745
return 0;
@@ -2748,14 +2757,20 @@ static __net_init int devinet_init_net(struct net *net)
27482757
err_alloc_dflt:
27492758
kfree(all);
27502759
err_alloc_all:
2760+
kfree(net->ipv4.inet_addr_lst);
2761+
err_alloc_hash:
27512762
return err;
27522763
}
27532764

27542765
static __net_exit void devinet_exit_net(struct net *net)
27552766
{
27562767
#ifdef CONFIG_SYSCTL
27572768
const struct ctl_table *tbl;
2769+
#endif
2770+
2771+
cancel_delayed_work_sync(&net->ipv4.addr_chk_work);
27582772

2773+
#ifdef CONFIG_SYSCTL
27592774
tbl = net->ipv4.forw_hdr->ctl_table_arg;
27602775
unregister_net_sysctl_table(net->ipv4.forw_hdr);
27612776
__devinet_sysctl_unregister(net, net->ipv4.devconf_dflt,
@@ -2766,6 +2781,7 @@ static __net_exit void devinet_exit_net(struct net *net)
27662781
#endif
27672782
kfree(net->ipv4.devconf_dflt);
27682783
kfree(net->ipv4.devconf_all);
2784+
kfree(net->ipv4.inet_addr_lst);
27692785
}
27702786

27712787
static __net_initdata struct pernet_operations devinet_ops = {
@@ -2783,16 +2799,9 @@ static struct rtnl_af_ops inet_af_ops __read_mostly = {
27832799

27842800
void __init devinet_init(void)
27852801
{
2786-
int i;
2787-
2788-
for (i = 0; i < IN4_ADDR_HSIZE; i++)
2789-
INIT_HLIST_HEAD(&inet_addr_lst[i]);
2790-
27912802
register_pernet_subsys(&devinet_ops);
27922803
register_netdevice_notifier(&ip_netdev_notifier);
27932804

2794-
queue_delayed_work(system_power_efficient_wq, &check_lifetime_work, 0);
2795-
27962805
rtnl_af_register(&inet_af_ops);
27972806

27982807
rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0);

0 commit comments

Comments
 (0)