From edf6cd5c8c85542d0ec520998c84cfb8d772ddcf Mon Sep 17 00:00:00 2001 From: Quentin Armitage Date: Sun, 17 Sep 2023 11:59:08 +0100 Subject: [PATCH 1/2] vrrp: work around missing promiscuous netlink notifications If the base interface does not implement IFF_UNICAST_FLT, for example it is a bridge interface, no netlink notification is sent by the kernel when promiscuity is set on the base interface. The promiscuous state of the base interface is correct in the kernel but it is in incorrect in daemons that listen to the interface netlink messages (eg. DPDK). The issue is still there in kernel 6.4.6. Force a notification by re-setting IFLA_GROUP for the base interface. Initial patch by and signed-off-by: Louis Scalbert Signed-off-by: Quentin Armitage --- doc/man/man5/keepalived.conf.5.in | 6 +++- keepalived/core/keepalived_netlink.c | 7 +++++ keepalived/include/vrrp.h | 1 + keepalived/include/vrrp_if.h | 3 +- keepalived/vrrp/vrrp_data.c | 2 ++ keepalived/vrrp/vrrp_if.c | 1 + keepalived/vrrp/vrrp_parser.c | 7 +++++ keepalived/vrrp/vrrp_vmac.c | 41 ++++++++++++++++++++++++++++ 8 files changed, 65 insertions(+), 3 deletions(-) diff --git a/doc/man/man5/keepalived.conf.5.in b/doc/man/man5/keepalived.conf.5.in index 6b05b2dc1c..c10bca44e6 100644 --- a/doc/man/man5/keepalived.conf.5.in +++ b/doc/man/man5/keepalived.conf.5.in @@ -1619,7 +1619,11 @@ The syntax for vrrp_instance is : # vmac_xmit_base must be set. # The MAC address can be specified with only 5 octets, in which case # the virtual_router_id will be used as the last octet. - \fBuse_vmac \fR[] [MAC_ADDRESS] + # If \fBnetlink_notify_msg\fR is specified, when keepalived creates a macvlan + # interface it will force a netlink message to be sent for the base interface + # since the kernel does not send one, even if the promiscuity of the base + # interface has been updated. + \fBuse_vmac \fR[] [MAC_ADDRESS] [netlink_notify_msg] # use_vmac_addr is used to create VMAC (macvlan) interfaces for # each interface that is used by a VIP or eVIP where the interface diff --git a/keepalived/core/keepalived_netlink.c b/keepalived/core/keepalived_netlink.c index 9a42a2724f..c843b30870 100644 --- a/keepalived/core/keepalived_netlink.c +++ b/keepalived/core/keepalived_netlink.c @@ -1887,6 +1887,8 @@ netlink_if_link_populate(interface_t *ifp, struct rtattr *tb[], struct ifinfomsg ifp->base_ifp = ifp; ifp->base_ifindex = 0; + ifp->group = *PTR_CAST(uint32_t, RTA_DATA(tb[IFLA_GROUP])); + if (tb[IFLA_LINKINFO]) { if (linkinfo[IFLA_INFO_KIND]) { /* See if this interface is a MACVLAN */ @@ -2174,6 +2176,11 @@ netlink_link_filter(__attribute__((unused)) struct sockaddr_nl *snl, struct nlms } } +#ifdef _HAVE_VRRP_VMAC_ + if (tb[IFLA_GROUP]) + ifp->group = *PTR_CAST(uint32_t, RTA_DATA(tb[IFLA_GROUP])); +#endif + if (strcmp(ifp->ifname, name)) { /* The name can change, so handle that here */ log_message(LOG_INFO, "Interface name has changed from %s to %s", ifp->ifname, name); diff --git a/keepalived/include/vrrp.h b/keepalived/include/vrrp.h index 275d9359b2..89849c7991 100644 --- a/keepalived/include/vrrp.h +++ b/keepalived/include/vrrp.h @@ -72,6 +72,7 @@ enum vrrp_flags_bits { VRRP_VMAC_UP_BIT, VRRP_VMAC_XMITBASE_BIT, VRRP_VMAC_ADDR_BIT, + VRRP_VMAC_NETLINK_NOTIFY, #ifdef _HAVE_VRRP_IPVLAN_ VRRP_IPVLAN_BIT, #endif diff --git a/keepalived/include/vrrp_if.h b/keepalived/include/vrrp_if.h index c46340acfd..0ee5bb7576 100644 --- a/keepalived/include/vrrp_if.h +++ b/keepalived/include/vrrp_if.h @@ -159,10 +159,8 @@ typedef struct _interface { #endif struct _interface *base_ifp; /* Base interface (if interface is a VMAC interface), otherwise the physical interface */ -#ifdef _HAVE_VRRP_VMAC_ bool is_ours; /* keepalived created the interface */ bool deleting; /* Set when we are deleting the interface */ -#endif bool seen_interface; /* The interface has existed at some point since we started */ bool changeable_type; /* The interface type or underlying interface can be changed */ #ifdef _HAVE_VRF_ @@ -174,6 +172,7 @@ typedef struct _interface { bool arp_ignore; /* Original value of arp_ignore to be restored */ bool arp_filter; /* Original value of arp_filter to be restored */ unsigned rp_filter; /* < UINT_MAX if we have changed the value */ + uint32_t group; /* GROUP for this interface_t */ #endif garp_delay_t *garp_delay; /* Delays for sending gratuitous ARP/NA */ timeval_t last_gna_router_check; /* Time we last checked if IPv6 forwarding set on interface */ diff --git a/keepalived/vrrp/vrrp_data.c b/keepalived/vrrp/vrrp_data.c index 784ad28bfd..a697e3a6ef 100644 --- a/keepalived/vrrp/vrrp_data.c +++ b/keepalived/vrrp/vrrp_data.c @@ -648,6 +648,8 @@ dump_vrrp(FILE *fp, const vrrp_t *vrrp) vrrp->ll_addr[0], vrrp->ll_addr[1], vrrp->ll_addr[2], vrrp->ll_addr[3], vrrp->ll_addr[4], vrrp->ll_addr[5], __test_bit(VRRP_VMAC_MAC_USE_VRID, &vrrp->flags) ? " (using VRID)" : ""); } + if (__test_bit(VRRP_VMAC_NETLINK_NOTIFY, &vrrp->flags)) + conf_write(fp, " Force netlink update for base interface"); if (__test_bit(VRRP_VMAC_ADDR_BIT, &vrrp->flags)) conf_write(fp, " Use VMAC for VIPs on other interfaces"); #ifdef _HAVE_VRRP_IPVLAN_ diff --git a/keepalived/vrrp/vrrp_if.c b/keepalived/vrrp/vrrp_if.c index 88e1b9684a..ec27995c62 100644 --- a/keepalived/vrrp/vrrp_if.c +++ b/keepalived/vrrp/vrrp_if.c @@ -572,6 +572,7 @@ dump_if(FILE *fp, const interface_t *ifp) conf_write(fp, " Delayed state change running = %s", ifp->flags_change_thread ? "true" : "false"); conf_write(fp, " Up debounce timer = %uus", ifp->up_debounce_timer); conf_write(fp, " Down debounce timer = %uus", ifp->down_debounce_timer); + conf_write(fp, " Group = %u", ifp->group); #ifdef _HAVE_VRRP_VMAC_ if (IS_MAC_IP_VLAN(ifp)) { diff --git a/keepalived/vrrp/vrrp_parser.c b/keepalived/vrrp/vrrp_parser.c index 6b8d48b0b7..4e350e702a 100644 --- a/keepalived/vrrp/vrrp_parser.c +++ b/keepalived/vrrp/vrrp_parser.c @@ -568,6 +568,13 @@ vrrp_vmac_handler(const vector_t *strvec) continue; } + /* The string "netlink_notify_msg" needs to be longer than IFNAMSIZ + * so that it cannot be a valid interface name. */ + if (!strcmp(name, "netlink_notify_msg")) { + __set_bit(VRRP_VMAC_NETLINK_NOTIFY, ¤t_vrrp->flags); + continue; + } + if (!dev_name_valid(name)) { report_config_error(CONFIG_GENERAL_ERROR, "VMAC interface name '%s' too long or invalid characters - ignoring", name); continue; diff --git a/keepalived/vrrp/vrrp_vmac.c b/keepalived/vrrp/vrrp_vmac.c index 5642138918..e7e7602bf5 100644 --- a/keepalived/vrrp/vrrp_vmac.c +++ b/keepalived/vrrp/vrrp_vmac.c @@ -230,6 +230,26 @@ netlink_link_up(vrrp_t *vrrp) return status; } +static void +netlink_link_group(interface_t *base_ifp) +{ + uint32_t group = base_ifp->group; + struct { + struct nlmsghdr n; + struct ifinfomsg ifi; + char buf[256]; + } req = { .buf[0] = 0 }; + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_NEWLINK; + req.ifi.ifi_family = AF_UNSPEC; + req.ifi.ifi_index = (int)IF_INDEX(base_ifp); + + addattr_l(&req.n, sizeof(req), IFLA_GROUP, &group, sizeof(group)); + netlink_talk(&nl_cmd, &req.n); +} + bool set_link_local_address(const vrrp_t *vrrp) { @@ -480,6 +500,21 @@ netlink_link_add_vmac(vrrp_t *vrrp, const interface_t *old_interface) log_message(LOG_INFO, "(%s) adding link-local address to %s failed", vrrp->iname, vrrp->ifp->ifname); } + /* If the base interface does not implement IFF_UNICAST_FLT, for example + * it is a bridge interface, no netlink notification is sent by the kernel + * when promiscuity is set on the base interface. + * The promiscuous state of the base interface is correct in the kernel + * but it is in incorrect in processes that listen to the interface netlink + * messages due to the missing netlink message. + * + * Force a notification by re-setting IFLA_GROUP for the base interface. + * NOTE: there is a window here where the group may have been changed by + * some other process but we have not received the netlink message yet. + */ + if (create_interface && vrrp->configured_ifp->base_ifp->ifindex && + __test_bit(VRRP_VMAC_NETLINK_NOTIFY, &vrrp->flags)) + netlink_link_group(vrrp->configured_ifp->base_ifp); + #if !HAVE_DECL_IFLA_INET6_ADDR_GEN_MODE if (vrrp->family == AF_INET6 || __test_bit(VRRP_FLAG_EVIP_OTHER_FAMILY, &vrrp->flags)) { /* Delete the automatically created link-local address based on the @@ -711,6 +746,12 @@ netlink_link_del_vmac(vrrp_t *vrrp) return; } + if (__test_bit(VRRP_VMAC_NETLINK_NOTIFY, &vrrp->flags)) { + /* Force a netlink RTM_NEWLINK message for the base interface + * since promiscuity may have been decremented. */ + netlink_link_group(vrrp->configured_ifp->base_ifp); + } + #ifdef _WITH_FIREWALL_ // Why do we need this test? // PROBLEM !!! We have deleted the link, but firewall_remove_vmac uses the ifindex. From 14359caa2847eae34abd0c6e3ea1b58ba95ed461 Mon Sep 17 00:00:00 2001 From: Quentin Armitage Date: Sun, 17 Sep 2023 12:08:03 +0100 Subject: [PATCH 2/2] vrrp: fix build failure from previous commit Commit edf6cd5 - "vrrp: work around missing promiscuous netlink notifications" caused a build failure with configuration option --disable-vmac. Signed-off-by: Quentin Armitage --- keepalived/vrrp/vrrp_if.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/keepalived/vrrp/vrrp_if.c b/keepalived/vrrp/vrrp_if.c index ec27995c62..f9bde8dbae 100644 --- a/keepalived/vrrp/vrrp_if.c +++ b/keepalived/vrrp/vrrp_if.c @@ -572,9 +572,10 @@ dump_if(FILE *fp, const interface_t *ifp) conf_write(fp, " Delayed state change running = %s", ifp->flags_change_thread ? "true" : "false"); conf_write(fp, " Up debounce timer = %uus", ifp->up_debounce_timer); conf_write(fp, " Down debounce timer = %uus", ifp->down_debounce_timer); - conf_write(fp, " Group = %u", ifp->group); #ifdef _HAVE_VRRP_VMAC_ + conf_write(fp, " Group = %u", ifp->group); + if (IS_MAC_IP_VLAN(ifp)) { const char *if_type = #ifdef _HAVE_VRRP_IPVLAN_