Skip to content

Commit

Permalink
vrrp: work around missing promiscuous netlink notifications
Browse files Browse the repository at this point in the history
If the base interface does not implement IFF_UNICAST_FLT, for example
it is a bridge interface, no netlink notification is sent by the kernel
when promiscuity is set on the base interface.

The promiscuous state of the base interface is correct in the kernel but
it is in incorrect in daemons that listen to the interface netlink
messages (eg. DPDK).

The issue is still there in kernel 6.4.6.

Force a notification by re-setting IFLA_GROUP for the base interface.

Initial patch by and signed-off-by: Louis Scalbert <louis.scalbert@6wind.com>

Signed-off-by: Quentin Armitage <quentin@armitage.org.uk>
  • Loading branch information
pqarmitage committed Sep 17, 2023
1 parent b91a718 commit edf6cd5
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 3 deletions.
6 changes: 5 additions & 1 deletion doc/man/man5/keepalived.conf.5.in
Original file line number Diff line number Diff line change
Expand Up @@ -1619,7 +1619,11 @@ The syntax for vrrp_instance is :
# vmac_xmit_base must be set.
# The MAC address can be specified with only 5 octets, in which case
# the virtual_router_id will be used as the last octet.
\fBuse_vmac \fR[<VMAC_INTERFACE_NAME>] [MAC_ADDRESS]
# If \fBnetlink_notify_msg\fR is specified, when keepalived creates a macvlan
# interface it will force a netlink message to be sent for the base interface
# since the kernel does not send one, even if the promiscuity of the base
# interface has been updated.
\fBuse_vmac \fR[<VMAC_INTERFACE_NAME>] [MAC_ADDRESS] [netlink_notify_msg]

# use_vmac_addr is used to create VMAC (macvlan) interfaces for
# each interface that is used by a VIP or eVIP where the interface
Expand Down
7 changes: 7 additions & 0 deletions keepalived/core/keepalived_netlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -1887,6 +1887,8 @@ netlink_if_link_populate(interface_t *ifp, struct rtattr *tb[], struct ifinfomsg
ifp->base_ifp = ifp;
ifp->base_ifindex = 0;

ifp->group = *PTR_CAST(uint32_t, RTA_DATA(tb[IFLA_GROUP]));

if (tb[IFLA_LINKINFO]) {
if (linkinfo[IFLA_INFO_KIND]) {
/* See if this interface is a MACVLAN */
Expand Down Expand Up @@ -2174,6 +2176,11 @@ netlink_link_filter(__attribute__((unused)) struct sockaddr_nl *snl, struct nlms
}
}

#ifdef _HAVE_VRRP_VMAC_
if (tb[IFLA_GROUP])
ifp->group = *PTR_CAST(uint32_t, RTA_DATA(tb[IFLA_GROUP]));
#endif

if (strcmp(ifp->ifname, name)) {
/* The name can change, so handle that here */
log_message(LOG_INFO, "Interface name has changed from %s to %s", ifp->ifname, name);
Expand Down
1 change: 1 addition & 0 deletions keepalived/include/vrrp.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ enum vrrp_flags_bits {
VRRP_VMAC_UP_BIT,
VRRP_VMAC_XMITBASE_BIT,
VRRP_VMAC_ADDR_BIT,
VRRP_VMAC_NETLINK_NOTIFY,
#ifdef _HAVE_VRRP_IPVLAN_
VRRP_IPVLAN_BIT,
#endif
Expand Down
3 changes: 1 addition & 2 deletions keepalived/include/vrrp_if.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,8 @@ typedef struct _interface {
#endif
struct _interface *base_ifp; /* Base interface (if interface is a VMAC interface),
otherwise the physical interface */
#ifdef _HAVE_VRRP_VMAC_
bool is_ours; /* keepalived created the interface */
bool deleting; /* Set when we are deleting the interface */
#endif
bool seen_interface; /* The interface has existed at some point since we started */
bool changeable_type; /* The interface type or underlying interface can be changed */
#ifdef _HAVE_VRF_
Expand All @@ -174,6 +172,7 @@ typedef struct _interface {
bool arp_ignore; /* Original value of arp_ignore to be restored */
bool arp_filter; /* Original value of arp_filter to be restored */
unsigned rp_filter; /* < UINT_MAX if we have changed the value */
uint32_t group; /* GROUP for this interface_t */
#endif
garp_delay_t *garp_delay; /* Delays for sending gratuitous ARP/NA */
timeval_t last_gna_router_check; /* Time we last checked if IPv6 forwarding set on interface */
Expand Down
2 changes: 2 additions & 0 deletions keepalived/vrrp/vrrp_data.c
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,8 @@ dump_vrrp(FILE *fp, const vrrp_t *vrrp)
vrrp->ll_addr[0], vrrp->ll_addr[1], vrrp->ll_addr[2], vrrp->ll_addr[3], vrrp->ll_addr[4], vrrp->ll_addr[5],
__test_bit(VRRP_VMAC_MAC_USE_VRID, &vrrp->flags) ? " (using VRID)" : "");
}
if (__test_bit(VRRP_VMAC_NETLINK_NOTIFY, &vrrp->flags))
conf_write(fp, " Force netlink update for base interface");
if (__test_bit(VRRP_VMAC_ADDR_BIT, &vrrp->flags))
conf_write(fp, " Use VMAC for VIPs on other interfaces");
#ifdef _HAVE_VRRP_IPVLAN_
Expand Down
1 change: 1 addition & 0 deletions keepalived/vrrp/vrrp_if.c
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,7 @@ dump_if(FILE *fp, const interface_t *ifp)
conf_write(fp, " Delayed state change running = %s", ifp->flags_change_thread ? "true" : "false");
conf_write(fp, " Up debounce timer = %uus", ifp->up_debounce_timer);
conf_write(fp, " Down debounce timer = %uus", ifp->down_debounce_timer);
conf_write(fp, " Group = %u", ifp->group);

#ifdef _HAVE_VRRP_VMAC_
if (IS_MAC_IP_VLAN(ifp)) {
Expand Down
7 changes: 7 additions & 0 deletions keepalived/vrrp/vrrp_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,13 @@ vrrp_vmac_handler(const vector_t *strvec)
continue;
}

/* The string "netlink_notify_msg" needs to be longer than IFNAMSIZ
* so that it cannot be a valid interface name. */
if (!strcmp(name, "netlink_notify_msg")) {
__set_bit(VRRP_VMAC_NETLINK_NOTIFY, &current_vrrp->flags);
continue;
}

if (!dev_name_valid(name)) {
report_config_error(CONFIG_GENERAL_ERROR, "VMAC interface name '%s' too long or invalid characters - ignoring", name);
continue;
Expand Down
41 changes: 41 additions & 0 deletions keepalived/vrrp/vrrp_vmac.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,26 @@ netlink_link_up(vrrp_t *vrrp)
return status;
}

static void
netlink_link_group(interface_t *base_ifp)
{
uint32_t group = base_ifp->group;
struct {
struct nlmsghdr n;
struct ifinfomsg ifi;
char buf[256];
} req = { .buf[0] = 0 };

req.n.nlmsg_len = NLMSG_LENGTH(sizeof (struct ifinfomsg));
req.n.nlmsg_flags = NLM_F_REQUEST;
req.n.nlmsg_type = RTM_NEWLINK;
req.ifi.ifi_family = AF_UNSPEC;
req.ifi.ifi_index = (int)IF_INDEX(base_ifp);

addattr_l(&req.n, sizeof(req), IFLA_GROUP, &group, sizeof(group));
netlink_talk(&nl_cmd, &req.n);
}

bool
set_link_local_address(const vrrp_t *vrrp)
{
Expand Down Expand Up @@ -480,6 +500,21 @@ netlink_link_add_vmac(vrrp_t *vrrp, const interface_t *old_interface)
log_message(LOG_INFO, "(%s) adding link-local address to %s failed", vrrp->iname, vrrp->ifp->ifname);
}

/* If the base interface does not implement IFF_UNICAST_FLT, for example
* it is a bridge interface, no netlink notification is sent by the kernel
* when promiscuity is set on the base interface.
* The promiscuous state of the base interface is correct in the kernel
* but it is in incorrect in processes that listen to the interface netlink
* messages due to the missing netlink message.
*
* Force a notification by re-setting IFLA_GROUP for the base interface.
* NOTE: there is a window here where the group may have been changed by
* some other process but we have not received the netlink message yet.
*/
if (create_interface && vrrp->configured_ifp->base_ifp->ifindex &&
__test_bit(VRRP_VMAC_NETLINK_NOTIFY, &vrrp->flags))
netlink_link_group(vrrp->configured_ifp->base_ifp);

#if !HAVE_DECL_IFLA_INET6_ADDR_GEN_MODE
if (vrrp->family == AF_INET6 || __test_bit(VRRP_FLAG_EVIP_OTHER_FAMILY, &vrrp->flags)) {
/* Delete the automatically created link-local address based on the
Expand Down Expand Up @@ -711,6 +746,12 @@ netlink_link_del_vmac(vrrp_t *vrrp)
return;
}

if (__test_bit(VRRP_VMAC_NETLINK_NOTIFY, &vrrp->flags)) {
/* Force a netlink RTM_NEWLINK message for the base interface
* since promiscuity may have been decremented. */
netlink_link_group(vrrp->configured_ifp->base_ifp);
}

#ifdef _WITH_FIREWALL_
// Why do we need this test?
// PROBLEM !!! We have deleted the link, but firewall_remove_vmac uses the ifindex.
Expand Down

0 comments on commit edf6cd5

Please sign in to comment.