Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v1.15] Multicast Datapath Backport #31668

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions api/v1/flow/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,9 @@ here.
| TTL_EXCEEDED | 196 | |
| NO_NODE_ID | 197 | |
| DROP_RATE_LIMITED | 198 | |
| IGMP_HANDLED | 199 | |
| IGMP_SUBSCRIBED | 200 | |
| MULTICAST_HANDLED | 201 | |
| DROP_HOST_NOT_READY | 202 | A BPF program wants to tail call into bpf_host, but the host datapath hasn't been loaded yet. |


Expand Down
353 changes: 183 additions & 170 deletions api/v1/flow/flow.pb.go

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions api/v1/flow/flow.proto
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,9 @@ enum DropReason {
TTL_EXCEEDED = 196;
NO_NODE_ID = 197;
DROP_RATE_LIMITED = 198;
IGMP_HANDLED = 199;
IGMP_SUBSCRIBED = 200;
MULTICAST_HANDLED = 201;
// A BPF program wants to tail call into bpf_host, but the host datapath
// hasn't been loaded yet.
DROP_HOST_NOT_READY = 202;
Expand Down
3 changes: 3 additions & 0 deletions api/v1/observer/observer.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,10 @@ HOST_OPTIONS = $(LXC_OPTIONS) \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_HOST_FIREWALL: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6:-DENABLE_SRV6_SRH_ENCAP:-DENABLE_SCTP: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6:-DENABLE_SRV6_SRH_ENCAP:-DENABLE_SCTP:-DENABLE_VTEP: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6:-DENABLE_MULTICAST: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6:-DENABLE_MULTICAST: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6:-DENABLE_SRV6_SRH_ENCAP:-DENABLE_SCTP:-DENABLE_MULTICAST: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6:-DENABLE_SRV6_SRH_ENCAP:-DENABLE_SCTP:-DENABLE_VTEP:-DENABLE_MULTICAST: \
-DDISABLE_LOOPBACK_LB:-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DPOLICY_VERDICT_NOTIFY:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_DSR:-DDSR_ENCAP_MODE:-DDSR_ENCAP_GENEVE:-DENABLE_PREFILTER:-DENABLE_SESSION_AFFINITY:-DENABLE_HOST_FIREWALL:-DENABLE_ICMP_RULE:-DENABLE_SRV6:-DENABLE_SRV6_SRH_ENCAP:-DENABLE_SCTP:-DENABLE_VTEP:-DENABLE_HIGH_SCALE_IPCACHE:-DDSR_ENCAP_IPIP=2 \

ifndef MAX_HOST_OPTIONS
Expand Down
2 changes: 2 additions & 0 deletions bpf/bpf_alignchecker.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "lib/policy_log.h"
#include "lib/pcap.h"
#include "lib/trace_sock.h"
#include "lib/mcast.h"

/*
* The __COUNTER__ macro expands to an integer value which is increasing every
Expand Down Expand Up @@ -85,3 +86,4 @@ add_type(struct tunnel_key);
add_type(struct tunnel_value);
add_type(struct auth_key);
add_type(struct auth_info);
add_type(struct mcast_subscriber_v4);
19 changes: 19 additions & 0 deletions bpf/bpf_lxc.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "bpf/types_mapper.h"
#include <bpf/ctx/skb.h>
#include <bpf/api.h>
#include <linux/in.h>

#include <ep_config.h>
#include <node_config.h>
Expand Down Expand Up @@ -33,6 +34,7 @@
#include "lib/lxc.h"
#include "lib/identity.h"
#include "lib/policy.h"
#include "lib/mcast.h"

/* Override LB_SELECTION initially defined in node_config.h to force bpf_lxc to use the random backend selection
* algorithm for in-cluster traffic. Otherwise, it will fail with the Maglev hash algorithm because Cilium doesn't provision
Expand Down Expand Up @@ -1355,6 +1357,23 @@ static __always_inline int __tail_handle_ipv4(struct __ctx_buff *ctx,
if (unlikely(!is_valid_lxc_src_ipv4(ip4)))
return DROP_INVALID_SIP;

#ifdef ENABLE_MULTICAST
if (mcast_ipv4_is_igmp(ip4)) {
/* note:
* we will always drop IGMP from this point on as we have no
* need to forward to the stack
*/
return mcast_ipv4_handle_igmp(ctx, ip4, data, data_end);
}

if (IN_MULTICAST(bpf_ntohl(ip4->daddr))) {
if (mcast_lookup_subscriber_map(&ip4->daddr)) {
ep_tail_call(ctx, CILIUM_CALL_MULTICAST_EP_DELIVERY);
return DROP_MISSED_TAIL_CALL;
}
}
#endif /* ENABLE_MULTICAST */

#ifdef ENABLE_PER_PACKET_LB
/* will tailcall internally or return error */
return __per_packet_lb_svc_xlate_4(ctx, ip4, ext_err);
Expand Down
10 changes: 10 additions & 0 deletions bpf/bpf_overlay.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include <node_config.h>
#include <netdev_config.h>
#include "lib/mcast.h"

#define IS_BPF_OVERLAY 1

Expand Down Expand Up @@ -299,6 +300,15 @@ static __always_inline int handle_ipv4(struct __ctx_buff *ctx,
return DROP_FRAG_NOSUPPORT;
#endif

#ifdef ENABLE_MULTICAST
if (IN_MULTICAST(bpf_ntohl(ip4->daddr))) {
if (mcast_lookup_subscriber_map(&ip4->daddr)) {
ep_tail_call(ctx, CILIUM_CALL_MULTICAST_EP_DELIVERY);
return DROP_MISSED_TAIL_CALL;
}
}
#endif /* ENABLE_MULTICAST */

#ifdef ENABLE_NODEPORT
if (!ctx_skip_nodeport(ctx)) {
ret = nodeport_lb4(ctx, ip4, ETH_HLEN, *identity, ext_err, &is_dsr);
Expand Down
6 changes: 6 additions & 0 deletions bpf/include/bpf/ctx/skb.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,10 @@ ctx_get_ifindex(const struct __sk_buff *ctx)
return ctx->ifindex;
}

/* Return the ingress_ifindex field stored in the skb context. */
static __always_inline __maybe_unused __u32
ctx_get_ingress_ifindex(const struct __sk_buff *ctx)
{
	const __u32 ingress = ctx->ingress_ifindex;

	return ingress;
}

#endif /* __BPF_CTX_SKB_H_ */
6 changes: 6 additions & 0 deletions bpf/include/bpf/ctx/xdp.h
Original file line number Diff line number Diff line change
Expand Up @@ -420,4 +420,10 @@ ctx_get_ifindex(const struct xdp_md *ctx)
{
return ctx->ingress_ifindex;
}

/* Return the ingress_ifindex field stored in the XDP context. */
static __always_inline __maybe_unused __u32
ctx_get_ingress_ifindex(const struct xdp_md *ctx)
{
	const __u32 ingress = ctx->ingress_ifindex;

	return ingress;
}
#endif /* __BPF_CTX_XDP_H_ */
2 changes: 2 additions & 0 deletions bpf/include/bpf/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ static int BPF_FUNC(map_update_elem, const void *map, const void *key,
static int BPF_FUNC(map_delete_elem, const void *map, const void *key);
static void *BPF_FUNC(map_lookup_percpu_elem, void *map, const void *key,
unsigned int cpu);
static long BPF_FUNC(for_each_map_elem, void *map, void *callback_fn,
void *callback_ctx, __u64 flags);

/* Time access */
static __u64 BPF_FUNC(ktime_get_ns);
Expand Down
1 change: 1 addition & 0 deletions bpf/include/bpf/helpers_skb.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ static int BPF_FUNC(redirect, int ifindex, __u32 flags);
static int BPF_FUNC(redirect_neigh, int ifindex, struct bpf_redir_neigh *params,
int plen, __u32 flags);
static int BPF_FUNC(redirect_peer, int ifindex, __u32 flags);
static int BPF_FUNC(clone_redirect, struct __sk_buff *skb, __u32 ifindex, __u64 flags);

/* Packet manipulation */
static int BPF_FUNC(skb_load_bytes, struct __sk_buff *skb, __u32 off,
Expand Down
129 changes: 129 additions & 0 deletions bpf/include/linux/igmp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*
* Linux NET3: Internet Group Management Protocol [IGMP]
*
* Authors:
* Alan Cox <alan@lxorguk.ukuu.org.uk>
*
* Extended to talk the BSD extended IGMP protocol of mrouted 3.6
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/

#ifndef _UAPI_LINUX_IGMP_H
#define _UAPI_LINUX_IGMP_H

#include <linux/types.h>

/*
* IGMP protocol structures
*/

/*
* Header in on-the-wire format
*/

/*
 * Basic IGMP message header: two single-byte fields, a checksum and a
 * big-endian 32-bit group field. With a 16-bit __sum16 this is 8 bytes,
 * which matches IGMP_MINLEN defined later in this header.
 */
struct igmphdr {
/* Message type; presumably one of the IGMP_* type constants defined below (e.g. IGMP_HOST_MEMBERSHIP_QUERY) -- confirm against callers. */
__u8 type;
__u8 code; /* For newer IGMP */
/* Message checksum (coverage and algorithm are not shown in this file). */
__sum16 csum;
/* Group field in network byte order (__be32); presumably the multicast group address. */
__be32 group;
};

/* V3 group record types [grec_type] */
#define IGMPV3_MODE_IS_INCLUDE 1
#define IGMPV3_MODE_IS_EXCLUDE 2
#define IGMPV3_CHANGE_TO_INCLUDE 3
#define IGMPV3_CHANGE_TO_EXCLUDE 4
#define IGMPV3_ALLOW_NEW_SOURCES 5
#define IGMPV3_BLOCK_OLD_SOURCES 6

/*
 * One IGMPv3 group record. The record is variable-length: it ends in a
 * flexible array member, so sizeof(struct igmpv3_grec) covers only the
 * fixed leading fields.
 */
struct igmpv3_grec {
/* Record type; takes the IGMPV3_* group record type values defined above. */
__u8 grec_type;
/* NOTE(review): presumably the length of trailing auxiliary data in 32-bit words -- confirm. */
__u8 grec_auxwords;
/* Big-endian 16-bit count; presumably the number of entries in grec_src -- confirm. */
__be16 grec_nsrcs;
/* Big-endian 32-bit value; presumably the multicast group address of this record. */
__be32 grec_mca;
/* Flexible array of big-endian 32-bit entries following the fixed fields. */
__be32 grec_src[];
};

/*
 * IGMPv3 report message: a fixed header followed by a flexible array of
 * group records.
 *
 * Each struct igmpv3_grec itself ends in a flexible array (grec_src), so
 * records may be longer than sizeof(struct igmpv3_grec). Plain indexing
 * grec[i] for i > 0 therefore only lands on a record boundary when the
 * preceding records carry no trailing data; walk records by computed
 * length otherwise.
 */
struct igmpv3_report {
/* Message type; presumably IGMPV3_HOST_MEMBERSHIP_REPORT for this layout -- confirm. */
__u8 type;
/* Named as reserved; no use is visible in this header. */
__u8 resv1;
/* Message checksum. */
__sum16 csum;
/* Named as reserved; no use is visible in this header. */
__be16 resv2;
/* Big-endian 16-bit count; presumably the number of group records in grec -- confirm. */
__be16 ngrec;
/* Group records following the fixed header. */
struct igmpv3_grec grec[];
};

/*
 * IGMPv3 query message. The first four fields mirror struct igmphdr
 * (type, code, csum, group); they are followed by a one-byte bitfield,
 * qqic, a source count and a flexible array of sources.
 */
struct igmpv3_query {
/* Message type; presumably IGMP_HOST_MEMBERSHIP_QUERY for this layout -- confirm. */
__u8 type;
__u8 code;
/* Message checksum. */
__sum16 csum;
/* Group field in network byte order (__be32). */
__be32 group;
/*
 * The three bitfields share one byte. Their declaration order is flipped
 * between the little- and big-endian bitfield variants; the build fails
 * with #error if neither endianness macro is defined.
 */
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u8 qrv:3,
suppress:1,
resv:4;
#elif defined(__BIG_ENDIAN_BITFIELD)
__u8 resv:4,
suppress:1,
qrv:3;
#else
#error "Please fix <asm/byteorder.h>"
#endif
/* NOTE(review): presumably the querier's query interval code -- confirm against the IGMPv3 spec. */
__u8 qqic;
/* Big-endian 16-bit count; presumably the number of entries in srcs -- confirm. */
__be16 nsrcs;
/* Flexible array of big-endian 32-bit entries following the fixed fields. */
__be32 srcs[];
};

#define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* From RFC1112 */
#define IGMP_HOST_MEMBERSHIP_REPORT 0x12 /* Ditto */
#define IGMP_DVMRP 0x13 /* DVMRP routing */
#define IGMP_PIM 0x14 /* PIM routing */
#define IGMP_TRACE 0x15
#define IGMPV2_HOST_MEMBERSHIP_REPORT 0x16 /* V2 version of 0x12 */
#define IGMP_HOST_LEAVE_MESSAGE 0x17
#define IGMPV3_HOST_MEMBERSHIP_REPORT 0x22 /* V3 version of 0x12 */

#define IGMP_MTRACE_RESP 0x1e
#define IGMP_MTRACE 0x1f

#define IGMP_MRDISC_ADV 0x30 /* From RFC4286 */

/*
* Use the BSD names for these for compatibility
*/

#define IGMP_DELAYING_MEMBER 0x01
#define IGMP_IDLE_MEMBER 0x02
#define IGMP_LAZY_MEMBER 0x03
#define IGMP_SLEEPING_MEMBER 0x04
#define IGMP_AWAKENING_MEMBER 0x05

#define IGMP_MINLEN 8

#define IGMP_MAX_HOST_REPORT_DELAY 10 /* max delay for response to */
/* query (in seconds) */

#define IGMP_TIMER_SCALE 10 /* denotes that the igmphdr->code field */
/* specifies time in tenths of a second */

#define IGMP_AGE_THRESHOLD 400 /* If this host doesn't hear any IGMP V1 */
/* message in this period of time, */
/* revert to IGMP v2 router. */

#define IGMP_ALL_HOSTS htonl(0xE0000001L)
#define IGMP_ALL_ROUTER htonl(0xE0000002L)
#define IGMPV3_ALL_MCR htonl(0xE0000016L)
#define IGMP_LOCAL_GROUP htonl(0xE0000000L)
#define IGMP_LOCAL_GROUP_MASK htonl(0xFFFFFF00L)

/*
* struct for keeping the multicast list in
*/

#endif /* _UAPI_LINUX_IGMP_H */
6 changes: 5 additions & 1 deletion bpf/lib/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ enum {
#define CILIUM_CALL_IPV6_CONT_FROM_NETDEV 46
#define CILIUM_CALL_IPV4_NO_SERVICE 47
#define CILIUM_CALL_IPV6_NO_SERVICE 48
#define CILIUM_CALL_SIZE 49
#define CILIUM_CALL_MULTICAST_EP_DELIVERY 49
#define CILIUM_CALL_SIZE 50

typedef __u64 mac_t;

Expand Down Expand Up @@ -639,6 +640,9 @@ enum {
#define DROP_TTL_EXCEEDED -196
#define DROP_NO_NODE_ID -197
#define DROP_RATE_LIMITED -198
#define DROP_IGMP_HANDLED -199
#define DROP_IGMP_SUBSCRIBED -200
#define DROP_MULTICAST_HANDLED -201
#define DROP_HOST_NOT_READY -202

#define NAT_PUNT_TO_STACK DROP_NAT_NOT_NEEDED
Expand Down