Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change cilium_host IPv6 address #24208

Merged
merged 4 commits into from
Apr 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 4 additions & 4 deletions bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ build_all: force
@$(ECHO_CHECK)/*.c BUILD_PERMUTATIONS=1
$(QUIET) $(MAKE) $(SUBMAKEOPTS) bpf_all BUILD_PERMUTATIONS=1

testdata:
testdata:
${CLANG} ${FLAGS} -target bpf -Wall -Werror \
-c ../pkg/alignchecker/testdata/bpf_foo.c \
-o ../pkg/alignchecker/testdata/bpf_foo.o
Expand Down Expand Up @@ -58,7 +58,7 @@ LB_OPTIONS = \
-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_IPSEC: \
-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE: \
-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_IPSEC: \
-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_IPSEC:-DENABLE_ENCAP_HOST_REMAP: \
-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_IPSEC: \
Comment on lines 60 to +61
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once merged, could you open a follow-on PR to remove this duplication? No need to fix it here unless you need to push for some other reason 👍.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For sure! It took me 30 seconds to understand which duplication you meant LOL

-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_NODEPORT: \
-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_IPSEC:-DENABLE_NODEPORT: \
-DENABLE_IPV4:-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DENABLE_IPSEC:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION: \
Expand Down Expand Up @@ -234,8 +234,8 @@ LXC_OPTIONS = \
-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_IPSEC: \
-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_IPV4: \
-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_IPV4:-DENABLE_ROUTING: \
-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_IPV4:-DENABLE_IPSEC:-DENABLE_ENCAP_HOST_REMAP: \
-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_IPV4:-DENABLE_IPSEC:-DENABLE_ENCAP_HOST_REMAP:-DENABLE_L7_LB: \
-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_IPV4:-DENABLE_IPSEC: \
-DENABLE_IPV6:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_IPV4:-DENABLE_IPSEC:-DENABLE_L7_LB: \
-DENABLE_IPV4:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_IPV6: \
-DENABLE_IPV4:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_IPV6:-DENABLE_TPROXY: \
-DENABLE_IPV4:-DENCAP_IFINDEX:-DTUNNEL_MODE:-DPOLICY_VERDICT_NOTIFY:-DENABLE_IPV6:-DENABLE_TPROXY:-DENABLE_L7_LB: \
Expand Down
11 changes: 2 additions & 9 deletions bpf/bpf_overlay.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,6 @@ static __always_inline int handle_ipv6(struct __ctx_buff *ctx,
return ret;
}
#endif
ret = encap_remap_v6_host_address(ctx, false);
if (unlikely(ret < 0))
return ret;

if (!revalidate_data(ctx, &data, &data_end, &ip6))
return DROP_INVALID;
Expand Down Expand Up @@ -628,13 +625,9 @@ int cil_from_overlay(struct __ctx_buff *ctx)
__section("to-overlay")
int cil_to_overlay(struct __ctx_buff *ctx)
{
int ret;
int ret = TC_ACT_OK;
__u32 cluster_id __maybe_unused = 0;

ret = encap_remap_v6_host_address(ctx, true);
if (unlikely(ret < 0))
goto out;

#ifdef ENABLE_BANDWIDTH_MANAGER
/* In tunneling mode, we should do this as close as possible to the
* phys dev where FQ runs, but the issue is that the aggregate state
Expand Down Expand Up @@ -665,8 +658,8 @@ int cil_to_overlay(struct __ctx_buff *ctx)
cluster_id = ctx_get_cluster_id_mark(ctx);
#endif
ret = handle_nat_fwd(ctx, cluster_id);
#endif
out:
#endif
if (IS_ERR(ret))
return send_drop_notify_error(ctx, 0, ret, CTX_ACT_DROP, METRIC_EGRESS);
return ret;
Expand Down
7 changes: 0 additions & 7 deletions bpf/lib/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,6 @@
#define NOT_VTEP_DST 0
#endif

/* TODO: ipsec v6 tunnel datapath still needs separate fixing */
#ifndef ENABLE_IPSEC
# ifdef ENABLE_IPV6
# define ENABLE_ENCAP_HOST_REMAP 1
# endif
#endif

/* XFER_FLAGS that get transferred from XDP to SKB */
enum {
XFER_PKT_NO_SVC = (1 << 0), /* Skip upper service handling. */
Expand Down
59 changes: 0 additions & 59 deletions bpf/lib/encap.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,65 +52,6 @@ encap_and_redirect_ipsec(struct __ctx_buff *ctx, __u8 key, __u16 node_id,
}
#endif /* ENABLE_IPSEC */

/*
 * encap_remap_v6_host_address() - swap the node's IPv6 address in a packet
 * between HOST_IP and ROUTER_IP and patch the L4 checksum accordingly.
 *
 * @ctx:    packet context to inspect and possibly rewrite.
 * @egress: when true, a source address equal to HOST_IP is replaced by
 *          ROUTER_IP; when false, a destination address equal to ROUTER_IP
 *          is replaced by HOST_IP.
 *
 * The whole body is compiled out unless ENABLE_ENCAP_HOST_REMAP is defined,
 * in which case the function simply returns 0.
 *
 * Return: 0 if the packet was left untouched or rewritten successfully,
 * DROP_INVALID if the packet data cannot be revalidated, the negative value
 * returned by ipv6_hdrlen() on failure, DROP_WRITE_ERROR if the address
 * store fails, or DROP_CSUM_L4 if the checksum update fails.
 */
static __always_inline int
encap_remap_v6_host_address(struct __ctx_buff *ctx __maybe_unused,
const bool egress __maybe_unused)
{
#ifdef ENABLE_ENCAP_HOST_REMAP
struct csum_offset csum = {};
union v6addr host_ip;
void *data, *data_end;
struct ipv6hdr *ip6;
union v6addr *which;
__u8 nexthdr;
__u16 proto;
__be32 sum;
__u32 noff;
__u64 off;
int ret;

/* Only IPv6 frames are candidates for the remap; anything else passes.
 * NOTE(review): the return value of validate_ethertype() is not checked
 * here -- confirm that proto is always written by the helper.
 */
validate_ethertype(ctx, &proto);
if (proto != bpf_htons(ETH_P_IPV6))
return 0;
if (!revalidate_data(ctx, &data, &data_end, &ip6))
return DROP_INVALID;
/* For requests routed via tunnel with external v6 node IP
 * we need to remap their source address to the router address
 * as otherwise replies are not routed via tunnel but public
 * address instead.
 */
/* First pass: pick the address field to inspect and the value it must
 * currently hold for the remap to apply.
 */
if (egress) {
BPF_V6(host_ip, HOST_IP);
which = (union v6addr *)&ip6->saddr;
} else {
BPF_V6(host_ip, ROUTER_IP);
which = (union v6addr *)&ip6->daddr;
}
/* A non-zero compare result leaves the packet untouched (presumably
 * non-zero means "addresses differ" -- verify against ipv6_addrcmp()).
 */
if (ipv6_addrcmp(which, &host_ip))
return 0;
/* ipv6_hdrlen() receives nexthdr by pointer; presumably it walks the
 * extension headers and leaves the upper-layer protocol in nexthdr --
 * TODO confirm. Its non-negative result is added to the IPv6 header
 * offset to form the offset handed to the L4 checksum helper below.
 */
nexthdr = ip6->nexthdr;
ret = ipv6_hdrlen(ctx, &nexthdr);
if (ret < 0)
return ret;
off = ((void *)ip6 - data) + ret;
/* Second pass: host_ip is reused to hold the replacement address, and
 * noff is the byte offset of the address field being overwritten.
 */
if (egress) {
BPF_V6(host_ip, ROUTER_IP);
noff = ETH_HLEN + offsetof(struct ipv6hdr, saddr);
} else {
BPF_V6(host_ip, HOST_IP);
noff = ETH_HLEN + offsetof(struct ipv6hdr, daddr);
}
/* Take the checksum delta (old 16-byte address -> new one) before the
 * store: 'which' points at the address inside the packet.
 */
sum = csum_diff(which, 16, &host_ip, 16, 0);
csum_l4_offset_and_flags(nexthdr, &csum);
if (ctx_store_bytes(ctx, noff, &host_ip, 16, 0) < 0)
return DROP_WRITE_ERROR;
/* csum.offset stays 0 (from the zero initializer) unless the helper
 * above set it; in that case no L4 checksum fix-up is attempted.
 */
if (csum.offset &&
csum_l4_replace(ctx, off, &csum, 0, sum, BPF_F_PSEUDO_HDR) < 0)
return DROP_CSUM_L4;
#endif /* ENABLE_ENCAP_HOST_REMAP */
return 0;
}

static __always_inline int
__encap_with_nodeid(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
__u32 seclabel, __u32 dstid, __u32 vni __maybe_unused,
Expand Down
52 changes: 31 additions & 21 deletions pkg/datapath/iptables/iptables.go
Original file line number Diff line number Diff line change
Expand Up @@ -623,10 +623,22 @@ func (m *IptablesManager) installStaticProxyRules() error {
return err
}

// No conntrack for proxy return traffic
// No conntrack for proxy return traffic that is heading to cilium_host
if err := ip6tables.runProg([]string{
"-t", "raw",
"-A", ciliumOutputRawChain,
"-o", defaults.HostDevice,
"-m", "mark", "--mark", matchProxyReply,
"-m", "comment", "--comment", "cilium: NOTRACK for proxy return traffic",
"-j", "CT", "--notrack"}); err != nil {
return err
}

// No conntrack for proxy upstream traffic that is heading to lxc+
if err := ip6tables.runProg([]string{
"-t", "raw",
"-A", ciliumOutputRawChain,
"-o", "lxc+",
"-m", "mark", "--mark", matchProxyReply,
"-m", "comment", "--comment", "cilium: NOTRACK for proxy return traffic",
"-j", "CT", "--notrack"}); err != nil {
Expand Down Expand Up @@ -1597,13 +1609,13 @@ func (m *IptablesManager) addCiliumAcceptXfrmRules() error {
return nil
}

insertAcceptXfrm := func(table, chain string) error {
insertAcceptXfrm := func(ipt *ipt, table, chain string) error {
matchFromIPSecEncrypt := fmt.Sprintf("%#08x/%#08x", linux_defaults.RouteMarkDecrypt, linux_defaults.RouteMarkMask)
matchFromIPSecDecrypt := fmt.Sprintf("%#08x/%#08x", linux_defaults.RouteMarkEncrypt, linux_defaults.RouteMarkMask)

comment := "exclude xfrm marks from " + table + " " + chain + " chain"

if err := ip4tables.runProg([]string{
if err := ipt.runProg([]string{
"-t", table,
"-A", chain,
"-m", "mark", "--mark", matchFromIPSecEncrypt,
Expand All @@ -1612,31 +1624,29 @@ func (m *IptablesManager) addCiliumAcceptXfrmRules() error {
return err
}

return ip4tables.runProg([]string{
return ipt.runProg([]string{
"-t", table,
"-A", chain,
"-m", "mark", "--mark", matchFromIPSecDecrypt,
"-m", "comment", "--comment", comment,
"-j", "ACCEPT"})
}

if err := insertAcceptXfrm("filter", ciliumInputChain); err != nil {
return err
}
if err := insertAcceptXfrm("filter", ciliumOutputChain); err != nil {
return err
}
if err := insertAcceptXfrm("filter", ciliumForwardChain); err != nil {
return err
}
if err := insertAcceptXfrm("nat", ciliumPostNatChain); err != nil {
return err
}
if err := insertAcceptXfrm("nat", ciliumPreNatChain); err != nil {
return err
}
if err := insertAcceptXfrm("nat", ciliumOutputNatChain); err != nil {
return err
for _, chain := range ciliumChains {
switch chain.table {
case "filter", "nat":
if option.Config.EnableIPv4 {
if err := insertAcceptXfrm(ip4tables, chain.table, chain.name); err != nil {
return err
}
}
// ip6tables chain exists only if chain.ipv6 is true
if option.Config.EnableIPv6 && chain.ipv6 {
if err := insertAcceptXfrm(ip6tables, chain.table, chain.name); err != nil {
return err
}
}
}
}
return nil
}
Expand Down
20 changes: 12 additions & 8 deletions pkg/datapath/linux/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,9 @@ func (n *linuxNodeHandler) deleteDirectRoute(CIDR *cidr.CIDR, nodeIP net.IP) {
// f00d::a0a:0:0:0/112 via f00d::a0a:0:0:1 dev cilium_host src fd04::11 metric 1024 pref medium
func (n *linuxNodeHandler) createNodeRouteSpec(prefix *cidr.CIDR, isLocalNode bool) (route.Route, error) {
var (
local, nexthop net.IP
mtu int
local net.IP
nexthop *net.IP
mtu int
)
if prefix.IP.To4() != nil {
if n.nodeAddressing.IPv4() == nil {
Expand All @@ -352,8 +353,8 @@ func (n *linuxNodeHandler) createNodeRouteSpec(prefix *cidr.CIDR, isLocalNode bo
return route.Route{}, fmt.Errorf("IPv4 router address unavailable")
}

nexthop = n.nodeAddressing.IPv4().Router()
local = nexthop
local = n.nodeAddressing.IPv4().Router()
nexthop = &local
} else {
if n.nodeAddressing.IPv6() == nil {
return route.Route{}, fmt.Errorf("IPv6 addressing unavailable")
Expand All @@ -367,8 +368,11 @@ func (n *linuxNodeHandler) createNodeRouteSpec(prefix *cidr.CIDR, isLocalNode bo
return route.Route{}, fmt.Errorf("External IPv6 address unavailable")
}

nexthop = n.nodeAddressing.IPv6().Router()
local = n.nodeAddressing.IPv6().PrimaryExternal()
// For IPv6, the kernel rejects "ip r a $cidr via $ipv6_cilium_host dev cilium_host"
// with "Error: Gateway can not be a local address". Instead, we have to drop the "via"
// part and install the route as "ip r a $cidr dev cilium_host" to make it work.
nexthop = nil
local = n.nodeAddressing.IPv6().Router()
}

if !isLocalNode {
Expand All @@ -377,7 +381,7 @@ func (n *linuxNodeHandler) createNodeRouteSpec(prefix *cidr.CIDR, isLocalNode bo

// The default routing table accounts for encryption overhead for encrypt-node traffic
return route.Route{
Nexthop: &nexthop,
Nexthop: nexthop,
Local: local,
Device: n.datapathConfig.HostDevice,
Prefix: *prefix.IPNet,
Expand Down Expand Up @@ -1021,7 +1025,7 @@ func (n *linuxNodeHandler) enableIPsec(newNode *nodeTypes.Node) {
upsertIPsecLog(err, "out IPv6", wildcardCIDR, cidr, spi)
}
} else {
localCIDR := &net.IPNet{IP: localIP, Mask: net.CIDRMask(0, 0)}
localCIDR := n.nodeAddressing.IPv6().AllocationCIDR().IPNet
remoteCIDR := newNode.IPv6AllocCIDR.IPNet
n.replaceNodeIPSecOutRoute(new6Net)
spi, err := ipsec.UpsertIPsecEndpoint(localCIDR, remoteCIDR, localIP, remoteIP, remoteNodeID, ipsec.IPSecDirOut, false)
Expand Down
9 changes: 6 additions & 3 deletions pkg/datapath/linux/node_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,14 @@ func (s *linuxPrivilegedBaseTestSuite) SetUpTest(c *check.C, addressing datapath
err := setupDummyDevice(dummyExternalDeviceName, ips...)
c.Assert(err, check.IsNil)

ips = []net.IP{}
if enableIPv4 {
err = setupDummyDevice(dummyHostDeviceName, s.nodeAddressing.IPv4().Router())
} else {
err = setupDummyDevice(dummyHostDeviceName)
ips = append(ips, s.nodeAddressing.IPv4().Router())
}
if enableIPv6 {
ips = append(ips, s.nodeAddressing.IPv6().Router())
}
err = setupDummyDevice(dummyHostDeviceName, ips...)
c.Assert(err, check.IsNil)

tunnel.SetTunnelMap(tunnel.NewTunnelMap("test_cilium_tunnel_map"))
Expand Down
4 changes: 2 additions & 2 deletions pkg/datapath/linux/node_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ func (s *linuxTestSuite) TestCreateNodeRoute(c *check.C) {
c.Assert(err, check.IsNil)
c.Assert(generatedRoute.Prefix, checker.DeepEquals, *c1.IPNet)
c.Assert(generatedRoute.Device, check.Equals, dpConfig.HostDevice)
c.Assert(*generatedRoute.Nexthop, checker.DeepEquals, fakeNodeAddressing.IPv6().Router())
c.Assert(generatedRoute.Local, checker.DeepEquals, fakeNodeAddressing.IPv6().PrimaryExternal())
c.Assert(generatedRoute.Nexthop, check.IsNil)
c.Assert(generatedRoute.Local, checker.DeepEquals, fakeNodeAddressing.IPv6().Router())
}

func (s *linuxTestSuite) TestCreateNodeRouteSpecMtu(c *check.C) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/datapath/loader/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ func (l *Loader) Reinitialize(ctx context.Context, o datapath.BaseProgramOwner,
}

if option.Config.EnableIPv6 {
args[initArgIPv6NodeIP] = node.GetIPv6().String()
args[initArgIPv6NodeIP] = node.GetIPv6Router().String()
// Docker <17.05 has an issue which causes IPv6 to be disabled in the initns for all
// interface (https://github.com/docker/libnetwork/issues/1720)
// Enable IPv6 for now
Expand Down
2 changes: 1 addition & 1 deletion pkg/node/address.go
Original file line number Diff line number Diff line change
Expand Up @@ -580,7 +580,7 @@ func GetIPv6() net.IP {
// GetHostMasqueradeIPv6 returns the IPv6 address to be used for masquerading
// any traffic that is being forwarded from the host into the Cilium cluster.
func GetHostMasqueradeIPv6() net.IP {
return GetIPv6()
return GetIPv6Router()
}

// GetIPv6Router returns the IPv6 address of the router, e.g. address
Expand Down