From 8bcd7caa00e714df38bbd12f795cb51bd0c61432 Mon Sep 17 00:00:00 2001 From: Gray Liang Date: Fri, 27 Oct 2023 17:46:19 +0800 Subject: [PATCH] datapath: disable net.ipv4.ip_early_demux for IPsec + L7 proxy [ upstream commit 5201896e0a393ec4199cf9b5be4ebac6374be12a ] [ backporter's notes: this is a backport to pre-cell iptables ] After forward traffic for an egress proxy connection has traversed through cilium_host / cilium_net, we expect IPsec-marked packets to get handled by xfrm. This currently conflicts with early demux, which matches the connection's transparent socket and assigns it to the packet: ``` // https://elixir.bootlin.com/linux/v6.2/source/net/ipv4/tcp_ipv4.c#L1770 int tcp_v4_early_demux(struct sk_buff *skb) { ... sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, iph->saddr, th->source, iph->daddr, ntohs(th->dest), skb->skb_iif, inet_sdif(skb)); if (sk) { skb->sk = sk; ... } ``` It then gets dropped in ip_forward(), before reaching xfrm: ``` // https://elixir.bootlin.com/linux/v6.2/source/net/ipv4/ip_forward.c#L100 int ip_forward(struct sk_buff *skb) { ... if (unlikely(skb->sk)) goto drop; ... } ``` To avoid this we disable early-demux in an L7 + IPsec config. Note that the L7 proxy feature needs to deal with similar troubles, as the comment for inboundProxyRedirectRule() describes. Ideally we would build a similar solution for IPsec, diverting traffic with policy routing so that it doesn't get intercepted by early-demux. 
Signed-off-by: Zhichuan Liang Signed-off-by: Julian Wiedmann --- pkg/datapath/iptables/iptables.go | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/pkg/datapath/iptables/iptables.go b/pkg/datapath/iptables/iptables.go index d3ea090b8a78..e2f49d2cf081 100644 --- a/pkg/datapath/iptables/iptables.go +++ b/pkg/datapath/iptables/iptables.go @@ -324,6 +324,10 @@ func (m *IptablesManager) Init() { m.haveIp6tables = haveIp6tables + if option.Config.EnableIPSec && option.Config.EnableL7Proxy { + m.DisableIPEarlyDemux() + } + if err := modulesManager.FindOrLoadModules("xt_socket"); err != nil { if option.Config.Tunnel == option.TunnelDisabled { // xt_socket module is needed to circumvent an explicit drop in ip_forward() @@ -344,14 +348,7 @@ func (m *IptablesManager) Init() { log.WithError(err).Warning("xt_socket kernel module could not be loaded") if option.Config.EnableXTSocketFallback { - disabled := sysctl.Disable("net.ipv4.ip_early_demux") == nil - - if disabled { - m.ipEarlyDemuxDisabled = true - log.Warning("Disabled ip_early_demux to allow proxy redirection with original source/destination address without xt_socket support also in non-tunneled datapath modes.") - } else { - log.WithError(err).Warning("Could not disable ip_early_demux, traffic redirected due to an HTTP policy or visibility may be dropped unexpectedly") - } + m.DisableIPEarlyDemux() } } } else { @@ -363,6 +360,20 @@ func (m *IptablesManager) Init() { ip6tables.initArgs(int(option.Config.IPTablesLockTimeout / time.Second)) } +func (m *IptablesManager) DisableIPEarlyDemux() { + if m.ipEarlyDemuxDisabled { + return + } + + err := sysctl.Disable("net.ipv4.ip_early_demux") + if err == nil { + m.ipEarlyDemuxDisabled = true + log.Info("Disabled ip_early_demux to allow proxy redirection.") + } else { + log.WithError(err).Warning("Could not disable ip_early_demux, traffic redirected due to an HTTP policy or visibility may be dropped unexpectedly") + } +} + // 
SupportsOriginalSourceAddr tells if an L7 proxy can use POD's original source address and port in // the upstream connection to allow the destination to properly derive the source security ID from // the source IP address.