From ad96aac7742754df0fb0915e623eecc19305eab6 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Wed, 24 Jun 2020 12:10:14 -0700 Subject: [PATCH 1/3] test: Always run L4 services test. There is no need to skip the L4 services test if it runs without kube-proxy (i.e., with NodePort BPF), as this test does not deploy any L7 redirects. Signed-off-by: Jarno Rajahalme --- test/k8sT/Services.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/k8sT/Services.go b/test/k8sT/Services.go index e9c6c5384eea..45a2165be599 100644 --- a/test/k8sT/Services.go +++ b/test/k8sT/Services.go @@ -1113,7 +1113,7 @@ var _ = Describe("K8sServicesTest", func() { testExternalTrafficPolicyLocal() }) - SkipContextIf(helpers.RunsWithoutKubeProxy, "with L4 policy", func() { + Context("with L4 policy", func() { var ( demoPolicy string ) From a7f76f700eb9b10dcad69bd17f19cc0970fffbd0 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Wed, 24 Jun 2020 14:43:50 -0700 Subject: [PATCH 2/3] test: Add TFTP / DNS collision test. Add a test case where a TFTP client in k8s1 uses the DNS proxy port of k8s2 as it's ephemeral local (source) port number. This exposes a problem with the iptables rules used in proxy redirection in k8s2, as the response TFTP packets get intercepted by a socket match rule. As of now, this test case fails, but the fix in a following commit fixes the underlying problem. Related: #12241 Signed-off-by: Jarno Rajahalme --- test/helpers/kubectl.go | 18 +++++++++++ test/k8sT/Services.go | 66 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/test/helpers/kubectl.go b/test/helpers/kubectl.go index 7b2b2596100e..21953d57806a 100644 --- a/test/helpers/kubectl.go +++ b/test/helpers/kubectl.go @@ -4077,3 +4077,21 @@ func logGathererSelector(allNodes bool) string { return selector } + +// GetDNSProxyPort returns the port the Cilium DNS proxy is listening on +func (kub *Kubectl) GetDNSProxyPort(ciliumPod string) int { + const pickDNSProxyPort = `iptables-save -t mangle | sed -n '/udp.*TPROXY to host cilium-dns-egress/ s/.*--on-port \([1-9][0-9]*\).*/\1/p'` + + // Find out the DNS proxy ports in use + res := kub.CiliumExecContext(context.TODO(), ciliumPod, pickDNSProxyPort) + if !res.WasSuccessful() { + ginkgoext.Failf("Cannot find DNS proxy port on %s", ciliumPod) + } + portStr := res.GetStdOut().String() + gomega.ExpectWithOffset(1, portStr).ShouldNot(gomega.BeEmpty(), "No DNS proxy port found on %s", ciliumPod) + port, err := strconv.Atoi(strings.TrimSpace(portStr)) + if err != nil || port == 0 { + ginkgoext.Failf("Invalid DNS proxy port on %s: %s", ciliumPod, portStr) + } + return port +} diff --git a/test/k8sT/Services.go b/test/k8sT/Services.go index 45a2165be599..e935d77678b5 100644 --- a/test/k8sT/Services.go +++ b/test/k8sT/Services.go @@ -1113,6 +1113,72 @@ var _ = Describe("K8sServicesTest", func() { testExternalTrafficPolicyLocal() }) + Context("TFTP with DNS Proxy port collision", func() { + var ( + demoPolicy string + ciliumPodK8s1 string + ciliumPodK8s2 string + DNSProxyPort1 int + DNSProxyPort2 int + ) + + BeforeAll(func() { + var err error + ciliumPodK8s1, err = kubectl.GetCiliumPodOnNodeWithLabel(helpers.CiliumNamespace, helpers.K8s1) + Expect(err).Should(BeNil(), "Cannot get cilium pod on %s", helpers.K8s1) + ciliumPodK8s2, err = kubectl.GetCiliumPodOnNodeWithLabel(helpers.CiliumNamespace, helpers.K8s2) + Expect(err).Should(BeNil(), "Cannot get cilium pod on %s", helpers.K8s2) + + // Find out the DNS proxy ports in use + DNSProxyPort1 = kubectl.GetDNSProxyPort(ciliumPodK8s1) + By("DNS Proxy port in k8s1 (%s): %d", ciliumPodK8s1, DNSProxyPort1) + DNSProxyPort2 = kubectl.GetDNSProxyPort(ciliumPodK8s2) + By("DNS Proxy port in k8s2 (%s): %d", ciliumPodK8s2, DNSProxyPort2) + + demoPolicy = helpers.ManifestGet(kubectl.BasePath(), "l4-policy-demo.yaml") + }) + + AfterAll(func() { + // Explicitly ignore result of deletion of resources to avoid incomplete + // teardown if any step fails. + _ = kubectl.Delete(demoPolicy) + }) + + It("Tests TFTP from DNS Proxy Port", func() { + if DNSProxyPort2 == DNSProxyPort1 { + Skip(fmt.Sprintf("TFTP source port test can not be done when both nodes have the same proxy port (%d == %d)", DNSProxyPort1, DNSProxyPort2)) + } + monitorRes1, monitorCancel1 := kubectl.MonitorStart(helpers.CiliumNamespace, ciliumPodK8s1) + monitorRes2, monitorCancel2 := kubectl.MonitorStart(helpers.CiliumNamespace, ciliumPodK8s2) + defer func() { + monitorCancel1() + monitorCancel2() + helpers.WriteToReportFile(monitorRes1.CombineOutput().Bytes(), "tftp-with-l4-policy-monitor-k8s1.log") + helpers.WriteToReportFile(monitorRes2.CombineOutput().Bytes(), "tftp-with-l4-policy-monitor-k8s2.log") + }() + + applyPolicy(demoPolicy) + + var data v1.Service + err := kubectl.Get(helpers.DefaultNamespace, "service test-nodeport").Unmarshal(&data) + Expect(err).Should(BeNil(), "Can not retrieve service") + + // Test enough times to get random backend selection from both nodes. + // The interesting case is when the backend is at k8s2. + count := 10 + fails := 0 + // Client from k8s1 + clientPod, _ := kubectl.GetPodOnNodeWithOffset(helpers.K8s1, testDSClient, 1) + // Destination is a NodePort in k8s2, curl (in k8s1) binding to the same local port as the DNS proxy port + // in k8s2 + url := getTFTPLink(k8s2IP, data.Spec.Ports[1].NodePort) + fmt.Sprintf(" --local-port %d", DNSProxyPort2) + cmd := testCommand(helpers.CurlFailNoStats(url), count, fails) + By("Making %d curl requests from %s pod to service %s using source port %d", count, clientPod, url, DNSProxyPort2) + res := kubectl.ExecPodCmd(helpers.DefaultNamespace, clientPod, cmd) + Expect(res).Should(helpers.CMDSuccess(), "Request from %s pod to service %s failed", clientPod, url) + }) + }) + Context("with L4 policy", func() { var ( demoPolicy string From bd620cb6e5a91df090f781a707b8805b0cc5ac06 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 23 Jun 2020 16:40:20 -0700 Subject: [PATCH 3/3] iptables: Remove '--nowildcard' from socket match '--no-wildcard' allows the socket match to find zero-bound (listening) sockets, which we do not want, as this may intercept (reply) traffic intended for other nodes when an ephemeral source port number allocated in one node happens to be the same as the allocated proxy port number in 'this' node (the node doing the iptables socket match changed here). Fixes: #12241 Related: #8864 Signed-off-by: Jarno Rajahalme --- pkg/datapath/iptables/iptables.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/datapath/iptables/iptables.go b/pkg/datapath/iptables/iptables.go index 3d3983c8b13b..dc1b2d6799c8 100644 --- a/pkg/datapath/iptables/iptables.go +++ b/pkg/datapath/iptables/iptables.go @@ -488,7 +488,7 @@ func (m *IptablesManager) inboundProxyRedirectRule(cmd string) []string { return append(m.waitArgs, "-t", "mangle", cmd, ciliumPreMangleChain, - "-m", "socket", "--transparent", "--nowildcard", + "-m", "socket", "--transparent", "-m", "comment", "--comment", "cilium: any->pod redirect proxied traffic to host proxy", "-j", "MARK", "--set-mark", toProxyMark)