Skip to content

Commit

Permalink
bpf: skip in-cluster xlation for non-local ips
Browse files Browse the repository at this point in the history
Skip in-cluster translation (xlation) for local traffic from the machine for
services with external IPs and simply let it pass through. Only addresses
with the HOST_ID identity are translated on the node; the rest are handled
via bpf_host when coming into the node. This also allows bpf_lxc's
packet-based DNAT to be compiled out entirely for 4.19.57 and onwards.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
  • Loading branch information
borkmann committed Jun 22, 2020
1 parent 6501817 commit 1d8589a
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 69 deletions.
42 changes: 18 additions & 24 deletions bpf/bpf_lxc.c
Expand Up @@ -85,30 +85,28 @@ static __always_inline int ipv6_l3_from_lxc(struct __ctx_buff *ctx,

l4_off = l3_off + hdrlen;

/*
* Check if the destination address is among the addresses that should be
* load balanced. This operation is performed before we go through the
* connection tracker to allow storing the reverse nat index in the CT
* entry for destination endpoints where we can't encode the state in the
* address.
*/
#ifdef ENABLE_SERVICES
# if !defined(ENABLE_HOST_SERVICES_FULL) || \
(defined(ENABLE_EXTERNAL_IP) && !defined(BPF_HAVE_NETNS_COOKIE))
#if defined(ENABLE_SERVICES) && !defined(ENABLE_HOST_SERVICES_FULL)
{
struct lb6_service *svc;
struct lb6_key key = {};

ret = lb6_extract_key(ctx, tuple, l4_off, &key, &csum_off, CT_EGRESS);
ret = lb6_extract_key(ctx, tuple, l4_off, &key, &csum_off,
CT_EGRESS);
if (IS_ERR(ret)) {
if (ret == DROP_UNKNOWN_L4)
goto skip_service_lookup;
else
return ret;
}

if ((svc = lb6_lookup_service(&key)) != NULL &&
lb6_svc_needs_lxc_xlation(svc)) {
/*
* Check if the destination address is among the addresses that should
* be load balanced. This operation is performed before we go through
* the connection tracker to allow storing the reverse nat index in
* the CT entry for destination endpoints where we can't encode the
* state in the address.
*/
if ((svc = lb6_lookup_service(&key)) != NULL) {
ret = lb6_local(get_ct_map6(tuple), ctx, l3_off, l4_off,
&csum_off, &key, tuple, svc, &ct_state_new);
if (IS_ERR(ret))
Expand All @@ -118,8 +116,7 @@ static __always_inline int ipv6_l3_from_lxc(struct __ctx_buff *ctx,
}

skip_service_lookup:
# endif /* !ENABLE_HOST_SERVICES_FULL || ENABLE_EXTERNAL_IP && !BPF_HAVE_NETNS_COOKIE */
#endif /* ENABLE_SERVICES */
#endif /* ENABLE_SERVICES && !ENABLE_HOST_SERVICES_FULL */

/* The verifier wants to see this assignment here in case the above goto
* skip_service_lookup is hit. However, in the case the packet
Expand Down Expand Up @@ -460,9 +457,7 @@ static __always_inline int handle_ipv4_from_lxc(struct __ctx_buff *ctx,

l4_off = l3_off + ipv4_hdrlen(ip4);

#ifdef ENABLE_SERVICES
# if !defined(ENABLE_HOST_SERVICES_FULL) || \
(defined(ENABLE_EXTERNAL_IP) && !defined(BPF_HAVE_NETNS_COOKIE))
#if defined(ENABLE_SERVICES) && !defined(ENABLE_HOST_SERVICES_FULL)
{
struct lb4_service *svc;
struct lb4_key key = {};
Expand All @@ -476,19 +471,18 @@ static __always_inline int handle_ipv4_from_lxc(struct __ctx_buff *ctx,
return ret;
}

if ((svc = lb4_lookup_service(&key)) != NULL &&
lb4_svc_needs_lxc_xlation(svc)) {
ret = lb4_local(get_ct_map4(&tuple), ctx, l3_off, l4_off, &csum_off,
&key, &tuple, svc, &ct_state_new, ip4->saddr);
if ((svc = lb4_lookup_service(&key)) != NULL) {
ret = lb4_local(get_ct_map4(&tuple), ctx, l3_off, l4_off,
&csum_off, &key, &tuple, svc, &ct_state_new,
ip4->saddr);
if (IS_ERR(ret))
return ret;
hairpin_flow |= ct_state_new.loopback;
}
}

skip_service_lookup:
# endif /* !ENABLE_HOST_SERVICES_FULL || ENABLE_EXTERNAL_IP && !BPF_HAVE_NETNS_COOKIE */
#endif /* ENABLE_SERVICES */
#endif /* ENABLE_SERVICES && !ENABLE_HOST_SERVICES_FULL */

/* The verifier wants to see this assignment here in case the above goto
* skip_service_lookup is hit. However, in the case the packet
Expand Down
26 changes: 6 additions & 20 deletions bpf/bpf_sock.c
Expand Up @@ -185,8 +185,7 @@ int sock4_update_revnat(struct bpf_sock_addr *ctx __maybe_unused,
#endif /* ENABLE_HOST_SERVICES_UDP || ENABLE_HOST_SERVICES_PEER */

static __always_inline bool
sock4_skip_xlate(struct lb4_service *svc, const bool in_hostns,
__be32 address)
sock4_skip_xlate(struct lb4_service *svc, __be32 address)
{
if (is_v4_loopback(address))
return false;
Expand All @@ -195,14 +194,8 @@ sock4_skip_xlate(struct lb4_service *svc, const bool in_hostns,

info = ipcache_lookup4(&IPCACHE_MAP, address,
V4_CACHE_KEY_LEN);
if (info == NULL ||
(svc->local_scope && info->sec_label != HOST_ID))
if (info == NULL || info->sec_label != HOST_ID)
return true;
if (lb4_svc_is_external_ip(svc)) {
if (info->sec_label != HOST_ID &&
info->sec_label != REMOTE_NODE_ID)
return in_hostns;
}
}

return false;
Expand Down Expand Up @@ -283,7 +276,7 @@ static __always_inline int __sock4_xlate_fwd(struct bpf_sock_addr *ctx,
* IP address. But do the service translation if the IP
* is from the host.
*/
if (sock4_skip_xlate(svc, in_hostns, orig_key.address))
if (sock4_skip_xlate(svc, orig_key.address))
return -EPERM;

if (svc->affinity) {
Expand Down Expand Up @@ -537,8 +530,7 @@ static __always_inline void ctx_set_v6_address(struct bpf_sock_addr *ctx,
}

static __always_inline __maybe_unused bool
sock6_skip_xlate(struct lb6_service *svc, const bool in_hostns,
union v6addr *address)
sock6_skip_xlate(struct lb6_service *svc, union v6addr *address)
{
if (is_v6_loopback(address))
return false;
Expand All @@ -547,14 +539,8 @@ sock6_skip_xlate(struct lb6_service *svc, const bool in_hostns,

info = ipcache_lookup6(&IPCACHE_MAP, address,
V6_CACHE_KEY_LEN);
if (info == NULL ||
(svc->local_scope && info->sec_label != HOST_ID))
if (info == NULL || info->sec_label != HOST_ID)
return true;
if (lb6_svc_is_external_ip(svc)) {
if (info->sec_label != HOST_ID &&
info->sec_label != REMOTE_NODE_ID)
return in_hostns;
}
}

return false;
Expand Down Expand Up @@ -718,7 +704,7 @@ static __always_inline int __sock6_xlate_fwd(struct bpf_sock_addr *ctx,
if (!svc)
return -ENXIO;

if (sock6_skip_xlate(svc, in_hostns, &orig_key.address))
if (sock6_skip_xlate(svc, &orig_key.address))
return -EPERM;

if (svc->affinity) {
Expand Down
20 changes: 0 additions & 20 deletions bpf/lib/lb.h
Expand Up @@ -157,26 +157,6 @@ bool lb6_svc_is_external_ip(const struct lb6_service *svc __maybe_unused)
#endif
}

/*
 * Compile-time selected predicate for an IPv4 service entry.
 *
 * If the build lacks ENABLE_HOST_SERVICES_FULL or lacks ENABLE_EXTERNAL_IP,
 * the result is unconditionally true and svc is not inspected. Only when
 * both options are defined does the result depend on the service, in which
 * case it is whatever lb4_svc_is_external_ip() reports for svc.
 */
static __always_inline
bool lb4_svc_needs_lxc_xlation(const struct lb4_service *svc __maybe_unused)
{
#if !defined(ENABLE_HOST_SERVICES_FULL) || !defined(ENABLE_EXTERNAL_IP)
	/* At least one of the two options is off: always answer true. */
	return true;
#else
	/* Both options are on: defer to the external-IP check. */
	return lb4_svc_is_external_ip(svc);
#endif
}

/*
 * Compile-time selected predicate for an IPv6 service entry.
 *
 * If the build lacks ENABLE_HOST_SERVICES_FULL or lacks ENABLE_EXTERNAL_IP,
 * the result is unconditionally true and svc is not inspected. Only when
 * both options are defined does the result depend on the service, in which
 * case it is whatever lb6_svc_is_external_ip() reports for svc.
 */
static __always_inline
bool lb6_svc_needs_lxc_xlation(const struct lb6_service *svc __maybe_unused)
{
#if !defined(ENABLE_HOST_SERVICES_FULL) || !defined(ENABLE_EXTERNAL_IP)
	/* At least one of the two options is off: always answer true. */
	return true;
#else
	/* Both options are on: defer to the external-IP check. */
	return lb6_svc_is_external_ip(svc);
#endif
}

static __always_inline
bool lb4_svc_is_hostport(const struct lb4_service *svc __maybe_unused)
{
Expand Down
4 changes: 3 additions & 1 deletion daemon/cmd/status.go
Expand Up @@ -117,7 +117,9 @@ func (d *Daemon) getMasqueradingStatus() *models.Masquerading {
return s
}

s.SnatExclusionCidr = datapath.RemoteSNATDstAddrExclusionCIDR().String()
if option.Config.EnableIPv4 {
s.SnatExclusionCidr = datapath.RemoteSNATDstAddrExclusionCIDR().String()
}

if option.Config.EnableBPFMasquerade {
s.Mode = models.MasqueradingModeBPF
Expand Down
7 changes: 3 additions & 4 deletions test/k8sT/Services.go
Expand Up @@ -785,10 +785,9 @@ var _ = Describe("K8sServicesTest", func() {
// Should work from outside via the external IP
testCurlFromOutside(httpURL, count, false)
testCurlFromOutside(tftpURL, count, false)
// Same from inside a pod
testCurlFromPods(testDSClient, httpURL, 10, 0)
testCurlFromPods(testDSClient, tftpURL, 10, 0)
// But not from the host netns (to prevent MITM)
// Should fail from inside a pod & hostns
testCurlFromPodsFail(testDSClient, httpURL)
testCurlFromPodsFail(testDSClient, tftpURL)
testCurlFailFromPodInHostNetNS(httpURL, 1, k8s1NodeName)
testCurlFailFromPodInHostNetNS(httpURL, 1, k8s1NodeName)
testCurlFailFromPodInHostNetNS(httpURL, 1, k8s2NodeName)
Expand Down

0 comments on commit 1d8589a

Please sign in to comment.