
bpf, xdp: various optimizations for nodeport #11082

Merged · merged 4 commits on Apr 22, 2020
1 change: 1 addition & 0 deletions bpf/Makefile
@@ -112,6 +112,7 @@ NETDEV_OPTIONS = $(LB_OPTIONS) \
 	-DENABLE_IPV4:-DENABLE_IPV6:-DLB_L3:-DLB_L4:-DENABLE_IPSEC:-DENABLE_HOST_SERVICES_UDP:-DENABLE_HOST_SERVICES_TCP:-DENABLE_NODEPORT:-DENABLE_EXTERNAL_IP:-DENABLE_MASQUERADE \
 	-DENABLE_IPV4:-DENABLE_IPV6:-DLB_L3:-DLB_L4:-DENABLE_HOST_SERVICES_UDP:-DENABLE_HOST_SERVICES_TCP:-DENABLE_NODEPORT:-DENABLE_EXTERNAL_IP:-DENABLE_DSR \
 	-DENABLE_IPV4:-DENABLE_IPV6:-DLB_L3:-DLB_L4:-DENABLE_HOST_SERVICES_UDP:-DENABLE_HOST_SERVICES_TCP:-DENABLE_NODEPORT:-DENABLE_EXTERNAL_IP:-DENABLE_DSR:-DENABLE_DSR_HYBRID \
+	-DENABLE_IPV4:-DENABLE_IPV6:-DLB_L3:-DLB_L4:-DENABLE_HOST_SERVICES_UDP:-DENABLE_HOST_SERVICES_TCP:-DENABLE_NODEPORT:-DENABLE_NODEPORT_ACCELERATION:-DENABLE_EXTERNAL_IP:-DENABLE_DSR:-DENABLE_DSR_HYBRID:-DENABLE_PREFILTER

 MAX_NETDEV_OPTIONS = $(MAX_LB_OPTIONS)
20 changes: 12 additions & 8 deletions bpf/bpf_netdev.c
@@ -131,10 +131,12 @@ static __always_inline int handle_ipv6(struct __ctx_buff *ctx,
 		return DROP_INVALID;

 #ifdef ENABLE_NODEPORT
-	if (!bpf_skip_nodeport(ctx)) {
-		ret = nodeport_lb6(ctx, srcid_from_proxy);
-		if (ret < 0)
-			return ret;
+	if (ctx_get_xfer(ctx) != XFER_PKT_NO_SVC) {
+		if (!bpf_skip_nodeport(ctx)) {
+			ret = nodeport_lb6(ctx, srcid_from_proxy);
+			if (ret < 0)
+				return ret;
+		}
 	}
 #if defined(ENCAP_IFINDEX) || defined(NO_REDIRECT)
 	/* See IPv4 case for NO_REDIRECT comments */
@@ -319,10 +321,12 @@ static __always_inline int handle_ipv4(struct __ctx_buff *ctx,
 		return DROP_INVALID;

 #ifdef ENABLE_NODEPORT
-	if (!bpf_skip_nodeport(ctx)) {
-		int ret = nodeport_lb4(ctx, srcid_from_proxy);
-		if (ret < 0)
-			return ret;
+	if (ctx_get_xfer(ctx) != XFER_PKT_NO_SVC) {
+		if (!bpf_skip_nodeport(ctx)) {
+			int ret = nodeport_lb4(ctx, srcid_from_proxy);
+			if (ret < 0)
+				return ret;
+		}
 	}
 #if defined(ENCAP_IFINDEX) || defined(NO_REDIRECT)
 	/* We cannot redirect a packet to a local endpoint in the direct
44 changes: 41 additions & 3 deletions bpf/bpf_xdp.c
@@ -17,6 +17,7 @@
 #include "lib/events.h"
 #include "lib/nodeport.h"

+#ifdef ENABLE_PREFILTER
 #ifndef HAVE_LPM_TRIE_MAP_TYPE
 # undef CIDR4_LPM_PREFILTER
 # undef CIDR6_LPM_PREFILTER
@@ -80,6 +81,28 @@ struct bpf_elf_map __section_maps CIDR6_LMAP_NAME = {
 };
 #endif /* CIDR6_LPM_PREFILTER */
 #endif /* CIDR6_FILTER */
+#endif /* ENABLE_PREFILTER */
+
+static __always_inline __maybe_unused int
+bpf_xdp_exit(struct __ctx_buff *ctx, const int verdict)
+{
+	/* Undo meta data, so GRO can perform natural aggregation. */
+	if (verdict == CTX_ACT_OK) {
+		__u32 meta_xfer = ctx_load_meta(ctx, XFER_MARKER);
+
+		/* We transfer data from XFER_MARKER. This specifically
+		 * does not break packet trains in GRO.
+		 */
+		if (meta_xfer) {
+			ctx_adjust_meta(ctx, META_PIVOT - sizeof(__u32));
+			ctx_store_meta(ctx, 0, meta_xfer);
+		} else {
+			ctx_adjust_meta(ctx, META_PIVOT);
+		}
+	}
+
+	return verdict;
+}

 #ifdef ENABLE_IPV4
 #ifdef ENABLE_NODEPORT_ACCELERATION
@@ -95,7 +118,7 @@ int tail_lb_ipv4(struct __ctx_buff *ctx)
 					    METRIC_INGRESS);
 	}

-	return ret;
+	return bpf_xdp_exit(ctx, ret);
 }

 static __always_inline int check_v4_lb(struct __ctx_buff *ctx)
@@ -111,6 +134,7 @@ static __always_inline int check_v4_lb(struct __ctx_buff *ctx __maybe_unused)
 }
 #endif /* ENABLE_NODEPORT_ACCELERATION */

+#ifdef ENABLE_PREFILTER
 static __always_inline int check_v4(struct __ctx_buff *ctx)
 {
 	void *data_end = ctx_data_end(ctx);
@@ -136,6 +160,12 @@ static __always_inline int check_v4(struct __ctx_buff *ctx)
 	return check_v4_lb(ctx);
 #endif /* CIDR4_FILTER */
 }
+#else
+static __always_inline int check_v4(struct __ctx_buff *ctx)
+{
+	return check_v4_lb(ctx);
+}
+#endif /* ENABLE_PREFILTER */
 #endif /* ENABLE_IPV4 */

 #ifdef ENABLE_IPV6
@@ -152,7 +182,7 @@ int tail_lb_ipv6(struct __ctx_buff *ctx)
 					    METRIC_INGRESS);
 	}

-	return ret;
+	return bpf_xdp_exit(ctx, ret);
 }

 static __always_inline int check_v6_lb(struct __ctx_buff *ctx)
@@ -168,6 +198,7 @@ static __always_inline int check_v6_lb(struct __ctx_buff *ctx __maybe_unused)
 }
 #endif /* ENABLE_NODEPORT */

+#ifdef ENABLE_PREFILTER
 static __always_inline int check_v6(struct __ctx_buff *ctx)
 {
 	void *data_end = ctx_data_end(ctx);
@@ -193,6 +224,12 @@ static __always_inline int check_v6(struct __ctx_buff *ctx)
 	return check_v6_lb(ctx);
 #endif /* CIDR6_FILTER */
 }
+#else
+static __always_inline int check_v6(struct __ctx_buff *ctx)
+{
+	return check_v6_lb(ctx);
+}
+#endif /* ENABLE_PREFILTER */
 #endif /* ENABLE_IPV6 */

 static __always_inline int check_filters(struct __ctx_buff *ctx)
@@ -205,6 +242,7 @@ static __always_inline int check_filters(struct __ctx_buff *ctx)
 	if (ctx_adjust_meta(ctx, -META_PIVOT))
 		return CTX_ACT_OK;

+	ctx_store_meta(ctx, XFER_MARKER, 0);
 	bpf_skip_nodeport_clear(ctx);

 	switch (proto) {
@@ -222,7 +260,7 @@ static __always_inline int check_filters(struct __ctx_buff *ctx)
 		break;
 	}

-	return ret;
+	return bpf_xdp_exit(ctx, ret);
 }

 __section("from-netdev")
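For readers unfamiliar with the mechanism bpf_xdp_exit() builds on: bytes placed in front of the packet with bpf_xdp_adjust_meta() in XDP survive XDP_PASS and reappear as data_meta in the TC program attached to the same device, which is how ctx_get_xfer() on the SKB side can see the verdict recorded under XDP. Below is a minimal, generic sketch of that kernel facility, using plain libbpf conventions and hypothetical program names; it is not the Cilium datapath itself.

/*
 * Sketch: XDP publishes one __u32 of metadata in front of the packet,
 * the TC/SKB program on the same device reads it back after XDP_PASS.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

#define PKT_NO_SVC	1	/* stand-in for XFER_PKT_NO_SVC */

SEC("xdp")
int xdp_mark_no_svc(struct xdp_md *ctx)
{
	__u32 *meta;
	void *data;

	/* Negative delta grows the metadata area in front of the packet. */
	if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(__u32)))
		return XDP_PASS;

	meta = (void *)(long)ctx->data_meta;
	data = (void *)(long)ctx->data;
	if ((void *)(meta + 1) > data)	/* verifier bounds check */
		return XDP_PASS;

	*meta = PKT_NO_SVC;
	return XDP_PASS;
}

SEC("classifier")
int tc_read_xdp_meta(struct __sk_buff *skb)
{
	__u32 *meta = (void *)(long)skb->data_meta;
	void *data = (void *)(long)skb->data;

	/* Same word, now visible to the TC program as skb metadata. */
	if ((void *)(meta + 1) <= data && *meta == PKT_NO_SVC) {
		/* e.g. skip the nodeport service lookup here */
	}
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";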
3 changes: 2 additions & 1 deletion bpf/include/bpf/ctx/xdp.h
@@ -17,8 +17,9 @@
 #define CTX_ACT_DROP XDP_DROP
 #define CTX_ACT_TX XDP_TX /* hairpin only */

+/* cb + RECIRC_MARKER + XFER_MARKER */
 #define META_PIVOT ((int)(field_sizeof(struct __sk_buff, cb) + \
-			  sizeof(__u32)))	/* cb + RECIRC_MARKER */
+			  sizeof(__u32) * 2))

 #define __CTX_OFF_MAX 0xff
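A note on the arithmetic: cb in struct __sk_buff is five __u32 slots, so with the RECIRC_MARKER and XFER_MARKER scratch slots the reserved area is 7 * 4 = 28 bytes; bpf_xdp_exit() above rewinds either the full 28 bytes or 24 of them, leaving exactly one __u32 visible to the SKB program. A small compile-time sketch of that assumption (FIELD_SIZEOF here is a local stand-in for the tree's field_sizeof() helper):

#include <linux/bpf.h>	/* struct __sk_buff, __u32 */

#define FIELD_SIZEOF(type, member)	sizeof(((type *)0)->member)

/* cb[] provides five __u32 slots ... */
_Static_assert(FIELD_SIZEOF(struct __sk_buff, cb) == 5 * sizeof(__u32),
	       "cb[] is five __u32 slots");
/* ... plus RECIRC_MARKER and XFER_MARKER gives the 28-byte META_PIVOT area. */
_Static_assert(FIELD_SIZEOF(struct __sk_buff, cb) + 2 * sizeof(__u32) == 28,
	       "META_PIVOT spans 28 bytes");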
3 changes: 3 additions & 0 deletions bpf/lib/common.h
@@ -46,6 +46,9 @@
 # endif
 #endif

+/* XDP to SKB transferred meta data. */
+#define XFER_PKT_NO_SVC 1 /* Skip upper service handling. */
+
 /* These are shared with test/bpf/check-complexity.sh, when modifying any of
  * the below, that script should also be updated. */
 #define CILIUM_CALL_DROP_NOTIFY 1
4 changes: 4 additions & 0 deletions bpf/lib/nodeport.h
@@ -498,6 +498,8 @@ static __always_inline int nodeport_lb6(struct __ctx_buff *ctx,
 	if (svc)
 		return DROP_IS_CLUSTER_IP;

+	ctx_set_xfer(ctx, XFER_PKT_NO_SVC);
+
 	if (nodeport_uses_dsr6(&tuple)) {
 		return CTX_ACT_OK;
 	} else {
@@ -1061,6 +1063,8 @@ static __always_inline int nodeport_lb4(struct __ctx_buff *ctx,
 	if (svc)
 		return DROP_IS_CLUSTER_IP;

+	ctx_set_xfer(ctx, XFER_PKT_NO_SVC);
+
 	if (nodeport_uses_dsr4(&tuple)) {
 		return CTX_ACT_OK;
 	} else {
14 changes: 14 additions & 0 deletions bpf/lib/overloadable_skb.h
@@ -106,4 +106,18 @@ ctx_skip_nodeport(struct __sk_buff *ctx __maybe_unused)
 #endif
 }

+static __always_inline __maybe_unused __u32 ctx_get_xfer(struct __sk_buff *ctx)
+{
+	__u32 *data_meta = ctx_data_meta(ctx);
+	void *data = ctx_data(ctx);
+
+	return !ctx_no_room(data_meta + 1, data) ? data_meta[0] : 0;
+}
+
+static __always_inline __maybe_unused void
+ctx_set_xfer(struct __sk_buff *ctx __maybe_unused, __u32 meta __maybe_unused)
+{
+	/* Only possible from XDP -> SKB. */
+}
+
 #endif /* __LIB_OVERLOADABLE_SKB_H_ */
15 changes: 14 additions & 1 deletion bpf/lib/overloadable_xdp.h
@@ -52,7 +52,8 @@ redirect_self(struct xdp_md *ctx __maybe_unused)
 #endif
 }

-#define RECIRC_MARKER 5
+#define RECIRC_MARKER 5 /* tail call recirculation */
+#define XFER_MARKER 6 /* xdp -> skb meta transfer */

 static __always_inline __maybe_unused void
 ctx_skip_nodeport_clear(struct xdp_md *ctx __maybe_unused)
@@ -80,4 +81,16 @@ ctx_skip_nodeport(struct xdp_md *ctx __maybe_unused)
 #endif
 }

+static __always_inline __maybe_unused __u32
+ctx_get_xfer(struct xdp_md *ctx __maybe_unused)
+{
+	return 0; /* Only intended for SKB context. */
+}
+
+static __always_inline __maybe_unused void ctx_set_xfer(struct xdp_md *ctx,
+							 __u32 meta)
+{
+	ctx_store_meta(ctx, XFER_MARKER, meta);
+}
+
 #endif /* __LIB_OVERLOADABLE_XDP_H_ */
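The split between the two headers is the usual compile-time overload pattern: shared datapath code calls ctx_set_xfer()/ctx_get_xfer() unconditionally, and whichever header matches the program type supplies the real implementation on one side and a cheap stub on the other (XDP can only write the transfer word, SKB can only read it). A stand-alone toy illustration of that pattern, with hypothetical names and a fake context struct:

#include <stdint.h>
#include <stdio.h>

/* Fake stand-in for the real contexts; slot 6 mirrors XFER_MARKER. */
struct fake_ctx {
	uint32_t scratch[7];	/* "cb" slots + RECIRC/XFER scratch */
	uint32_t meta_word;	/* what the SKB side would see as data_meta */
};

#ifdef BUILD_FOR_XDP
/* XDP flavour: set writes the scratch slot, get is a constant 0. */
static inline void ctx_set_xfer(struct fake_ctx *ctx, uint32_t v)
{
	ctx->scratch[6] = v;
}
static inline uint32_t ctx_get_xfer(struct fake_ctx *ctx)
{
	(void)ctx;
	return 0;
}
#else
/* SKB flavour: get reads the transferred word, set is a no-op. */
static inline void ctx_set_xfer(struct fake_ctx *ctx, uint32_t v)
{
	(void)ctx;
	(void)v;
}
static inline uint32_t ctx_get_xfer(struct fake_ctx *ctx)
{
	return ctx->meta_word;
}
#endif

int main(void)
{
	struct fake_ctx ctx = { .meta_word = 1 /* XFER_PKT_NO_SVC */ };

	ctx_set_xfer(&ctx, 1);		/* meaningful only in the XDP build */
	printf("xfer = %u\n", ctx_get_xfer(&ctx));
	return 0;
}

Built with -DBUILD_FOR_XDP it prints 0 (the XDP side never reads the word back); built without, it prints 1, mimicking the SKB side picking up what XDP left behind.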
4 changes: 4 additions & 0 deletions pkg/datapath/linux/config/config.go
@@ -201,6 +201,10 @@ func (h *HeaderfileWriter) WriteNodeConfig(w io.Writer, cfg *datapath.LocalNodeC
 		cDefinesMap["ENCRYPT_NODE"] = "1"
 	}

+	if option.Config.DevicePreFilter != "undefined" {
+		cDefinesMap["ENABLE_PREFILTER"] = "1"
+	}
+
 	if !option.Config.DisableK8sServices {
 		cDefinesMap["ENABLE_SERVICES"] = "1"
 	}