Skip to content

Commit

Permalink
bpf/nodeport: split the cilium call ipv6 nodeport nat
Browse files Browse the repository at this point in the history
Running the NAT processing in two separate tail calls decreases the
verifier complexity and improves the maintainability of the
code.

Signed-off-by: Sahid Orentino Ferdjaoui <sahid.ferdjaoui@industrialdiscipline.com>
  • Loading branch information
sahid authored and gandro committed Jul 26, 2022
1 parent 6e34314 commit 536ad0c
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 63 deletions.
5 changes: 3 additions & 2 deletions bpf/lib/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
#define CILIUM_CALL_IPV4_TO_ENDPOINT 13
#define CILIUM_CALL_IPV6_TO_ENDPOINT 14
#define CILIUM_CALL_IPV4_NODEPORT_NAT_EGRESS 15
#define CILIUM_CALL_IPV6_NODEPORT_NAT 16
#define CILIUM_CALL_IPV6_NODEPORT_NAT_EGRESS 16
#define CILIUM_CALL_IPV4_NODEPORT_REVNAT 17
#define CILIUM_CALL_IPV6_NODEPORT_REVNAT 18
#define CILIUM_CALL_IPV4_ENCAP_NODEPORT_NAT 19
Expand All @@ -101,7 +101,8 @@
#define CILIUM_CALL_SRV6_DECAP 34
#define CILIUM_CALL_SRV6_REPLY 35
#define CILIUM_CALL_IPV4_NODEPORT_NAT_INGRESS 36
#define CILIUM_CALL_SIZE 37
#define CILIUM_CALL_IPV6_NODEPORT_NAT_INGRESS 37
#define CILIUM_CALL_SIZE 38

typedef __u64 mac_t;

Expand Down
135 changes: 76 additions & 59 deletions bpf/lib/nodeport.h
Original file line number Diff line number Diff line change
Expand Up @@ -571,10 +571,48 @@ int tail_nodeport_ipv6_dsr(struct __ctx_buff *ctx)
}
#endif /* ENABLE_DSR */

__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV6_NODEPORT_NAT)
int tail_nodeport_nat_ipv6(struct __ctx_buff *ctx)
/* Ingress-direction SNAT for IPv6 NodePort traffic, run as its own tail call.
 *
 * The SNAT target address is IPV6_DIRECT_ROUTING, unless the CB_NAT_46X64
 * metadata slot is set, in which case it is built from IPV4_DIRECT_ROUTING
 * via build_v4_in_v6().
 *
 * The function always hands the packet over with a further tail call:
 *  - snat_v6_process() reported an error: mark the packet to skip nodeport
 *    handling and go back to CILIUM_CALL_IPV6_FROM_NETDEV;
 *  - otherwise: mark SNAT as done and continue with
 *    CILIUM_CALL_IPV6_NODEPORT_REVNAT.
 *
 * The return statement is only reached when the selected tail call did not
 * take over; the packet is then dropped and reported as
 * DROP_MISSED_TAIL_CALL on the ingress metric.
 */
__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV6_NODEPORT_NAT_INGRESS)
int tail_nodeport_nat_ingress_ipv6(struct __ctx_buff *ctx)
{
	struct ipv6_nat_target target = {
		.min_port = NODEPORT_PORT_MIN_NAT,
		.max_port = NODEPORT_PORT_MAX_NAT,
		.src_from_world = true,
	};
	union v6addr snat_addr = IPV6_DIRECT_ROUTING;
	int ret;

	if (ctx_load_meta(ctx, CB_NAT_46X64))
		build_v4_in_v6(&snat_addr, IPV4_DIRECT_ROUTING);
	target.addr = snat_addr;

	/* Keep the result in a local: IS_ERR() may evaluate its argument
	 * more than once, so the call must not be passed to it directly.
	 */
	ret = snat_v6_process(ctx, NAT_DIR_INGRESS, &target);
	if (IS_ERR(ret)) {
		/* No mapping: recirculate to the main path. SNAT is costly in
		 * instructions and complexity (tail calls are used instead of
		 * BPF to BPF calls), which is why it lives in a tail call of
		 * its own and the main path is re-entered from here.
		 */
		bpf_skip_nodeport_set(ctx);
		ep_tail_call(ctx, CILIUM_CALL_IPV6_FROM_NETDEV);
	} else {
		bpf_mark_snat_done(ctx);
		ep_tail_call(ctx, CILIUM_CALL_IPV6_NODEPORT_REVNAT);
	}

	/* Reached only if the tail call above fell through. */
	return send_drop_notify_error(ctx, 0, DROP_MISSED_TAIL_CALL, CTX_ACT_DROP, METRIC_INGRESS);
}

__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV6_NODEPORT_NAT_EGRESS)
int tail_nodeport_nat_ipv6_egress(struct __ctx_buff *ctx)
{
enum nat_dir dir = (enum nat_dir)ctx_load_meta(ctx, CB_NAT);
const bool nat_46x64 = ctx_load_meta(ctx, CB_NAT_46X64);
union v6addr tmp = IPV6_DIRECT_ROUTING;
struct bpf_fib_lookup_padded fib_params = {
Expand All @@ -593,69 +631,52 @@ int tail_nodeport_nat_ipv6(struct __ctx_buff *ctx)
struct ipv6hdr *ip6;
int ret, ext_err = 0;

#ifdef TUNNEL_MODE
struct remote_endpoint_info *info;
union v6addr *dst;
#endif

if (nat_46x64)
build_v4_in_v6(&tmp, IPV4_DIRECT_ROUTING);
target.addr = tmp;

#ifdef TUNNEL_MODE
if (dir == NAT_DIR_EGRESS) {
struct remote_endpoint_info *info;
union v6addr *dst;
if (!revalidate_data(ctx, &data, &data_end, &ip6)) {
ret = DROP_INVALID;
goto drop_err;
}

if (!revalidate_data(ctx, &data, &data_end, &ip6)) {
ret = DROP_INVALID;
dst = (union v6addr *)&ip6->daddr;
info = ipcache_lookup6(&IPCACHE_MAP, dst, V6_CACHE_KEY_LEN);
if (info && info->tunnel_endpoint != 0) {
ret = __encap_with_nodeid(ctx, info->tunnel_endpoint,
WORLD_ID,
NOT_VTEP_DST,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN);
if (ret)
goto drop_err;
}

dst = (union v6addr *)&ip6->daddr;
info = ipcache_lookup6(&IPCACHE_MAP, dst, V6_CACHE_KEY_LEN);
if (info != NULL && info->tunnel_endpoint != 0) {
ret = __encap_with_nodeid(ctx, info->tunnel_endpoint,
WORLD_ID,
NOT_VTEP_DST,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN);
if (ret)
goto drop_err;

BPF_V6(target.addr, ROUTER_IP);
fib_params.l.ifindex = ENCAP_IFINDEX;
BPF_V6(target.addr, ROUTER_IP);
fib_params.l.ifindex = ENCAP_IFINDEX;

/* fib lookup not necessary when going over tunnel. */
if (eth_store_daddr(ctx, fib_params.l.dmac, 0) < 0) {
ret = DROP_WRITE_ERROR;
goto drop_err;
}
if (eth_store_saddr(ctx, fib_params.l.smac, 0) < 0) {
ret = DROP_WRITE_ERROR;
goto drop_err;
}
}
}
#endif
ret = snat_v6_process(ctx, dir, &target);
if (IS_ERR(ret)) {
/* In case of no mapping, recircle back to main path. SNAT is very
* expensive in terms of instructions (since we don't have BPF to
* BPF calls as we use tail calls) and complexity, hence this is
* done inside a tail call here.
*/
if (dir == NAT_DIR_INGRESS) {
bpf_skip_nodeport_set(ctx);
ep_tail_call(ctx, CILIUM_CALL_IPV6_FROM_NETDEV);
ret = DROP_MISSED_TAIL_CALL;
/* fib lookup not necessary when going over tunnel. */
if (eth_store_daddr(ctx, fib_params.l.dmac, 0) < 0) {
ret = DROP_WRITE_ERROR;
goto drop_err;
}
if (ret != NAT_PUNT_TO_STACK)
if (eth_store_saddr(ctx, fib_params.l.smac, 0) < 0) {
ret = DROP_WRITE_ERROR;
goto drop_err;
}
}
#endif
ret = snat_v6_process(ctx, NAT_DIR_EGRESS, &target);
if (IS_ERR(ret) && ret != NAT_PUNT_TO_STACK)
goto drop_err;

bpf_mark_snat_done(ctx);

if (dir == NAT_DIR_INGRESS) {
ep_tail_call(ctx, CILIUM_CALL_IPV6_NODEPORT_REVNAT);
ret = DROP_MISSED_TAIL_CALL;
goto drop_err;
}
#ifdef TUNNEL_MODE
if (fib_params.l.ifindex == ENCAP_IFINDEX)
goto out_send;
Expand Down Expand Up @@ -709,9 +730,7 @@ int tail_nodeport_nat_ipv6(struct __ctx_buff *ctx)
cilium_capture_out(ctx);
return ctx_redirect(ctx, fib_params.l.ifindex, 0);
drop_err:
return send_drop_notify_error_ext(ctx, 0, ret, ext_err, CTX_ACT_DROP,
dir == NAT_DIR_INGRESS ?
METRIC_INGRESS : METRIC_EGRESS);
return send_drop_notify_error_ext(ctx, 0, ret, ext_err, CTX_ACT_DROP, METRIC_EGRESS);
}

/* See nodeport_lb4(). */
Expand Down Expand Up @@ -788,10 +807,9 @@ static __always_inline int nodeport_lb6(struct __ctx_buff *ctx,
if (nodeport_uses_dsr6(&tuple))
return CTX_ACT_OK;

ctx_store_meta(ctx, CB_NAT, NAT_DIR_INGRESS);
ctx_store_meta(ctx, CB_NAT_46X64, 0);
ctx_store_meta(ctx, CB_SRC_IDENTITY, src_identity);
ep_tail_call(ctx, CILIUM_CALL_IPV6_NODEPORT_NAT);
ep_tail_call(ctx, CILIUM_CALL_IPV6_NODEPORT_NAT_INGRESS);
return DROP_MISSED_TAIL_CALL;
}

Expand Down Expand Up @@ -859,7 +877,6 @@ static __always_inline int nodeport_lb6(struct __ctx_buff *ctx,
#endif /* DSR_ENCAP_MODE */
ep_tail_call(ctx, CILIUM_CALL_IPV6_NODEPORT_DSR);
} else {
ctx_store_meta(ctx, CB_NAT, NAT_DIR_EGRESS);
/* This code path is not only hit for NAT64, but also
* for NAT46. For the latter we initially hit the IPv4
* NodePort path, then migrate the request to IPv6 and
Expand All @@ -870,7 +887,7 @@ static __always_inline int nodeport_lb6(struct __ctx_buff *ctx,
ctx_store_meta(ctx, CB_NAT_46X64,
!is_v4_in_v6(&key.address) &&
lb6_to_lb4_service(svc));
ep_tail_call(ctx, CILIUM_CALL_IPV6_NODEPORT_NAT);
ep_tail_call(ctx, CILIUM_CALL_IPV6_NODEPORT_NAT_EGRESS);
}
return DROP_MISSED_TAIL_CALL;
}
Expand Down Expand Up @@ -1877,7 +1894,7 @@ static __always_inline int nodeport_lb4(struct __ctx_buff *ctx,
ret = lb4_to_lb6(ctx, ip4, l3_off);
if (ret)
return ret;
ep_tail_call(ctx, CILIUM_CALL_IPV6_NODEPORT_NAT);
ep_tail_call(ctx, CILIUM_CALL_IPV6_NODEPORT_NAT_INGRESS);
} else {
ep_tail_call(ctx, CILIUM_CALL_IPV4_NODEPORT_NAT_INGRESS);
}
Expand Down
5 changes: 3 additions & 2 deletions test/bpf/check-complexity.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ function annotate_section_names {
-e "s/\(section '2\/13'\)/\1 (tail_call IPV4_TO_ENDPOINT)/" \
-e "s/\(section '2\/14'\)/\1 (tail_call IPV6_TO_ENDPOINT)/" \
-e "s/\(section '2\/15'\)/\1 (tail_call IPV4_NODEPORT_NAT_EGRESS)/" \
-e "s/\(section '2\/16'\)/\1 (tail_call IPV6_NODEPORT_NAT)/" \
-e "s/\(section '2\/16'\)/\1 (tail_call IPV6_NODEPORT_NAT_EGRESS)/" \
-e "s/\(section '2\/17'\)/\1 (tail_call IPV4_NODEPORT_REVNAT)/" \
-e "s/\(section '2\/18'\)/\1 (tail_call IPV6_NODEPORT_REVNAT)/" \
-e "s/\(section '2\/19'\)/\1 (tail_call IPV4_ENCAP_NODEPORT_NAT)/" \
Expand All @@ -48,9 +48,10 @@ function annotate_section_names {
-e "s/\(section '2\/34'\)/\1 (tail_call SRV6_DECAP)/" \
-e "s/\(section '2\/35'\)/\1 (tail_call SRV6_REPLY)/"
-e "s/\(section '2\/36'\)/\1 (tail_call IPV4_NODEPORT_NAT_INGRESS)/" \
-e "s/\(section '2\/37'\)/\1 (tail_call IPV6_NODEPORT_NAT_INGRESS)/" \
}

if ! grep -q "CILIUM_CALL_SIZE.*37" "$BPFDIR/lib/common.h" ; then
if ! grep -q "CILIUM_CALL_SIZE.*38" "$BPFDIR/lib/common.h" ; then
echo "This script is out of date compared to CILIUM_CALL_SIZE." 1>&2
exit 1
fi
Expand Down

0 comments on commit 536ad0c

Please sign in to comment.