Skip to content

Commit

Permalink
bpf: nat: enable CT-driven trace aggregation
Browse files Browse the repository at this point in the history
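When the NAT code creates a conntrack (CT) entry for a SNATed connection (for
instance for Egress Gateway traffic), pass the resulting trace information
(trace reason and monitor value) back to tail_handle_snat_fwd_ipv*(), which
now reports it in its trace notification instead of TRACE_REASON_UNKNOWN / 0.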

Note that this also wires up the path from tail_nodeport_nat_egress_ipv*(),
but those connections never require CT at the NAT level, so their trace
information keeps its initial values (CT_NEW / TRACE_PAYLOAD_LEN).

Signed-off-by: Julian Wiedmann <jwi@isovalent.com>
  • Loading branch information
julianwiedmann committed Aug 1, 2023
1 parent 4c85662 commit a0d8cf9
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 29 deletions.
34 changes: 21 additions & 13 deletions bpf/lib/nat.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "icmp6.h"
#include "nat_46x64.h"
#include "stubs.h"
#include "trace.h"

enum nat_dir {
NAT_DIR_EGRESS = TUPLE_F_OUT,
Expand Down Expand Up @@ -265,6 +266,7 @@ snat_v4_nat_handle_mapping(struct __ctx_buff *ctx,
struct ipv4_nat_entry *tmp,
__u32 off,
const struct ipv4_nat_target *target,
struct trace_ctx *trace,
__s8 *ext_err)
{
bool needs_ct = target->needs_ct;
Expand All @@ -279,7 +281,6 @@ snat_v4_nat_handle_mapping(struct __ctx_buff *ctx,
if (needs_ct) {
struct ipv4_ct_tuple tuple_snat;
struct ct_state ct_state = {};
__u32 monitor = 0;
int ret;

memcpy(&tuple_snat, tuple, sizeof(tuple_snat));
Expand All @@ -288,10 +289,11 @@ snat_v4_nat_handle_mapping(struct __ctx_buff *ctx,

ret = ct_lazy_lookup4(get_ct_map4(&tuple_snat), &tuple_snat,
ctx, off, has_l4_header, ct_action, CT_EGRESS,
SCOPE_FORWARD, &ct_state, &monitor);
SCOPE_FORWARD, &ct_state, &trace->monitor);
if (ret < 0)
return ret;

trace->reason = (enum trace_reason)ret;
if (ret == CT_NEW) {
ret = ct_create4(get_ct_map4(&tuple_snat), NULL,
&tuple_snat, ctx, CT_EGRESS,
Expand Down Expand Up @@ -905,14 +907,15 @@ snat_v4_nat_handle_icmp_frag_needed(struct __ctx_buff *ctx, __u64 off,
static __always_inline int
__snat_v4_nat(struct __ctx_buff *ctx, struct ipv4_ct_tuple *tuple,
bool has_l4_header, int l4_off, enum ct_action action,
bool update_tuple,
const struct ipv4_nat_target *target, __s8 *ext_err)
bool update_tuple, const struct ipv4_nat_target *target,
struct trace_ctx *trace, __s8 *ext_err)
{
struct ipv4_nat_entry *state, tmp;
int ret;

ret = snat_v4_nat_handle_mapping(ctx, tuple, has_l4_header, action,
&state, &tmp, l4_off, target, ext_err);
&state, &tmp, l4_off, target,
trace, ext_err);
if (ret < 0)
return ret;

Expand All @@ -928,7 +931,8 @@ __snat_v4_nat(struct __ctx_buff *ctx, struct ipv4_ct_tuple *tuple,

static __always_inline __maybe_unused int
snat_v4_nat(struct __ctx_buff *ctx, struct ipv4_ct_tuple *tuple, int off,
bool has_l4_header, const struct ipv4_nat_target *target, __s8 *ext_err)
bool has_l4_header, const struct ipv4_nat_target *target,
struct trace_ctx *trace, __s8 *ext_err)
{
enum ct_action ct_action = ACTION_UNSPEC;
struct icmphdr icmphdr __align_stack_8;
Expand Down Expand Up @@ -983,7 +987,7 @@ snat_v4_nat(struct __ctx_buff *ctx, struct ipv4_ct_tuple *tuple, int off,
return NAT_PUNT_TO_STACK;

return __snat_v4_nat(ctx, tuple, has_l4_header, off, ct_action,
false, target, ext_err);
false, target, trace, ext_err);
}

static __always_inline __maybe_unused int
Expand Down Expand Up @@ -1306,6 +1310,7 @@ snat_v6_nat_handle_mapping(struct __ctx_buff *ctx,
struct ipv6_nat_entry *tmp,
__u32 off,
const struct ipv6_nat_target *target,
struct trace_ctx *trace,
__s8 *ext_err)
{
bool needs_ct = target->needs_ct;
Expand All @@ -1315,7 +1320,6 @@ snat_v6_nat_handle_mapping(struct __ctx_buff *ctx,
if (needs_ct) {
struct ipv6_ct_tuple tuple_snat;
struct ct_state ct_state = {};
__u32 monitor = 0;
int ret;

memcpy(&tuple_snat, tuple, sizeof(tuple_snat));
Expand All @@ -1324,10 +1328,11 @@ snat_v6_nat_handle_mapping(struct __ctx_buff *ctx,

ret = ct_lazy_lookup6(get_ct_map6(&tuple_snat), &tuple_snat,
ctx, off, ct_action, CT_EGRESS,
SCOPE_FORWARD, &ct_state, &monitor);
SCOPE_FORWARD, &ct_state, &trace->monitor);
if (ret < 0)
return ret;

trace->reason = (enum trace_reason)ret;
if (ret == CT_NEW) {
ret = ct_create6(get_ct_map6(&tuple_snat), NULL,
&tuple_snat, ctx, CT_EGRESS,
Expand Down Expand Up @@ -1696,13 +1701,14 @@ snat_v6_needs_masquerade(struct __ctx_buff *ctx __maybe_unused,
static __always_inline int
__snat_v6_nat(struct __ctx_buff *ctx, struct ipv6_ct_tuple *tuple,
int l4_off, enum ct_action action, bool update_tuple,
const struct ipv6_nat_target *target, __s8 *ext_err)
const struct ipv6_nat_target *target,
struct trace_ctx *trace, __s8 *ext_err)
{
struct ipv6_nat_entry *state, tmp;
int ret;

ret = snat_v6_nat_handle_mapping(ctx, tuple, action, &state, &tmp,
l4_off, target, ext_err);
l4_off, target, trace, ext_err);
if (ret < 0)
return ret;

Expand All @@ -1718,7 +1724,8 @@ __snat_v6_nat(struct __ctx_buff *ctx, struct ipv6_ct_tuple *tuple,

static __always_inline __maybe_unused int
snat_v6_nat(struct __ctx_buff *ctx, struct ipv6_ct_tuple *tuple, int off,
const struct ipv6_nat_target *target, __s8 *ext_err)
const struct ipv6_nat_target *target, struct trace_ctx *trace,
__s8 *ext_err)
{
enum ct_action ct_action = ACTION_UNSPEC;
struct icmp6hdr icmp6hdr __align_stack_8;
Expand Down Expand Up @@ -1766,7 +1773,8 @@ snat_v6_nat(struct __ctx_buff *ctx, struct ipv6_ct_tuple *tuple, int off,
if (snat_v6_nat_can_skip(target, tuple))
return NAT_PUNT_TO_STACK;

return __snat_v6_nat(ctx, tuple, off, ct_action, false, target, ext_err);
return __snat_v6_nat(ctx, tuple, off, ct_action, false, target,
trace, ext_err);
}

static __always_inline __maybe_unused int
Expand Down
44 changes: 32 additions & 12 deletions bpf/lib/nodeport.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ nodeport_has_nat_conflict_ipv6(const struct ipv6hdr *ip6 __maybe_unused,
}

static __always_inline int nodeport_snat_fwd_ipv6(struct __ctx_buff *ctx,
struct trace_ctx *trace,
__s8 *ext_err)
{
struct ipv6_nat_target target = {
Expand Down Expand Up @@ -196,7 +197,7 @@ static __always_inline int nodeport_snat_fwd_ipv6(struct __ctx_buff *ctx,
goto out;

apply_snat:
ret = snat_v6_nat(ctx, &tuple, l4_off, &target, ext_err);
ret = snat_v6_nat(ctx, &tuple, l4_off, &target, trace, ext_err);

out:
if (ret == NAT_PUNT_TO_STACK)
Expand Down Expand Up @@ -978,6 +979,10 @@ int tail_nodeport_nat_egress_ipv6(struct __ctx_buff *ctx)
.addr = IPV6_DIRECT_ROUTING,
};
struct ipv6_ct_tuple tuple = {};
struct trace_ctx trace = {
.reason = (enum trace_reason)CT_NEW,
.monitor = TRACE_PAYLOAD_LEN,
};
int ret, l4_off, oif = 0;
void *data, *data_end;
struct ipv6hdr *ip6;
Expand Down Expand Up @@ -1016,7 +1021,7 @@ int tail_nodeport_nat_egress_ipv6(struct __ctx_buff *ctx)
tuple.flags = TUPLE_F_OUT;

ret = __snat_v6_nat(ctx, &tuple, l4_off, ACTION_CREATE, true,
&target, &ext_err);
&target, &trace, &ext_err);
if (IS_ERR(ret))
goto drop_err;

Expand All @@ -1033,8 +1038,9 @@ int tail_nodeport_nat_egress_ipv6(struct __ctx_buff *ctx)
tunnel_endpoint,
WORLD_ID,
dst_sec_identity,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN, &oif);
trace.reason,
trace.monitor,
&oif);
if (IS_ERR(ret))
goto drop_err;

Expand Down Expand Up @@ -1463,6 +1469,10 @@ int tail_rev_nodeport_lb6(struct __ctx_buff *ctx)
__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV6_NODEPORT_SNAT_FWD)
int tail_handle_snat_fwd_ipv6(struct __ctx_buff *ctx)
{
struct trace_ctx trace = {
.reason = TRACE_REASON_UNKNOWN,
.monitor = 0,
};
enum trace_point obs_point;
int ret;
__s8 ext_err = 0;
Expand All @@ -1473,12 +1483,12 @@ int tail_handle_snat_fwd_ipv6(struct __ctx_buff *ctx)
obs_point = TRACE_TO_NETWORK;
#endif

ret = nodeport_snat_fwd_ipv6(ctx, &ext_err);
ret = nodeport_snat_fwd_ipv6(ctx, &trace, &ext_err);
if (IS_ERR(ret))
return send_drop_notify_error_ext(ctx, 0, ret, ext_err,
CTX_ACT_DROP, METRIC_EGRESS);

send_trace_notify(ctx, obs_point, 0, 0, 0, 0, TRACE_REASON_UNKNOWN, 0);
send_trace_notify(ctx, obs_point, 0, 0, 0, 0, trace.reason, trace.monitor);

return ret;
}
Expand Down Expand Up @@ -1582,6 +1592,7 @@ nodeport_has_nat_conflict_ipv4(const struct iphdr *ip4 __maybe_unused,

static __always_inline int nodeport_snat_fwd_ipv4(struct __ctx_buff *ctx,
__u32 cluster_id __maybe_unused,
struct trace_ctx *trace,
__s8 *ext_err)
{
struct ipv4_nat_target target = {
Expand Down Expand Up @@ -1615,7 +1626,7 @@ static __always_inline int nodeport_snat_fwd_ipv4(struct __ctx_buff *ctx,

apply_snat:
ret = snat_v4_nat(ctx, &tuple, l4_off, ipv4_has_l4_header(ip4),
&target, ext_err);
&target, trace, ext_err);

out:
if (ret == NAT_PUNT_TO_STACK)
Expand Down Expand Up @@ -2401,6 +2412,10 @@ int tail_nodeport_nat_egress_ipv4(struct __ctx_buff *ctx)
.addr = IPV4_DIRECT_ROUTING,
};
struct ipv4_ct_tuple tuple = {};
struct trace_ctx trace = {
.reason = (enum trace_reason)CT_NEW,
.monitor = TRACE_PAYLOAD_LEN,
};
int ret, l4_off, oif = 0;
void *data, *data_end;
struct iphdr *ip4;
Expand Down Expand Up @@ -2437,7 +2452,7 @@ int tail_nodeport_nat_egress_ipv4(struct __ctx_buff *ctx)
tuple.flags = TUPLE_F_OUT;

ret = __snat_v4_nat(ctx, &tuple, ipv4_has_l4_header(ip4), l4_off,
ACTION_CREATE, true, &target, &ext_err);
ACTION_CREATE, true, &target, &trace, &ext_err);
if (IS_ERR(ret))
goto drop_err;

Expand All @@ -2461,8 +2476,9 @@ int tail_nodeport_nat_egress_ipv4(struct __ctx_buff *ctx)
tunnel_endpoint,
WORLD_ID,
dst_sec_identity,
(enum trace_reason)CT_NEW,
TRACE_PAYLOAD_LEN, &oif);
trace.reason,
trace.monitor,
&oif);
if (IS_ERR(ret))
goto drop_err;

Expand Down Expand Up @@ -2929,6 +2945,10 @@ __section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV4_NODEPORT_SNAT_FWD)
int tail_handle_snat_fwd_ipv4(struct __ctx_buff *ctx)
{
__u32 cluster_id = ctx_load_meta(ctx, CB_CLUSTER_ID_EGRESS);
struct trace_ctx trace = {
.reason = TRACE_REASON_UNKNOWN,
.monitor = 0,
};
enum trace_point obs_point;
int ret;
__s8 ext_err = 0;
Expand All @@ -2941,12 +2961,12 @@ int tail_handle_snat_fwd_ipv4(struct __ctx_buff *ctx)
obs_point = TRACE_TO_NETWORK;
#endif

ret = nodeport_snat_fwd_ipv4(ctx, cluster_id, &ext_err);
ret = nodeport_snat_fwd_ipv4(ctx, cluster_id, &trace, &ext_err);
if (IS_ERR(ret))
return send_drop_notify_error_ext(ctx, 0, ret, ext_err,
CTX_ACT_DROP, METRIC_EGRESS);

send_trace_notify(ctx, obs_point, 0, 0, 0, 0, TRACE_REASON_UNKNOWN, 0);
send_trace_notify(ctx, obs_point, 0, 0, 0, 0, trace.reason, trace.monitor);

return ret;
}
Expand Down
12 changes: 8 additions & 4 deletions bpf/tests/bpf_nat_tests.c
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,7 @@ int test_nat4_icmp_error_tcp_egress(__maybe_unused struct __ctx_buff *ctx)
assert(ret == 0);

struct ipv4_ct_tuple icmp_tuple = {};
struct trace_ctx trace;
void *data, *data_end;
struct iphdr *ip4;
int l4_off;
Expand All @@ -580,7 +581,7 @@ int test_nat4_icmp_error_tcp_egress(__maybe_unused struct __ctx_buff *ctx)
* snat_v4_nat().
*/
ret = snat_v4_nat(ctx, &icmp_tuple, l4_off, ipv4_has_l4_header(ip4),
&target, NULL);
&target, &trace, NULL);
assert(ret == 0);

__u16 proto;
Expand Down Expand Up @@ -682,6 +683,7 @@ int test_nat4_icmp_error_udp_egress(__maybe_unused struct __ctx_buff *ctx)
assert(ret == 0);

struct ipv4_ct_tuple icmp_tuple = {};
struct trace_ctx trace;
void *data, *data_end;
struct iphdr *ip4;
int l4_off;
Expand All @@ -694,7 +696,7 @@ int test_nat4_icmp_error_udp_egress(__maybe_unused struct __ctx_buff *ctx)
* snat_v4_nat().
*/
ret = snat_v4_nat(ctx, &icmp_tuple, l4_off, ipv4_has_l4_header(ip4),
&target, NULL);
&target, &trace, NULL);
assert(ret == 0);

__u16 proto;
Expand Down Expand Up @@ -795,6 +797,7 @@ int test_nat4_icmp_error_icmp_egress(__maybe_unused struct __ctx_buff *ctx)
assert(ret == 0);

struct ipv4_ct_tuple icmp_tuple = {};
struct trace_ctx trace;
void *data, *data_end;
struct iphdr *ip4;
int l4_off;
Expand All @@ -807,7 +810,7 @@ int test_nat4_icmp_error_icmp_egress(__maybe_unused struct __ctx_buff *ctx)
* snat_v4_nat().
*/
ret = snat_v4_nat(ctx, &icmp_tuple, l4_off, ipv4_has_l4_header(ip4),
&target, NULL);
&target, &trace, NULL);
assert(ret == 0);

__u16 proto;
Expand Down Expand Up @@ -897,6 +900,7 @@ int test_nat4_icmp_error_sctp_egress(__maybe_unused struct __ctx_buff *ctx)
assert(ret == 0);

struct ipv4_ct_tuple icmp_tuple = {};
struct trace_ctx trace;
void *data, *data_end;
struct iphdr *ip4;
int l4_off;
Expand All @@ -909,7 +913,7 @@ int test_nat4_icmp_error_sctp_egress(__maybe_unused struct __ctx_buff *ctx)
* snat_v4_nat().
*/
ret = snat_v4_nat(ctx, &icmp_tuple, l4_off, ipv4_has_l4_header(ip4),
&target, NULL);
&target, &trace, NULL);
assert(ret == 0);

__u16 proto;
Expand Down

0 comments on commit a0d8cf9

Please sign in to comment.