[v1.14] bpf: EgressGW-related tracing improvements #27524

Merged (3 commits) on Aug 21, 2023

94 changes: 40 additions & 54 deletions bpf/lib/nat.h
@@ -22,6 +22,7 @@
#include "icmp6.h"
#include "nat_46x64.h"
#include "stubs.h"
#include "trace.h"

enum nat_dir {
NAT_DIR_EGRESS = TUPLE_F_OUT,
@@ -101,6 +102,7 @@ struct ipv4_nat_target {
bool from_local_endpoint;
bool egress_gateway; /* NAT is needed because of an egress gateway policy */
__u32 cluster_id;
bool needs_ct;
};

#if defined(ENABLE_IPV4) && defined(ENABLE_NODEPORT)
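Note: the new needs_ct field moves the "does this flow need a conntrack entry?" decision up front into snat_v4_prepare_state(), which builds the NAT target once per packet. For orientation, the whole struct after this change should read roughly as follows (the fields above egress_gateway are not visible in the hunk and are reconstructed from context, so treat this as a sketch):

/* Sketch: fields not shown in the hunk above are reconstructed from context. */
struct ipv4_nat_target {
	__be32 addr;
	const __u16 min_port; /* host endianness */
	const __u16 max_port; /* host endianness */
	bool src_from_world;
	bool from_local_endpoint;
	bool egress_gateway; /* NAT is needed because of an egress gateway policy */
	__u32 cluster_id;
	bool needs_ct; /* precomputed in snat_v4_prepare_state() */
};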
@@ -258,31 +260,6 @@ static __always_inline int snat_v4_new_mapping(struct __ctx_buff *ctx,
return 0;
}

static __always_inline bool
snat_v4_needs_ct(const struct ipv4_ct_tuple *tuple,
const struct ipv4_nat_target *target)
{
if (tuple->saddr == target->addr) {
/* Host-local connection. */
return true;
}

#if defined(ENABLE_EGRESS_GATEWAY)
/* Track egress gateway connections, but only if they are related to a
* remote endpoint (if the endpoint is local then the connection is
* already tracked).
*/
if (target->egress_gateway && !target->from_local_endpoint) {
/* Track established egress gateway connections to extend the
* CT entry expiration timeout.
*/
return true;
}
#endif

return false;
}
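Note: snat_v4_needs_ct() is not simply dropped. Both of its cases, host-local connections (tuple->saddr == target->addr) and egress gateway connections from remote endpoints, reappear in the snat_v4_prepare_state() hunks further down, which record the verdict in target->needs_ct. One subtlety of the move: the old code preferred the needs_ct bit stored in an existing SNAT map entry and only fell back to this helper on a miss, while the new code trusts the precomputed target->needs_ct unconditionally.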

static __always_inline int
snat_v4_nat_handle_mapping(struct __ctx_buff *ctx,
struct ipv4_ct_tuple *tuple,
@@ -292,22 +269,21 @@ snat_v4_nat_handle_mapping(struct __ctx_buff *ctx,
struct ipv4_nat_entry *tmp,
__u32 off,
const struct ipv4_nat_target *target,
struct trace_ctx *trace,
__s8 *ext_err)
{
bool needs_ct;
bool needs_ct = target->needs_ct;
void *map;

map = get_cluster_snat_map_v4(target->cluster_id);
if (!map)
return DROP_SNAT_NO_MAP_FOUND;

*state = __snat_lookup(map, tuple);
needs_ct = *state ? (*state)->common.needs_ct : snat_v4_needs_ct(tuple, target);

if (needs_ct) {
struct ipv4_ct_tuple tuple_snat;
struct ct_state ct_state = {};
__u32 monitor = 0;
int ret;

memcpy(&tuple_snat, tuple, sizeof(tuple_snat));
@@ -319,10 +295,11 @@

ret = ct_lazy_lookup4(get_ct_map4(&tuple_snat), &tuple_snat,
ctx, off, has_l4_header, ct_action, CT_EGRESS,
SCOPE_FORWARD, &ct_state, &monitor);
SCOPE_FORWARD, &ct_state, &trace->monitor);
if (ret < 0)
return ret;

trace->reason = (enum trace_reason)ret;
if (ret == CT_NEW) {
ret = ct_create4(get_ct_map4(&tuple_snat), NULL,
&tuple_snat, ctx, CT_EGRESS,
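This hunk carries the tracing payoff: the CT lookup now writes straight into the caller's trace context. trace->monitor picks up the flow's monitor aggregation value, and trace->reason records the CT verdict (ct_lazy_lookup4() returns CT_NEW, CT_ESTABLISHED, etc. on success, which the code casts to enum trace_reason). For reference, trace_ctx from the newly included bpf/lib/trace.h is approximately:

/* Paraphrased from bpf/lib/trace.h; check the tree for the exact form. */
struct trace_ctx {
	enum trace_reason reason;
	__u32 monitor; /* number of packet bytes to forward in the trace event */
};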
@@ -734,6 +711,8 @@ static __always_inline bool snat_v4_prepare_state(struct __ctx_buff *ctx,
#if defined(TUNNEL_MODE) && defined(IS_BPF_OVERLAY)
if (ip4->saddr == IPV4_GATEWAY) {
target->addr = IPV4_GATEWAY;
target->needs_ct = true;

return true;
}

@@ -752,11 +731,15 @@ static __always_inline bool snat_v4_prepare_state(struct __ctx_buff *ctx,
if (DIRECT_ROUTING_DEV_IFINDEX == NATIVE_DEV_IFINDEX &&
ip4->saddr == IPV4_DIRECT_ROUTING) {
target->addr = IPV4_DIRECT_ROUTING;
target->needs_ct = true;

return true;
}
# ifdef ENABLE_MASQUERADE_IPV4
if (ip4->saddr == IPV4_MASQUERADE) {
target->addr = IPV4_MASQUERADE;
target->needs_ct = true;

return true;
}
# endif /* ENABLE_MASQUERADE_IPV4 */
@@ -817,6 +800,9 @@ static __always_inline bool snat_v4_prepare_state(struct __ctx_buff *ctx,

if (egress_gw_snat_needed(ip4, &target->addr)) {
target->egress_gateway = true;
/* If the endpoint is local, then the connection is already tracked. */
if (!local_ep)
target->needs_ct = true;

return true;
}
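The rationale from the deleted snat_v4_needs_ct() comment still applies here: egress gateway connections are tracked so that established entries keep having their CT expiration timeout extended. The !local_ep guard exists because, as the new comment notes, connections from local endpoints already get a CT entry from the per-endpoint datapath.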
@@ -961,14 +947,15 @@ snat_v4_nat_handle_icmp_frag_needed(struct __ctx_buff *ctx, __u64 off,
static __always_inline int
__snat_v4_nat(struct __ctx_buff *ctx, struct ipv4_ct_tuple *tuple,
bool has_l4_header, int l4_off, enum ct_action action,
bool update_tuple,
const struct ipv4_nat_target *target, __s8 *ext_err)
bool update_tuple, const struct ipv4_nat_target *target,
struct trace_ctx *trace, __s8 *ext_err)
{
struct ipv4_nat_entry *state, tmp;
int ret;

ret = snat_v4_nat_handle_mapping(ctx, tuple, has_l4_header, action,
&state, &tmp, l4_off, target, ext_err);
&state, &tmp, l4_off, target,
trace, ext_err);
if (ret > 0)
return CTX_ACT_OK;
if (ret < 0)
@@ -985,7 +972,8 @@ __snat_v4_nat(struct __ctx_buff *ctx, struct ipv4_ct_tuple *tuple,
}

static __always_inline __maybe_unused int
snat_v4_nat(struct __ctx_buff *ctx, const struct ipv4_nat_target *target, __s8 *ext_err)
snat_v4_nat(struct __ctx_buff *ctx, const struct ipv4_nat_target *target,
struct trace_ctx *trace, __s8 *ext_err)
{
enum ct_action ct_action = ACTION_UNSPEC;
struct icmphdr icmphdr __align_stack_8;
@@ -1052,7 +1040,7 @@ snat_v4_nat(struct __ctx_buff *ctx, const struct ipv4_nat_target *target, __s8 *
return NAT_PUNT_TO_STACK;

return __snat_v4_nat(ctx, &tuple, has_l4_header, off, ct_action,
false, target, ext_err);
false, target, trace, ext_err);
}
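With the new signature, callers own the trace context and hand it down. The call sites live outside this file; a minimal, hypothetical caller sketch (the helpers, drop codes, and metric constants below are the usual Cilium ones, but none of this is taken from the PR itself) could look like:

	/* Hypothetical caller; real call sites are outside this diff. */
	struct ipv4_nat_target target = {};
	struct trace_ctx trace = {
		.reason = TRACE_REASON_UNKNOWN,
		.monitor = 0,
	};
	__s8 ext_err = 0;
	int ret;

	if (snat_v4_prepare_state(ctx, &target)) {
		ret = snat_v4_nat(ctx, &target, &trace, &ext_err);
		if (IS_ERR(ret))
			return send_drop_notify_error_ext(ctx, 0, ret, ext_err,
							  CTX_ACT_DROP,
							  METRIC_EGRESS);
	}
	/* trace.reason and trace.monitor now reflect the SNAT CT state and
	 * can feed the caller's trace notification.
	 */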

static __always_inline __maybe_unused int
@@ -1241,6 +1229,7 @@ struct ipv6_nat_target {
const __u16 max_port; /* host endianness */
bool src_from_world;
bool from_local_endpoint;
bool needs_ct;
};

#if defined(ENABLE_IPV6) && defined(ENABLE_NODEPORT)
@@ -1377,18 +1366,6 @@ static __always_inline int snat_v6_new_mapping(struct __ctx_buff *ctx,
return 0;
}

static __always_inline bool
snat_v6_needs_ct(struct ipv6_ct_tuple *tuple,
const struct ipv6_nat_target *target)
{
if (ipv6_addr_equals(&tuple->saddr, &target->addr)) {
/* Host-local connection. */
return true;
}

return false;
}

static __always_inline int
snat_v6_nat_handle_mapping(struct __ctx_buff *ctx,
struct ipv6_ct_tuple *tuple,
@@ -1397,17 +1374,16 @@ snat_v6_nat_handle_mapping(struct __ctx_buff *ctx,
struct ipv6_nat_entry *tmp,
__u32 off,
const struct ipv6_nat_target *target,
struct trace_ctx *trace,
__s8 *ext_err)
{
bool needs_ct;
bool needs_ct = target->needs_ct;

*state = snat_v6_lookup(tuple);
needs_ct = *state ? (*state)->common.needs_ct : snat_v6_needs_ct(tuple, target);

if (needs_ct) {
struct ipv6_ct_tuple tuple_snat;
struct ct_state ct_state = {};
__u32 monitor = 0;
int ret;

memcpy(&tuple_snat, tuple, sizeof(tuple_snat));
@@ -1418,10 +1394,11 @@

ret = ct_lazy_lookup6(get_ct_map6(&tuple_snat), &tuple_snat,
ctx, off, ct_action, CT_EGRESS,
SCOPE_FORWARD, &ct_state, &monitor);
SCOPE_FORWARD, &ct_state, &trace->monitor);
if (ret < 0)
return ret;

trace->reason = (enum trace_reason)ret;
if (ret == CT_NEW) {
ret = ct_create6(get_ct_map6(&tuple_snat), NULL,
&tuple_snat, ctx, CT_EGRESS,
@@ -1723,19 +1700,25 @@ snat_v6_prepare_state(struct __ctx_buff *ctx, struct ipv6_nat_target *target)
BPF_V6(router_ip, ROUTER_IP);
if (ipv6_addr_equals((union v6addr *)&ip6->saddr, &router_ip)) {
ipv6_addr_copy(&target->addr, &router_ip);
target->needs_ct = true;

return true;
}
#else
/* See comment in snat_v4_prepare_state(). */
if (DIRECT_ROUTING_DEV_IFINDEX == NATIVE_DEV_IFINDEX &&
ipv6_addr_equals((union v6addr *)&ip6->saddr, &dr_addr)) {
ipv6_addr_copy(&target->addr, &dr_addr);
target->needs_ct = true;

return true;
}
# ifdef ENABLE_MASQUERADE_IPV6 /* SNAT local pod to world packets */
BPF_V6(masq_addr, IPV6_MASQUERADE);
if (ipv6_addr_equals((union v6addr *)&ip6->saddr, &masq_addr)) {
ipv6_addr_copy(&target->addr, &masq_addr);
target->needs_ct = true;

return true;
}
# endif /* ENABLE_MASQUERADE_IPV6 */
@@ -1830,13 +1813,14 @@ snat_v6_prepare_state(struct __ctx_buff *ctx, struct ipv6_nat_target *target)
static __always_inline int
__snat_v6_nat(struct __ctx_buff *ctx, struct ipv6_ct_tuple *tuple,
int l4_off, enum ct_action action, bool update_tuple,
const struct ipv6_nat_target *target, __s8 *ext_err)
const struct ipv6_nat_target *target,
struct trace_ctx *trace, __s8 *ext_err)
{
struct ipv6_nat_entry *state, tmp;
int ret;

ret = snat_v6_nat_handle_mapping(ctx, tuple, action, &state, &tmp,
l4_off, target, ext_err);
l4_off, target, trace, ext_err);
if (ret > 0)
return CTX_ACT_OK;
if (ret < 0)
@@ -1853,7 +1837,8 @@ __snat_v6_nat(struct __ctx_buff *ctx, struct ipv6_ct_tuple *tuple,
}

static __always_inline __maybe_unused int
snat_v6_nat(struct __ctx_buff *ctx, const struct ipv6_nat_target *target, __s8 *ext_err)
snat_v6_nat(struct __ctx_buff *ctx, const struct ipv6_nat_target *target,
struct trace_ctx *trace, __s8 *ext_err)
{
enum ct_action ct_action = ACTION_UNSPEC;
struct icmp6hdr icmp6hdr __align_stack_8;
Expand Down Expand Up @@ -1920,7 +1905,8 @@ snat_v6_nat(struct __ctx_buff *ctx, const struct ipv6_nat_target *target, __s8 *
if (snat_v6_nat_can_skip(target, &tuple, icmp_echoreply))
return NAT_PUNT_TO_STACK;

return __snat_v6_nat(ctx, &tuple, off, ct_action, false, target, ext_err);
return __snat_v6_nat(ctx, &tuple, off, ct_action, false, target,
trace, ext_err);
}
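The IPv6 changes mirror the IPv4 ones: snat_v6_needs_ct() folds into snat_v6_prepare_state() via target->needs_ct (minus the egress gateway case, which only exists on the IPv4 side here), and the trace context threads through snat_v6_nat_handle_mapping(), __snat_v6_nat() and snat_v6_nat() in the same way.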

static __always_inline __maybe_unused int