Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend the CT expiration timeout for egress gateway packets #24905

Merged
merged 3 commits into from
Apr 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 9 additions & 9 deletions bpf/lib/nat.h
Original file line number Diff line number Diff line change
Expand Up @@ -319,17 +319,17 @@ static __always_inline int snat_v4_track_connection(struct __ctx_buff *ctx,

if (state && state->common.host_local) {
needs_ct = true;
} else if (!state && dir == NAT_DIR_EGRESS) {
if (tuple->saddr == target->addr)
needs_ct = true;
#if defined(ENABLE_EGRESS_GATEWAY)
/* Track egress gateway connections, but only if they are
* related to a remote endpoint (if the endpoint is local then
* the connection is already tracked).
*/
else if (target->egress_gateway && !target->from_local_endpoint)
needs_ct = true;
/* Track egress gateway connections, but only if they are related to a
* remote endpoint (if the endpoint is local then the connection is
* already tracked). NAT_DIR_EGRESS is implied. Do ct_lookup4 also for
* established connections to extend the expiration timeout.
*/
} else if (target->egress_gateway && !target->from_local_endpoint) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I think we can get rid of all these brackes

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Speaking of the next time we touch this code, I have a follow-up WIP pull request, and this snippet looks better there:

5e81d6d#diff-ef710180f228e8f6bbad249e2670dbc42a54f45da3bbc4c5aebbb1f3865979e9R309-R325

(Although still with brackets, but the comments naturally fit inside.)

needs_ct = true;
#endif
} else if (!state && dir == NAT_DIR_EGRESS && tuple->saddr == target->addr) {
needs_ct = true;
}
if (!needs_ct)
return 0;
Expand Down
265 changes: 182 additions & 83 deletions bpf/tests/tc_egressgw_snat.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,23 +35,21 @@ static volatile const __u8 *ext_svc_mac = mac_two;
#include "bpf_host.c"

#define TO_NETDEV 0
#define FROM_NETDEV 1

struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
__uint(key_size, sizeof(__u32));
__uint(max_entries, 1);
__uint(max_entries, 2);
__array(values, int());
} entry_call_map __section(".maps") = {
.values = {
[TO_NETDEV] = &cil_to_netdev,
[FROM_NETDEV] = &cil_from_netdev,
},
};

/* Test that a packet matching an egress gateway policy on the to-netdev program
* gets correctly SNATed with the egress IP of the policy.
*/
PKTGEN("tc", "tc_egressgw_snat")
int egressgw_snat_pktgen(struct __ctx_buff *ctx)
static __always_inline int egressgw_snat_pktgen(struct __ctx_buff *ctx, bool reply)
{
struct pktgen builder;
struct tcphdr *l4;
Expand All @@ -67,23 +65,48 @@ int egressgw_snat_pktgen(struct __ctx_buff *ctx)
if (!l2)
return TEST_ERROR;

ethhdr__set_macs(l2, (__u8 *)client_mac, (__u8 *)ext_svc_mac);
if (reply)
ethhdr__set_macs(l2, (__u8 *)ext_svc_mac, (__u8 *)client_mac);
else
ethhdr__set_macs(l2, (__u8 *)client_mac, (__u8 *)ext_svc_mac);

/* Push IPv4 header */
l3 = pktgen__push_default_iphdr(&builder);
if (!l3)
return TEST_ERROR;

l3->saddr = CLIENT_IP;
l3->daddr = EXTERNAL_SVC_IP;
if (reply) {
l3->saddr = EXTERNAL_SVC_IP;
l3->daddr = EGRESS_IP;
} else {
l3->saddr = CLIENT_IP;
l3->daddr = EXTERNAL_SVC_IP;
}

/* Push TCP header */
l4 = pktgen__push_default_tcphdr(&builder);
if (!l4)
return TEST_ERROR;

l4->source = CLIENT_PORT;
l4->dest = EXTERNAL_SVC_PORT;
if (reply) {
/* Get the destination port from the NAT entry. */
struct ipv4_ct_tuple tuple = {
.saddr = CLIENT_IP,
.daddr = EXTERNAL_SVC_IP,
.dport = EXTERNAL_SVC_PORT,
.sport = CLIENT_PORT,
.nexthdr = IPPROTO_TCP,
};
struct ipv4_nat_entry *nat_entry = __snat_lookup(&SNAT_MAPPING_IPV4, &tuple);

if (!nat_entry)
return TEST_ERROR;
l4->source = EXTERNAL_SVC_PORT;
l4->dest = nat_entry->to_sport;
} else {
l4->source = CLIENT_PORT;
l4->dest = EXTERNAL_SVC_PORT;
}

data = pktgen__push_data(&builder, default_data, sizeof(default_data));
if (!data)
Expand All @@ -95,30 +118,8 @@ int egressgw_snat_pktgen(struct __ctx_buff *ctx)
return 0;
}

SETUP("tc", "tc_egressgw_snat")
int egressgw_snat_setup(struct __ctx_buff *ctx)
{
struct egress_gw_policy_key in_key = {
.lpm_key = { EGRESS_PREFIX_LEN(24), {} },
.saddr = CLIENT_IP,
.daddr = EXTERNAL_SVC_IP & 0Xffffff,
};

struct egress_gw_policy_entry in_val = {
.egress_ip = EGRESS_IP,
.gateway_ip = NODE_IP,
};

map_update_elem(&EGRESS_POLICY_MAP, &in_key, &in_val, 0);

/* Jump into the entrypoint */
tail_call_static(ctx, &entry_call_map, TO_NETDEV);
/* Fail if we didn't jump */
return TEST_ERROR;
}

CHECK("tc", "tc_egressgw_snat")
int egressgw_snat_check(const struct __ctx_buff *ctx)
static __always_inline int egressgw_snat_check(const struct __ctx_buff *ctx, bool reply,
__u64 tx_packets, __u64 rx_packets)
{
void *data, *data_end;
__u32 *status_code;
Expand Down Expand Up @@ -149,87 +150,185 @@ int egressgw_snat_check(const struct __ctx_buff *ctx)
if ((void *)l4 + sizeof(struct tcphdr) > data_end)
test_fatal("l4 out of bounds");

if (memcmp(l2->h_source, (__u8 *)client_mac, ETH_ALEN) != 0)
test_fatal("src MAC is not the client MAC")
if (reply) {
if (memcmp(l2->h_source, (__u8 *)ext_svc_mac, ETH_ALEN) != 0)
test_fatal("src MAC is not the external svc MAC")

if (memcmp(l2->h_dest, (__u8 *)ext_svc_mac, ETH_ALEN) != 0)
test_fatal("dst MAC is not the external svc MAC")
if (memcmp(l2->h_dest, (__u8 *)client_mac, ETH_ALEN) != 0)
test_fatal("dst MAC is not the client MAC")

if (l3->saddr != EGRESS_IP)
test_fatal("src IP hasn't been NATed to egress gateway IP");
if (l3->saddr != EXTERNAL_SVC_IP)
test_fatal("src IP has changed");

if (l3->daddr != EXTERNAL_SVC_IP)
test_fatal("dst IP has changed");
if (l3->daddr != CLIENT_IP)
test_fatal("dst IP hasn't been revSNATed to client IP");
} else {
if (memcmp(l2->h_source, (__u8 *)client_mac, ETH_ALEN) != 0)
test_fatal("src MAC is not the client MAC")

if (memcmp(l2->h_dest, (__u8 *)ext_svc_mac, ETH_ALEN) != 0)
test_fatal("dst MAC is not the external svc MAC")

if (l3->saddr != EGRESS_IP)
test_fatal("src IP hasn't been NATed to egress gateway IP");

if (l3->daddr != EXTERNAL_SVC_IP)
test_fatal("dst IP has changed");
}

/* Lookup the SNAT mapping for the original packet to determine the new source port */
struct ipv4_ct_tuple tuple = {
.daddr = EXTERNAL_SVC_IP,
.saddr = CLIENT_IP,
.daddr = CLIENT_IP,
.saddr = EXTERNAL_SVC_IP,
.dport = EXTERNAL_SVC_PORT,
.sport = CLIENT_PORT,
.nexthdr = IPPROTO_TCP,
.flags = TUPLE_F_OUT,
};
struct ct_entry *ct_entry = map_lookup_elem(get_ct_map4(&tuple), &tuple);

if (!ct_entry)
test_fatal("no CT entry found");

if (ct_entry->tx_packets != tx_packets)
test_fatal("bad TX packet count (expected %u, actual %u)",
tx_packets, ct_entry->tx_packets)
if (ct_entry->rx_packets != rx_packets)
#ifdef ISSUE_25110_FIXED
/* This test fails until this issue is fixed:
* https://github.com/cilium/cilium/issues/25110
*/
test_fatal("bad RX packet count (expected %u, actual %u)",
rx_packets, ct_entry->rx_packets)
#else
;
#endif

tuple.saddr = CLIENT_IP;
tuple.daddr = EXTERNAL_SVC_IP;

struct ipv4_nat_entry *nat_entry = __snat_lookup(&SNAT_MAPPING_IPV4, &tuple);

if (!nat_entry)
test_fatal("could not find a NAT entry for the packet");

if (l4->source != nat_entry->to_sport)
test_fatal("src TCP port hasn't been NATed to egress gateway port");
if (reply) {
if (l4->source != EXTERNAL_SVC_PORT)
test_fatal("src port has changed");

if (l4->dest != EXTERNAL_SVC_PORT)
test_fatal("dst port has changed");
if (l4->dest != CLIENT_PORT)
test_fatal("dst TCP port hasn't been revSNATed to client port");
} else {
if (l4->source != nat_entry->to_sport)
test_fatal("src TCP port hasn't been NATed to egress gateway port");

if (l4->dest != EXTERNAL_SVC_PORT)
test_fatal("dst port has changed");
}

test_finish();
}

/* Test that a packet matching an excluded CIDR egress gateway policy on the
* to-netdev program does not get SNATed with the egress IP of the policy.
/* Test that a packet matching an egress gateway policy on the to-netdev program
* gets correctly SNATed with the egress IP of the policy.
*/
PKTGEN("tc", "tc_egressgw_skip_excluded_cidr_snat")
int egressgw_skip_excluded_cidr_snat_pktgen(struct __ctx_buff *ctx)
PKTGEN("tc", "tc_egressgw_snat1")
int egressgw_snat1_pktgen(struct __ctx_buff *ctx)
{
struct pktgen builder;
struct tcphdr *l4;
struct ethhdr *l2;
struct iphdr *l3;
void *data;
return egressgw_snat_pktgen(ctx, false);
jibi marked this conversation as resolved.
Show resolved Hide resolved
}

/* Init packet builder */
pktgen__init(&builder, ctx);
SETUP("tc", "tc_egressgw_snat1")
int egressgw_snat1_setup(struct __ctx_buff *ctx)
{
struct egress_gw_policy_key in_key = {
.lpm_key = { EGRESS_PREFIX_LEN(24), {} },
.saddr = CLIENT_IP,
.daddr = EXTERNAL_SVC_IP & 0Xffffff,
};

/* Push ethernet header */
l2 = pktgen__push_ethhdr(&builder);
if (!l2)
return TEST_ERROR;
struct egress_gw_policy_entry in_val = {
.egress_ip = EGRESS_IP,
.gateway_ip = NODE_IP,
};

ethhdr__set_macs(l2, (__u8 *)client_mac, (__u8 *)ext_svc_mac);
map_update_elem(&EGRESS_POLICY_MAP, &in_key, &in_val, 0);

/* Push IPv4 header */
l3 = pktgen__push_default_iphdr(&builder);
if (!l3)
return TEST_ERROR;
/* Jump into the entrypoint */
tail_call_static(ctx, &entry_call_map, TO_NETDEV);
/* Fail if we didn't jump */
return TEST_ERROR;
}

l3->saddr = CLIENT_IP;
l3->daddr = EXTERNAL_SVC_IP;
CHECK("tc", "tc_egressgw_snat1")
int egressgw_snat1_check(const struct __ctx_buff *ctx)
{
return egressgw_snat_check(ctx, false, 1, 0);
}

/* Push TCP header */
l4 = pktgen__push_default_tcphdr(&builder);
if (!l4)
return TEST_ERROR;
/* Test that a packet matching an egress gateway policy on the from-netdev program
* gets correctly revSNATed and connection tracked.
*/
PKTGEN("tc", "tc_egressgw_snat2_reply")
int egressgw_snat2_reply_pktgen(struct __ctx_buff *ctx)
{
return egressgw_snat_pktgen(ctx, true);
}

l4->source = CLIENT_PORT;
l4->dest = EXTERNAL_SVC_PORT;
SETUP("tc", "tc_egressgw_snat2_reply")
int egressgw_snat2_reply_setup(struct __ctx_buff *ctx)
{
/* Jump into the entrypoint */
tail_call_static(ctx, &entry_call_map, FROM_NETDEV);
/* Fail if we didn't jump */
return TEST_ERROR;
}

data = pktgen__push_data(&builder, default_data, sizeof(default_data));
if (!data)
return TEST_ERROR;
CHECK("tc", "tc_egressgw_snat2_reply")
int egressgw_snat2_reply_check(const struct __ctx_buff *ctx)
{
return egressgw_snat_check(ctx, true, 1, 1);
}

/* Calc lengths, set protocol fields and calc checksums */
pktgen__finish(&builder);
PKTGEN("tc", "tc_egressgw_snat3")
int egressgw_snat3_pktgen(struct __ctx_buff *ctx)
{
return egressgw_snat_pktgen(ctx, false);
}

return 0;
SETUP("tc", "tc_egressgw_snat3")
int egressgw_snat3_setup(struct __ctx_buff *ctx)
{
/* Jump into the entrypoint */
tail_call_static(ctx, &entry_call_map, TO_NETDEV);
/* Fail if we didn't jump */
return TEST_ERROR;
}

CHECK("tc", "tc_egressgw_snat3")
int egressgw_snat3_check(struct __ctx_buff *ctx)
{
int ret = egressgw_snat_check(ctx, false, 2, 1);

struct egress_gw_policy_key in_key = {
.lpm_key = { EGRESS_PREFIX_LEN(24), {} },
.saddr = CLIENT_IP,
.daddr = EXTERNAL_SVC_IP & 0Xffffff,
};

/* Remove the policy to eliminate interference with other tests. */
map_delete_elem(&EGRESS_POLICY_MAP, &in_key);

return ret;
Comment on lines +311 to +322
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For my understanding, is that a fix for the existing excluded-CIDR test below? We're using the same key, so it should just overwrite the existing policy entry - no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're using the same key, so it should just overwrite the existing policy entry - no?

I noticed that the excluded-CIDR test below runs first, not last. It's probably because of the alphabetic order.

}

/* Test that a packet matching an excluded CIDR egress gateway policy on the
* to-netdev program does not get SNATed with the egress IP of the policy.
*/
PKTGEN("tc", "tc_egressgw_skip_excluded_cidr_snat")
int egressgw_skip_excluded_cidr_snat_pktgen(struct __ctx_buff *ctx)
{
return egressgw_snat_pktgen(ctx, false);
}

SETUP("tc", "tc_egressgw_skip_excluded_cidr_snat")
Expand Down