Skip to content

Commit

Permalink
bpf: ct: introduce scope parameter for CT lookup
Browse files Browse the repository at this point in the history
A CT lookup is usually done in two directions - first the reverse
direction (to match replies), then the actual direction. Currently the only
exception is CT_SERVICE lookups, where we only check the reverse direction.

Control this behaviour independently of the CT_SERVICE type, so that we can
introduce more uni-directional lookups with subsequent patches.

No functional change intended.

Note that users of SCOPE_REVERSE need to tolerate that their passed-in
CT tuple won't be reversed after the CT lookup.

Signed-off-by: Julian Wiedmann <jwi@isovalent.com>
  • Loading branch information
julianwiedmann committed Jun 5, 2023
1 parent 8c36f8a commit bde55cd
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 60 deletions.
108 changes: 62 additions & 46 deletions bpf/lib/conntrack.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ enum {
ACTION_CLOSE,
};

enum ct_scope {
SCOPE_FORWARD,
SCOPE_REVERSE,
SCOPE_BIDIR,
};

#ifdef ENABLE_IPV4
struct ct_buffer4 {
struct ipv4_ct_tuple tuple;
Expand Down Expand Up @@ -365,8 +371,8 @@ ipv6_ct_tuple_swap_ports(struct ipv6_ct_tuple *tuple)

static __always_inline int
__ct_lookup6(const void *map, struct ipv6_ct_tuple *tuple, struct __ctx_buff *ctx,
int l4_off, int action, enum ct_dir dir, struct ct_state *ct_state,
__u32 *monitor)
int l4_off, int action, enum ct_dir dir, enum ct_scope scope,
struct ct_state *ct_state, __u32 *monitor)
{
bool is_tcp = tuple->nexthdr == IPPROTO_TCP;
union tcp_flags tcp_flags = { .value = 0 };
Expand All @@ -380,32 +386,37 @@ __ct_lookup6(const void *map, struct ipv6_ct_tuple *tuple, struct __ctx_buff *ct
action = ACTION_CLOSE;
}

/* Lookup the reverse direction
*
* This will find an existing flow in the reverse direction.
* The reverse direction is the one where reverse nat index is stored.
*/
cilium_dbg3(ctx, DBG_CT_LOOKUP6_1, (__u32)tuple->saddr.p4, (__u32)tuple->daddr.p4,
(bpf_ntohs(tuple->sport) << 16) | bpf_ntohs(tuple->dport));
cilium_dbg3(ctx, DBG_CT_LOOKUP6_2, (tuple->nexthdr << 8) | tuple->flags, 0, 0);
ret = __ct_lookup(map, ctx, tuple, action, dir, ct_state, is_tcp,
tcp_flags, monitor);
if (ret != CT_NEW) {
if (likely(ret == CT_ESTABLISHED || ret == CT_REOPENED)) {
if (unlikely(tuple->flags & TUPLE_F_RELATED))
ret = CT_RELATED;
else
ret = CT_REPLY;
cilium_dbg3(ctx, DBG_CT_LOOKUP6_2, (tuple->nexthdr << 8) | tuple->flags,
dir, scope);

switch (scope) {
case SCOPE_REVERSE:
case SCOPE_BIDIR:
/* Lookup in the reverse direction first: */
ret = __ct_lookup(map, ctx, tuple, action, dir, ct_state, is_tcp,
tcp_flags, monitor);
if (ret != CT_NEW) {
if (likely(ret == CT_ESTABLISHED || ret == CT_REOPENED)) {
if (unlikely(tuple->flags & TUPLE_F_RELATED))
ret = CT_RELATED;
else
ret = CT_REPLY;
}
goto out;
}
goto out;
}

/* Lookup entry in forward direction */
if (dir != CT_SERVICE) {
if (scope != SCOPE_BIDIR)
goto out;

/* now lookup in forward direction: */
case SCOPE_FORWARD:
ipv6_ct_tuple_reverse(tuple);
ret = __ct_lookup(map, ctx, tuple, action, dir, ct_state,
is_tcp, tcp_flags, monitor);
}

out:
cilium_dbg(ctx, DBG_CT_VERDICT, ret, ct_state->rev_nat_index);
return ret;
Expand All @@ -415,7 +426,7 @@ __ct_lookup6(const void *map, struct ipv6_ct_tuple *tuple, struct __ctx_buff *ct
static __always_inline int
ct_lazy_lookup6(const void *map, struct ipv6_ct_tuple *tuple,
struct __ctx_buff *ctx, int l4_off, int action, enum ct_dir dir,
struct ct_state *ct_state, __u32 *monitor)
enum ct_scope scope, struct ct_state *ct_state, __u32 *monitor)
{
/* The tuple is created in reverse order initially to find a
* potential reverse flow. This is required because the RELATED
Expand All @@ -433,7 +444,7 @@ ct_lazy_lookup6(const void *map, struct ipv6_ct_tuple *tuple,
else
return DROP_CT_INVALID_HDR;

return __ct_lookup6(map, tuple, ctx, l4_off, action, dir,
return __ct_lookup6(map, tuple, ctx, l4_off, action, dir, scope,
ct_state, monitor);
}

Expand Down Expand Up @@ -518,7 +529,8 @@ static __always_inline int ct_lookup6(const void *map,
return DROP_CT_UNKNOWN_PROTO;
}

return __ct_lookup6(map, tuple, ctx, action, l4_off, dir, ct_state, monitor);
return __ct_lookup6(map, tuple, ctx, action, l4_off, dir, SCOPE_BIDIR,
ct_state, monitor);
}

static __always_inline int
Expand Down Expand Up @@ -709,7 +721,7 @@ ct_is_reply4(const void *map, struct __ctx_buff *ctx, int off,
static __always_inline int
__ct_lookup4(const void *map, struct ipv4_ct_tuple *tuple, struct __ctx_buff *ctx,
int l4_off, bool has_l4_header, int action, enum ct_dir dir,
struct ct_state *ct_state, __u32 *monitor)
enum ct_scope scope, struct ct_state *ct_state, __u32 *monitor)
{
bool is_tcp = tuple->nexthdr == IPPROTO_TCP;
union tcp_flags tcp_flags = { .value = 0 };
Expand All @@ -723,35 +735,39 @@ __ct_lookup4(const void *map, struct ipv4_ct_tuple *tuple, struct __ctx_buff *ct
action = ACTION_CLOSE;
}

/* Lookup the reverse direction
*
* This will find an existing flow in the reverse direction.
*/
#ifndef QUIET_CT
cilium_dbg3(ctx, DBG_CT_LOOKUP4_1, tuple->saddr, tuple->daddr,
(bpf_ntohs(tuple->sport) << 16) | bpf_ntohs(tuple->dport));
cilium_dbg3(ctx, DBG_CT_LOOKUP4_2, (tuple->nexthdr << 8) | tuple->flags, 0, 0);
cilium_dbg3(ctx, DBG_CT_LOOKUP4_2, (tuple->nexthdr << 8) | tuple->flags,
dir, scope);
#endif
ret = __ct_lookup(map, ctx, tuple, action, dir, ct_state, is_tcp,
tcp_flags, monitor);
if (ret != CT_NEW) {
if (likely(ret == CT_ESTABLISHED || ret == CT_REOPENED)) {
if (unlikely(tuple->flags & TUPLE_F_RELATED))
ret = CT_RELATED;
else
ret = CT_REPLY;

switch (scope) {
case SCOPE_REVERSE:
case SCOPE_BIDIR:
/* Lookup in the reverse direction first: */
ret = __ct_lookup(map, ctx, tuple, action, dir, ct_state, is_tcp,
tcp_flags, monitor);
if (ret != CT_NEW) {
if (likely(ret == CT_ESTABLISHED || ret == CT_REOPENED)) {
if (unlikely(tuple->flags & TUPLE_F_RELATED))
ret = CT_RELATED;
else
ret = CT_REPLY;
}
goto out;
}
goto out;
}

relax_verifier();
if (scope != SCOPE_BIDIR)
goto out;

/* Lookup entry in forward direction */
if (dir != CT_SERVICE) {
/* now lookup in forward direction: */
case SCOPE_FORWARD:
ipv4_ct_tuple_reverse(tuple);
ret = __ct_lookup(map, ctx, tuple, action, dir, ct_state,
is_tcp, tcp_flags, monitor);
}

out:
cilium_dbg(ctx, DBG_CT_VERDICT, ret, ct_state->rev_nat_index);
return ret;
Expand Down Expand Up @@ -779,8 +795,8 @@ __ct_lookup4(const void *map, struct ipv4_ct_tuple *tuple, struct __ctx_buff *ct
static __always_inline int
ct_lazy_lookup4(const void *map, struct ipv4_ct_tuple *tuple,
struct __ctx_buff *ctx, int l4_off, bool has_l4_header,
int action, enum ct_dir dir, struct ct_state *ct_state,
__u32 *monitor)
int action, enum ct_dir dir, enum ct_scope scope,
struct ct_state *ct_state, __u32 *monitor)
{
/* The tuple is created in reverse order initially to find a
* potential reverse flow. This is required because the RELATED
Expand All @@ -799,7 +815,7 @@ ct_lazy_lookup4(const void *map, struct ipv4_ct_tuple *tuple,
return DROP_CT_INVALID_HDR;

return __ct_lookup4(map, tuple, ctx, l4_off, has_l4_header,
action, dir, ct_state, monitor);
action, dir, scope, ct_state, monitor);
}

/* Offset must point to IPv4 header */
Expand Down Expand Up @@ -832,7 +848,7 @@ static __always_inline int ct_lookup4(const void *map,
return action;

return __ct_lookup4(map, tuple, ctx, off, has_l4_header,
action, dir, ct_state, monitor);
action, dir, SCOPE_BIDIR, ct_state, monitor);
}

/* Offset must point to IPv6 */
Expand Down
4 changes: 2 additions & 2 deletions bpf/lib/dbg.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ enum {
*/
DBG_CT_LOOKUP4_2, /* arg1: (nexthdr << 8) | flags
* arg2: direction
* arg3: unused
* arg3: scope
*/
DBG_CT_CREATED4, /* arg1: (unused << 16) | rev_nat_index
* arg2: src sec-id
Expand All @@ -72,7 +72,7 @@ enum {
*/
DBG_CT_LOOKUP6_2, /* arg1: (nexthdr << 8) | flags
* arg2: direction
* arg3: unused
* arg3: scope
*/
DBG_CT_CREATED6, /* arg1: (unused << 16) | rev_nat_index
* arg2: src sec-id
Expand Down
5 changes: 3 additions & 2 deletions bpf/lib/lb.h
Original file line number Diff line number Diff line change
Expand Up @@ -854,7 +854,8 @@ static __always_inline int lb6_local(const void *map, struct __ctx_buff *ctx,
return DROP_NO_SERVICE;

/* See lb4_local comments re svc endpoint lookup process */
ret = ct_lazy_lookup6(map, tuple, ctx, l4_off, ACTION_CREATE, CT_SERVICE, state, &monitor);
ret = ct_lazy_lookup6(map, tuple, ctx, l4_off, ACTION_CREATE, CT_SERVICE,
SCOPE_REVERSE, state, &monitor);
switch (ret) {
case CT_NEW:
#ifdef ENABLE_SESSION_AFFINITY
Expand Down Expand Up @@ -1531,7 +1532,7 @@ static __always_inline int lb4_local(const void *map, struct __ctx_buff *ctx,
return DROP_NO_SERVICE;

ret = ct_lazy_lookup4(map, tuple, ctx, l4_off, has_l4_header, ACTION_CREATE,
CT_SERVICE, state, &monitor);
CT_SERVICE, SCOPE_REVERSE, state, &monitor);
switch (ret) {
case CT_NEW:
#ifdef ENABLE_SESSION_AFFINITY
Expand Down
4 changes: 2 additions & 2 deletions bpf/lib/nat.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ static __always_inline int snat_v4_track_connection(struct __ctx_buff *ctx,
ipv4_ct_tuple_swap_ports(&tmp);

ret = ct_lazy_lookup4(get_ct_map4(&tmp), &tmp, ctx, off, has_l4_header,
ct_action, where, &ct_state, &monitor);
ct_action, where, SCOPE_BIDIR, &ct_state, &monitor);
if (ret < 0) {
return ret;
} else if (ret == CT_NEW) {
Expand Down Expand Up @@ -1479,7 +1479,7 @@ static __always_inline int snat_v6_track_connection(struct __ctx_buff *ctx,
ipv6_ct_tuple_swap_ports(&tmp);

ret = ct_lazy_lookup6(get_ct_map6(&tmp), &tmp, ctx, off, ct_action,
where, &ct_state, &monitor);
where, SCOPE_BIDIR, &ct_state, &monitor);
if (ret < 0) {
return ret;
} else if (ret == CT_NEW) {
Expand Down
19 changes: 11 additions & 8 deletions bpf/lib/nodeport.h
Original file line number Diff line number Diff line change
Expand Up @@ -680,7 +680,7 @@ int tail_nodeport_dsr_ingress_ipv6(struct __ctx_buff *ctx)
}

ret = ct_lazy_lookup6(get_ct_map6(&tuple), &tuple, ctx, l4_off, ACTION_CREATE,
CT_EGRESS, &ct_state, &monitor);
CT_EGRESS, SCOPE_BIDIR, &ct_state, &monitor);
switch (ret) {
case CT_NEW:
case CT_REOPENED:
Expand Down Expand Up @@ -1061,7 +1061,7 @@ static __always_inline int nodeport_lb6(struct __ctx_buff *ctx,
struct ct_state ct_state = {};

ret = ct_lazy_lookup6(get_ct_map6(&tuple), &tuple, ctx, l4_off, ACTION_CREATE,
CT_EGRESS, &ct_state, &monitor);
CT_EGRESS, SCOPE_BIDIR, &ct_state, &monitor);
switch (ret) {
case CT_REPLY:
ipv6_ct_tuple_reverse(&tuple);
Expand Down Expand Up @@ -1153,7 +1153,7 @@ nodeport_rev_dnat_fwd_ipv6(struct __ctx_buff *ctx, struct trace_ctx *trace)
return CTX_ACT_OK;

ret = ct_lazy_lookup6(get_ct_map6(&tuple), &tuple, ctx, l4_off, ACTION_CREATE,
CT_INGRESS, &ct_state, &trace->monitor);
CT_INGRESS, SCOPE_BIDIR, &ct_state, &trace->monitor);
if (ret == CT_REPLY) {
trace->reason = TRACE_REASON_CT_REPLY;

Expand Down Expand Up @@ -1216,7 +1216,7 @@ static __always_inline int rev_nodeport_lb6(struct __ctx_buff *ctx, __s8 *ext_er
goto out;

ret = ct_lazy_lookup6(get_ct_map6(&tuple), &tuple, ctx, l4_off, ACTION_CREATE,
CT_INGRESS, &ct_state, &monitor);
CT_INGRESS, SCOPE_BIDIR, &ct_state, &monitor);
if (ret == CT_REPLY && ct_state.node_port == 1 && ct_state.rev_nat_index != 0) {
ret = lb6_rev_nat(ctx, l4_off, ct_state.rev_nat_index,
&tuple, REV_NAT_F_TUPLE_SADDR);
Expand Down Expand Up @@ -1972,7 +1972,8 @@ int tail_nodeport_dsr_ingress_ipv4(struct __ctx_buff *ctx)
}

ret = ct_lazy_lookup4(get_ct_map4(&tuple), &tuple, ctx, l4_off,
has_l4_header, ACTION_CREATE, CT_EGRESS, &ct_state, &monitor);
has_l4_header, ACTION_CREATE, CT_EGRESS, SCOPE_BIDIR,
&ct_state, &monitor);
switch (ret) {
case CT_NEW:
/* Maybe we can be a bit more selective about CT_REOPENED?
Expand Down Expand Up @@ -2338,7 +2339,8 @@ static __always_inline int nodeport_lb4(struct __ctx_buff *ctx,
struct ct_state ct_state = {};

ret = ct_lazy_lookup4(get_ct_map4(&tuple), &tuple, ctx, l4_off, has_l4_header,
ACTION_CREATE, CT_EGRESS, &ct_state, &monitor);
ACTION_CREATE, CT_EGRESS, SCOPE_BIDIR,
&ct_state, &monitor);
switch (ret) {
case CT_REPLY:
/* SVC request should never be considered a reply, so this
Expand Down Expand Up @@ -2426,7 +2428,8 @@ nodeport_rev_dnat_fwd_ipv4(struct __ctx_buff *ctx, struct trace_ctx *trace)
return CTX_ACT_OK;

ret = ct_lazy_lookup4(get_ct_map4(&tuple), &tuple, ctx, l4_off, has_l4_header,
ACTION_CREATE, CT_INGRESS, &ct_state, &trace->monitor);
ACTION_CREATE, CT_INGRESS, SCOPE_BIDIR, &ct_state,
&trace->monitor);
if (ret == CT_REPLY) {
trace->reason = TRACE_REASON_CT_REPLY;

Expand Down Expand Up @@ -2524,7 +2527,7 @@ static __always_inline int rev_nodeport_lb4(struct __ctx_buff *ctx, __s8 *ext_er
goto out;

ret = ct_lazy_lookup4(get_ct_map4(&tuple), &tuple, ctx, l4_off, has_l4_header,
ACTION_CREATE, CT_INGRESS, &ct_state, &monitor);
ACTION_CREATE, CT_INGRESS, SCOPE_BIDIR, &ct_state, &monitor);
if (ret == CT_REPLY && ct_state.node_port == 1 && ct_state.rev_nat_index != 0) {
reason = TRACE_REASON_CT_REPLY;
ret = lb4_rev_nat(ctx, l3_off, l4_off, &ct_state, &tuple,
Expand Down
1 change: 1 addition & 0 deletions bpf/tests/nat_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ static __always_inline int mock_ct_lazy_lookup4(__maybe_unused const void *map,
__maybe_unused bool has_l4_header,
__maybe_unused int action,
__maybe_unused enum ct_dir dir,
__maybe_unused enum ct_scope scope,
__maybe_unused struct ct_state *ct_state,
__maybe_unused __u32 *monitor)
{
Expand Down

0 comments on commit bde55cd

Please sign in to comment.