Skip to content

Commit

Permalink
datapath: Enable DSR in the tunneling mode
Browse files Browse the repository at this point in the history
Adds a DSR mode that can be used together with tunnel (tunl) mode.
In this DSR mode, the packet is forwarded through an IPIP tunnel to the node where the actual Pod is located.
To enable this DSR mode, add the settings below.

```
bpf-lb-mode: dsr-tunl
enable-remote-node-identity: "True"
```

Fixes: cilium#10114
Signed-off-by: changwoo.nam <changwoo.nam@navercorp.com>
  • Loading branch information
changwoo-nam committed Dec 10, 2021
1 parent 701967f commit 3121d25
Show file tree
Hide file tree
Showing 21 changed files with 361 additions and 75 deletions.
2 changes: 1 addition & 1 deletion Documentation/cmdref/cilium-agent.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions api/v1/models/kube_proxy_replacement.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions api/v1/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1946,6 +1946,7 @@ definitions:
- SNAT
- DSR
- Hybrid
- DSR-Tunl
algorithm:
type: string
enum:
Expand Down
12 changes: 8 additions & 4 deletions api/v1/server/embedded_spec.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions bpf/bpf_host.c
Original file line number Diff line number Diff line change
Expand Up @@ -842,6 +842,7 @@ do_netdev(struct __ctx_buff *ctx, __u16 proto, const bool from_host)
{
__u32 __maybe_unused identity = 0;
__u32 __maybe_unused ipcache_srcid = 0;
__u32 __maybe_unused lb_selection_rule = 0;
int ret;

#ifdef ENABLE_IPSEC
Expand All @@ -857,8 +858,19 @@ do_netdev(struct __ctx_buff *ctx, __u16 proto, const bool from_host)
return CTX_ACT_OK;
}
#endif

#ifdef ENABLE_DSR_TUNL
lb_selection_rule = ctx_load_meta(ctx, CB_LB_SELECTION_RULE);
#endif

bpf_clear_meta(ctx);

#ifdef ENABLE_DSR_TUNL
if ((lb_selection_rule & LB_LOCAL_BACKEND_ONLY) == LB_LOCAL_BACKEND_ONLY) {
ctx_store_meta(ctx, CB_LB_SELECTION_RULE, LB_LOCAL_BACKEND_ONLY);
}
#endif

if (from_host) {
int trace = TRACE_FROM_HOST;
bool from_proxy;
Expand Down Expand Up @@ -969,8 +981,16 @@ handle_netdev(struct __ctx_buff *ctx, const bool from_host)
__section("from-netdev")
int from_netdev(struct __ctx_buff *ctx)
{
int __maybe_unused ret = CTX_ACT_OK;
__u32 __maybe_unused vlan_id;

#ifdef ENABLE_DSR_TUNL
ret = decap_ipip(ctx);
if (IS_ERR(ret)) {
return send_drop_notify_error(ctx, 0, ret, CTX_ACT_DROP, METRIC_INGRESS);
}
#endif

/* Filter allowed vlan id's and pass them back to kernel.
*/
if (ctx->vlan_present) {
Expand Down Expand Up @@ -1077,6 +1097,7 @@ int to_netdev(struct __ctx_buff *ctx __maybe_unused)
#if defined(ENABLE_NODEPORT) && \
(!defined(ENABLE_DSR) || \
(defined(ENABLE_DSR) && defined(ENABLE_DSR_HYBRID)) || \
(defined(ENABLE_DSR) && defined(ENABLE_DSR_TUNL)) || \
defined(ENABLE_MASQUERADE) || \
defined(ENABLE_EGRESS_GATEWAY))
if ((ctx->mark & MARK_MAGIC_SNAT_DONE) != MARK_MAGIC_SNAT_DONE) {
Expand Down
16 changes: 8 additions & 8 deletions bpf/bpf_lxc.c
Original file line number Diff line number Diff line change
Expand Up @@ -277,13 +277,13 @@ static __always_inline int ipv6_l3_from_lxc(struct __ctx_buff *ctx,
ep_tail_call(ctx, CILIUM_CALL_IPV6_NODEPORT_REVNAT);
return DROP_MISSED_TAIL_CALL;
}
# ifdef ENABLE_DSR
# if defined(ENABLE_DSR) && !defined(ENABLE_DSR_TUNL)
if (ct_state.dsr) {
ret = xlate_dsr_v6(ctx, tuple, l4_off);
if (ret != 0)
return ret;
}
# endif /* ENABLE_DSR */
# endif /* ENABLE_DSR && !ENABLE_DSR_TUNL */
#endif /* ENABLE_NODEPORT */
if (ct_state.rev_nat_index) {
ret = lb6_rev_nat(ctx, l4_off, &csum_off,
Expand Down Expand Up @@ -710,13 +710,13 @@ static __always_inline int handle_ipv4_from_lxc(struct __ctx_buff *ctx,
ep_tail_call(ctx, CILIUM_CALL_IPV4_NODEPORT_REVNAT);
return DROP_MISSED_TAIL_CALL;
}
# ifdef ENABLE_DSR
# if defined(ENABLE_DSR) && !defined(ENABLE_DSR_TUNL)
if (ct_state.dsr) {
ret = xlate_dsr_v4(ctx, &tuple, l4_off, has_l4_header);
if (ret != 0)
return ret;
}
# endif /* ENABLE_DSR */
# endif /* ENABLE_DSR && !ENABLE_DSR_TUNL */
#endif /* ENABLE_NODEPORT */

if (ct_state.rev_nat_index) {
Expand Down Expand Up @@ -1120,7 +1120,7 @@ ipv6_policy(struct __ctx_buff *ctx, int ifindex, __u32 src_label, __u8 *reason,
}

if (ret == CT_NEW) {
#ifdef ENABLE_DSR
#if defined(ENABLE_DSR) && !defined(ENABLE_DSR_TUNL)
{
bool dsr = false;

Expand All @@ -1130,7 +1130,7 @@ ipv6_policy(struct __ctx_buff *ctx, int ifindex, __u32 src_label, __u8 *reason,

ct_state_new.dsr = dsr;
}
#endif /* ENABLE_DSR */
#endif /* ENABLE_DSR && !ENABLE_DSR_TUNL */

ct_state_new.src_sec_id = src_label;
ct_state_new.node_port = ct_state.node_port;
Expand Down Expand Up @@ -1422,7 +1422,7 @@ ipv4_policy(struct __ctx_buff *ctx, int ifindex, __u32 src_label, __u8 *reason,
#endif /* !ENABLE_HOST_SERVICES_FULL && !DISABLE_LOOPBACK_LB */

if (ret == CT_NEW) {
#ifdef ENABLE_DSR
#if defined(ENABLE_DSR) && !defined(ENABLE_DSR_TUNL)
{
bool dsr = false;

Expand All @@ -1432,7 +1432,7 @@ ipv4_policy(struct __ctx_buff *ctx, int ifindex, __u32 src_label, __u8 *reason,

ct_state_new.dsr = dsr;
}
#endif /* ENABLE_DSR */
#endif /* ENABLE_DSR && !ENABLE_DSR_TUNL */

ct_state_new.src_sec_id = src_label;
ct_state_new.node_port = ct_state.node_port;
Expand Down
7 changes: 5 additions & 2 deletions bpf/lib/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,7 @@ enum {
* Not used by xfrm.
*/
#define CB_CUSTOM_CALLS CB_CT_STATE /* Alias, non-overlapping */
#define CB_LB_SELECTION_RULE CB_CT_STATE /* Alias, non-overlapping */
};

/* State values for NAT46 */
Expand Down Expand Up @@ -630,6 +631,8 @@ enum {
CT_REOPENED,
};

#define LB_LOCAL_BACKEND_ONLY 0xFFA0

/* Service flags (lb{4,6}_service->flags) */
enum {
SVC_FLAG_EXTERNAL_IP = (1 << 0), /* External IPs */
Expand Down Expand Up @@ -736,10 +739,10 @@ struct lb6_service {
__u32 affinity_timeout; /* In seconds, only for svc frontend */
};
__u16 count;
__u16 local_count;
__u16 rev_nat_index;
__u8 flags;
__u8 flags2;
__u8 pad[2];
};

/* See lb4_backend comments */
Expand Down Expand Up @@ -790,10 +793,10 @@ struct lb4_service {
* slots (otherwise zero).
*/
__u16 count;
__u16 local_count;
__u16 rev_nat_index; /* Reverse NAT ID in lb4_reverse_nat */
__u8 flags;
__u8 flags2;
__u8 pad[2];
};

struct lb4_backend {
Expand Down
54 changes: 51 additions & 3 deletions bpf/lib/lb.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "ipv4.h"
#include "hash.h"
#include "ids.h"
#include "eps.h"

#ifdef ENABLE_IPV6
struct {
Expand Down Expand Up @@ -266,7 +267,11 @@ bool lb6_svc_has_src_range_check(const struct lb6_service *svc __maybe_unused)

/* lb_skip_l4_dnat - report whether L4 DNAT can be skipped for DSR.
 *
 * When the frontend performs the DSR translation (DSR_XLATE_FRONTEND),
 * the backend-side L4 DNAT is redundant and may be skipped. In
 * DSR-tunnel mode the packet travels encapsulated to the backend node,
 * so DNAT must always be applied there and this never returns true.
 */
static __always_inline bool lb_skip_l4_dnat(void)
{
#ifndef ENABLE_DSR_TUNL
	return DSR_XLATE_MODE == DSR_XLATE_FRONTEND;
#else
	/* DSR-tunnel: DNAT is always required on the backend node. */
	return false;
#endif /* ENABLE_DSR_TUNL */
}

static __always_inline
Expand Down Expand Up @@ -1111,7 +1116,7 @@ struct lb4_service *lb4_lookup_backend_slot(struct __ctx_buff *ctx __maybe_unuse
/* Backend slot 0 is always reserved for the service frontend. */
#if LB_SELECTION == LB_SELECTION_RANDOM
static __always_inline __u32
lb4_select_backend_id(struct __ctx_buff *ctx,
lb4_select_backend_id_internal(struct __ctx_buff *ctx,
struct lb4_key *key,
const struct ipv4_ct_tuple *tuple __maybe_unused,
const struct lb4_service *svc)
Expand All @@ -1123,7 +1128,7 @@ lb4_select_backend_id(struct __ctx_buff *ctx,
}
#elif LB_SELECTION == LB_SELECTION_MAGLEV
static __always_inline __u32
lb4_select_backend_id(struct __ctx_buff *ctx __maybe_unused,
lb4_select_backend_id_internal(struct __ctx_buff *ctx __maybe_unused,
struct lb4_key *key __maybe_unused,
const struct ipv4_ct_tuple *tuple,
const struct lb4_service *svc)
Expand All @@ -1147,6 +1152,37 @@ lb4_select_backend_id(struct __ctx_buff *ctx __maybe_unused,
# error "Invalid load balancer backend selection algorithm!"
#endif /* LB_SELECTION */

/* lb4_select_backend_id - select a backend ID for an IPv4 service.
 *
 * In DSR-tunnel mode, LoadBalancer and ExternalIP services first try to
 * pick a node-local backend: slots 1..local_count of the service map hold
 * local backends (slot 0 is reserved for the frontend), and one is chosen
 * uniformly at random. If no local backend exists and the caller set
 * LB_LOCAL_BACKEND_ONLY in CB_LB_SELECTION_RULE, 0 is returned to signal
 * "no backend". Otherwise selection falls back to the configured
 * algorithm (random or Maglev) via lb4_select_backend_id_internal().
 *
 * Fix: return type widened from __u16 to __u32. Backend IDs are __u32
 * (lb4_select_backend_id_internal() returns __u32 and callers store the
 * result in a __u32), so returning __u16 silently truncated IDs > 65535.
 *
 * @ctx:   packet context
 * @key:   service lookup key (slot field is scratch space for slot lookups)
 * @tuple: CT tuple, used only by the Maglev selection path
 * @svc:   service entry (frontend slot) the backend is selected for
 *
 * Returns the selected backend ID, or 0 when no backend may be used.
 */
static __always_inline __u32
lb4_select_backend_id(struct __ctx_buff *ctx,
		      struct lb4_key *key,
		      const struct ipv4_ct_tuple *tuple __maybe_unused,
		      const struct lb4_service *svc)
{
	__u32 __maybe_unused lb_selection_rule = 0;
	__u32 __maybe_unused slot = 0;
	struct lb4_service __maybe_unused *be = NULL;

#ifdef ENABLE_DSR_TUNL
	/* Only externally reachable services honor the local-backend
	 * preference; ClusterIP traffic keeps the default algorithm.
	 */
	if (lb4_svc_is_loadbalancer(svc) || lb4_svc_is_external_ip(svc)) {
		lb_selection_rule = ctx_load_meta(ctx, CB_LB_SELECTION_RULE);

		if (svc->local_count > 0) {
			/* Slot 0 is the frontend; local backends occupy
			 * slots 1..local_count.
			 */
			slot = (get_prandom_u32() % svc->local_count) + 1;
			be = lb4_lookup_backend_slot(ctx, key, slot);
			if (be)
				return be->backend_id;
		}

		/* Caller requires a local backend; none found, so report
		 * "no backend" rather than forwarding off-node.
		 */
		if ((lb_selection_rule & LB_LOCAL_BACKEND_ONLY) == LB_LOCAL_BACKEND_ONLY)
			return 0;
	}
#endif /* ENABLE_DSR_TUNL */

	/* Default path: configured selection algorithm (random/Maglev). */
	return lb4_select_backend_id_internal(ctx, key, tuple, svc);
}

static __always_inline int
lb4_xlate(struct __ctx_buff *ctx, __be32 *new_daddr, __be32 *new_saddr __maybe_unused,
__be32 *old_saddr __maybe_unused, __u8 nexthdr __maybe_unused, int l3_off,
Expand Down Expand Up @@ -1316,11 +1352,16 @@ static __always_inline int lb4_local(const void *map, struct __ctx_buff *ctx,
__u8 flags = tuple->flags;
struct lb4_backend *backend;
__u32 backend_id = 0;
bool r_skip_l3_xlate = skip_l3_xlate;
int ret;
#ifdef ENABLE_SESSION_AFFINITY
union lb4_affinity_client_id client_id = {
.client_ip = saddr,
};
#ifdef ENABLE_DSR_TUNL
bool backend_local;
#endif

#endif
ret = ct_lookup4(map, tuple, ctx, l4_off, CT_SERVICE, state, &monitor);
switch (ret) {
Expand Down Expand Up @@ -1447,10 +1488,17 @@ static __always_inline int lb4_local(const void *map, struct __ctx_buff *ctx,
#endif
tuple->daddr = backend->address;

#ifdef ENABLE_DSR_TUNL
backend_local = __lookup_ip4_endpoint(tuple->daddr);
if (backend_local) {
r_skip_l3_xlate = false;
}
#endif

return lb_skip_l4_dnat() ? CTX_ACT_OK :
lb4_xlate(ctx, &new_daddr, &new_saddr, &saddr,
tuple->nexthdr, l3_off, l4_off, csum_off, key,
backend, has_l4_header, skip_l3_xlate);
backend, has_l4_header, r_skip_l3_xlate);
drop_no_service:
tuple->flags = flags;
return DROP_NO_SERVICE;
Expand Down

0 comments on commit 3121d25

Please sign in to comment.