bpf: dedicated functions for per-packet lb xlate #23715

Merged: 1 commit, Feb 21, 2023
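In short, this PR lifts the per-packet load-balancer service translation out of the inline blocks in __tail_handle_ipv4() and __tail_handle_ipv6() and into dedicated helpers, __per_packet_lb_svc_xlate_4() and __per_packet_lb_svc_xlate_6(). Each helper performs the service lookup and translation, stashes the resulting CT state and proxy port on the ctx, and then either tail-calls the CT egress program or returns an error, so the caller can simply return its result. A minimal sketch of the resulting caller shape, assuming Cilium's bpf headers and using only identifiers that appear in the diff below (the function name handle_ipv4_sketch is illustrative, not upstream code):

static __always_inline int handle_ipv4_sketch(struct __ctx_buff *ctx)
{
	void *data, *data_end;
	struct iphdr *ip4;

	if (!revalidate_data_pull(ctx, &data, &data_end, &ip4))
		return DROP_INVALID;

#ifdef ENABLE_PER_PACKET_LB
	/* Service xlate runs in the helper, which then tail-calls
	 * CILIUM_CALL_IPV4_CT_EGRESS; an error code comes back on failure. */
	return __per_packet_lb_svc_xlate_4(ctx, ip4);
#else
	/* Per-packet LB disabled: plain function call, no tail call. */
	return tail_ipv4_ct_egress(ctx);
#endif /* ENABLE_PER_PACKET_LB */
}

The IPv6 path follows the same shape through __per_packet_lb_svc_xlate_6(ctx, ip6).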
bpf/bpf_lxc.c: 213 changes (116 additions, 97 deletions)
@@ -69,6 +69,112 @@
# define ENABLE_PER_PACKET_LB 1
#endif

#ifdef ENABLE_PER_PACKET_LB

#ifdef ENABLE_IPV4
static __always_inline int __per_packet_lb_svc_xlate_4(void *ctx, struct iphdr *ip4)
{
	struct ipv4_ct_tuple tuple = {};
	struct csum_offset csum_off = {};
	struct ct_state ct_state_new = {};
	bool has_l4_header;
	struct lb4_service *svc;
	struct lb4_key key = {};
	__u16 proxy_port = 0;
	int l4_off;
	int ret = 0;

	has_l4_header = ipv4_has_l4_header(ip4);

	ret = lb4_extract_tuple(ctx, ip4, &l4_off, &tuple);
	if (IS_ERR(ret)) {
		if (ret == DROP_NO_SERVICE || ret == DROP_UNKNOWN_L4)
			goto skip_service_lookup;
		else
			return ret;
	}

	lb4_fill_key(&key, &tuple);
	if (has_l4_header)
		csum_l4_offset_and_flags(tuple.nexthdr, &csum_off);

	svc = lb4_lookup_service(&key, is_defined(ENABLE_NODEPORT), false);
	if (svc) {
#if defined(ENABLE_L7_LB)
		if (lb4_svc_is_l7loadbalancer(svc)) {
			proxy_port = (__u16)svc->l7_lb_proxy_port;
			goto skip_service_lookup;
		}
#endif /* ENABLE_L7_LB */
		ret = lb4_local(get_ct_map4(&tuple), ctx, ETH_HLEN, l4_off,
				&csum_off, &key, &tuple, svc, &ct_state_new,
				ip4->saddr, has_l4_header, false);
		if (IS_ERR(ret))
			return ret;
	}
skip_service_lookup:
	/* Store state to be picked up on the continuation tail call. */
	lb4_ctx_store_state(ctx, &ct_state_new, proxy_port);
	ep_tail_call(ctx, CILIUM_CALL_IPV4_CT_EGRESS);
	return ret;
}
#endif /* ENABLE_IPV4 */

#ifdef ENABLE_IPV6
static __always_inline int __per_packet_lb_svc_xlate_6(void *ctx, struct ipv6hdr *ip6)
{
	struct ipv6_ct_tuple tuple = {};
	struct csum_offset csum_off = {};
	struct ct_state ct_state_new = {};
	struct lb6_service *svc;
	struct lb6_key key = {};
	__u16 proxy_port = 0;
	int l4_off;
	int ret = 0;

	ret = lb6_extract_tuple(ctx, ip6, &l4_off, &tuple);
	if (IS_ERR(ret)) {
		if (ret == DROP_NO_SERVICE || ret == DROP_UNKNOWN_L4)
			goto skip_service_lookup;
		else
			return ret;
	}

	lb6_fill_key(&key, &tuple);
	csum_l4_offset_and_flags(tuple.nexthdr, &csum_off);

	/*
	 * Check if the destination address is among the address that should
	 * be load balanced. This operation is performed before we go through
	 * the connection tracker to allow storing the reverse nat index in
	 * the CT entry for destination endpoints where we can't encode the
	 * state in the address.
	 */
	svc = lb6_lookup_service(&key, is_defined(ENABLE_NODEPORT), false);
	if (svc) {
#if defined(ENABLE_L7_LB)
		if (lb6_svc_is_l7loadbalancer(svc)) {
			proxy_port = (__u16)svc->l7_lb_proxy_port;
			goto skip_service_lookup;
		}
#endif /* ENABLE_L7_LB */
		ret = lb6_local(get_ct_map6(&tuple), ctx, ETH_HLEN, l4_off,
				&csum_off, &key, &tuple, svc, &ct_state_new,
				false);
		if (IS_ERR(ret))
			return ret;
	}

skip_service_lookup:
	/* Store state to be picked up on the continuation tail call. */
	lb6_ctx_store_state(ctx, &ct_state_new, proxy_port);
	ep_tail_call(ctx, CILIUM_CALL_IPV6_CT_EGRESS);
	return ret;
}
#endif /* ENABLE_IPV6 */

#endif

#if defined(ENABLE_ARP_PASSTHROUGH) && defined(ENABLE_ARP_RESPONDER)
#error "Either ENABLE_ARP_PASSTHROUGH or ENABLE_ARP_RESPONDER can be defined"
#endif
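Both helpers share the same epilogue: lb4_ctx_store_state()/lb6_ctx_store_state() stash the freshly computed CT state and L7 proxy port on the ctx, and ep_tail_call() jumps to the corresponding CT egress program, which is expected to pick that state back up. Since a successful BPF tail call does not return, the trailing return ret is only reached if the tail call itself fails. A commented restatement of the IPv4 epilogue from the helper above (the comments are added here for explanation and are not part of the diff):

skip_service_lookup:
	/* Stash the new CT state and L7 proxy port on the ctx so that the
	 * continuation program can restore them. */
	lb4_ctx_store_state(ctx, &ct_state_new, proxy_port);

	/* Hand control to the CT egress program. A successful BPF tail call
	 * never returns, so the return below only executes if the tail call
	 * fails (e.g. the call map slot is empty). */
	ep_tail_call(ctx, CILIUM_CALL_IPV4_CT_EGRESS);
	return ret;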
@@ -610,57 +716,12 @@ static __always_inline int __tail_handle_ipv6(struct __ctx_buff *ctx)
return DROP_INVALID_SIP;

#ifdef ENABLE_PER_PACKET_LB
	{
		struct ipv6_ct_tuple tuple = {};
		struct csum_offset csum_off = {};
		struct ct_state ct_state_new = {};
		struct lb6_service *svc;
		struct lb6_key key = {};
		__u16 proxy_port = 0;
		int l4_off;

		ret = lb6_extract_tuple(ctx, ip6, &l4_off, &tuple);
		if (IS_ERR(ret)) {
			if (ret == DROP_NO_SERVICE || ret == DROP_UNKNOWN_L4)
				goto skip_service_lookup;
			else
				return ret;
		}

		lb6_fill_key(&key, &tuple);
		csum_l4_offset_and_flags(tuple.nexthdr, &csum_off);

		/*
		 * Check if the destination address is among the address that should
		 * be load balanced. This operation is performed before we go through
		 * the connection tracker to allow storing the reverse nat index in
		 * the CT entry for destination endpoints where we can't encode the
		 * state in the address.
		 */
		svc = lb6_lookup_service(&key, is_defined(ENABLE_NODEPORT), false);
		if (svc) {
#if defined(ENABLE_L7_LB)
			if (lb6_svc_is_l7loadbalancer(svc)) {
				proxy_port = (__u16)svc->l7_lb_proxy_port;
				goto skip_service_lookup;
			}
#endif /* ENABLE_L7_LB */
			ret = lb6_local(get_ct_map6(&tuple), ctx, ETH_HLEN, l4_off,
					&csum_off, &key, &tuple, svc, &ct_state_new,
					false);
			if (IS_ERR(ret))
				return ret;
		}

skip_service_lookup:
		/* Store state to be picked up on the continuation tail call. */
		lb6_ctx_store_state(ctx, &ct_state_new, proxy_port);
	}
	/* will tailcall internally or return error */
	return __per_packet_lb_svc_xlate_6(ctx, ip6);
#else
	/* won't be a tailcall, see TAIL_CT_LOOKUP6 */
	return tail_ipv6_ct_egress(ctx);
#endif /* ENABLE_PER_PACKET_LB */

	invoke_tailcall_if(is_defined(ENABLE_PER_PACKET_LB),
			   CILIUM_CALL_IPV6_CT_EGRESS, tail_ipv6_ct_egress);
	return ret;
}

__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV6_FROM_LXC)
@@ -1134,7 +1195,6 @@ static __always_inline int __tail_handle_ipv4(struct __ctx_buff *ctx)
{
	void *data, *data_end;
	struct iphdr *ip4;
	int ret;

	if (!revalidate_data_pull(ctx, &data, &data_end, &ip4))
		return DROP_INVALID;
@@ -1152,53 +1212,12 @@ static __always_inline int __tail_handle_ipv4(struct __ctx_buff *ctx)
return DROP_INVALID_SIP;

#ifdef ENABLE_PER_PACKET_LB
	{
		struct ipv4_ct_tuple tuple = {};
		struct csum_offset csum_off = {};
		struct ct_state ct_state_new = {};
		bool has_l4_header;
		struct lb4_service *svc;
		struct lb4_key key = {};
		__u16 proxy_port = 0;
		int l4_off;

		has_l4_header = ipv4_has_l4_header(ip4);

		ret = lb4_extract_tuple(ctx, ip4, &l4_off, &tuple);
		if (IS_ERR(ret)) {
			if (ret == DROP_NO_SERVICE || ret == DROP_UNKNOWN_L4)
				goto skip_service_lookup;
			else
				return ret;
		}

		lb4_fill_key(&key, &tuple);
		if (has_l4_header)
			csum_l4_offset_and_flags(tuple.nexthdr, &csum_off);

		svc = lb4_lookup_service(&key, is_defined(ENABLE_NODEPORT), false);
		if (svc) {
#if defined(ENABLE_L7_LB)
			if (lb4_svc_is_l7loadbalancer(svc)) {
				proxy_port = (__u16)svc->l7_lb_proxy_port;
				goto skip_service_lookup;
			}
#endif /* ENABLE_L7_LB */
			ret = lb4_local(get_ct_map4(&tuple), ctx, ETH_HLEN, l4_off,
					&csum_off, &key, &tuple, svc, &ct_state_new,
					ip4->saddr, has_l4_header, false);
			if (IS_ERR(ret))
				return ret;
		}
skip_service_lookup:
		/* Store state to be picked up on the continuation tail call. */
		lb4_ctx_store_state(ctx, &ct_state_new, proxy_port);
	}
	/* will tailcall internally or return error */
	return __per_packet_lb_svc_xlate_4(ctx, ip4);
#else
	/* won't be a tailcall, see TAIL_CT_LOOKUP4 */
	return tail_ipv4_ct_egress(ctx);
#endif /* ENABLE_PER_PACKET_LB */

	invoke_tailcall_if(is_defined(ENABLE_PER_PACKET_LB),
			   CILIUM_CALL_IPV4_CT_EGRESS, tail_ipv4_ct_egress);
	return ret;
}

__section_tail(CILIUM_MAP_CALLS, CILIUM_CALL_IPV4_FROM_LXC)