Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

datapath: Switch to LPM policy map #23885

Merged
merged 8 commits into from
Apr 25, 2023
32 changes: 29 additions & 3 deletions bpf/lib/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,18 +316,44 @@ struct remote_endpoint_info {
__u8 key;
};

/*
* Longest-prefix match map lookup only matches the number of bits from the
* beginning of the key stored in the map indicated by the 'lpm_key' field in
* the same stored map key, not including the 'lpm_key' field itself. Note that
* the 'lpm_key' value passed in the lookup function argument needs to be a
* "full prefix" (POLICY_FULL_PREFIX defined below).
*
* Since we need to be able to wildcard 'sec_label' independently on 'protocol'
* and 'dport' fields, we'll need to do that explicitly with a separate lookup
* where 'sec_label' is zero. For the 'protocol' and 'port' we can use the
* longest-prefix match by placing them at the end ot the key in this specific
* order, as we want to be able to wildcard those fields in a specific pattern:
* 'protocol' can only be wildcarded if dport is also fully wildcarded.
* 'protocol' is never partially wildcarded, so it is either fully wildcarded or
* not wildcarded at all. 'dport' can be partially wildcarded, but only when
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does partially wildcarded mean? Does it indicate a port range?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR is a prerequisite for implementing port ranges, yes.

* 'protocol' is fully specified. This follows the logic that the destination
* port is a property of a transport protocol and can not be specified without
* also specifying the protocol.
aditighag marked this conversation as resolved.
Show resolved Hide resolved
*/
struct policy_key {
struct bpf_lpm_trie_key lpm_key;
jrajahalme marked this conversation as resolved.
Show resolved Hide resolved
__u32 sec_label;
__u16 dport;
__u8 protocol;
__u8 egress:1,
pad:7;
__u8 protocol; /* can be wildcarded if 'dport' is fully wildcarded */
__u16 dport; /* can be wildcarded with CIDR-like prefix */
};

/* POLICY_FULL_PREFIX gets full prefix length of policy_key */
#define POLICY_FULL_PREFIX \
(8 * (sizeof(struct policy_key) - sizeof(struct bpf_lpm_trie_key)))

struct policy_entry {
__be16 proxy_port;
__u8 deny:1,
pad:7;
wildcard_protocol:1, /* protocol is fully wildcarded */
wildcard_dport:1, /* dport is fully wildcarded */
pad:5;
__u8 auth_type;
__u16 pad1;
__u16 pad2;
Expand Down
4 changes: 2 additions & 2 deletions bpf/lib/maps.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,12 @@ struct {
#ifdef POLICY_MAP
/* Per-endpoint policy enforcement map */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(type, BPF_MAP_TYPE_LPM_TRIE);
__type(key, struct policy_key);
__type(value, struct policy_entry);
__uint(pinning, LIBBPF_PIN_BY_NAME);
__uint(max_entries, POLICY_MAP_SIZE);
__uint(map_flags, CONDITIONAL_PREALLOC);
__uint(map_flags, BPF_F_NO_PREALLOC);
jrajahalme marked this conversation as resolved.
Show resolved Hide resolved
} POLICY_MAP __section_maps_btf;
#endif

Expand Down
118 changes: 62 additions & 56 deletions bpf/lib/policy.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,24 @@
#include "eps.h"
#include "maps.h"

static __always_inline void
account(struct __ctx_buff *ctx, struct policy_entry *policy)
static __always_inline int
__account_and_check(struct __ctx_buff *ctx, struct policy_entry *policy,
__s8 *ext_err, __u16 *proxy_port)
{
/* FIXME: Use per cpu counters */
__sync_fetch_and_add(&policy->packets, 1);
__sync_fetch_and_add(&policy->bytes, ctx_full_len(ctx));

if (unlikely(policy->deny))
return DROP_POLICY_DENY;

*proxy_port = policy->proxy_port;
if (unlikely(policy->auth_type)) {
if (ext_err)
*ext_err = (__s8)policy->auth_type;
return DROP_POLICY_AUTH_REQUIRED;
}
return CTX_ACT_OK;
}

static __always_inline int
Expand All @@ -26,12 +38,14 @@ __policy_can_access(const void *map, struct __ctx_buff *ctx, __u32 local_id,
__u16 *proxy_port)
{
struct policy_entry *policy;
struct policy_entry *l4policy;
struct policy_key key = {
.lpm_key = { POLICY_FULL_PREFIX, {} }, /* always look up with unwildcarded data */
.sec_label = remote_id,
.dport = dport,
.protocol = proto,
.egress = !dir,
.pad = 0,
.protocol = proto,
.dport = dport,
};

#ifdef ALLOW_ICMP_FRAG_NEEDED
Expand Down Expand Up @@ -108,58 +122,64 @@ __policy_can_access(const void *map, struct __ctx_buff *ctx, __u32 local_id,
* 5. id/ANY/ANY (L3-only)
* 6. ANY/ANY/ANY (All)
*/
/* L4 lookup can't be done on untracked fragments. */
if (!is_untracked_fragment) {
/* Start with L3/L4 lookup. */
policy = map_lookup_elem(map, &key);
if (likely(policy)) {
cilium_dbg3(ctx, DBG_L4_CREATE, remote_id, local_id,
dport << 16 | proto);
*match_type = POLICY_MATCH_L3_L4; /* 1. id/proto/port */
goto policy_check_entry;
}

/* L4-only lookup. */
key.sec_label = 0;
policy = map_lookup_elem(map, &key);
if (likely(policy)) {
*match_type = POLICY_MATCH_L4_ONLY; /* 2. ANY/proto/port */
goto policy_check_entry;
}
/* Start with L3/L4 lookup.
* LPM precedence order with L3:
* 1. id/proto/port
* 3. id/proto/ANY (check L4-only match first)
* 5. id/ANY/ANY (check proto match first)
*
* Note: Untracked fragments always have zero ports in the tuple so they can
* only match entries that have fully wildcarded ports.
*/
policy = map_lookup_elem(map, &key);

/* This is a full L3/L4 match if port is not wildcarded,
* need to check for L4-only policy first if it is.
*/
if (likely(policy && !policy->wildcard_dport)) {
cilium_dbg3(ctx, DBG_L4_CREATE, remote_id, local_id,
dport << 16 | proto);
*match_type = POLICY_MATCH_L3_L4; /* 1. id/proto/port */
return __account_and_check(ctx, policy, ext_err, proxy_port);
}

/* Check L3-proto policy */
key.sec_label = remote_id;
key.dport = 0;
policy = map_lookup_elem(map, &key);
if (likely(policy)) {
/* L4-only lookup. */
key.sec_label = 0;
/* LPM precedence order without L3:
* 2. ANY/proto/port
* 4. ANY/proto/ANY
* 6. ANY/ANY/ANY == allow-all as L3 is zeroed in this lookup,
* defer this until L3 match has been ruled out below.
*
* Untracked fragments always have zero ports in the tuple so they can
* only match entries that have fully wildcarded ports.
*/
l4policy = map_lookup_elem(map, &key);

if (likely(l4policy && !l4policy->wildcard_dport)) {
*match_type = POLICY_MATCH_L4_ONLY; /* 2. ANY/proto/port */
return __account_and_check(ctx, l4policy, ext_err, proxy_port);
}

if (likely(policy && !policy->wildcard_protocol)) {
*match_type = POLICY_MATCH_L3_PROTO; /* 3. id/proto/ANY */
goto policy_check_entry;
return __account_and_check(ctx, policy, ext_err, proxy_port);
}

/* Check Proto-only policy */
key.sec_label = 0;
policy = map_lookup_elem(map, &key);
if (likely(policy)) {
if (likely(l4policy && !l4policy->wildcard_protocol)) {
*match_type = POLICY_MATCH_PROTO_ONLY; /* 4. ANY/proto/ANY */
goto policy_check_entry;
return __account_and_check(ctx, l4policy, ext_err, proxy_port);
}

/* If L4 policy check misses, fall back to L3-only. */
key.sec_label = remote_id;
key.protocol = 0;
policy = map_lookup_elem(map, &key);
if (likely(policy)) {
*match_type = POLICY_MATCH_L3_ONLY; /* 5. id/ANY/ANY */
goto policy_check_entry;
return __account_and_check(ctx, policy, ext_err, proxy_port);
}

/* Final fallback if allow-all policy is in place. */
key.sec_label = 0;
policy = map_lookup_elem(map, &key);
if (policy) {
if (likely(l4policy)) {
*match_type = POLICY_MATCH_ALL; /* 6. ANY/ANY/ANY */
goto policy_check_entry;
return __account_and_check(ctx, l4policy, ext_err, proxy_port);
}

/* TODO: Consider skipping policy lookup in this case? */
Expand All @@ -172,20 +192,6 @@ __policy_can_access(const void *map, struct __ctx_buff *ctx, __u32 local_id,
return DROP_FRAG_NOSUPPORT;

return DROP_POLICY;

policy_check_entry:
account(ctx, policy);

if (unlikely(policy->deny))
return DROP_POLICY_DENY;

*proxy_port = policy->proxy_port;
if (unlikely(policy->auth_type)) {
if (ext_err)
*ext_err = (__s8)policy->auth_type;
return DROP_POLICY_AUTH_REQUIRED;
}
return CTX_ACT_OK;
}

/**
Expand Down
39 changes: 17 additions & 22 deletions cilium/cmd/bpf_policy_get.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,13 @@ import (

"github.com/spf13/cobra"

"github.com/cilium/cilium/api/v1/models"
"github.com/cilium/cilium/pkg/bpf"
"github.com/cilium/cilium/pkg/byteorder"
"github.com/cilium/cilium/pkg/command"
"github.com/cilium/cilium/pkg/common"
"github.com/cilium/cilium/pkg/identity"
identitymodel "github.com/cilium/cilium/pkg/identity/model"
"github.com/cilium/cilium/pkg/maps/policymap"
"github.com/cilium/cilium/pkg/policy/trafficdirection"
"github.com/cilium/cilium/pkg/u8proto"
)

var (
Expand Down Expand Up @@ -124,6 +121,7 @@ func formatMap(w io.Writer, statsMap []policymap.PolicyEntryDump) {
proxyPortTitle = "PROXY PORT"
bytesTitle = "BYTES"
packetsTitle = "PACKETS"
prefixTitle = "PREFIX"
)

labelsID := map[identity.NumericIdentity]*identity.Identity{}
Expand All @@ -141,49 +139,46 @@ func formatMap(w io.Writer, statsMap []policymap.PolicyEntryDump) {
}

if printIDs {
fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t\n",
policyTitle, trafficDirectionTitle, labelsIDTitle, portTitle, proxyPortTitle, bytesTitle, packetsTitle)
fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\n",
policyTitle, trafficDirectionTitle, labelsIDTitle, portTitle, proxyPortTitle, bytesTitle, packetsTitle, prefixTitle)
} else {
fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t\n",
policyTitle, trafficDirectionTitle, labelsDesTitle, portTitle, proxyPortTitle, bytesTitle, packetsTitle)
fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\n",
policyTitle, trafficDirectionTitle, labelsDesTitle, portTitle, proxyPortTitle, bytesTitle, packetsTitle, prefixTitle)
}
for _, stat := range statsMap {
prefixLen := stat.Key.Prefixlen - policymap.StaticPrefixBits
id := identity.NumericIdentity(stat.Key.Identity)
trafficDirection := trafficdirection.TrafficDirection(stat.Key.TrafficDirection)
trafficDirectionString := trafficDirection.String()
port := models.PortProtocolANY
if stat.Key.DestPort != 0 || stat.Key.Nexthdr == uint8(u8proto.ICMP) || stat.Key.Nexthdr == uint8(u8proto.ICMPv6) {
dport := byteorder.NetworkToHost16(stat.Key.DestPort)
proto := u8proto.U8proto(stat.Key.Nexthdr)
port = fmt.Sprintf("%d/%s", dport, proto.String())
}
port := stat.Key.PortProtoString()
proxyPort := "NONE"
if stat.ProxyPort != 0 {
proxyPort = strconv.FormatUint(uint64(byteorder.NetworkToHost16(stat.ProxyPort)), 10)
pp := stat.GetProxyPort()
if pp != 0 {
proxyPort = strconv.FormatUint(uint64(pp), 10)
}
var policyStr string
if policymap.PolicyEntryFlags(stat.Flags).IsDeny() {
if stat.IsDeny() {
policyStr = "Deny"
} else {
policyStr = "Allow"
}
if printIDs {
fmt.Fprintf(w, "%s\t%s\t%d\t%s\t%s\t%d\t%d\t\n",
policyStr, trafficDirectionString, id, port, proxyPort, stat.Bytes, stat.Packets)
fmt.Fprintf(w, "%s\t%s\t%d\t%s\t%s\t%d\t%d\t%d\t\n",
policyStr, trafficDirectionString, id, port, proxyPort, stat.Bytes, stat.Packets, prefixLen)
} else if lbls := labelsID[id]; lbls != nil && len(lbls.Labels) > 0 {
first := true
for _, lbl := range lbls.Labels.GetPrintableModel() {
if first {
fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%d\t%d\t\n",
policyStr, trafficDirectionString, lbl, port, proxyPort, stat.Bytes, stat.Packets)
fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\t%d\t%d\t%d\t\n",
policyStr, trafficDirectionString, lbl, port, proxyPort, stat.Bytes, stat.Packets, prefixLen)
first = false
} else {
fmt.Fprintf(w, "\t\t%s\t\t\t\t\t\t\n", lbl)
}
}
} else {
fmt.Fprintf(w, "%s\t%s\t%d\t%s\t%s\t%d\t%d\t\n",
policyStr, trafficDirectionString, id, port, proxyPort, stat.Bytes, stat.Packets)
fmt.Fprintf(w, "%s\t%s\t%d\t%s\t%s\t%d\t%d\t%d\t\n",
policyStr, trafficDirectionString, id, port, proxyPort, stat.Bytes, stat.Packets, prefixLen)
}
}
}
34 changes: 12 additions & 22 deletions pkg/endpoint/bpf.go
Original file line number Diff line number Diff line change
Expand Up @@ -1089,12 +1089,8 @@ func (e *Endpoint) updatePolicyMapPressureMetric() {
// not otherwise changed (e.g., it is never set to 'false').
func (e *Endpoint) deletePolicyKey(keyToDelete policy.Key, incremental bool, hadProxy *bool) bool {
// Convert from policy.Key to policymap.Key
policymapKey := policymap.PolicyKey{
Identity: keyToDelete.Identity,
DestPort: keyToDelete.DestPort,
Nexthdr: keyToDelete.Nexthdr,
TrafficDirection: keyToDelete.TrafficDirection,
}
policymapKey := policymap.NewKey(keyToDelete.Identity, keyToDelete.DestPort,
aditighag marked this conversation as resolved.
Show resolved Hide resolved
keyToDelete.Nexthdr, keyToDelete.TrafficDirection)

// Do not error out if the map entry was already deleted from the bpf map.
// Incremental updates depend on this being OK in cases where identity change
Expand Down Expand Up @@ -1131,12 +1127,8 @@ func (e *Endpoint) deletePolicyKey(keyToDelete policy.Key, incremental bool, had

func (e *Endpoint) addPolicyKey(keyToAdd policy.Key, entry policy.MapStateEntry, incremental bool) bool {
// Convert from policy.Key to policymap.Key
policymapKey := policymap.PolicyKey{
Identity: keyToAdd.Identity,
DestPort: keyToAdd.DestPort,
Nexthdr: keyToAdd.Nexthdr,
TrafficDirection: keyToAdd.TrafficDirection,
}
policymapKey := policymap.NewKey(keyToAdd.Identity, keyToAdd.DestPort,
keyToAdd.Nexthdr, keyToAdd.TrafficDirection)

var err error
if entry.IsDeny {
Expand Down Expand Up @@ -1347,21 +1339,19 @@ func (e *Endpoint) dumpPolicyMapToMapState() (policy.MapState, error) {
currentMap := make(policy.MapState)

cb := func(key bpf.MapKey, value bpf.MapValue) {
// Convert key to host byte-order. ToHost() makes a copy.
keyHostOrder := key.(*policymap.PolicyKey).ToHost()
policymapKey := key.(*policymap.PolicyKey)
// Convert from policymap.Key to policy.Key
policyKey := policy.Key{
Identity: keyHostOrder.Identity,
DestPort: keyHostOrder.DestPort,
Nexthdr: keyHostOrder.Nexthdr,
TrafficDirection: keyHostOrder.TrafficDirection,
Identity: policymapKey.Identity,
DestPort: policymapKey.GetDestPort(),
Nexthdr: policymapKey.Nexthdr,
TrafficDirection: policymapKey.TrafficDirection,
}
// Convert value to host byte-order. ToHost() makes a copy.
entryHostOrder := value.(*policymap.PolicyEntry).ToHost()
policymapEntry := value.(*policymap.PolicyEntry)
// Convert from policymap.PolicyEntry to policy.MapStateEntry.
policyEntry := policy.MapStateEntry{
ProxyPort: entryHostOrder.ProxyPort,
IsDeny: policymap.PolicyEntryFlags(entryHostOrder.GetFlags()).IsDeny(),
ProxyPort: policymapEntry.GetProxyPort(),
IsDeny: policymapEntry.IsDeny(),
}
currentMap[policyKey] = policyEntry
}
Expand Down