Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v1.14 Backports 2023-07-19 #26914

Merged
merged 9 commits
Jul 19, 2023
7 changes: 6 additions & 1 deletion .github/workflows/conformance-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,12 @@ jobs:
provision: 'false'
cmd: |
cd /host/
./contrib/scripts/kind.sh --xdp "" 3 "" "" "${{ matrix.kube-proxy }}" "dual"

IP_FAM="dual"
if [ "${{ matrix.ipv6 }}" == "false" ]; then
IP_FAM="ipv4"
fi
./contrib/scripts/kind.sh --xdp "" 3 "" "" "${{ matrix.kube-proxy }}" \$IP_FAM

kubectl patch node kind-worker3 --type=json -p='[{"op":"add","path":"/metadata/labels/cilium.io~1no-schedule","value":"true"}]'
if [ "${{ matrix.encryption }}" == "ipsec" ]; then
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/conformance-gke.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ jobs:
--helm-set=hubble.relay.enabled=true \
--helm-set=hubble.relay.image.repository=quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/hubble-relay-ci \
--helm-set=hubble.relay.image.tag=${SHA} \
--helm-set=hubble.relay.image.useDigest=false \
--helm-set=agentNotReadyTaintKey=ignore-taint.cluster-autoscaler.kubernetes.io/cilium-agent-not-ready \
--helm-set loadBalancer.l7.backend=envoy \
--helm-set tls.secretsBackend=k8s \
Expand Down
8 changes: 5 additions & 3 deletions Documentation/network/l2-announcements.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@

.. _l2_announcements:

************************************
L2 Announcements / L2 Aware LB
************************************
*************************************
L2 Announcements / L2 Aware LB (Beta)
*************************************

.. include:: ../beta.rst

L2 Announcements is a feature which makes services visible and reachable on
the local area network. This feature is primarily intended for on-premises
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ This example shows you how to enforce mutual authentication between two Pods.

Deploy a client (pod-worker) and a server (echo) using the following manifest:

.. code-block:: shell-session
.. parsed-literal::

$ kubectl apply -f |SCM_WEB|\/examples/kubernetes/servicemesh/mutual-auth-example.yaml
$ kubectl apply -f |SCM_WEB|\/examples/kubernetes/servicemesh/cnp-without-mutual-auth.yaml
$ kubectl apply -f \ |SCM_WEB|\/examples/kubernetes/servicemesh/mutual-auth-example.yaml
$ kubectl apply -f \ |SCM_WEB|\/examples/kubernetes/servicemesh/cnp-without-mutual-auth.yaml
service/echo created
deployment.apps/echo created
pod/pod-worker created
Expand Down Expand Up @@ -203,7 +203,7 @@ Update the existing rule to only allow ingress access to mutually authenticated

.. parsed-literal::

$ kubectl apply -f |SCM_WEB|\/examples/kubernetes/servicemesh/cnp-with-mutual-auth.yaml
$ kubectl apply -f \ |SCM_WEB|\/examples/kubernetes/servicemesh/cnp-with-mutual-auth.yaml

Verify Mutual Authentication
============================
Expand Down
14 changes: 7 additions & 7 deletions bpf/lib/encap.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,12 +127,12 @@ __encap_and_redirect_lxc(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
seclabel);
#endif

#if !defined(ENABLE_NODEPORT) && (defined(ENABLE_IPSEC) || defined(ENABLE_HOST_FIREWALL))
/* For IPSec and the host firewall, traffic from a pod to a remote node
* is sent through the tunnel. In the case of node --> VIP@remote pod,
* packets may be DNATed when they enter the remote node. If kube-proxy
* is used, the response needs to go through the stack on the way to
* the tunnel, to apply the correct reverse DNAT.
#if !defined(ENABLE_NODEPORT) && defined(ENABLE_HOST_FIREWALL)
/* For the host firewall, traffic from a pod to a remote node is sent
* through the tunnel. In the case of node --> VIP@remote pod, packets may
* be DNATed when they enter the remote node. If kube-proxy is used, the
* response needs to go through the stack on the way to the tunnel, to
* apply the correct reverse DNAT.
* See #14674 for details.
*/
ret = __encap_with_nodeid(ctx, 0, 0, tunnel_endpoint, seclabel, dstid,
Expand All @@ -146,7 +146,7 @@ __encap_and_redirect_lxc(struct __ctx_buff *ctx, __be32 tunnel_endpoint,
#else
return __encap_and_redirect_with_nodeid(ctx, 0, tunnel_endpoint,
seclabel, dstid, NOT_VTEP_DST, trace);
#endif /* !ENABLE_NODEPORT && (ENABLE_IPSEC || ENABLE_HOST_FIREWALL) */
#endif /* !ENABLE_NODEPORT && ENABLE_HOST_FIREWALL */
}

#if defined(TUNNEL_MODE) || defined(ENABLE_HIGH_SCALE_IPCACHE)
Expand Down
18 changes: 18 additions & 0 deletions bpf/lib/identity.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,24 @@ static __always_inline __u32 inherit_identity_from_host(struct __ctx_buff *ctx,
*identity = HOST_ID;
} else if (magic == MARK_MAGIC_ENCRYPT) {
*identity = ctx_load_meta(ctx, CB_ENCRYPT_IDENTITY);

/* Special case needed to handle upgrades. Can be removed in v1.15.
* Before the upgrade, bpf_lxc will write the tunnel endpoint in
* skb->cb[4]. After the upgrade, it will write the security identity.
* For the upgrade to happen without drops, bpf_host thus needs to
* handle both cases.
* We can distinguish between the two cases by looking at the first
* byte. Identities are on 24-bits so the first byte will be zero;
* conversely, tunnel endpoint addresses within the range 0.0.0.0/8
* (first byte is zero) are impossible because special purpose
* (RFC6890).
*/
if ((*identity & 0xFF000000) != 0) {
/* skb->cb[4] was actually carrying the tunnel endpoint and the
* security identity is in the mark.
*/
*identity = get_identity(ctx);
}
#if defined(ENABLE_L7_LB)
} else if (magic == MARK_MAGIC_PROXY_EGRESS_EPID) {
*identity = get_epid(ctx); /* endpoint identity, not security identity! */
Expand Down
8 changes: 6 additions & 2 deletions bpf/tests/ipsec_from_host_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,17 @@ int ipv4_ipsec_from_host_setup(struct __ctx_buff *ctx)
struct ipcache_key cache_key = {};
struct remote_endpoint_info cache_value = {};

/* This is the ipcache entry for the CiliumInternalIP of the remote node.
* It allows us to lookup the tunnel endpoint from the outer destination IP
* address of the ESP packet. The CiliumInternalIPs are used for that outer
* header.
*/
cache_key.lpm_key.prefixlen = IPCACHE_PREFIX_LEN(32);
cache_key.family = ENDPOINT_KEY_IPV4;
cache_key.ip4 = v4_pod_two;
cache_value.sec_identity = 233;
cache_value.tunnel_endpoint = v4_node_two;
cache_value.node_id = NODE_ID;
cache_value.key = ENCRYPT_KEY;
map_update_elem(&IPCACHE_MAP, &cache_key, &cache_value, BPF_ANY);

set_encrypt_key_mark(ctx, ENCRYPT_KEY, NODE_ID);
Expand Down Expand Up @@ -230,13 +234,13 @@ int ipv6_ipsec_from_host_setup(struct __ctx_buff *ctx)
struct ipcache_key cache_key = {};
struct remote_endpoint_info cache_value = {};

/* See comment for IPv4 counterpart. */
cache_key.lpm_key.prefixlen = IPCACHE_PREFIX_LEN(128);
cache_key.family = ENDPOINT_KEY_IPV6;
memcpy(&cache_key.ip6, (__u8 *)v6_pod_two, 16);
cache_value.sec_identity = 233;
cache_value.tunnel_endpoint = v4_node_two;
cache_value.node_id = NODE_ID;
cache_value.key = ENCRYPT_KEY;
map_update_elem(&IPCACHE_MAP, &cache_key, &cache_value, BPF_ANY);

set_encrypt_key_mark(ctx, ENCRYPT_KEY, NODE_ID);
Expand Down
7 changes: 2 additions & 5 deletions daemon/cmd/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"github.com/cilium/cilium/pkg/controller"
"github.com/cilium/cilium/pkg/endpoint"
"github.com/cilium/cilium/pkg/ipam"
ipamOption "github.com/cilium/cilium/pkg/ipam/option"
slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1"
"github.com/cilium/cilium/pkg/k8s/watchers/resources"
"github.com/cilium/cilium/pkg/labels"
Expand Down Expand Up @@ -305,8 +304,7 @@ func (d *Daemon) regenerateRestoredEndpoints(state *endpointRestoreState) (resto
}
}

if option.Config.EnableIPSec &&
(option.Config.IPAM == ipamOption.IPAMENI || option.Config.IPAM == ipamOption.IPAMAzure) {
if option.Config.EnableIPSec {
// If IPsec is enabled on EKS or AKS, we need to restore the host
// endpoint before any other endpoint, to ensure a dropless upgrade.
// This code can be removed in v1.15.
Expand All @@ -333,8 +331,7 @@ func (d *Daemon) regenerateRestoredEndpoints(state *endpointRestoreState) (resto
}

for _, ep := range state.restored {
if ep.IsHost() && option.Config.EnableIPSec &&
(option.Config.IPAM == ipamOption.IPAMENI || option.Config.IPAM == ipamOption.IPAMAzure) {
if ep.IsHost() && option.Config.EnableIPSec {
// The host endpoint was handled above.
continue
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ spec:
value: "hubble-relay:80"
{{- end }}
{{- with .Values.hubble.ui.backend.extraEnv }}
{{- toYaml . | trim | nindent 10 }}
{{- toYaml . | trim | nindent 8 }}
{{- end }}
ports:
- name: grpc
Expand Down
10 changes: 4 additions & 6 deletions pkg/datapath/linux/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -1075,11 +1075,10 @@ func (n *linuxNodeHandler) enableIPsecIPv4(newNode *nodeTypes.Node, zeroMark boo
upsertIPsecLog(err, "out IPv4", wildcardCIDR, cidr, spi)
}
} else {
localCIDR := n.nodeAddressing.IPv4().AllocationCIDR().IPNet
remoteCIDR := newNode.IPv4AllocCIDR.IPNet
n.replaceNodeIPSecOutRoute(new4Net)
spi, err = ipsec.UpsertIPsecEndpoint(localCIDR, remoteCIDR, localIP, remoteIP, remoteNodeID, ipsec.IPSecDirOut, false)
upsertIPsecLog(err, "out IPv4", localCIDR, remoteCIDR, spi)
spi, err = ipsec.UpsertIPsecEndpoint(wildcardCIDR, remoteCIDR, localIP, remoteIP, remoteNodeID, ipsec.IPSecDirOut, false)
upsertIPsecLog(err, "out IPv4", wildcardCIDR, remoteCIDR, spi)
}
}
}
Expand Down Expand Up @@ -1149,11 +1148,10 @@ func (n *linuxNodeHandler) enableIPsecIPv6(newNode *nodeTypes.Node, zeroMark boo
upsertIPsecLog(err, "out IPv6", wildcardCIDR, cidr, spi)
}
} else {
localCIDR := n.nodeAddressing.IPv6().AllocationCIDR().IPNet
remoteCIDR := newNode.IPv6AllocCIDR.IPNet
n.replaceNodeIPSecOutRoute(new6Net)
spi, err := ipsec.UpsertIPsecEndpoint(localCIDR, remoteCIDR, localIP, remoteIP, remoteNodeID, ipsec.IPSecDirOut, false)
upsertIPsecLog(err, "out IPv6", localCIDR, remoteCIDR, spi)
spi, err := ipsec.UpsertIPsecEndpoint(wildcardCIDR, remoteCIDR, localIP, remoteIP, remoteNodeID, ipsec.IPSecDirOut, false)
upsertIPsecLog(err, "out IPv6", wildcardCIDR, remoteCIDR, spi)
}
}
}
Expand Down
8 changes: 1 addition & 7 deletions pkg/node/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ type Configuration interface {
TunnelingEnabled() bool
RemoteNodeIdentitiesEnabled() bool
NodeEncryptionEnabled() bool
EncryptionEnabled() bool
}

var _ Notifier = (*manager)(nil)
Expand Down Expand Up @@ -324,11 +323,6 @@ func (m *manager) legacyNodeIpBehavior() bool {
if m.conf.NodeEncryptionEnabled() {
return false
}
// Needed to store the tunnel endpoint for pod->remote node in the
// ipcache so that this traffic goes through the tunnel.
if m.conf.EncryptionEnabled() && m.conf.TunnelingEnabled() {
return false
}
return true
}

Expand All @@ -337,7 +331,7 @@ func (m *manager) nodeAddressHasTunnelIP(address nodeTypes.Address) bool {
// through the tunnel to preserve the source identity as part of the
// encapsulation. In encryption case we also want to use vxlan device
// to create symmetric traffic when sending nodeIP->pod and pod->nodeIP.
return address.Type == addressing.NodeCiliumInternalIP || m.conf.EncryptionEnabled() ||
return address.Type == addressing.NodeCiliumInternalIP || m.conf.NodeEncryptionEnabled() ||
option.Config.EnableHostFirewall || option.Config.JoinCluster
}

Expand Down