v1.14 Backports 2023-10-30 #28870

Merged · 25 commits · Nov 7, 2023
Commits
ddbcbaa
labels/cidr: Cache GetCIDRLabels computation
joamaki Aug 18, 2023
87a9972
labels/cidr: Use a lru cache to store CIDR labels
pippolo84 Oct 13, 2023
08696cb
cidr/labels: Add benchmark for cache heap usage
pippolo84 Oct 13, 2023
772e78c
labels/cidr: Add benchmark for concurrent execution of GetCIDRLabels
pippolo84 Oct 13, 2023
7aa62ff
envoy: extract getEndpointsForLBBackends with unittest
mhofstetter Sep 4, 2023
52962ec
envoy: fix lb backend endpoint calculation
mhofstetter Sep 4, 2023
3eae191
operator: Fix logic used to sync Cilium's IngressClass on startup
learnitall Oct 17, 2023
802666b
gha: test geneve tunneling in addition to vxlan
giorio94 Oct 24, 2023
b474c96
pkg/endpoint: run the metadata resolver after registering the endpoint
aanm Oct 25, 2023
1c255fc
bugtool: Collect XFRM error counters twice
pchaigno Oct 25, 2023
12f5544
helm: Add missing type to poststart iptables regex
nebril Oct 19, 2023
0a63d87
helm: Always delete AWS iptable rules
nebril Oct 19, 2023
c74aad6
labels/cidr: Fix labels memoization in GetCIDRLabels
pippolo84 Oct 24, 2023
04cec79
labels/cidr: Improve CIDR labels testing
pippolo84 Oct 24, 2023
814f490
labels: Move away from checker for CIDR labels testing
pippolo84 Oct 25, 2023
fd99b32
labels: Refactor CIDRLabelsCacheHeapUsage into tests
pippolo84 Oct 25, 2023
266b572
labels: Halve CIDR labels LRU cache size
pippolo84 Oct 25, 2023
70d91ee
ctmap: clean up hard-coded values
julianwiedmann Oct 29, 2023
7724178
ctmap: add GC test-case for SNATed TCP
julianwiedmann Oct 29, 2023
4863f10
ctmap: add test for Legacy DSR
julianwiedmann Oct 29, 2023
a8eab2b
ctmap: improve description for PurgeOrphanNATEntries()
julianwiedmann Oct 29, 2023
ad2c710
ctmap: set `dsr` flag for relevant CT entries in TestOrphanNatGC()
julianwiedmann Oct 30, 2023
b3d0437
ctmap: move some NAT GC logic into ctmap
julianwiedmann Oct 29, 2023
712f9f1
ctmap: limit DSR purge to CT entries with .dsr flag
julianwiedmann Oct 29, 2023
2a4145c
ctmap: add NAT purge for nodeport-backed DSR NAT entries
julianwiedmann Oct 29, 2023
2 changes: 1 addition & 1 deletion .github/workflows/conformance-clustermesh.yaml
@@ -145,7 +145,7 @@ jobs:
cm-auth-mode-2: 'cluster'

- name: '7'
tunnel: 'vxlan'
tunnel: 'geneve'
ipfamily: 'ipv4'
encryption: 'wireguard'
kube-proxy: 'iptables'
14 changes: 13 additions & 1 deletion bugtool/cmd/configuration.go
@@ -81,6 +81,10 @@ func defaultCommands(confDir string, cmdDir string, k8sPods []string) []string {
var commands []string
// Not expecting all of the commands to be available
commands = []string{
// We want to collect this twice: at the very beginning and at the
// very end of the bugtool collection, to see if the counters are
// increasing.
"cat /proc/net/xfrm_stat",
// Host and misc
"ps auxfw",
"hostname",
@@ -214,6 +218,15 @@ func defaultCommands(confDir string, cmdDir string, k8sPods []string) []string {
commands = append(commands, tcCommands...)
}

// We want to collect this twice: at the very beginning and at the
// very end of the bugtool collection, to see if the counters are
// increasing.
// The commands end up being the names of the files where their output
// is stored, so we can't have the two commands be the exact same or the
// second would overwrite. To avoid that, we use the -u flag in this second
// command; that flag is documented as being ignored.
commands = append(commands, "cat -u /proc/net/xfrm_stat")

return k8sCommands(commands, k8sPods)
}

@@ -269,7 +282,6 @@ func tcInterfaceCommands() ([]string, error) {

func catCommands() []string {
files := []string{
"/proc/net/xfrm_stat",
"/proc/sys/net/core/bpf_jit_enable",
"/proc/kallsyms",
"/etc/resolv.conf",
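The comment in the hunk above leans on a bugtool detail: each command string doubles as the name of the file its output is written to, so running the exact same command twice would overwrite the first result. A toy illustration of the collision and the `cat -u` workaround — the sanitizer and file extension below are hypothetical, not bugtool's real naming logic:

```go
package main

import (
	"fmt"
	"strings"
)

// outputFile derives an output file name from the command string itself.
// Because the name is a pure function of the command, two identical commands
// would write to the same file and the second run would clobber the first.
func outputFile(cmd string) string {
	return strings.NewReplacer(" ", "-", "/", "-").Replace(cmd) + ".md"
}

func main() {
	first := "cat /proc/net/xfrm_stat"
	// `cat -u` is documented as ignored, so the output is identical but the
	// command string — and therefore the file name — is distinct.
	second := "cat -u /proc/net/xfrm_stat"

	fmt.Println(outputFile(first))  // cat--proc-net-xfrm_stat.md
	fmt.Println(outputFile(second)) // cat--u--proc-net-xfrm_stat.md
}
```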
48 changes: 14 additions & 34 deletions daemon/cmd/endpoint.go
@@ -19,7 +19,6 @@ import (

"github.com/cilium/cilium/api/v1/models"
. "github.com/cilium/cilium/api/v1/server/restapi/endpoint"
"github.com/cilium/cilium/pkg/annotation"
"github.com/cilium/cilium/pkg/api"
"github.com/cilium/cilium/pkg/bandwidth"
"github.com/cilium/cilium/pkg/endpoint"
@@ -460,11 +459,10 @@ func (d *Daemon) createEndpoint(ctx context.Context, owner regeneration.Owner, e
// is available or has received the notification that includes the
// static pod's labels. In this case, start a controller to attempt to
// resolve the labels.
k8sLabelsConfigured := true
if ep.K8sNamespaceAndPodNameIsSet() && d.clientset.IsEnabled() {
// If there are labels, but no pod namespace, then it's
// likely that there are no k8s labels at all. Resolve.
if _, k8sLabelsConfigured = addLabels[k8sConst.PodNamespaceLabel]; !k8sLabelsConfigured {
if _, k8sLabelsConfigured := addLabels[k8sConst.PodNamespaceLabel]; !k8sLabelsConfigured {
ep.RunMetadataResolver(d.fetchK8sMetadataForEndpoint)
}
}
@@ -475,39 +473,21 @@ func (d *Daemon) createEndpoint(ctx context.Context, owner regeneration.Owner, e
return d.errorDuringCreation(ep, fmt.Errorf("unable to insert endpoint into manager: %s", err))
}

// We need to update the visibility policy after adding the endpoint in
// the endpoint manager because the endpoint manager create the endpoint
// queue of the endpoint. If we execute this function before the endpoint
// manager creates the endpoint queue the operation will fail.
if ep.K8sNamespaceAndPodNameIsSet() && d.clientset.IsEnabled() && k8sLabelsConfigured {
ep.UpdateVisibilityPolicy(func(ns, podName string) (proxyVisibility string, err error) {
_, p, err := d.endpointMetadataFetcher.Fetch(ns, podName)
if err != nil {
return "", err
}
value, _ := annotation.Get(p, annotation.ProxyVisibility, annotation.ProxyVisibilityAlias)
return value, nil
})

ep.UpdateBandwidthPolicy(func(ns, podName string) (bandwidthEgress string, err error) {
_, p, err := d.endpointMetadataFetcher.Fetch(ns, podName)
if err != nil {
return "", err
}
return p.Annotations[bandwidth.EgressBandwidth], nil
})
ep.UpdateNoTrackRules(func(ns, podName string) (noTrackPort string, err error) {
_, p, err := d.endpointMetadataFetcher.Fetch(ns, podName)
if err != nil {
return "", err
}
value, _ := annotation.Get(p, annotation.NoTrack, annotation.NoTrackAlias)
return value, nil
})
var regenTriggered bool
if ep.K8sNamespaceAndPodNameIsSet() && d.clientset.IsEnabled() {
// We need to refetch the pod labels again because we have just added
// the endpoint into the endpoint manager. If we have received any pod
// events, more specifically any events that modified the pod labels,
// between the time the pod was created and the time it was added
// into the endpoint manager, the pod event would not have been processed
// since the pod event handler would not find the endpoint for that pod
// in the endpoint manager. Thus, we will fetch the labels again
// and update the endpoint with these labels.
ep.RunMetadataResolver(d.fetchK8sMetadataForEndpoint)
} else {
regenTriggered = ep.UpdateLabels(ctx, addLabels, infoLabels, true)
}

regenTriggered := ep.UpdateLabels(ctx, addLabels, infoLabels, true)

select {
case <-ctx.Done():
return d.errorDuringCreation(ep, fmt.Errorf("request cancelled while resolving identity"))
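The new comment block above describes a lost-update race: pod events that arrive before the endpoint is registered find no endpoint in the manager and are dropped. A minimal toy model of that race and of the post-registration re-resolve that repairs it (hypothetical types, not Cilium's real API):

```go
package main

import "fmt"

// endpointManager only tracks registered endpoints; updates for unknown
// pods are silently dropped, just like the pod event handler described in
// the comment above.
type endpointManager struct {
	labels map[string][]string // pod -> labels, registered endpoints only
}

func (m *endpointManager) onPodUpdate(pod string, labels []string) {
	if _, registered := m.labels[pod]; !registered {
		return // handler can't find the endpoint: the update is lost
	}
	m.labels[pod] = labels
}

func main() {
	m := &endpointManager{labels: map[string][]string{}}

	// The pod's labels change between endpoint creation and registration;
	// the event is dropped because the endpoint isn't registered yet.
	m.onPodUpdate("default/nginx", []string{"app=nginx", "tier=web"})

	// Registration inserts the endpoint with the stale creation-time labels.
	m.labels["default/nginx"] = []string{"app=nginx"}

	// Re-running the metadata resolver after registration repairs the state,
	// which is why createEndpoint now calls RunMetadataResolver again.
	m.onPodUpdate("default/nginx", []string{"app=nginx", "tier=web"})
	fmt.Println(m.labels["default/nginx"]) // [app=nginx tier=web]
}
```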
2 changes: 1 addition & 1 deletion install/kubernetes/cilium/files/agent/poststart-eni.bash
@@ -11,7 +11,7 @@ set -o nounset
# dependencies on anything that is part of the startup script
# itself, and can be safely run multiple times per node (e.g. in
# case of a restart).
if [[ "$(iptables-save | grep -c AWS-SNAT-CHAIN)" != "0" ]];
if [[ "$(iptables-save | grep -c 'AWS-SNAT-CHAIN|AWS-CONNMARK-CHAIN')" != "0" ]];
then
echo 'Deleting iptables rules created by the AWS CNI VPC plugin'
iptables-save | grep -v 'AWS-SNAT-CHAIN|AWS-CONNMARK-CHAIN' | iptables-restore
@@ -205,7 +205,7 @@ spec:
{{- end }}
{{- if .Values.cni.install }}
lifecycle:
{{- if .Values.eni.enabled }}
{{- if ne .Values.cni.chainingMode "aws-cni" }}
postStart:
exec:
command:
4 changes: 3 additions & 1 deletion operator/cmd/root.go
@@ -687,7 +687,9 @@ func (legacy *legacyOnLeader) onStart(_ hive.HookContext) error {

if operatorOption.Config.EnableIngressController {
ingressController, err := ingress.NewController(
legacy.ctx,
legacy.clientset,
legacy.resources.IngressClasses,
ingress.WithHTTPSEnforced(operatorOption.Config.EnforceIngressHTTPS),
ingress.WithSecretsSyncEnabled(operatorOption.Config.EnableIngressSecretsSync),
ingress.WithSecretsNamespace(operatorOption.Config.IngressSecretsNamespace),
@@ -703,7 +705,7 @@ func (legacy *legacyOnLeader) onStart(_ hive.HookContext) error {
log.WithError(err).WithField(logfields.LogSubsys, ingress.Subsys).Fatal(
"Failed to start ingress controller")
}
go ingressController.Run()
go ingressController.Run(legacy.ctx)
}

if operatorOption.Config.EnableGatewayAPI {
3 changes: 3 additions & 0 deletions operator/k8s/resources.go
@@ -10,6 +10,7 @@ import (
cilium_api_v2alpha1 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2alpha1"
"github.com/cilium/cilium/pkg/k8s/resource"
slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1"
slim_networkingv1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/networking/v1"
)

var (
@@ -28,6 +29,7 @@ var (
k8s.LBIPPoolsResource,
k8s.CiliumIdentityResource,
k8s.CiliumPodIPPoolResource,
k8s.IngressClassResource,
),
)
)
@@ -41,4 +43,5 @@ type Resources struct {
LBIPPools resource.Resource[*cilium_api_v2alpha1.CiliumLoadBalancerIPPool]
Identities resource.Resource[*cilium_api_v2.CiliumIdentity]
CiliumPodIPPools resource.Resource[*cilium_api_v2alpha1.CiliumPodIPPool]
IngressClasses resource.Resource[*slim_networkingv1.IngressClass]
}
120 changes: 60 additions & 60 deletions operator/pkg/ingress/ingress.go
@@ -6,7 +6,6 @@ package ingress
import (
"context"
"fmt"
"strconv"
"strings"

"github.com/sirupsen/logrus"
@@ -26,6 +25,7 @@ import (
ciliumv2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
k8sClient "github.com/cilium/cilium/pkg/k8s/client"
"github.com/cilium/cilium/pkg/k8s/informer"
"github.com/cilium/cilium/pkg/k8s/resource"
slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1"
slim_networkingv1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/networking/v1"
"github.com/cilium/cilium/pkg/k8s/utils"
@@ -92,15 +92,20 @@ type Controller struct {
sharedLBServiceName string
ciliumNamespace string
defaultLoadbalancerMode string
isDefaultIngressClass bool
defaultSecretNamespace string
defaultSecretName string

defaultSecretNamespace string
defaultSecretName string

sharedLBStatus *slim_corev1.LoadBalancerStatus
}

// NewController returns a controller for ingress objects having ingressClassName as cilium
func NewController(clientset k8sClient.Clientset, options ...Option) (*Controller, error) {
func NewController(
ctx context.Context,
clientset k8sClient.Clientset,
ingressClasses resource.Resource[*slim_networkingv1.IngressClass],
options ...Option,
) (*Controller, error) {
opts := DefaultIngressOptions
for _, opt := range options {
if err := opt(&opts); err != nil {
@@ -157,10 +162,7 @@ func NewController(clientset k8sClient.Clientset, options ...Option) (*Controller, error) {
nil,
)

ingressClassManager, err := newIngressClassManager(clientset, ic.queue, opts.MaxRetries)
if err != nil {
return nil, err
}
ingressClassManager := newIngressClassManager(ctx, ic.queue, ingressClasses)
ic.ingressClassManager = ingressClassManager

serviceManager, err := newServiceManager(clientset, ic.queue, opts.MaxRetries)
@@ -195,19 +197,28 @@
}

// Run kicks off the controlled loop
func (ic *Controller) Run() {
func (ic *Controller) Run(ctx context.Context) error {
defer ic.queue.ShutDown()

go ic.ingressClassManager.Run(ctx)

// This should only return an error if the context is canceled.
if err := ic.ingressClassManager.WaitForSync(ctx); err != nil {
return err
}

go ic.ingressInformer.Run(wait.NeverStop)
if !cache.WaitForCacheSync(wait.NeverStop, ic.ingressInformer.HasSynced) {
return
return fmt.Errorf("unable to wait for Ingress cache sync")
}

go ic.ingressClassManager.Run()
go ic.serviceManager.Run()
go ic.secretManager.Run()

for ic.processEvent() {
}

return nil
}

func (ic *Controller) processEvent() bool {
Expand Down Expand Up @@ -246,7 +257,7 @@ func hasEmptyIngressClass(ingress *slim_networkingv1.Ingress) bool {
func (ic *Controller) isCiliumIngressEntry(ingress *slim_networkingv1.Ingress) bool {
className := getIngressClassName(ingress)

if (className == nil || *className == "") && ic.isDefaultIngressClass {
if (className == nil || *className == "") && ic.ingressClassManager.IsDefault() {
return true
}

@@ -354,40 +365,30 @@ func (ic *Controller) handleIngressServiceUpdatedEvent(ingressServiceUpdated ingressServiceUpdatedEvent) error {
}

func (ic *Controller) handleCiliumIngressClassUpdatedEvent(event ciliumIngressClassUpdatedEvent) error {
log.Debugf("Cilium IngressClass updated")
previousValue := ic.isDefaultIngressClass
if val, ok := event.ingressClass.GetAnnotations()[slim_networkingv1.AnnotationIsDefaultIngressClass]; ok {
isDefault, err := strconv.ParseBool(val)
if !event.changed {
return nil
}

log.WithField(CiliumIngressClassIsDefault, event.isDefault).Info(
"Cilium IngressClass default value changed, re-syncing ingresses",
)
// ensure that all ingresses are in the correct state
for _, k := range ic.ingressStore.ListKeys() {
ing, err := ic.getByKey(k)
if err != nil {
log.WithError(err).Warnf("Failed to parse annotation value for %q", slim_networkingv1.AnnotationIsDefaultIngressClass)
return err
}
ic.isDefaultIngressClass = isDefault
} else {
// if the annotation is not set we are not the default ingress class
ic.isDefaultIngressClass = false
}

if previousValue != ic.isDefaultIngressClass {
log.Debugf("Cilium IngressClass default value changed, re-syncing ingresses")
// ensure that all ingresses are in the correct state
for _, k := range ic.ingressStore.ListKeys() {
ing, err := ic.getByKey(k)
if err != nil {
if ic.isCiliumIngressEntry(ing) {
// make sure that the ingress is in the correct state
if err := ic.ensureResources(ing, false); err != nil {
return err
}

if ic.isCiliumIngressEntry(ing) {
// make sure that the ingress is in the correct state
if err := ic.ensureResources(ing, false); err != nil {
return err
}
} else if hasEmptyIngressClass(ing) && !ic.isDefaultIngressClass {
// if we are no longer the default ingress class, we need to clean up
// the resources that we created for the ingress
if err := ic.deleteResources(ing); err != nil {
return err
}
} else if hasEmptyIngressClass(ing) && !event.isDefault {
// if we are no longer the default ingress class, we need to clean up
// the resources that we created for the ingress
if err := ic.deleteResources(ing); err != nil {
return err
}
}
}
@@ -396,28 +397,27 @@ func (ic *Controller) handleCiliumIngressClassUpdatedEvent(event ciliumIngressClassUpdatedEvent) error {
}

func (ic *Controller) handleCiliumIngressClassDeletedEvent(event ciliumIngressClassDeletedEvent) error {
log.Debug("Cilium IngressClass deleted")
if !event.wasDefault {
return nil
}

if ic.isDefaultIngressClass {
// if we were the default ingress class, we need to clean up all ingresses
for _, k := range ic.ingressStore.ListKeys() {
ing, err := ic.getByKey(k)
if err != nil {
return err
}
log.Debug("Cilium IngressClass deleted, performing cleanup")
// if we were the default ingress class, we need to clean up all ingresses
for _, k := range ic.ingressStore.ListKeys() {
ing, err := ic.getByKey(k)
if err != nil {
return err
}

if hasEmptyIngressClass(ing) {
// if we are no longer the default ingress class, we need to clean up
// the resources that we created for the ingress
if err := ic.deleteResources(ing); err != nil {
return err
}
if hasEmptyIngressClass(ing) {
// if we are no longer the default ingress class, we need to clean up
// the resources that we created for the ingress
if err := ic.deleteResources(ing); err != nil {
return err
}
}

// disable the default ingress class behavior
ic.isDefaultIngressClass = false
}

return nil
}
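The two handlers above now consume precomputed event fields (isDefault, wasDefault, changed) instead of parsing the IngressClass annotation inline. A sketch of how an ingress class manager might derive those fields from the standard is-default-class annotation — simplified and hypothetical, not the real manager:

```go
package main

import (
	"fmt"
	"strconv"
)

// The standard Kubernetes annotation marking an IngressClass as the
// cluster default.
const isDefaultAnnotation = "ingressclass.kubernetes.io/is-default-class"

// isDefaultClass treats a missing or unparsable annotation as "not default",
// matching the behavior the controller previously implemented inline.
func isDefaultClass(annotations map[string]string) bool {
	v, ok := annotations[isDefaultAnnotation]
	if !ok {
		return false
	}
	isDefault, err := strconv.ParseBool(v)
	return err == nil && isDefault
}

func main() {
	prev := false
	for _, annotations := range []map[string]string{
		{isDefaultAnnotation: "true"}, // becomes default: changed=true
		{isDefaultAnnotation: "true"}, // unchanged: handler returns early
		{},                            // annotation removed: no longer default
	} {
		cur := isDefaultClass(annotations)
		fmt.Printf("isDefault=%v changed=%v\n", cur, cur != prev)
		prev = cur
	}
}
```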

Expand Down Expand Up @@ -506,10 +506,10 @@ func (ic *Controller) handleEvent(event interface{}) error {
log.WithField(logfields.ServiceKey, ev.ingressService.Name).WithField(logfields.K8sNamespace, ev.ingressService.Namespace).Debug("Handling ingress service updated event")
err = ic.handleIngressServiceUpdatedEvent(ev)
case ciliumIngressClassUpdatedEvent:
log.WithField(logfields.IngressClass, ev.ingressClass.Name).Debug("Handling cilium ingress class updated event")
log.Debug("Handling cilium ingress class updated event")
err = ic.handleCiliumIngressClassUpdatedEvent(ev)
case ciliumIngressClassDeletedEvent:
log.WithField(logfields.IngressClass, ev.ingressClass.Name).Debug("Handling cilium ingress class deleted event")
log.Debug("Handling cilium ingress class deleted event")
err = ic.handleCiliumIngressClassDeletedEvent(ev)
default:
err = fmt.Errorf("received an unknown event: %t", ev)