Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CNI: keep cni config on shutdown; taint instead, queue deletions #23486

Merged
merged 5 commits into from
Mar 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-operator-alibabacloud.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-operator-aws.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-operator-azure.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-operator-generic.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-operator.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion Documentation/helm-values.rst

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Documentation/installation/taints.rst
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ node. The mechanism works as follows:
3. From this point on, pods will start being scheduled and running on the node,
having their networking managed by Cilium.

4. If Cilium is temporarily removed from the node, the Operator will re-apply
the taint (but only with NoSchedule).

By default, the taint key is ``node.cilium.io/agent-not-ready``, but in some
scenarios (such as when Cluster Autoscaler is being used but its flags cannot be
configured) this key may need to be tweaked. This can be done using the
Expand Down
1 change: 1 addition & 0 deletions Documentation/spelling_wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,7 @@ serviceMonitor
servicePort
sessionAffinity
setNodeNetworkStatus
setNodeTaints
setrlimit
setupinterface
setupiptables
Expand Down
7 changes: 7 additions & 0 deletions daemon/cmd/daemon_main.go
Original file line number Diff line number Diff line change
Expand Up @@ -1820,6 +1820,12 @@ func runDaemon(d *Daemon, restoredEndpoints *endpointRestoreState, cleaner *daem
}
}

// Process queued deletion requests
bootstrapStats.deleteQueue.Start()
unlockDeleteDir := d.processQueuedDeletes()
bootstrapStats.deleteQueue.End(true)

// Start up the local api socket
bootstrapStats.initAPI.Start()
srv := server.NewServer(d.instantiateAPI())
srv.EnabledListeners = []string{"unix"}
Expand All @@ -1829,6 +1835,7 @@ func runDaemon(d *Daemon, restoredEndpoints *endpointRestoreState, cleaner *daem
cleaner.cleanupFuncs.Add(func() { srv.Shutdown() })

srv.ConfigureAPI()
unlockDeleteDir() // can't unlock this until the api socket is written
bootstrapStats.initAPI.End(true)

err := d.SendNotification(monitorAPI.StartMessage(time.Now()))
Expand Down
86 changes: 86 additions & 0 deletions daemon/cmd/deletion_queue.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package cmd

import (
"context"
"os"
"path/filepath"
"time"

"github.com/cilium/cilium/pkg/defaults"
"github.com/cilium/cilium/pkg/lock/lockfile"
"github.com/cilium/cilium/pkg/logging/logfields"
)

// processQueuedDeletes is the agent-side of the identity deletion queue.
// The CNI plugin queues deletions when the agent is down, because
// containerd / crio expect CNI DEL to always succeed. It does so
// by writing a file to /run/cilium/deleteQueue with the endpoint ID
// to delete.
//
// On startup, we will grab a lockfile in this directory, then process
// all deletions. Then, we start up the agent server, then drop the lock.
// Any CNI processes waiting in that period of time will, after getting
// the lock.
//
// Returns a done function that drops the lock. It should be called
// after the server is running.
func (d *Daemon) processQueuedDeletes() func() {
if err := os.MkdirAll(defaults.DeleteQueueDir, 0755); err != nil {
log.WithError(err).WithField(logfields.Path, defaults.DeleteQueueDir).Error("Failed to ensure CNI deletion queue directory exists")
return func() {}
}

log.Infof("Processing queued endpoint deletion requests from %s", defaults.DeleteQueueDir)

var lf *lockfile.Lockfile
locked := false

unlock := func() {
if lf != nil && locked {
lf.Unlock()
}
if lf != nil {
lf.Close()
}
}

lf, err := lockfile.NewLockfile(defaults.DeleteQueueLockfile)
if err != nil {
log.WithError(err).WithField(logfields.Path, defaults.DeleteQueueLockfile).Warn("Failed to lock queued deletion directory, proceeding anyways. This may cause CNI deletions to be missed.")
} else {
ctx, cancel := context.WithTimeout(d.ctx, 10*time.Second)
defer cancel()
err = lf.Lock(ctx, true)
if err != nil {
log.WithError(err).WithField(logfields.Path, defaults.DeleteQueueLockfile).Warn("Failed to lock queued deletion directory, proceeding anyways. This may cause CNI deletions to be missed.")
} else {
locked = true
}
}

// OK, we have the lock; process the deletes
files, err := filepath.Glob(defaults.DeleteQueueDir + "/*.delete")
if err != nil {
log.WithError(err).WithField(logfields.Path, defaults.DeleteQueueDir).Error("Failed to list queued CNI deletion requests. CNI deletions may be missed.")
}

log.Infof("processing %d queued deletion requests", len(files))
for _, file := range files {
// get the container id
epID, err := os.ReadFile(file)
if err != nil {
log.WithError(err).WithField(logfields.Path, file).Error("Failed to read queued CNI deletion entry. Endpoint will not be deleted.")
} else {
_, _ = d.DeleteEndpoint(string(epID)) // this will log errors elsewhere
}

if err := os.Remove(file); err != nil {
log.WithError(err).WithField(logfields.Path, file).Error("Failed to remve queued CNI deletion entry, but deletion was successful.")
}
}

return unlock
}
2 changes: 2 additions & 0 deletions daemon/cmd/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ type bootstrapStatistics struct {
fqdn spanstat.SpanStat
enableConntrack spanstat.SpanStat
kvstore spanstat.SpanStat
deleteQueue spanstat.SpanStat
}

func (b *bootstrapStatistics) updateMetrics() {
Expand Down Expand Up @@ -105,5 +106,6 @@ func (b *bootstrapStatistics) getMap() map[string]*spanstat.SpanStat {
"fqdn": &b.fqdn,
"enableConntrack": &b.enableConntrack,
"kvstore": &b.kvstore,
"deleteQueue": &b.deleteQueue,
}
}
3 changes: 2 additions & 1 deletion install/kubernetes/cilium/README.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions install/kubernetes/cilium/templates/cilium-configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -947,9 +947,18 @@ data:
annotate-k8s-node: "true"
{{- end }}

{{- if and .Values.operator.setNodeTaints (not .Values.operator.removeNodeTaints) -}}
{{ fail "Cannot have operator.setNodeTaintsMaxNodes and not operator.removeNodeTaints = false" }}
{{- end -}}
{{- if .Values.operator.removeNodeTaints }}
remove-cilium-node-taints: "true"
{{- end }}
{{- /* set node taints if setNodeTaints is explicitly enabled or removeNodeTaints is set */ -}}
{{- if or .Values.operator.setNodeTaints
( and (kindIs "invalid" .Values.operator.setNodeTaints)
.Values.operator.removeNodeTaints ) }}
set-cilium-node-taints: "true"
{{- end }}
{{- if .Values.operator.setNodeNetworkStatus }}
set-cilium-is-up-condition: "true"
{{- end }}
Expand Down
7 changes: 6 additions & 1 deletion install/kubernetes/cilium/values.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion install/kubernetes/cilium/values.yaml.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ cni:
# if you're removing Cilium from the cluster. Disable this to prevent the CNI
# configuration file from being removed during agent upgrade, which can cause
# nodes to go unmanageable.
uninstall: true
uninstall: false

# -- Configure chaining on top of other CNI plugins. Possible values:
# - none
Expand Down Expand Up @@ -2082,6 +2082,11 @@ operator:
# pod running.
removeNodeTaints: true

# -- Taint nodes where Cilium is scheduled but not running. This prevents pods
# from being scheduled to nodes where Cilium is not the default CNI provider.
# @default -- same as removeNodeTaints
setNodeTaints: ~

# -- Set Node condition NetworkUnavailable to 'false' with the reason
# 'CiliumIsUp' for nodes that have a healthy Cilium pod.
setNodeNetworkStatus: true
Expand Down
3 changes: 3 additions & 0 deletions operator/cmd/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,9 @@ func init() {
flags.Bool(operatorOption.RemoveCiliumNodeTaints, true, fmt.Sprintf("Remove node taint %q from Kubernetes nodes once Cilium is up and running", option.Config.AgentNotReadyNodeTaintValue()))
option.BindEnv(Vp, operatorOption.RemoveCiliumNodeTaints)

flags.Bool(operatorOption.SetCiliumNodeTaints, false, fmt.Sprintf("Set node taint %q from Kubernetes nodes if Cilium is scheduled but not up and running", option.Config.AgentNotReadyNodeTaintValue()))
option.BindEnv(Vp, operatorOption.SetCiliumNodeTaints)

flags.Bool(operatorOption.SetCiliumIsUpCondition, true, "Set CiliumIsUp Node condition to mark a Kubernetes Node that a Cilium pod is up and running in that node")
option.BindEnv(Vp, operatorOption.SetCiliumIsUpCondition)

Expand Down
3 changes: 2 additions & 1 deletion operator/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -582,8 +582,9 @@ func (legacy *legacyOnLeader) onStart(_ hive.HookContext) error {
logfields.K8sNamespace: operatorOption.Config.CiliumK8sNamespace,
"label-selector": operatorOption.Config.CiliumPodLabels,
"remove-cilium-node-taints": operatorOption.Config.RemoveCiliumNodeTaints,
"set-cilium-node-taints": operatorOption.Config.SetCiliumNodeTaints,
"set-cilium-is-up-condition": operatorOption.Config.SetCiliumIsUpCondition,
}).Info("Removing Cilium Node Taints or Setting Cilium Is Up Condition for Kubernetes Nodes")
}).Info("Managing Cilium Node Taints or Setting Cilium Is Up Condition for Kubernetes Nodes")

operatorWatchers.HandleNodeTolerationAndTaints(&legacy.wg, legacy.clientset, legacy.ctx.Done())
}
Expand Down
9 changes: 9 additions & 0 deletions operator/option/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,10 @@ const (
// should be removed in Kubernetes nodes.
RemoveCiliumNodeTaints = "remove-cilium-node-taints"

// SetCiliumNodeTaints is whether or not to taint nodes that do not have
// a running Cilium instance.
SetCiliumNodeTaints = "set-cilium-node-taints"

// SetCiliumIsUpCondition sets the CiliumIsUp node condition in Kubernetes
// nodes.
SetCiliumIsUpCondition = "set-cilium-is-up-condition"
Expand Down Expand Up @@ -537,6 +541,10 @@ type OperatorConfig struct {
// should be removed in Kubernetes nodes.
RemoveCiliumNodeTaints bool

// SetCiliumNodeTaints is whether or not to set taints on nodes that do not
// have a running Cilium pod.
SetCiliumNodeTaints bool

// SetCiliumIsUpCondition sets the CiliumIsUp node condition in Kubernetes
// nodes.
SetCiliumIsUpCondition bool
Expand Down Expand Up @@ -598,6 +606,7 @@ func (c *OperatorConfig) Populate(vp *viper.Viper) {
c.EnableGatewayAPISecretsSync = vp.GetBool(EnableGatewayAPISecretsSync)
c.CiliumPodLabels = vp.GetString(CiliumPodLabels)
c.RemoveCiliumNodeTaints = vp.GetBool(RemoveCiliumNodeTaints)
c.SetCiliumNodeTaints = vp.GetBool(SetCiliumNodeTaints)
c.SetCiliumIsUpCondition = vp.GetBool(SetCiliumIsUpCondition)
c.IngressLBAnnotationPrefixes = vp.GetStringSlice(IngressLBAnnotationPrefixes)
c.IngressSharedLBServiceName = vp.GetString(IngressSharedLBServiceName)
Expand Down