Skip to content

Commit

Permalink
feat: deleted the pods that are not unmanaged by Cilium
Browse files Browse the repository at this point in the history
Set operator to remove the label of a pod that existed before the node taint

1. Delete the specified label pod according to the parameter --pod-restart-selector, default value is k8s-app=kube-dns
2. --pod-restart-selector="" Remove all pods

Fixes: #21594
Signed-off-by: tigerK <yanru.lv@daocloud.io>
  • Loading branch information
lvyanru8200 authored and aanm committed Jan 10, 2023
1 parent baf7f34 commit 5af4a0e
Show file tree
Hide file tree
Showing 10 changed files with 41 additions and 17 deletions.
1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-operator-alibabacloud.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-operator-aws.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-operator-azure.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-operator-generic.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Documentation/cmdref/cilium-operator.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions operator/cmd/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,9 @@ func init() {
flags.Bool(operatorOption.EnableK8s, true, `Enable operation of Kubernetes-related services/controllers when using Cilium with Kubernetes`)
option.BindEnv(Vp, operatorOption.EnableK8s)

flags.String(operatorOption.PodRestartSelector, "k8s-app=kube-dns", "cilium-operator will delete/restart any pods with these labels if the pod is not managed by Cilium. If this option is empty, then all pods may be restarted")
option.BindEnv(Vp, operatorOption.PodRestartSelector)

flags.Duration(option.KVstoreLeaseTTL, defaults.KVstoreLeaseTTL, "Time-to-live for the KVstore lease.")
flags.MarkHidden(option.KVstoreLeaseTTL)
option.BindEnv(Vp, option.KVstoreLeaseTTL)
Expand Down
22 changes: 11 additions & 11 deletions operator/cmd/k8s_pod_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,18 @@ import (
)

const (
minimalPodRestartInterval = 5 * time.Minute
unmanagedKubeDnsMinimalAge = 30 * time.Second
minimalPodRestartInterval = 5 * time.Minute
unmanagedPodMinimalAge = 30 * time.Second
)

var (
lastPodRestart = map[string]time.Time{}
)

func enableUnmanagedKubeDNSController(ctx context.Context, wg *sync.WaitGroup, clientset k8sClient.Clientset) {
func enableUnmanagedController(ctx context.Context, wg *sync.WaitGroup, clientset k8sClient.Clientset) {
// These functions will block until the resources are synced with k8s.
watchers.CiliumEndpointsInit(ctx, wg, clientset)
watchers.UnmanagedKubeDNSPodsInit(ctx, wg, clientset)
watchers.UnmanagedPodsInit(ctx, wg, clientset)

mgr := controller.NewManager()

Expand All @@ -43,7 +43,7 @@ func enableUnmanagedKubeDNSController(ctx context.Context, wg *sync.WaitGroup, c
mgr.RemoveAllAndWait()
}()

mgr.UpdateController("restart-unmanaged-kube-dns",
mgr.UpdateController("restart-unmanaged-pods",
controller.ControllerParams{
RunInterval: time.Duration(operatorOption.Config.UnmanagedPodWatcherInterval) * time.Second,
DoFunc: func(ctx context.Context) error {
Expand All @@ -52,7 +52,7 @@ func enableUnmanagedKubeDNSController(ctx context.Context, wg *sync.WaitGroup, c
delete(lastPodRestart, podName)
}
}
for _, podItem := range watchers.UnmanagedKubeDNSPodStore.List() {
for _, podItem := range watchers.UnmanagedPodStore.List() {
pod, ok := podItem.(*slim_corev1.Pod)
if !ok {
log.Errorf("unexpected type mapping: found %T, expected %T", pod, &slim_corev1.Pod{})
Expand All @@ -72,20 +72,20 @@ func enableUnmanagedKubeDNSController(ctx context.Context, wg *sync.WaitGroup, c
log.WithFields(logrus.Fields{
logfields.K8sPodName: podID,
logfields.Identity: cep.Status.ID,
}).Debug("Found kube-dns pod")
}).Debug("Found Unmanaged pod")
} else {
log.WithField(logfields.K8sPodName, podID).Debugf("Found unmanaged kube-dns pod")
log.WithField(logfields.K8sPodName, podID).Debugf("Found unmanaged pod")
if startTime := pod.Status.StartTime; startTime != nil {
if age := time.Since((*startTime).Time); age > unmanagedKubeDnsMinimalAge {
if age := time.Since((*startTime).Time); age > unmanagedPodMinimalAge {
if lastRestart, ok := lastPodRestart[podID]; ok {
if timeSinceRestart := time.Since(lastRestart); timeSinceRestart < minimalPodRestartInterval {
log.WithField(logfields.K8sPodName, podID).
Debugf("Not restaring kube-dns pod, only %s since last restart", timeSinceRestart)
Debugf("Not restaring unmanaged pod, only %s since last restart", timeSinceRestart)
continue
}
}

log.WithField(logfields.K8sPodName, podID).Infof("Restarting unmanaged kube-dns pod, started %s ago", age)
log.WithField(logfields.K8sPodName, podID).Infof("Restarting unmanaged pod, started %s ago", age)
if err := clientset.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{}); err != nil {
log.WithError(err).WithField(logfields.K8sPodName, podID).Warning("Unable to restart pod")
} else {
Expand Down
2 changes: 1 addition & 1 deletion operator/cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ func (legacy *legacyOnLeader) onStart(_ hive.HookContext) error {
legacy.wg.Add(1)
go func() {
defer legacy.wg.Done()
enableUnmanagedKubeDNSController(legacy.ctx, &legacy.wg, legacy.clientset)
enableUnmanagedController(legacy.ctx, &legacy.wg, legacy.clientset)
}()
}

Expand Down
8 changes: 8 additions & 0 deletions operator/option/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,10 @@ const (
// Intended for operating cilium with CNI-compatible orchestrators
// other than Kubernetes. (default is true)
EnableK8s = "enable-k8s"

// PodRestartSelector specify the labels contained in the pod that needs to be restarted before the node can be de-stained
// default values: k8s-app=kube-dns
PodRestartSelector = "pod-restart-selector"
)

// OperatorConfig is the configuration used by the operator.
Expand Down Expand Up @@ -588,6 +592,9 @@ type OperatorConfig struct {
// Intended for operating cilium with CNI-compatible orquestrators
// othern than Kubernetes. (default is true)
EnableK8s bool

// PodRestartSelector specify the labels contained in the pod that needs to be restarted before the node can be de-stained
PodRestartSelector string
}

// Populate sets all options with the values from viper.
Expand Down Expand Up @@ -638,6 +645,7 @@ func (c *OperatorConfig) Populate(vp *viper.Viper) {
c.IngressSharedLBServiceName = vp.GetString(IngressSharedLBServiceName)
c.IngressDefaultLoadbalancerMode = vp.GetString(IngressDefaultLoadbalancerMode)
c.EnableK8s = vp.GetBool(EnableK8s)
c.PodRestartSelector = vp.GetString(PodRestartSelector)

c.CiliumK8sNamespace = vp.GetString(CiliumK8sNamespace)

Expand Down
18 changes: 13 additions & 5 deletions operator/watchers/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"k8s.io/apimachinery/pkg/fields"
"k8s.io/client-go/tools/cache"

operatorOption "github.com/cilium/cilium/operator/option"
k8sClient "github.com/cilium/cilium/pkg/k8s/client"
"github.com/cilium/cilium/pkg/k8s/informer"
slim_corev1 "github.com/cilium/cilium/pkg/k8s/slim/k8s/api/core/v1"
Expand All @@ -26,11 +27,18 @@ var (
// operations in k8s as some of its fields are not populated.
PodStore cache.Store

// UnmanagedKubeDNSPodStore has a minimal copy of the unmanaged kube-dns pods running
// PodStoreSynced is closed once the PodStore is synced with k8s.
PodStoreSynced = make(chan struct{})

// UnmanagedPodStore has a minimal copy of the unmanaged pods running
// in the cluster.
// Warning: The pods stored in the cache are not intended to be used for Update
// operations in k8s as some of its fields are not populated.
UnmanagedKubeDNSPodStore cache.Store
UnmanagedPodStore cache.Store

// UnmanagedPodStoreSynced is closed once the UnmanagedKubeDNSPodStore is synced
// with k8s.
UnmanagedPodStoreSynced = make(chan struct{})
)

// podNodeNameIndexFunc indexes pods by node name
Expand Down Expand Up @@ -118,14 +126,14 @@ func convertToPod(obj interface{}) interface{} {
}
}

func UnmanagedKubeDNSPodsInit(ctx context.Context, wg *sync.WaitGroup, clientset k8sClient.Clientset) {
func UnmanagedPodsInit(ctx context.Context, wg *sync.WaitGroup, clientset k8sClient.Clientset) {
var unmanagedPodInformer cache.Controller
UnmanagedKubeDNSPodStore, unmanagedPodInformer = informer.NewInformer(
UnmanagedPodStore, unmanagedPodInformer = informer.NewInformer(
k8sUtils.ListerWatcherWithModifier(
k8sUtils.ListerWatcherFromTyped[*slim_corev1.PodList](clientset.Slim().CoreV1().Pods("")),
func(options *metav1.ListOptions) {
options.LabelSelector = "k8s-app=kube-dns"
options.FieldSelector = "status.phase=Running"
options.LabelSelector = operatorOption.Config.PodRestartSelector
}),
&slim_corev1.Pod{},
0,
Expand Down

0 comments on commit 5af4a0e

Please sign in to comment.