Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ which includes:

- **Namespace**: `deployment-tracker`
- **ServiceAccount**: Identity for the controller pod
- **ClusterRole**: Minimal permissions (`get`, `list`, `watch` on pods; `get` on other supported objects)
- **ClusterRole**: Minimal permissions (`get`, `list`, `watch` on pods and deployments; `get` on other supported objects)
- **ClusterRoleBinding**: Binds the ServiceAccount to the ClusterRole
- **Deployment**: Runs the controller with security hardening

Expand Down Expand Up @@ -140,6 +140,8 @@ The controller requires the following minimum permissions:
| API Group | Resource | Verbs |
|-----------|----------|-------|
| `""` (core) | `pods` | `get`, `list`, `watch` |
| `apps` | `deployments` | `get`, `list`, `watch` |
| `apps` | `replicasets` | `get` |

If you only need to monitor a single namespace, you can modify the manifest to use a `Role` and `RoleBinding` instead of `ClusterRole` and `ClusterRoleBinding` for more restricted permissions.

Expand Down
2 changes: 2 additions & 0 deletions deploy/charts/deployment-tracker/templates/clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ rules:
- deployments
verbs:
- get
- list
- watch
- apiGroups:
- apps
resources:
Expand Down
2 changes: 1 addition & 1 deletion deploy/manifest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ rules:
verbs: ["get", "list", "watch"]
- apiGroups: ["apps"]
resources: ["deployments"]
verbs: ["get"]
verbs: ["get", "list", "watch"]
- apiGroups: ["apps"]
resources: ["replicasets"]
verbs: ["get"]
Expand Down
30 changes: 18 additions & 12 deletions internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
appslisters "k8s.io/client-go/listers/apps/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
)
Expand Down Expand Up @@ -64,6 +65,8 @@ type Controller struct {
clientset kubernetes.Interface
metadataAggregator podMetadataAggregator
podInformer cache.SharedIndexInformer
deploymentInformer cache.SharedIndexInformer
deploymentLister appslisters.DeploymentLister
workqueue workqueue.TypedRateLimitingInterface[PodEvent]
apiClient deploymentRecordPoster
cfg *Config
Expand All @@ -82,6 +85,8 @@ func New(clientset kubernetes.Interface, metadataAggregator podMetadataAggregato
factory := createInformerFactory(clientset, namespace, excludeNamespaces)

podInformer := factory.Core().V1().Pods().Informer()
deploymentInformer := factory.Apps().V1().Deployments().Informer()
deploymentLister := factory.Apps().V1().Deployments().Lister()

// Create work queue with rate limiting
queue := workqueue.NewTypedRateLimitingQueue(
Expand Down Expand Up @@ -117,6 +122,8 @@ func New(clientset kubernetes.Interface, metadataAggregator podMetadataAggregato
clientset: clientset,
metadataAggregator: metadataAggregator,
podInformer: podInformer,
deploymentInformer: deploymentInformer,
deploymentLister: deploymentLister,
workqueue: queue,
apiClient: apiClient,
cfg: cfg,
Expand Down Expand Up @@ -237,14 +244,15 @@ func (c *Controller) Run(ctx context.Context, workers int) error {
defer runtime.HandleCrash()
defer c.workqueue.ShutDown()

slog.Info("Starting pod informer")
slog.Info("Starting informers")

// Start the informer
// Start the informers
go c.podInformer.Run(ctx.Done())
go c.deploymentInformer.Run(ctx.Done())

// Wait for the cache to be synced
slog.Info("Waiting for informer cache to sync")
if !cache.WaitForCacheSync(ctx.Done(), c.podInformer.HasSynced) {
// Wait for the caches to be synced
slog.Info("Waiting for informer caches to sync")
if !cache.WaitForCacheSync(ctx.Done(), c.podInformer.HasSynced, c.deploymentInformer.HasSynced) {
return errors.New("timed out waiting for caches to sync")
}

Expand Down Expand Up @@ -327,7 +335,7 @@ func (c *Controller) processEvent(ctx context.Context, event PodEvent) error {
// the referenced image digest to the newly observed (via
// the create event).
deploymentName := getDeploymentName(pod)
if deploymentName != "" && c.deploymentExists(ctx, pod.Namespace, deploymentName) {
if deploymentName != "" && c.deploymentExists(pod.Namespace, deploymentName) {
slog.Debug("Deployment still exists, skipping pod delete (scale down)",
"namespace", pod.Namespace,
"deployment", deploymentName,
Expand Down Expand Up @@ -390,16 +398,14 @@ func (c *Controller) processEvent(ctx context.Context, event PodEvent) error {
return lastErr
}

// deploymentExists checks if a deployment exists in the cluster.
func (c *Controller) deploymentExists(ctx context.Context, namespace, name string) bool {
_, err := c.clientset.AppsV1().Deployments(namespace).Get(ctx, name, metav1.GetOptions{})
// deploymentExists checks if a deployment exists in the local informer cache.
func (c *Controller) deploymentExists(namespace, name string) bool {
_, err := c.deploymentLister.Deployments(namespace).Get(name)
if err != nil {
if k8serrors.IsNotFound(err) {
return false
}
// On error, assume it exists to be safe
// (avoid false decommissions)
slog.Warn("Failed to check if deployment exists, assuming it does",
slog.Warn("Failed to check if deployment exists in cache, assuming it does",
"namespace", namespace,
"deployment", name,
"error", err,
Expand Down
2 changes: 1 addition & 1 deletion internal/controller/controller_integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func setup(t *testing.T, onlyNamespace string, excludeNamespaces string) (*kuber
go func() {
_ = ctrl.Run(ctx, 1)
}()
if !cache.WaitForCacheSync(ctx.Done(), ctrl.podInformer.HasSynced) {
if !cache.WaitForCacheSync(ctx.Done(), ctrl.podInformer.HasSynced, ctrl.deploymentInformer.HasSynced) {
t.Fatal("timed out waiting for informer cache to sync")
}

Expand Down
Loading