Skip to content

Commit fa99ce8

Browse files
fix(k8s): error when re-deploying failed StatefulSet or DaemonSet (#7798)
Prior to this, outdated pods (from older workload versions) would factor into the deployment status checks. Co-authored-by: Jon Edvald <edvald@gmail.com>
1 parent f5b042c commit fa99ce8

File tree

2 files changed

+100
-16
lines changed

2 files changed

+100
-16
lines changed

core/src/plugins/kubernetes/status/status.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ import type {
3333
V1ReplicationController,
3434
V1Service,
3535
} from "@kubernetes/client-node"
36-
import { getPods, getResourceKey, hashManifest } from "../util.js"
36+
import { getPodsBySelector, getResourceKey, hashManifest } from "../util.js"
3737
import { checkWorkloadStatus } from "./workload.js"
3838
import { checkWorkloadPodStatus } from "./pod.js"
3939
import { deline, stableStringify } from "../../../util/string.js"
@@ -106,12 +106,16 @@ const objHandlers: { [kind: string]: StatusHandler } = {
106106
ReplicaSet: async ({ api, namespace, resource }: StatusHandlerParams<V1ReplicaSet>) => {
107107
return checkWorkloadPodStatus(
108108
resource,
109-
await getPods(api, namespace, (<KubernetesServerResource<V1ReplicaSet>>resource).spec.selector!.matchLabels!)
109+
await getPodsBySelector(
110+
api,
111+
namespace,
112+
(<KubernetesServerResource<V1ReplicaSet>>resource).spec.selector!.matchLabels!
113+
)
110114
)
111115
},
112116

113117
ReplicationController: async ({ api, namespace, resource }: StatusHandlerParams<V1ReplicationController>) => {
114-
return checkWorkloadPodStatus(resource, await getPods(api, namespace, resource.spec!.selector!))
118+
return checkWorkloadPodStatus(resource, await getPodsBySelector(api, namespace, resource.spec!.selector!))
115119
},
116120

117121
Service: async ({ resource }: StatusHandlerParams<V1Service>) => {

core/src/plugins/kubernetes/util.ts

Lines changed: 93 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,8 @@ export async function getCurrentWorkloadPods(
168168
* Retrieve a list of pods based on the given resource/manifest. If passed a Pod manifest, it's read from the
169169
* remote namespace and returned directly.
170170
*
171-
* In the case of Deployments, only the pods belonging the latest ReplicaSet are returned.
171+
* For Deployments, only the pods belonging to the current ReplicaSet are returned.
172+
* For StatefulSets and DaemonSets, pods are filtered by ownerReferences.
172173
*/
173174
export async function getWorkloadPods({
174175
api,
@@ -190,26 +191,103 @@ export async function getWorkloadPods({
190191

191192
// We don't match on the garden.io/version label because it can fall out of sync
192193
const selector = omit(getSelectorFromResource(resource), gardenAnnotationKey("version"))
193-
const pods = await getPods(api, resource.metadata?.namespace || namespace, selector)
194+
const pods = await getPodsBySelector(api, resource.metadata?.namespace || namespace, selector)
194195

195196
if (resource.kind === "Deployment") {
196-
// Make sure we only return the pods from the current ReplicaSet
197+
// For Deployments: find the current ReplicaSet and filter pods by pod-template-hash
197198
const selectorString = labelSelectorToString(selector)
198199
const replicaSetRes = await api.apps.listNamespacedReplicaSet({
199200
namespace: resource.metadata?.namespace || namespace,
200201
labelSelector: selectorString,
201202
})
202203

203-
const replicaSets = replicaSetRes.items.filter((r) => (r.spec.replicas || 0) > 0)
204+
if (replicaSetRes.items.length === 0) {
205+
return []
206+
}
207+
208+
// Get the deployment UID, fetching from server if needed
209+
let deploymentUid = resource.metadata?.uid
210+
if (!deploymentUid) {
211+
// Resource is a local manifest without UID - fetch from cluster
212+
try {
213+
const serverResource = await api.apps.readNamespacedDeployment({
214+
name: resource.metadata.name,
215+
namespace: resource.metadata?.namespace || namespace,
216+
})
217+
deploymentUid = serverResource.metadata?.uid
218+
} catch (err) {
219+
if (err instanceof KubernetesError && err.responseStatusCode === 404) {
220+
// Deployment doesn't exist in cluster yet
221+
return []
222+
}
223+
throw err
224+
}
225+
}
226+
227+
// Find the current ReplicaSet by checking owner references and revision
228+
const currentReplicaSets = replicaSetRes.items.filter((rs) => {
229+
// Check if this ReplicaSet is owned by this Deployment
230+
const ownedByDeployment = rs.metadata.ownerReferences?.some(
231+
(ref) => ref.kind === "Deployment" && ref.uid === deploymentUid
232+
)
233+
return ownedByDeployment && (rs.status?.replicas || 0) > 0
234+
})
204235

205-
if (replicaSets.length === 0) {
236+
if (currentReplicaSets.length === 0) {
206237
return []
207238
}
208239

209-
const sorted = sortBy(replicaSets, (r) => r.metadata.creationTimestamp!)
210-
const currentReplicaSet = sorted[replicaSets.length - 1]
211-
return pods.filter((p) => p.metadata.name.startsWith(currentReplicaSet.metadata.name))
240+
// Sort by revision annotation to get the latest
241+
const sorted = sortBy(currentReplicaSets, (rs) => {
242+
const revision = rs.metadata.annotations?.["deployment.kubernetes.io/revision"]
243+
return revision ? parseInt(revision, 10) : 0
244+
})
245+
const currentReplicaSet = sorted[sorted.length - 1]
246+
247+
// Filter pods by pod-template-hash label (canonical way)
248+
const podTemplateHash = currentReplicaSet.metadata.labels?.["pod-template-hash"]
249+
if (podTemplateHash) {
250+
return pods.filter((p) => p.metadata.labels?.["pod-template-hash"] === podTemplateHash)
251+
} else {
252+
// Fallback to owner reference check
253+
const rsUid = currentReplicaSet.metadata.uid
254+
return pods.filter((p) =>
255+
p.metadata.ownerReferences?.some((ref) => ref.kind === "ReplicaSet" && ref.uid === rsUid)
256+
)
257+
}
258+
} else if (resource.kind === "StatefulSet" || resource.kind === "DaemonSet") {
259+
// For StatefulSets and DaemonSets: pods are owned directly by the workload
260+
// Get the resource UID, fetching from server if needed
261+
let resourceUid = resource.metadata?.uid
262+
if (!resourceUid) {
263+
// Resource is a local manifest without UID - fetch from cluster
264+
try {
265+
const serverResource =
266+
resource.kind === "StatefulSet"
267+
? await api.apps.readNamespacedStatefulSet({
268+
name: resource.metadata.name,
269+
namespace: resource.metadata?.namespace || namespace,
270+
})
271+
: await api.apps.readNamespacedDaemonSet({
272+
name: resource.metadata.name,
273+
namespace: resource.metadata?.namespace || namespace,
274+
})
275+
resourceUid = serverResource.metadata?.uid
276+
} catch (err) {
277+
if (err instanceof KubernetesError && err.responseStatusCode === 404) {
278+
// Workload doesn't exist in cluster yet
279+
return []
280+
}
281+
throw err
282+
}
283+
}
284+
285+
// Filter by ownerReferences to ensure we only get pods from this specific resource
286+
return pods.filter((p) =>
287+
p.metadata.ownerReferences?.some((ref) => ref.kind === resource.kind && ref.uid === resourceUid)
288+
)
212289
} else {
290+
// For other workload types (e.g., ReplicaSet), use label selector
213291
return pods
214292
}
215293
}
@@ -223,7 +301,7 @@ export function labelSelectorToString(selector: { [key: string]: string }) {
223301
/**
224302
* Retrieve a list of pods based on the provided label selector.
225303
*/
226-
export async function getPods(
304+
export async function getPodsBySelector(
227305
api: KubeApi,
228306
namespace: string,
229307
selector: { [key: string]: string }
@@ -240,8 +318,8 @@ export async function getPods(
240318
/**
241319
* Retrieve a list of *ready* pods based on the provided label selector.
242320
*/
243-
export async function getReadyPods(api: KubeApi, namespace: string, selector: { [key: string]: string }) {
244-
const pods = await getPods(api, namespace, selector)
321+
export async function getReadyPodsBySelector(api: KubeApi, namespace: string, selector: { [key: string]: string }) {
322+
const pods = await getPodsBySelector(api, namespace, selector)
245323
return pods.filter((pod) => checkPodStatus(pod) === "ready")
246324
}
247325

@@ -593,7 +671,7 @@ export async function getTargetResource({
593671
})
594672

595673
if (query.podSelector && !isEmpty(query.podSelector)) {
596-
const pods = await getReadyPods(api, namespace, query.podSelector)
674+
const pods = await getReadyPodsBySelector(api, namespace, query.podSelector)
597675
const pod = sample(pods)
598676
if (!pod) {
599677
const selectorStr = getSelectorString(query.podSelector)
@@ -804,6 +882,8 @@ export function renderWorkloadEvents(events: CoreV1Event[], workloadKind: string
804882
text += `${styles.accent(`━━━ Latest events from ${workloadKind} ${workloadName} ━━━`)}\n`
805883
for (const event of events) {
806884
const obj = event.involvedObject
885+
const timestamp = event.lastTimestamp || event.firstTimestamp || event.eventTime
886+
const timeStr = timestamp ? new Date(timestamp).toISOString() : "<no timestamp>"
807887
const name = styles.highlight(`${obj.kind} ${obj.name}:`)
808888
const msg = `${event.reason} - ${event.message}`
809889
const colored =
@@ -812,7 +892,7 @@ export function renderWorkloadEvents(events: CoreV1Event[], workloadKind: string
812892
: event.type === "Warning"
813893
? styles.warning(msg)
814894
: styles.highlight(msg)
815-
text += `${name} ${colored}\n`
895+
text += `${styles.primary(timeStr)} ${name} ${colored}\n`
816896
}
817897

818898
if (events.length === 0) {

0 commit comments

Comments
 (0)