diff --git a/pkg/operator/k8s/pod.go b/pkg/operator/k8s/pod.go index cdf501deeb..42ef326967 100644 --- a/pkg/operator/k8s/pod.go +++ b/pkg/operator/k8s/pod.go @@ -105,6 +105,19 @@ func GetPodStatus(pod *corev1.Pod) string { case corev1.PodSucceeded: return PodStatusSucceeded case corev1.PodFailed: + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.LastTerminationState.Terminated != nil { + exitCode := containerStatus.LastTerminationState.Terminated.ExitCode + if killStatuses[exitCode] { + return PodStatusKilled + } + } else if containerStatus.State.Terminated != nil { + exitCode := containerStatus.State.Terminated.ExitCode + if killStatuses[exitCode] { + return PodStatusKilled + } + } + } return PodStatusFailed case corev1.PodRunning: if pod.ObjectMeta.DeletionTimestamp != nil { diff --git a/pkg/operator/workloads/workload_spec.go b/pkg/operator/workloads/workload_spec.go index be25e07db7..ac1c717ef9 100644 --- a/pkg/operator/workloads/workload_spec.go +++ b/pkg/operator/workloads/workload_spec.go @@ -29,6 +29,7 @@ import ( "github.com/cortexlabs/cortex/pkg/lib/sets/strset" "github.com/cortexlabs/cortex/pkg/operator/aws" ocontext "github.com/cortexlabs/cortex/pkg/operator/context" + "github.com/cortexlabs/cortex/pkg/operator/k8s" ) type WorkloadSpec struct { @@ -148,7 +149,12 @@ func UpdateDataWorkflowErrors(failedPods []corev1.Pod) error { if savedStatus.Start == nil { savedStatus.Start = nowTime } + savedStatus.ExitCode = resource.ExitCodeDataFailed + if k8s.GetPodStatus(&pod) == k8s.PodStatusKilled { + savedStatus.ExitCode = resource.ExitCodeDataKilled + } + savedStatusesToUpload = append(savedStatusesToUpload, savedStatus) } }