From b7f313bbdb6e3bc63cab559e0d106f7c253c8fad Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Wed, 20 Mar 2019 22:51:18 -0400 Subject: [PATCH 1/3] match pod status with container status --- pkg/operator/workloads/workload_spec.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/operator/workloads/workload_spec.go b/pkg/operator/workloads/workload_spec.go index be25e07db7..dadec3809d 100644 --- a/pkg/operator/workloads/workload_spec.go +++ b/pkg/operator/workloads/workload_spec.go @@ -148,7 +148,12 @@ func UpdateDataWorkflowErrors(failedPods []corev1.Pod) error { if savedStatus.Start == nil { savedStatus.Start = nowTime } + savedStatus.ExitCode = resource.ExitCodeDataFailed + if k8s.GetPodStatus(&pod) == k8s.PodStatusKilled { + savedStatus.ExitCode = resource.ExitCodeDataKilled + } + savedStatusesToUpload = append(savedStatusesToUpload, savedStatus) } } From 5b0fc9cd00876d54fc2ce5405ff3d75c177563e1 Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Wed, 20 Mar 2019 22:53:06 -0400 Subject: [PATCH 2/3] import k8s --- pkg/operator/workloads/workload_spec.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/operator/workloads/workload_spec.go b/pkg/operator/workloads/workload_spec.go index dadec3809d..ac1c717ef9 100644 --- a/pkg/operator/workloads/workload_spec.go +++ b/pkg/operator/workloads/workload_spec.go @@ -29,6 +29,7 @@ import ( "github.com/cortexlabs/cortex/pkg/lib/sets/strset" "github.com/cortexlabs/cortex/pkg/operator/aws" ocontext "github.com/cortexlabs/cortex/pkg/operator/context" + "github.com/cortexlabs/cortex/pkg/operator/k8s" ) type WorkloadSpec struct { From 88ac392799b215fe4089ff7fe9de5add133eadfb Mon Sep 17 00:00:00 2001 From: Ivan Zhang Date: Thu, 21 Mar 2019 14:25:40 -0400 Subject: [PATCH 3/3] expose terminated --- pkg/operator/k8s/pod.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pkg/operator/k8s/pod.go b/pkg/operator/k8s/pod.go index cdf501deeb..42ef326967 100644 --- a/pkg/operator/k8s/pod.go +++ b/pkg/operator/k8s/pod.go @@ -105,6 +105,19 @@ func GetPodStatus(pod *corev1.Pod) string { case corev1.PodSucceeded: return PodStatusSucceeded case corev1.PodFailed: + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.LastTerminationState.Terminated != nil { + exitCode := containerStatus.LastTerminationState.Terminated.ExitCode + if killStatuses[exitCode] { + return PodStatusKilled + } + } else if containerStatus.State.Terminated != nil { + exitCode := containerStatus.State.Terminated.ExitCode + if killStatuses[exitCode] { + return PodStatusKilled + } + } + } return PodStatusFailed case corev1.PodRunning: if pod.ObjectMeta.DeletionTimestamp != nil {