forked from openshift/origin
-
Notifications
You must be signed in to change notification settings - Fork 1
/
pod.go
135 lines (113 loc) · 4.56 KB
/
pod.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
package analysis
import (
"fmt"
"time"
"github.com/MakeNowJust/heredoc"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
osgraph "github.com/openshift/origin/pkg/oc/lib/graph/genericgraph"
kubegraph "github.com/openshift/origin/pkg/oc/lib/graph/kubegraph/nodes"
)
const (
// CrashLoopingPodError is the marker key attached when a container is waiting in CrashLoopBackOff.
CrashLoopingPodError = "CrashLoopingPod"
// RestartingPodWarning is the marker key attached when a container has restarted recently or frequently.
RestartingPodWarning = "RestartingPod"
// RestartThreshold is the restart count a container must exceed to be reported as restarting frequently.
RestartThreshold = 5
// RestartRecentDuration is the window, measured back from "now" to the finish time of a container's last
// termination, within which a restart is considered recent.
// TODO: if you change this, you must also change the hard-coded "10 minutes" wording in the warning
// message emitted by FindRestartingPods.
RestartRecentDuration = 10 * time.Minute
)
// nowFn supplies the timestamp that FindRestartingPods passes to ContainerRestartedRecently.
// It is a package-level variable (defaulting to metav1.Now) so that tests can replace it.
var nowFn = metav1.Now
// FindRestartingPods walks every pod node in the graph and emits a marker for each container status that,
// checked in this order, is crash-looping (error severity), has restarted within RestartRecentDuration
// (warning), or has restarted more than RestartThreshold times (warning). At most one marker is produced
// per container status: the first condition that matches wins.
//
// logsCommandName is the command the user should run to view pod logs; it is embedded in the suggestion text.
// securityPolicyCommandPattern is a fmt.Sprintf format string taking two arguments: first the pod's
// namespace, then the pod's service account name.
//
// The returned slice is never nil; it is empty when nothing was found.
func FindRestartingPods(g osgraph.Graph, f osgraph.Namer, logsCommandName, securityPolicyCommandPattern string) []osgraph.Marker {
	found := make([]osgraph.Marker, 0)

	for _, node := range g.NodesByKind(kubegraph.PodNodeKind) {
		podNode := node.(*kubegraph.PodNode)
		pod, isPod := podNode.Object().(*corev1.Pod)
		if !isPod {
			continue
		}

		// The container is only named in messages when the pod declares more than one.
		multiContainer := len(pod.Spec.Containers) > 1

		for _, status := range pod.Status.ContainerStatuses {
			var prefix string
			if multiContainer {
				prefix = fmt.Sprintf("container %q in ", status.Name)
			}

			// Crash-looping takes precedence over the two restart warnings.
			if containerCrashLoopBackOff(status) {
				var advice string
				if containerIsNonRoot(pod, status.Name) {
					// Non-root containers additionally get the command an admin can run to allow root.
					advice = heredoc.Docf(`
The container is starting and exiting repeatedly. This usually means the container is unable
to start, misconfigured, or limited by security restrictions. Check the container logs with
%s %s -c %s
Current security policy prevents your containers from being run as the root user. Some images
may fail expecting to be able to change ownership or permissions on directories. Your admin
can grant you access to run containers that need to run as the root user with this command:
%s
`, logsCommandName, pod.Name, status.Name, fmt.Sprintf(securityPolicyCommandPattern, pod.Namespace, pod.Spec.ServiceAccountName))
				} else {
					advice = heredoc.Docf(`
The container is starting and exiting repeatedly. This usually means the container is unable
to start, misconfigured, or limited by security restrictions. Check the container logs with
%s %s -c %s
`, logsCommandName, pod.Name, status.Name)
				}

				found = append(found, osgraph.Marker{
					Node:       podNode,
					Severity:   osgraph.ErrorSeverity,
					Key:        CrashLoopingPodError,
					Message:    fmt.Sprintf("%s%s is crash-looping", prefix, f.ResourceName(podNode)),
					Suggestion: osgraph.Suggestion(advice),
				})
				continue
			}

			if ContainerRestartedRecently(status, nowFn()) {
				found = append(found, osgraph.Marker{
					Node:     podNode,
					Severity: osgraph.WarningSeverity,
					Key:      RestartingPodWarning,
					Message:  fmt.Sprintf("%s%s has restarted within the last 10 minutes", prefix, f.ResourceName(podNode)),
				})
				continue
			}

			if containerRestartedFrequently(status) {
				found = append(found, osgraph.Marker{
					Node:     podNode,
					Severity: osgraph.WarningSeverity,
					Key:      RestartingPodWarning,
					Message:  fmt.Sprintf("%s%s has restarted %d times", prefix, f.ResourceName(podNode), status.RestartCount),
				})
			}
		}
	}

	return found
}
// containerIsNonRoot reports whether the container with the given name in pod's spec has a
// container-level security context whose RunAsUser is set to a non-zero UID.
// SecurityContext.RunAsNonRoot is not consulted.
func containerIsNonRoot(pod *corev1.Pod, container string) bool {
	for i := range pod.Spec.Containers {
		if pod.Spec.Containers[i].Name != container {
			continue
		}
		sc := pod.Spec.Containers[i].SecurityContext
		if sc != nil && sc.RunAsUser != nil && *sc.RunAsUser != 0 {
			return true
		}
	}
	return false
}
// containerCrashLoopBackOff reports whether the container is currently in a waiting state
// whose reason is "CrashLoopBackOff".
func containerCrashLoopBackOff(status corev1.ContainerStatus) bool {
	waiting := status.State.Waiting
	if waiting == nil {
		return false
	}
	return waiting.Reason == "CrashLoopBackOff"
}
// ContainerRestartedRecently reports whether the container has restarted at least once and its
// last recorded termination finished less than RestartRecentDuration before now. It returns false
// when there is no recorded last termination.
func ContainerRestartedRecently(status corev1.ContainerStatus, now metav1.Time) bool {
	lastExit := status.LastTerminationState.Terminated
	if status.RestartCount == 0 || lastExit == nil {
		return false
	}
	return now.Sub(lastExit.FinishedAt.Time) < RestartRecentDuration
}
// containerRestartedFrequently reports whether the container's restart count is strictly
// greater than RestartThreshold.
func containerRestartedFrequently(status corev1.ContainerStatus) bool {
	restarts := status.RestartCount
	return restarts > RestartThreshold
}