forked from openshift/origin
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_diagnostics_pod.go
162 lines (150 loc) · 6 KB
/
run_diagnostics_pod.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
package client
import (
"bufio"
"fmt"
"regexp"
"strconv"
"time"
kapi "k8s.io/kubernetes/pkg/api"
kclient "k8s.io/kubernetes/pkg/client/unversioned"
osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
"github.com/openshift/origin/pkg/cmd/util/variable"
"github.com/openshift/origin/pkg/diagnostics/types"
)
const (
// DiagnosticPodName is the name this diagnostic reports from DiagnosticPod.Name.
DiagnosticPodName = "DiagnosticPod"
)
// DiagnosticPod is a diagnostic that runs a diagnostic pod and relays the results.
type DiagnosticPod struct {
// KubeClient is used to create, fetch and delete the diagnostic pod.
KubeClient kclient.Client
// Namespace is the namespace in which the diagnostic pod is created.
Namespace string
// Level is the requested log level; it is capped at 2 before being passed
// to the pod's command line via "-l".
Level int
// Factory is used to build the request that streams the pod's logs.
Factory *osclientcmd.Factory
// PreventModification, when true, makes CanRun refuse to run the diagnostic.
PreventModification bool
// ImageTemplate is expanded with the "deployer" component to obtain the
// image the diagnostic pod runs.
ImageTemplate variable.ImageTemplate
}
// Name implements the Diagnostic interface by reporting the fixed identifier
// of this diagnostic, DiagnosticPodName.
func (d *DiagnosticPod) Name() string {
	return DiagnosticPodName
}
// Description implements the Diagnostic interface; it returns a short,
// user-facing summary of what this diagnostic does.
func (d *DiagnosticPod) Description() string {
	const description = "Create a pod to run diagnostics from the application standpoint"
	return description
}
// CanRun implements the Diagnostic interface. It reports whether this
// diagnostic may run: it is allowed unless PreventModification is set, in
// which case false is returned together with an error explaining why.
func (d *DiagnosticPod) CanRun() (bool, error) {
	if !d.PreventModification {
		return true, nil
	}
	// Creating the pod changes cluster state, which the caller asked us not to do.
	return false, fmt.Errorf("running the diagnostic pod is an API change, which is prevented as you indicated")
}
// Check is part of the Diagnostic interface; it runs the diagnostic pod and
// returns the result that runDiagnosticPod recorded.
func (d *DiagnosticPod) Check() types.DiagnosticResult {
	// Name the result with the shared constant so it cannot drift from Name().
	r := types.NewDiagnosticResult(DiagnosticPodName)
	d.runDiagnosticPod(nil, r)
	return r
}
// runDiagnosticPod creates a one-shot pod that runs
// "openshift infra diagnostic-pod", follows its log output until the stream
// ends, and records the outcome on r.
//
// The pod is created in d.Namespace from the image obtained by expanding
// d.ImageTemplate for the "deployer" component, and is deleted again before
// this function returns (a failure to delete is recorded on r).
//
// The outcome is classified from the "[Note] Errors seen: N" and
// "[Note] Warnings seen: N" lines in the pod output: any errors yield an
// error entry, otherwise any warnings yield a warning entry, otherwise the
// output is recorded as info.
//
// The service parameter is not used by this function.
func (d *DiagnosticPod) runDiagnosticPod(service *kapi.Service, r types.DiagnosticResult) {
	loglevel := d.Level
	if loglevel > 2 {
		loglevel = 2 // need to show summary at least
	}
	imageName := d.ImageTemplate.ExpandOrDie("deployer")
	pod, err := d.KubeClient.Pods(d.Namespace).Create(&kapi.Pod{
		ObjectMeta: kapi.ObjectMeta{GenerateName: "pod-diagnostic-test-"},
		Spec: kapi.PodSpec{
			RestartPolicy: kapi.RestartPolicyNever,
			Containers: []kapi.Container{
				{
					Name:    "pod-diagnostics",
					Image:   imageName,
					Command: []string{"openshift", "infra", "diagnostic-pod", "-l", strconv.Itoa(loglevel)},
				},
			},
		},
	})
	if err != nil {
		r.Error("DCli2001", err, fmt.Sprintf("Creating diagnostic pod with image %s failed. Error: (%[2]T) %[2]v", imageName, err))
		return
	}

	// Remember the generated name now: the cleanup below must target the pod
	// we created even if a later call fails and leaves us without a usable
	// pod object.
	podName := pod.ObjectMeta.Name
	defer func() { // delete what we created, or notify that we couldn't
		zero := int64(0)
		delOpts := kapi.DeleteOptions{TypeMeta: pod.TypeMeta, GracePeriodSeconds: &zero}
		if err := d.KubeClient.Pods(d.Namespace).Delete(podName, &delOpts); err != nil {
			// NOTE(review): this ID is spelled "DCl2002" rather than "DCli2002";
			// left unchanged in case anything matches on it.
			r.Error("DCl2002", err, fmt.Sprintf("Deleting diagnostic pod '%s' failed. Error: %s", podName, fmt.Sprintf("(%T) %[1]s", err)))
		}
	}()

	// Status is filled in post-create, so fetch the pod again. Only replace
	// pod on success so the deferred cleanup never sees a nil or empty object.
	fetched, err := d.KubeClient.Pods(d.Namespace).Get(podName)
	if err != nil {
		r.Error("DCli2003", err, fmt.Sprintf("Retrieving the diagnostic pod definition failed. Error: (%T) %[1]v", err))
		return
	}
	pod = fetched
	r.Debug("DCli2004", fmt.Sprintf("Created diagnostic pod named %v running image %s.", pod.ObjectMeta.Name, imageName))

	bytelim := int64(1024000) // upper bound on the amount of log data requested
	podLogsOpts := &kapi.PodLogOptions{
		TypeMeta:   pod.TypeMeta,
		Container:  "pod-diagnostics",
		Follow:     true,
		LimitBytes: &bytelim,
	}
	req, err := d.Factory.LogsForObject(pod, podLogsOpts)
	if err != nil {
		r.Error("DCli2005", err, fmt.Sprintf("The request for diagnostic pod logs failed unexpectedly. Error: (%T) %[1]v", err))
		return
	}

	// Wait for the pod to be started and its logs to be available: up to 25
	// attempts with a linearly growing pause (100ms x attempt) between them.
	var scanner *bufio.Scanner
	var lastError error
	for times := 1; times <= 25; times++ {
		readCloser, err := req.Stream()
		if err != nil {
			lastError = err
			r.Debug("DCli2010", fmt.Sprintf("Could not get diagnostic pod logs (loop %d): (%T) %[2]v", times, err))
			time.Sleep(time.Duration(times*100) * time.Millisecond)
			continue
		}
		// make sure we can actually get something from the stream before going on.
		// it seems the creation of docker logs can trail the container start a bit.
		lineScanner := bufio.NewScanner(readCloser)
		if lineScanner.Scan() {
			// success - keep this stream open until we are done reading it.
			defer readCloser.Close()
			scanner = lineScanner
			break
		}
		// no luck - release this stream right away (best effort) rather than
		// holding every failed attempt open until the function returns.
		readCloser.Close()
		lastError = fmt.Errorf("Diagnostics pod is ready but not its logs (loop %d). Retry.", times)
		r.Debug("DCli2010", lastError.Error())
		time.Sleep(time.Duration(times*100) * time.Millisecond)
	}
	if scanner == nil {
		// every attempt failed
		r.Warn("DCli2006", lastError, fmt.Sprintf("Timed out preparing diagnostic pod logs for streaming, so this diagnostic cannot run.\nIt is likely that the image '%s' was not pulled and running yet.\nLast error: (%T) %[2]v", pod.Spec.Containers[0].Image, lastError))
		return
	}

	// then watch logs and wait until it exits
	var logBuf []byte
	warnCount, errorCount := 0, 0
	errorRegex := regexp.MustCompile(`^\[Note\]\s+Errors\s+seen:\s+(\d+)`)
	warnRegex := regexp.MustCompile(`^\[Note\]\s+Warnings\s+seen:\s+(\d+)`)
	// keep in mind one test line was already scanned, so scan after the loop runs once
	for scanned := true; scanned; scanned = scanner.Scan() {
		line := scanner.Text()
		logBuf = append(logBuf, line...)
		logBuf = append(logBuf, '\n')
		// The patterns admit only digits, so Atoi can fail only on overflow;
		// that error is deliberately ignored.
		if matches := errorRegex.FindStringSubmatch(line); matches != nil {
			errorCount, _ = strconv.Atoi(matches[1])
		} else if matches := warnRegex.FindStringSubmatch(line); matches != nil {
			warnCount, _ = strconv.Atoi(matches[1])
		}
	}
	podLogs := string(logBuf)

	if err := scanner.Err(); err != nil { // Scan terminated abnormally
		r.Error("DCli2009", err, fmt.Sprintf("Unexpected error reading diagnostic pod logs: (%T) %[1]v\nLogs are:\n%[2]s", err, podLogs))
		return
	}
	switch {
	case errorCount > 0:
		r.Error("DCli2012", nil, "See the errors below in the output from the diagnostic pod:\n"+podLogs)
	case warnCount > 0:
		r.Warn("DCli2013", nil, "See the warnings below in the output from the diagnostic pod:\n"+podLogs)
	default:
		r.Info("DCli2008", fmt.Sprintf("Output from the diagnostic pod (image %s):\n", imageName)+podLogs)
	}
}