pkg/diagnostics/client/run_diagnostics_pod.go

package client

import (
	"bufio"
	"fmt"
	"regexp"
	"strconv"
	"time"

	kapi "k8s.io/kubernetes/pkg/api"
	kclient "k8s.io/kubernetes/pkg/client/unversioned"

	osclientcmd "github.com/openshift/origin/pkg/cmd/util/clientcmd"
	"github.com/openshift/origin/pkg/cmd/util/variable"
	"github.com/openshift/origin/pkg/diagnostics/types"
)

const (
	DiagnosticPodName = "DiagnosticPod"
)

// DiagnosticPod is a diagnostic that runs a diagnostic pod and relays the results.
type DiagnosticPod struct {
	KubeClient          kclient.Client
	Namespace           string
	Level               int
	Factory             *osclientcmd.Factory
	PreventModification bool
	ImageTemplate       variable.ImageTemplate
}

// Name is part of the Diagnostic interface and just returns name.
func (d *DiagnosticPod) Name() string {
	return DiagnosticPodName
}

// Description is part of the Diagnostic interface and provides a user-focused description of what the diagnostic does.
func (d *DiagnosticPod) Description() string {
	return "Create a pod to run diagnostics from the application standpoint"
}

// CanRun is part of the Diagnostic interface; it determines if the conditions are right to run this diagnostic.
func (d *DiagnosticPod) CanRun() (bool, error) {
	if d.PreventModification {
		return false, fmt.Errorf("running the diagnostic pod is an API change, which is prevented as you indicated")
	}
	return true, nil
}

// Check is part of the Diagnostic interface; it runs the actual diagnostic logic
func (d *DiagnosticPod) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult("DiagnosticPod")
	d.runDiagnosticPod(nil, r)
	return r
}

func (d *DiagnosticPod) runDiagnosticPod(service *kapi.Service, r types.DiagnosticResult) {
	loglevel := d.Level
	if loglevel > 2 {
		loglevel = 2 // need to show summary at least
	}
	imageName := d.ImageTemplate.ExpandOrDie("deployer")
	pod, err := d.KubeClient.Pods(d.Namespace).Create(&kapi.Pod{
		ObjectMeta: kapi.ObjectMeta{GenerateName: "pod-diagnostic-test-"},
		Spec: kapi.PodSpec{
			RestartPolicy: kapi.RestartPolicyNever,
			Containers: []kapi.Container{
				{
					Name:    "pod-diagnostics",
					Image:   imageName,
					Command: []string{"openshift", "infra", "diagnostic-pod", "-l", strconv.Itoa(loglevel)},
				},
			},
		},
	})
	if err != nil {
		r.Error("DCli2001", err, fmt.Sprintf("Creating diagnostic pod with image %s failed. Error: (%[2]T) %[2]v", imageName, err))
		return
	}
	defer func() { // delete what we created, or notify that we couldn't
		zero := int64(0)
		delOpts := kapi.DeleteOptions{TypeMeta: pod.TypeMeta, GracePeriodSeconds: &zero}
		if err := d.KubeClient.Pods(d.Namespace).Delete(pod.ObjectMeta.Name, &delOpts); err != nil {
			r.Error("DCl2002", err, fmt.Sprintf("Deleting diagnostic pod '%s' failed. Error: %s", pod.ObjectMeta.Name, fmt.Sprintf("(%T) %[1]s", err)))
		}
	}()
	pod, err = d.KubeClient.Pods(d.Namespace).Get(pod.ObjectMeta.Name) // status is filled in post-create
	if err != nil {
		r.Error("DCli2003", err, fmt.Sprintf("Retrieving the diagnostic pod definition failed. Error: (%T) %[1]v", err))
		return
	}
	r.Debug("DCli2004", fmt.Sprintf("Created diagnostic pod named %v running image %s.", pod.ObjectMeta.Name, imageName))

	bytelim := int64(1024000)
	podLogsOpts := &kapi.PodLogOptions{
		TypeMeta:   pod.TypeMeta,
		Container:  "pod-diagnostics",
		Follow:     true,
		LimitBytes: &bytelim,
	}
	req, err := d.Factory.LogsForObject(pod, podLogsOpts)
	if err != nil {
		r.Error("DCli2005", err, fmt.Sprintf("The request for diagnostic pod logs failed unexpectedly. Error: (%T) %[1]v", err))
		return
	}

	// wait for pod to be started and logs available
	var scanner *bufio.Scanner
	var lastError error
	for times := 1; true; times++ {
		if times <= 25 {
			readCloser, err := req.Stream()
			if err != nil {
				lastError = err
				r.Debug("DCli2010", fmt.Sprintf("Could not get diagnostic pod logs (loop %d): (%T[2]) %[2]v", times, err))
				time.Sleep(time.Duration(times*100) * time.Millisecond)
				continue
			}
			defer readCloser.Close()
			// make sure we can actually get something from the stream before going on.
			// it seems the creation of docker logs can trail the container start a bit.
			lineScanner := bufio.NewScanner(readCloser)
			if lineScanner.Scan() {
				scanner = lineScanner
				break // success - drop down to reading the logs.
			}
			// no luck - try, try again
			lastError = fmt.Errorf("Diagnostics pod is ready but not its logs (loop %d). Retry.", times)
			r.Debug("DCli2010", lastError.Error())
			time.Sleep(time.Duration(times*100) * time.Millisecond)
			continue
		}
		// after 25 times trying:
		r.Warn("DCli2006", err, fmt.Sprintf("Timed out preparing diagnostic pod logs for streaming, so this diagnostic cannot run.\nIt is likely that the image '%s' was not pulled and running yet.\nLast error: (%T[2]) %[2]v", pod.Spec.Containers[0].Image, lastError))
		return
	}
	// then watch logs and wait until it exits
	podLogs, warnings, errors := "", 0, 0
	errorRegex := regexp.MustCompile(`^\[Note\]\s+Errors\s+seen:\s+(\d+)`)
	warnRegex := regexp.MustCompile(`^\[Note\]\s+Warnings\s+seen:\s+(\d+)`)
	// keep in mind one test line was already scanned, so scan after the loop runs once
	for scanned := true; scanned; scanned = scanner.Scan() {
		line := scanner.Text()
		podLogs += line + "\n"
		if matches := errorRegex.FindStringSubmatch(line); matches != nil {
			errors, _ = strconv.Atoi(matches[1])
		} else if matches := warnRegex.FindStringSubmatch(line); matches != nil {
			warnings, _ = strconv.Atoi(matches[1])
		}
	}
	if err := scanner.Err(); err != nil { // Scan terminated abnormally
		r.Error("DCli2009", err, fmt.Sprintf("Unexpected error reading diagnostic pod logs: (%T) %[1]v\nLogs are:\n%[2]s", err, podLogs))
	} else {
		if errors > 0 {
			r.Error("DCli2012", nil, "See the errors below in the output from the diagnostic pod:\n"+podLogs)
		} else if warnings > 0 {
			r.Warn("DCli2013", nil, "See the warnings below in the output from the diagnostic pod:\n"+podLogs)
		} else {
			r.Info("DCli2008", fmt.Sprintf("Output from the diagnostic pod (image %s):\n", imageName)+podLogs)
		}
	}
}