node_definitions.go
package cluster

// The purpose of this diagnostic is to detect nodes that are out of commission
// (which may affect the ability to schedule pods) for user awareness.

import (
	"errors"
	"fmt"

	kapi "k8s.io/kubernetes/pkg/api"
	kclientset "k8s.io/kubernetes/pkg/client/clientset_generated/internalclientset"

	authorizationapi "github.com/openshift/origin/pkg/authorization/api"
	osclient "github.com/openshift/origin/pkg/client"
	"github.com/openshift/origin/pkg/diagnostics/log"
	"github.com/openshift/origin/pkg/diagnostics/types"
)
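
// Note on the message constants below: clientErrorGettingNodes is a fmt format
// string (the "(%T) %[1]v" verbs print the error's type and value), while
// nodeNotReady and nodeNotSched are template strings whose {{.node}},
// {{.status}}, and {{.reason}} placeholders are filled in by log.EvalTemplate
// from a log.Hash in Check() below.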
const (
	clientErrorGettingNodes = `Client error while retrieving node records. Client retrieved records
during discovery, so this is likely to be a transient error. Try running
diagnostics again. If this message persists, there may be a permissions
problem with getting node records. The error was:
(%T) %[1]v`

	nodeNotReady = `Node {{.node}} is defined but is not marked as ready.
Ready status is {{.status}} because "{{.reason}}"
If the node is not intentionally disabled, check that the master can
reach the node hostname for a health check and the node is checking in
to the master with the same hostname.
While in this state, pods should not be scheduled to deploy on the node,
and any existing scheduled pods will be considered failed and removed.
`

	nodeNotSched = `Node {{.node}} is ready but is marked Unschedulable.
This is usually set manually for administrative reasons.
An administrator can mark the node schedulable with:
oadm manage-node {{.node}} --schedulable=true
While in this state, pods should not be scheduled to deploy on the node.
Existing pods will continue to run until completed or evacuated (see
other options for 'oadm manage-node').
`
)

// NodeDefinitions is a Diagnostic for analyzing the nodes in a cluster.
type NodeDefinitions struct {
	KubeClient kclientset.Interface
	OsClient   *osclient.Client
}

const NodeDefinitionsName = "NodeDefinitions"

func (d *NodeDefinitions) Name() string {
	return NodeDefinitionsName
}

func (d *NodeDefinitions) Description() string {
	return "Check node records on master"
}

func (d *NodeDefinitions) CanRun() (bool, error) {
	if d.KubeClient == nil || d.OsClient == nil {
		return false, errors.New("must have kube and os client")
	}
	can, err := userCan(d.OsClient, authorizationapi.Action{
		Verb:     "list",
		Group:    kapi.GroupName,
		Resource: "nodes",
	})
	if err != nil {
		return false, types.DiagnosticError{ID: "DClu0005", LogMessage: fmt.Sprintf(clientErrorGettingNodes, err), Cause: err}
	} else if !can {
		return false, types.DiagnosticError{ID: "DClu0006", LogMessage: "Client does not have access to see node status", Cause: err}
	}
	return true, nil
}

func (d *NodeDefinitions) Check() types.DiagnosticResult {
	r := types.NewDiagnosticResult("NodeDefinition")

	nodes, err := d.KubeClient.Core().Nodes().List(kapi.ListOptions{})
	if err != nil {
		r.Error("DClu0001", err, fmt.Sprintf(clientErrorGettingNodes, err))
		return r
	}

	anyNodesAvail := false
	for _, node := range nodes.Items {
		var ready *kapi.NodeCondition
		for i, condition := range node.Status.Conditions {
			switch condition.Type {
			// Each condition appears only once. Currently there's only one... used to be more
			case kapi.NodeReady:
				ready = &node.Status.Conditions[i]
			}
		}

		if ready == nil || ready.Status != kapi.ConditionTrue {
			templateData := log.Hash{"node": node.Name}
			if ready == nil {
				templateData["status"] = "None"
				templateData["reason"] = "There is no readiness record."
			} else {
				templateData["status"] = ready.Status
				templateData["reason"] = ready.Reason
			}
			r.Warn("DClu0002", nil, log.EvalTemplate("DClu0002", nodeNotReady, templateData))
		} else if node.Spec.Unschedulable {
			r.Warn("DClu0003", nil, log.EvalTemplate("DClu0003", nodeNotSched, log.Hash{"node": node.Name}))
		} else {
			anyNodesAvail = true
		}
	}
	if !anyNodesAvail {
		r.Error("DClu0004", nil, "There were no nodes available to use. No new pods can be scheduled.")
	}
	return r
}
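
// A minimal usage sketch (hypothetical; in practice the diagnostics command
// builds and runs registered Diagnostics itself). It assumes a kubeClient
// (kclientset.Interface) and an osClient (*osclient.Client) have already been
// constructed elsewhere:
//
//	d := &NodeDefinitions{KubeClient: kubeClient, OsClient: osClient}
//	if ok, err := d.CanRun(); err == nil && ok {
//		result := d.Check()
//		// result now carries any DClu0002/DClu0003 warnings and the
//		// DClu0004 error recorded above.
//		_ = result
//	}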