node-status.go
package model

import (
	"fmt"
	"log"
	"time"

	"github.com/duncanpierce/hetzanetes/env"
	"github.com/duncanpierce/hetzanetes/label"
	"github.com/duncanpierce/hetzanetes/rest"
	"github.com/duncanpierce/hetzanetes/tmpl"
)
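
// SetPhase records a lifecycle transition by appending a PhaseChange with the
// new phase, the reason for the change and the current time to the node's history.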
func (n *NodeStatus) SetPhase(phase Phase, reason string) {
	n.Phases = append(n.Phases, PhaseChange{
		Phase:  phase,
		Reason: reason,
		Time:   time.Now(),
	})
}
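
// MakeProgress advances the node one step through its lifecycle
// (Create -> Joining -> Active, or Delete -> Draining -> Deleting -> Deleted),
// performing the side effects required by the current phase via the supplied Actions.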
func (n *NodeStatus) MakeProgress(cluster *Cluster, actions Actions) {
	var err error
	switch n.Phases.Current().Phase {
	case Create:
		// Choose a cloud-init template and labels according to the node's role.
		var templateToUse string
		labels := label.Labels{}
		labels.Set(label.ClusterNameLabel, cluster.Metadata.Name)
		if n.ApiServer {
			templateToUse = "add-api-server.yaml"
			labels.Mark(label.ApiServerLabel)
		} else {
			templateToUse = "add-worker.yaml"
			labels.Mark(label.WorkerLabel)
		}
		kubernetesVersion := fmt.Sprintf("v%s", cluster.Status.Versions.NewNodeVersion(n.ApiServer).String())
		log.Printf("Version %s chosen for node %s\n", kubernetesVersion, n.Name)
		config := tmpl.ClusterConfig{
			KubernetesVersion: kubernetesVersion,
			ApiEndpoint:       n.JoinEndpoint,
			JoinToken:         env.K3sToken(), // TODO this should come from a named Secret
			PrivateIpRange:    cluster.Status.ClusterNetwork.IpRange,
			SshPublicKey:      env.SshPublicKey(), // TODO this should come from a named Secret
		}
		cloudInit := tmpl.Cloudinit(config, templateToUse)
		log.Printf("Cloudinit for new node %s:\n%s\n\n", n.Name, cloudInit)
		sshKeys, err := actions.GetSshKeyIds()
		if err != nil {
			log.Printf("error getting SSH key IDs: %s\n", err.Error())
		} else {
			n.CloudId, n.ClusterIP, err = actions.CreateServer(n.Name, n.ServerType, n.BaseImage, n.Location, cluster.Status.ClusterNetwork.CloudId, nil, labels, sshKeys, cloudInit)
			if err == nil {
				n.SetPhase(Joining, "waiting for node to join") // TODO once we use SSH, next phase will be Creating
			} else if err == rest.Conflict {
				// A server with this name already exists, so adopt it instead of failing.
				log.Printf("Conflict: node %s has already been created\n", n.Name)
				existingServer, err := actions.GetBootstrapServer(n.Name, n.ApiServer, n.Version)
				if err != nil {
					log.Printf("Unable to get existing server '%s' details from Hetzner: %s\n", n.Name, err.Error())
				} else {
					n.CloudId = existingServer.CloudId
					n.ClusterIP = existingServer.ClusterIP
					n.SetPhase(Joining, "waiting for previously-created node to join")
				}
			} else {
				log.Printf("error creating server '%s': %s", n.Name, err.Error())
			}
		}
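	// Joining: poll the Kubernetes API until the node has registered and reports Ready.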
	case Joining:
		nodeResource, err := actions.GetKubernetesNode(*n)
		if err != nil {
			log.Printf("got error from kubernetes api getting node '%s': %s\n", n.Name, err.Error())
			break
		}
		if nodeResource.IsReady() {
			n.SetPhase(Active, "node has joined")
		}
	case Delete:
		err = actions.DrainNode(*n) // TODO might fail if we go straight from Create/Join to Delete without the node ever registering - even if we check whether the node has registered and the answer is no, we still can't proceed because it's racing us
		if err == nil {
			n.SetPhase(Draining, "")
		} else {
			log.Printf("error draining node '%s': %s", n.Name, err.Error())
		}
	case Draining:
		// TODO draining is complete as soon as there are no non-DaemonSet pods - the timeout should be an upper limit (and should be higher)
		if LongerThan(3 * time.Minute)(*n) {
			err := actions.DeleteNode(*n) // TODO might fail if we go straight from Create/Join to Delete without the node ever registering
			if err == nil {
				n.SetPhase(Deleting, "")
			} else {
				log.Printf("error deleting node '%s': %s\n", n.Name, err.Error())
			}
		}
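	// Deleting: once Kubernetes no longer knows about the node, remove the cloud server.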
	case Deleting:
		_, err = actions.GetKubernetesNode(*n)
		if err == rest.NotFound && LongerThan(2*time.Minute)(*n) {
			notFound := actions.DeleteServer(*n)
			if notFound {
				// The cloud server is gone too, so the node is fully deleted.
				n.SetPhase(Deleted, "")
			}
		}
	default:
		// Other phases (such as Active and Deleted) need no action here.
	}
}
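
// For context, a minimal sketch of how a controller loop might drive this
// state machine, calling MakeProgress for every node on each tick. This is
// illustrative only: the Reconcile function, the nodes argument and the
// 30-second interval are assumptions, not something defined in this file.
func Reconcile(cluster *Cluster, nodes []*NodeStatus, actions Actions, stop <-chan struct{}) {
	ticker := time.NewTicker(30 * time.Second) // hypothetical interval
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			// Give every node a chance to move one step through its lifecycle.
			for _, n := range nodes {
				n.MakeProgress(cluster, actions)
			}
		case <-stop:
			return
		}
	}
}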