This repository has been archived by the owner on Apr 29, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 9
/
nodeagent.go
229 lines (201 loc) · 8.64 KB
/
nodeagent.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
package core
import (
"bytes"
"fmt"
"os"
"strings"
"time"
"github.com/caos/orbos/internal/orb"
"github.com/caos/orbos/internal/helpers"
"github.com/caos/orbos/internal/operator/common"
"github.com/caos/orbos/internal/executables"
"github.com/caos/orbos/internal/operator/orbiter/kinds/clusters/core/infra"
"github.com/caos/orbos/mntr"
"github.com/pkg/errors"
)
type IterateNodeAgentFuncs func(currentNodeAgents *common.CurrentNodeAgents) (queryNodeAgent func(machine infra.Machine, orbiterCommit string) (bool, error), install func(machine infra.Machine) error)
func ConfigureNodeAgents(svc MachinesService, monitor mntr.Monitor, orb orb.Orb) error {
configure, _ := NodeAgentFuncs(monitor, orb.URL, orb.Repokey)
return Each(svc, func(pool string, machine infra.Machine) error {
err := configure(machine)
if err != nil {
return fmt.Errorf("configuring node agent on machine %s in pool %s failed: %w", machine.ID(), pool, err)
}
monitor.WithFields(map[string]interface{}{
"pool": pool,
"machine:": machine.ID(),
}).Info("Node agent configured")
return nil
})
}
func NodeAgentFuncs(
monitor mntr.Monitor,
repoURL string,
repoKey string) (reconfigure func(machines infra.Machine) error, iterate IterateNodeAgentFuncs) {
configure := func(machine infra.Machine) func() error {
machineMonitor := monitor.WithField("machine", machine.ID())
return helpers.Fanout([]func() error{
func() error {
keyPath := "/var/orbiter/repo-key"
if err := infra.Try(machineMonitor, time.NewTimer(8*time.Second), 2*time.Second, machine, func(cmp infra.Machine) error {
return errors.Wrapf(cmp.WriteFile(keyPath, strings.NewReader(repoKey), 400), "creating remote file %s failed", keyPath)
}); err != nil {
return errors.Wrap(err, "writing repokey failed")
}
machineMonitor.WithFields(map[string]interface{}{
"path": keyPath,
}).Debug("Written file")
return nil
},
func() error {
urlPath := "/var/orbiter/repo-url"
if err := infra.Try(machineMonitor, time.NewTimer(8*time.Second), 2*time.Second, machine, func(cmp infra.Machine) error {
return errors.Wrapf(cmp.WriteFile(urlPath, strings.NewReader(repoURL), 400), "creating remote file %s failed", urlPath)
}); err != nil {
return errors.Wrap(err, "writing repourl failed")
}
machineMonitor.WithFields(map[string]interface{}{
"path": urlPath,
}).Debug("Written file")
return nil
},
})
}
return func(machine infra.Machine) error {
return configure(machine)()
}, func(currentNodeAgents *common.CurrentNodeAgents) (func(infra.Machine, string) (bool, error), func(infra.Machine) error) {
return func(machine infra.Machine, orbiterCommit string) (running bool, err error) {
machineMonitor := monitor.WithField("machine", machine.ID())
var response []byte
isActive := "sudo systemctl is-active node-agentd"
err = infra.Try(machineMonitor, time.NewTimer(7*time.Second), 2*time.Second, machine, func(cmp infra.Machine) error {
var cbErr error
response, cbErr = cmp.Execute(nil, isActive)
return errors.Wrapf(cbErr, "remote command %s returned an unsuccessful exit code", isActive)
})
machineMonitor.WithFields(map[string]interface{}{
"command": isActive,
"response": string(response),
}).Debug("Executed command")
if err != nil && !strings.Contains(string(response), "activating") {
return false, nil
}
current, ok := currentNodeAgents.Get(machine.ID())
if ok && current.Commit == orbiterCommit {
return true, nil
}
showVersion := "node-agent --version"
err = infra.Try(machineMonitor, time.NewTimer(7*time.Second), 2*time.Second, machine, func(cmp infra.Machine) error {
var cbErr error
response, cbErr = cmp.Execute(nil, showVersion)
return errors.Wrapf(cbErr, "running command %s remotely failed", showVersion)
})
if err != nil {
return false, err
}
machineMonitor.WithFields(map[string]interface{}{
"command": showVersion,
"response": string(response),
}).Debug("Executed command")
fields := strings.Fields(string(response))
return len(fields) > 1 && fields[1] == orbiterCommit, nil
}, func(machine infra.Machine) error {
machineMonitor := monitor.WithField("machine", machine.ID())
systemdEntry := "node-agentd"
systemdPath := fmt.Sprintf("/lib/systemd/system/%s.service", systemdEntry)
nodeAgentPath := "/usr/local/bin/node-agent"
healthPath := "/usr/local/bin/health"
binary := nodeAgentPath
if os.Getenv("MODE") == "DEBUG" {
// Run node agent in debug mode
if _, err := machine.Execute(nil, "sudo apt-get update && sudo apt-get install -y git && wget https://dl.google.com/go/go1.13.3.linux-amd64.tar.gz && sudo tar -zxvf go1.13.3.linux-amd64.tar.gz -C / && sudo chown -R $(id -u):$(id -g) /go && /go/bin/go get -u github.com/go-delve/delve/cmd/dlv && /go/bin/go install github.com/go-delve/delve/cmd/dlv && mv ${HOME}/go/bin/dlv /usr/local/bin"); err != nil {
panic(err)
}
binary = fmt.Sprintf("dlv exec %s --api-version 2 --headless --listen 0.0.0.0:5000 --continue --accept-multiclient --", nodeAgentPath)
}
stopSystemd := fmt.Sprintf("sudo systemctl stop %s orbos.health* || true", systemdEntry)
if err := infra.Try(machineMonitor, time.NewTimer(8*time.Second), 2*time.Second, machine, func(cmp infra.Machine) error {
_, cbErr := cmp.Execute(nil, stopSystemd)
return errors.Wrapf(cbErr, "running command %s remotely failed", stopSystemd)
}); err != nil {
return errors.Wrap(err, "remotely stopping systemd services failed")
}
machineMonitor.WithFields(map[string]interface{}{
"command": stopSystemd,
}).Debug("Executed command")
if err := helpers.Fanout([]func() error{
configure(machine),
func() error {
if err := infra.Try(machineMonitor, time.NewTimer(8*time.Second), 2*time.Second, machine, func(cmp infra.Machine) error {
return errors.Wrapf(cmp.WriteFile(systemdPath, strings.NewReader(fmt.Sprintf(`[Unit]
Description=Node Agent
After=network.target
[Service]
Type=simple
User=root
ExecStart=%s --id "%s"
Restart=always
MemoryLimit=250M
MemoryAccounting=yes
RestartSec=10
CPUAccounting=yes
MemoryAccounting=yes
[Install]
WantedBy=multi-user.target
`, binary, machine.ID())), 600), "creating remote file %s failed", systemdPath)
}); err != nil {
return errors.Wrap(err, "remotely configuring Node Agent systemd unit failed")
}
machineMonitor.WithFields(map[string]interface{}{
"path": systemdPath,
}).Debug("Written file")
return nil
},
func() error {
nodeagent, err := executables.PreBuilt("nodeagent")
if err != nil {
return err
}
if err := infra.Try(machineMonitor, time.NewTimer(20*time.Second), 2*time.Second, machine, func(cmp infra.Machine) error {
return errors.Wrapf(cmp.WriteFile(nodeAgentPath, bytes.NewReader(nodeagent), 700), "creating remote file %s failed", nodeAgentPath)
}); err != nil {
return errors.Wrap(err, "remotely installing Node Agent failed")
}
machineMonitor.WithFields(map[string]interface{}{
"path": nodeAgentPath,
}).Debug("Written file")
return nil
},
func() error {
health, err := executables.PreBuilt("health")
if err != nil {
return err
}
if err := infra.Try(machineMonitor, time.NewTimer(20*time.Second), 2*time.Second, machine, func(cmp infra.Machine) error {
return errors.Wrapf(cmp.WriteFile(healthPath, bytes.NewReader(health), 711), "creating remote file %s failed", healthPath)
}); err != nil {
return errors.Wrap(err, "remotely installing health executable failed")
}
machineMonitor.WithFields(map[string]interface{}{
"path": healthPath,
}).Debug("Written file")
return nil
},
})(); err != nil {
return err
}
enableSystemd := fmt.Sprintf("sudo systemctl daemon-reload && sudo systemctl enable %s && sudo systemctl restart %s", systemdPath, systemdEntry)
if err := infra.Try(machineMonitor, time.NewTimer(8*time.Second), 2*time.Second, machine, func(cmp infra.Machine) error {
_, cbErr := cmp.Execute(nil, enableSystemd)
return errors.Wrapf(cbErr, "running command %s remotely failed", enableSystemd)
}); err != nil {
return errors.Wrap(err, "remotely configuring systemd to autostart Node Agent after booting failed")
}
machineMonitor.WithFields(map[string]interface{}{
"command": enableSystemd,
}).Debug("Executed command")
machineMonitor.Info("Node Agent installed")
return nil
}
}
}