-
Notifications
You must be signed in to change notification settings - Fork 124
/
host.go
394 lines (354 loc) · 14.3 KB
/
host.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
package data
import (
"context"
"fmt"
"net/http"
"time"
"github.com/evergreen-ci/evergreen"
"github.com/evergreen-ci/evergreen/cloud"
"github.com/evergreen-ci/evergreen/model/event"
"github.com/evergreen-ci/evergreen/model/host"
"github.com/evergreen-ci/evergreen/model/user"
"github.com/evergreen-ci/evergreen/rest/model"
restmodel "github.com/evergreen-ci/evergreen/rest/model"
"github.com/evergreen-ci/evergreen/units"
"github.com/evergreen-ci/gimlet"
"github.com/evergreen-ci/utility"
"github.com/mongodb/amboy"
"github.com/mongodb/grip"
"github.com/mongodb/grip/message"
"github.com/pkg/errors"
)
// FindHostsInRange translates the REST host filter parameters into a
// host.HostsInRangeParams query, with User set to username, and returns
// whatever host.FindHostsInRange finds for it. If the lookup fails, a nil
// slice is returned together with the unmodified error.
func FindHostsInRange(ctx context.Context, apiParams restmodel.APIHostParams, username string) ([]host.Host, error) {
	found, err := host.FindHostsInRange(ctx, host.HostsInRangeParams{
		User:          username,
		Distro:        apiParams.Distro,
		Region:        apiParams.Region,
		Status:        apiParams.Status,
		UserSpawned:   apiParams.UserSpawned,
		CreatedAfter:  apiParams.CreatedAfter,
		CreatedBefore: apiParams.CreatedBefore,
	})
	if err != nil {
		// Never hand back a partial result alongside an error.
		return nil, err
	}
	return found, nil
}
// NewIntentHost builds a spawn host intent from the request options, inserts
// it into the database, logs the creation, and enqueues the host create job
// for it. The key name in options may be either the name of one of the user's
// saved public keys or the literal key string (see makeSpawnOptions).
//
// It returns the inserted intent host, or an error if the options are invalid,
// the intent cannot be created or inserted, or the create job cannot be
// enqueued.
func NewIntentHost(ctx context.Context, options *restmodel.HostRequestOptions, user *user.DBUser,
	env evergreen.Environment) (*host.Host, error) {
	opts, err := makeSpawnOptions(options, user)
	if err != nil {
		return nil, err
	}

	h, err := cloud.CreateSpawnHost(ctx, *opts, env.Settings())
	if err != nil {
		return nil, errors.Wrap(err, "creating spawn host")
	}

	if err = h.Insert(ctx); err != nil {
		return nil, err
	}

	// The intent is persisted at this point, so record it before attempting to
	// enqueue the job that will actually create the host.
	event.LogHostCreated(h.Id)
	grip.Info(message.Fields{
		"message":  "inserted intent host",
		"host_id":  h.Id,
		"host_tag": h.Tag,
		"distro":   h.Distro.Id,
		"user":     user.Username(),
	})

	err = units.EnqueueHostCreateJobs(ctx, env, []host.Host{*h})
	if err != nil {
		return nil, errors.Wrapf(err, "enqueueing host create job for '%s'", h.Id)
	}

	return h, nil
}
// GenerateHostProvisioningScript produces the user data provisioning script
// for the host identified by hostID (looked up by ID or tag).
//
// It generates Jasper credentials for the host, renders the provisioning
// script with them, and only then saves the credentials. Failures are
// reported as gimlet.ErrorResponse values: 400 for an empty host ID, 404 when
// no host matches, and 500 for any lookup, generation, or save error.
func GenerateHostProvisioningScript(ctx context.Context, env evergreen.Environment, hostID string) (string, error) {
	// internalErr turns an already-annotated error into a 500 response.
	internalErr := func(annotated error) error {
		return gimlet.ErrorResponse{
			StatusCode: http.StatusInternalServerError,
			Message:    annotated.Error(),
		}
	}

	if hostID == "" {
		return "", gimlet.ErrorResponse{
			StatusCode: http.StatusBadRequest,
			Message:    "cannot generate host provisioning script without a host ID",
		}
	}

	h, err := host.FindOneByIdOrTag(ctx, hostID)
	if err != nil {
		return "", internalErr(errors.Wrapf(err, "finding host '%s'", hostID))
	}
	if h == nil {
		return "", gimlet.ErrorResponse{
			StatusCode: http.StatusNotFound,
			Message:    fmt.Sprintf("host with id '%s' not found", hostID),
		}
	}

	creds, err := h.GenerateJasperCredentials(ctx, env)
	if err != nil {
		return "", internalErr(errors.Wrap(err, "generating Jasper credentials"))
	}

	script, err := h.GenerateUserDataProvisioningScript(ctx, env.Settings(), creds)
	if err != nil {
		return "", internalErr(errors.Wrap(err, "generating host provisioning script"))
	}

	// Credentials are saved last, after the script has been generated
	// successfully.
	if err = h.SaveJasperCredentials(ctx, env, creds); err != nil {
		return "", internalErr(errors.Wrap(err, "saving Jasper credentials"))
	}

	return script, nil
}
// FindHostByIdWithOwner finds the host with the given ID and verifies that
// the given user is allowed to act on it. The host is returned when it was
// started by the user, or when the user holds at least the host edit
// permission level (evergreen.HostsEdit) on the host's distro.
//
// Failures are reported as gimlet.ErrorResponse values:
//   - 500 if the host lookup fails (the message includes the underlying error),
//   - 404 if no host has the given ID,
//   - 401 if the user neither started the host nor has the required permission.
func FindHostByIdWithOwner(ctx context.Context, hostID string, user gimlet.User) (*host.Host, error) {
	hostById, err := host.FindOneId(ctx, hostID)
	if err != nil {
		return nil, gimlet.ErrorResponse{
			StatusCode: http.StatusInternalServerError,
			// Keep the cause in the message, as the other lookups in this
			// file do, instead of discarding it.
			Message: errors.Wrap(err, "fetching host information").Error(),
		}
	}
	if hostById == nil {
		return nil, gimlet.ErrorResponse{
			StatusCode: http.StatusNotFound,
			Message:    fmt.Sprintf("host '%s' not found", hostID),
		}
	}

	// The user who started the host needs no further permission check.
	if user.Username() == hostById.StartedBy {
		return hostById, nil
	}

	canEditHosts := user.HasPermission(gimlet.PermissionOpts{
		Resource:      hostById.Distro.Id,
		ResourceType:  evergreen.DistroResourceType,
		Permission:    evergreen.PermissionHosts,
		RequiredLevel: evergreen.HostsEdit.Value,
	})
	if !canEditHosts {
		return nil, gimlet.ErrorResponse{
			StatusCode: http.StatusUnauthorized,
			Message:    "not authorized to modify host",
		}
	}

	return hostById, nil
}
// errHostStatusChangeConflict replaces amboy's duplicate job scope error when
// enqueueing a spawn host modification job fails because of it.
var errHostStatusChangeConflict = errors.New("conflicting host status modification is in progress")
// TerminateSpawnHost enqueues a job to terminate the spawn host h on behalf of
// user u. It returns an HTTP status code alongside the error:
//   - 400 if the host is already terminated,
//   - 500 if the job cannot be enqueued (a duplicate job scope error is
//     reported as errHostStatusChangeConflict),
//   - 200 on success.
func TerminateSpawnHost(ctx context.Context, env evergreen.Environment, u *user.DBUser, h *host.Host) (int, error) {
	if h.Status == evergreen.HostTerminated {
		return http.StatusBadRequest, errors.Errorf("host '%s' is already terminated", h.Id)
	}

	// The timestamp passed to the job is rounded via RoundPartOfMinute.
	ts := utility.RoundPartOfMinute(1).Format(units.TSFormat)
	err := units.EnqueueSpawnHostModificationJob(ctx, env, units.NewSpawnHostTerminationJob(h, u.Id, ts))

	switch {
	case err == nil:
		return http.StatusOK, nil
	case amboy.IsDuplicateJobScopeError(err):
		return http.StatusInternalServerError, errHostStatusChangeConflict
	default:
		return http.StatusInternalServerError, err
	}
}
// StopSpawnHost enqueues a job to stop the spawn host h on behalf of user u.
// shouldKeepOff is forwarded to the stop job unchanged, and the modification
// is recorded as manual (evergreen.ModifySpawnHostManual).
//
// It returns an HTTP status code alongside the error:
//   - 400 if the host's status is not in evergreen.StoppableHostStatuses,
//   - 500 if the job cannot be enqueued (a duplicate job scope error is
//     reported as errHostStatusChangeConflict),
//   - 200 on success.
func StopSpawnHost(ctx context.Context, env evergreen.Environment, u *user.DBUser, h *host.Host, shouldKeepOff bool) (int, error) {
	if !utility.StringSliceContains(evergreen.StoppableHostStatuses, h.Status) {
		return http.StatusBadRequest, errors.Errorf("host '%s' cannot be stopped because its status ('%s') is not a stoppable state", h.Id, h.Status)
	}

	ts := utility.RoundPartOfMinute(1).Format(units.TSFormat)
	stopJob := units.NewSpawnhostStopJob(h, shouldKeepOff, evergreen.ModifySpawnHostManual, u.Id, ts)
	if err := units.EnqueueSpawnHostModificationJob(ctx, env, stopJob); err != nil {
		if amboy.IsDuplicateJobScopeError(err) {
			// Replace the queue-level error with the package's conflict error.
			err = errHostStatusChangeConflict
		}
		return http.StatusInternalServerError, err
	}

	return http.StatusOK, nil
}
// StartSpawnHost enqueues a job to start the spawn host h on behalf of user u.
// The modification is recorded as manual (evergreen.ModifySpawnHostManual).
//
// It returns an HTTP status code alongside the error:
//   - 400 if the host's status is not in evergreen.StartableHostStatuses,
//   - 500 if the job cannot be enqueued (a duplicate job scope error is
//     reported as errHostStatusChangeConflict),
//   - 200 on success.
func StartSpawnHost(ctx context.Context, env evergreen.Environment, u *user.DBUser, h *host.Host) (int, error) {
	if !utility.StringSliceContains(evergreen.StartableHostStatuses, h.Status) {
		return http.StatusBadRequest, errors.Errorf("host '%s' cannot be started because its status ('%s') is not a startable state", h.Id, h.Status)
	}

	ts := utility.RoundPartOfMinute(1).Format(units.TSFormat)
	startJob := units.NewSpawnhostStartJob(h, evergreen.ModifySpawnHostManual, u.Id, ts)
	if err := units.EnqueueSpawnHostModificationJob(ctx, env, startJob); err != nil {
		if amboy.IsDuplicateJobScopeError(err) {
			// Replace the queue-level error with the package's conflict error.
			err = errHostStatusChangeConflict
		}
		return http.StatusInternalServerError, err
	}

	return http.StatusOK, nil
}
// ModifySpawnHost enqueues a job to apply the modifications in opts to the
// spawn host h. The user u is accepted for signature parity with the other
// spawn host helpers but is not used here.
//
// It returns 500 with the error if the job cannot be enqueued (a duplicate job
// scope error is reported as errHostStatusChangeConflict), and 200 otherwise.
func ModifySpawnHost(ctx context.Context, env evergreen.Environment, u *user.DBUser, h *host.Host, opts host.HostModifyOptions) (int, error) {
	ts := utility.RoundPartOfMinute(1).Format(units.TSFormat)
	err := units.EnqueueSpawnHostModificationJob(ctx, env, units.NewSpawnhostModifyJob(h, opts, ts))

	switch {
	case err == nil:
		return http.StatusOK, nil
	case amboy.IsDuplicateJobScopeError(err):
		return http.StatusInternalServerError, errHostStatusChangeConflict
	default:
		return http.StatusInternalServerError, err
	}
}
// makeSpawnOptions validates a HostRequestOptions and converts it into the
// cloud.SpawnOptions used to create a spawn host for the given user.
//
// options.KeyName is first treated as the name of one of the user's saved
// public keys. If that lookup fails and the name is non-empty, the name itself
// is used as the key value. An error is returned when the name is empty and
// the lookup fails, or when the resolved key value is empty.
func makeSpawnOptions(options *restmodel.HostRequestOptions, user *user.DBUser) (*cloud.SpawnOptions, error) {
	publicKey, err := user.GetPublicKey(options.KeyName)
	if err != nil && options.KeyName == "" {
		return nil, errors.Wrap(err, "key name is empty")
	}
	if err != nil {
		// Not a saved key name, so assume the caller passed the key contents.
		publicKey = options.KeyName
	}
	if publicKey == "" {
		return nil, errors.Errorf("public key '%s' cannot have an empty value", options.KeyName)
	}

	provisioning := &host.ProvisionOptions{
		OwnerId:     user.Id,
		TaskId:      options.TaskID,
		TaskSync:    options.TaskSync,
		SetupScript: options.SetupScript,
	}

	return &cloud.SpawnOptions{
		DistroId:              options.DistroID,
		UserName:              user.Username(),
		PublicKey:             publicKey,
		Userdata:              options.UserData,
		Region:                options.Region,
		InstanceType:          options.InstanceType,
		InstanceTags:          options.InstanceTags,
		Expiration:            options.Expiration,
		NoExpiration:          options.NoExpiration,
		IsVirtualWorkstation:  options.IsVirtualWorkstation,
		IsCluster:             options.IsCluster,
		HomeVolumeSize:        options.HomeVolumeSize,
		HomeVolumeID:          options.HomeVolumeID,
		UseProjectSetupScript: options.UseProjectSetupScript,
		ProvisionOptions:      provisioning,
	}, nil
}
// PostHostIsUp handles a host reporting that it is up. The host is looked up
// by ID or tag from params.HostID, passed through fixProvisioningIntentHost
// with the reported EC2 instance ID, and returned as an API host.
//
// Failures are reported as gimlet.ErrorResponse values: 404 when no host
// matches, and 500 when the lookup or the intent host fix fails.
func PostHostIsUp(ctx context.Context, params restmodel.APIHostIsUpOptions) (*restmodel.APIHost, error) {
	h, err := host.FindOneByIdOrTag(ctx, params.HostID)
	switch {
	case err != nil:
		return nil, gimlet.ErrorResponse{
			StatusCode: http.StatusInternalServerError,
			Message:    errors.Wrapf(err, "finding host '%s'", params.HostID).Error(),
		}
	case h == nil:
		return nil, gimlet.ErrorResponse{
			StatusCode: http.StatusNotFound,
			Message:    fmt.Sprintf("host '%s' not found", params.HostID),
		}
	}

	if err = fixProvisioningIntentHost(ctx, h, params.EC2InstanceID); err != nil {
		return nil, gimlet.ErrorResponse{
			StatusCode: http.StatusInternalServerError,
			Message:    errors.Wrap(err, "fixing intent host").Error(),
		}
	}

	// Build the response from h after the fix above, which may have rewritten
	// its ID and status in place.
	apiHost := &model.APIHost{}
	apiHost.BuildFromService(h, nil)
	return apiHost, nil
}
// fixProvisioningIntentHost repairs the special case where Evergreen still
// has a host stored as an intent host even though the underlying instance is
// already running and provisioning.
//
// It is a no-op for hosts whose distro provider is not EC2 and for hosts whose
// ID is already an EC2 instance ID. Otherwise instanceID is required: a
// building host is transitioned to starting, a host that already failed
// building, was decommissioned, or was terminated is transitioned to
// decommissioned, and any other status is reported as a logical error.
func fixProvisioningIntentHost(ctx context.Context, h *host.Host, instanceID string) error {
	// Intent host issues only affect ephemeral (i.e. EC2) hosts.
	if !evergreen.IsEc2Provider(h.Distro.Provider) {
		return nil
	}
	// A host that already carries an instance ID is not an intent host, so
	// there is nothing to fix.
	if cloud.IsEC2InstanceID(h.Id) {
		return nil
	}

	if instanceID == "" {
		// Without the EC2 instance ID from the agent, the intent host cannot
		// be matched to a real instance, so it cannot be fixed here.
		const msg = "intent host is up, but it did not provide an EC2 instance ID, which is required"
		grip.Warning(message.Fields{
			"message":     msg,
			"host_id":     h.Id,
			"host_status": h.Status,
			"provider":    h.Distro.Provider,
			"distro":      h.Distro.Id,
		})
		// TODO (DEVPROD-6752) asked for this to become an error once all
		// hosts send their EC2 instance ID to this route.
		// NOTE(review): the error is already returned below, so the TODO
		// looks resolved - confirm and drop the ticket reference.
		return errors.New(msg)
	}

	env := evergreen.GetEnvironment()
	switch h.Status {
	case evergreen.HostBuilding:
		err := transitionIntentHostToStarting(ctx, env, h, instanceID)
		return errors.Wrap(err, "starting intent host that actually succeeded")
	case evergreen.HostBuildingFailed, evergreen.HostDecommissioned, evergreen.HostTerminated:
		err := transitionIntentHostToDecommissioned(ctx, env, h, instanceID)
		return errors.Wrap(err, "decommissioning intent host")
	default:
		return errors.Errorf("logical error: intent host is in state '%s', which should be impossible when host is up and provisioning", h.Status)
	}
}
// transitionIntentHostToStarting converts an intent host into a real host
// because the instance is actually up. The host passed in is rewritten in
// place: its ID becomes instanceID, its status becomes starting, and its start
// time is set to now. The stored intent document is then replaced with it via
// host.UnsafeReplace, and a host-start-succeeded event is logged on success.
func transitionIntentHostToStarting(ctx context.Context, env evergreen.Environment, hostToStart *host.Host, instanceID string) error {
	// Remember the intent ID; it is needed to find the document to replace
	// once the in-memory host has been given its real ID.
	intentHostID := hostToStart.Id

	// Log before mutating so the notice records the pre-fix status.
	grip.Notice(message.Fields{
		"message":     "DB-EC2 state mismatch - EC2 instance started but Evergreen still has it stored as an intent host, fixing now",
		"old_host_id": intentHostID,
		"new_host_id": instanceID,
		"host_tag":    hostToStart.Tag,
		"distro":      hostToStart.Distro.Id,
		"host_status": hostToStart.Status,
	})

	hostToStart.Id = instanceID
	hostToStart.Status = evergreen.HostStarting
	hostToStart.StartTime = time.Now()

	err := host.UnsafeReplace(ctx, env, intentHostID, hostToStart)
	if err != nil {
		return errors.Wrap(err, "replacing intent host with real host")
	}

	event.LogHostStartSucceeded(hostToStart.Id, evergreen.User)
	return nil
}
// transitionIntentHostToDecommissioned converts an intent host into a real
// host because the instance is actually up, but marks it decommissioned since
// Evergreen had already given up on it. The host passed in is rewritten in
// place: its ID becomes instanceID and its status becomes decommissioned. The
// stored intent document is then replaced with it via host.UnsafeReplace, and
// the status change is logged as an event on success.
func transitionIntentHostToDecommissioned(ctx context.Context, env evergreen.Environment, hostToDecommission *host.Host, instanceID string) error {
	// Capture the pre-fix identity and status up front; both are overwritten
	// below but are still needed for the replace call and the logs.
	intentHostID := hostToDecommission.Id
	oldStatus := hostToDecommission.Status

	grip.Notice(message.Fields{
		"message":     "DB-EC2 state mismatch - EC2 instance started but Evergreen already gave up on this host, fixing now",
		"host_id":     intentHostID,
		"instance_id": instanceID,
		"host_status": oldStatus,
	})

	hostToDecommission.Id = instanceID
	hostToDecommission.Status = evergreen.HostDecommissioned

	err := host.UnsafeReplace(ctx, env, intentHostID, hostToDecommission)
	if err != nil {
		return errors.Wrap(err, "replacing intent host with real host")
	}

	event.LogHostStatusChanged(hostToDecommission.Id, oldStatus, hostToDecommission.Status, evergreen.User, "host started agent but intent host is already considered a failure")
	grip.Info(message.Fields{
		"message":    "intent host decommissioned",
		"host_id":    hostToDecommission.Id,
		"host_tag":   hostToDecommission.Tag,
		"distro":     hostToDecommission.Distro.Id,
		"old_status": oldStatus,
	})

	return nil
}