Skip to content

Commit

Permalink
AWS: Dedicated hosts management (#66)
Browse files Browse the repository at this point in the history
This change enables dedicated hosts management on AWS and a couple of other useful things

* AWS: Integrated a one-node dedicated host pool manager. It can spin up hosts when needed and monitors their usage to release a host once it is no longer needed. A label definition can now have a `pool` option, which uses the pre-defined dedicated pool to allocate a new host or reuse an already allocated one.
* AWS: Updated SDK dependency to the latest versions
* AWS: Switched to create objects instead of references and utilized more aws functions to convert to/from aws types
* AWS: Added retries for the API communication, due to the issues with utilizing Host in Allocation after it becomes Available: https://docs.aws.amazon.com/ec2/latest/devguide/eventual-consistency.html
* "Driver: AWS" wiki page was updated
  • Loading branch information
sparshev authored Jun 17, 2024
1 parent e7e9009 commit e8463e2
Show file tree
Hide file tree
Showing 9 changed files with 1,013 additions and 91 deletions.
19 changes: 10 additions & 9 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ go 1.21.0
require (
github.com/alessio/shellescape v1.4.1
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5
github.com/aws/aws-sdk-go-v2 v1.17.8
github.com/aws/aws-sdk-go-v2/service/ec2 v1.93.2
github.com/aws/aws-sdk-go-v2/service/kms v1.20.10
github.com/aws/aws-sdk-go-v2/service/servicequotas v1.14.10
github.com/aws/aws-sdk-go-v2/service/sts v1.18.9
github.com/aws/aws-sdk-go-v2 v1.27.2
github.com/aws/aws-sdk-go-v2/service/ec2 v1.163.1
github.com/aws/aws-sdk-go-v2/service/kms v1.32.3
github.com/aws/aws-sdk-go-v2/service/servicequotas v1.21.10
github.com/aws/aws-sdk-go-v2/service/sts v1.28.12
github.com/deepmap/oapi-codegen v1.12.4
github.com/getkin/kin-openapi v0.115.0
github.com/ghodss/yaml v1.0.0
Expand All @@ -32,10 +32,11 @@ require (

require (
github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.32 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.26 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.26 // indirect
github.com/aws/smithy-go v1.13.5 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.9 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.9 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.2 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.11 // indirect
github.com/aws/smithy-go v1.20.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
Expand Down
39 changes: 20 additions & 19 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,26 @@ github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7D
github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/aws/aws-sdk-go-v2 v1.17.8 h1:GMupCNNI7FARX27L7GjCJM8NgivWbRgpjNI/hOQjFS8=
github.com/aws/aws-sdk-go-v2 v1.17.8/go.mod h1:uzbQtefpm44goOPmdKyAlXSNcwlRgF3ePWVW6EtJvvw=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.32 h1:dpbVNUjczQ8Ae3QKHbpHBpfvaVkRdesxpTOe9pTouhU=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.32/go.mod h1:RudqOgadTWdcS3t/erPQo24pcVEoYyqj/kKW5Vya21I=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.26 h1:QH2kOS3Ht7x+u0gHCh06CXL/h6G8LQJFpZfFBYBNboo=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.26/go.mod h1:vq86l7956VgFr0/FWQ2BWnK07QC3WYsepKzy33qqY5U=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.93.2 h1:c6a19AjfhEXKlEX63cnlWtSQ4nzENihHZOG0I3wH6BE=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.93.2/go.mod h1:VX22JN3HQXDtQ3uS4h4TtM+K11vydq58tpHTlsm8TL8=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.26 h1:uUt4XctZLhl9wBE1L8lobU3bVN8SNUP7T+olb0bWBO4=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.26/go.mod h1:Bd4C/4PkVGubtNe5iMXu5BNnaBi/9t/UsFspPt4ram8=
github.com/aws/aws-sdk-go-v2/service/kms v1.20.10 h1:rmw2sdnYS5kP96hKmcm8Yr+ttZLC/zHER8nuQ9vbomc=
github.com/aws/aws-sdk-go-v2/service/kms v1.20.10/go.mod h1:gSdg6VjsqS8EeGjkXAaLjiwG9fwNrCPAj/kAD6of7EI=
github.com/aws/aws-sdk-go-v2/service/servicequotas v1.14.10 h1:wJPOrMYly0o02eQjL8a33oSWKMmNZYSfkT5/Vf1huEU=
github.com/aws/aws-sdk-go-v2/service/servicequotas v1.14.10/go.mod h1:woMwSEInrmXHCxm703FKJ7T5hAUakPT+rcaHP0fbnMw=
github.com/aws/aws-sdk-go-v2/service/sts v1.18.9 h1:Qf1aWwnsNkyAoqDqmdM3nHwN78XQjec27LjM6b9vyfI=
github.com/aws/aws-sdk-go-v2/service/sts v1.18.9/go.mod h1:yyW88BEPXA2fGFyI2KCcZC3dNpiT0CZAHaF+i656/tQ=
github.com/aws/smithy-go v1.13.5 h1:hgz0X/DX0dGqTYpGALqXJoRKRj5oQ7150i5FdTePzO8=
github.com/aws/smithy-go v1.13.5/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/aws/aws-sdk-go-v2 v1.27.2 h1:pLsTXqX93rimAOZG2FIYraDQstZaaGVVN4tNw65v0h8=
github.com/aws/aws-sdk-go-v2 v1.27.2/go.mod h1:ffIFB97e2yNsv4aTSGkqtHnppsIJzw7G7BReUZ3jCXM=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.9 h1:cy8ahBJuhtM8GTTSyOkfy6WVPV1IE+SS5/wfXUYuulw=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.9/go.mod h1:CZBXGLaJnEZI6EVNcPd7a6B5IC5cA/GkRWtu9fp3S6Y=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.9 h1:A4SYk07ef04+vxZToz9LWvAXl9LW0NClpPpMsi31cz0=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.9/go.mod h1:5jJcHuwDagxN+ErjQ3PU3ocf6Ylc/p9x+BLO/+X4iXw=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.163.1 h1:0RiDkJO1veM6/FQ+GJcGiIhZgPwXlscX29B0zFE4Ulo=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.163.1/go.mod h1:gYk1NtyvkH1SxPcndDtfro3lwbiE5t0tW4eRki5YnOQ=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.2 h1:Ji0DY1xUsUr3I8cHps0G+XM3WWU16lP6yG8qu1GAZAs=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.2/go.mod h1:5CsjAbs3NlGQyZNFACh+zztPDI7fU6eW9QsxjfnuBKg=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.11 h1:o4T+fKxA3gTMcluBNZZXE9DNaMkJuUL1O3mffCUjoJo=
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.11/go.mod h1:84oZdJ+VjuJKs9v1UTC9NaodRZRseOXCTgku+vQJWR8=
github.com/aws/aws-sdk-go-v2/service/kms v1.32.3 h1:PtuDgLHjTq9JgykpX93EqGHlbNK0ju8xuDMcdD1Uo5I=
github.com/aws/aws-sdk-go-v2/service/kms v1.32.3/go.mod h1:uQiZ8PiSsPZuVC+hYKe/bSDZEhejdQW8GRemyUp0hio=
github.com/aws/aws-sdk-go-v2/service/servicequotas v1.21.10 h1:B4VK4LEI/L5dtYq2Omzt4XQ9WwtZX7I+YwmkhcDdEV8=
github.com/aws/aws-sdk-go-v2/service/servicequotas v1.21.10/go.mod h1:jAMj6BiwJo5rCrR97LdKlo1M494krOfnPJCS6X7etcU=
github.com/aws/aws-sdk-go-v2/service/sts v1.28.12 h1:M/1u4HBpwLuMtjlxuI2y6HoVLzF5e2mfxHCg7ZVMYmk=
github.com/aws/aws-sdk-go-v2/service/sts v1.28.12/go.mod h1:kcfd+eTdEi/40FIbLq4Hif3XMXnl5b/+t/KTfLt9xIk=
github.com/aws/smithy-go v1.20.2 h1:tbp628ireGtzcHDDmLT/6ADHidqnwgF57XOXZe6tp4Q=
github.com/aws/smithy-go v1.20.2/go.mod h1:krry+ya/rV9RDcV/Q16kpu6ypI4K2czasz0NC3qS14E=
github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
Expand Down Expand Up @@ -55,7 +57,6 @@ github.com/go-test/deep v1.0.8/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncV
github.com/golang-jwt/jwt v3.2.2+incompatible h1:IfV12K8xAKAnZqdXVzCZ+TOjboZ2keLg81eXfW3O+oY=
github.com/golang-jwt/jwt v3.2.2+incompatible/go.mod h1:8pz2t5EyA70fFQQSrl6XZXzqecmYZeUEB8OUGHkxJ+I=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ=
Expand Down
62 changes: 62 additions & 0 deletions lib/drivers/aws/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/aws/aws-sdk-go-v2/service/sts"

"github.com/adobe/aquarium-fish/lib/log"
"github.com/adobe/aquarium-fish/lib/util"
)

type Config struct {
Expand All @@ -32,6 +33,47 @@ type Config struct {
// Optional
AccountIDs []string `json:"account_ids"` // AWS Trusted account IDs to filter vpc, subnet, sg, images, snapshots...
InstanceTags map[string]string `json:"instance_tags"` // AWS Instance tags to use when this node provision them

// Manage the AWS dedicated hosts to keep them busy and deallocate when not needed
// Key of the map is name of the pool - will be used for identification of the pool
DedicatedPool map[string]DedicatedPoolRecord `json:"dedicated_pool"`
}

// DedicatedPoolRecord stores the configuration of an AWS dedicated hosts pool of a particular
// instance type to manage.
//
// For reference, a manual allocation of such a host with the AWS CLI looks like:
//
//	aws ec2 allocate-hosts --availability-zone "us-west-2c" --auto-placement "on" --host-recovery "off" --host-maintenance "off" --quantity 1 --instance-type "mac2.metal"
type DedicatedPoolRecord struct {
Type string `json:"type"` // Instance type handled by the dedicated hosts pool (example: "mac2.metal")
Zone string `json:"zone"` // Where to allocate the dedicated host (example: "us-west-2c")
Max uint `json:"max"` // Maximum dedicated hosts to allocate (they sometimes can handle more than 1 capacity slot)

// ScrubbingDelay is a special optimization for the Mac dedicated hosts: it sends them into the
// [scrubbing process] to save money when the host can't be released due to the [24 hours]
// minimum allocation limit of Apple's license.
//
// Details:
//
// Apple forces AWS and any of their customers to keep the Mac dedicated hosts allocated for at
// least [24 hours]. So after allocation there is no way to release the dedicated host, even if
// it is not needed. This makes the Mac hosts very pricey for any kind of dynamic allocation.
// To work around this issue, Aquarium implements an optimization that keeps the Mac hosts
// busy with the [scrubbing process], which is triggered after the instance stop or termination
// and puts the Mac host in pending state for 1-2hr. That is the downside of the optimization:
// the machine can not be used until it becomes available again.
//
// That is why this ScrubbingDelay config exists - the Mac host needs some time to give the
// workload a chance to utilize it. If the host is not utilized within this duration, the
// manager will start the scrubbing process. When the host becomes old enough, the manager will
// release it to free up space for a new fresh Mac in the roster.
//
// * When this option is unset or 0 - no optimization is enabled.
// * When it's set - it is the duration to stay idle before allocating and terminating an empty
// instance to trigger scrubbing.
//
// The current implementation is attached to the state update, which could consume API requests,
// so this duration should be >= 1 min, otherwise the API requests will be too frequent
// (Config.Validate rejects a non-zero value below 1 minute).
//
// [24 hours]: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-mac-instances.html#mac-instance-considerations
// [scrubbing process]: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/mac-instance-stop.html
ScrubbingDelay util.Duration `json:"scrubbing_delay"`
}

func (c *Config) Apply(config []byte) error {
Expand Down Expand Up @@ -68,6 +110,11 @@ func (c *Config) Validate() (err error) {
Source: "fish-cfg",
}, nil
}),

// Using retries in order to handle the transient errors:
// https://docs.aws.amazon.com/prescriptive-guidance/latest/cloud-design-patterns/retry-backoff.html
RetryMaxAttempts: 3,
RetryMode: aws.RetryModeStandard,
})
input := &sts.GetCallerIdentityInput{}

Expand Down Expand Up @@ -100,5 +147,20 @@ func (c *Config) Validate() (err error) {
log.Debug("AWS: Using Account IDs:", c.AccountIDs)
}

// Init empty instance tags in case it is not set
if c.InstanceTags == nil {
c.InstanceTags = make(map[string]string)
}
// Init empty dedicated pool in case it is not set
if c.DedicatedPool == nil {
c.DedicatedPool = make(map[string]DedicatedPoolRecord)
}
// Make sure the ScrubbingDelay is either unset or >= 1min, otherwise the update API requests will be too frequent
for name, pool := range c.DedicatedPool {
if pool.ScrubbingDelay > 0 && time.Duration(pool.ScrubbingDelay) < 1*time.Minute {
return fmt.Errorf("AWS: Scrubbing delay of pool %q is less then 1 minute: %v", name, pool.ScrubbingDelay)
}
}

return nil
}
Loading

0 comments on commit e8463e2

Please sign in to comment.