/
task_linux.go
381 lines (335 loc) · 13.2 KB
/
task_linux.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
//go:build linux
// +build linux
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
// License is located at
//
// http://aws.amazon.com/apache2.0/
//
// or in the "license" file accompanying this file. This file is distributed
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.
package task
import (
"fmt"
"path/filepath"
"time"
"github.com/aws/amazon-ecs-agent/ecs-agent/logger"
"github.com/aws/amazon-ecs-agent/ecs-agent/logger/field"
"github.com/aws/amazon-ecs-agent/agent/config"
"github.com/aws/amazon-ecs-agent/agent/ecscni"
"github.com/aws/amazon-ecs-agent/agent/taskresource"
"github.com/aws/amazon-ecs-agent/agent/taskresource/cgroup"
resourcestatus "github.com/aws/amazon-ecs-agent/agent/taskresource/status"
resourcetype "github.com/aws/amazon-ecs-agent/agent/taskresource/types"
apicontainerstatus "github.com/aws/amazon-ecs-agent/ecs-agent/api/container/status"
"github.com/aws/amazon-ecs-agent/ecs-agent/credentials"
ni "github.com/aws/amazon-ecs-agent/ecs-agent/netlib/model/networkinterface"
"github.com/aws/amazon-ecs-agent/ecs-agent/utils/arn"
"github.com/cihub/seelog"
"github.com/containernetworking/cni/libcni"
dockercontainer "github.com/docker/docker/api/types/container"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
)
// Constants used when translating task and container limits into Linux
// resource settings.
const (
	// minimumCPUShare is the floor applied to a container's CPU shares: a
	// container requesting fewer shares is counted as this many when the
	// task-level shares are aggregated.
	// Reference: http://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
	minimumCPUShare = 2
	// minimumCPUPercent is not referenced by the code visible in this part of
	// the file; it is kept for other users in the package.
	minimumCPUPercent = 0
	// bytesPerMegabyte is the factor used to convert the task memory limit to
	// bytes (1 << 20 == 1024 * 1024).
	bytesPerMegabyte = 1 << 20
)
// PlatformFields consists of fields specific to Linux for a task.
// It is declared as an empty struct: no Linux-only fields are defined here.
type PlatformFields struct{}
// adjustForPlatform applies Linux-specific settings to the task.
// It stores, while holding the task's write lock, whether task-level CPU and
// memory limits are enabled according to cfg.TaskCPUMemLimit.
func (task *Task) adjustForPlatform(cfg *config.Config) {
	limitsEnabled := cfg.TaskCPUMemLimit.Enabled()

	task.lock.Lock()
	task.MemoryCPULimitsEnabled = limitsEnabled
	task.lock.Unlock()
}
// initializeCgroupResourceSpec creates the task-level cgroup resource and
// registers it as a dependency of every container in the task.
//
// When task.MemoryCPULimitsEnabled is false no resource is created: a warning
// is logged if the task still carries a CPU or memory limit, and nil is
// returned. Otherwise the cgroup root and the Linux resource spec are built
// from cGroupCPUPeriod and taskPidsLimit, the resulting cgroup resource is
// added to the task under resourcetype.CgroupKey, and each container gets a
// resource dependency that ties the cgroup's CgroupCreated status to the
// container's ContainerPulled status.
//
// An error is returned, wrapped with context, if either the cgroup root or the
// resource spec cannot be built.
func (task *Task) initializeCgroupResourceSpec(cgroupPath string, cGroupCPUPeriod time.Duration, taskPidsLimit int, resourceFields *taskresource.ResourceFields) error {
	if !task.MemoryCPULimitsEnabled {
		hasTaskLimits := task.CPU > 0 || task.Memory > 0
		if hasTaskLimits {
			// Client-side validation/warning if a task with task-level CPU/memory
			// limits specified somehow lands on an instance where agent does not
			// support it. These limits will be ignored.
			logger.Warn("Ignoring task-level CPU/memory limits since agent does not support the TaskCPUMemLimits capability", logger.Fields{
				field.TaskID: task.GetID(),
			})
		}
		return nil
	}

	root, rootErr := task.BuildCgroupRoot()
	if rootErr != nil {
		return errors.Wrapf(rootErr, "cgroup resource: unable to determine cgroup root for task")
	}

	spec, specErr := task.BuildLinuxResourceSpec(cGroupCPUPeriod, taskPidsLimit)
	if specErr != nil {
		return errors.Wrapf(specErr, "cgroup resource: unable to build resource spec for task")
	}

	taskCgroup := cgroup.NewCgroupResource(
		task.Arn,
		resourceFields.Control,
		resourceFields.IOUtil,
		root,
		cgroupPath,
		spec,
	)
	task.AddResource(resourcetype.CgroupKey, taskCgroup)

	// Every container in the task depends on the task cgroup.
	for i := range task.Containers {
		task.Containers[i].BuildResourceDependency(
			taskCgroup.GetName(),
			resourcestatus.ResourceStatus(cgroup.CgroupCreated),
			apicontainerstatus.ContainerPulled,
		)
	}
	return nil
}
// BuildCgroupRoot returns the cgroup root (prefix) for the task.
//
// The task ID is extracted from task.Arn; if that fails the error is returned
// with an empty string. The format then depends on config.CgroupV2:
//
//	v1: /ecs/task-id
//	v2: ecstasks-$TASKID.slice
func (task *Task) BuildCgroupRoot() (string, error) {
	id, err := arn.TaskIdFromArn(task.Arn)
	switch {
	case err != nil:
		return "", err
	case config.CgroupV2:
		return buildCgroupV2Root(id), nil
	default:
		return buildCgroupV1Root(id), nil
	}
}
// buildCgroupV1Root returns the cgroup v1 root for the given task ID, formed by
// joining config.DefaultTaskCgroupV1Prefix and taskID with filepath.Join.
func buildCgroupV1Root(taskID string) string {
return filepath.Join(config.DefaultTaskCgroupV1Prefix, taskID)
}
// buildCgroupV2Root returns the cgroup v2 root for the given task ID, in the
// form "<config.DefaultTaskCgroupV2Prefix>-<taskID>.slice".
//
// The name relies on the systemd driver's special "-" character, which
// specifies a parent slice, so tasks and their containers end up looking like
// this in the cgroup directory:
//
//	/sys/fs/cgroup/ecstasks.slice/
//	├── ecstasks-XXXXf406f70c4c678073ae96944fXXXX.slice
//	│   └── docker-XXXX7c6dc81f2e9a8bf1c566dc769733ccba594b3007dd289a0f50ad7923XXXX.scope
//	└── ecstasks-XXXX30467358463ab6bbba4e73afXXXX.slice
//	    └── docker-XXXX7ef4e942552437c96051356859c1df169f16e1cf9a9fc96fd30614e6XXXX.scope
func buildCgroupV2Root(taskID string) string {
	const sliceNameFormat = "%s-%s.slice"
	return fmt.Sprintf(sliceNameFormat, config.DefaultTaskCgroupV2Prefix, taskID)
}
// BuildLinuxResourceSpec returns the specs.LinuxResources object describing the
// task cgroup.
//
//   - CPU is always set: from an explicit quota/period pair (derived from
//     cGroupCPUPeriod) when task.CPU > 0, otherwise from CPU shares aggregated
//     over the task's containers.
//   - Memory is set only when task.Memory > 0; task memory is optional.
//   - Pids is set only when taskPidsLimit > 0 (configured through the
//     ECS_TASK_PIDS_LIMIT env var).
//
// On any builder error a zero-value specs.LinuxResources is returned together
// with that error.
func (task *Task) BuildLinuxResourceSpec(cGroupCPUPeriod time.Duration, taskPidsLimit int) (specs.LinuxResources, error) {
	var (
		resources specs.LinuxResources
		cpuSpec   specs.LinuxCPU
	)

	// Task-level CPU limit requested: CPU quota + CPU period.
	// No task-level CPU limit: CPU shares.
	if task.CPU > 0 {
		explicitSpec, err := task.buildExplicitLinuxCPUSpec(cGroupCPUPeriod)
		if err != nil {
			return specs.LinuxResources{}, err
		}
		cpuSpec = explicitSpec
	} else {
		cpuSpec = task.buildImplicitLinuxCPUSpec()
	}
	resources.CPU = &cpuSpec

	// Validate and build the (optional) task memory spec.
	if task.Memory > 0 {
		memorySpec, err := task.buildLinuxMemorySpec()
		if err != nil {
			return specs.LinuxResources{}, err
		}
		resources.Memory = &memorySpec
	}

	// Apply the task pids limit when one is configured.
	if taskPidsLimit > 0 {
		resources.Pids = &specs.LinuxPids{Limit: int64(taskPidsLimit)}
	}

	return resources, nil
}
// buildExplicitLinuxCPUSpec builds the CPU spec used when a task-level CPU
// limit is explicitly requested.
//
// The period is cGroupCPUPeriod expressed in whole microseconds, and the quota
// is task.CPU multiplied by that period (truncated to an integer). The
// returned error is always nil in this implementation.
func (task *Task) buildExplicitLinuxCPUSpec(cGroupCPUPeriod time.Duration) (specs.LinuxCPU, error) {
	periodMicros := uint64(cGroupCPUPeriod.Microseconds())
	quotaMicros := int64(task.CPU * float64(periodMicros))

	cpuSpec := specs.LinuxCPU{
		Period: &periodMicros,
		Quota:  &quotaMicros,
	}
	return cpuSpec, nil
}
// buildImplicitLinuxCPUSpec builds the task CPU spec used when the task CPU
// and memory limit feature is enabled but no task-level CPU limit is set.
//
// The task's CPU shares are the sum of its containers' CPU shares, where any
// container below minimumCPUShare is counted as minimumCPUShare. A task with
// no containers yields zero shares.
func (task *Task) buildImplicitLinuxCPUSpec() specs.LinuxCPU {
	var totalShares uint64
	for _, c := range task.Containers {
		shares := uint64(c.CPU)
		if c.CPU < minimumCPUShare {
			// Raise undersized requests to the minimum share value.
			shares = minimumCPUShare
		}
		totalShares += shares
	}
	return specs.LinuxCPU{Shares: &totalShares}
}
// buildLinuxMemorySpec validates the task memory limit against the containers
// and builds the task memory spec.
//
// It fails if any container's memory limit exceeds task.Memory. Otherwise the
// limit is task.Memory multiplied by bytesPerMegabyte, because the kernel
// expects memory to be expressed in bytes. Callers in this file only invoke it
// when task.Memory > 0.
func (task *Task) buildLinuxMemorySpec() (specs.LinuxMemory, error) {
	taskLimit := task.Memory

	// No container of this task may request more than the task itself.
	for _, c := range task.Containers {
		if requested := int64(c.Memory); requested > taskLimit {
			return specs.LinuxMemory{},
				errors.Errorf("task memory spec builder: container memory limit(%d) greater than task memory limit(%d)",
					requested, taskLimit)
		}
	}

	limitBytes := taskLimit * bytesPerMegabyte
	return specs.LinuxMemory{Limit: &limitBytes}, nil
}
// platformHostConfigOverride applies the Linux-specific overrides to hostConfig.
// At present it only delegates to overrideCgroupParent and returns that call's
// error unchanged.
func (task *Task) platformHostConfigOverride(hostConfig *dockercontainer.HostConfig) error {
// Override cgroup parent
return task.overrideCgroupParent(hostConfig)
}
// overrideCgroupParent sets hostConfig.CgroupParent to the task's cgroup root
// when task cgroups are enabled (task.MemoryCPULimitsEnabled). When they are
// disabled hostConfig is left untouched and nil is returned.
//
// The task's read lock is held for the duration of the call. An error is
// returned, wrapped with the task ARN, if the cgroup root cannot be built.
func (task *Task) overrideCgroupParent(hostConfig *dockercontainer.HostConfig) error {
	task.lock.RLock()
	defer task.lock.RUnlock()

	if !task.MemoryCPULimitsEnabled {
		return nil
	}

	root, err := task.BuildCgroupRoot()
	if err != nil {
		return errors.Wrapf(err, "task cgroup override: unable to obtain cgroup root for task: %s", task.Arn)
	}
	hostConfig.CgroupParent = root
	return nil
}
// dockerCPUShares returns the CPU shares value to hand to Docker for a
// container requesting containerCPU shares.
//
// Docker silently converts 0 to 1024 CPU shares, which is probably not what we
// want, so requests of 0 or 1 are raised to 2 instead (and logged at debug
// level). 2 is chosen over 1 because 1 is an invalid value (Linux's choice,
// not Docker's). Any other value is returned unchanged.
func (task *Task) dockerCPUShares(containerCPU uint) int64 {
	const minShares = 2

	if containerCPU >= minShares {
		return int64(containerCPU)
	}

	seelog.Debugf(
		"Converting CPU shares to allowed minimum of 2 for task arn: [%s] and cpu shares: %d",
		task.Arn, containerCPU)
	return minShares
}
// enableIPv6SysctlSetting sets the sysctl keyed by disableIPv6SysctlKey to
// sysctlValueOff on hostConfig (both names are defined elsewhere in the
// package). The Sysctls map is created first if hostConfig does not have one;
// existing entries for other keys are preserved.
func enableIPv6SysctlSetting(hostConfig *dockercontainer.HostConfig) {
	sysctls := hostConfig.Sysctls
	if sysctls == nil {
		sysctls = make(map[string]string)
		hostConfig.Sysctls = sysctls
	}
	sysctls[disableIPv6SysctlKey] = sysctlValueOff
}
// requiresFSxWindowsFileServerResource reports whether the task needs an
// 'fsxWindowsFileServer' resource. This Linux implementation does not inspect
// the task's volumes and always returns false.
func (task *Task) requiresFSxWindowsFileServerResource() bool {
return false
}
// initializeFSxWindowsFileServerResource is the Linux counterpart of the
// fsxwindowsfileserver resource initializer. It performs no work: cfg,
// credentialsManager and resourceFields are unused, and it always returns an
// error stating that FSx for Windows File Server volumes are only supported on
// Windows container instances.
func (task *Task) initializeFSxWindowsFileServerResource(cfg *config.Config, credentialsManager credentials.Manager,
resourceFields *taskresource.ResourceFields) error {
return errors.New("task with FSx for Windows File Server volumes is only supported on Windows container instance")
}
// BuildCNIConfigAwsvpc builds the list of CNI network configurations for a task
// in AWSVPC network mode. If includeIPAMConfig is true, the bridge
// configuration also includes the IPAM configuration.
//
// cniConfig is modified in place and returned on success. Network
// configurations are appended to cniConfig.NetworkConfigs in this order:
//
//  1. one per ENI in task.ENIs (cniConfig.ID is set to each ENI's MAC address
//     before its configuration is built),
//  2. the bridge network, which all AWSVPC tasks have,
//  3. AppMesh, if the task has an AppMesh configuration,
//  4. ServiceConnect, if it is enabled for the task.
//
// Finally cniConfig.ContainerNetNS is derived from cniConfig.ContainerPID.
//
// It returns (nil, error) when the task is not in AWSVPC mode, when an ENI has
// an unknown interface association protocol, or when any configuration builder
// fails. cniConfig may already have been partially modified in that case.
func (task *Task) BuildCNIConfigAwsvpc(includeIPAMConfig bool, cniConfig *ecscni.Config) (*ecscni.Config, error) {
	if !task.IsNetworkModeAWSVPC() {
		return nil, errors.New("task config: task network mode is not AWSVPC")
	}

	var (
		ifName  string
		netconf *libcni.NetworkConfig
		err     error
	)

	// appendNetworkConfig records the most recently built (ifName, netconf)
	// pair on cniConfig.
	appendNetworkConfig := func() {
		cniConfig.NetworkConfigs = append(cniConfig.NetworkConfigs, &ecscni.NetworkConfig{
			IfName:           ifName,
			CNINetworkConfig: netconf,
		})
	}

	// One CNI network configuration per ENI.
	for _, eni := range task.ENIs {
		switch protocol := eni.InterfaceAssociationProtocol; protocol {
		case "", ni.DefaultInterfaceAssociationProtocol:
			// "default" or unset (kept for backwards compatibility) means a
			// "standard" ENI attachment.
			cniConfig.ID = eni.MacAddress
			ifName, netconf, err = ecscni.NewVPCENINetworkConfig(eni, cniConfig)
		case ni.VLANInterfaceAssociationProtocol:
			cniConfig.ID = eni.MacAddress
			ifName, netconf, err = ecscni.NewBranchENINetworkConfig(eni, cniConfig)
		default:
			err = errors.Errorf("task config: unknown interface association type: %s",
				protocol)
		}
		if err != nil {
			return nil, err
		}
		appendNetworkConfig()
	}

	// The bridge CNI network configuration; all AWSVPC tasks have one.
	ifName, netconf, err = ecscni.NewBridgeNetworkConfig(cniConfig, includeIPAMConfig)
	if err != nil {
		return nil, err
	}
	appendNetworkConfig()

	// AppMesh, when configured for the task.
	if appMeshConfig := task.GetAppMesh(); appMeshConfig != nil {
		ifName, netconf, err = ecscni.NewAppMeshConfig(appMeshConfig, cniConfig)
		if err != nil {
			return nil, err
		}
		appendNetworkConfig()
	}

	// ServiceConnect for a ServiceConnect-enabled task in AWSVPC mode.
	if task.IsServiceConnectEnabled() {
		ifName, netconf, err = ecscni.NewServiceConnectNetworkConfig(
			task.ServiceConnectConfig,
			ecscni.NAT,
			false,
			task.shouldEnableIPv4(),
			task.shouldEnableIPv6(),
			cniConfig)
		if err != nil {
			return nil, err
		}
		appendNetworkConfig()
	}

	cniConfig.ContainerNetNS = fmt.Sprintf(ecscni.NetnsFormat, cniConfig.ContainerPID)
	return cniConfig, nil
}
// BuildCNIConfigBridgeMode builds the list of CNI network configurations for a
// task in docker bridge mode. Currently the only plugin available is the one
// for Service Connect.
//
// cniConfig is modified in place and returned on success: a ServiceConnect
// network configuration (TPROXY mode) is appended to cniConfig.NetworkConfigs
// and cniConfig.ContainerNetNS is derived from cniConfig.ContainerPID. IPv4 and
// IPv6 are enabled according to whether the task's ServiceConnect network
// config has a non-empty SCPauseIPv4Addr / SCPauseIPv6Addr.
//
// It returns (nil, error) when the task is not a bridge-mode,
// Service-Connect-enabled task, or when the configuration cannot be built.
func (task *Task) BuildCNIConfigBridgeMode(cniConfig *ecscni.Config, containerName string) (*ecscni.Config, error) {
	if !(task.IsNetworkModeBridge() && task.IsServiceConnectEnabled()) {
		return nil, errors.New("only bridge-mode Service-Connect-enabled task should invoke BuildCNIConfigBridgeMode")
	}

	scNetworkConfig := task.GetServiceConnectNetworkConfig()
	notSCPause := !task.IsContainerServiceConnectPause(containerName)
	hasIPv4 := scNetworkConfig.SCPauseIPv4Addr != ""
	hasIPv6 := scNetworkConfig.SCPauseIPv6Addr != ""

	ifName, netconf, err := ecscni.NewServiceConnectNetworkConfig(
		task.ServiceConnectConfig,
		ecscni.TPROXY,
		notSCPause,
		hasIPv4,
		hasIPv6,
		cniConfig)
	if err != nil {
		return nil, err
	}

	scConfig := &ecscni.NetworkConfig{
		IfName:           ifName,
		CNINetworkConfig: netconf,
	}
	cniConfig.NetworkConfigs = append(cniConfig.NetworkConfigs, scConfig)
	cniConfig.ContainerNetNS = fmt.Sprintf(ecscni.NetnsFormat, cniConfig.ContainerPID)
	return cniConfig, nil
}