forked from hashicorp/nomad
/
executor_linux.go
252 lines (218 loc) · 7.35 KB
/
executor_linux.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
package executor
import (
"fmt"
"os"
"os/user"
"path/filepath"
"strconv"
"syscall"
"github.com/hashicorp/go-multierror"
"github.com/opencontainers/runc/libcontainer/cgroups"
cgroupFs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
cgroupConfig "github.com/opencontainers/runc/libcontainer/configs"
"github.com/hashicorp/nomad/client/allocdir"
"github.com/hashicorp/nomad/nomad/structs"
)
var (
// A mapping of directories on the host OS to attempt to embed inside each
// task's chroot.
chrootEnv = map[string]string{
"/bin": "/bin",
"/etc": "/etc",
"/lib": "/lib",
"/lib32": "/lib32",
"/lib64": "/lib64",
"/run/resolvconf": "/run/resolvconf",
"/sbin": "/sbin",
"/usr/bin": "/usr/bin",
"/usr/lib": "/usr/lib",
"/usr/sbin": "/usr/sbin",
"/usr/share": "/usr/share",
}
)
func (e *UniversalExecutor) makeExecutable(binPath string) error {
path := binPath
if e.ctx.FSIsolation {
// The path must be relative the chroot
path = filepath.Join(e.taskDir, binPath)
} else if !filepath.IsAbs(binPath) {
// The path must be relative the allocations directory.
path = filepath.Join(e.taskDir, binPath)
}
return e.makeExecutablePosix(path)
}
// configureIsolation configures chroot and creates cgroups
func (e *UniversalExecutor) configureIsolation() error {
if e.ctx.FSIsolation {
if err := e.configureChroot(); err != nil {
return err
}
}
if e.ctx.ResourceLimits {
if err := e.configureCgroups(e.ctx.TaskResources); err != nil {
return fmt.Errorf("error creating cgroups: %v", err)
}
if err := e.applyLimits(os.Getpid()); err != nil {
if er := DestroyCgroup(e.groups); er != nil {
e.logger.Printf("[ERR] executor: error destroying cgroup: %v", er)
}
if er := e.removeChrootMounts(); er != nil {
e.logger.Printf("[ERR] executor: error removing chroot: %v", er)
}
return fmt.Errorf("error entering the plugin process in the cgroup: %v:", err)
}
}
return nil
}
// applyLimits puts a process in a pre-configured cgroup
func (e *UniversalExecutor) applyLimits(pid int) error {
if !e.ctx.ResourceLimits {
return nil
}
// Entering the process in the cgroup
manager := getCgroupManager(e.groups)
if err := manager.Apply(pid); err != nil {
e.logger.Printf("[ERR] executor: unable to join cgroup: %v", err)
if err := e.Exit(); err != nil {
e.logger.Printf("[ERR] executor: unable to kill process: %v", err)
}
return err
}
return nil
}
// configureCgroups converts a Nomad Resources specification into the equivalent
// cgroup configuration. It returns an error if the resources are invalid.
func (e *UniversalExecutor) configureCgroups(resources *structs.Resources) error {
e.groups = &cgroupConfig.Cgroup{}
e.groups.Resources = &cgroupConfig.Resources{}
cgroupName := structs.GenerateUUID()
cgPath, err := cgroups.GetThisCgroupDir("devices")
if err != nil {
return fmt.Errorf("unable to get mount point for devices sub-system: %v", err)
}
e.groups.Path = filepath.Join(cgPath, cgroupName)
// TODO: verify this is needed for things like network access
e.groups.Resources.AllowAllDevices = true
if resources.MemoryMB > 0 {
// Total amount of memory allowed to consume
e.groups.Resources.Memory = int64(resources.MemoryMB * 1024 * 1024)
// Disable swap to avoid issues on the machine
e.groups.Resources.MemorySwap = int64(-1)
}
if resources.CPU < 2 {
return fmt.Errorf("resources.CPU must be equal to or greater than 2: %v", resources.CPU)
}
// Set the relative CPU shares for this cgroup.
e.groups.Resources.CpuShares = int64(resources.CPU)
if resources.IOPS != 0 {
// Validate it is in an acceptable range.
if resources.IOPS < 10 || resources.IOPS > 1000 {
return fmt.Errorf("resources.IOPS must be between 10 and 1000: %d", resources.IOPS)
}
e.groups.Resources.BlkioWeight = uint16(resources.IOPS)
}
return nil
}
// runAs takes a user id as a string and looks up the user, and sets the command
// to execute as that user.
func (e *UniversalExecutor) runAs(userid string) error {
u, err := user.Lookup(userid)
if err != nil {
return fmt.Errorf("Failed to identify user %v: %v", userid, err)
}
// Convert the uid and gid
uid, err := strconv.ParseUint(u.Uid, 10, 32)
if err != nil {
return fmt.Errorf("Unable to convert userid to uint32: %s", err)
}
gid, err := strconv.ParseUint(u.Gid, 10, 32)
if err != nil {
return fmt.Errorf("Unable to convert groupid to uint32: %s", err)
}
// Set the command to run as that user and group.
if e.cmd.SysProcAttr == nil {
e.cmd.SysProcAttr = &syscall.SysProcAttr{}
}
if e.cmd.SysProcAttr.Credential == nil {
e.cmd.SysProcAttr.Credential = &syscall.Credential{}
}
e.cmd.SysProcAttr.Credential.Uid = uint32(uid)
e.cmd.SysProcAttr.Credential.Gid = uint32(gid)
return nil
}
// configureChroot configures a chroot
func (e *UniversalExecutor) configureChroot() error {
allocDir := e.ctx.AllocDir
if err := allocDir.MountSharedDir(e.ctx.TaskName); err != nil {
return err
}
if err := allocDir.Embed(e.ctx.TaskName, chrootEnv); err != nil {
return err
}
// Set the tasks AllocDir environment variable.
e.ctx.TaskEnv.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)).SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)).Build()
if e.cmd.SysProcAttr == nil {
e.cmd.SysProcAttr = &syscall.SysProcAttr{}
}
e.cmd.SysProcAttr.Chroot = e.taskDir
e.cmd.Dir = "/"
if err := allocDir.MountSpecialDirs(e.taskDir); err != nil {
return err
}
return nil
}
// cleanTaskDir is an idempotent operation to clean the task directory and
// should be called when tearing down the task.
func (e *UniversalExecutor) removeChrootMounts() error {
// Prevent a race between Wait/ForceStop
e.lock.Lock()
defer e.lock.Unlock()
return e.ctx.AllocDir.UnmountAll()
}
// destroyCgroup kills all processes in the cgroup and removes the cgroup
// configuration from the host.
func DestroyCgroup(groups *cgroupConfig.Cgroup) error {
merrs := new(multierror.Error)
if groups == nil {
return fmt.Errorf("Can't destroy: cgroup configuration empty")
}
manager := getCgroupManager(groups)
if pids, perr := manager.GetPids(); perr == nil {
for _, pid := range pids {
// If the pid is the pid of the executor then we don't kill it, the
// executor is going to be killed by the driver once the Wait
// returns
if pid == os.Getpid() {
continue
}
proc, err := os.FindProcess(pid)
if err != nil {
merrs.Errors = append(merrs.Errors, fmt.Errorf("error finding process %v: %v", pid, err))
} else {
if e := proc.Kill(); e != nil {
merrs.Errors = append(merrs.Errors, fmt.Errorf("error killing process %v: %v", pid, e))
}
}
}
} else {
merrs.Errors = append(merrs.Errors, fmt.Errorf("error getting pids: %v", perr))
}
// Remove the cgroup.
if err := manager.Destroy(); err != nil {
multierror.Append(merrs, fmt.Errorf("Failed to delete the cgroup directories: %v", err))
}
if len(merrs.Errors) != 0 {
return fmt.Errorf("errors while destroying cgroup: %v", merrs)
}
return nil
}
// getCgroupManager returns the correct libcontainer cgroup manager.
func getCgroupManager(groups *cgroupConfig.Cgroup) cgroups.Manager {
var manager cgroups.Manager
manager = &cgroupFs.Manager{Cgroups: groups}
if systemd.UseSystemd() {
manager = &systemd.Manager{Cgroups: groups}
}
return manager
}