-
Notifications
You must be signed in to change notification settings - Fork 25
/
fork_linux.go
172 lines (147 loc) · 4.35 KB
/
fork_linux.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
package forkexec
import (
"syscall"
"unsafe" // required for go:linkname.
"golang.org/x/sys/unix"
)
// Start forks a child process that loads seccomp, may have itself traced by
// ptrace, and finally calls execve.
//
// It returns the pid of the child and a nil error on success; on failure it
// returns 0 together with the error.
//
// If ptrace is enabled, the caller must lock the runtime OS thread before
// calling Start.
func (r *Runner) Start() (int, error) {
	// Prepare the executable path, argument vector and environment.
	argv0, argv, env, err := prepareExec(r.Args, r.Env)
	if err != nil {
		return 0, err
	}

	// Convert every string parameter up front; any conversion failure is
	// returned before a socket is created or a process is forked.
	workDirArg, err := syscallStringFromString(r.WorkDir)
	if err != nil {
		return 0, err
	}
	hostNameArg, err := syscallStringFromString(r.HostName)
	if err != nil {
		return 0, err
	}
	domainNameArg, err := syscallStringFromString(r.DomainName)
	if err != nil {
		return 0, err
	}
	pivotRootArg, err := syscallStringFromString(r.PivotRoot)
	if err != nil {
		return 0, err
	}

	// The socketpair is the synchronization channel between both processes:
	// it tells the child that the uid / gid mappings have been set up, and it
	// is used again to sync with the parent before the final execve.
	// pair[0] belongs to the parent and pair[1] to the child.
	pair, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
	if err != nil {
		return 0, err
	}

	// Fork; the child continues inside forkAndExecInChild.
	childPid, cloneErrno := forkAndExecInChild(r, argv0, argv, env, workDirArg, hostNameArg, domainNameArg, pivotRootArg, pair)

	// Restore all signals in the parent.
	afterFork()
	// NOTE(review): no matching ForkLock.Lock is visible in this function;
	// presumably forkAndExecInChild acquires it - confirm.
	syscall.ForkLock.Unlock()

	return syncWithChild(r, pair, int(childPid), cloneErrno)
}
// syncWithChild performs the parent's side of the handshake with the child
// over the socketpair p, where p[0] is the parent's end and p[1] the child's.
//
// err1 is the errno produced by the clone step. When it is non-zero the
// function returns a ChildError located at LocClone.
//
// On success it returns pid and a nil error. On failure it returns 0 and
// either the ChildError carrying the reported errno or, when that errno is
// zero, the local error (for example the one returned by r.SyncFunc).
func syncWithChild(r *Runner, p [2]int, pid int, err1 syscall.Errno) (int, error) {
	var (
		n         int
		err       error
		mapErrno  syscall.Errno
		childErr  ChildError
		newUserNS = r.CloneFlags&unix.CLONE_NEWUSER != 0
	)
	parentFd := p[0]

	// fail is the common failure tail: kill and reap the child, then report
	// the child's errno if there is one and the local error otherwise.
	fail := func() (int, error) {
		handleChildFailed(pid)
		if childErr.Err != 0 {
			return 0, childErr
		}
		return 0, err
	}

	// The parent never uses the child's end of the socketpair.
	unix.Close(p[1])

	// The clone syscall itself failed.
	if err1 != 0 {
		unix.Close(parentFd)
		childErr.Location = LocClone
		childErr.Err = err1
		return 0, childErr
	}

	// With a new user namespace, write the uid / gid maps and send the
	// resulting errno (zero on success) to the child.
	if newUserNS {
		if err = writeIDMaps(r, pid); err != nil {
			mapErrno = err.(syscall.Errno)
		}
		syscall.RawSyscall(syscall.SYS_WRITE, uintptr(parentFd), uintptr(unsafe.Pointer(&mapErrno)), unsafe.Sizeof(mapErrno))
	}

	// First message from the child: accepted only if it has the size of an
	// errno or of a full ChildError, carries a zero errno, and the read
	// itself succeeded.
	n, err = readChildErr(parentFd, &childErr)
	sizeOK := n == int(unsafe.Sizeof(mapErrno)) || n == int(unsafe.Sizeof(childErr))
	if !sizeOK || childErr.Err != 0 || err != nil {
		childErr.Err = handlePipeError(n, childErr.Err)
		unix.Close(parentFd)
		return fail()
	}

	// A failing SyncFunc fails the child immediately.
	if r.SyncFunc != nil {
		if err = r.SyncFunc(pid); err != nil {
			unix.Close(parentFd)
			return fail()
		}
	}

	// Acknowledge the child; err1 is known to be zero at this point.
	syscall.RawSyscall(syscall.SYS_WRITE, uintptr(parentFd), uintptr(unsafe.Pointer(&err1)), unsafe.Sizeof(err1))

	// If the child stops before execve (SIGSTOP or PTRACE_ME), do not wait for
	// execve here. The remaining read happens in a separate goroutine, which
	// per the original author's note avoids SIGPIPE.
	if r.Ptrace || r.StopBeforeSeccomp {
		go func() {
			readChildErr(parentFd, &childErr)
			unix.Close(parentFd)
		}()
		return pid, nil
	}

	// Any data read now means the child failed after the sync. The socket is
	// close-on-exec, so this read should not block.
	n, err = readChildErr(parentFd, &childErr)
	unix.Close(parentFd)
	if n == 0 && err == nil {
		return pid, nil
	}
	childErr.Err = handlePipeError(n, childErr.Err)
	return fail()
}
// readChildErr reads up to unsafe.Sizeof(*childErr) bytes from fd directly
// into the memory of *childErr, repeating the read while it fails with EINTR.
// It returns the byte count and error of the last read attempt.
func readChildErr(fd int, childErr *ChildError) (n int, err error) {
	dst := (*byte)(unsafe.Pointer(childErr))
	size := int(unsafe.Sizeof(*childErr))
	for {
		n, err = readlen(fd, dst, size)
		if err != syscall.EINTR {
			return n, err
		}
	}
}
// readlen issues a read system call on fd for up to np bytes into the memory
// starting at p. It returns the syscall's first result converted to int and,
// when the syscall reports a non-zero errno, that errno as the error.
//
// Modelled on the Go standard library:
// https://cs.opensource.google/go/go/+/refs/tags/go1.18.1:src/syscall/zsyscall_linux_amd64.go;l=944
func readlen(fd int, p *byte, np int) (n int, err error) {
	count, _, errno := syscall.Syscall(syscall.SYS_READ, uintptr(fd), uintptr(unsafe.Pointer(p)), uintptr(np))
	if errno != 0 {
		return int(count), errno
	}
	// Return a literal nil so a zero errno never becomes a non-nil error.
	return int(count), nil
}
// handlePipeError picks the errno to report after a read of r1 bytes.
// If r1, converted to uintptr, is smaller than the size of an errno, the
// message was too short and EPIPE is returned; otherwise errno is returned
// unchanged. A negative r1 converts to a very large uintptr and therefore
// also returns errno unchanged.
func handlePipeError(r1 int, errno syscall.Errno) syscall.Errno {
	if uintptr(r1) < unsafe.Sizeof(errno) {
		return syscall.EPIPE
	}
	return errno
}
// handleChildFailed sends SIGKILL to pid and then waits for it, retrying the
// wait while it is interrupted (EINTR). Errors from the kill and from the
// final wait are ignored.
func handleChildFailed(pid int) {
	// Kill first so the wait below does not block on a live child.
	syscall.Kill(pid, syscall.SIGKILL)

	// Reap the failed child so that zombies do not accumulate.
	var status syscall.WaitStatus
	for {
		if _, err := syscall.Wait4(pid, &status, 0, nil); err != syscall.EINTR {
			return
		}
	}
}