Skip to content

Commit

Permalink
os: make use of pidfd on linux
Browse files Browse the repository at this point in the history
Use Process.handle field to store pidfd, and make use of it. Only use
pidfd functionality if all the needed syscalls are available.

1. Add/use pidfdWorks, which checks that all needed pidfd-related
   functionality works.

2. os.StartProcess: obtain the pidfd from the kernel, if possible, using
   the functionality added by CL 520266. Note we could not modify
   syscall.StartProcess to return pidfd directly because it is a public
   API and its callers do not expect it, so we have to use ensurePidfd
   and getPidfd.

3. (*Process).Kill: use pidfdSendSignal, if available and the pidfd is
   known. Otherwise, fall back to the old implementation.

4. (*Process).Wait: use pidfdWait, if available, otherwise fall back to
   using waitid/wait4. This is more complicated than expected due to
   struct siginfo_t idiosyncrasy.

NOTE: pidfdSendSignal and pidfdWait are used without the race workaround
(blockUntilWaitable and sigMu, added by CL 23967) because with a pidfd the
PID reuse problem does not exist (in other words, a pidfd, unlike a PID, is
guaranteed to refer to one particular process), and thus the race does not
exist either.

Rework of CL 528438 (reverted in CL 566477 because of #65857).

For #62654.
Updates #13987.

Change-Id: If5ef8920bd8619dc428b6282ffe4fb8c258ca224
Reviewed-on: https://go-review.googlesource.com/c/go/+/570036
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Kirill Kolyshkin <kolyshkin@gmail.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Cherry Mui <cherryyz@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
  • Loading branch information
kolyshkin authored and gopherbot committed May 17, 2024
1 parent 69105d7 commit 2f64268
Show file tree
Hide file tree
Showing 20 changed files with 393 additions and 16 deletions.
8 changes: 8 additions & 0 deletions src/internal/syscall/unix/pidfd_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,11 @@ func PidFDSendSignal(pidfd uintptr, s syscall.Signal) error {
}
return nil
}

// PidFDOpen issues the system call numbered pidfdOpenTrap, passing pid and
// flags as its first two arguments, and returns the resulting pidfd.
//
// If the call fails, it returns ^uintptr(0) (all bits set) together with the
// errno reported by the call.
func PidFDOpen(pid, flags int) (uintptr, error) {
	fd, _, e := syscall.Syscall(pidfdOpenTrap, uintptr(pid), uintptr(flags), 0)
	if e == 0 {
		// fd is already a uintptr, so it is returned as is.
		return fd, nil
	}
	return ^uintptr(0), e
}
64 changes: 64 additions & 0 deletions src/internal/syscall/unix/siginfo_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package unix

import (
"syscall"
)

// is64bit is 1 when uint is 64 bits wide and 0 when it is 32 bits wide:
// shifting the all-ones uint value right by 63 keeps only the top bit of a
// 64-bit value and nothing of a 32-bit one. It is used below as an array
// length and in the padding size computation.
const is64bit = ^uint(0) >> 63 // 0 for 32-bit hosts, 1 for 64-bit ones.

// SiginfoChild is a struct filled in by the Linux waitid syscall.
// In C, siginfo_t contains a union with multiple members;
// this struct corresponds to the one used when Signo is SIGCHLD.
//
// The field order and the explicit padding are chosen so that the struct
// is 128 bytes in total; TestSiginfoChildLayout checks the size and the
// offset of every exported field.
//
// NOTE fields are exported to be used by TestSiginfoChildLayout.
type SiginfoChild struct {
Signo int32
siErrnoCode // Two int32 fields, swapped on MIPS.
_ [is64bit]int32 // Extra padding for 64-bit hosts only.

// End of common part. Beginning of signal-specific part.

Pid int32
Uid uint32
// Status is interpreted by WaitStatus according to the Code field.
Status int32

// Pad to 128 bytes. The fields above take six 4-byte words, plus one
// more word of padding on 64-bit hosts, hence (6+is64bit)*4.
_ [128 - (6+is64bit)*4]byte
}

const (
// Possible values for SiginfoChild.Code field.
// Only _CLD_EXITED is declared with an explicit int32 type; the other
// _CLD_* constants are untyped integer constants.
_CLD_EXITED int32 = 1
_CLD_KILLED = 2
_CLD_DUMPED = 3
_CLD_TRAPPED = 4
_CLD_STOPPED = 5
_CLD_CONTINUED = 6

// These are the same as in syscall/syscall_linux.go.
// WaitStatus uses them to build its result: core is OR-ed in for
// _CLD_DUMPED, stopped is OR-ed in for _CLD_TRAPPED and _CLD_STOPPED,
// and continued is returned as is for _CLD_CONTINUED.
core = 0x80
stopped = 0x7f
continued = 0xffff
)

// WaitStatus converts SiginfoChild, as filled in by the waitid syscall,
// to syscall.WaitStatus.
//
// The encoding depends on s.Code:
//   - _CLD_EXITED: Status shifted left by 8 bits;
//   - _CLD_KILLED: Status as is;
//   - _CLD_DUMPED: Status OR-ed with core;
//   - _CLD_TRAPPED, _CLD_STOPPED: Status shifted left by 8 bits, OR-ed with stopped;
//   - _CLD_CONTINUED: the continued constant.
//
// Any other Code yields a zero WaitStatus.
func (s *SiginfoChild) WaitStatus() (ws syscall.WaitStatus) {
	// Both forms of Status used below; the shift is done on the int32
	// value before the conversion.
	plain := syscall.WaitStatus(s.Status)
	shifted := syscall.WaitStatus(s.Status << 8)

	switch s.Code {
	case _CLD_EXITED:
		return shifted
	case _CLD_KILLED:
		return plain
	case _CLD_DUMPED:
		return plain | core
	case _CLD_TRAPPED, _CLD_STOPPED:
		return shifted | stopped
	case _CLD_CONTINUED:
		return continued
	}
	return 0
}
12 changes: 12 additions & 0 deletions src/internal/syscall/unix/siginfo_linux_mipsx.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build linux && (mips || mipsle || mips64 || mips64le)

package unix

// siErrnoCode holds the two int32 fields that SiginfoChild embeds right
// after Signo. This variant is built for the MIPS architectures listed in
// this file's build constraint and places Code before Errno.
type siErrnoCode struct {
Code int32
Errno int32
}
12 changes: 12 additions & 0 deletions src/internal/syscall/unix/siginfo_linux_other.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build linux && !(mips || mipsle || mips64 || mips64le)

package unix

// siErrnoCode holds the two int32 fields that SiginfoChild embeds right
// after Signo. This variant is built for every Linux architecture except
// the MIPS ones excluded by this file's build constraint and places Errno
// before Code.
type siErrnoCode struct {
Errno int32
Code int32
}
59 changes: 59 additions & 0 deletions src/internal/syscall/unix/siginfo_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package unix_test

import (
"internal/goarch"
"internal/syscall/unix"
"runtime"
"strings"
"testing"
"unsafe"
)

// TestSiginfoChildLayout validates SiginfoChild layout: the struct must be
// 128 bytes in size and every exported field must sit at the offset expected
// for the current GOARCH and pointer size. Modelled after static assertions
// in linux kernel's arch/*/kernel/signal*.c.
func TestSiginfoChildLayout(t *testing.T) {
	var info unix.SiginfoChild

	if size := unsafe.Sizeof(info); size != 128 {
		t.Fatalf("sizeof: got %d, want 128", size)
	}

	// Errno comes before Code, except on MIPS platforms where the two
	// fields are swapped.
	errnoOff, codeOff := 4, 8
	if strings.HasPrefix(runtime.GOARCH, "mips") {
		errnoOff, codeOff = codeOff, errnoOff
	}

	// The signal-specific part starts at offset 12, or at 16 on hosts
	// with 8-byte pointers.
	pidOff := 12
	if goarch.PtrSize == 8 {
		pidOff = 16
	}

	type fieldOffset struct {
		name string
		got  uintptr
		want int
	}
	checks := []fieldOffset{
		{"Signo", unsafe.Offsetof(info.Signo), 0},
		{"Errno", unsafe.Offsetof(info.Errno), errnoOff},
		{"Code", unsafe.Offsetof(info.Code), codeOff},
		{"Pid", unsafe.Offsetof(info.Pid), pidOff},
		{"Uid", unsafe.Offsetof(info.Uid), pidOff + 4},
		{"Status", unsafe.Offsetof(info.Status), pidOff + 8},
	}

	for _, f := range checks {
		if int(f.got) == f.want {
			continue
		}
		t.Errorf("offsetof %s: got %d, want %d", f.name, f.got, f.want)
	}
}
1 change: 1 addition & 0 deletions src/internal/syscall/unix/sysnum_linux_386.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ const (
getrandomTrap uintptr = 355
copyFileRangeTrap uintptr = 377
pidfdSendSignalTrap uintptr = 424
pidfdOpenTrap uintptr = 434
)
1 change: 1 addition & 0 deletions src/internal/syscall/unix/sysnum_linux_amd64.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ const (
getrandomTrap uintptr = 318
copyFileRangeTrap uintptr = 326
pidfdSendSignalTrap uintptr = 424
pidfdOpenTrap uintptr = 434
)
1 change: 1 addition & 0 deletions src/internal/syscall/unix/sysnum_linux_arm.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ const (
getrandomTrap uintptr = 384
copyFileRangeTrap uintptr = 391
pidfdSendSignalTrap uintptr = 424
pidfdOpenTrap uintptr = 434
)
1 change: 1 addition & 0 deletions src/internal/syscall/unix/sysnum_linux_generic.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ const (
getrandomTrap uintptr = 278
copyFileRangeTrap uintptr = 285
pidfdSendSignalTrap uintptr = 424
pidfdOpenTrap uintptr = 434
)
1 change: 1 addition & 0 deletions src/internal/syscall/unix/sysnum_linux_mips64x.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ const (
getrandomTrap uintptr = 5313
copyFileRangeTrap uintptr = 5320
pidfdSendSignalTrap uintptr = 5424
pidfdOpenTrap uintptr = 5434
)
1 change: 1 addition & 0 deletions src/internal/syscall/unix/sysnum_linux_mipsx.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ const (
getrandomTrap uintptr = 4353
copyFileRangeTrap uintptr = 4360
pidfdSendSignalTrap uintptr = 4424
pidfdOpenTrap uintptr = 4434
)
1 change: 1 addition & 0 deletions src/internal/syscall/unix/sysnum_linux_ppc64x.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ const (
getrandomTrap uintptr = 359
copyFileRangeTrap uintptr = 379
pidfdSendSignalTrap uintptr = 424
pidfdOpenTrap uintptr = 434
)
1 change: 1 addition & 0 deletions src/internal/syscall/unix/sysnum_linux_s390x.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ const (
getrandomTrap uintptr = 349
copyFileRangeTrap uintptr = 375
pidfdSendSignalTrap uintptr = 424
pidfdOpenTrap uintptr = 434
)
6 changes: 3 additions & 3 deletions src/os/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ var ErrProcessDone = errors.New("os: process already finished")
// Process stores the information about a process created by [StartProcess].
type Process struct {
	// Pid is the process ID.
	Pid int

	handle atomic.Uintptr // Process handle for Windows, pidfd for Linux.
	isdone atomic.Bool    // process has been successfully waited on
	sigMu  sync.RWMutex   // avoid race between wait and signal
}

func newProcess(pid int, handle uintptr) *Process {
Expand Down
11 changes: 10 additions & 1 deletion src/os/exec_posix.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ import (
"syscall"
)

// unsetHandle is a value for Process.handle used when the handle is not set.
// It has all bits set (^uintptr(0)).
// Same as syscall.InvalidHandle for Windows.
const unsetHandle = ^uintptr(0)

// The only signal values guaranteed to be present in the os package on all
// systems are os.Interrupt (send the process an interrupt) and os.Kill (force
// the process to exit). On Windows, sending os.Interrupt to a process with
Expand All @@ -38,7 +42,7 @@ func startProcess(name string, argv []string, attr *ProcAttr) (p *Process, err e
sysattr := &syscall.ProcAttr{
Dir: attr.Dir,
Env: attr.Env,
Sys: attr.Sys,
Sys: ensurePidfd(attr.Sys),
}
if sysattr.Env == nil {
sysattr.Env, err = execenv.Default(sysattr.Sys)
Expand All @@ -60,6 +64,11 @@ func startProcess(name string, argv []string, attr *ProcAttr) (p *Process, err e
return nil, &PathError{Op: "fork/exec", Path: name, Err: e}
}

// For Windows, syscall.StartProcess above already returned a process handle.
if runtime.GOOS != "windows" {
h = getPidfd(sysattr.Sys)
}

return newProcess(pid, h), nil
}

Expand Down
38 changes: 26 additions & 12 deletions src/os/exec_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ func (p *Process) wait() (ps *ProcessState, err error) {
if p.Pid == -1 {
return nil, syscall.EINVAL
}
// Wait on pidfd if possible; fall back to using pid on ENOSYS.
//
// When pidfd is used, there is no wait/kill race (described in CL 23967)
// because the PID reuse issue doesn't exist (in other words, a pidfd, unlike
// a PID, is guaranteed to refer to one particular process). Thus, there is
// no need for the workaround (blockUntilWaitable + sigMu) below.
if ps, e := p.pidfdWait(); e != syscall.ENOSYS {
return ps, NewSyscallError("waitid", e)
}

// If we can block until Wait4 will succeed immediately, do so.
ready, err := p.blockUntilWaitable()
Expand Down Expand Up @@ -64,26 +73,31 @@ func (p *Process) signal(sig Signal) error {
if p.Pid == 0 {
return errors.New("os: process not initialized")
}
s, ok := sig.(syscall.Signal)
if !ok {
return errors.New("os: unsupported signal type")
}
// Use pidfd if possible; fall back on ENOSYS.
if err := p.pidfdSendSignal(s); err != syscall.ENOSYS {
return err
}
p.sigMu.RLock()
defer p.sigMu.RUnlock()
if p.done() {
return ErrProcessDone
}
s, ok := sig.(syscall.Signal)
if !ok {
return errors.New("os: unsupported signal type")
}
if e := syscall.Kill(p.Pid, s); e != nil {
if e == syscall.ESRCH {
return ErrProcessDone
}
return e
return convertESRCH(syscall.Kill(p.Pid, s))
}

// convertESRCH maps syscall.ESRCH to ErrProcessDone and returns every other
// value, including nil, unchanged, so that callers such as signal still see
// errors other than ESRCH.
func convertESRCH(err error) error {
	if err == syscall.ESRCH {
		return ErrProcessDone
	}
	return err
}

func (p *Process) release() error {
// NOOP for unix.
p.pidfdRelease()
p.Pid = -1
// no need for a finalizer anymore
runtime.SetFinalizer(p, nil)
Expand All @@ -92,7 +106,7 @@ func (p *Process) release() error {

// findProcess returns a Process for pid. It only constructs the value via
// newProcess and always returns a nil error.
//
// The handle is set to unsetHandle rather than 0: Process.handle holds a
// pidfd on Linux, and unsetHandle is the value that marks it as not set.
func findProcess(pid int) (p *Process, err error) {
	// NOOP for unix.
	return newProcess(pid, unsetHandle), nil
}

func (p *ProcessState) userTime() time.Duration {
Expand Down
1 change: 1 addition & 0 deletions src/os/export_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ var (
PollCopyFileRangeP = &pollCopyFileRange
PollSpliceFile = &pollSplice
GetPollFDAndNetwork = getPollFDAndNetwork
CheckPidfdOnce = checkPidfdOnce
)

0 comments on commit 2f64268

Please sign in to comment.