Skip to content

Commit

Permalink
Fix GetSelf() logic when running with alternate hostfs (#149)
Browse files Browse the repository at this point in the history
Closes
#147

## What does this PR do?

This fixes an issue where `Stats.GetSelf()` would fail when run inside a
container while using a hostfs path, as the PID returned by
`os.Getpid()` would not be valid outside the container runtime.

This changes the logic, so if we have a `hostfs` set, `GetSelf` will
fetch the pid from `/hostf/proc/self`.

## Why is it important?

This is a bug.

## Checklist


- [x] My code follows the style guidelines of this project
- [x] I have commented my code, particularly in hard-to-understand areas
- [x] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added an entry in `CHANGELOG.md`
  • Loading branch information
fearful-symmetry committed May 3, 2024
1 parent 182b3ee commit 38970da
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 11 deletions.
13 changes: 10 additions & 3 deletions metric/system/process/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
"context"
"errors"
"fmt"
"os"
"sort"
"strings"
"time"
Expand Down Expand Up @@ -152,15 +151,23 @@ func (procStats *Stats) GetOne(pid int) (mapstr.M, error) {
}

// GetSelf gets process info for the beat itself
// Be advised that if you call this method on a Stats object that was created with an alternate
// `Hostfs` setting, this method will return data for that pid as it exists on that hostfs.
// For example, if called from inside a container with a `hostfs` path for the container host,
// the PID in the ProcState object will be the PID as the host sees it.
func (procStats *Stats) GetSelf() (ProcState, error) {
self := os.Getpid()
self, err := GetSelfPid(procStats.Hostfs)
if err != nil {
return ProcState{}, fmt.Errorf("error finding PID: %w", err)
}

pidStat, _, err := procStats.pidFill(self, false)
if err != nil {
return ProcState{}, fmt.Errorf("error fetching PID %d: %w", self, err)
}

procStats.ProcsMap.SetPid(self, pidStat)

return pidStat, nil
}

Expand Down Expand Up @@ -210,7 +217,7 @@ func (c NonFatalErr) Is(other error) bool {
// pidFill is an entrypoint used by OS-specific code to fill out a pid.
// This in turn calls various OS-specific code to fill out the various bits of PID data
// This is done to minimize the code duplication between different OS implementations
// The second return value will only be false if an event has been filtered out
// The second return value will only be false if an event has been filtered out.
func (procStats *Stats) pidFill(pid int, filter bool) (ProcState, bool, error) {
// Fetch proc state so we can get the name for filtering based on user's filter.

Expand Down
35 changes: 29 additions & 6 deletions metric/system/process/process_container_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ import (
"github.com/elastic/elastic-agent-libs/mapstr"
"github.com/elastic/elastic-agent-system-metrics/dev-tools/systemtests"
"github.com/elastic/elastic-agent-system-metrics/metric/system/cgroup"
"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
)

// ======================================== NOTE:
Expand All @@ -43,11 +42,34 @@ func TestContainerMonitoringFromInsideContainer(t *testing.T) {
testStats := Stats{CPUTicks: true,
EnableCgroups: true,
EnableNetwork: false,
// TODO: These should use DockerTestResolver,
// once https://github.com/elastic/elastic-agent-system-metrics/issues/147 is merged
Hostfs: resolve.NewTestResolver(""),
Procs: []string{".*"},
CgroupOpts: cgroup.ReaderOptions{RootfsMountpoint: resolve.NewTestResolver("")},
Hostfs: systemtests.DockerTestResolver(),
Procs: []string{".*"},
CgroupOpts: cgroup.ReaderOptions{RootfsMountpoint: systemtests.DockerTestResolver()},
}
err := testStats.Init()
require.NoError(t, err)

stats, err := testStats.GetSelf()
require.NoError(t, err)
if runtime.GOOS == "linux" {
cgstats, err := stats.Cgroup.Format()
require.NoError(t, err)
require.NotEmpty(t, cgstats)
}

require.NotEmpty(t, stats.Cmdline)
require.NotEmpty(t, stats.Username)
require.NotZero(t, stats.Pid)
}

func TestSelfMonitoringFromInsideContainer(t *testing.T) {
_ = logp.DevelopmentSetup()

testStats := Stats{CPUTicks: true,
EnableCgroups: true,
EnableNetwork: false,
Procs: []string{".*"},
CgroupOpts: cgroup.ReaderOptions{},
}
err := testStats.Init()
require.NoError(t, err)
Expand All @@ -62,6 +84,7 @@ func TestContainerMonitoringFromInsideContainer(t *testing.T) {

require.NotEmpty(t, stats.Cmdline)
require.NotEmpty(t, stats.Username)
require.NotZero(t, stats.Pid)
}

func TestSystemHostFromContainer(t *testing.T) {
Expand Down
7 changes: 7 additions & 0 deletions metric/system/process/process_darwin.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"encoding/binary"
"fmt"
"io"
"os"
"os/user"
"strconv"
"syscall"
Expand All @@ -47,6 +48,12 @@ import (
"github.com/elastic/elastic-agent-system-metrics/metric/system/resolve"
)

// GetSelfPid is the darwin implementation; see the linux version in
// process_linux_common.go for more context.
func GetSelfPid(hostfs resolve.Resolver) (int, error) {
return os.Getpid(), nil
}

// FetchPids returns a map and array of pids
func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) {
n := C.proc_listpids(C.PROC_ALL_PIDS, 0, nil, 0)
Expand Down
25 changes: 25 additions & 0 deletions metric/system/process/process_linux_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,31 @@ var bootTime uint64 = 0
// system tick multiplier, see C.sysconf(C._SC_CLK_TCK)
const ticks = 100

// GetSelfPid returns the process we're running as.
// for cases of self-monitoring this requires some actual thought;
// if we use os.Getpid() and we're running inside a container, that PID will
// only be valid inside the container, and an attempt to fetch metrics from
// `/hostfs/proc/` for that pid will fail. If we're using a hostfs, revert to _that_ for fetching the pid metrics.
func GetSelfPid(hostfs resolve.Resolver) (int, error) {
if !hostfs.IsSet() {
return os.Getpid(), nil
}

statRaw, err := os.ReadFile(hostfs.ResolveHostFS("/proc/self/stat"))
if err != nil {
return 0, fmt.Errorf("error reading from self/stat while searching for our PID in a container: %w", err)
}

parts := strings.Split(string(statRaw), " ")
pidRaw := parts[0]
pid, err := strconv.ParseInt(pidRaw, 10, 64)
if err != nil {
return 0, fmt.Errorf("error parsing int from `stat` while searching for our pid in a container: %w", err)
}
return int(pid), nil

}

// FetchPids is the linux implementation of FetchPids
func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) {
dir, err := os.Open(procStats.Hostfs.ResolveHostFS("proc"))
Expand Down
10 changes: 10 additions & 0 deletions metric/system/process/process_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,16 @@ func TestFetchOtherProcessCgroup(t *testing.T) {
t.Logf("Got %d events", len(evts))
}

func TestGetSelfPidNoHostfs(t *testing.T) {
ourPid := os.Getpid()

foundPid, err := GetSelfPid(resolve.NewTestResolver(""))
require.NoError(t, err)

require.Equal(t, ourPid, foundPid)

}

func TestFetchProcessFromOtherUser(t *testing.T) {
_ = logp.DevelopmentSetup()
// If we just used Get() or FetchPids() to get a list of processes on the system, this would produce a bootstrapping problem
Expand Down
15 changes: 13 additions & 2 deletions metric/system/process/process_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package process
import (
"errors"
"fmt"
"os"
"path/filepath"
"syscall"
"unsafe"
Expand Down Expand Up @@ -50,6 +51,12 @@ func (procStats *Stats) FetchPids() (ProcsMap, []ProcState, error) {
return procMap, plist, nil
}

// GetSelfPid is the darwin implementation; see the linux version in
// process_linux_common.go for more context.
func GetSelfPid(hostfs resolve.Resolver) (int, error) {
return os.Getpid(), nil
}

// GetInfoForPid returns basic info for the process
func GetInfoForPid(_ resolve.Resolver, pid int) (ProcState, error) {
var err error
Expand Down Expand Up @@ -87,7 +94,9 @@ func FetchNumThreads(pid int) (int, error) {
if err != nil {
return 0, fmt.Errorf("OpenProcess failed for PID %d: %w", pid, err)
}
defer syscall.CloseHandle(targetProcessHandle)
defer func() {
_ = syscall.CloseHandle(targetProcessHandle)
}()

currentProcessHandle, err := syscall.GetCurrentProcess()
if err != nil {
Expand All @@ -96,7 +105,9 @@ func FetchNumThreads(pid int) (int, error) {
// The pseudo handle need not be closed when it is no longer
// needed, calling CloseHandle has no effect. Adding here to
// remind us to close any handles we open.
defer syscall.CloseHandle(currentProcessHandle)
defer func() {
_ = syscall.CloseHandle(currentProcessHandle)
}()

var snapshotHandle syscall.Handle
err = PssCaptureSnapshot(targetProcessHandle, PSSCaptureThreads, 0, &snapshotHandle)
Expand Down

0 comments on commit 38970da

Please sign in to comment.