From 109765e65a90ea23744d83cf13b271161ac0a7fe Mon Sep 17 00:00:00 2001 From: CMGS Date: Wed, 20 May 2026 17:05:17 +0800 Subject: [PATCH] perf: List off-lock IO + DeleteAll shared /proc scan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Backend.List captures VMRecord snapshots under the DB lock then runs ToVM (which reads pidfile + stat vsock socket) outside the lock. Concurrent writers no longer queue behind status polls. - utils.ScanProcsByBinary walks /proc once and returns a cache; the per-call FindVMMByCmdline becomes a thin wrapper. DeleteAll calls ScanProcsByBinary up-front and reuses the scan via ProcScan.Find for each VM — N walks of /proc collapse to 1. - Non-linux stubs added for ProcScan / ScanProcsByBinary so darwin build stays green. --- hypervisor/inspect.go | 24 +++++++++++++++++---- hypervisor/stop.go | 12 +++++------ utils/process_linux.go | 48 ++++++++++++++++++++++++++++++++++-------- utils/process_other.go | 6 ++++++ 4 files changed, 71 insertions(+), 19 deletions(-) diff --git a/hypervisor/inspect.go b/hypervisor/inspect.go index fb4b9aea..2660e2a3 100644 --- a/hypervisor/inspect.go +++ b/hypervisor/inspect.go @@ -21,12 +21,28 @@ func (b *Backend) Inspect(ctx context.Context, ref string) (*types.VM, error) { }) } +// List snapshots all records under the DB lock then runs ToVM (which does file IO) outside the lock so concurrent writers don't queue behind status polls. Mutable map fields are cloned inside the lock to avoid a concurrent-read race with RecordSnapshot etc. func (b *Backend) List(ctx context.Context) ([]*types.VM, error) { - var result []*types.VM - return result, b.DB.With(ctx, func(idx *VMIndex) error { - result = utils.MapValues(idx.VMs, b.ToVM) + var recs []*VMRecord + if err := b.DB.With(ctx, func(idx *VMIndex) error { + recs = make([]*VMRecord, 0, len(idx.VMs)) + for _, r := range idx.VMs { + if r == nil { + continue + } + cp := *r + cp.SnapshotIDs = maps.Clone(r.SnapshotIDs) + recs = append(recs, &cp) + } return nil - }) + }); err != nil { + return nil, err + } + result := make([]*types.VM, len(recs)) + for i, r := range recs { + result[i] = b.ToVM(r) + } + return result, nil } func (b *Backend) ToVM(rec *VMRecord) *types.VM { diff --git a/hypervisor/stop.go b/hypervisor/stop.go index 2ed34925..badcf858 100644 --- a/hypervisor/stop.go +++ b/hypervisor/stop.go @@ -65,6 +65,11 @@ func (b *Backend) DeleteAll(ctx context.Context, refs []string, force bool, stop if err != nil { return nil, err } + // One /proc scan up-front; per-VM orphan check filters this cache instead of re-walking /proc N times. + procScan, scanErr := utils.ScanProcsByBinary(b.Conf.BinaryName()) + if scanErr != nil { + return nil, fmt.Errorf("refuse delete: /proc scan errored: %w (resolve the host issue and retry)", scanErr) + } return b.ForEachVM(ctx, ids, "Delete", func(ctx context.Context, id string) error { rec, loadErr := b.LoadRecord(ctx, id) if loadErr != nil { @@ -91,12 +96,7 @@ func (b *Backend) DeleteAll(ctx context.Context, refs []string, force bool, stop } return fmt.Errorf("refuse delete: api socket %s still responsive (suspected orphan vmm; kill the vmm process then retry)", sockPath) } - // Catches workers/siblings the pidfile-based stop didn't see; fail-closed on scan error so we never wipe rundir while VMM state is unknown. - scanned, scanErr := utils.FindVMMByCmdline(b.Conf.BinaryName(), sockPath) - if scanErr != nil { - return fmt.Errorf("refuse delete: VM %s /proc scan errored: %w (resolve the host issue and retry)", id, scanErr) - } - for _, pid := range scanned { + for _, pid := range procScan.Find(sockPath) { if termErr := utils.TerminateProcess(ctx, pid, b.Conf.BinaryName(), sockPath, b.Conf.TerminateGracePeriod()); termErr != nil { return fmt.Errorf("terminate orphan VMM pid=%d for VM %s: %w", pid, id, termErr) } diff --git a/utils/process_linux.go b/utils/process_linux.go index 39a4afb3..ea80ffda 100644 --- a/utils/process_linux.go +++ b/utils/process_linux.go @@ -13,39 +13,69 @@ import ( "strings" ) -// FindVMMByCmdline returns pids whose argv[0] basename matches binaryName and args contain expectArg, sorted numerically; fails closed on non-ENOENT cmdline read errors. -func FindVMMByCmdline(binaryName, expectArg string) ([]int, error) { +// ProcScan caches /proc cmdlines for one binaryName. Batch callers scan once then Find per id, replacing N /proc walks with one. +type ProcScan []procEntry + +type procEntry struct { + pid int + cmdline string +} + +// ScanProcsByBinary walks /proc once, capturing argv[0]-basename matches. ENOENT (process exited mid-scan) is skipped; other read errors fail closed. +func ScanProcsByBinary(binaryName string) (ProcScan, error) { entries, err := os.ReadDir("/proc") if err != nil { return nil, err } - var pids []int + var scan ProcScan var firstErr error for _, e := range entries { pid, atoiErr := strconv.Atoi(e.Name()) if atoiErr != nil || pid <= 0 { continue } - matched, readErr := verifyProcessCmdline(pid, binaryName, expectArg) + data, readErr := os.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid)) if readErr != nil { - // ENOENT = process exited mid-scan, safe to skip; everything else means we can't tell, so callers must fail closed. if !errors.Is(readErr, fs.ErrNotExist) && firstErr == nil { firstErr = fmt.Errorf("read /proc/%d/cmdline: %w", pid, readErr) } continue } - if matched { - pids = append(pids, pid) + argv0, _, _ := strings.Cut(string(data), "\x00") + if filepath.Base(argv0) != binaryName { + continue } + scan = append(scan, procEntry{pid: pid, cmdline: string(data)}) } if firstErr != nil { return nil, firstErr } + return scan, nil +} + +// Find returns the cached pids whose cmdline contains expectArg, sorted numerically; empty expectArg matches all. +func (s ProcScan) Find(expectArg string) []int { + var pids []int + for _, e := range s { + _, rest, _ := strings.Cut(e.cmdline, "\x00") + if expectArg == "" || strings.Contains(rest, expectArg) { + pids = append(pids, e.pid) + } + } slices.Sort(pids) - return pids, nil + return pids +} + +// FindVMMByCmdline is the one-shot equivalent of ScanProcsByBinary().Find(); batch callers should use ScanProcsByBinary directly to share one /proc walk. +func FindVMMByCmdline(binaryName, expectArg string) ([]int, error) { + scan, err := ScanProcsByBinary(binaryName) + if err != nil { + return nil, err + } + return scan.Find(expectArg), nil } -// Match argv[0] basename strictly + expectArg substring on the rest so "bash -c 'cloud-hypervisor ...'" can't impersonate the VMM; error surfaces cmdline-read failures so callers distinguish transient ENOENT from real issues. +// Match argv[0] basename strictly + expectArg substring so "bash -c 'cloud-hypervisor ...'" can't impersonate the VMM. func verifyProcessCmdline(pid int, binaryName, expectArg string) (bool, error) { data, err := os.ReadFile(fmt.Sprintf("/proc/%d/cmdline", pid)) if err != nil { diff --git a/utils/process_other.go b/utils/process_other.go index 4f479f83..972dc4f4 100644 --- a/utils/process_other.go +++ b/utils/process_other.go @@ -6,6 +6,12 @@ import "errors" var errVerifyUnsupported = errors.New("verifyProcessCmdline: unsupported on this OS") +type ProcScan struct{} + +func ScanProcsByBinary(_ string) (ProcScan, error) { return ProcScan{}, nil } + +func (ProcScan) Find(_ string) []int { return nil } + func FindVMMByCmdline(_, _ string) ([]int, error) { return nil, nil }