Skip to content

Commit

Permalink
metrics: report metric errors when caching pids
Browse files Browse the repository at this point in the history
When caching a process, report an error metric if its PID and TID mismatch, so we are
aware of any bug that may affect the BPF or userspace caching logic.

Signed-off-by: Djalal Harouni <tixxdz@gmail.com>
  • Loading branch information
tixxdz authored and kkourt committed Sep 25, 2023
1 parent 50614c5 commit 2a6972b
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 0 deletions.
1 change: 1 addition & 0 deletions pkg/grpc/exec/exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,7 @@ func GetProcessExit(event *MsgExitEventUnix) *tetragon.ProcessExit {
"event.process.tid": event.Info.Tid,
"event.process.binary": tetragonProcess.Binary,
}).Warn("ExitEvent: process PID and TID mismatch")
errormetrics.ErrorTotalInc(errormetrics.ProcessPidTidMismatch)
}

tetragonEvent := &tetragon.ProcessExit{
Expand Down
2 changes: 2 additions & 0 deletions pkg/metrics/errormetrics/errormetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ var (
ProcessCacheEvicted ErrorType = "process_cache_evicted"
// Process not found on remove() call.
ProcessCacheMissOnRemove ErrorType = "process_cache_miss_on_remove"
// Tid and Pid mismatch that could affect BPF and user space caching logic
ProcessPidTidMismatch ErrorType = "process_pid_tid_mismatch"
// Event cache podInfo retries failed.
EventCachePodInfoRetryFailed ErrorType = "event_cache_podinfo_retry_failed"
// Event cache failed to set process information for an event.
Expand Down
3 changes: 3 additions & 0 deletions pkg/process/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"sync"
"sync/atomic"

"github.com/cilium/tetragon/pkg/metrics/errormetrics"
hubble "github.com/cilium/tetragon/pkg/oldhubble/cilium"
"github.com/sirupsen/logrus"

Expand Down Expand Up @@ -220,6 +221,7 @@ func initProcessInternalExec(
}).Warn("ExecveEvent: process PID and TID mismatch")
// Explicitly reset TID to be PID
process.TID = process.PID
errormetrics.ErrorTotalInc(errormetrics.ProcessPidTidMismatch)
}
return &ProcessInternal{
process: &tetragon.Process{
Expand Down Expand Up @@ -273,6 +275,7 @@ func initProcessInternalClone(event *tetragonAPI.MsgCloneEvent,
"event.process.exec_id": pi.process.ExecId,
"event.parent.exec_id": parentExecId,
}).Debug("CloneEvent: process PID and TID mismatch")
errormetrics.ErrorTotalInc(errormetrics.ProcessPidTidMismatch)
}
// Set the TID here and if we have an exit without an exec we report
// directly this TID without copying again objects.
Expand Down

0 comments on commit 2a6972b

Please sign in to comment.