Skip to content

Commit

Permalink
tetragon: Add test for exit events race
Browse files Browse the repository at this point in the history
The previous commit fixes the exit event race that might cause
tetragon to receive multiple exit events with the same pid value.

The contrib/tester-progs/threads-exit program tries to exploit
this by creating multiple threads and synchronizing all their exit
calls, so it's likely to hit the race window.

The TestEventExitThreads test itself spawns several executions of the
threads-exit program (to push the luck a bit and hit the race
window at least once), records their pid values, and then checks that
we receive a single exit event for any given pid value.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
  • Loading branch information
olsajiri authored and kkourt committed Sep 28, 2023
1 parent 0c60ef2 commit 9a8f892
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 1 deletion.
6 changes: 5 additions & 1 deletion contrib/tester-progs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ PROGS = sigkill-tester \
uprobe-test-2 \
lseek-pipe \
threads-tester \
bench-reader
bench-reader \
threads-exit

all: $(PROGS)

Expand All @@ -29,6 +30,9 @@ bench-reader: bench-reader.c
# threads-tester: compiled from threads-tester.c with inlining disabled
# (-fno-inline) and linked against libcap and libpthread.
threads-tester: threads-tester.c
$(GCC) -Wall -fno-inline $< -o $@ -lcap -lpthread

# threads-exit: compiled from threads-exit.c with inlining disabled
# (-fno-inline) and linked against libcap and libpthread.
# NOTE(review): threads-exit.c does not appear to use any libcap symbols,
# so -lcap looks unnecessary here -- confirm before dropping it.
threads-exit: threads-exit.c
$(GCC) -Wall -fno-inline $< -o $@ -lcap -lpthread

# capabilities-tester: compiled from capabilities-tester.c and linked
# against libcap.
capabilities-tester: capabilities-tester.c
$(GCC) -Wall $< -o $@ -lcap

Expand Down
71 changes: 71 additions & 0 deletions contrib/tester-progs/threads-exit.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/sysinfo.h>
#include <sched.h>
#include <sys/syscall.h>
#include <unistd.h>

/*
 * Release flag shared by all threads: workers spin while it is zero and
 * main() sets it to 1 once every worker has reported ready.
 */
static int goo;

/*
 * Thread body: report readiness, spin until released, then exit the thread.
 *
 * @ctx: write end of the "ready" pipe, packed into the pointer value by
 *       main().
 *
 * Does not return in practice: the thread leaves through a raw exit
 * syscall so that all threads hit the kernel exit path at about the same
 * time.
 */
static void *worker(void *ctx)
{
	int ready_out = (intptr_t) ctx;

	/*
	 * main() blocks until it has read one byte per thread, so a lost
	 * write would hang the whole program; fail loudly instead.
	 */
	if (write(ready_out, "R", 1) != 1) {
		perror("write");
		exit(1);
	}

	/*
	 * Busy-wait for the release flag. goo is a plain int written by
	 * another thread, so load it atomically; otherwise an optimizing
	 * compiler is free to hoist the read and spin forever.
	 */
	while (!__atomic_load_n(&goo, __ATOMIC_ACQUIRE)) {}

	/* raw exit syscall: terminates only the calling thread */
	syscall(SYS_exit, 0);
	return NULL;
}

/*
 * Start ncpus * 10 spinning threads, wait until every one of them has
 * reported in through the pipe, then release them all at once while the
 * main thread issues its own exit syscall.
 *
 * Prints "TGID <pid>" on stdout so the test checker knows the group leader.
 * Returns -1 when any setup step fails.
 */
int main(void)
{
	int ncpus = get_nprocs(), nthreads = ncpus * 10;
	int i, err, readyfds[2];
	pthread_t th[nthreads];
	cpu_set_t set;
	char dummy;

	/* make sure we can run on all cpus */
	CPU_ZERO(&set);
	for (i = 0; i < ncpus; i++)
		CPU_SET(i, &set);
	if (sched_setaffinity(0, sizeof(set), &set) == -1) {
		perror("sched_setaffinity");
		return -1;
	}

	/* workers write one byte each into this pipe once they are running */
	if (pipe(readyfds)) {
		perror("pipe");
		return -1;
	}

	/* print out group leader for test checker */
	printf("TGID %d\n", getpid());
	fflush(NULL);

	for (i = 0; i < nthreads; i++) {
		err = pthread_create(&th[i], NULL, worker, (void*)(intptr_t) readyfds[1]);
		if (err) {
			/*
			 * pthread_create() reports failure through its return
			 * value, not errno, so perror() would print the wrong
			 * reason here.
			 */
			fprintf(stderr, "pthread_create: %s\n", strerror(err));
			return -1;
		}
	}

	/* Make sure all threads started.. */
	for (i = 0; i < nthreads; i++) {
		if (read(readyfds[0], &dummy, 1) != 1) {
			perror("read");
			return -1;
		}
	}

	/* .. and then tell threads to exit */
	__atomic_store_n(&goo, 1, __ATOMIC_RELEASE);

	/*
	 * Raw exit syscall on purpose: the main thread exits on its own,
	 * racing with the workers, instead of tearing down the process.
	 */
	syscall(SYS_exit, 0);

	/* not reached */
	return 0;
}
73 changes: 73 additions & 0 deletions pkg/sensors/exec/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"time"

"github.com/cilium/ebpf"
"github.com/cilium/tetragon/api/v1/tetragon"
ec "github.com/cilium/tetragon/api/v1/tetragon/codegen/eventchecker"
"github.com/cilium/tetragon/pkg/api"
"github.com/cilium/tetragon/pkg/api/dataapi"
Expand Down Expand Up @@ -154,6 +155,78 @@ func TestNamespaces(t *testing.T) {
assert.NoError(t, err)
}

// TestEventExitThreads runs the threads-exit tester program several times and
// verifies that exactly one ProcessExit event is reported for the pid printed
// by each run.
func TestEventExitThreads(t *testing.T) {
	var doneWG, readyWG sync.WaitGroup
	defer doneWG.Wait()

	ctx, cancel := context.WithTimeout(context.Background(), tus.Conf().CmdWaitTime)
	defer cancel()

	obs, err := observertesthelper.GetDefaultObserver(t, ctx, tus.Conf().TetragonLib, observertesthelper.WithMyPid())
	if err != nil {
		t.Fatalf("Failed to run observer: %s", err)
	}
	observertesthelper.LoopEvents(ctx, t, &doneWG, &readyWG, obs)
	readyWG.Wait()

	testThreadsExit := testutils.RepoRootPath("contrib/tester-progs/threads-exit")

	// Maps every pid we should receive in exit events to whether its exit
	// event has been seen yet.
	seenExit := map[int]bool{}

	// Run the workload repeatedly to raise the chance of hitting the race
	// window, collecting the tgid each run prints on stdout.
	const runs = 10
	for run := 0; run < runs; run++ {
		output, runErr := exec.Command(testThreadsExit).Output()
		if runErr != nil {
			t.Fatalf("Failed to execute test binary: %s\n", runErr)
		}

		var tgid int
		n, scanErr := fmt.Sscanf(string(output), "TGID %d", &tgid)
		if n != 1 || scanErr != nil {
			t.Fatalf("Failed to parse test binary output: %s\n", scanErr)
		}
		seenExit[tgid] = false
	}

	// Per-event check: only exit events of the tester binary are of
	// interest, and each pid may show up at most once.
	onEvent := func(event ec.Event, _ *logrus.Logger) (bool, error) {
		exit, isExit := event.(*tetragon.ProcessExit)
		if !isExit || exit.Process.Binary != testThreadsExit {
			return false, nil
		}

		pid := int(exit.Process.Pid.GetValue())
		assert.False(t, seenExit[pid], "got extra exit event with pid %d", pid)
		seenExit[pid] = true
		return false, nil
	}

	// Final check: every recorded pid must have produced its exit event.
	onFinish := func(_ *logrus.Logger) error {
		for pid, seen := range seenExit {
			assert.True(t, seen, "did not see exit event for pid %d", pid)
		}
		return nil
	}

	checker := testsensor.NewTestChecker(&ec.FnEventChecker{
		NextCheckFn:  onEvent,
		FinalCheckFn: onFinish,
	})

	assert.NoError(t, jsonchecker.JsonTestCheck(t, checker))
}

func TestEventExecve(t *testing.T) {
var doneWG, readyWG sync.WaitGroup
defer doneWG.Wait()
Expand Down

0 comments on commit 9a8f892

Please sign in to comment.