From 32ab858309c84c23049715aaab936ce654ad5792 Mon Sep 17 00:00:00 2001 From: Alban Crequy Date: Sun, 22 Mar 2020 16:06:44 +0100 Subject: [PATCH] tools: add filtering by mount namespace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In previous patches, I added the option --cgroupmap to filter events belonging to a set of cgroup-v2. Although this approach works fine with systemd services and containers when cgroup-v2 is enabled, it does not work with containers when only cgroup-v1 is enabled because bpf_get_current_cgroup_id() only works with cgroup-v2. It also requires Linux 4.18 to get this bpf helper function. This patch adds an additional way to filter by containers, using mount namespaces. Note that this does not help with systemd services since they normally don't create a new mount namespace (unless you set some options like 'ReadOnlyPaths=', see "man 5 systemd.exec"). My goal with this patch is to filter Kubernetes pods, even on distributions with an older kernel (<4.18) or without cgroup-v2 enabled. - This is only implemented for tools that already support filtering by cgroup id (bindsnoop, capable, execsnoop, profile, tcpaccept, tcpconnect, tcptop and tcptracer). - I picked the mount namespace because the other namespaces could be disabled in Kubernetes (e.g. HostNetwork, HostPID, HostIPC). 
It can be tested by following the example in docs/special_filtering added in this commit, to avoid compiling locally the following command can be used ``` sudo bpftool map create /sys/fs/bpf/mnt_ns_set type hash key 8 value 4 \ entries 128 name mnt_ns_set flags 0 docker run -ti --rm --privileged \ -v /usr/src:/usr/src -v /lib/modules:/lib/modules \ -v /sys/fs/bpf:/sys/fs/bpf --pid=host kinvolk/bcc:alban-containers-filters \ /usr/share/bcc/tools/execsnoop --mntnsmap /sys/fs/bpf/mnt_ns_set ``` Co-authored-by: Alban Crequy Co-authored-by: Mauricio Vásquez --- ...ing_by_cgroups.md => special_filtering.md} | 59 +++++++++++++- man/man8/bindsnoop.8 | 7 +- man/man8/capable.8 | 7 +- man/man8/execsnoop.8 | 14 ++-- man/man8/opensnoop.8 | 7 +- man/man8/profile.8 | 4 +- man/man8/tcpaccept.8 | 7 +- man/man8/tcpconnect.8 | 8 +- man/man8/tcptop.8 | 7 +- man/man8/tcptracer.8 | 7 +- src/python/bcc/containers.py | 80 +++++++++++++++++++ tools/bindsnoop.py | 30 +++---- tools/bindsnoop_example.txt | 5 +- tools/capable.py | 24 +++--- tools/capable_example.txt | 10 ++- tools/execsnoop.py | 29 +++---- tools/execsnoop_example.txt | 6 +- tools/opensnoop.py | 31 +++---- tools/opensnoop_example.txt | 12 ++- tools/profile.py | 23 ++---- tools/profile_example.txt | 8 +- tools/tcpaccept.py | 32 +++----- tools/tcpaccept_example.txt | 5 +- tools/tcpconnect.py | 23 ++---- tools/tcpconnect_example.txt | 7 +- tools/tcptop.py | 42 +++++----- tools/tcptop_example.txt | 4 +- tools/tcptracer.py | 41 +++------- tools/tcptracer_example.txt | 2 +- 29 files changed, 322 insertions(+), 219 deletions(-) rename docs/{filtering_by_cgroups.md => special_filtering.md} (58%) create mode 100644 src/python/bcc/containers.py diff --git a/docs/filtering_by_cgroups.md b/docs/special_filtering.md similarity index 58% rename from docs/filtering_by_cgroups.md rename to docs/special_filtering.md index f2f08926d5e..9b15260ca16 100644 --- a/docs/filtering_by_cgroups.md +++ b/docs/special_filtering.md @@ -1,4 +1,10 @@ -# 
Demonstrations of filtering by cgroups +# Special Filtering + +Some tools have special filtering capabilities, the main use case is to trace +processes running in containers, but those mechanisms are generic and could +be used in other cases as well. + +## Filtering by cgroups Some tools have an option to filter by cgroup by referencing a pinned BPF hash map managed externally. @@ -66,3 +72,54 @@ map, bcc tools will display results from this shell. Cgroups can be added and removed from the BPF hash map without restarting the bcc tool. This feature is useful for integrating bcc tools in external projects. + +## Filtering by mount namespace + +The BPF hash map can be created by: + +``` +# bpftool map create /sys/fs/bpf/mnt_ns_set type hash key 8 value 4 entries 128 \ + name mnt_ns_set flags 0 +``` + +Execute the `execsnoop` tool filtering only the mount namespaces +in `/sys/fs/bpf/mnt_ns_set`: + +``` +# tools/execsnoop.py --mntnsmap /sys/fs/bpf/mnt_ns_set +``` + +Start a terminal in a new mount namespace: + +``` +# unshare -m bash +``` + +Update the hash map with the mount namespace ID of the terminal above: + +``` +FILE=/sys/fs/bpf/mnt_ns_set +if [ $(printf '\1' | od -dAn) -eq 1 ]; then + HOST_ENDIAN_CMD=tac +else + HOST_ENDIAN_CMD=cat +fi + +NS_ID_HEX="$(printf '%016x' $(stat -Lc '%i' /proc/self/ns/mnt) | sed 's/.\{2\}/&\n/g' | $HOST_ENDIAN_CMD)" +bpftool map update pinned $FILE key hex $NS_ID_HEX value hex 00 00 00 00 any +``` + +Execute a command in this terminal: + +``` +# ping kinvolk.io +``` + +You'll see how on the `execsnoop` terminal you started above the call is logged: + +``` +# tools/execsnoop.py --mntnsmap /sys/fs/bpf/mnt_ns_set +[sudo] password for mvb: +PCOMM PID PPID RET ARGS +ping 8096 7970 0 /bin/ping kinvolk.io +``` diff --git a/man/man8/bindsnoop.8 b/man/man8/bindsnoop.8 index ec7ca1dae08..f8fa185021e 100644 --- a/man/man8/bindsnoop.8 +++ b/man/man8/bindsnoop.8 @@ -2,7 +2,7 @@ .SH NAME bindsnoop \- Trace bind() system calls. 
.SH SYNOPSIS -.B bindsnoop.py [\fB-h\fP] [\fB-w\fP] [\fB-t\fP] [\fB-p\fP PID] [\fB-P\fP PORT] [\fB-E\fP] [\fB-U\fP] [\fB-u\fP UID] [\fB--count\fP] [\fB--cgroupmap MAP\fP] +.B bindsnoop.py [\fB-h\fP] [\fB-w\fP] [\fB-t\fP] [\fB-p\fP PID] [\fB-P\fP PORT] [\fB-E\fP] [\fB-U\fP] [\fB-u\fP UID] [\fB--count\fP] [\fB--cgroupmap MAP\fP] [\fB--mntnsmap MNTNSMAP\fP] .SH DESCRIPTION bindsnoop reports socket options set before the bind call that would impact this system call behavior. .PP @@ -42,6 +42,11 @@ Trace cgroups in this BPF map: .B \fB--cgroupmap\fP MAP .TP +Trace mount namespaces in this BPF map: +.TP +.B +\fB--mntnsmap\fP MNTNSMAP +.TP Include errors in the output: .TP .B diff --git a/man/man8/capable.8 b/man/man8/capable.8 index dfb8a6aadfc..342946f8381 100644 --- a/man/man8/capable.8 +++ b/man/man8/capable.8 @@ -3,7 +3,7 @@ capable \- Trace security capability checks (cap_capable()). .SH SYNOPSIS .B capable [\-h] [\-v] [\-p PID] [\-K] [\-U] [\-x] [\-\-cgroupmap MAPPATH] - [--unique] + [\-\-mntnsmap MAPPATH] [--unique] .SH DESCRIPTION This traces security capability checks in the kernel, and prints details for each call. This can be useful for general debugging, and also security @@ -33,6 +33,9 @@ Show extra fields in TID and INSETID columns. \-\-cgroupmap MAPPATH Trace cgroups in this BPF map only (filtered in-kernel). .TP +\-\-mntnsmap MAPPATH +Trace mount namespaces in this BPF map only (filtered in-kernel). +.TP \-\-unique Don't repeat stacks for the same PID or cgroup. 
.SH EXAMPLES @@ -45,7 +48,7 @@ Trace capability checks for PID 181: # .B capable \-p 181 .TP -Trace capability checks in a set of cgroups only (see filtering_by_cgroups.md +Trace capability checks in a set of cgroups only (see special_filtering.md from bcc sources for more details): # .B capable \-\-cgroupmap /sys/fs/bpf/test01 diff --git a/man/man8/execsnoop.8 b/man/man8/execsnoop.8 index 4a88e007418..e42ad38ab4e 100644 --- a/man/man8/execsnoop.8 +++ b/man/man8/execsnoop.8 @@ -2,8 +2,8 @@ .SH NAME execsnoop \- Trace new processes via exec() syscalls. Uses Linux eBPF/bcc. .SH SYNOPSIS -.B execsnoop [\-h] [\-T] [\-t] [\-x] [\-\-cgroupmap CGROUPMAP] [\-u USER] -.B [\-q] [\-n NAME] [\-l LINE] [\-U] [\-\-max-args MAX_ARGS] +.B execsnoop [\-h] [\-T] [\-t] [\-x] [\-\-cgroupmap CGROUPMAP] [\-\-mntnsmap MAPPATH] +.B [\-u USER] [\-q] [\-n NAME] [\-l LINE] [\-U] [\-\-max-args MAX_ARGS] .SH DESCRIPTION execsnoop traces new processes, showing the filename executed and argument list. @@ -42,7 +42,7 @@ Include failed exec()s .TP \-q Add "quotemarks" around arguments. Escape quotemarks in arguments with a -backslash. For tracing empty arguments or arguments that contain whitespace. +backslash. For tracing empty arguments or arguments that contain whitespace. .TP \-n NAME Only print command lines matching this name (regex) @@ -55,6 +55,10 @@ Maximum number of arguments parsed and displayed, defaults to 20 .TP \-\-cgroupmap MAPPATH Trace cgroups in this BPF map only (filtered in-kernel). +.TP +\-\-mntnsmap MAPPATH +Trace mount namespaces in this BPF map only (filtered in-kernel). +.TP .SH EXAMPLES .TP Trace all exec() syscalls: @@ -81,7 +85,7 @@ Include failed exec()s: # .B execsnoop \-x .TP -Put quotemarks around arguments. +Put quotemarks around arguments. 
# .B execsnoop \-q .TP @@ -93,7 +97,7 @@ Only trace exec()s where argument's line contains "testpkg": # .B execsnoop \-l testpkg .TP -Trace a set of cgroups only (see filtering_by_cgroups.md from bcc sources for more details): +Trace a set of cgroups only (see special_filtering.md from bcc sources for more details): # .B execsnoop \-\-cgroupmap /sys/fs/bpf/test01 .SH FIELDS diff --git a/man/man8/opensnoop.8 b/man/man8/opensnoop.8 index 54a7788a291..fee832634e2 100644 --- a/man/man8/opensnoop.8 +++ b/man/man8/opensnoop.8 @@ -4,7 +4,7 @@ opensnoop \- Trace open() syscalls. Uses Linux eBPF/bcc. .SH SYNOPSIS .B opensnoop.py [\-h] [\-T] [\-U] [\-x] [\-p PID] [\-t TID] [\-u UID] [\-d DURATION] [\-n NAME] [\-e] [\-f FLAG_FILTER] - [--cgroupmap MAPPATH] + [--cgroupmap MAPPATH] [--mntnsmap MAPPATH] .SH DESCRIPTION opensnoop traces the open() syscall, showing which processes are attempting to open which files. This can be useful for determining the location of config @@ -58,6 +58,9 @@ Filter on open() flags, e.g., O_WRONLY. .TP \--cgroupmap MAPPATH Trace cgroups in this BPF map only (filtered in-kernel). +.TP +\--mntnsmap MAPPATH +Trace mount namespaces in this BPF map only (filtered in-kernel). .SH EXAMPLES .TP Trace all open() syscalls: @@ -100,7 +103,7 @@ Only print calls for writing: # .B opensnoop \-f O_WRONLY \-f O_RDWR .TP -Trace a set of cgroups only (see filtering_by_cgroups.md from bcc sources for more details): +Trace a set of cgroups only (see special_filtering.md from bcc sources for more details): # .B opensnoop \-\-cgroupmap /sys/fs/bpf/test01 .SH FIELDS diff --git a/man/man8/profile.8 b/man/man8/profile.8 index 823ff699614..30871afeb73 100644 --- a/man/man8/profile.8 +++ b/man/man8/profile.8 @@ -3,7 +3,7 @@ profile \- Profile CPU usage by sampling stack traces. Uses Linux eBPF/bcc. 
.SH SYNOPSIS .B profile [\-adfh] [\-p PID | \-L TID] [\-U | \-K] [\-F FREQUENCY | \-c COUNT] -.B [\-\-stack\-storage\-size COUNT] [\-\-cgroupmap CGROUPMAP] [duration] +.B [\-\-stack\-storage\-size COUNT] [\-\-cgroupmap CGROUPMAP] [\-\-mntnsmap MAPPATH] [duration] .SH DESCRIPTION This is a CPU profiler. It works by taking samples of stack traces at timed intervals. It will help you understand and quantify CPU usage: which code is @@ -101,7 +101,7 @@ Profile kernel stacks only: # .B profile -K .TP -Profile a set of cgroups only (see filtering_by_cgroups.md from bcc sources for more details): +Profile a set of cgroups only (see special_filtering.md from bcc sources for more details): # .B profile \-\-cgroupmap /sys/fs/bpf/test01 .SH DEBUGGING diff --git a/man/man8/tcpaccept.8 b/man/man8/tcpaccept.8 index 43219260501..603a5ca433f 100644 --- a/man/man8/tcpaccept.8 +++ b/man/man8/tcpaccept.8 @@ -2,7 +2,7 @@ .SH NAME tcpaccept \- Trace TCP passive connections (accept()). Uses Linux eBPF/bcc. .SH SYNOPSIS -.B tcpaccept [\-h] [\-T] [\-t] [\-p PID] [\-P PORTS] [\-\-cgroupmap MAPPATH] +.B tcpaccept [\-h] [\-T] [\-t] [\-p PID] [\-P PORTS] [\-\-cgroupmap MAPPATH] [\-\-mntnsmap MAPPATH] .SH DESCRIPTION This tool traces passive TCP connections (eg, via an accept() syscall; connect() are active connections). This can be useful for general @@ -36,6 +36,9 @@ Comma-separated list of local ports to trace (filtered in-kernel). .TP \-\-cgroupmap MAPPATH Trace cgroups in this BPF map only (filtered in-kernel). +.TP +\-\-mntnsmap MAPPATH +Trace mount namespaces in this BPF map only (filtered in-kernel). 
.SH EXAMPLES .TP Trace all passive TCP connections (accept()s): @@ -54,7 +57,7 @@ Trace PID 181 only: # .B tcpaccept \-p 181 .TP -Trace a set of cgroups only (see filtering_by_cgroups.md from bcc sources for more details): +Trace a set of cgroups only (see special_filtering.md from bcc sources for more details): # .B tcpaccept \-\-cgroupmap /sys/fs/bpf/test01 .SH FIELDS diff --git a/man/man8/tcpconnect.8 b/man/man8/tcpconnect.8 index 60aac1e21b8..bc2589ed60b 100644 --- a/man/man8/tcpconnect.8 +++ b/man/man8/tcpconnect.8 @@ -2,7 +2,7 @@ .SH NAME tcpconnect \- Trace TCP active connections (connect()). Uses Linux eBPF/bcc. .SH SYNOPSIS -.B tcpconnect [\-h] [\-c] [\-t] [\-x] [\-p PID] [-P PORT] [\-\-cgroupmap MAPPATH] +.B tcpconnect [\-h] [\-c] [\-t] [\-x] [\-p PID] [-P PORT] [\-\-cgroupmap MAPPATH] [\-\-mntnsmap MAPPATH] .SH DESCRIPTION This tool traces active TCP connections (eg, via a connect() syscall; accept() are passive connections). This can be useful for general @@ -72,9 +72,13 @@ Count connects per src ip and dest ip/port: # .B tcpconnect \-c .TP -Trace a set of cgroups only (see filtering_by_cgroups.md from bcc sources for more details): +Trace a set of cgroups only (see special_filtering.md from bcc sources for more details): # .B tcpconnect \-\-cgroupmap /sys/fs/bpf/test01 +.TP +Trace a set of mount namespaces only (see special_filtering.md from bcc sources for more details): +# +.B tcpconnect \-\-mntnsmap /sys/fs/bpf/mnt_ns_set .SH FIELDS .TP TIME(s) diff --git a/man/man8/tcptop.8 b/man/man8/tcptop.8 index 631c00c3272..e636f456965 100644 --- a/man/man8/tcptop.8 +++ b/man/man8/tcptop.8 @@ -3,7 +3,7 @@ tcptop \- Summarize TCP send/recv throughput by host. Top for TCP. .SH SYNOPSIS .B tcptop [\-h] [\-C] [\-S] [\-p PID] [\-\-cgroupmap MAPPATH] - [interval] [count] + [--mntnsmap MAPPATH] [interval] [count] .SH DESCRIPTION This is top for TCP sessions. @@ -39,6 +39,9 @@ Trace this PID only. 
\-\-cgroupmap MAPPATH Trace cgroups in this BPF map only (filtered in-kernel). .TP +\-\-mntnsmap MAPPATH +Trace mount namespaces in this BPF map only (filtered in-kernel). +.TP interval Interval between updates, seconds (default 1). .TP @@ -58,7 +61,7 @@ Trace PID 181 only, and don't clear the screen: # .B tcptop \-Cp 181 .TP -Trace a set of cgroups only (see filtering_by_cgroups.md from bcc sources for more details): +Trace a set of cgroups only (see special_filtering.md from bcc sources for more details): # .B tcptop \-\-cgroupmap /sys/fs/bpf/test01 .SH FIELDS diff --git a/man/man8/tcptracer.8 b/man/man8/tcptracer.8 index 728c80af51b..d2346c776d3 100644 --- a/man/man8/tcptracer.8 +++ b/man/man8/tcptracer.8 @@ -2,7 +2,7 @@ .SH NAME tcptracer \- Trace TCP established connections. Uses Linux eBPF/bcc. .SH SYNOPSIS -.B tcptracer [\-h] [\-v] [\-p PID] [\-N NETNS] [\-\-cgroupmap MAPPATH] +.B tcptracer [\-h] [\-v] [\-p PID] [\-N NETNS] [\-\-cgroupmap MAPPATH] [\-\-mntnsmap MAPPATH] .SH DESCRIPTION This tool traces established TCP connections that open and close while tracing, and prints a line of output per connect, accept and close events. This includes @@ -31,6 +31,9 @@ Trace this network namespace only (filtered in-kernel). .TP \-\-cgroupmap MAPPATH Trace cgroups in this BPF map only (filtered in-kernel). +.TP +\-\-mntnsmap MAPPATH +Trace mount namespaces in this BPF map only (filtered in-kernel). 
.SH EXAMPLES .TP Trace all TCP established connections: @@ -49,7 +52,7 @@ Trace connections in network namespace 4026531969 only: # .B tcptracer \-N 4026531969 .TP -Trace a set of cgroups only (see filtering_by_cgroups.md from bcc sources for more details): +Trace a set of cgroups only (see special_filtering.md from bcc sources for more details): # .B tcptracer \-\-cgroupmap /sys/fs/bpf/test01 .SH FIELDS diff --git a/src/python/bcc/containers.py b/src/python/bcc/containers.py new file mode 100644 index 00000000000..b55e0500209 --- /dev/null +++ b/src/python/bcc/containers.py @@ -0,0 +1,80 @@ +# Copyright 2020 Kinvolk GmbH +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +def _cgroup_filter_func_writer(cgroupmap): + if not cgroupmap: + return """ + static inline int _cgroup_filter() { + return 0; + } + """ + + text = """ + BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUP_PATH"); + + static inline int _cgroup_filter() { + u64 cgroupid = bpf_get_current_cgroup_id(); + return cgroupset.lookup(&cgroupid) == NULL; + } + """ + + return text.replace('CGROUP_PATH', cgroupmap) + +def _mntns_filter_func_writer(mntnsmap): + if not mntnsmap: + return """ + static inline int _mntns_filter() { + return 0; + } + """ + text = """ + #include + #include + #include + + /* see mountsnoop.py: + * XXX: struct mnt_namespace is defined in fs/mount.h, which is private + * to the VFS and not installed in any kernel-devel packages. 
So, let's + * duplicate the important part of the definition. There are actually + * more members in the real struct, but we don't need them, and they're + * more likely to change. + */ + struct mnt_namespace { + atomic_t count; + struct ns_common ns; + }; + + BPF_TABLE_PINNED("hash", u64, u32, mount_ns_set, 1024, "MOUNT_NS_PATH"); + + static inline int _mntns_filter() { + struct task_struct *current_task; + current_task = (struct task_struct *)bpf_get_current_task(); + u64 ns_id = current_task->nsproxy->mnt_ns->ns.inum; + return mount_ns_set.lookup(&ns_id) == NULL; + } + """ + + return text.replace('MOUNT_NS_PATH', mntnsmap) + +def filter_by_containers(args): + filter_by_containers_text = """ + static inline int container_should_be_filtered() { + return _cgroup_filter() || _mntns_filter(); + } + """ + + cgroupmap_text = _cgroup_filter_func_writer(args.cgroupmap) + mntnsmap_text = _mntns_filter_func_writer(args.mntnsmap) + + return cgroupmap_text + mntnsmap_text + filter_by_containers_text diff --git a/tools/bindsnoop.py b/tools/bindsnoop.py index 4d3133fcfd2..de569c2515d 100755 --- a/tools/bindsnoop.py +++ b/tools/bindsnoop.py @@ -6,7 +6,7 @@ # based on tcpconnect utility from Brendan Gregg's suite. 
# # USAGE: bindsnoop [-h] [-t] [-E] [-p PID] [-P PORT[,PORT ...]] [-w] -# [--count] [--cgroupmap mappath] +# [--count] [--cgroupmap mappath] [--mntnsmap mappath] # # bindsnoop reports socket options set before the bind call # that would impact this system call behavior: @@ -28,6 +28,7 @@ from __future__ import print_function, absolute_import, unicode_literals from bcc import BPF, DEBUG_SOURCE +from bcc.containers import filter_by_containers from bcc.utils import printb import argparse import re @@ -51,6 +52,7 @@ ./bindsnoop -E # report bind errors ./bindsnoop --count # count bind per src ip ./bindsnoop --cgroupmap mappath # only trace cgroups in this BPF map + ./bindsnoop --mntnsmap mappath # only trace mount namespaces in the map it is reporting socket options set before the bins call impacting system call behavior: @@ -84,6 +86,8 @@ help="count binds per src ip and port") parser.add_argument("--cgroupmap", help="trace cgroups in this BPF map only") +parser.add_argument("--mntnsmap", + help="trace mount namespaces in this BPF map only") parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS) parser.add_argument("--debug-source", action="store_true", @@ -148,8 +152,6 @@ }; BPF_HASH(ipv6_count, struct ipv6_flow_key_t); -CGROUP_MAP - // bind options for event reporting union bind_options { u8 data; @@ -174,7 +176,9 @@ FILTER_UID - FILTER_CGROUP + if (container_should_be_filtered()) { + return 0; + } // stash the sock ptr for lookup on return currsock.update(&tid, &socket); @@ -323,11 +327,6 @@ bpf_get_current_comm(&data6.task, sizeof(data6.task)); ipv6_bind_events.perf_submit(ctx, &data6, sizeof(data6));""" }, - 'filter_cgroup': """ - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { - return 0; - }""", } # code substitutions @@ -351,22 +350,11 @@ 'if (uid != %s) { return 0; }' % args.uid) if args.errors: bpf_text = bpf_text.replace('FILTER_ERRORS', 'ignore_errors = 0;') -if args.cgroupmap: - bpf_text = 
bpf_text.replace('FILTER_CGROUP', struct_init['filter_cgroup']) - bpf_text = bpf_text.replace( - 'CGROUP_MAP', - ( - 'BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "%s");' % - args.cgroupmap - ) - ) - +bpf_text = filter_by_containers(args) + bpf_text bpf_text = bpf_text.replace('FILTER_PID', '') bpf_text = bpf_text.replace('FILTER_PORT', '') bpf_text = bpf_text.replace('FILTER_UID', '') bpf_text = bpf_text.replace('FILTER_ERRORS', '') -bpf_text = bpf_text.replace('FILTER_CGROUP', '') -bpf_text = bpf_text.replace('CGROUP_MAP', '') # selecting output format - 80 characters or wide, fitting IPv6 addresses header_fmt = "%8s %-12.12s %-4s %-15s %-5s %5s %2s" diff --git a/tools/bindsnoop_example.txt b/tools/bindsnoop_example.txt index 77e040ed706..c7c51353994 100644 --- a/tools/bindsnoop_example.txt +++ b/tools/bindsnoop_example.txt @@ -59,7 +59,7 @@ It is meant to be used with an externally created map. # ./bindsnoop.py --cgroupmap /sys/fs/bpf/test01 -For more details, see docs/filtering_by_cgroups.md +For more details, see docs/special_filtering.md In order to track heavy bind usage one can use --count option @@ -74,7 +74,7 @@ LADDR LPORT BINDS Usage message: # ./bindsnoop.py -h usage: bindsnoop.py [-h] [-t] [-w] [-p PID] [-P PORT] [-E] [-U] [-u UID] - [--count] [--cgroupmap CGROUPMAP] + [--count] [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP] Trace TCP binds @@ -103,6 +103,7 @@ examples: ./bindsnoop -E # report bind errors ./bindsnoop --count # count bind per src ip ./bindsnoop --cgroupmap mappath # only trace cgroups in this BPF map + ./bindsnoop --mntnsmap mappath # only trace mount namespaces in the map it is reporting socket options set before the bins call impacting system call behavior: diff --git a/tools/capable.py b/tools/capable.py index 3852e2247b6..94d1c32a361 100755 --- a/tools/capable.py +++ b/tools/capable.py @@ -15,6 +15,7 @@ from os import getpid from functools import partial from bcc import BPF +from bcc.containers import filter_by_containers 
import errno import argparse from time import strftime @@ -28,7 +29,8 @@ ./capable -U # add user-space stacks to trace ./capable -x # extra fields: show TID and INSETID columns ./capable --unique # don't repeat stacks for the same pid or cgroup - ./capable --cgroupmap ./mappath # only trace cgroups in this BPF map + ./capable --cgroupmap mappath # only trace cgroups in this BPF map + ./capable --mntnsmap mappath # only trace mount namespaces in the map """ parser = argparse.ArgumentParser( description="Trace security capability checks", @@ -46,6 +48,8 @@ help="show extra fields in TID and INSETID columns") parser.add_argument("--cgroupmap", help="trace cgroups in this BPF map only") +parser.add_argument("--mntnsmap", + help="trace mount namespaces in this BPF map only") parser.add_argument("--unique", action="store_true", help="don't repeat stacks for the same pid or cgroup") args = parser.parse_args() @@ -145,10 +149,6 @@ def __getattr__(self, name): BPF_HASH(seen, struct repeat_t, u64); #endif -#if CGROUPSET -BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH"); -#endif - #if defined(USER_STACKS) || defined(KERNEL_STACKS) BPF_STACK_TRACE(stacks, 2048); #endif @@ -173,12 +173,10 @@ def __getattr__(self, name): FILTER1 FILTER2 FILTER3 -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { + + if (container_should_be_filtered()) { return 0; } -#endif u32 uid = bpf_get_current_uid_gid(); struct data_t data = {.tgid = tgid, .pid = pid, .uid = uid, .cap = cap, .audit = audit, .insetid = insetid}; @@ -192,7 +190,7 @@ def __getattr__(self, name): #if UNIQUESET struct repeat_t repeat = {0,}; repeat.cap = cap; -#if CGROUPSET +#if CGROUP_ID_SET repeat.cgroupid = bpf_get_current_cgroup_id(); #else repeat.tgid = tgid; @@ -229,11 +227,7 @@ def __getattr__(self, name): bpf_text = bpf_text.replace('FILTER2', '') bpf_text = bpf_text.replace('FILTER3', 'if (pid == %s) { return 0; }' % getpid()) -if args.cgroupmap: - 
bpf_text = bpf_text.replace('CGROUPSET', '1') - bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap) -else: - bpf_text = bpf_text.replace('CGROUPSET', '0') +bpf_text = filter_by_containers(args) + bpf_text if args.unique: bpf_text = bpf_text.replace('UNIQUESET', '1') else: diff --git a/tools/capable_example.txt b/tools/capable_example.txt index bcd6d01ee46..1701b6a2e44 100644 --- a/tools/capable_example.txt +++ b/tools/capable_example.txt @@ -4,7 +4,7 @@ Demonstrations of capable, the Linux eBPF/bcc version. capable traces calls to the kernel cap_capable() function, which does security capability checks, and prints details for each call. For example: -# ./capable.py +# ./capable.py TIME UID PID COMM CAP NAME AUDIT 22:11:23 114 2676 snmpd 12 CAP_NET_ADMIN 1 22:11:23 0 6990 run 24 CAP_SYS_RESOURCE 1 @@ -100,14 +100,14 @@ with an externally created map. # ./capable.py --cgroupmap /sys/fs/bpf/test01 -For more details, see docs/filtering_by_cgroups.md +For more details, see docs/special_filtering.md USAGE: # ./capable.py -h usage: capable.py [-h] [-v] [-p PID] [-K] [-U] [-x] [--cgroupmap CGROUPMAP] - [--unique] + [--mntnsmap MNTNSMAP] [--unique] Trace security capability checks @@ -120,6 +120,7 @@ optional arguments: -x, --extra show extra fields in TID and INSETID columns --cgroupmap CGROUPMAP trace cgroups in this BPF map only + --mntnsmap MNTNSMAP trace mount namespaces in this BPF map only --unique don't repeat stacks for the same pid or cgroup examples: @@ -130,4 +131,5 @@ examples: ./capable -U # add user-space stacks to trace ./capable -x # extra fields: show TID and INSETID columns ./capable --unique # don't repeat stacks for the same pid or cgroup - ./capable --cgroupmap ./mappath # only trace cgroups in this BPF map + ./capable --cgroupmap mappath # only trace cgroups in this BPF map + ./capable --mntnsmap mappath # only trace mount namespaces in the map diff --git a/tools/execsnoop.py b/tools/execsnoop.py index 9879d2c2fe9..53052d39034 100755 --- 
a/tools/execsnoop.py +++ b/tools/execsnoop.py @@ -19,6 +19,7 @@ from __future__ import print_function from bcc import BPF +from bcc.containers import filter_by_containers from bcc.utils import ArgString, printb import bcc.utils as utils import argparse @@ -57,7 +58,8 @@ def parse_uid(user): ./execsnoop -q # add "quotemarks" around arguments ./execsnoop -n main # only print command lines containing "main" ./execsnoop -l tpkg # only print command where arguments contains "tpkg" - ./execsnoop --cgroupmap ./mappath # only trace cgroups in this BPF map + ./execsnoop --cgroupmap mappath # only trace cgroups in this BPF map + ./execsnoop --mntnsmap mappath # only trace mount namespaces in the map """ parser = argparse.ArgumentParser( description="Trace exec() syscalls", @@ -71,6 +73,8 @@ def parse_uid(user): help="include failed exec()s") parser.add_argument("--cgroupmap", help="trace cgroups in this BPF map only") +parser.add_argument("--mntnsmap", + help="trace mount namespaces in this BPF map only") parser.add_argument("-u", "--uid", type=parse_uid, metavar='USER', help="trace this UID only") parser.add_argument("-q", "--quote", action="store_true", @@ -113,9 +117,6 @@ def parse_uid(user): int retval; }; -#if CGROUPSET -BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH"); -#endif BPF_PERF_OUTPUT(events); static int __submit_arg(struct pt_regs *ctx, void *ptr, struct data_t *data) @@ -145,12 +146,9 @@ def parse_uid(user): UID_FILTER -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { - return 0; + if (container_should_be_filtered()) { + return 0; } -#endif // create data here and pass to submit_arg to save stack space (#555) struct data_t data = {}; @@ -185,12 +183,9 @@ def parse_uid(user): int do_ret_sys_execve(struct pt_regs *ctx) { -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { - return 0; + if (container_should_be_filtered()) { + return 0; } 
-#endif struct data_t data = {}; struct task_struct *task; @@ -223,11 +218,7 @@ def parse_uid(user): 'if (uid != %s) { return 0; }' % args.uid) else: bpf_text = bpf_text.replace('UID_FILTER', '') -if args.cgroupmap: - bpf_text = bpf_text.replace('CGROUPSET', '1') - bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap) -else: - bpf_text = bpf_text.replace('CGROUPSET', '0') +bpf_text = filter_by_containers(args) + bpf_text if args.ebpf: print(bpf_text) exit() diff --git a/tools/execsnoop_example.txt b/tools/execsnoop_example.txt index a90d0079400..8cdfe0db721 100644 --- a/tools/execsnoop_example.txt +++ b/tools/execsnoop_example.txt @@ -83,7 +83,7 @@ with an externally created map. # ./execsnoop --cgroupmap /sys/fs/bpf/test01 -For more details, see docs/filtering_by_cgroups.md +For more details, see docs/special_filtering.md The -U option include UID on output: @@ -121,6 +121,7 @@ optional arguments: -x, --fails include failed exec()s --cgroupmap CGROUPMAP trace cgroups in this BPF map only + --mntnsmap MNTNSMAP trace mount namespaces in this BPF map only -u USER, --uid USER trace this UID only -q, --quote Add quotemarks (") around arguments. 
-n NAME, --name NAME only print commands matching this name (regex), any @@ -142,4 +143,5 @@ examples: ./execsnoop -q # add "quotemarks" around arguments ./execsnoop -n main # only print command lines containing "main" ./execsnoop -l tpkg # only print command where arguments contains "tpkg" - ./execsnoop --cgroupmap ./mappath # only trace cgroups in this BPF map + ./execsnoop --cgroupmap mappath # only trace cgroups in this BPF map + ./execsnoop --mntnsmap mappath # only trace mount namespaces in the map diff --git a/tools/opensnoop.py b/tools/opensnoop.py index 28fe7559bee..a68b13f9a86 100755 --- a/tools/opensnoop.py +++ b/tools/opensnoop.py @@ -17,6 +17,7 @@ from __future__ import print_function from bcc import ArgString, BPF +from bcc.containers import filter_by_containers from bcc.utils import printb import argparse from datetime import datetime, timedelta @@ -35,7 +36,8 @@ ./opensnoop -n main # only print process names containing "main" ./opensnoop -e # show extended fields ./opensnoop -f O_WRONLY -f O_RDWR # only print calls for writing - ./opensnoop --cgroupmap ./mappath # only trace cgroups in this BPF map + ./opensnoop --cgroupmap mappath # only trace cgroups in this BPF map + ./opensnoop --mntnsmap mappath # only trace mount namespaces in the map """ parser = argparse.ArgumentParser( description="Trace open() syscalls", @@ -53,6 +55,8 @@ help="trace this TID only") parser.add_argument("--cgroupmap", help="trace cgroups in this BPF map only") +parser.add_argument("--mntnsmap", + help="trace mount namespaces in this BPF map only") parser.add_argument("-u", "--uid", help="trace this UID only") parser.add_argument("-d", "--duration", @@ -102,9 +106,6 @@ int flags; // EXTENDED_STRUCT_MEMBER }; -#if CGROUPSET -BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH"); -#endif BPF_PERF_OUTPUT(events); """ @@ -122,12 +123,11 @@ PID_TID_FILTER UID_FILTER FLAGS_FILTER -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if 
(cgroupset.lookup(&cgroupid) == NULL) { - return 0; + + if (container_should_be_filtered()) { + return 0; } -#endif + if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) { val.id = id; val.fname = filename; @@ -177,12 +177,9 @@ PID_TID_FILTER UID_FILTER FLAGS_FILTER -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { - return 0; + if (container_should_be_filtered()) { + return 0; } -#endif struct data_t data = {}; bpf_get_current_comm(&data.comm, sizeof(data.comm)); @@ -221,11 +218,7 @@ 'if (uid != %s) { return 0; }' % args.uid) else: bpf_text = bpf_text.replace('UID_FILTER', '') -if args.cgroupmap: - bpf_text = bpf_text.replace('CGROUPSET', '1') - bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap) -else: - bpf_text = bpf_text.replace('CGROUPSET', '0') +bpf_text = filter_by_containers(args) + bpf_text if args.flag_filter: bpf_text = bpf_text.replace('FLAGS_FILTER', 'if (!(flags & %d)) { return 0; }' % flag_filter_mask) diff --git a/tools/opensnoop_example.txt b/tools/opensnoop_example.txt index 44f0e337d5e..f15e84f2bda 100644 --- a/tools/opensnoop_example.txt +++ b/tools/opensnoop_example.txt @@ -187,14 +187,15 @@ with an externally created map. 
# ./opensnoop --cgroupmap /sys/fs/bpf/test01 -For more details, see docs/filtering_by_cgroups.md +For more details, see docs/special_filtering.md USAGE message: # ./opensnoop -h -usage: opensnoop [-h] [-T] [-x] [-p PID] [-t TID] [-d DURATION] [-n NAME] - [-e] [-f FLAG_FILTER] +usage: opensnoop.py [-h] [-T] [-U] [-x] [-p PID] [-t TID] + [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP] [-u UID] + [-d DURATION] [-n NAME] [-e] [-f FLAG_FILTER] Trace open() syscalls @@ -205,6 +206,9 @@ optional arguments: -x, --failed only show failed opens -p PID, --pid PID trace this PID only -t TID, --tid TID trace this TID only + --cgroupmap CGROUPMAP + trace cgroups in this BPF map only + --mntnsmap MNTNSMAP trace mount namespaces in this BPF map only -u UID, --uid UID trace this UID only -d DURATION, --duration DURATION total duration of trace in seconds @@ -226,3 +230,5 @@ examples: ./opensnoop -n main # only print process names containing "main" ./opensnoop -e # show extended fields ./opensnoop -f O_WRONLY -f O_RDWR # only print calls for writing + ./opensnoop --cgroupmap mappath # only trace cgroups in this BPF map + ./opensnoop --mntnsmap mappath # only trace mount namespaces in the map diff --git a/tools/profile.py b/tools/profile.py index 2067933af37..dd6f65fa3a5 100755 --- a/tools/profile.py +++ b/tools/profile.py @@ -24,10 +24,11 @@ # # 15-Jul-2016 Brendan Gregg Created this. # 20-Oct-2016 " " Switched to use the new 4.9 support. -# 26-Jan-2019 " " Changed to exclude CPU idle by default. +# 26-Jan-2019 " " Changed to exclude CPU idle by default. 
from __future__ import print_function from bcc import BPF, PerfType, PerfSWConfig +from bcc.containers import filter_by_containers from sys import stderr from time import sleep import argparse @@ -72,7 +73,8 @@ def stack_id_err(stack_id): ./profile -L 185 # only profile thread with TID 185 ./profile -U # only show user space stacks (no kernel) ./profile -K # only show kernel space stacks (no user) - ./profile --cgroupmap ./mappath # only trace cgroups in this BPF map + ./profile --cgroupmap mappath # only trace cgroups in this BPF map + ./profile --mntnsmap mappath # only trace mount namespaces in the map """ parser = argparse.ArgumentParser( description="Profile CPU stack traces at a timed interval", @@ -115,6 +117,8 @@ def stack_id_err(stack_id): help=argparse.SUPPRESS) parser.add_argument("--cgroupmap", help="trace cgroups in this BPF map only") +parser.add_argument("--mntnsmap", + help="trace mount namespaces in this BPF map only") # option logic args = parser.parse_args() @@ -146,10 +150,6 @@ def stack_id_err(stack_id): BPF_HASH(counts, struct key_t); BPF_STACK_TRACE(stack_traces, STACK_STORAGE_SIZE); -#if CGROUPSET -BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH"); -#endif - // This code gets a bit complex. Probably not suitable for casual hacking. 
int do_perf_event(struct bpf_perf_event_data *ctx) { @@ -163,12 +163,9 @@ def stack_id_err(stack_id): if (!(THREAD_FILTER)) return 0; -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { + if (container_should_be_filtered()) { return 0; } -#endif // create map key struct key_t key = {.pid = tgid}; @@ -246,11 +243,7 @@ def stack_id_err(stack_id): stack_context = "user + kernel" bpf_text = bpf_text.replace('USER_STACK_GET', user_stack_get) bpf_text = bpf_text.replace('KERNEL_STACK_GET', kernel_stack_get) -if args.cgroupmap: - bpf_text = bpf_text.replace('CGROUPSET', '1') - bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap) -else: - bpf_text = bpf_text.replace('CGROUPSET', '0') +bpf_text = filter_by_containers(args) + bpf_text sample_freq = 0 sample_period = 0 diff --git a/tools/profile_example.txt b/tools/profile_example.txt index bb3c5aec945..2c7e702a940 100644 --- a/tools/profile_example.txt +++ b/tools/profile_example.txt @@ -708,7 +708,7 @@ with an externally created map. 
# ./profile --cgroupmap /sys/fs/bpf/test01 -For more details, see docs/filtering_by_cgroups.md +For more details, see docs/special_filtering.md USAGE message: @@ -717,7 +717,7 @@ USAGE message: usage: profile.py [-h] [-p PID | -L TID] [-U | -K] [-F FREQUENCY | -c COUNT] [-d] [-a] [-I] [-f] [--stack-storage-size STACK_STORAGE_SIZE] [-C CPU] - [--cgroupmap CGROUPMAP] + [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP] [duration] Profile CPU stack traces at a timed interval @@ -750,6 +750,7 @@ optional arguments: -C CPU, --cpu CPU cpu number to run profile on --cgroupmap CGROUPMAP trace cgroups in this BPF map only + --mntnsmap MNTNSMAP trace mount namespaces in this BPF map only examples: ./profile # profile stack traces at 49 Hertz until Ctrl-C @@ -761,4 +762,5 @@ examples: ./profile -L 185 # only profile thread with TID 185 ./profile -U # only show user space stacks (no kernel) ./profile -K # only show kernel space stacks (no user) - ./profile --cgroupmap ./mappath # only trace cgroups in this BPF map + ./profile --cgroupmap mappath # only trace cgroups in this BPF map + ./profile --mntnsmap mappath # only trace mount namespaces in the map diff --git a/tools/tcpaccept.py b/tools/tcpaccept.py index 03b05e0ab7e..4aa7fd7a10b 100755 --- a/tools/tcpaccept.py +++ b/tools/tcpaccept.py @@ -16,6 +16,7 @@ # 14-Feb-2016 " " Switch to bpf_perf_output. 
from __future__ import print_function +from bcc.containers import filter_by_containers from bcc import BPF from socket import inet_ntop, AF_INET, AF_INET6 from struct import pack @@ -29,7 +30,8 @@ ./tcpaccept -t # include timestamps ./tcpaccept -P 80,81 # only trace port 80 and 81 ./tcpaccept -p 181 # only trace PID 181 - ./tcpaccept --cgroupmap ./mappath # only trace cgroups in this BPF map + ./tcpaccept --cgroupmap mappath # only trace cgroups in this BPF map + ./tcpaccept --mntnsmap mappath # only trace mount namespaces in the map """ parser = argparse.ArgumentParser( description="Trace TCP accepts", @@ -45,6 +47,8 @@ help="comma-separated list of local ports to trace") parser.add_argument("--cgroupmap", help="trace cgroups in this BPF map only") +parser.add_argument("--mntnsmap", + help="trace mount namespaces in this BPF map only") parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS) args = parser.parse_args() @@ -80,11 +84,6 @@ char task[TASK_COMM_LEN]; }; BPF_PERF_OUTPUT(ipv6_events); - -#if CGROUPSET -BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH"); -#endif - """ # @@ -97,12 +96,9 @@ bpf_text_kprobe = """ int kretprobe__inet_csk_accept(struct pt_regs *ctx) { -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { + if (container_should_be_filtered()) { return 0; } -#endif struct sock *newsk = (struct sock *)PT_REGS_RC(ctx); u32 pid = bpf_get_current_pid_tgid() >> 32; @@ -115,21 +111,21 @@ // check this is TCP u8 protocol = 0; // workaround for reading the sk_protocol bitfield: - + // Following comments add by Joe Yin: // Unfortunately,it can not work since Linux 4.10, // because the sk_wmem_queued is not following the bitfield of sk_protocol. // And the following member is sk_gso_max_segs. 
// So, we can use this: // bpf_probe_read(&protocol, 1, (void *)((u64)&newsk->sk_gso_max_segs) - 3); - // In order to diff the pre-4.10 and 4.10+ ,introduce the variables gso_max_segs_offset,sk_lingertime, - // sk_lingertime is closed to the gso_max_segs_offset,and - // the offset between the two members is 4 + // In order to diff the pre-4.10 and 4.10+ ,introduce the variables gso_max_segs_offset,sk_lingertime, + // sk_lingertime is closed to the gso_max_segs_offset,and + // the offset between the two members is 4 int gso_max_segs_offset = offsetof(struct sock, sk_gso_max_segs); int sk_lingertime_offset = offsetof(struct sock, sk_lingertime); - if (sk_lingertime_offset - gso_max_segs_offset == 4) + if (sk_lingertime_offset - gso_max_segs_offset == 4) // 4.10+ with little endian #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ protocol = *(u8 *)((u64)&newsk->sk_gso_max_segs - 3); @@ -199,11 +195,7 @@ lports_if = ' && '.join(['lport != %d' % lport for lport in lports]) bpf_text = bpf_text.replace('##FILTER_PORT##', 'if (%s) { return 0; }' % lports_if) -if args.cgroupmap: - bpf_text = bpf_text.replace('CGROUPSET', '1') - bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap) -else: - bpf_text = bpf_text.replace('CGROUPSET', '0') +bpf_text = filter_by_containers(args) + bpf_text if debug or args.ebpf: print(bpf_text) if args.ebpf: diff --git a/tools/tcpaccept_example.txt b/tools/tcpaccept_example.txt index 5b6b1a6d8b7..9381565568a 100644 --- a/tools/tcpaccept_example.txt +++ b/tools/tcpaccept_example.txt @@ -38,7 +38,7 @@ with an externally created map. 
# ./tcpaccept --cgroupmap /sys/fs/bpf/test01 -For more details, see docs/filtering_by_cgroups.md +For more details, see docs/special_filtering.md USAGE message: @@ -62,4 +62,5 @@ examples: ./tcpaccept -t # include timestamps ./tcpaccept -P 80,81 # only trace port 80 and 81 ./tcpaccept -p 181 # only trace PID 181 - ./tcpaccept --cgroupmap ./mappath # only trace cgroups in this BPF map + ./tcpaccept --cgroupmap mappath # only trace cgroups in this BPF map + ./tcpaccept --mntnsmap mappath # only trace mount namespaces in the map \ No newline at end of file diff --git a/tools/tcpconnect.py b/tools/tcpconnect.py index 67f2cef1970..40878eea0bf 100755 --- a/tools/tcpconnect.py +++ b/tools/tcpconnect.py @@ -21,6 +21,7 @@ from __future__ import print_function from bcc import BPF +from bcc.containers import filter_by_containers from bcc.utils import printb import argparse from socket import inet_ntop, ntohs, AF_INET, AF_INET6 @@ -37,7 +38,8 @@ ./tcpconnect -U # include UID ./tcpconnect -u 1000 # only trace UID 1000 ./tcpconnect -c # count connects per src ip and dest ip/port - ./tcpconnect --cgroupmap ./mappath # only trace cgroups in this BPF map + ./tcpconnect --cgroupmap mappath # only trace cgroups in this BPF map + ./tcpconnect --mntnsmap mappath # only trace mount namespaces in the map """ parser = argparse.ArgumentParser( description="Trace TCP connects", @@ -57,6 +59,8 @@ help="count connects per src ip and dest ip/port") parser.add_argument("--cgroupmap", help="trace cgroups in this BPF map only") +parser.add_argument("--mntnsmap", + help="trace mount namespaces in this BPF map only") parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS) args = parser.parse_args() @@ -70,10 +74,6 @@ BPF_HASH(currsock, u32, struct sock *); -#if CGROUPSET -BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH"); -#endif - // separate data structs for ipv4 and ipv6 struct ipv4_data_t { u64 ts_us; @@ -116,12 +116,9 @@ int trace_connect_entry(struct pt_regs 
*ctx, struct sock *sk) { -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { - return 0; + if (container_should_be_filtered()) { + return 0; } -#endif u64 pid_tgid = bpf_get_current_pid_tgid(); u32 pid = pid_tgid >> 32; @@ -248,11 +245,7 @@ if args.uid: bpf_text = bpf_text.replace('FILTER_UID', 'if (uid != %s) { return 0; }' % args.uid) -if args.cgroupmap: - bpf_text = bpf_text.replace('CGROUPSET', '1') - bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap) -else: - bpf_text = bpf_text.replace('CGROUPSET', '0') +bpf_text = filter_by_containers(args) + bpf_text bpf_text = bpf_text.replace('FILTER_PID', '') bpf_text = bpf_text.replace('FILTER_PORT', '') diff --git a/tools/tcpconnect_example.txt b/tools/tcpconnect_example.txt index cf975627238..7efac4a316c 100644 --- a/tools/tcpconnect_example.txt +++ b/tools/tcpconnect_example.txt @@ -73,14 +73,14 @@ with an externally created map. # ./tcpconnect --cgroupmap /sys/fs/bpf/test01 -For more details, see docs/filtering_by_cgroups.md +For more details, see docs/special_filtering.md USAGE message: # ./tcpconnect -h usage: tcpconnect.py [-h] [-t] [-p PID] [-P PORT] [-U] [-u UID] [-c] - [--cgroupmap CGROUPMAP] + [--cgroupmap CGROUPMAP] [--mntnsmap MNTNSMAP] Trace TCP connects @@ -104,4 +104,5 @@ examples: ./tcpconnect -U # include UID ./tcpconnect -u 1000 # only trace UID 1000 ./tcpconnect -c # count connects per src ip and dest ip/port - ./tcpconnect --cgroupmap ./mappath # only trace cgroups in this BPF map + ./tcpconnect --cgroupmap mappath # only trace cgroups in this BPF map + ./tcpconnect --mntnsmap mappath # only trace mount namespaces in the map \ No newline at end of file diff --git a/tools/tcptop.py b/tools/tcptop.py index 9fb3ca2b326..510c4e86b54 100755 --- a/tools/tcptop.py +++ b/tools/tcptop.py @@ -26,6 +26,7 @@ from __future__ import print_function from bcc import BPF +from bcc.containers import filter_by_containers import argparse from socket import 
inet_ntop, AF_INET, AF_INET6 from struct import pack @@ -45,7 +46,8 @@ def range_check(string): ./tcptop # trace TCP send/recv by host ./tcptop -C # don't clear the screen ./tcptop -p 181 # only trace PID 181 - ./tcptop --cgroupmap ./mappath # only trace cgroups in this BPF map + ./tcptop --cgroupmap mappath # only trace cgroups in this BPF map + ./tcptop --mntnsmap mappath # only trace mount namespaces in the map """ parser = argparse.ArgumentParser( description="Summarize TCP send/recv throughput by host", @@ -63,6 +65,8 @@ def range_check(string): help="number of outputs") parser.add_argument("--cgroupmap", help="trace cgroups in this BPF map only") +parser.add_argument("--mntnsmap", + help="trace mount namespaces in this BPF map only") parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS) args = parser.parse_args() @@ -98,21 +102,16 @@ def range_check(string): BPF_HASH(ipv6_send_bytes, struct ipv6_key_t); BPF_HASH(ipv6_recv_bytes, struct ipv6_key_t); -#if CGROUPSET -BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH"); -#endif - int kprobe__tcp_sendmsg(struct pt_regs *ctx, struct sock *sk, struct msghdr *msg, size_t size) { - u32 pid = bpf_get_current_pid_tgid() >> 32; - FILTER -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { + if (container_should_be_filtered()) { return 0; } -#endif + + u32 pid = bpf_get_current_pid_tgid() >> 32; + FILTER_PID + u16 dport = 0, family = sk->__sk_common.skc_family; if (family == AF_INET) { @@ -148,14 +147,13 @@ def range_check(string): */ int kprobe__tcp_cleanup_rbuf(struct pt_regs *ctx, struct sock *sk, int copied) { - u32 pid = bpf_get_current_pid_tgid() >> 32; - FILTER -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { + if (container_should_be_filtered()) { return 0; } -#endif + + u32 pid = bpf_get_current_pid_tgid() >> 32; + FILTER_PID + u16 dport = 0, family = 
sk->__sk_common.skc_family; u64 *val, zero = 0; @@ -190,15 +188,11 @@ def range_check(string): # code substitutions if args.pid: - bpf_text = bpf_text.replace('FILTER', + bpf_text = bpf_text.replace('FILTER_PID', 'if (pid != %s) { return 0; }' % args.pid) else: - bpf_text = bpf_text.replace('FILTER', '') -if args.cgroupmap: - bpf_text = bpf_text.replace('CGROUPSET', '1') - bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap) -else: - bpf_text = bpf_text.replace('CGROUPSET', '0') + bpf_text = bpf_text.replace('FILTER_PID', '') +bpf_text = filter_by_containers(args) + bpf_text if debug or args.ebpf: print(bpf_text) if args.ebpf: diff --git a/tools/tcptop_example.txt b/tools/tcptop_example.txt index 379aff2086b..e29e2fa2e03 100644 --- a/tools/tcptop_example.txt +++ b/tools/tcptop_example.txt @@ -97,13 +97,14 @@ with an externally created map. # tcptop --cgroupmap /sys/fs/bpf/test01 -For more details, see docs/filtering_by_cgroups.md +For more details, see docs/special_filtering.md USAGE: # tcptop -h usage: tcptop.py [-h] [-C] [-S] [-p PID] [--cgroupmap CGROUPMAP] + [--mntnsmap MNTNSMAP] [interval] [count] Summarize TCP send/recv throughput by host @@ -125,3 +126,4 @@ examples: ./tcptop -C # don't clear the screen ./tcptop -p 181 # only trace PID 181 ./tcptop --cgroupmap ./mappath # only trace cgroups in this BPF map + ./tcptop --mntnsmap mappath # only trace mount namespaces in the map diff --git a/tools/tcptracer.py b/tools/tcptracer.py index 8e6e1ec2b03..7f67d33816d 100755 --- a/tools/tcptracer.py +++ b/tools/tcptracer.py @@ -16,6 +16,7 @@ # Licensed under the Apache License, Version 2.0 (the "License") from __future__ import print_function from bcc import BPF +from bcc.containers import filter_by_containers import argparse as ap from socket import inet_ntop, AF_INET, AF_INET6 @@ -31,6 +32,8 @@ help="trace this Network Namespace only") parser.add_argument("--cgroupmap", help="trace cgroups in this BPF map only") +parser.add_argument("--mntnsmap", + help="trace 
mount namespaces in this BPF map only") parser.add_argument("-v", "--verbose", action="store_true", help="include Network Namespace in the output") parser.add_argument("--ebpf", action="store_true", @@ -79,10 +82,6 @@ }; BPF_PERF_OUTPUT(tcp_ipv6_event); -#if CGROUPSET -BPF_TABLE_PINNED("hash", u64, u64, cgroupset, 1024, "CGROUPPATH"); -#endif - // tcp_set_state doesn't run in the context of the process that initiated the // connection so we need to store a map TUPLE -> PID to send the right PID on // the event @@ -179,12 +178,9 @@ int trace_connect_v4_entry(struct pt_regs *ctx, struct sock *sk) { -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { - return 0; + if (container_should_be_filtered()) { + return 0; } -#endif u64 pid = bpf_get_current_pid_tgid(); @@ -233,12 +229,9 @@ int trace_connect_v6_entry(struct pt_regs *ctx, struct sock *sk) { -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { - return 0; + if (container_should_be_filtered()) { + return 0; } -#endif u64 pid = bpf_get_current_pid_tgid(); ##FILTER_PID## @@ -371,12 +364,9 @@ int trace_close_entry(struct pt_regs *ctx, struct sock *skp) { -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { - return 0; + if (container_should_be_filtered()) { + return 0; } -#endif u64 pid = bpf_get_current_pid_tgid(); @@ -439,12 +429,9 @@ int trace_accept_return(struct pt_regs *ctx) { -#if CGROUPSET - u64 cgroupid = bpf_get_current_cgroup_id(); - if (cgroupset.lookup(&cgroupid) == NULL) { - return 0; + if (container_should_be_filtered()) { + return 0; } -#endif struct sock *newsk = (struct sock *)PT_REGS_RC(ctx); u64 pid = bpf_get_current_pid_tgid(); @@ -614,11 +601,7 @@ def print_ipv6_event(cpu, data, size): bpf_text = bpf_text.replace('##FILTER_PID##', pid_filter) bpf_text = bpf_text.replace('##FILTER_NETNS##', netns_filter) -if args.cgroupmap: - bpf_text = 
bpf_text.replace('CGROUPSET', '1') - bpf_text = bpf_text.replace('CGROUPPATH', args.cgroupmap) -else: - bpf_text = bpf_text.replace('CGROUPSET', '0') +bpf_text = filter_by_containers(args) + bpf_text if args.ebpf: print(bpf_text) diff --git a/tools/tcptracer_example.txt b/tools/tcptracer_example.txt index b6e52589dce..0f61ecc7bd7 100644 --- a/tools/tcptracer_example.txt +++ b/tools/tcptracer_example.txt @@ -42,4 +42,4 @@ with an externally created map. # ./tcptracer --cgroupmap /sys/fs/bpf/test01 -For more details, see docs/filtering_by_cgroups.md +For more details, see docs/special_filtering.md