Skip to content

Commit

Permalink
Support tracing XDP
Browse files Browse the repository at this point in the history
Add an option --filter-trace-xdp to trace all XDP programs on the host by
attaching fentry to them, in the same way tc-bpf programs are traced
(Asphaltt@2347755).

Differences from tracing tc-bpf:

1. Filtering by mark is not supported.
2. No mark in meta output.
3. No proto in meta output.
4. --output-skb is not supported.

Signed-off-by: Leon Hwang <hffilwlqm@gmail.com>
  • Loading branch information
Asphaltt committed Mar 4, 2024
1 parent 9ea2ba5 commit 8bbc13b
Show file tree
Hide file tree
Showing 7 changed files with 355 additions and 48 deletions.
99 changes: 99 additions & 0 deletions bpf/headers/bpf/bpf_endian.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#ifndef __BPF_ENDIAN__
#define __BPF_ENDIAN__

/*
 * Isolate byte #n and put it into byte #m, for __u##b type.
 * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64:
 * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx
 * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000
 * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn
 * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000
 *
 * Parameters:
 *   x - value to take the byte from
 *   b - bit width of the type; it is token-pasted into the type name
 *       (__u##b), so it must be a literal such as 16, 32 or 64
 *   n - index of the source byte (0 = least significant)
 *   m - index of the destination byte
 *
 * n and m are fully parenthesized so that expression arguments (e.g. "i + 1")
 * keep their intended precedence inside the shift counts.
 *
 * For b == 16 the operand is promoted to int before shifting, so bits above
 * bit 15 can survive in the result; callers are expected to truncate with a
 * cast, as ___bpf_swab16() does.
 */
#define ___bpf_mvb(x, b, n, m) \
	((__u##b)(x) << ((b) - ((n) + 1) * 8) >> ((b) - 8) << ((m) * 8))

/*
 * Swap the two bytes of a 16-bit value using ___bpf_mvb(): byte 0 goes to
 * byte 1 and byte 1 goes to byte 0. The outer (__u16) cast truncates the
 * OR-ed result to 16 bits.
 */
#define ___bpf_swab16(x) ((__u16)( \
___bpf_mvb(x, 16, 0, 1) | \
___bpf_mvb(x, 16, 1, 0)))

/*
 * Reverse the byte order of a 32-bit value: each source byte n is moved to
 * byte 3 - n with ___bpf_mvb() and the four results are OR-ed together.
 */
#define ___bpf_swab32(x) ((__u32)( \
___bpf_mvb(x, 32, 0, 3) | \
___bpf_mvb(x, 32, 1, 2) | \
___bpf_mvb(x, 32, 2, 1) | \
___bpf_mvb(x, 32, 3, 0)))

/*
 * Reverse the byte order of a 64-bit value: each source byte n is moved to
 * byte 7 - n with ___bpf_mvb() and the eight results are OR-ed together.
 */
#define ___bpf_swab64(x) ((__u64)( \
___bpf_mvb(x, 64, 0, 7) | \
___bpf_mvb(x, 64, 1, 6) | \
___bpf_mvb(x, 64, 2, 5) | \
___bpf_mvb(x, 64, 3, 4) | \
___bpf_mvb(x, 64, 4, 3) | \
___bpf_mvb(x, 64, 5, 2) | \
___bpf_mvb(x, 64, 6, 1) | \
___bpf_mvb(x, 64, 7, 0)))

/*
 * LLVM's BPF target selects the endianness of the CPU it compiles on,
 * unless the user specifies one explicitly (bpfel/bpfeb). The
 * __BYTE_ORDER__ used here is defined by the compiler; we cannot rely on
 * __BYTE_ORDER from the libc headers, since it does not reflect the
 * byte order that was actually requested.
 *
 * Note that LLVM's BPF target has different __builtin_bswapX()
 * semantics: it maps to BPF_ALU | BPF_END | BPF_TO_BE in both the bpfel
 * and the bpfeb case, which means that below we effectively map to
 * cpu_to_be16(). We could use it unconditionally in the BPF case, but it
 * is better not to rely on that, so that this header can be used from
 * both the application side and the BPF program side, which use
 * different targets.
 *
 * Each branch defines the same set of internal helpers: a runtime form
 * (__bpf_*) and a __bpf_constant_* form for every 16/32/64-bit conversion.
 */
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* Little-endian: runtime forms use the compiler builtins, constant forms
 * expand to the pure-macro ___bpf_swab*() implementations. */
# define __bpf_ntohs(x) __builtin_bswap16(x)
# define __bpf_htons(x) __builtin_bswap16(x)
# define __bpf_constant_ntohs(x) ___bpf_swab16(x)
# define __bpf_constant_htons(x) ___bpf_swab16(x)
# define __bpf_ntohl(x) __builtin_bswap32(x)
# define __bpf_htonl(x) __builtin_bswap32(x)
# define __bpf_constant_ntohl(x) ___bpf_swab32(x)
# define __bpf_constant_htonl(x) ___bpf_swab32(x)
# define __bpf_be64_to_cpu(x) __builtin_bswap64(x)
# define __bpf_cpu_to_be64(x) __builtin_bswap64(x)
# define __bpf_constant_be64_to_cpu(x) ___bpf_swab64(x)
# define __bpf_constant_cpu_to_be64(x) ___bpf_swab64(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* Big-endian: every conversion is the identity. */
# define __bpf_ntohs(x) (x)
# define __bpf_htons(x) (x)
# define __bpf_constant_ntohs(x) (x)
# define __bpf_constant_htons(x) (x)
# define __bpf_ntohl(x) (x)
# define __bpf_htonl(x) (x)
# define __bpf_constant_ntohl(x) (x)
# define __bpf_constant_htonl(x) (x)
# define __bpf_be64_to_cpu(x) (x)
# define __bpf_cpu_to_be64(x) (x)
# define __bpf_constant_be64_to_cpu(x) (x)
# define __bpf_constant_cpu_to_be64(x) (x)
#else
/* Neither byte order matched: refuse to compile rather than guess. */
# error "Fix your compiler's __BYTE_ORDER__?!"
#endif

/*
 * Public byte-order conversion helpers for 16-bit (htons/ntohs), 32-bit
 * (htonl/ntohl) and 64-bit (cpu_to_be64/be64_to_cpu) values.
 *
 * Each helper asks __builtin_constant_p() whether its argument is a
 * compile-time constant and then selects the matching __bpf_constant_*
 * form, or the runtime __bpf_* form otherwise.
 */
#define bpf_htons(x) \
(__builtin_constant_p(x) ? \
__bpf_constant_htons(x) : __bpf_htons(x))
#define bpf_ntohs(x) \
(__builtin_constant_p(x) ? \
__bpf_constant_ntohs(x) : __bpf_ntohs(x))
#define bpf_htonl(x) \
(__builtin_constant_p(x) ? \
__bpf_constant_htonl(x) : __bpf_htonl(x))
#define bpf_ntohl(x) \
(__builtin_constant_p(x) ? \
__bpf_constant_ntohl(x) : __bpf_ntohl(x))
#define bpf_cpu_to_be64(x) \
(__builtin_constant_p(x) ? \
__bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x))
#define bpf_be64_to_cpu(x) \
(__builtin_constant_p(x) ? \
__bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x))

#endif /* __BPF_ENDIAN__ */
140 changes: 128 additions & 12 deletions bpf/kprobe_pwru.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
#include "bpf/bpf_helpers.h"
#include "bpf/bpf_core_read.h"
#include "bpf/bpf_tracing.h"
#include "bpf/bpf_endian.h"
#include "bpf/bpf_ipv6.h"

#define PRINT_SKB_STR_SIZE 2048

/*
 * EtherType values in host byte order; code in this file converts them with
 * bpf_htons() before comparing against on-wire header fields.
 */
#define ETH_P_IP 0x800 /* IPv4 */
#define ETH_P_IPV6 0x86dd /* IPv6 */
#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN tag */

const static bool TRUE = true;

Expand Down Expand Up @@ -47,6 +49,12 @@ struct tuple {

u64 print_skb_id = 0;

/*
 * Source of an event, stored in event_t.type. The numeric values are also
 * declared on the Go side (eventType* constants in internal/pwru/output.go)
 * and must stay in sync with them.
 */
enum event_type {
EVENT_TYPE_KPROBE = 0, /* zero value; presumably the kprobe path, which is not visible in this hunk */
EVENT_TYPE_TC = 1, /* set by fentry_tc */
EVENT_TYPE_XDP = 2, /* set by fentry_xdp */
};

struct event_t {
u32 pid;
u32 type;
Expand Down Expand Up @@ -190,24 +198,19 @@ set_meta(struct sk_buff *skb, struct skb_meta *meta) {
}

static __always_inline void
set_tuple(struct sk_buff *skb, struct tuple *tpl) {
void *skb_head = BPF_CORE_READ(skb, head);
u16 l3_off = BPF_CORE_READ(skb, network_header);
__set_tuple(struct tuple *tpl, void *data, u16 l3_off, bool is_ipv4) {
u16 l4_off;

struct iphdr *l3_hdr = (struct iphdr *) (skb_head + l3_off);
u8 ip_vsn = BPF_CORE_READ_BITFIELD_PROBED(l3_hdr, version);

if (ip_vsn == 4) {
struct iphdr *ip4 = (struct iphdr *) l3_hdr;
if (is_ipv4) {
struct iphdr *ip4 = (struct iphdr *) (data + l3_off);
BPF_CORE_READ_INTO(&tpl->saddr, ip4, saddr);
BPF_CORE_READ_INTO(&tpl->daddr, ip4, daddr);
tpl->l4_proto = BPF_CORE_READ(ip4, protocol);
tpl->l3_proto = ETH_P_IP;
l4_off = l3_off + BPF_CORE_READ_BITFIELD_PROBED(ip4, ihl) * 4;

} else if (ip_vsn == 6) {
struct ipv6hdr *ip6 = (struct ipv6hdr *) l3_hdr;
} else {
struct ipv6hdr *ip6 = (struct ipv6hdr *) (data + l3_off);
BPF_CORE_READ_INTO(&tpl->saddr, ip6, saddr);
BPF_CORE_READ_INTO(&tpl->daddr, ip6, daddr);
tpl->l4_proto = BPF_CORE_READ(ip6, nexthdr); // TODO: ipv6 l4 protocol
Expand All @@ -216,16 +219,31 @@ set_tuple(struct sk_buff *skb, struct tuple *tpl) {
}

if (tpl->l4_proto == IPPROTO_TCP) {
struct tcphdr *tcp = (struct tcphdr *) (skb_head + l4_off);
struct tcphdr *tcp = (struct tcphdr *) (data + l4_off);
tpl->sport= BPF_CORE_READ(tcp, source);
tpl->dport= BPF_CORE_READ(tcp, dest);
} else if (tpl->l4_proto == IPPROTO_UDP) {
struct udphdr *udp = (struct udphdr *) (skb_head + l4_off);
struct udphdr *udp = (struct udphdr *) (data + l4_off);
tpl->sport= BPF_CORE_READ(udp, source);
tpl->dport= BPF_CORE_READ(udp, dest);
}
}

/*
 * Fill @tpl from the packet carried by @skb.
 *
 * The IP version is read from the header located at
 * skb->head + skb->network_header. If it is neither 4 nor 6 the tuple is
 * left untouched; otherwise parsing is delegated to __set_tuple().
 */
static __always_inline void
set_tuple(struct sk_buff *skb, struct tuple *tpl) {
	void *head = BPF_CORE_READ(skb, head);
	u16 net_off = BPF_CORE_READ(skb, network_header);
	struct iphdr *iph = (struct iphdr *) (head + net_off);

	u8 version = BPF_CORE_READ_BITFIELD_PROBED(iph, version);
	bool is_ipv4 = version == 4;

	/* Single call site keeps the inlined __set_tuple() body from being duplicated. */
	if (is_ipv4 || version == 6)
		__set_tuple(tpl, head, net_off, is_ipv4);
}

static __always_inline void
set_skb_btf(struct sk_buff *skb, typeof(print_skb_id) *event_id) {
#ifdef OUTPUT_SKB
Expand Down Expand Up @@ -375,6 +393,104 @@ int BPF_PROG(fentry_tc, struct sk_buff *skb) {

event.skb_addr = (u64) skb;
event.addr = BPF_PROG_ADDR;
event.type = EVENT_TYPE_TC;
bpf_map_push_elem(&events, &event, BPF_EXIST);

return BPF_OK;
}

/*
 * Network-namespace filter for XDP.
 *
 * Returns true when no netns filter is configured (cfg->netns == 0) or when
 * the netns inode number of the receiving device equals cfg->netns.
 */
static __always_inline bool
filter_xdp_netns(struct xdp_buff *xdp) {
	return !cfg->netns ||
	       BPF_CORE_READ(xdp, rxq, dev, nd_net.net, ns.inum) == cfg->netns;
}

/*
 * Interface-index filter for XDP.
 *
 * Returns true when no ifindex filter is configured (cfg->ifindex == 0) or
 * when the receiving device's ifindex equals cfg->ifindex.
 */
static __always_inline bool
filter_xdp_ifindex(struct xdp_buff *xdp) {
	return !cfg->ifindex ||
	       BPF_CORE_READ(xdp, rxq, dev, ifindex) == cfg->ifindex;
}

/*
 * Metadata filter for XDP: the frame must pass the netns filter first and
 * then the ifindex filter.
 */
static __always_inline bool
filter_xdp_meta(struct xdp_buff *xdp) {
	if (!filter_xdp_netns(xdp))
		return false;

	return filter_xdp_ifindex(xdp);
}

/*
 * Run the L2 pcap filter over the frame in @xdp.
 *
 * The packet bounds come from xdp->data and xdp->data_end. The first three
 * arguments of filter_pcap_ebpf_l2() all receive the xdp pointer itself.
 * NOTE(review): they look like placeholders for the injected filter code;
 * confirm against filter_pcap_ebpf_l2().
 */
static __always_inline bool
filter_xdp_pcap(struct xdp_buff *xdp) {
	void *pkt_start = (void *)(long) BPF_CORE_READ(xdp, data);
	void *pkt_end = (void *)(long) BPF_CORE_READ(xdp, data_end);
	void *self = (void *)xdp;

	return filter_pcap_ebpf_l2(self, self, self, pkt_start, pkt_end);
}

/*
 * Top-level XDP filter: a frame is traced only if it matches the pcap
 * filter and, after that, the metadata (netns/ifindex) filters.
 */
static __always_inline bool
filter_xdp(struct xdp_buff *xdp) {
	if (!filter_xdp_pcap(xdp))
		return false;

	return filter_xdp_meta(xdp);
}

static __always_inline void
set_xdp_meta(struct xdp_buff *xdp, struct skb_meta *meta) {
struct net_device *dev = BPF_CORE_READ(xdp, rxq, dev);
meta->netns = BPF_CORE_READ(dev, nd_net.net, ns.inum);
meta->ifindex = BPF_CORE_READ(dev, ifindex);
meta->mtu = BPF_CORE_READ(dev, mtu);
meta->len = BPF_CORE_READ(xdp, data_end) - BPF_CORE_READ(xdp, data);
}

/*
 * Fill @tpl with the L3/L4 tuple of the frame in @xdp.
 *
 * The frame is parsed from xdp->data as an Ethernet header, optionally
 * followed by one 802.1Q VLAN tag (stacked tags are not unwrapped). If the
 * resulting EtherType is neither IPv4 nor IPv6, @tpl is left untouched;
 * otherwise the actual field extraction is delegated to __set_tuple().
 */
static __always_inline void
set_xdp_tuple(struct xdp_buff *xdp, struct tuple *tpl) {
	void *data = (void *)(long) BPF_CORE_READ(xdp, data);
	struct ethhdr *eth = (struct ethhdr *) data;
	u16 l3_off = sizeof(*eth);

	/* h_proto is kept in network byte order, hence the bpf_htons() below. */
	__be16 proto = BPF_CORE_READ(eth, h_proto);
	if (proto == bpf_htons(ETH_P_8021Q)) {
		/* The VLAN header sits directly after the Ethernet header. */
		struct vlan_hdr *vlan = (struct vlan_hdr *) (eth + 1);
		proto = BPF_CORE_READ(vlan, h_vlan_encapsulated_proto);
		l3_off += sizeof(*vlan);
	}
	if (proto != bpf_htons(ETH_P_IP) && proto != bpf_htons(ETH_P_IPV6))
		return;

	bool is_ipv4 = proto == bpf_htons(ETH_P_IP);
	__set_tuple(tpl, data, l3_off, is_ipv4);
}

/*
 * Fill the optional parts of @event for an XDP frame, according to the
 * output flags in cfg. Each part is independent and only written when its
 * flag is set.
 */
static __always_inline void
set_xdp_output(void *ctx, struct xdp_buff *xdp, struct event_t *event) {
	/* Device / length metadata. */
	if (cfg->output_meta) {
		set_xdp_meta(xdp, &event->meta);
	}

	/* L3/L4 addresses and ports. */
	if (cfg->output_tuple) {
		set_xdp_tuple(xdp, &event->tuple);
	}

	/* Stack id recorded through print_stack_map. */
	if (cfg->output_stack) {
		event->print_stack_id = bpf_get_stackid(ctx, &print_stack_map, BPF_F_FAST_STACK_CMP);
	}
}

SEC("fentry/xdp")
int BPF_PROG(fentry_xdp, struct xdp_buff *xdp) {
struct event_t event = {};

if (cfg->is_set) {
if (!filter_xdp(xdp))
return BPF_OK;

set_xdp_output(ctx, xdp, &event);
}

event.pid = bpf_get_current_pid_tgid() >> 32;
event.ts = bpf_ktime_get_ns();
event.cpu_id = bpf_get_smp_processor_id();
event.skb_addr = (u64) xdp;
event.addr = BPF_PROG_ADDR;
event.type = EVENT_TYPE_XDP;
bpf_map_push_elem(&events, &event, BPF_EXIST);

return BPF_OK;
Expand Down
4 changes: 4 additions & 0 deletions internal/libpcap/inject.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ import (
"github.com/cloudflare/cbpfc"
)

// InjectL2Filter injects the pcap filter expression filterExpr into program
// by calling injectFilter with its third argument set to false.
// NOTE(review): false presumably selects the L2 (non-L3) filter variant;
// confirm against injectFilter.
func InjectL2Filter(program *ebpf.ProgramSpec, filterExpr string) (err error) {
	err = injectFilter(program, filterExpr, false)
	return
}

func InjectFilters(program *ebpf.ProgramSpec, filterExpr string) (err error) {
if err = injectFilter(program, filterExpr, false); err != nil {
return
Expand Down
15 changes: 15 additions & 0 deletions internal/pwru/output.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ import (

const absoluteTS string = "15:04:05.000"

// Event type codes compared against Event.Type when printing. The values
// mirror enum event_type in bpf/kprobe_pwru.c and must stay in sync with it.
const (
eventTypeKprobe = 0 // same value as EVENT_TYPE_KPROBE
eventTypeTracingTc = 1 // same value as EVENT_TYPE_TC
eventTypeTracingXdp = 2 // same value as EVENT_TYPE_XDP
)

type output struct {
flags *Flags
lastSeenSkb map[uint64]uint64 // skb addr => last seen TS
Expand Down Expand Up @@ -137,6 +143,15 @@ func (o *output) Print(event *Event) {
funcName = fmt.Sprintf("0x%x", addr)
}

if event.Type != eventTypeKprobe {
switch event.Type {
case eventTypeTracingTc:
funcName += "(tc)"
case eventTypeTracingXdp:
funcName += "(xdp)"
}
}

outFuncName := funcName
if funcName == "kfree_skb_reason" {
if reason, ok := o.kfreeReasons[event.ParamSecond]; ok {
Expand Down
Loading

0 comments on commit 8bbc13b

Please sign in to comment.