Skip to content

Commit

Permalink
simplified example by removing xdp and batch hash map delete
Browse files Browse the repository at this point in the history
Signed-off-by: Simone Magnani <simonemagnani.96@gmail.com>
  • Loading branch information
s41m0n committed Feb 28, 2024
1 parent c0b9cf8 commit 5123133
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 102 deletions.
4 changes: 2 additions & 2 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
Like kprobes, but with better performance and usability, for kernels 5.5 and later.
* [tcp_connect](fentry/) - Trace outgoing IPv4 TCP connections.
* [tcp_close](tcprtt/) - Log RTT of IPv4 TCP connections using eBPF CO-RE helpers.
* TC and XDP - Attach a program to a network interface to process incoming (XDP) and outgoing (TC) packets.
* [shared_xdp_tc](./shared_xdp_tc/) - monitor and periodically reset the number of incoming and outgoing packets for each network flow identified with the traditional 5-tuple session identifier (IP addresses, L4 Ports, IP protocol).
* TCx - Attach a program to Linux TC (Traffic Control) to process incoming and outgoing packets.
* [tcx](./tcx/) - monitor the number of incoming and outgoing packets for each network flow identified with the traditional 5-tuple session identifier (IP addresses, L4 Ports, IP protocol).
* XDP - Attach a program to a network interface to process incoming packets.
* [xdp](xdp/) - Print packet counts by IPv4 source address.
* Add your use case(s) here!
Expand Down
Binary file removed examples/shared_xdp_tc/bpf_bpfeb.o
Binary file not shown.
Binary file removed examples/shared_xdp_tc/bpf_bpfel.o
Binary file not shown.
File renamed without changes.
Binary file added examples/tcx/bpf_bpfeb.o
Binary file not shown.
File renamed without changes.
Binary file added examples/tcx/bpf_bpfel.o
Binary file not shown.
94 changes: 31 additions & 63 deletions examples/shared_xdp_tc/main.go → examples/tcx/main.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
// This program demonstrates attaching an eBPF program to a network interface
// with XDP (eXpress Data Path). The program parses the IPv4 source address
// from packets and writes the packet count by IP to an LRU hash map.
// The userspace program (Go code in this file) prints the contents
// of the map to stdout every second.
// It is possible to modify the XDP program to drop or redirect packets
// as well -- give it a try!
// This example depends on bpf_link, available in Linux kernel version 5.7 or newer.
// with Linux TC. The program parses the IPv4 source address
// from packets and writes the Ingress and Egress packet count by IP to a Hash map.
// The userspace program (Go code in this file) prints the content of the map to stdout.
package main

import (
Expand All @@ -22,20 +18,14 @@ import (
"github.com/cilium/ebpf/link"
)

// erase content of the map after this iterations
const eraseEvery = 5

// mapping between integer value and L4 protocol string
var (
currIter = 0
protoMap = map[uint8]string{
1: "ICMP",
6: "TCP",
17: "UDP",
}
)
// protoMap translates the numeric IP protocol identifier stored in a session
// key into a printable L4 protocol name. Identifiers that are not listed
// resolve to the empty string.
var protoMap = map[uint8]string{
	1:  "ICMP", // Internet Control Message Protocol
	6:  "TCP",  // Transmission Control Protocol
	17: "UDP",  // User Datagram Protocol
}

//go:generate go run github.com/cilium/ebpf/cmd/bpf2go bpf xdp_tcx.c -- -I../headers
//go:generate go run github.com/cilium/ebpf/cmd/bpf2go bpf tcx.c -- -I../headers

func main() {
if len(os.Args) < 2 {
Expand All @@ -56,17 +46,18 @@ func main() {
}
defer objs.Close()

// Attach the program to Ingress XDP.
l, err := link.AttachXDP(link.XDPOptions{
// Attach the program to Ingress TC.
l, err := link.AttachTCX(link.TCXOptions{
Interface: iface.Index,
Program: objs.IngressProgFunc,
Attach: ebpf.AttachTCXIngress,
})
if err != nil {
log.Fatalf("could not attach TCx program: %s", err)
}
defer l.Close()

log.Printf("Attached XDP program to INGRESS iface %q (index %d)", iface.Name, iface.Index)
log.Printf("Attached TCx program to INGRESS iface %q (index %d)", iface.Name, iface.Index)

// Attach the program to Egress TC.
l2, err := link.AttachTCX(link.TCXOptions{
Expand All @@ -86,69 +77,46 @@ func main() {
ticker := time.NewTicker(1 * time.Second)
defer ticker.Stop()
for range ticker.C {
handleMapContents(objs.StatsMap)
s, err := formatMapContent(objs.StatsMap)
if err != nil {
log.Printf("Error reading map: %s", err)
continue
}

log.Printf("Map contents:\n%s", s)
}
}

// handleMapContents prints the content of the map into a string.
// For each entry (if any), a row is formatted with the following content:
// <src_addr>:<src_port> <dst_addr>:<dst_port> Proto:<l4_proto> => Ingress:<in_packets> Egress:<eg_packets>
// Every nth calls to this function, the entire content of the Hash map is erased
// (lru map would automatically remove old keys, but can also remove additional keys
// so we use hash map to keep constant behaviour)
func handleMapContents(m *ebpf.Map) {
func formatMapContent(m *ebpf.Map) (string, error) {
var (
sb strings.Builder
key bpfSessionKey
val bpfSessionValue
keys []bpfSessionKey
sb strings.Builder
key bpfSessionKey
val bpfSessionValue
)
currIter++
needsErase := currIter%eraseEvery == 0

if needsErase {
keys = make([]bpfSessionKey, 0)
}

iter := m.Iterate()
for iter.Next(&key, &val) {
sb.WriteString(fmt.Sprintf("\t%s:%d - %s:%d Proto:%s => Ingress:%d Egress:%d\n",
intToIp(key.Saddr), portToLE(key.Sport), intToIp(key.Daddr), portToLE(key.Dport),
sb.WriteString(fmt.Sprintf("\t%15s:%5d - %15s:%5d Proto:%3s => Ingress:%10d Egress:%10d\n",
intToIp(key.Saddr), portToLittleEndian(key.Sport),
intToIp(key.Daddr), portToLittleEndian(key.Dport),
protoMap[key.Proto], val.InCount, val.EgCount))
if needsErase {
keys = append(keys, key)
}
}
if iter.Err() != nil {
log.Printf("Error reading map: %s", iter.Err())
return
}

log.Printf("Map contents:\n%s", sb.String())

if !needsErase {
return
}

n, err := m.BatchDelete(keys, nil)
if err != nil {
log.Printf("Error erasing map: %s", err)
return
}
log.Printf("Successfully Erased Map content (%d elements) at Iteration n. %d\n", n, currIter)
return sb.String(), iter.Err()
}

// intToIp convert an int32 value retrieved from the network
// traffic (big endian) into a netip.Addr
// intToIp converts a uint32 IPv4 address retrieved from the network traffic
// (big endian) into a netip.Addr. The value is serialized least-significant
// byte first, so the lowest byte of val becomes the first octet of the address.
func intToIp(val uint32) netip.Addr {
	var octets [4]byte
	binary.LittleEndian.PutUint32(octets[:], val)
	return netip.AddrFrom4(octets)
}

// portToLE convert a uint16 value retrieved from the network
// traffic (big endian) into a little endian
func portToLE(val uint16) uint16 {
// portToLittleEndian converts a uint16 value retrieved from the network traffic (big endian) into little endian
func portToLittleEndian(val uint16) uint16 {
p2 := [2]byte{}
binary.LittleEndian.PutUint16(p2[:], val)
return binary.LittleEndian.Uint16(p2[:])
Expand Down
81 changes: 44 additions & 37 deletions examples/shared_xdp_tc/xdp_tcx.c → examples/tcx/tcx.c
Original file line number Diff line number Diff line change
@@ -1,42 +1,39 @@
//go:build ignore

#include "common.h"
#include "bpf_endian.h"

char __license[] SEC("license") = "Dual MIT/GPL";

// Session identifier
struct session_key {
__u32 saddr; // IP source address
__u32 daddr; // IP dest address
__u16 sport; // Source port (if ICMP then 0)
__u16 dport; // Dest port (if ICMP then 0)
__u16 sport; // Source port (set to 0 if ICMP)
__u16 dport; // Dest port (set to 0 if ICMP)
__u8 proto; // Protocol ID
};

// Session value
struct session_value {
__u32 in_count;
__u32 eg_count;
__u32 in_count; // Ingress packet count
__u32 eg_count; // Egress packet count
};

#define MAX_MAP_ENTRIES 16

/*
Define an Hash map for storing packet Ingress and Egress count by 5-tuple session identifier
User-space logic is responsible for cleaning the map, if potentially new entries needs to be monitored.
*/
// Define a Hash map for storing the Ingress and Egress packet counts, keyed by the 5-tuple session identifier.
// The map holds at most MAX_MAP_ENTRIES entries.
// User-space logic is responsible for cleaning the map if new entries need to be monitored.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, MAX_MAP_ENTRIES);
__type(key, struct session_key); // 5-tuple: addresses, L4 ports, protocol
__type(value, struct session_value); // per-session ingress/egress counters
} stats_map SEC(".maps");

/*
Attempt to parse the 5-tuple session identifier from the packet.
Returns 0 if there is no IPv4 header field or if L4 is not a UDP, TCP or ICMP packet; otherwise returns non-zero.
*/
static __always_inline int parse_session_identifier(void *data, void *data_end, struct session_key *key, __u8 is_ingress) {
// Attempt to parse the 5-tuple session identifier from the packet.
// Returns 0 if there is no IPv4 header field or if L4 is not a UDP, TCP or ICMP packet; otherwise returns non-zero.
static __always_inline int parse_session_identifier(void *data, void *data_end, struct session_key *key) {
// First, parse the ethernet header.
struct ethhdr *eth = data;
if ((void *)(eth + 1) > data_end) {
Expand Down Expand Up @@ -89,31 +86,41 @@ static __always_inline int parse_session_identifier(void *data, void *data_end,
key->saddr = (__u32)(ip->saddr);
key->daddr = (__u32)(ip->daddr);

// In case the function is called from Egress hook, swap IP addresses and L4 ports before
// doing the map lookup
if (!is_ingress) {
__u32 tmp = key->saddr;
key->saddr = key->daddr;
key->daddr = tmp;
__u16 tmp2 = key->sport;
key->sport = key->dport;
key->dport = tmp2;
}
return 1;
}

/*
Main program logic shared by either XDP and TC hook. The function attempts to update the entry
in the LRU map corresponding to the 5-tuple identifier; it increases either the ingress or egress
packet counter value. In case of a non IP, TCP, UDP, ICMP packet, the program ignores the packet.
*/
static __always_inline int prog_logic(void *data, void *data_end, __u8 is_ingress, int ret_code) {
// Exchange, in place, the source and destination fields (IP addresses and L4 ports) of a session identifier.
// The protocol field is left untouched. The Egress path calls this before the map lookup so that both
// directions of a flow resolve to the same entry in the Hash map.
static __always_inline void swap_key(struct session_key *key) {
	// Remember the destination side, overwrite it with the source side, then restore it into the source.
	__u32 addr = key->daddr;
	__u16 port = key->dport;

	key->daddr = key->saddr;
	key->dport = key->sport;
	key->saddr = addr;
	key->sport = port;
}

// Main program logic shared by Ingress and Egress TC hooks. The function attempts to update the entry
// in the Hash map corresponding to the 5-tuple identifier; it increases either the ingress or egress
// packet counter value. In case of a non IP, TCP, UDP, ICMP packet, the program ignores the packet.
// This function would also work with data and data_end retrieved from an xdp_md structure, with XDP_PASS as the return code.
static __always_inline int prog_func(void *data, void *data_end, __u8 is_ingress, int ret_code) {
struct session_key key = {};
if (!parse_session_identifier(data, data_end, &key, is_ingress)) {
if (!parse_session_identifier(data, data_end, &key)) {
// Not an IPv4 packet, so don't count it.
goto done;
}

// In case the function is called from Egress hook, swap addresses
// and L4 port before doing the map lookup
if (!is_ingress) {
swap_key(&key);
}

struct session_value *val = bpf_map_lookup_elem(&stats_map, &key);
if (!val) {
// No entry in the map for this 5-tuple identifier yet, so set the initial value to 1.
Expand All @@ -132,18 +139,18 @@ static __always_inline int prog_logic(void *data, void *data_end, __u8 is_ingres
}

done:
// Return code corresponds to the OK action within either XDP or TC
// Return code corresponds to the PASS action in TC
return ret_code;
}

// XDP Ingress hook
SEC("xdp")
int ingress_prog_func(struct xdp_md *ctx) {
return prog_logic((void *)(long)ctx->data, (void *)(long)ctx->data_end, 0, XDP_PASS);
// TC Ingress hook
SEC("tc")
int ingress_prog_func(struct __sk_buff *skb) {
return prog_func((void *)(long)skb->data, (void *)(long)skb->data_end, 1, TC_ACT_OK);
}

// TC Egress hook
SEC("tc")
int egress_prog_func(struct __sk_buff *ctx) {
return prog_logic((void *)(long)ctx->data, (void *)(long)ctx->data_end, 1, TC_ACT_OK);
int egress_prog_func(struct __sk_buff *skb) {
return prog_func((void *)(long)skb->data, (void *)(long)skb->data_end, 0, TC_ACT_OK);
}

0 comments on commit 5123133

Please sign in to comment.