Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Use cgroup_skb programs for payload inspection #226

Merged
merged 7 commits into from Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 11 additions & 5 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Expand Up @@ -88,7 +88,7 @@ rules-engine = { path = "crates/modules/rules-engine" }
smtp-notifier = { path = "crates/modules/smtp-notifier" }
# External
anyhow = "1"
aya = { git = "https://github.com/aya-rs/aya", rev = "761e4ddbe3abf8b9177ebd6984465fe66696728a", features = ["async_tokio"] }
aya = { git = "https://github.com/aya-rs/aya", rev = "33b2e45ad313f7468a7d11667f13d9c365e2a263", features = ["async_tokio"] }
axum = { version = "0.6.20", features = ["ws"] }
bytes = "1.3.0"
cgroups-rs = { version = "0.3.2" }
Expand Down
27 changes: 27 additions & 0 deletions crates/bpf-builder/include/buffer.bpf.h
Expand Up @@ -111,3 +111,30 @@ static void buffer_append_user_memory(struct buffer *buffer,
index->len += len;
buffer->len += len;
}

static void buffer_append_skb_bytes(struct buffer *buffer,
struct buffer_index *index,
struct __sk_buff *skb,
__u32 offset, int len) {
int pos = (index->start + index->len);
if (pos >= HALF_BUFFER_MASK) {
LOG_ERROR("trying to write over half: %d+%d", index->start, index->len);
return;
}
if (len > HALF_BUFFER_MASK) {
len = HALF_BUFFER_MASK;
} else {
// include space for terminating 0
len = (len + 1) & HALF_BUFFER_MASK;
}

int r = bpf_skb_load_bytes(skb, offset, &((char*)buffer->buffer)[pos],
len & HALF_BUFFER_MASK);
if (r < 0) {
LOG_ERROR("reading failure: %d", r);
return;
}

index->len += len;
buffer->len += len;
}
36 changes: 36 additions & 0 deletions crates/bpf-builder/include/network.bpf.h
@@ -0,0 +1,36 @@
/* SPDX-License-Identifier: GPL-2.0-only */

#pragma once

// Return statuses of cgroup_skb programs.

// Accept the packet in the cgroup_skb program.
#define CGROUP_SKB_OK 1
// Deny the packet in the cgroup_skb program.
#define CGROUP_SKB_SHOT 0

// EtherTypes (indicating which protocol is encapsulated in the payload of the
// Ethernet frame).

// IPv4 EtherType.
#define ETH_P_IPV4 0x0800
// IPv6 EtherType.
#define ETH_P_IPV6 0x86DD

// Protocols encapsulated in the IP(v4/v6) payload.

// TCP protocol.
#define PROTO_TCP 0
// UDP protocol.
#define PROTO_UDP 1

// Address families

// Unix domain sockets.
#define AF_UNIX 1
// POSIX name for AF_UNIX.
#define AF_LOCAL 1
// IPv4 address family.
#define AF_INET 2
// IPv6 address family.
#define AF_INET6 10
1 change: 1 addition & 0 deletions crates/bpf-common/src/parsing/mod.rs
@@ -1,4 +1,5 @@
pub mod containers;
pub mod mountinfo;
pub mod procfs;

mod buffer_index;
Expand Down
145 changes: 145 additions & 0 deletions crates/bpf-common/src/parsing/mountinfo.rs
@@ -0,0 +1,145 @@
use std::{
fs::File,
io::{self, prelude::*, BufReader},
};

use thiserror::Error;

static MOUNTINFO_PATH: &str = "/proc/self/mountinfo";

#[derive(Error, Debug)]
pub enum MountinfoError {
#[error("reading link failed {path}")]
ReadFile {
#[source]
source: io::Error,
path: String,
},
#[error("could not find cgroup2 filesystem mount")]
Cgroup2NotFound,
}

/// Parses information about the mount point of the cgroup v2 hierarchy
/// filesystem from the given buffer reader. The buffer should contain
/// information about mounts provided by the kernel, which is usually available
/// in `/proc/<pid>/mountinfo`.
///
/// The format of the information is described in
/// [the kernel documentation](https://www.kernel.org/doc/Documentation/filesystems/proc.txt).
/// To sum it up, each line contains the following fields:
///
/// ```ignore
/// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
/// (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
/// ```
///
/// Field 7 is optional, but might occur multiple times. The purpose of
/// separator `-` is to make it clear where the field 9 starts.
///
/// We are interested only in fields 5 (mount point) and 9 (filesystem type).
fn parse_cgroup2_mountpoint<R>(reader: BufReader<R>) -> Result<String, MountinfoError>
where
R: Read,
{
for line in reader.lines().flatten() {
// Mountinfo is separated by `-` into two parts:
//
// * Information about the mount which consist of at least 6 fields,
// but can contain unknown number of optional fields. The `-`
// separator is used due to this uncertainity.
// * Information about the filesystem.
let mut halves = line.splitn(2, '-').map(String::from);
let mount_info = match halves.next() {
Some(mount_info) => mount_info,
None => continue,
};
let filesystem_info = match halves.next() {
Some(filesystem_info) => filesystem_info,
None => continue,
};

let mount_parts: Vec<&str> = mount_info.split_whitespace().collect();
let filesystem_parts: Vec<&str> = filesystem_info.split_whitespace().collect();

// We are interested in:
//
// * The 1st field of filesystem information (filesystem type).
// * The 4th field of mount information (mount point).
let filesystem_type = match filesystem_parts.first() {
Some(filesystem_type) => *filesystem_type,
None => continue,
};
// If the filesystem type is `cgroup2`, return the mount point.
// Otherwise, keep searching.
if filesystem_type == "cgroup2" {
let mountpoint = match mount_parts.get(4) {
Some(mountpoint) => *mountpoint,
None => continue,
};
return Ok(mountpoint.to_owned());
}
}

Err(MountinfoError::Cgroup2NotFound)
}

/// Returns the mount point of the cgroup v2 hierarchy filesystem.
///
/// On the most of Linux distributions, it returns either `/sys/fs/cgroup` or
/// `/sys/fs/cgroup/unified`.
pub fn get_cgroup2_mountpoint() -> Result<String, MountinfoError> {
let file = File::open(MOUNTINFO_PATH).map_err(|source| MountinfoError::ReadFile {
source,
path: MOUNTINFO_PATH.to_owned(),
})?;

let reader = BufReader::new(file);
parse_cgroup2_mountpoint(reader)
}

#[cfg(test)]
mod test {
use super::*;

#[test]
fn test_get_cgroup2() {
let mountinfo = b"24 31 0:22 / /proc rw,nosuid,nodev,noexec,relatime - proc proc rw
25 31 0:23 / /sys rw,nosuid,nodev,noexec,relatime - sysfs sysfs rw
26 31 0:5 / /dev rw,nosuid,noexec - devtmpfs devtmpfs rw,size=10240k,nr_inodes=8117323,mode=755,inode64
27 26 0:24 / /dev/pts rw,nosuid,noexec,relatime - devpts devpts rw,gid=5,mode=620,ptmxmode=000
28 26 0:25 / /dev/shm rw,nosuid,nodev,noexec - tmpfs tmpfs rw,inode64
29 31 0:26 / /run rw,nosuid,nodev,noexec - tmpfs tmpfs rw,mode=755,inode64
31 1 0:27 / / rw,relatime - btrfs /dev/mapper/luks-316de005-f823-43c4-b6be-058f915d8d02 rw,ssd,space_cache=v2,subvolid=5,subvol=/
30 25 0:6 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime - securityfs securityfs rw
32 25 0:7 / /sys/kernel/debug rw,nosuid,nodev,noexec,relatime - debugfs debugfs rw
33 25 0:29 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime - pstore pstore rw
34 25 0:30 / /sys/fs/cgroup rw,nosuid,nodev,noexec,relatime - tmpfs cgroup_root rw,size=10240k,mode=755,inode64
35 34 0:31 / /sys/fs/cgroup/openrc rw,nosuid,nodev,noexec,relatime - cgroup openrc rw,release_agent=/lib/rc/sh/cgroup-release-agent.sh,name=openrc
36 34 0:32 / /sys/fs/cgroup/unified rw,nosuid,nodev,noexec,relatime - cgroup2 none rw,nsdelegate
37 34 0:33 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime - cgroup cpuset rw,cpuset
38 34 0:34 / /sys/fs/cgroup/cpu rw,nosuid,nodev,noexec,relatime - cgroup cpu rw,cpu
39 34 0:35 / /sys/fs/cgroup/cpuacct rw,nosuid,nodev,noexec,relatime - cgroup cpuacct rw,cpuacct
40 34 0:36 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime - cgroup blkio rw,blkio
41 34 0:37 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime - cgroup memory rw,memory
42 34 0:38 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime - cgroup devices rw,devices
43 34 0:39 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime - cgroup freezer rw,freezer
44 34 0:40 / /sys/fs/cgroup/net_cls rw,nosuid,nodev,noexec,relatime - cgroup net_cls rw,net_cls
45 34 0:41 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime - cgroup perf_event rw,perf_event
46 34 0:42 / /sys/fs/cgroup/net_prio rw,nosuid,nodev,noexec,relatime - cgroup net_prio rw,net_prio
47 34 0:43 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime - cgroup hugetlb rw,hugetlb
48 34 0:44 / /sys/fs/cgroup/pids rw,nosuid,nodev,noexec,relatime - cgroup pids rw,pids
49 34 0:45 / /sys/fs/cgroup/rdma rw,nosuid,nodev,noexec,relatime - cgroup rdma rw,rdma
50 34 0:46 / /sys/fs/cgroup/misc rw,nosuid,nodev,noexec,relatime - cgroup misc rw,misc
51 26 0:20 / /dev/mqueue rw,nosuid,nodev,noexec,relatime - mqueue mqueue rw
52 24 0:47 / /proc/sys/fs/binfmt_misc rw,nosuid,nodev,noexec,relatime - binfmt_misc binfmt_misc rw
53 31 259:4 / /boot rw,relatime - vfat /dev/nvme1n1p1 rw,fmask=0022,dmask=0022,codepage=437,iocharset=iso8859-1,shortname=mixed,utf8,errors=remount-ro
55 29 0:49 / /run/user/1000 rw,nosuid,nodev,relatime - tmpfs tmpfs rw,size=6498984k,nr_inodes=1624746,mode=700,uid=1000,gid=1000,inode64
54 55 0:48 / /run/user/1000/doc rw,nosuid,nodev,relatime - fuse.portal portal rw,user_id=1000,group_id=1000
62 25 0:69 / /sys/fs/bpf rw,relatime - bpf bpf rw
63 32 0:12 / /sys/kernel/debug/tracing rw,nosuid,nodev,noexec,relatime - tracefs tracefs rw";

let reader = BufReader::new(&mountinfo[..]);
let result = parse_cgroup2_mountpoint(reader).unwrap();
assert_eq!(result, "/sys/fs/cgroup/unified");
}
}
59 changes: 55 additions & 4 deletions crates/bpf-common/src/program.rs
Expand Up @@ -4,24 +4,29 @@
//!
use core::fmt;
use std::{
collections::HashSet, convert::TryFrom, fmt::Display, mem::size_of, path::PathBuf, sync::Arc,
time::Duration,
collections::HashSet, convert::TryFrom, fmt::Display, fs::File, io, mem::size_of,
path::PathBuf, sync::Arc, time::Duration,
};

use aya::{
maps::{
perf::{AsyncPerfEventArray, PerfBufferError},
Array, HashMap, Map, MapData,
},
programs::{KProbe, Lsm, RawTracePoint, TracePoint},
programs::{CgroupSkb, CgroupSkbAttachType, KProbe, Lsm, RawTracePoint, TracePoint},
util::online_cpus,
Bpf, BpfLoader, Btf, BtfError, Pod,
};
use bytes::{Buf, Bytes, BytesMut};
use thiserror::Error;
use tokio::{sync::watch, task::JoinError};

use crate::{feature_autodetect::kernel_version::KernelVersion, time::Timestamp, BpfSender, Pid};
use crate::{
feature_autodetect::kernel_version::KernelVersion,
parsing::mountinfo::{get_cgroup2_mountpoint, MountinfoError},
time::Timestamp,
BpfSender, Pid,
};

const PERF_HEADER_SIZE: usize = 4;
const PINNED_MAPS_PATH: &str = "/sys/fs/bpf/pulsar";
Expand Down Expand Up @@ -184,6 +189,14 @@ pub enum ProgramError {
path: PathBuf,
io_error: Box<std::io::Error>,
},
#[error(transparent)]
MountinfoError(#[from] MountinfoError),
#[error("reading link failed {path}")]
ReadFile {
#[source]
source: io::Error,
path: String,
},
}

pub struct ProgramBuilder {
Expand Down Expand Up @@ -234,6 +247,18 @@ impl ProgramBuilder {
self
}

pub fn cgroup_skb_egress(mut self, name: &str) -> Self {
self.programs
.push(ProgramType::CgroupSkbEgress(name.to_string()));
self
}

pub fn cgroup_skb_ingress(mut self, name: &str) -> Self {
self.programs
.push(ProgramType::CgroupSkbIngress(name.to_string()));
self
}

pub async fn start(self) -> Result<Program, ProgramError> {
// We need to notify background tasks reading from maps that we're shutting down.
// We must use oneshot::Receiver as the main shut down machanism because it has
Expand Down Expand Up @@ -283,6 +308,8 @@ enum ProgramType {
Kprobe(String),
Kretprobe(String),
Lsm(String),
CgroupSkbEgress(String),
CgroupSkbIngress(String),
}

impl Display for ProgramType {
Expand All @@ -295,6 +322,10 @@ impl Display for ProgramType {
ProgramType::Kprobe(kprobe) => write!(f, "kprobe {kprobe}"),
ProgramType::Kretprobe(kretprobe) => write!(f, "kretprobe {kretprobe}"),
ProgramType::Lsm(lsm) => write!(f, "lsm {lsm}"),
ProgramType::CgroupSkbEgress(cgroup_skb) => write!(f, "cgroup_skb/egress {cgroup_skb}"),
ProgramType::CgroupSkbIngress(cgroup_skb) => {
write!(f, "cgroup_skb/ingress {cgroup_skb}")
}
}
}
}
Expand Down Expand Up @@ -330,6 +361,26 @@ impl ProgramType {
program.load(lsm, btf).map_err(load_err)?;
program.attach().map_err(attach_err)?;
}
ProgramType::CgroupSkbEgress(cgroup_skb) => {
let program: &mut CgroupSkb = extract_program(bpf, cgroup_skb)?;
let path = get_cgroup2_mountpoint()?;
let cgroup = File::open(&path)
.map_err(|source| MountinfoError::ReadFile { source, path })?;
program.load().map_err(load_err)?;
program
.attach(cgroup, CgroupSkbAttachType::Egress)
.map_err(attach_err)?;
}
ProgramType::CgroupSkbIngress(cgroup_skb) => {
let program: &mut CgroupSkb = extract_program(bpf, cgroup_skb)?;
let path = get_cgroup2_mountpoint()?;
let cgroup = File::open(&path)
.map_err(|source| MountinfoError::ReadFile { source, path })?;
program.load().map_err(load_err)?;
program
.attach(cgroup, CgroupSkbAttachType::Ingress)
.map_err(attach_err)?;
}
}
Ok(())
}
Expand Down