Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Attempt to add arm64 support #151

Merged
merged 6 commits into from
Jan 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,15 @@ help:

build-unit-test:
cmake -Bbuild -DBPFTIME_ENABLE_UNIT_TESTING=1 -DCMAKE_BUILD_TYPE:STRING=Debug
cmake --build build --config Debug --target bpftime_runtime_tests
cmake --build build --config Debug --target bpftime_runtime_tests bpftime_daemon_tests

unit-test-daemon:
build/daemon/test/bpftime_daemon_tests
./build/daemon/test/bpftime_daemon_tests

unit-test-runtime: ## run catch2 unit tests
make -C runtime/test/bpf && cp runtime/test/bpf/*.bpf.o build/runtime/test/
./build/runtime/unit-test/bpftime_runtime_tests
cd build/runtime/test && ctest -VV
cd build/runtime/test && make && ctest -VV

unit-test: unit-test-daemon unit-test-runtime ## run catch2 unit tests

Expand Down
24 changes: 19 additions & 5 deletions benchmark/test_embed.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,22 @@ struct pt_regs {
uint64_t sp;
uint64_t ss;
};
#define PT_REGS_PARM1(x) ((x)->di)
#define PT_REGS_PARM2(x) ((x)->si)
#define PT_REGS_PARM3(x) ((x)->dx)

#elif defined(__aarch64__) || defined(_M_ARM64)
/*
 * AArch64 register snapshot. test_func_wrapper() zero-fills one of these,
 * stores the call arguments into it and passes it (with its size) to
 * ebpf_exec() as the program's input buffer.
 */
struct pt_regs {
/* 31 64-bit slots; presumably general-purpose registers x0..x30 -- confirm against the kernel's arm64 layout */
uint64_t regs[31];
uint64_t sp;
uint64_t pc;
uint64_t pstate;
};
/*
 * Accessors for the first three call arguments (regs[0]..regs[2]).
 * They expand to plain member expressions, so they are usable as lvalues;
 * test_func_wrapper() relies on that when it assigns through them.
 */
#define PT_REGS_PARM1(x) ((x)->regs[0])
#define PT_REGS_PARM2(x) ((x)->regs[1])
#define PT_REGS_PARM3(x) ((x)->regs[2])
#else
#error Only x86_64 is supported
#error "Unsupported architecture"
#endif

struct ebpf_vm *begin_vm = NULL;
Expand Down Expand Up @@ -74,15 +88,15 @@ uint64_t test_func_wrapper(const char *a, int b, uint64_t c)
uint64_t ret;
if (enable_ebpf) {
memset(&regs, 0, sizeof(regs));
regs.di = (uintptr_t)a;
regs.si = b;
regs.dx = c;
PT_REGS_PARM1(&regs) = (uintptr_t)a;
PT_REGS_PARM2(&regs) = b;
PT_REGS_PARM3(&regs) = c;
ebpf_exec(begin_vm, &regs, sizeof(regs), &ret);
}
uint64_t hook_func_ret = __benchmark_test_function3(a, b, c);
if (enable_ebpf) {
memset(&regs, 0, sizeof(regs));
regs.di = hook_func_ret;
PT_REGS_PARM1(&regs) = hook_func_ret;
ebpf_exec(end_vm, &regs, sizeof(regs), &ret);
}
return hook_func_ret;
Expand Down
3 changes: 1 addition & 2 deletions example/malloc/malloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,7 @@ int main(int argc, char **argv)
LIBBPF_OPTS(bpf_uprobe_opts, attach_opts, .func_name = "malloc",
.retprobe = false);
struct bpf_link *attach = bpf_program__attach_uprobe_opts(
skel->progs.do_count, -1, "/lib/x86_64-linux-gnu/libc.so.6", 0,
&attach_opts);
skel->progs.do_count, -1, "libc.so.6", 0, &attach_opts);
if (!attach) {
fprintf(stderr, "Failed to attach BPF skeleton\n");
err = -1;
Expand Down
45 changes: 32 additions & 13 deletions example/tailcall_minimal/tailcall_minimal.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "linux/bpf.h"
#include "linux/filter.h"
#include "bpf/bpf.h"
#include <asm/unistd_64.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <signal.h>
Expand All @@ -17,6 +17,9 @@
#include <stdlib.h>
#include "./.output/tailcall_minimal.skel.h"
#include <inttypes.h>
#include <dlfcn.h>
#include <gnu/lib-names.h>

#define warn(...) fprintf(stderr, __VA_ARGS__)

#define BPF_LD_IMM64_RAW_FULL(DST, SRC, OFF1, OFF2, IMM1, IMM2) \
Expand Down Expand Up @@ -70,6 +73,30 @@
.src_reg = 0, \
.off = 0, \
.imm = IMM })

// Fallback syscall number for bpf(2). Only takes effect when none of the
// headers included above has already defined __NR_bpf. An architecture that
// is not listed here stops the build with an #error instead of guessing.
// Table source:
// https://github.com/torvalds/linux/blob/7ed2632ec7d72e926b9e8bcc9ad1bb0cd37274bf/tools/build/feature/test-bpf.c#L6-L26
#if !defined(__NR_bpf)
#if defined(__i386__)
#define __NR_bpf 357
#elif defined(__x86_64__)
#define __NR_bpf 321
#elif defined(__aarch64__)
#define __NR_bpf 280
#elif defined(__sparc__)
#define __NR_bpf 349
#elif defined(__s390__)
#define __NR_bpf 351
#elif defined(__mips__) && defined(_ABIO32)
#define __NR_bpf 4355
#elif defined(__mips__) && defined(_ABIN32)
#define __NR_bpf 6319
#elif defined(__mips__) && defined(_ABI64)
#define __NR_bpf 5315
#else
#error __NR_bpf not defined
#endif
#endif /* !defined(__NR_bpf) */

static int libbpf_print_fn(enum libbpf_print_level level, const char *format,
va_list args)
{
Expand All @@ -85,21 +112,13 @@ static void sig_handler(int sig)

/*
 * Issue a bpf(2) system call through the syscall() function exported by the
 * C library named by LIBC_SO, retrying a bounded number of times on EAGAIN.
 *
 * cmd   - bpf command number, forwarded unchanged.
 * attr  - pointer to the command's attribute union, forwarded unchanged.
 * size  - size in bytes of the attribute data, forwarded unchanged.
 *
 * Returns whatever libc's syscall() returns: the kernel's result on success,
 * or -1 with errno set on failure. If libc or its "syscall" symbol cannot be
 * resolved, returns -1 with errno set to ENOSYS.
 */
static long my_bpf_syscall(long cmd, union bpf_attr *attr, unsigned long size)
{
	/*
	 * Resolve syscall() once and cache it. The previous code called
	 * dlopen() on every invocation, never closed or checked the handle,
	 * and called through an unchecked dlsym() result.
	 */
	static long (*libc_syscall)(long, ...);

	if (!libc_syscall) {
		void *libc_handle = dlopen(LIBC_SO, RTLD_LAZY);
		if (!libc_handle) {
			fprintf(stderr, "dlopen(%s) failed: %s\n", LIBC_SO,
				dlerror());
			errno = ENOSYS;
			return -1;
		}
		libc_syscall =
			(long (*)(long, ...))dlsym(libc_handle, "syscall");
		if (!libc_syscall) {
			fprintf(stderr, "dlsym(syscall) failed: %s\n",
				dlerror());
			dlclose(libc_handle);
			errno = ENOSYS;
			return -1;
		}
		/* The handle stays open on purpose: the cached pointer refers into it. */
	}

	int attempts = 5;
	long fd;
	do {
		fd = libc_syscall(__NR_bpf, cmd, attr, size);
		/*
		 * libc's syscall() reports failure as -1 plus errno, not as a
		 * negative errno value, so the retry test must look at errno.
		 */
	} while (fd < 0 && errno == EAGAIN && --attempts > 0);
	return fd;
}
Expand Down
2 changes: 1 addition & 1 deletion example/usdt_minimal/victim.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#include <iostream>
#include <ostream>
#include <sys/sdt.h>
#include <sys/sdt.h> // provided by systemtap-sdt-devel package
#include <random>
#include <thread>
using namespace std::chrono_literals;
Expand Down
7 changes: 7 additions & 0 deletions runtime/agent-transformer/text_segment_transformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ extern "C" void syscall_addr();
static const int NR_syscalls = 512;
static syscall_hooker_func_t call_hook = &call_orig_syscall;

#if defined(__x86_64__)
[[maybe_unused]] void __asm_holder()
{
__asm__(".globl syscall_hooker_asm\n\t"
Expand Down Expand Up @@ -98,6 +99,12 @@ static syscall_hooker_func_t call_hook = &call_orig_syscall;
"syscall\n\t"
"ret\n\t");
}
#elif defined(__aarch64__)
// TODO: implement syscall trace trampoline
#else
#error "Unsupported architecture"
#endif

extern "C" int64_t syscall_hooker_cxx(int64_t sys_nr, int64_t arg1,
int64_t arg2, int64_t arg3, int64_t arg4,
int64_t arg5, int64_t arg6)
Expand Down
28 changes: 28 additions & 0 deletions runtime/src/attach/attach_internal.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ struct pt_regs {
uint64_t sp;
uint64_t ss;
};
// https://github.com/torvalds/linux/blob/6613476e225e090cc9aad49be7fa504e290dd33d/tools/lib/bpf/bpf_tracing.h#L79
#define PT_REGS_PARM1(x) ((x)->di)
#define PT_REGS_PARM2(x) ((x)->si)
#define PT_REGS_PARM3(x) ((x)->dx)
#define PT_REGS_PARM4(x) ((x)->cx)
#define PT_REGS_PARM5(x) ((x)->r8)
#define PT_REGS_PARM6(x) ((x)->r9)
#define PT_REGS_RET(x) ((x)->sp)
#define PT_REGS_RC(x) ((x)->ax)

#elif defined(__aarch64__) || defined(_M_ARM64)
struct pt_regs {
Expand All @@ -42,10 +51,29 @@ struct pt_regs {
uint64_t pc;
uint64_t pstate;
};
// https://github.com/torvalds/linux/blob/6613476e225e090cc9aad49be7fa504e290dd33d/tools/lib/bpf/bpf_tracing.h#L217
#define PT_REGS_PARM1(x) ((x)->regs[0])
#define PT_REGS_PARM2(x) ((x)->regs[1])
#define PT_REGS_PARM3(x) ((x)->regs[2])
#define PT_REGS_PARM4(x) ((x)->regs[3])
#define PT_REGS_PARM5(x) ((x)->regs[4])
#define PT_REGS_PARM6(x) ((x)->regs[5])
#define PT_REGS_PARM7(x) ((x)->regs[6])
#define PT_REGS_PARM8(x) ((x)->regs[7])
#define PT_REGS_RET(x) ((x)->regs[30])
#define PT_REGS_RC(x) ((x)->regs[0])

#elif defined(__arm__) || defined(_M_ARM)
// https://github.com/torvalds/linux/blob/6613476e225e090cc9aad49be7fa504e290dd33d/tools/lib/bpf/bpf_tracing.h#L192
// 32-bit ARM register snapshot: a single flat array of 18 32-bit slots.
struct pt_regs {
uint32_t uregs[18];
};
// Accessors into uregs[], following the arm definitions in the libbpf
// bpf_tracing.h revision linked above. Only four argument accessors exist
// here (the x86_64 branch defines six, the aarch64 branch eight), so code
// that uses PT_REGS_PARM5 or higher will not compile for this architecture.
#define PT_REGS_PARM1(x) ((x)->uregs[0])
#define PT_REGS_PARM2(x) ((x)->uregs[1])
#define PT_REGS_PARM3(x) ((x)->uregs[2])
#define PT_REGS_PARM4(x) ((x)->uregs[3])
// Return-address slot; presumably the link register (r14) -- confirm.
#define PT_REGS_RET(x) ((x)->uregs[14])
// Return-value slot; shares uregs[0] with PT_REGS_PARM1.
#define PT_REGS_RC(x) ((x)->uregs[0])
#else
#error "Unsupported architecture"
#endif
Expand Down
2 changes: 1 addition & 1 deletion runtime/src/attach/bpf_attach_ctx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "attach/attach_manager/frida_attach_manager.hpp"
#include "bpftime.hpp"
#include "handler/epoll_handler.hpp"
#include <asm/unistd_64.h>
#include <unistd.h>
#include <cerrno>
#include <cstdint>
#include <map>
Expand Down
2 changes: 1 addition & 1 deletion runtime/src/attach/bpf_attach_syscall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "attach/attach_manager/frida_attach_manager.hpp"
#include "bpftime.hpp"
#include "handler/epoll_handler.hpp"
#include <asm/unistd_64.h>
#include <unistd.h>
#include <cerrno>
#include <map>
#include <memory>
Expand Down
50 changes: 12 additions & 38 deletions runtime/src/bpf_map/userspace/prog_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,34 +11,20 @@
#include <cerrno>
#include <spdlog/spdlog.h>
#include <bpf/libbpf.h>

// Why hand written syscalls? syscall() function was hooked by syscall server,
// direct call to it will lead to a result provided by bpftime. So if we want to
// get things from kernel, we must manually execute `syscall`
#include <dlfcn.h>
#include <gnu/lib-names.h>

#ifndef offsetofend
#define offsetofend(TYPE, FIELD) \
(offsetof(TYPE, FIELD) + sizeof(((TYPE *)0)->FIELD))
#endif
// Executes the bpf system call with a hand-written x86-64 `syscall`
// instruction instead of calling libc's syscall(). Per the comment above,
// syscall() is hooked by the syscall server, so only a manual `syscall`
// reaches the kernel.
//
// cmd / attr / size are placed in rdi / rsi / rdx; r10, r8 and r9 are zeroed.
// Returns the value the kernel leaves in rax (presumably a negative errno
// value on failure, as is usual for raw syscalls -- confirm).
//
// The register names make this x86-64 only.
// NOTE(review): the `syscall` instruction also overwrites rcx and r11, which
// are not in the clobber list below -- confirm whether that is intentional.
__attribute__((__noinline__, optnone, noinline)) static long
my_bpf_syscall(long cmd, union bpf_attr *attr, unsigned long size)
{
long ret;
// rax <- __NR_bpf (immediate operand), then the three arguments, then the
// unused argument registers are cleared before trapping into the kernel.
__asm__ volatile("movq %1, %%rax\n"
"movq %2, %%rdi\n"
"movq %3, %%rsi\n"
"movq %4, %%rdx\n"
"movq $0, %%r10\n"
"movq $0, %%r8\n"
"movq $0, %%r9\n"
"syscall\n"
"movq %%rax, %0"
: "=g"(ret)
: "i"((long)__NR_bpf), "g"(cmd), "g"(attr), "g"(size)
: "memory", "rdi", "rsi", "rdx", "r10", "r8", "r9",
"rax");
return ret;
}

// syscall() function was hooked by syscall server, direct call to it will lead to
// a result provided by bpftime. So if we want to get things from kernel, we must
// manually execute `syscall` from libc
static void* libc_handle = dlopen(LIBC_SO, RTLD_LAZY);
static auto libc_syscall = reinterpret_cast<decltype(&::syscall)>(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to pay attention to all global variable initializations, since attaching might be executed before global variables are initialized.

dlsym(libc_handle, "syscall"));

static int my_bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
{
Expand All @@ -51,7 +37,7 @@ static int my_bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
attr.info.info_len = *info_len;
attr.info.info = (uintptr_t)info;

err = my_bpf_syscall(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz);
err = libc_syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz);
if (!err)
*info_len = attr.info.info_len;
return err;
Expand All @@ -63,23 +49,11 @@ my_bpf_syscall_fd(long cmd, union bpf_attr *attr, unsigned long size)
int attempts = 5;
long fd;
do {
__asm__ volatile("movq %1, %%rax\n"
"movq %2, %%rdi\n"
"movq %3, %%rsi\n"
"movq %4, %%rdx\n"
"movq $0, %%r10\n"
"movq $0, %%r8\n"
"movq $0, %%r9\n"
"syscall\n"
"movq %%rax, %0"
: "=g"(fd)
: "i"((long)__NR_bpf), "g"(cmd), "g"(attr),
"g"(size)
: "memory", "rdi", "rsi", "rdx", "r10", "r8",
"r9", "rax");
fd = libc_syscall(__NR_bpf, cmd, attr, size);
} while (fd < 0 && fd == -EAGAIN && --attempts > 0);
return fd;
}

int my_bpf_prog_get_fd_by_id(__u32 id)
{
const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
Expand Down
22 changes: 20 additions & 2 deletions runtime/src/bpf_map/userspace/ringbuf_map.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ enum {
#define READ_ONCE_I(x) (*(volatile int *)&x)
#define WRITE_ONCE_I(x, v) (*(volatile int *)&x) = (v)

#if defined(__x86_64__)
#define barrier() asm volatile("" ::: "memory")
#ifdef __x86_64__
#define smp_store_release_ul(p, v) \
do { \
barrier(); \
Expand All @@ -43,8 +43,26 @@ enum {
___p; \
})

#elif defined(__aarch64__)
// AArch64 acquire/release accessors built on the stlr / ldar instructions.
// Adapted from:
// https://github.com/torvalds/linux/blob/master/tools/arch/arm64/include/asm/barrier.h
//
// `p` is parenthesized inside every expansion so that pointer expressions
// such as `base + 1` dereference the intended address. `v` is widened to
// unsigned long so the full 64-bit source register of `stlr` is well defined
// even when the caller passes a narrower value.

// Store `v` to `*p` with release semantics.
#define smp_store_release_ul(p, v)                                             \
	do {                                                                   \
		asm volatile("stlr %1, %0"                                     \
			     : "=Q"(*(p))                                      \
			     : "r"((unsigned long)(v))                         \
			     : "memory");                                      \
	} while (0)

// Load an unsigned long from `*p` with acquire semantics; yields the value.
#define smp_load_acquire_ul(p)                                                 \
	({                                                                     \
		unsigned long ___p;                                            \
		asm volatile("ldar %0, %1"                                     \
			     : "=r"(___p)                                      \
			     : "Q"(*(p))                                       \
			     : "memory");                                      \
		___p;                                                          \
	})

// Load an int from `*p` with acquire semantics (32-bit `w` register form).
#define smp_load_acquire_i(p)                                                  \
	({                                                                     \
		int ___p;                                                      \
		asm volatile("ldar %w0, %1"                                    \
			     : "=r"(___p)                                      \
			     : "Q"(*(p))                                       \
			     : "memory");                                      \
		___p;                                                          \
	})
#else
#error Only supports x86_64
#error Only supports x86_64 and aarch64
#endif

namespace bpftime
Expand Down
17 changes: 14 additions & 3 deletions runtime/src/handler/perf_event_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
#define READ_ONCE_U64(x) (*(volatile uint64_t *)&x)
#define WRITE_ONCE_U64(x, v) (*(volatile uint64_t *)&x) = (v)

#if defined(__x86_64__)
#define barrier() asm volatile("" ::: "memory")
#ifdef __x86_64__
#define smp_store_release_u64(p, v) \
do { \
barrier(); \
Expand All @@ -34,9 +34,20 @@
barrier(); \
___p; \
})

#elif defined(__aarch64__)
// AArch64 acquire/release accessors for 64-bit values, built on the
// stlr / ldar instructions. Adapted from:
// https://github.com/torvalds/linux/blob/master/tools/arch/arm64/include/asm/barrier.h
//
// `p` is parenthesized inside every expansion so that pointer expressions
// such as `base + 1` dereference the intended address. `v` is widened to
// uint64_t so the full 64-bit source register of `stlr` is well defined even
// when the caller passes a narrower value.

// Store `v` to `*p` with release semantics.
#define smp_store_release_u64(p, v)                                            \
	do {                                                                   \
		asm volatile("stlr %1, %0"                                     \
			     : "=Q"(*(p))                                      \
			     : "r"((uint64_t)(v))                              \
			     : "memory");                                      \
	} while (0)

// Load a uint64_t from `*p` with acquire semantics; yields the value.
#define smp_load_acquire_u64(p)                                                \
	({                                                                     \
		uint64_t ___p;                                                 \
		asm volatile("ldar %0, %1"                                     \
			     : "=r"(___p)                                      \
			     : "Q"(*(p))                                       \
			     : "memory");                                      \
		___p;                                                          \
	})
#else
#error Only supports x86_64
#error Only supports x86_64 and aarch64
#endif

namespace bpftime
Expand Down