diff --git a/CHANGELOG.md b/CHANGELOG.md index 4bb73233e397..fede475f4e67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,8 @@ and this project adheres to - [#3060](https://github.com/bpftrace/bpftrace/pull/3060) - Disable func builtin for kretprobes and uretprobes when `get_func_ip` feature is not available - [#2645](https://github.com/bpftrace/bpftrace/pull/2645) +- Attach probe after function prologue and probe inlined functions + - [#3095](https://github.com/bpftrace/bpftrace/pull/3095) #### Deprecated #### Removed #### Fixed diff --git a/man/adoc/bpftrace.adoc b/man/adoc/bpftrace.adoc index 562f1baee574..30f93da11b94 100644 --- a/man/adoc/bpftrace.adoc +++ b/man/adoc/bpftrace.adoc @@ -1102,12 +1102,6 @@ For string arguments use the `str()` call to retrieve the value. | n/a | Value returned by the function being traced (kretprobe, uretprobe, kretfunc) -| `sarg0`, `sarg1`, `...sargn` -| int64 -| n/a -| n/a -| nth stack value of the function being traced (kprobes, uprobes) - | tid | uint64 | 4.2 @@ -1619,7 +1613,9 @@ This buffer can be printed in the canonical string format using the `%s` format ---- kprobe:arp_create { - printf("SRC %s, DST %s\n", macaddr(sarg0), macaddr(sarg1)); + $stack_arg0 = *(uint8*)(reg("sp") + 8); + $stack_arg1 = *(uint8*)(reg("sp") + 16); + printf("SRC %s, DST %s\n", macaddr($stack_arg0), macaddr($stack_arg1)); } /* @@ -2921,7 +2917,7 @@ kprobe:tcp_reset { } ---- -Function arguments are available through the `argN` and `sargN` builtins, for register args and stack args respectively. +Function arguments are available through the `argN` for register args. Arguments passed on stack are available using the stack pointer, e.g. `$stack_arg0 = *(int64*)reg("sp") + 16`. Whether arguments passed on stack or in a register depends on the architecture and the number or arguments used, e.g. on x86_64 the first 6 non-floating point arguments are passed in registers and all following arguments are passed on the stack. Note that floating point arguments are typically passed in special registers which don't count as `argN` arguments which can cause confusion. Consider a function with the following signature: diff --git a/src/ast/passes/semantic_analyser.cpp b/src/ast/passes/semantic_analyser.cpp index 60d8b743e60b..0e005d65d4a0 100644 --- a/src/ast/passes/semantic_analyser.cpp +++ b/src/ast/passes/semantic_analyser.cpp @@ -364,6 +364,11 @@ void SemanticAnalyser::visit(Builtin &builtin) AddrSpace addrspace = find_addrspace(pt); for (auto &attach_point : *probe->attach_points) { ProbeType type = probetype(attach_point->provider); + if (type == ProbeType::uprobe && + bpftrace_.config_.get(ConfigKeyBool::probe_inline)) + LOG(ERROR, builtin.loc, err_) + << "The " + builtin.ident + " builtin can only be used when " + << "the probe_inline config is disabled."; if (type != ProbeType::kprobe && type != ProbeType::uprobe && type != ProbeType::usdt && type != ProbeType::rawtracepoint) LOG(ERROR, builtin.loc, err_) @@ -390,6 +395,11 @@ void SemanticAnalyser::visit(Builtin &builtin) LOG(ERROR, builtin.loc, err_) << "The " + builtin.ident << " builtin can only be used with 'kprobes' and 'uprobes' probes"; + if (type == ProbeType::uprobe && + bpftrace_.config_.get(ConfigKeyBool::probe_inline)) + LOG(ERROR, builtin.loc, err_) + << "The " + builtin.ident + " builtin can only be used when " + << "the probe_inline config is disabled."; if (is_final_pass() && (attach_point->address != 0 || attach_point->func_offset != 0)) { // If sargX values are needed when using an offset, they can be stored @@ -438,6 +448,12 @@ void SemanticAnalyser::visit(Builtin &builtin) "\"probe1,probe2 {args}\" is not."; } else if (type == ProbeType::kfunc || type == ProbeType::kretfunc || type == ProbeType::uprobe) { + if (type == ProbeType::uprobe && + bpftrace_.config_.get(ConfigKeyBool::probe_inline)) + LOG(ERROR, builtin.loc, err_) + << "The args builtin can only be used when " + << "the probe_inline config is disabled."; + auto type_name = probe->args_typename(); builtin.type = CreateRecord(type_name, bpftrace_.structs.Lookup(type_name)); diff --git a/src/attached_probe.cpp b/src/attached_probe.cpp index 537a72a6997d..574ceb38b736 100644 --- a/src/attached_probe.cpp +++ b/src/attached_probe.cpp @@ -379,7 +379,10 @@ static int sym_address_cb(const char *symname, { struct symbol *sym = static_cast(p); - if (sym->address >= start && sym->address < (start + size)) { + // When size is 0, then [start, start + size) = [start, start) = ΓΈ. + // So we need a special case when size=0, but address matches the symbol's + if (sym->address == start || + (sym->address > start && sym->address < (start + size))) { sym->start = start; sym->size = size; sym->name = symname; diff --git a/src/bpftrace.cpp b/src/bpftrace.cpp index 6f7af8037ed7..df83bb29068b 100644 --- a/src/bpftrace.cpp +++ b/src/bpftrace.cpp @@ -278,6 +278,32 @@ int BPFtrace::add_probe(ast::Probe &p) generateWatchpointSetupProbe(match_ap, p)); resources.watchpoint_probes.emplace_back(std::move(probe)); + } else if (probetype(attach_point->provider) == ProbeType::uprobe) { + bool locations_from_dwarf = false; + + // If the user specified an address/offset, do not overwrite + // their choice with locations from the DebugInfo. + if (probe.address == 0 && probe.func_offset == 0) { + // Get function locations from the DebugInfo, as it skips the + // prologue and also returns locations of inlined function calls. + if (auto *dwarf = get_dwarf(probe.path)) { + const auto locations = dwarf->get_function_locations( + probe.attach_point, config_.get(ConfigKeyBool::probe_inline)); + for (const auto loc : locations) { + // Clear the attach point, so the address will be used instead + Probe probe_copy = probe; + probe_copy.attach_point.clear(); + probe_copy.address = loc; + resources.probes.push_back(std::move(probe_copy)); + + locations_from_dwarf = true; + } + } + } + + // Otherwise, use the location from the symbol table. + if (!locations_from_dwarf) + resources.probes.push_back(std::move(probe)); } else { resources.probes.push_back(probe); } diff --git a/src/config.cpp b/src/config.cpp index ee8097e27bd3..dd4aad1af101 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -13,6 +13,7 @@ Config::Config(bool has_cmd, bool bt_verbose) : bt_verbose_(bt_verbose) config_map_ = { { ConfigKeyBool::cpp_demangle, { .value = true } }, { ConfigKeyBool::lazy_symbolication, { .value = false } }, + { ConfigKeyBool::probe_inline, { .value = false } }, { ConfigKeyInt::log_size, { .value = (uint64_t)1000000 } }, { ConfigKeyInt::max_bpf_progs, { .value = (uint64_t)512 } }, { ConfigKeyInt::max_cat_bytes, { .value = (uint64_t)10240 } }, diff --git a/src/config.h b/src/config.h index f6e17874a33b..e94c09224446 100644 --- a/src/config.h +++ b/src/config.h @@ -25,6 +25,7 @@ enum class ConfigSource { enum class ConfigKeyBool { cpp_demangle, lazy_symbolication, + probe_inline, }; enum class ConfigKeyInt { @@ -71,6 +72,7 @@ const std::map CONFIG_KEY_MAP = { { "max_strlen", ConfigKeyInt::max_strlen }, { "max_type_res_iterations", ConfigKeyInt::max_type_res_iterations }, { "perf_rb_pages", ConfigKeyInt::perf_rb_pages }, + { "probe_inline", ConfigKeyBool::probe_inline }, { "stack_mode", ConfigKeyStackMode::default_ }, { "str_trunc_trailer", ConfigKeyString::str_trunc_trailer }, }; diff --git a/src/dwarf_parser.cpp b/src/dwarf_parser.cpp index a0a6540a67b7..7bf368c7e189 100644 --- a/src/dwarf_parser.cpp +++ b/src/dwarf_parser.cpp @@ -50,6 +50,31 @@ std::unique_ptr Dwarf::GetFromBinary(BPFtrace *bpftrace, } } +std::vector Dwarf::get_function_locations(const std::string &function, + bool include_inlined) +{ + // Locating every inlined instances of a function is expensive, + // so we only do it if the user explicitly requests it. + if (!include_inlined) { + auto syms = target_.FindSymbols(function.c_str(), + lldb::SymbolType::eSymbolTypeCode); + // The given function name MUST identify a unique symbol! + if (syms.GetSize() != 1) + return {}; + auto sym = syms.GetContextAtIndex(0).GetSymbol(); + return { sym.GetStartAddress().GetFileAddress() + + sym.GetPrologueByteSize() }; + } else { + auto bps = target_.BreakpointCreateByName(function.c_str()); + std::vector result(bps.GetNumLocations()); + for (uint32_t i = 0; i < bps.GetNumLocations(); i++) { + auto loc = bps.GetLocationAtIndex(i); + result[i] = loc.GetAddress().GetFileAddress(); + } + return result; + } +} + std::string Dwarf::get_type_name(lldb::SBType type) { std::string type_name = type.GetDisplayTypeName() ?: ""; diff --git a/src/dwarf_parser.h b/src/dwarf_parser.h index 4b6ba2a466d3..72019b1fa010 100644 --- a/src/dwarf_parser.h +++ b/src/dwarf_parser.h @@ -23,6 +23,8 @@ class Dwarf { static std::unique_ptr GetFromBinary(BPFtrace *bpftrace, std::string file_path); + std::vector get_function_locations(const std::string &function, + bool include_inlined); std::vector get_function_params(const std::string &function); Struct resolve_args(const std::string &function); @@ -70,6 +72,14 @@ class Dwarf { return nullptr; } + std::vector get_function_locations(const std::string &function + __attribute__((unused)), + bool include_inlined + __attribute__((unused))) + { + return {}; + } + std::vector get_function_params(const std::string &function __attribute__((unused))) { diff --git a/src/utils.cpp b/src/utils.cpp index 1b173a5927c8..e59dc8280bac 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -847,18 +847,22 @@ std::tuple get_kernel_dirs( const std::string &is_deprecated(const std::string &str) { - std::vector::iterator item; - - for (item = DEPRECATED_LIST.begin(); item != DEPRECATED_LIST.end(); item++) { - if (str == item->old_name) { - if (item->show_warning) { - LOG(WARNING) << item->old_name - << " is deprecated and will be removed in the future. Use " - << item->new_name << " instead."; - item->show_warning = false; - } + for (auto &item : DEPRECATED_LIST) { + if (!item.matches(str)) { + continue; + } - return item->new_name; + if (item.show_warning) { + LOG(WARNING) << item.old_name + << " is deprecated and will be removed in the future. Use " + << item.new_name << " instead."; + item.show_warning = false; + } + + if (item.replace_by_new_name) { + return item.new_name; + } else { + return str; } } diff --git a/src/utils.h b/src/utils.h index 9232b48407fd..d0fd45b28a53 100644 --- a/src/utils.h +++ b/src/utils.h @@ -105,6 +105,18 @@ struct DeprecatedName { std::string old_name; std::string new_name; bool show_warning = true; + bool replace_by_new_name = true; + + bool matches(const std::string &name) const + { + // We allow a prefix match to match against builtins with number (argX) + if (old_name.back() == '*') { + std::string_view old_name_view{ old_name.c_str(), old_name.size() - 1 }; + return name.rfind(old_name_view) == 0; + } + + return name == old_name; + } }; typedef std::unordered_map> @@ -121,7 +133,9 @@ struct KConfig { std::unordered_map config; }; -static std::vector DEPRECATED_LIST = {}; +static std::vector DEPRECATED_LIST = { + { "sarg*", "*(reg(\"sp\") + )", true, false } +}; static std::vector UNSAFE_BUILTIN_FUNCS = { "system", diff --git a/tests/runtime/builtin b/tests/runtime/builtin index 76f989bb4bbc..5d9ce767076e 100644 --- a/tests/runtime/builtin +++ b/tests/runtime/builtin @@ -59,34 +59,6 @@ EXPECT_REGEX ^SUCCESS 0x[0-9a-f]+$ TIMEOUT 5 AFTER ./testprogs/syscall read -NAME sarg -PROG uprobe:./testprogs/stack_args:too_many_args { printf("SUCCESS %d %d\n", sarg0, sarg1); exit(); } -EXPECT SUCCESS 32 64 -ARCH x86_64 -TIMEOUT 5 -AFTER ./testprogs/stack_args - -NAME sarg -PROG uprobe:./testprogs/stack_args:too_many_args { printf("SUCCESS %d %d\n", sarg0, sarg1); exit(); } -EXPECT SUCCESS 128 256 -ARCH aarch64|ppc64|ppc64le -TIMEOUT 5 -AFTER ./testprogs/stack_args - -NAME sarg -PROG uprobe:./testprogs/stack_args:too_many_args { printf("SUCCESS %d %d\n", sarg0, sarg1); exit(); } -EXPECT SUCCESS 16 32 -ARCH s390x -TIMEOUT 5 -AFTER ./testprogs/stack_args - -NAME sarg -PROG uprobe:./testprogs/stack_args:too_many_args { printf("SUCCESS %d %d\n", sarg0, sarg1); exit(); } -EXPECT SUCCESS 8 16 -ARCH armv7l -TIMEOUT 5 -AFTER ./testprogs/stack_args - NAME retval PROG kretprobe:vfs_read { printf("SUCCESS %d\n", retval); exit(); } EXPECT_REGEX SUCCESS .* diff --git a/tests/runtime/call b/tests/runtime/call index 06b4c33d29b1..332e55f3efa1 100644 --- a/tests/runtime/call +++ b/tests/runtime/call @@ -229,7 +229,7 @@ AFTER ./testprogs/uprobe_loop NAME ustack_stack_mode_env_bpftrace PROG u:./testprogs/uprobe_loop:uprobeFunction1 { printf("%s\n", ustack(1)); exit(); } ENV BPFTRACE_STACK_MODE=bpftrace -EXPECT uprobeFunction1+0 +EXPECT_REGEX ^uprobeFunction1\+[0-9]+$ TIMEOUT 5 AFTER ./testprogs/uprobe_loop # This was debugged as far down as std::cout having badbit [0] set after a @@ -249,7 +249,7 @@ SKIP_IF_ENV_HAS CI=true NAME ustack_stack_mode_env_perf PROG u:./testprogs/uprobe_loop:uprobeFunction1 { printf("%s\n", ustack(1)); exit(); } ENV BPFTRACE_STACK_MODE=perf -EXPECT_REGEX \d+ uprobeFunction1\+0 \(.*/uprobe_loop\) +EXPECT_REGEX ^[0-9a-f]+ uprobeFunction1\+[0-9]+ \(.*/uprobe_loop\)$ TIMEOUT 5 AFTER ./testprogs/uprobe_loop # See https://github.com/bpftrace/bpftrace/issues/3080 @@ -276,17 +276,8 @@ SKIP_IF_ENV_HAS CI=true NAME ustack_elf_symtable ENV BPFTRACE_CACHE_USER_SYMBOLS=PER_PROGRAM PROG uprobe:./testprogs/uprobe_symres_exited_process:test { print(ustack); exit(); } -EXPECT_REGEX ^\s+test\+0\s+main\+[1-9][0-9]* +EXPECT_REGEX ^\s+test\+[0-9]+\s+test2\+[0-9]+\s+main\+[0-9]+ AFTER ./testprogs/disable_aslr ./testprogs/uprobe_symres_exited_process -ARCH x86_64 -TIMEOUT 5 - -NAME ustack_elf_symtable -ENV BPFTRACE_CACHE_USER_SYMBOLS=PER_PROGRAM -PROG uprobe:./testprogs/uprobe_symres_exited_process:test { print(ustack); exit(); } -EXPECT_REGEX ^\s+test\+0\s+test2\+[1-9][0-9]*\s+main\+[1-9][0-9]* -AFTER ./testprogs/disable_aslr ./testprogs/uprobe_symres_exited_process -ARCH ppc64le TIMEOUT 5 NAME cat diff --git a/tests/runtime/dwarf b/tests/runtime/dwarf index 8ee649f08a13..0defe94ddc53 100644 --- a/tests/runtime/dwarf +++ b/tests/runtime/dwarf @@ -45,6 +45,63 @@ REQUIRES_FEATURE dwarf TIMEOUT 5 BEFORE ./testprogs/uprobe_test +# We check the expected output after the timeout. +# If we exited once the expected count is reached, we would miss over-counting issues. +# In general, avoid tests that must timeout, as it makes running the whole suite slower! +NAME uprobe skip inlined function +PROG config = { probe_inline = 0 } + uprobe:./testprogs/inline_function:square { @count++ } +EXPECT @count: 1 +REQUIRES_FEATURE dwarf +TIMEOUT 5 +AFTER ./testprogs/inline_function + +NAME uprobe inlined function +PROG config = { probe_inline = 1 } + uprobe:./testprogs/inline_function:square { @count++ } +EXPECT @count: 3 +REQUIRES_FEATURE dwarf +TIMEOUT 5 +AFTER ./testprogs/inline_function + +NAME uprobe inlined function - probe +PROG config = { probe_inline = 1; cache_user_symbols = "PER_PROGRAM"; } + uprobe:./testprogs/inline_function:square { + printf("%s\n", probe); + if (++@count == 3) { exit(); } + } +EXPECT uprobe:./testprogs/inline_function:square + uprobe:./testprogs/inline_function:square + uprobe:./testprogs/inline_function:square +REQUIRES_FEATURE dwarf +TIMEOUT 5 +AFTER ./testprogs/inline_function + +NAME uprobe inlined function - func +PROG config = { probe_inline = 1; cache_user_symbols = "PER_PROGRAM"; } + uprobe:./testprogs/inline_function:square { + printf("%s\n", func); + if (++@count == 3) { exit(); } + } +EXPECT main + main + square +REQUIRES_FEATURE dwarf +TIMEOUT 5 +AFTER ./testprogs/inline_function + +NAME uprobe inlined function - ustack +PROG config = { probe_inline = 1; cache_user_symbols = "PER_PROGRAM"; } + uprobe:./testprogs/inline_function:square { + printf("%s\n", ustack); + if (++@count == 3) { exit(); } + } +EXPECT_REGEX ^\n[ ]+main\+\d+\n[ ]+0x[\da-f]+\n\n$ +EXPECT_REGEX ^\n[ ]+square\+\d+\n[ ]+main\+\d+\n[ ]+0x[\da-f]+\n\n$ +REQUIRES_FEATURE dwarf +TIMEOUT 5 +AFTER ./testprogs/inline_function + # Checking backwards compatibility NAME uprobe args as pointer PROG uprobe:./testprogs/uprobe_test:uprobeFunction1 { printf("c = %c\n", args->c); exit(); } diff --git a/tests/runtime/other b/tests/runtime/other index b0893efc2993..70fce7e6a67c 100644 --- a/tests/runtime/other +++ b/tests/runtime/other @@ -212,7 +212,7 @@ AFTER ./testprogs/syscall nanosleep 1e8 NAME positional ustack RUN {{BPFTRACE}} -e 'u:./testprogs/uprobe_loop:uprobeFunction1 { printf("%s\n%s\n", ustack(), ustack($1)); exit(); }' 1 -EXPECT_REGEX .*uprobeFunction1\+0\n\s+main\+\d+\n.*\n\n\n\s+uprobeFunction1\+0 +EXPECT_REGEX .*uprobeFunction1\+[0-9]+\n\s+spin\+[0-9]+\n\s+main\+[0-9]+\n.*\n\n\n\s+uprobeFunction1\+[0-9]+ TIMEOUT 5 AFTER ./testprogs/uprobe_loop diff --git a/tests/semantic_analyser.cpp b/tests/semantic_analyser.cpp index 6cb735952600..caca59a02a6b 100644 --- a/tests/semantic_analyser.cpp +++ b/tests/semantic_analyser.cpp @@ -206,13 +206,21 @@ void test(std::string_view input, std::string_view expected_ast) EXPECT_EQ(expected_ast, out.str()); } +void test_error(BPFtrace &bpftrace, + std::string_view input, + std::string_view expected_error, + bool has_features = true) +{ + Driver driver(bpftrace); + test(bpftrace, has_features, driver, input, -1, expected_error, true, false); +} + void test_error(std::string_view input, std::string_view expected_error, bool has_features = true) { auto bpftrace = get_mock_bpftrace(); - Driver driver(*bpftrace); - test(*bpftrace, has_features, driver, input, -1, expected_error, true, false); + test_error(*bpftrace, input, expected_error, has_features); } TEST(semantic_analyser, builtin_variables) @@ -254,6 +262,33 @@ kprobe:f { fake } test(feature, "k:f { jiffies }", 1); } +TEST(semantic_analyser, builtin_variables_inline) +{ + auto bpftrace = get_mock_bpftrace(); + ConfigSetter configs{ bpftrace->config_, ConfigSource::script }; + configs.set(ConfigKeyBool::probe_inline, true); + + // Check argument builtins are rejected when `probe_inline` is enabled. + test_error(*bpftrace, "uprobe:/bin/sh:f { arg0 }", R"( +stdin:1:20-24: ERROR: The arg0 builtin can only be used when the probe_inline config is disabled. +uprobe:/bin/sh:f { arg0 } + ~~~~ +)"); + test_error(*bpftrace, "uprobe:/bin/sh:f { sarg0 }", R"( +stdin:1:20-25: ERROR: The sarg0 builtin can only be used when the probe_inline config is disabled. +uprobe:/bin/sh:f { sarg0 } + ~~~~~ +)"); + test_error(*bpftrace, "uprobe:/bin/sh:f { args }", R"( +stdin:1:20-24: ERROR: The args builtin can only be used when the probe_inline config is disabled. +uprobe:/bin/sh:f { args } + ~~~~ +stdin:1:20-24: ERROR: Cannot read function parameters +uprobe:/bin/sh:f { args } + ~~~~ +)"); +} + TEST(semantic_analyser, builtin_cpid) { test("i:ms:100 { printf(\"%d\\n\", cpid); }", 1, false, false); diff --git a/tests/testprogs/inline_function.c b/tests/testprogs/inline_function.c new file mode 100644 index 000000000000..36bbdc966cd8 --- /dev/null +++ b/tests/testprogs/inline_function.c @@ -0,0 +1,27 @@ +#include + +/* always_inline instructs the compiler to inline the function, even in -O0. + * used instructs the compiler to keep the function in the final binary. + * Using both attribute at the same time enable testing both regular and + * inlined call to the square function. The first two calls in main will be + * inlined and the last call will be a regular call. + */ +__attribute__((always_inline, used)) static inline int square(int x) +{ + volatile int res = x * x; + return res; +} + +int main(int argc, char *argv[] __attribute__((unused))) +{ + printf("%d^2 = %d\n", argc, square(argc)); + + printf("%d^2 = %d\n", 8, square(8)); + + int x = 10; + int x_squared = 0; + asm("call square" : "=a"(x_squared) : "D"(x)); + printf("%d^2 = %d\n", x, x_squared); + + return 0; +}