Skip to content

Commit

Permalink
[EXPERIMENT] perf: use PROCFS_PROCMAP_QUERY API
Browse files Browse the repository at this point in the history
Teach perf to use the binary, ioctl-based PROCFS_PROCMAP_QUERY API to
fetch all VMAs of a process, instead of parsing the text-based maps output
line by line.

BENCHMARK RESULTS
=================

TEXT-BASED
----------
  # ./perf-parse bench internals synthesize
  # Running 'internals/synthesize' benchmark:
  Computing performance of single threaded perf event synthesis by
  synthesizing events on the perf process itself:
    Average synthesis took: 80.311 usec (+- 0.077 usec)
    Average num. events: 32.000 (+- 0.000)
    Average time per event 2.510 usec
    Average data synthesis took: 84.429 usec (+- 0.066 usec)
    Average num. events: 179.000 (+- 0.000)
    Average time per event 0.472 usec

  # ./perf-parse bench internals synthesize
  # Running 'internals/synthesize' benchmark:
  Computing performance of single threaded perf event synthesis by
  synthesizing events on the perf process itself:
    Average synthesis took: 79.900 usec (+- 0.077 usec)
    Average num. events: 32.000 (+- 0.000)
    Average time per event 2.497 usec
    Average data synthesis took: 84.832 usec (+- 0.074 usec)
    Average num. events: 180.000 (+- 0.000)
    Average time per event 0.471 usec

  # ./perf-parse bench internals synthesize --mt -M 8
  # Running 'internals/synthesize' benchmark:
  Computing performance of multi threaded perf event synthesis by
  synthesizing events on CPU 0:
    Number of synthesis threads: 1
      Average synthesis took: 36338.100 usec (+- 406.091 usec)
      Average num. events: 14091.300 (+- 7.433)
      Average time per event 2.579 usec
    Number of synthesis threads: 2
      Average synthesis took: 37071.200 usec (+- 746.498 usec)
      Average num. events: 14085.900 (+- 1.900)
      Average time per event 2.632 usec
    Number of synthesis threads: 3
      Average synthesis took: 33932.300 usec (+- 626.861 usec)
      Average num. events: 14085.900 (+- 1.900)
      Average time per event 2.409 usec
    Number of synthesis threads: 4
      Average synthesis took: 33822.700 usec (+- 506.290 usec)
      Average num. events: 14099.200 (+- 8.761)
      Average time per event 2.399 usec
    Number of synthesis threads: 5
      Average synthesis took: 33348.200 usec (+- 389.771 usec)
      Average num. events: 14085.900 (+- 1.900)
      Average time per event 2.367 usec
    Number of synthesis threads: 6
      Average synthesis took: 33269.600 usec (+- 350.341 usec)
      Average num. events: 14084.000 (+- 0.000)
      Average time per event 2.362 usec
    Number of synthesis threads: 7
      Average synthesis took: 32663.900 usec (+- 338.870 usec)
      Average num. events: 14085.900 (+- 1.900)
      Average time per event 2.319 usec
    Number of synthesis threads: 8
      Average synthesis took: 32748.400 usec (+- 285.450 usec)
      Average num. events: 14085.900 (+- 1.900)
      Average time per event 2.325 usec

IOCTL-BASED
-----------
  # ./perf-ioctl bench internals synthesize
  # Running 'internals/synthesize' benchmark:
  Computing performance of single threaded perf event synthesis by
  synthesizing events on the perf process itself:
    Average synthesis took: 72.996 usec (+- 0.076 usec)
    Average num. events: 31.000 (+- 0.000)
    Average time per event 2.355 usec
    Average data synthesis took: 79.067 usec (+- 0.074 usec)
    Average num. events: 178.000 (+- 0.000)
    Average time per event 0.444 usec

  # ./perf-ioctl bench internals synthesize
  # Running 'internals/synthesize' benchmark:
  Computing performance of single threaded perf event synthesis by
  synthesizing events on the perf process itself:
    Average synthesis took: 73.921 usec (+- 0.073 usec)
    Average num. events: 31.000 (+- 0.000)
    Average time per event 2.385 usec
    Average data synthesis took: 80.545 usec (+- 0.070 usec)
    Average num. events: 178.000 (+- 0.000)
    Average time per event 0.453 usec

  # ./perf-ioctl bench internals synthesize --mt -M 8
  # Running 'internals/synthesize' benchmark:
  Computing performance of multi threaded perf event synthesis by
  synthesizing events on CPU 0:
    Number of synthesis threads: 1
      Average synthesis took: 35609.500 usec (+- 428.576 usec)
      Average num. events: 14040.700 (+- 1.700)
      Average time per event 2.536 usec
    Number of synthesis threads: 2
      Average synthesis took: 34293.800 usec (+- 453.811 usec)
      Average num. events: 14040.700 (+- 1.700)
      Average time per event 2.442 usec
    Number of synthesis threads: 3
      Average synthesis took: 32385.200 usec (+- 363.106 usec)
      Average num. events: 14040.700 (+- 1.700)
      Average time per event 2.307 usec
    Number of synthesis threads: 4
      Average synthesis took: 33113.100 usec (+- 553.931 usec)
      Average num. events: 14054.500 (+- 11.469)
      Average time per event 2.356 usec
    Number of synthesis threads: 5
      Average synthesis took: 31600.600 usec (+- 297.349 usec)
      Average num. events: 14012.500 (+- 4.590)
      Average time per event 2.255 usec
    Number of synthesis threads: 6
      Average synthesis took: 32309.900 usec (+- 472.225 usec)
      Average num. events: 14004.000 (+- 0.000)
      Average time per event 2.307 usec
    Number of synthesis threads: 7
      Average synthesis took: 31400.100 usec (+- 206.261 usec)
      Average num. events: 14004.800 (+- 0.800)
      Average time per event 2.242 usec
    Number of synthesis threads: 8
      Average synthesis took: 31601.400 usec (+- 303.350 usec)
      Average num. events: 14005.700 (+- 1.700)
      Average time per event 2.256 usec

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
  • Loading branch information
anakryiko committed May 7, 2024
1 parent 0b1eb00 commit 0841fe6
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 5 deletions.
2 changes: 1 addition & 1 deletion tools/perf/util/debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ int perf_debug_option(const char *str)
/* Allow only verbose value in range (0, 10), otherwise set 0. */
verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose;

#if LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 3, 0)
#if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 3, 0)
if (verbose == 1)
tep_set_loglevel(TEP_LOG_INFO);
else if (verbose == 2)
Expand Down
68 changes: 65 additions & 3 deletions tools/perf/util/synthetic-events.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <linux/string.h>
#include <linux/zalloc.h>
#include <linux/perf_event.h>
#include <linux/fs.h>
#include <asm/bug.h>
#include <perf/evsel.h>
#include <perf/cpumap.h>
Expand All @@ -40,6 +41,7 @@
#include <api/io.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <unistd.h>

Expand Down Expand Up @@ -290,11 +292,67 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
return 0;
}

#if defined(PROCFS_PROCMAP_QUERY)
/*
 * Fetch one VMA of the target process with the PROCFS_PROCMAP_QUERY ioctl
 * issued on io->fd, as a drop-in replacement for parsing one text line.
 *
 * @io:            supplies the file descriptor the ioctl is issued on
 * @start, @end:   receive the VMA's start and end addresses
 * @prot:          receives PROT_READ/PROT_WRITE/PROT_EXEC derived from vma_flags
 * @flags:         receives MAP_SHARED or MAP_PRIVATE
 * @offset:        receives the VMA's offset
 * @maj, @min:     receive the device major/minor numbers
 * @inode:         receives the inode number
 * @pathname_size: size in bytes of the @pathname buffer
 * @pathname:      buffer handed to the kernel to be filled with the VMA name
 * @next:          in: address to query from; out: end of the returned VMA,
 *                 so the following call continues with the next VMA
 * @done:          set to true when iteration must stop
 *
 * Returns true when a VMA was stored in the output parameters. Returns false
 * with *done set to true when the ioctl fails, which includes ENOENT.
 */
static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
				u32 *prot, u32 *flags, __u64 *offset,
				u32 *maj, u32 *min,
				__u64 *inode,
				ssize_t pathname_size, char *pathname,
				__u64 *next, bool *done)
{
	/*
	 * Designated initializers zero every field that is not listed, so no
	 * uninitialized stack bytes are passed to the kernel.
	 */
	struct procfs_procmap_query q = {
		.size		= sizeof(q),
		.query_flags	= PROCFS_PROCMAP_EXACT_OR_NEXT_VMA,
		.query_addr	= *next,
		/* Go through unsigned long to avoid sign-extending on 32-bit. */
		.vma_name_addr	= (__u64)(unsigned long)pathname,
		.vma_name_size	= pathname_size,
		.build_id_size	= 0,
		.build_id_addr	= 0,
	};

	/*
	 * Any failure ends the iteration, ENOENT included: retrying the same
	 * query address could not make progress.
	 */
	if (ioctl(io->fd, PROCFS_PROCMAP_QUERY, &q) < 0) {
		*done = true;
		return false;
	}

	/*
	 * The caller reuses the same name buffer for every VMA. When the
	 * kernel reports no name (vma_name_size == 0) the buffer presumably
	 * keeps its previous contents, so clear it explicitly to keep a stale
	 * path from being attributed to this VMA.
	 */
	if (q.vma_name_size == 0 && pathname_size > 0)
		pathname[0] = '\0';

	*start = q.vma_start;
	*end = q.vma_end;
	*offset = q.vma_offset;

	*prot = 0;
	if (q.vma_flags & PROCFS_PROCMAP_VMA_READABLE)
		*prot |= PROT_READ;
	if (q.vma_flags & PROCFS_PROCMAP_VMA_WRITABLE)
		*prot |= PROT_WRITE;
	if (q.vma_flags & PROCFS_PROCMAP_VMA_EXECUTABLE)
		*prot |= PROT_EXEC;

	if (q.vma_flags & PROCFS_PROCMAP_VMA_SHARED)
		*flags = MAP_SHARED;
	else
		*flags = MAP_PRIVATE;

	*inode = q.inode;
	*maj = q.dev_major;
	*min = q.dev_minor;

	/* Resume the next query right after this VMA. */
	*next = q.vma_end;

	return true;
}
#else
static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
u32 *prot, u32 *flags, __u64 *offset,
u32 *maj, u32 *min,
__u64 *inode,
ssize_t pathname_size, char *pathname,
__u64 *next __maybe_unused, bool *done __maybe_unused)
{
__u64 temp;
int ch;
Expand Down Expand Up @@ -362,6 +420,7 @@ static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
ch = io__get_char(io);
}
}
#endif

static void perf_record_mmap2__read_build_id(struct perf_record_mmap2 *event,
struct machine *machine,
Expand Down Expand Up @@ -433,6 +492,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
int rc = 0;
const char *hugetlbfs_mnt = hugetlbfs__mountpoint();
int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0;
__u64 next = 0;
bool done = false;

if (machine__is_default_guest(machine))
return 0;
Expand All @@ -453,7 +514,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
event->header.type = PERF_RECORD_MMAP2;
t = rdclock();

while (!io.eof) {
while (!done && !io.eof) {
static const char anonstr[] = "//anon";
size_t size, aligned_size;

Expand All @@ -471,7 +532,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
&event->mmap2.min,
&event->mmap2.ino,
sizeof(event->mmap2.filename),
event->mmap2.filename))
event->mmap2.filename,
&next, &done))
continue;

if ((rdclock() - t) > timeout) {
Expand Down
3 changes: 2 additions & 1 deletion tools/perf/util/trace-event.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,8 @@ int common_lock_depth(struct scripting_context *context);
#define SAMPLE_FLAGS_BUF_SIZE 64
int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz);

#if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0)
#undef LIBTRACEEVENT_VERSION
#if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0)
#include <traceevent/event-parse.h>

static inline bool tep_field_is_relative(unsigned long flags)
Expand Down

0 comments on commit 0841fe6

Please sign in to comment.