Skip to content

Commit 8db5cab

Browse files
weilinwa authored and
acmel committed
perf stat: Fork and launch 'perf record' when 'perf stat' needs to get retire latency value for a metric.
When a retire_latency value is used in a metric formula, evsel forks a 'perf record' process with the "-e" and "-W" options. 'perf record' collects the required retire_latency values in parallel while 'perf stat' is collecting counting values. At the point in time when 'perf stat' stops counting, evsel stops 'perf record' by sending a SIGTERM signal to the 'perf record' process. The sampled data is then processed to get the retire latency value. Another thread is required to synchronize between 'perf stat' and 'perf record' when we pass data through a pipe. The retire_latency evsel is not opened for 'perf stat', so that no counter is wasted on it. This commit includes code suggested by Namhyung to adjust the reading size for groups that include retire_latency evsels. In the current :R parsing implementation, the parser recognizes events with the retire_latency modifier and inserts them into the evlist like a normal event. Ideally, we need to avoid counting these events. In this commit, at the time when a retire_latency evsel is read, the retire latency value processed from the sampled data is set as the count value. This sampled retire latency value is used for metric calculation and the final event count printout. No special metric calculation or event printout code is required for retire_latency events.
Reviewed-by: Namhyung Kim <namhyung@kernel.org> Signed-off-by: Weilin Wang <weilin.wang@intel.com> Acked-by: Ian Rogers <irogers@google.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Caleb Biggers <caleb.biggers@intel.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kan Liang <kan.liang@linux.intel.com> Cc: Perry Taylor <perry.taylor@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Samantha Alt <samantha.alt@intel.com> Link: https://lore.kernel.org/r/20240720062102.444578-4-weilin.wang@intel.com [ Squashed the 3rd and 4th commit in the series to keep it building patch by patch ] [ Constified the 'struct perf_tool' pointer in process_sample_event() ] [ Use perf_tool__init(&tool, false) to address a segfault I reported and Ian/Weilin diagnosed ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
1 parent a9a4ca5 commit 8db5cab

File tree

8 files changed

+564
-2
lines changed

8 files changed

+564
-2
lines changed

tools/perf/arch/x86/util/evlist.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,12 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
8989
return 1;
9090
}
9191

92+
/* Retire latency event should not be group leader*/
93+
if (lhs->retire_lat && !rhs->retire_lat)
94+
return 1;
95+
if (!lhs->retire_lat && rhs->retire_lat)
96+
return -1;
97+
9298
/* Default ordering by insertion index. */
9399
return lhs->core.idx - rhs->core.idx;
94100
}

tools/perf/builtin-stat.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
#include "util/bpf_counter.h"
7171
#include "util/iostat.h"
7272
#include "util/util.h"
73+
#include "util/intel-tpebs.h"
7374
#include "asm/bug.h"
7475

7576
#include <linux/time64.h>
@@ -683,6 +684,9 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
683684

684685
if (child_pid != -1)
685686
kill(child_pid, SIGTERM);
687+
688+
tpebs_delete();
689+
686690
return COUNTER_FATAL;
687691
}
688692

tools/perf/util/Build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ perf-util-y += clockid.o
156156
perf-util-y += list_sort.o
157157
perf-util-y += mutex.o
158158
perf-util-y += sharded_mutex.o
159+
perf-util-$(CONFIG_X86_64) += intel-tpebs.o
159160

160161
perf-util-$(CONFIG_LIBBPF) += bpf_map.o
161162
perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o

tools/perf/util/evlist.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include "util/bpf-filter.h"
3434
#include "util/stat.h"
3535
#include "util/util.h"
36+
#include "util/intel-tpebs.h"
3637
#include <signal.h>
3738
#include <unistd.h>
3839
#include <sched.h>
@@ -179,6 +180,7 @@ void evlist__delete(struct evlist *evlist)
179180
if (evlist == NULL)
180181
return;
181182

183+
tpebs_delete();
182184
evlist__free_stats(evlist);
183185
evlist__munmap(evlist);
184186
evlist__close(evlist);

tools/perf/util/evsel.c

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
#include <internal/xyarray.h>
6060
#include <internal/lib.h>
6161
#include <internal/threadmap.h>
62+
#include "util/intel-tpebs.h"
6263

6364
#include <linux/ctype.h>
6465

@@ -1539,13 +1540,24 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread)
15391540
return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count);
15401541
}
15411542

1543+
/*
 * Read path for a retire-latency evsel.
 *
 * Delegates entirely to tpebs_set_evsel() for the given cpu map index and
 * thread, and hands its return value back to the caller unchanged.
 * NOTE(review): per the commit description this stores the sampled retire
 * latency as the evsel's count value; confirm against tpebs_set_evsel().
 */
static int evsel__read_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread)
{
	int ret = tpebs_set_evsel(evsel, cpu_map_idx, thread);

	return ret;
}
1547+
15421548
static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
15431549
u64 val, u64 ena, u64 run, u64 lost)
15441550
{
15451551
struct perf_counts_values *count;
15461552

15471553
count = perf_counts(counter->counts, cpu_map_idx, thread);
15481554

1555+
if (counter->retire_lat) {
1556+
evsel__read_retire_lat(counter, cpu_map_idx, thread);
1557+
perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
1558+
return;
1559+
}
1560+
15491561
count->val = val;
15501562
count->ena = ena;
15511563
count->run = run;
@@ -1554,6 +1566,60 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread,
15541566
perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true);
15551567
}
15561568

1569+
static bool evsel__group_has_tpebs(struct evsel *leader)
1570+
{
1571+
struct evsel *evsel;
1572+
1573+
for_each_group_evsel(evsel, leader) {
1574+
if (evsel__is_retire_lat(evsel))
1575+
return true;
1576+
}
1577+
return false;
1578+
}
1579+
1580+
static u64 evsel__group_read_nr_members(struct evsel *leader)
1581+
{
1582+
u64 nr = leader->core.nr_members;
1583+
struct evsel *evsel;
1584+
1585+
for_each_group_evsel(evsel, leader) {
1586+
if (evsel__is_retire_lat(evsel))
1587+
nr--;
1588+
}
1589+
return nr;
1590+
}
1591+
1592+
static u64 evsel__group_read_size(struct evsel *leader)
1593+
{
1594+
u64 read_format = leader->core.attr.read_format;
1595+
int entry = sizeof(u64); /* value */
1596+
int size = 0;
1597+
int nr = 1;
1598+
1599+
if (!evsel__group_has_tpebs(leader))
1600+
return perf_evsel__read_size(&leader->core);
1601+
1602+
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1603+
size += sizeof(u64);
1604+
1605+
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1606+
size += sizeof(u64);
1607+
1608+
if (read_format & PERF_FORMAT_ID)
1609+
entry += sizeof(u64);
1610+
1611+
if (read_format & PERF_FORMAT_LOST)
1612+
entry += sizeof(u64);
1613+
1614+
if (read_format & PERF_FORMAT_GROUP) {
1615+
nr = evsel__group_read_nr_members(leader);
1616+
size += sizeof(u64);
1617+
}
1618+
1619+
size += entry * nr;
1620+
return size;
1621+
}
1622+
15571623
static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int thread, u64 *data)
15581624
{
15591625
u64 read_format = leader->core.attr.read_format;
@@ -1562,7 +1628,7 @@ static int evsel__process_group_data(struct evsel *leader, int cpu_map_idx, int
15621628

15631629
nr = *data++;
15641630

1565-
if (nr != (u64) leader->core.nr_members)
1631+
if (nr != evsel__group_read_nr_members(leader))
15661632
return -EINVAL;
15671633

15681634
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -1592,7 +1658,7 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread)
15921658
{
15931659
struct perf_stat_evsel *ps = leader->stats;
15941660
u64 read_format = leader->core.attr.read_format;
1595-
int size = perf_evsel__read_size(&leader->core);
1661+
int size = evsel__group_read_size(leader);
15961662
u64 *data = ps->group_data;
15971663

15981664
if (!(read_format & PERF_FORMAT_ID))
@@ -1784,6 +1850,9 @@ int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread)
17841850
if (evsel__is_tool(evsel))
17851851
return evsel__read_tool(evsel, cpu_map_idx, thread);
17861852

1853+
if (evsel__is_retire_lat(evsel))
1854+
return evsel__read_retire_lat(evsel, cpu_map_idx, thread);
1855+
17871856
if (evsel->core.attr.read_format & PERF_FORMAT_GROUP)
17881857
return evsel__read_group(evsel, cpu_map_idx, thread);
17891858

@@ -2200,6 +2269,9 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
22002269
return 0;
22012270
}
22022271

2272+
if (evsel__is_retire_lat(evsel))
2273+
return tpebs_start(evsel->evlist);
2274+
22032275
err = __evsel__prepare_open(evsel, cpus, threads);
22042276
if (err)
22052277
return err;
@@ -2392,6 +2464,8 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
23922464

23932465
void evsel__close(struct evsel *evsel)
23942466
{
2467+
if (evsel__is_retire_lat(evsel))
2468+
tpebs_delete();
23952469
perf_evsel__close(&evsel->core);
23962470
perf_evsel__free_id(&evsel->core);
23972471
}
@@ -3357,6 +3431,9 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
33573431
{
33583432
int cpu_map_idx, thread;
33593433

3434+
if (evsel__is_retire_lat(evsel))
3435+
return 0;
3436+
33603437
for (cpu_map_idx = 0; cpu_map_idx < xyarray__max_x(evsel->core.fd); cpu_map_idx++) {
33613438
for (thread = 0; thread < xyarray__max_y(evsel->core.fd);
33623439
thread++) {

tools/perf/util/evsel.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,11 @@ static inline bool evsel__is_tool(const struct evsel *evsel)
311311
return evsel->tool_event != PERF_TOOL_NONE;
312312
}
313313

314+
static inline bool evsel__is_retire_lat(const struct evsel *evsel)
315+
{
316+
return evsel->retire_lat;
317+
}
318+
314319
const char *evsel__group_name(struct evsel *evsel);
315320
int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
316321

0 commit comments

Comments
 (0)