Skip to content

Commit 86a349a

Browse files
Andi Kleen authored and Ingo Molnar committed
perf/x86/intel: Add Broadwell core support
Add Broadwell support for Broadwell Client to perf. This is very similar to Haswell. It uses a new cache event table, because there were various changes there. The constraint list has one new event that needs to be handled over Haswell. The PEBS event list is the same, so we reuse Haswell's. [fengguang.wu: make intel_bdw_event_constraints[] static] Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: eranian@google.com Link: http://lkml.kernel.org/r/1409683455-29168-3-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
1 parent d86c8ea commit 86a349a

File tree

1 file changed

+150
-0
lines changed

1 file changed

+150
-0
lines changed

arch/x86/kernel/cpu/perf_event_intel.c

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,15 @@ static struct event_constraint intel_hsw_event_constraints[] = {
220220
EVENT_CONSTRAINT_END
221221
};
222222

223+
/*
 * Broadwell core event constraints.
 *
 * Same as Haswell's list except for the added L1D_PEND_MISS.PENDING
 * entry (per the commit, the one event Broadwell handles differently).
 * The 0x4 mask presumably restricts the event to general counter 2 —
 * NOTE(review): confirm against the INTEL_*_CONSTRAINT macro definitions.
 */
static struct event_constraint intel_bdw_event_constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
	FIXED_EVENT_CONSTRAINT(0x0300, 2),	/* CPU_CLK_UNHALTED.REF */
	INTEL_UEVENT_CONSTRAINT(0x148, 0x4),	/* L1D_PEND_MISS.PENDING */
	INTEL_EVENT_CONSTRAINT(0xa3, 0x4),	/* CYCLE_ACTIVITY.* */
	EVENT_CONSTRAINT_END
};
231+
223232
static u64 intel_pmu_event_map(int hw_event)
224233
{
225234
return intel_perfmon_event_map[hw_event];
@@ -415,6 +424,126 @@ static __initconst const u64 snb_hw_cache_event_ids
415424

416425
};
417426

427+
/*
 * Haswell generic-cache-event to raw-event-ID translation table,
 * indexed [cache][operation][result].
 *
 * Encoding conventions visible in the table:
 *   0x0  - event not counted / not available
 *   -1   - combination is not meaningful (e.g. writes to the I-cache)
 *   0x1b7 - OFFCORE_RESPONSE; the actual response selection lives in the
 *           companion hsw_hw_cache_extra_regs table below.
 * Reused unchanged for Broadwell (see the model 61 case in
 * intel_pmu_init()).
 */
static __initconst const u64 hsw_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS)   ] = 0x151,	/* L1D.REPLACEMENT */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
		[ C(RESULT_MISS)   ] = 0x0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(L1I ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x280,	/* ICACHE.MISSES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
		[ C(RESULT_ACCESS) ] = 0x1b7,
		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
		   L3_MISS|ANY_SNOOP */
		[ C(RESULT_MISS)   ] = 0x1b7,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x1b7,	/* OFFCORE_RESPONSE:ALL_RFO */
		/* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
		[ C(RESULT_MISS)   ] = 0x1b7,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x81d0,	/* MEM_UOPS_RETIRED.ALL_LOADS */
		[ C(RESULT_MISS)   ] = 0x108,	/* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x82d0,	/* MEM_UOPS_RETIRED.ALL_STORES */
		[ C(RESULT_MISS)   ] = 0x149,	/* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x6085,	/* ITLB_MISSES.STLB_HIT */
		[ C(RESULT_MISS)   ] = 0x185,	/* ITLB_MISSES.MISS_CAUSES_A_WALK */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU ) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0xc4,	/* BR_INST_RETIRED.ALL_BRANCHES */
		[ C(RESULT_MISS)   ] = 0xc5,	/* BR_MISP_RETIRED.ALL_BRANCHES */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};
521+
522+
/*
 * Extra (MSR_OFFCORE_RSP_*) register values for the 0x1b7
 * OFFCORE_RESPONSE entries in hsw_hw_cache_event_ids above;
 * only the LL level uses offcore response, so only C(LL) is populated.
 * The wide 0x3fbc02....ull values encode the SUPPLIER_NONE|L3_MISS|
 * ANY_SNOOP response bits described in the adjacent comments.
 */
static __initconst const u64 hsw_hw_cache_extra_regs
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(LL  ) ] = {
	[ C(OP_READ) ] = {
		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
		[ C(RESULT_ACCESS) ] = 0x2d5,
		/* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
		   L3_MISS|ANY_SNOOP */
		[ C(RESULT_MISS)   ] = 0x3fbc0202d5ull,
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x122,	/* OFFCORE_RESPONSE:ALL_RFO */
		/* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
		[ C(RESULT_MISS)   ] = 0x3fbc020122ull,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0,
		[ C(RESULT_MISS)   ] = 0x0,
	},
 },
};
546+
418547
static __initconst const u64 westmere_hw_cache_event_ids
419548
[PERF_COUNT_HW_CACHE_MAX]
420549
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -2565,6 +2694,27 @@ __init int intel_pmu_init(void)
25652694
pr_cont("Haswell events, ");
25662695
break;
25672696

2697+
	case 61: /* 14nm Broadwell Core-M */
		/*
		 * Broadwell client setup: per the commit message this is
		 * "very similar to Haswell", so the cache tables, PEBS
		 * constraints, hw_config and event attrs are all reused
		 * from HSW; only the core event constraint list is new.
		 */
		x86_pmu.late_ack = true;
		memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));

		/* LBR setup shared with Sandy Bridge */
		intel_pmu_lbr_init_snb();

		x86_pmu.event_constraints = intel_bdw_event_constraints;
		/* PEBS event list is identical to Haswell's */
		x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
		x86_pmu.extra_regs = intel_snbep_extra_regs;
		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
		/* all extra regs are per-cpu when HT is on */
		x86_pmu.er_flags |= ERF_HAS_RSP_1;
		x86_pmu.er_flags |= ERF_NO_HT_SHARING;

		x86_pmu.hw_config = hsw_hw_config;
		x86_pmu.get_event_constraints = hsw_get_event_constraints;
		x86_pmu.cpu_events = hsw_events_attrs;
		pr_cont("Broadwell events, ");
		break;
2717+
25682718
default:
25692719
switch (x86_pmu.version) {
25702720
case 1:

0 commit comments

Comments
 (0)