 
 #include "../perf_event.h"
 
-static const enum {
-        LBR_EIP_FLAGS           = 1,
-        LBR_TSX                 = 2,
-} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
-        [LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
-        [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
-};
-
 /*
  * Intel LBR_SELECT bits
  * Intel Vol3a, April 2011, Section 16.7 Table 16-10
@@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void)
         for (i = 0; i < x86_pmu.lbr_nr; i++) {
                 wrmsrl(x86_pmu.lbr_from + i, 0);
                 wrmsrl(x86_pmu.lbr_to + i, 0);
-                if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+                if (x86_pmu.lbr_has_info)
                         wrmsrl(x86_pmu.lbr_info + i, 0);
         }
 }
@@ -305,11 +297,10 @@ enum {
  */
 static inline bool lbr_from_signext_quirk_needed(void)
 {
-        int lbr_format = x86_pmu.intel_cap.lbr_format;
         bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
                            boot_cpu_has(X86_FEATURE_RTM);
 
-        return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
+        return !tsx_support && x86_pmu.lbr_has_tsx;
 }
 
 static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -427,12 +418,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)
 
 void intel_pmu_lbr_restore(void *ctx)
 {
-        bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
         struct x86_perf_task_context *task_ctx = ctx;
-        int i;
-        unsigned lbr_idx, mask;
+        bool need_info = x86_pmu.lbr_has_info;
         u64 tos = task_ctx->tos;
+        unsigned lbr_idx, mask;
+        int i;
 
         mask = x86_pmu.lbr_nr - 1;
         for (i = 0; i < task_ctx->valid_lbrs; i++) {
@@ -444,7 +435,7 @@ void intel_pmu_lbr_restore(void *ctx)
                 lbr_idx = (tos - i) & mask;
                 wrlbr_from(lbr_idx, 0);
                 wrlbr_to(lbr_idx, 0);
-                if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
+                if (need_info)
                         wrlbr_info(lbr_idx, 0);
         }
 
@@ -519,9 +510,9 @@ static void __intel_pmu_lbr_restore(void *ctx)
 
 void intel_pmu_lbr_save(void *ctx)
 {
-        bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
         struct x86_perf_task_context *task_ctx = ctx;
+        bool need_info = x86_pmu.lbr_has_info;
         unsigned lbr_idx, mask;
         u64 tos;
         int i;
@@ -816,7 +807,6 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 {
         bool need_info = false, call_stack = false;
         unsigned long mask = x86_pmu.lbr_nr - 1;
-        int lbr_format = x86_pmu.intel_cap.lbr_format;
         u64 tos = intel_pmu_lbr_tos();
         int i;
         int out = 0;
@@ -831,9 +821,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
         for (i = 0; i < num; i++) {
                 unsigned long lbr_idx = (tos - i) & mask;
                 u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
-                int skip = 0;
                 u16 cycles = 0;
-                int lbr_flags = lbr_desc[lbr_format];
 
                 from = rdlbr_from(lbr_idx, NULL);
                 to = rdlbr_to(lbr_idx, NULL);
@@ -845,37 +833,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
                 if (call_stack && !from)
                         break;
 
-                if (lbr_format == LBR_FORMAT_INFO && need_info) {
-                        u64 info;
-
-                        info = rdlbr_info(lbr_idx, NULL);
-                        mis = !!(info & LBR_INFO_MISPRED);
-                        pred = !mis;
-                        in_tx = !!(info & LBR_INFO_IN_TX);
-                        abort = !!(info & LBR_INFO_ABORT);
-                        cycles = (info & LBR_INFO_CYCLES);
-                }
-
-                if (lbr_format == LBR_FORMAT_TIME) {
-                        mis = !!(from & LBR_FROM_FLAG_MISPRED);
-                        pred = !mis;
-                        skip = 1;
-                        cycles = ((to >> 48) & LBR_INFO_CYCLES);
-
-                        to = (u64)((((s64)to) << 16) >> 16);
-                }
-
-                if (lbr_flags & LBR_EIP_FLAGS) {
-                        mis = !!(from & LBR_FROM_FLAG_MISPRED);
-                        pred = !mis;
-                        skip = 1;
-                }
-                if (lbr_flags & LBR_TSX) {
-                        in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
-                        abort = !!(from & LBR_FROM_FLAG_ABORT);
-                        skip = 3;
+                if (x86_pmu.lbr_has_info) {
+                        if (need_info) {
+                                u64 info;
+
+                                info = rdlbr_info(lbr_idx, NULL);
+                                mis = !!(info & LBR_INFO_MISPRED);
+                                pred = !mis;
+                                cycles = (info & LBR_INFO_CYCLES);
+                                if (x86_pmu.lbr_has_tsx) {
+                                        in_tx = !!(info & LBR_INFO_IN_TX);
+                                        abort = !!(info & LBR_INFO_ABORT);
+                                }
+                        }
+                } else {
+                        int skip = 0;
+
+                        if (x86_pmu.lbr_from_flags) {
+                                mis = !!(from & LBR_FROM_FLAG_MISPRED);
+                                pred = !mis;
+                                skip = 1;
+                        }
+                        if (x86_pmu.lbr_has_tsx) {
+                                in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
+                                abort = !!(from & LBR_FROM_FLAG_ABORT);
+                                skip = 3;
+                        }
+                        from = (u64)((((s64)from) << skip) >> skip);
+
+                        if (x86_pmu.lbr_to_cycles) {
+                                cycles = ((to >> 48) & LBR_INFO_CYCLES);
+                                to = (u64)((((s64)to) << 16) >> 16);
+                        }
                 }
-                from = (u64)((((s64)from) << skip) >> skip);
 
                 /*
                  * Some CPUs report duplicated abort records,
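
The legacy (pre-LBR_INFO) formats pack the MISPRED/IN_TX/ABORT flags into the top bits of the FROM address itself, so the read path above strips them with an arithmetic shift pair, `(u64)((((s64)from) << skip) >> skip)`, which drops the top `skip` bits and sign-extends the remainder back to a canonical address. A minimal user-space sketch of that arithmetic, with a made-up sample value:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Drop the top 'skip' flag bits and sign-extend the rest, mirroring
 * from = (u64)((((s64)from) << skip) >> skip) in the kernel's read path.
 */
static uint64_t strip_flags(uint64_t from, int skip)
{
        return (uint64_t)(((int64_t)from << skip) >> skip);
}

int main(void)
{
        /* Hypothetical raw FROM value: MISPRED flag in bit 63,
         * kernel-space branch source in the low bits.
         */
        uint64_t raw = (1ULL << 63) | 0x1fffffff81000000ULL;

        /* skip = 3 when TSX flags are present, 1 for flags-only formats */
        printf("0x%" PRIx64 "\n", strip_flags(raw, 3)); /* 0xffffffff81000000 */
        return 0;
}
```
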
@@ -903,37 +893,40 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
         cpuc->lbr_stack.hw_idx = tos;
 }
 
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
+static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);
+
 static __always_inline int get_lbr_br_type(u64 info)
 {
-        if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
-                return 0;
+        int type = 0;
 
-        return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+        if (static_branch_likely(&x86_lbr_type))
+                type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
+
+        return type;
 }
 
 static __always_inline bool get_lbr_mispred(u64 info)
 {
-        if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
-                return 0;
+        bool mispred = 0;
 
-        return !!(info & LBR_INFO_MISPRED);
-}
+        if (static_branch_likely(&x86_lbr_mispred))
+                mispred = !!(info & LBR_INFO_MISPRED);
 
-static __always_inline bool get_lbr_predicted(u64 info)
-{
-        if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
-                return 0;
-
-        return !(info & LBR_INFO_MISPRED);
+        return mispred;
 }
 
 static __always_inline u16 get_lbr_cycles(u64 info)
 {
+        u16 cycles = info & LBR_INFO_CYCLES;
+
         if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
-            !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
-                return 0;
+            (!static_branch_likely(&x86_lbr_cycles) ||
+             !(info & LBR_INFO_CYC_CNT_VALID)))
+                cycles = 0;
 
-        return info & LBR_INFO_CYCLES;
+        return cycles;
 }
 
 static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
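
The reworked helpers hinge on the kernel's jump-label machinery: a key declared with DEFINE_STATIC_KEY_FALSE starts disabled, and a static_branch_likely() site compiles to a NOP or jump that is live-patched when init code calls static_branch_enable(), so the per-record hot path no longer loads and tests x86_pmu fields. A stripped-down sketch of the pattern, using a hypothetical demo_feature key (not from this file):

```c
#include <linux/init.h>
#include <linux/jump_label.h>
#include <linux/bits.h>
#include <linux/types.h>

/* Starts false: static_branch_likely() takes the disabled path
 * until someone flips the key.
 */
static DEFINE_STATIC_KEY_FALSE(demo_feature);

/* One-time decision, e.g. from CPUID or capability enumeration */
static void __init demo_init(bool hw_has_feature)
{
        if (hw_has_feature)
                static_branch_enable(&demo_feature);
}

/* Hot path: once enabled, this is a patched jump rather than a
 * memory load plus compare of a feature flag.
 */
static __always_inline bool demo_bit_set(u64 info)
{
        bool set = false;

        if (static_branch_likely(&demo_feature))
                set = !!(info & BIT_ULL(0));

        return set;
}
```

Note that get_lbr_cycles() still keeps a runtime test: on arch LBR the LBR_INFO_CYC_CNT_VALID bit is per record, so only the capability check moves behind the static key.
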
@@ -961,7 +954,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
                 e->from         = from;
                 e->to           = to;
                 e->mispred      = get_lbr_mispred(info);
-                e->predicted    = get_lbr_predicted(info);
+                e->predicted    = !e->mispred;
                 e->in_tx        = !!(info & LBR_INFO_IN_TX);
                 e->abort        = !!(info & LBR_INFO_ABORT);
                 e->cycles       = get_lbr_cycles(info);
@@ -1120,7 +1113,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 
         if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
             (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
-            (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+            x86_pmu.lbr_has_info)
                 reg->config |= LBR_NO_INFO;
 
         return 0;
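
LBR_NO_INFO lets the kernel skip the extra LBR_INFO MSR reads when the consumer wants neither flags nor cycle counts; from user space that opt-out is expressed through perf_event_attr.branch_sample_type. A minimal attribute setup that would take this path (the perf_event_open() call and error handling are omitted):

```c
#include <linux/perf_event.h>
#include <string.h>

static void setup_branch_attr(struct perf_event_attr *attr)
{
        memset(attr, 0, sizeof(*attr));
        attr->size = sizeof(*attr);
        attr->type = PERF_TYPE_HARDWARE;
        attr->config = PERF_COUNT_HW_CPU_CYCLES;
        attr->sample_type = PERF_SAMPLE_BRANCH_STACK;
        /* Decline flags and cycle counts so the kernel may set
         * LBR_NO_INFO and avoid the per-entry LBR_INFO reads.
         */
        attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
                                   PERF_SAMPLE_BRANCH_NO_FLAGS |
                                   PERF_SAMPLE_BRANCH_NO_CYCLES;
}
```
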
@@ -1706,6 +1699,38 @@ void intel_pmu_lbr_init_knl(void)
                 x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
 }
 
+void intel_pmu_lbr_init(void)
+{
+        switch (x86_pmu.intel_cap.lbr_format) {
+        case LBR_FORMAT_EIP_FLAGS2:
+                x86_pmu.lbr_has_tsx = 1;
+                fallthrough;
+        case LBR_FORMAT_EIP_FLAGS:
+                x86_pmu.lbr_from_flags = 1;
+                break;
+
+        case LBR_FORMAT_INFO:
+                x86_pmu.lbr_has_tsx = 1;
+                fallthrough;
+        case LBR_FORMAT_INFO2:
+                x86_pmu.lbr_has_info = 1;
+                break;
+
+        case LBR_FORMAT_TIME:
+                x86_pmu.lbr_from_flags = 1;
+                x86_pmu.lbr_to_cycles = 1;
+                break;
+        }
+
+        if (x86_pmu.lbr_has_info) {
+                /*
+                 * Only used in combination with baseline pebs.
+                 */
+                static_branch_enable(&x86_lbr_mispred);
+                static_branch_enable(&x86_lbr_cycles);
+        }
+}
+
 /*
  * LBR state size is variable based on the max number of registers.
  * This calculates the expected state size, which should match
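
The switch in intel_pmu_lbr_init() leans on fallthrough so the TSX-capable formats also inherit their base format's capability: EIP_FLAGS2 ends up with both lbr_has_tsx and lbr_from_flags, and INFO with both lbr_has_tsx and lbr_has_info. A standalone sketch that mirrors the decode (the enum values are illustrative placeholders, not the kernel's LBR_FORMAT_* numbers):

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel's LBR_FORMAT_* constants */
enum lbr_format { EIP_FLAGS, EIP_FLAGS2, INFO, INFO2, TIME };

struct lbr_caps { bool from_flags, has_tsx, has_info, to_cycles; };

/* Mirrors the fallthrough structure of intel_pmu_lbr_init() */
static struct lbr_caps decode(enum lbr_format fmt)
{
        struct lbr_caps c = {0};

        switch (fmt) {
        case EIP_FLAGS2:
                c.has_tsx = true;
                /* fallthrough */
        case EIP_FLAGS:
                c.from_flags = true;
                break;
        case INFO:
                c.has_tsx = true;
                /* fallthrough */
        case INFO2:
                c.has_info = true;
                break;
        case TIME:
                c.from_flags = true;
                c.to_cycles = true;
                break;
        }
        return c;
}

int main(void)
{
        struct lbr_caps c = decode(EIP_FLAGS2);

        /* EIP_FLAGS2 picks up from_flags via the fallthrough */
        printf("has_tsx=%d from_flags=%d\n", c.has_tsx, c.from_flags);
        return 0;
}
```
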
@@ -1726,6 +1751,9 @@ static bool is_arch_lbr_xsave_available(void)
          * Check the LBR state with the corresponding software structure.
          * Disable LBR XSAVES support if the size doesn't match.
          */
+        if (xfeature_size(XFEATURE_LBR) == 0)
+                return false;
+
         if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
                 return false;
 
@@ -1765,6 +1793,12 @@ void __init intel_pmu_arch_lbr_init(void)
         x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
         x86_pmu.lbr_nr = lbr_nr;
 
+        if (x86_pmu.lbr_mispred)
+                static_branch_enable(&x86_lbr_mispred);
+        if (x86_pmu.lbr_timed_lbr)
+                static_branch_enable(&x86_lbr_cycles);
+        if (x86_pmu.lbr_br_type)
+                static_branch_enable(&x86_lbr_type);
 
         arch_lbr_xsave = is_arch_lbr_xsave_available();
         if (arch_lbr_xsave) {
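
Whichever path enables the keys, legacy format decode or arch LBR CPUID bits, the decoded values land in struct perf_branch_entry in the sample record, so user space reads mispred/predicted/in_tx/abort/cycles uniformly across hardware formats. A sketch of walking a parsed branch stack (the entries pointer and count are assumed to come from the caller's PERF_SAMPLE_BRANCH_STACK sample parsing):

```c
#include <linux/perf_event.h>
#include <stdio.h>

/* 'entries'/'nr' come from a PERF_RECORD_SAMPLE taken with
 * PERF_SAMPLE_BRANCH_STACK; ring-buffer parsing is omitted.
 */
static void dump_branches(const struct perf_branch_entry *entries,
                          unsigned long long nr)
{
        for (unsigned long long i = 0; i < nr; i++) {
                const struct perf_branch_entry *e = &entries[i];

                printf("%#llx -> %#llx mispred=%u predicted=%u cycles=%u\n",
                       (unsigned long long)e->from,
                       (unsigned long long)e->to,
                       (unsigned int)e->mispred,
                       (unsigned int)e->predicted,
                       (unsigned int)e->cycles);
        }
}
```
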