Skip to content

Commit

Permalink
Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/p…
Browse files Browse the repository at this point in the history
…ub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  perf_counter: Report the cloning task as parent on perf_counter_fork()
  perf_counter: Fix an ipi-deadlock
  perf: Rework/fix the whole read vs group stuff
  perf_counter: Fix swcounter context invariance
  perf report: Don't show unresolved DSOs and symbols when -S/-d is used
  perf tools: Add a general option to enable raw sample records
  perf tools: Add a per tracepoint counter attribute to get raw sample
  perf_counter: Provide hw_perf_counter_setup_online() APIs
  perf list: Fix large list output by using the pager
  perf_counter, x86: Fix/improve apic fallback
  perf record: Add missing -C option support for specifying profile cpu
  perf tools: Fix dso__new handle() to handle deleted DSOs
  perf tools: Fix fallback to cplus_demangle() when bfd_demangle() is not available
  perf report: Show the tid too in -D
  perf record: Fix .tid and .pid fill-in when synthesizing events
  perf_counter, x86: Fix generic cache events on P6-mobile CPUs
  perf_counter, x86: Fix lapic printk message
  • Loading branch information
torvalds committed Aug 13, 2009
2 parents 919aa96 + 94d5d1b commit 3493e84
Show file tree
Hide file tree
Showing 11 changed files with 429 additions and 190 deletions.
2 changes: 1 addition & 1 deletion arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_PERF_COUNTERS if (!M386 && !M486)
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB
Expand Down Expand Up @@ -742,7 +743,6 @@ config X86_UP_IOAPIC
config X86_LOCAL_APIC
def_bool y
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
select HAVE_PERF_COUNTERS if (!M386 && !M486)

config X86_IO_APIC
def_bool y
Expand Down
40 changes: 33 additions & 7 deletions arch/x86/kernel/cpu/perf_counter.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ struct x86_pmu {
int num_counters_fixed;
int counter_bits;
u64 counter_mask;
int apic;
u64 max_period;
u64 intel_ctrl;
};
Expand All @@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] =
{
[PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
[PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000,
[PERF_COUNT_HW_CACHE_MISSES] = 0x0000,
[PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
[PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
[PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
Expand Down Expand Up @@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);

static bool reserve_pmc_hardware(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
int i;

if (nmi_watchdog == NMI_LOCAL_APIC)
Expand All @@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void)
if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
goto eventsel_fail;
}
#endif

return true;

#ifdef CONFIG_X86_LOCAL_APIC
eventsel_fail:
for (i--; i >= 0; i--)
release_evntsel_nmi(x86_pmu.eventsel + i);
Expand All @@ -644,10 +648,12 @@ static bool reserve_pmc_hardware(void)
enable_lapic_nmi_watchdog();

return false;
#endif
}

static void release_pmc_hardware(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
int i;

for (i = 0; i < x86_pmu.num_counters; i++) {
Expand All @@ -657,6 +663,7 @@ static void release_pmc_hardware(void)

if (nmi_watchdog == NMI_LOCAL_APIC)
enable_lapic_nmi_watchdog();
#endif
}

static void hw_perf_counter_destroy(struct perf_counter *counter)
Expand Down Expand Up @@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
atomic64_set(&hwc->period_left, hwc->sample_period);
} else {
/*
* If we have a PMU initialized but no APIC
* interrupts, we cannot sample hardware
* counters (user-space has to fall back and
* sample via a hrtimer based software counter):
*/
if (!x86_pmu.apic)
return -EOPNOTSUPP;
}

counter->destroy = hw_perf_counter_destroy;
Expand Down Expand Up @@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)

void set_perf_counter_pending(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
apic->send_IPI_self(LOCAL_PENDING_VECTOR);
#endif
}

void perf_counters_lapic_init(void)
{
if (!x86_pmu_initialized())
#ifdef CONFIG_X86_LOCAL_APIC
if (!x86_pmu.apic || !x86_pmu_initialized())
return;

/*
* Always use NMI for PMU
*/
apic_write(APIC_LVTPC, APIC_DM_NMI);
#endif
}

static int __kprobes
Expand All @@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,

regs = args->regs;

#ifdef CONFIG_X86_LOCAL_APIC
apic_write(APIC_LVTPC, APIC_DM_NMI);
#endif
/*
* Can't rely on the handled return value to say it was our NMI, two
* counters could trigger 'simultaneously' raising two back-to-back NMIs.
Expand Down Expand Up @@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = {
.event_map = p6_pmu_event_map,
.raw_event = p6_pmu_raw_event,
.max_events = ARRAY_SIZE(p6_perfmon_event_map),
.apic = 1,
.max_period = (1ULL << 31) - 1,
.version = 0,
.num_counters = 2,
Expand All @@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
.event_map = intel_pmu_event_map,
.raw_event = intel_pmu_raw_event,
.max_events = ARRAY_SIZE(intel_perfmon_event_map),
.apic = 1,
/*
* Intel PMCs cannot be accessed sanely above 32 bit width,
* so we install an artificial 1<<31 period regardless of
Expand All @@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = {
.num_counters = 4,
.counter_bits = 48,
.counter_mask = (1ULL << 48) - 1,
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
};
Expand All @@ -1589,13 +1614,14 @@ static int p6_pmu_init(void)
return -ENODEV;
}

x86_pmu = p6_pmu;

if (!cpu_has_apic) {
pr_info("no Local APIC, try rebooting with lapic");
return -ENODEV;
pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
pr_info("no hardware sampling interrupt available.\n");
x86_pmu.apic = 0;
}

x86_pmu = p6_pmu;

return 0;
}

Expand Down
49 changes: 38 additions & 11 deletions include/linux/perf_counter.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ enum perf_counter_sample_format {
PERF_SAMPLE_TID = 1U << 1,
PERF_SAMPLE_TIME = 1U << 2,
PERF_SAMPLE_ADDR = 1U << 3,
PERF_SAMPLE_GROUP = 1U << 4,
PERF_SAMPLE_READ = 1U << 4,
PERF_SAMPLE_CALLCHAIN = 1U << 5,
PERF_SAMPLE_ID = 1U << 6,
PERF_SAMPLE_CPU = 1U << 7,
Expand All @@ -127,16 +127,32 @@ enum perf_counter_sample_format {
};

/*
* Bits that can be set in attr.read_format to request that
* reads on the counter should return the indicated quantities,
* in increasing order of bit value, after the counter value.
* The format of the data returned by read() on a perf counter fd,
* as specified by attr.read_format:
*
* struct read_format {
* { u64 value;
* { u64 time_enabled; } && PERF_FORMAT_ENABLED
* { u64 time_running; } && PERF_FORMAT_RUNNING
* { u64 id; } && PERF_FORMAT_ID
* } && !PERF_FORMAT_GROUP
*
* { u64 nr;
* { u64 time_enabled; } && PERF_FORMAT_ENABLED
* { u64 time_running; } && PERF_FORMAT_RUNNING
* { u64 value;
* { u64 id; } && PERF_FORMAT_ID
* } cntr[nr];
* } && PERF_FORMAT_GROUP
* };
*/
enum perf_counter_read_format {
PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
PERF_FORMAT_ID = 1U << 2,
PERF_FORMAT_GROUP = 1U << 3,

PERF_FORMAT_MAX = 1U << 3, /* non-ABI */
PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
};

#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
Expand Down Expand Up @@ -343,10 +359,8 @@ enum perf_event_type {
* struct {
* struct perf_event_header header;
* u32 pid, tid;
* u64 value;
* { u64 time_enabled; } && PERF_FORMAT_ENABLED
* { u64 time_running; } && PERF_FORMAT_RUNNING
* { u64 parent_id; } && PERF_FORMAT_ID
*
* struct read_format values;
* };
*/
PERF_EVENT_READ = 8,
Expand All @@ -364,11 +378,22 @@ enum perf_event_type {
* { u32 cpu, res; } && PERF_SAMPLE_CPU
* { u64 period; } && PERF_SAMPLE_PERIOD
*
* { u64 nr;
* { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP
* { struct read_format values; } && PERF_SAMPLE_READ
*
* { u64 nr,
* u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
*
* #
* # The RAW record below is opaque data wrt the ABI
* #
* # That is, the ABI doesn't make any promises wrt to
* # the stability of its content, it may vary depending
* # on event, hardware, kernel version and phase of
* # the moon.
* #
* # In other words, PERF_SAMPLE_RAW contents are not an ABI.
* #
*
* { u32 size;
* char data[size];}&& PERF_SAMPLE_RAW
* };
Expand Down Expand Up @@ -694,6 +719,8 @@ struct perf_sample_data {

extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
struct perf_sample_data *data);
extern void perf_counter_output(struct perf_counter *counter, int nmi,
struct perf_sample_data *data);

/*
* Return 1 for a software counter, 0 for a hardware counter
Expand Down
Loading

0 comments on commit 3493e84

Please sign in to comment.