Skip to content

Commit

Permalink
perf: Add perf probing interface
Browse files Browse the repository at this point in the history
Supports CPU flamegraphs and wait flamegraphs.

Signed-off-by: Pedro Falcato <pedro.falcato@gmail.com>
  • Loading branch information
heatd committed Sep 26, 2022
1 parent 97f946f commit 16e312e
Show file tree
Hide file tree
Showing 18 changed files with 679 additions and 13 deletions.
12 changes: 9 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,15 @@ qemu: iso
-device usb-ehci -device usb-mouse \
-display gtk,gl=on -machine q35

# Boot Onyx.iso (with hdd.img attached as a raw disk) under KVM-accelerated QEMU,
# with the guest serial port connected to the terminal's stdio (-serial stdio).
# Guest network traffic on netdev u1 is dumped to net.pcap.
qemu-serial-stdio: iso
qemu-system-$(shell scripts/target-triplet-to-arch.sh $(HOST)) \
-s -cdrom Onyx.iso -drive file=hdd.img,format=raw,media=disk -m 512M \
-serial stdio -boot d -netdev user,id=u1 -device virtio-net,netdev=u1 \
-object filter-dump,id=f1,netdev=u1,file=net.pcap \
-enable-kvm -cpu host,migratable=on,+invtsc -smp 4 -vga qxl \
-device usb-ehci -device usb-mouse \
-display gtk,gl=on -machine q35

ci-test-qemu: liveiso
qemu-system-$(shell scripts/target-triplet-to-arch.sh $(HOST)) \
-s -cdrom Onyx.iso -m 2G -serial stdio -boot d -netdev user,id=u1 \
Expand All @@ -179,6 +188,3 @@ intel-passthrough-qemu: iso

virtualbox: iso
virtualbox --startvm Onyx --dbg



5 changes: 5 additions & 0 deletions kernel/arch/arm64/stubs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,11 @@ void stack_trace()
return;
}

/**
 * @brief Stack trace collection stub for arm64
 *
 * No unwinding is performed: the arguments are ignored and nothing is written to pcs.
 *
 * @param stack  Frame to start unwinding from (unused)
 * @param pcs    Output array for the collected program counters (unused)
 * @param nr_pcs Capacity of pcs, in entries (unused)
 * @return Always 0, i.e. no frames were collected
 */
size_t stack_trace_get(unsigned long *stack, unsigned long *pcs, size_t nr_pcs)
{
    /* Explicitly mark the parameters as unused */
    (void) stack;
    (void) pcs;
    (void) nr_pcs;

    const size_t frames_collected = 0;
    return frames_collected;
}

uint64_t get_posix_time_early()
{
return 0;
Expand Down
5 changes: 5 additions & 0 deletions kernel/arch/riscv64/debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,8 @@ void stack_trace(void)
return;
stack_trace_ex((unsigned long *) __builtin_frame_address(1));
}

/**
 * @brief Stack trace collection stub for riscv64
 *
 * No unwinding is performed: the arguments are ignored and nothing is written to pcs.
 *
 * @param stack  Frame to start unwinding from (unused)
 * @param pcs    Output array for the collected program counters (unused)
 * @param nr_pcs Capacity of pcs, in entries (unused)
 * @return Always 0, i.e. no frames were collected
 */
size_t stack_trace_get(unsigned long *stack, unsigned long *pcs, size_t nr_pcs)
{
    /* Explicitly mark the parameters as unused */
    (void) stack;
    (void) pcs;
    (void) nr_pcs;

    const size_t frames_collected = 0;
    return frames_collected;
}
27 changes: 26 additions & 1 deletion kernel/arch/x86_64/apic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
#include <onyx/x86/pit.h>
#include <onyx/x86/tsc.h>

//#define CONFIG_APIC_PERIODIC
// #define CONFIG_APIC_PERIODIC

volatile uint32_t *bsp_lapic = NULL;
volatile uint64_t core_stack = 0;
Expand Down Expand Up @@ -716,6 +716,31 @@ void apic_send_ipi(uint8_t id, uint32_t type, uint32_t page)
spin_unlock_irqrestore(lock, cpu_flags);
}

/**
 * @brief Send an IPI using the local APIC's destination shorthand instead of a single target
 *
 * The ICR value is built with destination shorthand 2 (bits 18-19; "all including self"
 * per the Intel SDM ICR layout) and the level bit (bit 14) set.
 *
 * @param type Value placed in ICR bits 8 and up (delivery mode)
 * @param page Low 8 bits are placed in the ICR vector field
 */
void apic_send_ipi_all(uint32_t type, uint32_t page)
{
    struct spinlock *lock = get_per_cpu_ptr(ipi_lock);
    unsigned long cpu_flags = spin_lock_irqsave(lock);

    volatile uint32_t *this_lapic = get_per_cpu(lapic);

    if (unlikely(!this_lapic))
    {
        /* If we don't have a lapic yet, just return because we're in early boot
         * and we don't need that right now. The lock must still be dropped (and the
         * saved interrupt flags restored) before bailing out, or the next IPI sent
         * from this CPU would spin forever on ipi_lock with interrupts disabled.
         */
        spin_unlock_irqrestore(lock, cpu_flags);
        return;
    }

    /* Wait for any previously issued IPI to leave the ICR (bit 12: delivery status) */
    while (lapic_read(this_lapic, LAPIC_ICR) & (1 << 12))
        cpu_relax();

    uint64_t icr = 2 << 18 | type << 8 | (page & 0xff);
    icr |= (1 << 14);
    /* Only the low dword is written: no destination field is set for this send */
    lapic_write(this_lapic, LAPIC_ICR, (uint32_t) icr);

    spin_unlock_irqrestore(lock, cpu_flags);
}

bool apic_send_sipi_and_wait(uint8_t lapicid, struct smp_header *s)
{
boot_send_ipi(lapicid, ICR_DELIVERY_SIPI, 0);
Expand Down
1 change: 1 addition & 0 deletions kernel/arch/x86_64/entry.S
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
#define KERNEL_DS 0x10

.global syscall_ENTRY64
.type syscall_ENTRY64,@function
syscall_ENTRY64:

.cfi_startproc
Expand Down
2 changes: 1 addition & 1 deletion kernel/arch/x86_64/idt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ void idt_init(void)
x86_reserve_vector(31, isr31);

const unsigned int to_reserve[] = {X86_MESSAGE_VECTOR, X86_RESCHED_VECTOR, X86_SYNC_CALL_VECTOR,
255};
X86_PERFPROBE, 255};
unsigned int len = sizeof(to_reserve) / sizeof(unsigned int);

for (unsigned int i = 0; i < len; i++)
Expand Down
18 changes: 12 additions & 6 deletions kernel/arch/x86_64/interrupts.S
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016 - 2021 Pedro Falcato
* Copyright (c) 2016 - 2022 Pedro Falcato
* This file is part of Onyx, and is released under the terms of the MIT License
* check LICENSE at the root directory for more information
*
Expand Down Expand Up @@ -120,6 +120,7 @@

.macro ISR_NOERRCODE num
.global isr\num
.type isr\num,@function
isr\num:
cli
sub $8, %rsp
Expand All @@ -134,6 +135,7 @@ isr\num:

.macro IRQ irq_num
.global irq\irq_num
.type irq\irq_num,@ function
irq\irq_num:
cli
sub $8, %rsp
Expand Down Expand Up @@ -169,6 +171,7 @@ x86_interrupt_ret:
2:
INTERRUPT_STACK_RESTORE
.global x86_scheduler_exit
.type x86_scheduler_exit,@function
x86_scheduler_exit:
/* We'll use this bit of code as a trampoline to the new thread too */
mov REGISTER_OFF_CS(%rsp), %rax
Expand All @@ -191,6 +194,7 @@ x86_scheduler_exit:
call handle_signal
jmp 2b

.type x86_interrupt_common,@function
x86_interrupt_common:
.cfi_startproc
pushaq
Expand All @@ -208,15 +212,15 @@ x86_interrupt_common:
mov REGISTER_OFF_CS(%rsp), %rax
test $3, %rax

jz .L3
jz 1f
swapgs
.L3:
1:
mov %rsp, %rdi
mov %rdi, %rbp

.cfi_def_cfa_register rbp
# End the stack frame list so we stop here
#xor %rbp, %rbp
# xor %rbp, %rbp

INTERRUPT_STACK_ALIGN

Expand Down Expand Up @@ -501,6 +505,7 @@ x86_isr_table_end:
.popsection

.global platform_yield
.type platform_yield,@function
platform_yield:
.cfi_startproc
/* Basically we need to set up an IRQ frame on the stack.
Expand All @@ -517,15 +522,15 @@ platform_yield:
pushf /* %rflags */
cli
push $KERNEL_CS /* %cs */
push $.return_label /* %rip */
push $1f /* %rip */
sub $REGISTERS_UNUSED_OFF, %rsp
pushaq /* General purpose registers (%rax - %r15) */
push $KERNEL_DS /* %ds */
mov %rsp, %rdi
call sched_schedule
mov %rax, %rsp
jmp x86_scheduler_exit
.return_label:
1:
pop %rax
pop %rbp

Expand All @@ -534,6 +539,7 @@ platform_yield:


.global __sigret_return
.type __sigret_return,@function
__sigret_return:
cli

Expand Down
2 changes: 1 addition & 1 deletion kernel/arch/x86_64/irq.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2016 - 2021 Pedro Falcato
* Copyright (c) 2016 - 2022 Pedro Falcato
* This file is part of Onyx, and is released under the terms of the MIT License
* check LICENSE at the root directory for more information
*
Expand Down
7 changes: 7 additions & 0 deletions kernel/arch/x86_64/isr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <onyx/exceptions.h>
#include <onyx/panic.h>
#include <onyx/percpu.h>
#include <onyx/perf_probe.h>
#include <onyx/process.h>
#include <onyx/signal.h>
#include <onyx/task_switching.h>
Expand Down Expand Up @@ -564,6 +565,12 @@ extern "C" unsigned long x86_dispatch_interrupt(struct registers *regs)
smp::cpu_handle_sync_calls();
result = INTERRUPT_STACK_ALIGN(regs);
}
else if (vec_no == X86_PERFPROBE)
{
result = INTERRUPT_STACK_ALIGN(regs);
if (perf_probe_is_enabled() && in_kernel_space_regs(regs))
perf_probe_do(regs);
}
else
result = INTERRUPT_STACK_ALIGN(irq_handler(regs));

Expand Down
49 changes: 49 additions & 0 deletions kernel/arch/x86_64/strace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,55 @@ __attribute__((no_sanitize_undefined)) void stack_trace_ex(uint64_t *stack)
}
}

/**
 * @brief Collect return addresses by walking the frame-pointer (%rbp) chain
 *
 * Each frame is expected to hold the caller's saved frame pointer at rbp[0] and the
 * return address at rbp[1]. The walk stops at the first frame that is NULL, misaligned,
 * outside the current thread's kernel stack, or whose return address is zero or below
 * VM_HIGHER_HALF.
 *
 * @param stack  Frame pointer to start unwinding from
 * @param pcs    Output array that receives the return addresses
 * @param nr_pcs Capacity of pcs, in entries
 * @return Number of return addresses stored in pcs. If the walk ended before the limit
 *         (the smaller of nr_pcs and an internal cap) was reached, pcs[<return value>]
 *         is set to 0 as a terminator.
 */
size_t stack_trace_get(unsigned long *stack, unsigned long *pcs, size_t nr_pcs)
{
    thread_t *thread = get_current_thread();
    size_t unwinds_possible = 0;
    unsigned long stack_base = 0;

    if (!thread)
    {
        // We're still in single tasking mode, just use a safe default
        unwinds_possible = DEFAULT_UNWIND_NUMBER; // Early kernel functions don't nest a lot
    }
    else
    {
        unwinds_possible = 1024; /* It's safe to say the stack won't grow larger than this */
        /* Loop-invariant lower bound of the kernel stack (0x4000 bytes below its top) */
        stack_base = ((unsigned long) thread->kernel_stack_top) - 0x4000;
    }

    unwinds_possible = min(unwinds_possible, nr_pcs);
    uint64_t *rbp = stack;
    size_t i;
    for (i = 0; i < unwinds_possible; i++)
    {
        /* End of the frame list. Checking this at the top of the loop (rather than right
         * after storing a pc) keeps the last recorded frame counted instead of letting the
         * terminator overwrite it, and also guards against a NULL starting frame.
         */
        if (!rbp)
            break;

        if (thread)
        {
            if ((uintptr_t) rbp & 0x7)
                break;

            /* Both rbp[0] and rbp[1] must lie inside the thread's kernel stack */
            if (rbp >= thread->kernel_stack_top)
                break;
            if (rbp + 1 >= thread->kernel_stack_top)
                break;
            if (rbp < (unsigned long *) stack_base)
                break;
        }

        auto ip = (unsigned long) *(rbp + 1);

        /* A zero or lower-half return address means we've left kernel code */
        if (!ip)
            break;
        if (ip < VM_HIGHER_HALF)
            break;

        pcs[i] = ip;

        /* Follow the saved frame pointer to the caller's frame */
        rbp = (uint64_t *) *rbp;
    }

    /* i < unwinds_possible <= nr_pcs here, so the terminator is always in bounds */
    if (i != unwinds_possible)
        pcs[i] = 0;

    return i;
}

void stack_trace(void)
{
uint64_t *stack = NULL;
Expand Down
1 change: 1 addition & 0 deletions kernel/include/onyx/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@
#define X86_MESSAGE_VECTOR (130)
#define X86_RESCHED_VECTOR (131)
#define X86_SYNC_CALL_VECTOR (132)
#define X86_PERFPROBE (133)

#define X86_CPU_MANUFACTURER_INTEL 0
#define X86_CPU_MANUFACTURER_AMD 1
Expand Down
64 changes: 64 additions & 0 deletions kernel/include/onyx/perf_probe.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright (c) 2022 Pedro Falcato
* This file is part of Onyx, and is released under the terms of the MIT License
* check LICENSE at the root directory for more information
*
* SPDX-License-Identifier: MIT
*/
#ifndef _ONYX_PERF_PROBE_H
#define _ONYX_PERF_PROBE_H

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <onyx/compiler.h>
#include <onyx/public/perf_probe.h>

struct registers;

/**
 * @brief Collect a stack trace as a list of program counters
 *
 * The arm64 and riscv64 implementations are currently stubs that return 0.
 *
 * @param stack  Frame to start unwinding from
 * @param pcs    Output array that receives the program counters
 * @param nr_pcs Capacity of pcs, in entries
 * @return Number of program counters stored in pcs
 */
size_t stack_trace_get(unsigned long *stack, unsigned long *pcs, size_t nr_pcs);

/**
 * @brief Check if CPU perf probing is enabled
 *
 * @return True if enabled, else false
 */
bool perf_probe_is_enabled();

/**
 * @brief Do a CPU perf probe
 *
 * On x86_64 this is called from the X86_PERFPROBE interrupt vector handler, when
 * CPU probing is enabled and the interrupted context was in kernel space.
 *
 * @param regs Registers
 */
void perf_probe_do(struct registers *regs);

/**
 * @brief Check if wait perf probing is enabled
 *
 * @return True if enabled, else false
 */
bool perf_probe_is_enabled_wait();

/**
 * @brief Set up a wait probe. Called right before platform_yield()
 *
 * @param fge flame_graph_entry, stack allocated
 */
void perf_probe_setup_wait(struct flame_graph_entry *fge);

/**
 * @brief Commit the wait probe
 *
 * @param fge flame_graph_entry, stack allocated
 */
void perf_probe_commit_wait(const struct flame_graph_entry *fge);

/**
 * @brief Try to take a trace for the wait probe
 *
 * @param regs Registers
 */
void perf_probe_try_wait_trace(struct registers *regs);

#endif
36 changes: 36 additions & 0 deletions kernel/include/onyx/public/perf_probe.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* Copyright (c) 2022 Pedro Falcato
* This file is part of Onyx, and is released under the terms of the MIT License
* check LICENSE at the root directory for more information
*
* SPDX-License-Identifier: MIT
*/
#ifndef _ONYX_PUBLIC_PERF_PROBE_H
#define _ONYX_PUBLIC_PERF_PROBE_H

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Number of program counters stored in one flame_graph_entry (size of rips[]) */
#define FLAME_GRAPH_FRAMES 32
/* NOTE(review): presumably the number of flame_graph_entry slots in each per-CPU
 * buffer - confirm against the perf probe implementation */
#define FLAME_GRAPH_NENTRIES 65536

/* A single sample: a fixed-size array of program counters */
struct flame_graph_entry
{
unsigned long rips[FLAME_GRAPH_FRAMES];
};

/* Per-CPU sample buffer descriptor */
struct flame_graph_pcpu
{
/* Number of entries in the fge array */
uint32_t nentries;
/* NOTE(review): presumably the index of the next entry to be written - confirm */
uint32_t windex;
struct flame_graph_entry *fge; /* array of nentries */
/* NOTE(review): looks like padding; with 8-byte pointers the struct comes to 64 bytes,
 * presumably to keep each CPU's descriptor on its own cache line - confirm */
int dummy[12];
};

/* Perf probe command numbers.
 * NOTE(review): presumably ioctl-style requests issued by userspace - confirm against
 * the perf probe device implementation. */
#define PERF_PROBE_ENABLE_DISABLE_CPU 0
#define PERF_PROBE_GET_BUFFER_LENGTH 1
#define PERF_PROBE_READ_DATA 2
#define PERF_PROBE_ENABLE_DISABLE_WAIT 3

#endif
1 change: 1 addition & 0 deletions kernel/include/onyx/x86/apic.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ void apic_timer_smp_init(volatile uint32_t *lapic);
void apic_set_irql(int irql);
int apic_get_irql(void);
void apic_send_ipi(uint8_t id, uint32_t type, uint32_t page);
void apic_send_ipi_all(uint32_t type, uint32_t page);
void lapic_send_eoi(void);
uint32_t apic_get_lapic_id(unsigned int cpu);
void apic_set_lapic_id(unsigned int cpu, uint32_t lapic_id);
Expand Down

0 comments on commit 16e312e

Please sign in to comment.