Skip to content

Commit

Permalink
Allow attaching co_firstlineno to frame name (#428)
Browse files Browse the repository at this point in the history
Before this PR, we had 2 modes:
* --function NOT passed: attach the line number from frame->f_lasti to the frame name
* --function passed: do not attach any line number

The main reason to avoid attaching f_lasti is to have frames aggregated by the function name only;
so different "lines" of the same function are aggregated as a single frame.

However, line numbers are useful as identifiers for functions, especially in large files / for common
function names such as "__init__", which are likely to appear multiple times in the same file.
This PR allows attaching code->co_firstlineno instead, which can serve to help in identification,
while not preventing frames of the same function from being aggregated together.

After this PR, we have these options:
* --function, --nolineno NOT passed: same as before - attach f_lasti to the frame name
* --function passed: attach co_firstlineno to the frame name
* --nolineno: do not attach any line number (also, do not spend time on retrieving the line number from
  the remote process).

Closes: #424
  • Loading branch information
Jongy committed Aug 18, 2021
1 parent 581bc04 commit 5ca90c6
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 18 deletions.
23 changes: 20 additions & 3 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ pub struct Config {
pub dump_locals: u64,
#[doc(hidden)]
pub full_filenames: bool,
#[doc(hidden)]
pub lineno: LineNo,
}

arg_enum!{
Expand All @@ -76,6 +78,13 @@ pub enum RecordDuration {
Seconds(u64)
}

#[derive(Debug, Clone, Eq, PartialEq, Copy)]
pub enum LineNo {
NoLine,
FirstLineNo,
LastInstruction
}

impl Default for Config {
/// Initializes a new Config object with default parameters
#[allow(dead_code)]
Expand All @@ -86,7 +95,7 @@ impl Default for Config {
duration: RecordDuration::Unlimited, native: false,
gil_only: false, include_idle: false, include_thread_ids: false,
hide_progress: false, capture_output: true, dump_json: false, dump_locals: 0, subprocesses: false,
full_filenames: false}
full_filenames: false, lineno: LineNo::LastInstruction }
}
}

Expand Down Expand Up @@ -182,7 +191,10 @@ impl Config {
.arg(Arg::with_name("function")
.short("F")
.long("function")
.help("Aggregate samples by function name instead of by line number"))
.help("Aggregate samples by function's first line number, instead of current line number"))
.arg(Arg::with_name("nolineno")
.long("nolineno")
.help("Do not show line numbers"))
.arg(Arg::with_name("threads")
.short("t")
.long("threads")
Expand Down Expand Up @@ -291,7 +303,7 @@ impl Config {
config.python_program = matches.values_of("python_program").map(|vals| {
vals.map(|v| v.to_owned()).collect()
});
config.show_line_numbers = matches.occurrences_of("function") == 0;
config.show_line_numbers = matches.occurrences_of("nolineno") == 0;
config.include_idle = matches.occurrences_of("idle") > 0;
config.gil_only = matches.occurrences_of("gil") > 0;
config.include_thread_ids = matches.occurrences_of("threads") > 0;
Expand All @@ -302,6 +314,11 @@ impl Config {
config.dump_locals = matches.occurrences_of("locals");
config.subprocesses = matches.occurrences_of("subprocesses") > 0;
config.full_filenames = matches.occurrences_of("full_filenames") > 0;
config.lineno = if matches.occurrences_of("nolineno") > 0 { LineNo::NoLine } else if matches.occurrences_of("function") > 0 { LineNo::FirstLineNo } else { LineNo::LastInstruction };
if matches.occurrences_of("nolineno") > 0 && matches.occurrences_of("function") > 0 {
eprintln!("--function & --nolinenos can't be used together");
std::process::exit(1);
}

config.capture_output = config.command != "record" || matches.occurrences_of("capture") > 0;
if !config.capture_output {
Expand Down
6 changes: 3 additions & 3 deletions src/python_spy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use proc_maps::{get_process_maps, MapRange};


use crate::binary_parser::{parse_binary, BinaryInfo};
use crate::config::{Config, LockingStrategy};
use crate::config::{Config, LockingStrategy, LineNo};
#[cfg(unwind)]
use crate::native_stack_trace::NativeStack;
use crate::python_bindings::{pyruntime, v2_7_15, v3_3_7, v3_5_5, v3_6_6, v3_7_0, v3_8_0, v3_9_5};
Expand Down Expand Up @@ -223,7 +223,7 @@ impl PythonSpy {
while !threads.is_null() {
// Get the stack trace of the python thread
let thread = self.process.copy_pointer(threads).context("Failed to copy PyThreadState")?;
let mut trace = get_stack_trace(&thread, &self.process, self.config.dump_locals > 0)?;
let mut trace = get_stack_trace(&thread, &self.process, self.config.dump_locals > 0, self.config.lineno)?;

// Try getting the native thread id
let python_thread_id = thread.thread_id();
Expand Down Expand Up @@ -678,7 +678,7 @@ fn check_interpreter_addresses(addrs: &[usize],
};

// as a final sanity check, try getting the stack_traces, and only return if this works
if thread.interp() as usize == addr && get_stack_traces(&interp, process).is_ok() {
if thread.interp() as usize == addr && get_stack_traces(&interp, process, LineNo::NoLine).is_ok() {
return Ok(addr);
}
}
Expand Down
29 changes: 17 additions & 12 deletions src/stack_trace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use serde_derive::Serialize;

use crate::python_interpreters::{InterpreterState, ThreadState, FrameObject, CodeObject, TupleObject};
use crate::python_data_access::{copy_string, copy_bytes};
use crate::config::LineNo;

/// Call stack for a single python thread
#[derive(Debug, Clone, Serialize)]
Expand Down Expand Up @@ -63,14 +64,14 @@ pub struct ProcessInfo {
}

/// Given an InterpreterState, this function returns a vector of stack traces for each thread
pub fn get_stack_traces<I>(interpreter: &I, process: &Process) -> Result<Vec<StackTrace>, Error>
pub fn get_stack_traces<I>(interpreter: &I, process: &Process, lineno: LineNo) -> Result<Vec<StackTrace>, Error>
where I: InterpreterState {
// TODO: deprecate this method
let mut ret = Vec::new();
let mut threads = interpreter.head();
while !threads.is_null() {
let thread = process.copy_pointer(threads).context("Failed to copy PyThreadState")?;
ret.push(get_stack_trace(&thread, process, false)?);
ret.push(get_stack_trace(&thread, process, false, lineno)?);
// This seems to happen occasionally when scanning BSS addresses for valid interpeters
if ret.len() > 4096 {
return Err(format_err!("Max thread recursion depth reached"));
Expand All @@ -81,7 +82,7 @@ pub fn get_stack_traces<I>(interpreter: &I, process: &Process) -> Result<Vec<Sta
}

/// Gets a stack trace for an individual thread
pub fn get_stack_trace<T>(thread: &T, process: &Process, copy_locals: bool) -> Result<StackTrace, Error>
pub fn get_stack_trace<T>(thread: &T, process: &Process, copy_locals: bool, lineno: LineNo) -> Result<StackTrace, Error>
where T: ThreadState {
// TODO: just return frames here? everything else probably should be returned out of scope
let mut frames = Vec::new();
Expand All @@ -93,15 +94,19 @@ pub fn get_stack_trace<T>(thread: &T, process: &Process, copy_locals: bool) -> R
let filename = copy_string(code.filename(), process).context("Failed to copy filename")?;
let name = copy_string(code.name(), process).context("Failed to copy function name")?;

let line = match get_line_number(&code, frame.lasti(), process) {
Ok(line) => line,
Err(e) => {
// Failling to get the line number really shouldn't be fatal here, but
// can happen in extreme cases (https://github.com/benfred/py-spy/issues/164)
// Rather than fail set the linenumber to 0. This is used by the native extensions
// to indicate that we can't load a line number and it should be handled gracefully
warn!("Failed to get line number from {}.{}: {}", filename, name, e);
0
let line = match lineno {
LineNo::NoLine => 0,
LineNo::FirstLineNo => code.first_lineno(),
LineNo::LastInstruction => match get_line_number(&code, frame.lasti(), process) {
Ok(line) => line,
Err(e) => {
// Failling to get the line number really shouldn't be fatal here, but
// can happen in extreme cases (https://github.com/benfred/py-spy/issues/164)
// Rather than fail set the linenumber to 0. This is used by the native extensions
// to indicate that we can't load a line number and it should be handled gracefully
warn!("Failed to get line number from {}.{}: {}", filename, name, e);
0
}
}
};

Expand Down

0 comments on commit 5ca90c6

Please sign in to comment.