Skip to content

Commit

Permalink
add inferno-collapse-ghcprof to handle GHC's prof files (#300)
Browse files Browse the repository at this point in the history
  • Loading branch information
dten committed Sep 16, 2023
1 parent 25811d8 commit 305be42
Show file tree
Hide file tree
Showing 17 changed files with 2,371 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
### Added

- Added collapse-ghcprof. [#300](https://github.com/jonhoo/inferno/pull/300)

### Changed

### Removed
Expand Down
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ name = "inferno-collapse-vsprof"
path = "src/bin/collapse-vsprof.rs"
required-features = ["cli"]

[[bin]]
name = "inferno-collapse-ghcprof"
path = "src/bin/collapse-ghcprof.rs"
required-features = ["cli"]

[[bin]]
name = "inferno-collapse-guess"
path = "src/bin/collapse-guess.rs"
Expand Down
84 changes: 84 additions & 0 deletions src/bin/collapse-ghcprof.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use std::io;
use std::path::PathBuf;

use clap::{ArgAction, ArgGroup, Parser};
use env_logger::Env;
use inferno::collapse::ghcprof::{Folder, Options, Source};
use inferno::collapse::Collapse;

#[derive(Debug, Parser)]
#[clap(
name = "inferno-collapse-ghcprof",
about,
after_help = "\
[1] This processes the .prof output of GHC (Glasgow Haskell Compiler)
"
)]
#[command(group(
ArgGroup::new("source")
.required(false)
.args(["time", "bytes", "ticks"]),
))]
struct Opt {
// ************* //
// *** FLAGS *** //
// ************* //
/// Source stack cost centre from the %time column (individual total % of runtime)
/// (This is the default if no cost centre specified)
#[clap(long = "time")]
time: bool,
/// Source stack cost centre from the bytes column (bytes allocated)
#[clap(long = "bytes")]
bytes: bool,
/// Source stack cost centre from the ticks column (runtime ticks)
#[clap(long = "ticks")]
ticks: bool,

/// Silence all log output
#[clap(short = 'q', long = "quiet")]
quiet: bool,

/// Verbose logging mode (-v, -vv, -vvv)
#[clap(short = 'v', long = "verbose", action = ArgAction::Count)]
verbose: u8,

// ************ //
// *** ARGS *** //
// ************ //
/// ghc .prof output file, or STDIN if not specified
#[clap(value_name = "PATH")]
infile: Option<PathBuf>,
}

impl Opt {
fn into_parts(self) -> (Option<PathBuf>, Options) {
let mut options = Options::default();
options.source = if self.ticks {
Source::Ticks
} else if self.bytes {
Source::Bytes
} else {
Source::PercentTime
};
(self.infile, options)
}
}

fn main() -> io::Result<()> {
let opt = Opt::parse();

// Initialize logger
if !opt.quiet {
env_logger::Builder::from_env(Env::default().default_filter_or(match opt.verbose {
0 => "warn",
1 => "info",
2 => "debug",
_ => "trace",
}))
.format_timestamp(None)
.init();
}

let (infile, options) = opt.into_parts();
Folder::from(options).collapse_file_to_stdout(infile.as_ref())
}
242 changes: 242 additions & 0 deletions src/collapse/ghcprof.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
use std::io::{self, BufRead};

use log::warn;

use crate::collapse::common::Occurrences;
use crate::collapse::Collapse;

// These are the identifying words of the callgraph table, note that ticks and bytes columns are optional so not present
static START_LINE: &[&str] = &[
"COST", "CENTRE", "MODULE", "SRC", "no.", "entries", "%time", "%alloc", "%time", "%alloc",
];

/// `ghcprof` folder configuration options.
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub struct Options {
/// Column to source associated value from, default is `Source::PercentTime`.
pub source: Source,
}

/// Which prof column to use as the cost centre of the output stacks
#[derive(Clone, Debug, Default)]
#[non_exhaustive]
pub enum Source {
#[default]
/// The indivial %time column representing individual time as a percent of the total
PercentTime,
/// The ticks column representing individual runtime ticks
Ticks,
/// The bytes column representing individual bytes allocated
Bytes,
}

/// A stack collapser for the output of `ghc`'s prof files.
///
/// To construct one, either use `ghcprof::Folder::default()` or create an [`Options`] and use
/// `ghcprof::Folder::from(options)`.
#[derive(Clone, Default)]
pub struct Folder {
/// Cost for the current stack frame.
current_cost: usize,

/// Function on the stack in this entry thus far.
stack: Vec<String>,

opt: Options,
}

// The starting character offset of important columns
#[derive(Debug)]
struct Cols {
cost_centre: usize,
module: usize,
source: usize,
}

impl Collapse for Folder {
fn collapse<R, W>(&mut self, mut reader: R, writer: W) -> io::Result<()>
where
R: io::BufRead,
W: io::Write,
{
// Consume the header...
let mut line = Vec::new();
let cols = loop {
line.clear();
if reader.read_until(b'\n', &mut line)? == 0 {
warn!("File ended before start of call graph");
return Ok(());
};
let l = String::from_utf8_lossy(&line);

if l.split_whitespace()
.take(START_LINE.len())
.eq(START_LINE.iter().cloned())
{
let cost_centre = 0;
let module = l.find("MODULE").unwrap_or(0);
// Pick out these fixed columns, first two are individual only
// "%time %alloc %time %alloc"
// `ticks` and `bytes` columns are optional and might appear on the end
// ticks header is right aligned
// bytes header is right aligned
// - BUT it has a max width of 9 whilst its values can exceed (but are always space separted)
// "%time %alloc %time %alloc ticks bytes"
let source = match self.opt.source {
Source::PercentTime => l
.find("%time")
.expect("%time is present from matching START_LINE"),
// See note above about ticks and bytes columns
Source::Ticks => one_off_end_of_col_before(l.as_ref(), "ticks")?,
Source::Bytes => one_off_end_of_col_before(l.as_ref(), "bytes")?,
};
break Cols {
cost_centre,
module,
source,
};
}
};
// Skip one line
reader.read_until(b'\n', &mut line)?;

// Process the data...
let mut occurrences = Occurrences::new(1);
loop {
line.clear();
if reader.read_until(b'\n', &mut line)? == 0 {
// The format is not expected to contain any blank lines within the callgraph
break;
}
let l = String::from_utf8_lossy(&line);
let line = l.trim_end();
if line.is_empty() {
break;
} else {
self.on_line(line, &mut occurrences, &cols)?;
}
}

// Write the results...
occurrences.write_and_clear(writer)?;

// Reset the state...
self.current_cost = 0;
self.stack.clear();
Ok(())
}

/// Check for start line of a call graph.
fn is_applicable(&mut self, input: &str) -> Option<bool> {
let mut input = input.as_bytes();
let mut line = String::new();
loop {
line.clear();
if let Ok(n) = input.read_line(&mut line) {
if n == 0 {
break;
}
} else {
return Some(false);
}

if line
.split_whitespace()
.take(START_LINE.len())
.eq(START_LINE.iter().cloned())
{
return Some(true);
}
}
None
}
}

fn one_off_end_of_col_before(line: &str, col: &str) -> io::Result<usize> {
let col_start = match line.find(col) {
Some(col_start) => col_start,
_ => return invalid_data_error!("Expected '{col}' column but it was not present"),
};
let col_end = match line[..col_start].rfind(|c: char| !c.is_whitespace()) {
Some(col_end) => col_end,
_ => return invalid_data_error!("Expected a column before '{col}' but there was none"),
};
Ok(col_end + 1)
}

impl From<Options> for Folder {
fn from(opt: Options) -> Self {
Folder {
opt,
..Default::default()
}
}
}

impl Folder {
// Handle call graph lines of the form:
//
// MAIN MAIN ...
// CAF Options.Applicative.Builder ...
// defaultPrefs Options.Applicative.Builder ...
// idm Options.Applicative.Builder ...
// prefs Options.Applicative.Builder ...
// fullDesc Options.Applicative.Builder ...
// hidden Options.Applicative.Builder ...
// option Options.Applicative.Builder ...
// metavar Options.Applicative.Builder ...
// CAF Options.Applicative.Builder.Internal ...
// internal Options.Applicative.Builder.Internal ...
// noGlobal Options.Applicative.Builder.Internal ...
// optionMod Options.Applicative.Builder.Internal ...

fn on_line(
&mut self,
line: &str,
occurrences: &mut Occurrences,
cols: &Cols,
) -> io::Result<()> {
if let Some(indent_chars) = line.find(|c| c != ' ') {
let prev_len = self.stack.len();
let depth = indent_chars;

if depth < prev_len {
// If the line is not a child, pop stack to the stack before the new depth
self.stack.truncate(depth);
} else if depth != prev_len {
return invalid_data_error!("Skipped indentation level at line:\n{}", line);
}
// There can be non-ascii names so take care to char offset not byte offset
let string_range = |col_start: usize| {
line.chars()
.skip(col_start)
.skip_while(|c| c.is_whitespace())
// it is expected that the values to extract do not contain whitespace
// since this is used for functions/modules/costs where it is not allowed
.take_while(|c| !c.is_whitespace())
.collect::<String>()
};
let cost = string_range(cols.source);
if let Ok(cost) = cost.trim().parse::<f64>() {
let func = string_range(cols.cost_centre);
let module = string_range(cols.module);
// The columns we extract costs from all exclude the cost of their children
self.current_cost = match self.opt.source {
// We must `insert_or_add` a `usize` so convert to per-mille to not lose the 1dp
Source::PercentTime => cost * 10.0,
Source::Ticks => cost,
Source::Bytes => cost,
} as usize;
self.stack
.push(format!("{}.{}", module.trim(), func.trim()));
// identical stacks from other threads can appear so need to insert or add
occurrences.insert_or_add(self.stack.join(";"), self.current_cost);
} else {
return invalid_data_error!("Invalid cost field: \"{}\"", cost);
}
}

Ok(())
}
}
6 changes: 4 additions & 2 deletions src/collapse/guess.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::io::{self, Cursor};

use log::{error, info};

use crate::collapse::{self, dtrace, perf, sample, vsprof, vtune, Collapse};
use crate::collapse::{self, dtrace, ghcprof, perf, sample, vsprof, vtune, Collapse};

const LINES_PER_ITERATION: usize = 10;

Expand Down Expand Up @@ -70,10 +70,11 @@ impl Collapse for Folder {
let mut sample = sample::Folder::default();
let mut vtune = vtune::Folder::default();
let mut vsprof = vsprof::Folder::default();
let mut ghcprof = ghcprof::Folder::default();

// Each Collapse impl gets its own flag in this array.
// It gets set to true when the impl has been ruled out.
let mut not_applicable = [false; 5];
let mut not_applicable = [false; 6];

let mut buffer = String::new();
loop {
Expand Down Expand Up @@ -108,6 +109,7 @@ impl Collapse for Folder {
try_collapse_impl!(sample, 2);
try_collapse_impl!(vtune, 3);
try_collapse_impl!(vsprof, 4);
try_collapse_impl!(ghcprof, 5);

if eof {
break;
Expand Down
7 changes: 7 additions & 0 deletions src/collapse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ pub mod recursive;
/// [crate-level documentation]: ../../index.html
pub mod vsprof;

/// Stack collapsing for the output of the [GHC's built-in profiler](https://downloads.haskell.org/ghc/latest/docs/users_guide/profiling.html).
///
/// See the [crate-level documentation] for details.
///
/// [crate-level documentation]: ../../index.html
pub mod ghcprof;

use is_terminal::IsTerminal;

// DEFAULT_NTHREADS is public because we use it in the help text of the binaries,
Expand Down
Loading

0 comments on commit 305be42

Please sign in to comment.