Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up the calling process query #869

Merged
merged 5 commits into from
Jan 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ default-features = false
features = ["parsing", "assets", "yaml-load", "dump-load", "regex-onig"]

[dependencies.sysinfo]
version = "0.22.3"
version = "0.22.4"
# no default features to disable the use of threads
default-features = false
features = []
Expand Down
4 changes: 1 addition & 3 deletions src/handlers/git_show_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ impl<'a> StateMachine<'a> {
self.painter.emit()?;
let mut handled_line = false;
if matches!(self.state, State::Unknown) {
if let Some(process::CallingProcess::GitShow(_, extension)) =
process::calling_process().as_deref()
{
if let process::CallingProcess::GitShow(_, extension) = &*process::calling_process() {
self.state = State::GitShowFile;
self.painter.set_syntax(extension.as_deref());
self.painter.set_highlighter();
Expand Down
11 changes: 5 additions & 6 deletions src/handlers/grep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,8 @@ fn get_code_style_sections<'b>(
}

fn make_output_config() -> GrepOutputConfig {
match process::calling_process().as_deref() {
Some(process::CallingProcess::GitGrep(command_line))
match &*process::calling_process() {
process::CallingProcess::GitGrep(command_line)
if command_line.short_options.contains("-W")
|| command_line.long_options.contains("--function-context") =>
{
Expand All @@ -265,7 +265,7 @@ fn make_output_config() -> GrepOutputConfig {
pad_line_number: true,
}
}
Some(process::CallingProcess::GitGrep(command_line))
process::CallingProcess::GitGrep(command_line)
if command_line.short_options.contains("-p")
|| command_line.long_options.contains("--show-function") =>
{
Expand Down Expand Up @@ -380,9 +380,8 @@ pub fn parse_grep_line(line: &str) -> Option<GrepLine> {
if line.starts_with('{') {
ripgrep_json::parse_line(line)
} else {
match process::calling_process().as_deref() {
Some(process::CallingProcess::GitGrep(_))
| Some(process::CallingProcess::OtherGrep) => [
match &*process::calling_process() {
process::CallingProcess::GitGrep(_) | process::CallingProcess::OtherGrep => [
&*GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION,
&*GREP_LINE_REGEX_ASSUMING_NO_INTERNAL_SEPARATOR_CHARS,
]
Expand Down
12 changes: 5 additions & 7 deletions src/handlers/hunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,11 @@ lazy_static! {
}

fn compute_is_word_diff() -> bool {
match process::calling_process().as_deref() {
Some(
CallingProcess::GitDiff(cmd_line)
| CallingProcess::GitShow(cmd_line, _)
| CallingProcess::GitLog(cmd_line)
| CallingProcess::GitReflog(cmd_line),
) => {
match &*process::calling_process() {
CallingProcess::GitDiff(cmd_line)
| CallingProcess::GitShow(cmd_line, _)
| CallingProcess::GitLog(cmd_line)
| CallingProcess::GitReflog(cmd_line) => {
cmd_line.long_options.contains("--word-diff")
|| cmd_line.long_options.contains("--word-diff-regex")
|| cmd_line.long_options.contains("--color-words")
Expand Down
6 changes: 6 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ pub mod errors {

#[cfg(not(tarpaulin_include))]
fn main() -> std::io::Result<()> {
// Do this first because parsing all the input in `run_app()` and
// listing all processes each take about 50ms on Linux.
// It also improves the chance that the calling process is still around when
// input is piped into delta (e.g. `git show --word-diff=color | delta`).
utils::process::start_determining_calling_process_in_thread();

// Ignore ctrl-c (SIGINT) to avoid leaving an orphaned pager process.
// See https://github.com/dandavison/delta/issues/681
ctrlc::set_handler(|| {})
Expand Down
109 changes: 87 additions & 22 deletions src/utils/process.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::path::Path;
use sysinfo::{Pid, Process, ProcessExt, ProcessRefreshKind, SystemExt};
use std::sync::{Arc, Condvar, Mutex, MutexGuard};

use lazy_static::lazy_static;
use sysinfo::{Pid, Process, ProcessExt, ProcessRefreshKind, SystemExt};

#[derive(Clone, Debug, PartialEq)]
pub enum CallingProcess {
Expand All @@ -13,6 +13,8 @@ pub enum CallingProcess {
GitReflog(CommandLine),
GitGrep(CommandLine),
OtherGrep, // rg, grep, ag, ack, etc
None, // no matching process could be found
Pending, // calling process is currently being determined
}
// TODO: Git blame is currently handled differently

Expand All @@ -23,23 +25,63 @@ pub struct CommandLine {
last_arg: Option<String>,
}

pub fn calling_process() -> Option<Cow<'static, CallingProcess>> {
#[cfg(not(test))]
{
CACHED_CALLING_PROCESS.as_ref().map(Cow::Borrowed)
}
#[cfg(test)]
{
determine_calling_process().map(Cow::Owned)
}
// Process-wide slot holding the result of the calling-process lookup, paired
// with the condition variable that is notified once the slot has been filled.
// The slot starts out as `CallingProcess::Pending`.
lazy_static! {
static ref CALLER: Arc<(Mutex<CallingProcess>, Condvar)> =
Arc::new((Mutex::new(CallingProcess::Pending), Condvar::new()));
}

lazy_static! {
static ref CACHED_CALLING_PROCESS: Option<CallingProcess> = determine_calling_process();
/// Spawns a thread named `find_calling_process` that determines the calling
/// process, stores the result in `CALLER` and notifies all waiters.
///
/// # Panics
///
/// Panics if the thread cannot be spawned.
pub fn start_determining_calling_process_in_thread() {
    let builder = std::thread::Builder::new().name("find_calling_process".into());

    let spawned = builder.spawn(move || {
        // Run the lookup before taking the lock, so the lock is only held
        // while the result is stored.
        let result = determine_calling_process();

        let (lock, done) = &**CALLER;
        let mut slot = lock.lock().unwrap();
        *slot = result;
        done.notify_all();
    });

    // The join handle is deliberately dropped instead of being kept, returned
    // or joined: this lets the main thread exit early when it never needs to
    // know its parent process.
    spawned.unwrap();
}

/// Returns a guard for the calling process stored in `CALLER`.
///
/// Blocks on the condition variable for as long as the stored value is still
/// `CallingProcess::Pending`. The returned guard keeps the mutex locked until
/// it is dropped.
///
/// # Panics
///
/// Panics if the mutex is poisoned.
#[cfg(not(test))]
pub fn calling_process() -> MutexGuard<'static, CallingProcess> {
    let (lock, done) = &**CALLER;

    let guard = lock.lock().unwrap();
    let still_pending = |current: &mut CallingProcess| *current == CallingProcess::Pending;

    done.wait_while(guard, still_pending).unwrap()
}

// Test-only variant. It returns a `Box` so that call sites written against the
// `MutexGuard` of the non-test variant (which dereference the result) keep
// working unchanged.
#[cfg(test)]
pub fn calling_process() -> Box<CallingProcess> {
    // References `MutexGuard` so the import counts as used in test builds.
    type _UnusedImport = MutexGuard<'static, i8>;

    // When the thread-local FakeParentArgs are set, the lookup below returns
    // those, so the globally cached real result must not be used.
    if crate::utils::process::tests::FakeParentArgs::are_set() {
        return Box::new(determine_calling_process());
    }

    let (lock, _) = &**CALLER;
    let mut cached = lock.lock().unwrap();

    // Fill the cache on first use; nothing waits on the condition variable here.
    if *cached == CallingProcess::Pending {
        *cached = determine_calling_process();
    }

    Box::new(cached.clone())
}

fn determine_calling_process() -> Option<CallingProcess> {
/// Looks up the calling process via `calling_process_cmdline`, mapping
/// "nothing found" to `CallingProcess::None`.
fn determine_calling_process() -> CallingProcess {
    let lookup = calling_process_cmdline(ProcInfo::new(), describe_calling_process);

    if let Some(found) = lookup {
        found
    } else {
        CallingProcess::None
    }
}

// Return value of `extract_args(args: &[String]) -> ProcessArgs<T>` function which is
Expand Down Expand Up @@ -226,6 +268,13 @@ struct ProcInfo {
}
impl ProcInfo {
fn new() -> Self {
// On Linux sysinfo optimizes for repeated process queries and keeps per-process
// /proc file descriptors open. This caching is not needed here, so
// set this to zero (this does nothing on other platforms).
// Also, there is currently a kernel bug which slows down syscalls when threads are
// involved (here: the ctrlc handler) and a lot of files are kept open.
Copy link
Owner

@dandavison dandavison Dec 23, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out of interest, have you identified this bug in the Linux kernel issue tracker?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this is what allows us to fearlessly query many processes on linux again? Very interesting to see that you've got to the bottom of this problem.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As the Linux bug tracker suggests, for now I have reported it to the Debian kernel team; see #1001818 for a minimal C and C++ example.

And yes, on Linux this makes process queries as fast as e.g. ps auxf.

sysinfo::set_open_files_limit(0);

ProcInfo {
info: sysinfo::System::new(),
}
Expand Down Expand Up @@ -329,7 +378,11 @@ trait ProcessInterface {
};
iter_parents(self, pid, &mut sum_distance);

Some((length_of_process_chain, args))
if length_of_process_chain == usize::MAX {
None
} else {
Some((length_of_process_chain, args))
}
}
_ => None,
})
Expand Down Expand Up @@ -437,13 +490,8 @@ where

match info.find_sibling_in_refreshed_processes(my_pid, &extract_args) {
None => {
#[cfg(test)]
{
info.refresh_processes();
info.find_sibling_in_refreshed_processes(my_pid, &extract_args)
}
#[cfg(not(test))]
None
info.refresh_processes();
info.find_sibling_in_refreshed_processes(my_pid, &extract_args)
}
some => some,
}
Expand Down Expand Up @@ -555,6 +603,9 @@ pub mod tests {
}
})
}
/// Returns `true` when the thread-local `FAKE_ARGS` state is anything other
/// than `TlsState::None`.
pub fn are_set() -> bool {
    FAKE_ARGS.with(|cell| {
        let state = cell.borrow();
        *state != TlsState::None
    })
}
fn error(where_: &str) -> ! {
panic!(
"test logic error (in {}): wrong FakeParentArgs scope?",
Expand Down Expand Up @@ -818,6 +869,20 @@ pub mod tests {
calling_process_cmdline(ProcInfo::new(), guess_git_blame_filename_extension);
}

// `git blame` and `delta` are listed in separate entries with different last
// fields (presumably the parent pid: 2 vs. 4 -- confirm against
// `MockProcInfo::with`), so no calling process is expected to be found.
#[test]
fn test_process_blame_no_parent_found() {
    let unrelated_processes = MockProcInfo::with(&[
        (2, 100, "-shell", None),
        (3, 100, "git blame src/main.rs", Some(2)),
        (4, 100, "call_delta.sh", None),
        (5, 100, "delta", Some(4)),
    ]);

    let found = calling_process_cmdline(unrelated_processes, guess_git_blame_filename_extension);

    assert_eq!(found, None);
}

#[test]
fn test_process_blame_info_with_parent() {
let no_processes = MockProcInfo::with(&[]);
Expand Down