From a3bf93ea0eab0486afb19da28ea6399b488fde9d Mon Sep 17 00:00:00 2001 From: "andy.boot" Date: Thu, 12 Aug 2021 12:21:35 +0100 Subject: [PATCH 1/3] Feature: Adding file types filter & F flag changed -t = Show summary of types -e = Filter by regex allows you to specify a file type like -e "\.txt$" Change behaviour of '-f' flag - it now counts only files. Before it counted files & directories. This was needed for compatibility with the new '-e' filter flag --- Cargo.lock | 33 ++++++++++--------- Cargo.toml | 1 + README.md | 2 ++ src/dir_walker.rs | 16 +++++++++ src/display.rs | 4 --- src/filter.rs | 80 ++++++++++++++++++++++++++++++++++++++++++--- src/main.rs | 57 ++++++++++++++++++++++++++++---- src/node.rs | 23 ++++++++++--- src/utils.rs | 20 ++++++++---- tests/test_flags.rs | 30 +++++++++++++++-- 10 files changed, 221 insertions(+), 45 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cf90261c..d9e2c9aa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -31,9 +31,9 @@ dependencies = [ [[package]] name = "assert_cmd" -version = "1.0.7" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d20831bd004dda4c7c372c19cdabff369f794a95e955b3f13fe460e3e1ae95f" +checksum = "c98233c6673d8601ab23e77eb38f999c51100d46c5703b17288c57fddf3a1ffe" dependencies = [ "bstr", "doc-comment", @@ -62,9 +62,9 @@ checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" [[package]] name = "bitflags" -version = "1.2.1" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bstr" @@ -111,9 +111,9 @@ dependencies = [ [[package]] name = "crossbeam-deque" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" dependencies = [ "cfg-if", "crossbeam-epoch", @@ -164,6 +164,7 @@ dependencies = [ "clap", "lscolors", "rayon", + "regex", "stfu8", "tempfile", "terminal_size", @@ -215,9 +216,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.98" +version = "0.2.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "320cfe77175da3a483efed4bc0adc1968ca050b098ce4f2f1c13a56626128790" +checksum = "3cb00336871be5ed2c8ed44b60ae9959dc5b9f08539422ed43f09e34ecaeba21" [[package]] name = "lscolors" @@ -230,9 +231,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" [[package]] name = "memoffset" @@ -261,9 +262,9 @@ checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" [[package]] name = "predicates" -version = "2.0.0" +version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6e46ca79eb4e21e2ec14430340c71250ab69332abf85521c95d3a8bc336aa76" +checksum = "c143348f141cc87aab5b950021bac6145d0e5ae754b0591de23244cee42c9308" dependencies = [ "difflib", "itertools", @@ -278,9 +279,9 @@ checksum = "57e35a3326b75e49aa85f5dc6ec15b41108cf5aee58eabb1f274dd18b73c2451" [[package]] name = "predicates-tree" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f553275e5721409451eb85e15fd9a860a6e5ab4496eb215987502b5f5391f2" +checksum = "d7dd0fd014130206c9352efbdc92be592751b2b9274dff685348341082c6ea3d" dependencies = [ "predicates-core", "treeline", @@ -353,9 +354,9 @@ dependencies = [ 
[[package]] name = "redox_syscall" -version = "0.2.9" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ab49abadf3f9e1c4bc499e8845e152ad87d2ad2d30371841171169e9d75feee" +checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" dependencies = [ "bitflags", ] diff --git a/Cargo.toml b/Cargo.toml index deef3788..85ca6029 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ unicode-width = "0.1" rayon="1" thousands = "0.2" stfu8 = "0.2" +regex = "1" [target.'cfg(windows)'.dependencies] winapi-util = "0.1" diff --git a/README.md b/README.md index 14061aa9..66fca612 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,8 @@ Usage: dust -b (do not show percentages or draw ASCII bars) Usage: dust -i (do not show hidden files) Usage: dust -c (No colors [monochrome]) Usage: dust -f (Count files instead of diskspace) +Usage: dust -t Group by filetype +Usage: dust -e regex Only include files matching this regex (eg dust -e "\.png$" would match png files) ``` diff --git a/src/dir_walker.rs b/src/dir_walker.rs index 7b6ba084..5dc48897 100644 --- a/src/dir_walker.rs +++ b/src/dir_walker.rs @@ -1,8 +1,10 @@ use std::fs; use crate::node::Node; +use crate::utils::is_filtered_out_due_to_regex; use rayon::iter::ParallelBridge; use rayon::prelude::ParallelIterator; +use regex::Regex; use std::path::PathBuf; use std::sync::atomic; @@ -17,6 +19,7 @@ use crate::platform::get_metadata; pub struct WalkData { pub ignore_directories: HashSet, + pub filter_regex: Option, pub allowed_filesystems: HashSet, pub use_apparent_size: bool, pub by_filecount: bool, @@ -84,6 +87,15 @@ fn ignore_file(entry: &DirEntry, walk_data: &WalkData) -> bool { } } } + + // Keeping `walk_data.filter_regex.is_some()` is important for performance reasons, it stops unnecessary work + if walk_data.filter_regex.is_some() + && entry.path().is_file() + && is_filtered_out_due_to_regex(&walk_data.filter_regex, &entry.path()) + { + return true; + } + 
(is_dot_file && walk_data.ignore_hidden) || is_ignored_path } @@ -110,8 +122,10 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op return build_node( entry.path(), vec![], + &walk_data.filter_regex, walk_data.use_apparent_size, data.is_symlink(), + data.is_file(), walk_data.by_filecount, ); } @@ -128,8 +142,10 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op build_node( dir, children, + &walk_data.filter_regex, walk_data.use_apparent_size, false, + false, walk_data.by_filecount, ) } diff --git a/src/display.rs b/src/display.rs index 5477212b..e9806732 100644 --- a/src/display.rs +++ b/src/display.rs @@ -107,7 +107,6 @@ impl DrawData<'_> { #[allow(clippy::too_many_arguments)] pub fn draw_it( - permission_error: bool, use_full_path: bool, is_reversed: bool, no_colors: bool, @@ -116,9 +115,6 @@ pub fn draw_it( by_filecount: bool, option_root_node: Option, ) { - if permission_error { - eprintln!("Did not have permissions for all directories"); - } if option_root_node.is_none() { return; } diff --git a/src/filter.rs b/src/filter.rs index 623abe37..754bbd81 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -1,6 +1,7 @@ use crate::display_node::DisplayNode; use crate::node::Node; use std::collections::BinaryHeap; +use std::collections::HashMap; use std::collections::HashSet; use std::path::PathBuf; @@ -13,7 +14,11 @@ pub fn get_by_depth(top_level_nodes: Vec, n: usize) -> Option Some(build_by_depth(&root, n - 1)) } -pub fn get_biggest(top_level_nodes: Vec, n: usize) -> Option { +pub fn get_biggest( + top_level_nodes: Vec, + n: usize, + using_file_type_filter: bool, +) -> Option { if top_level_nodes.is_empty() { // perhaps change this, bring back Error object? 
return None; @@ -22,18 +27,17 @@ pub fn get_biggest(top_level_nodes: Vec, n: usize) -> Option let mut heap = BinaryHeap::new(); let number_top_level_nodes = top_level_nodes.len(); let root = get_new_root(top_level_nodes); - - root.children.iter().for_each(|c| heap.push(c)); - let mut allowed_nodes = HashSet::new(); + allowed_nodes.insert(&root.name); + heap = add_children(using_file_type_filter, &root, heap); for _ in number_top_level_nodes..n { let line = heap.pop(); match line { Some(line) => { - line.children.iter().for_each(|c| heap.push(c)); allowed_nodes.insert(&line.name); + heap = add_children(using_file_type_filter, line, heap); } None => break, } @@ -41,6 +45,72 @@ pub fn get_biggest(top_level_nodes: Vec, n: usize) -> Option recursive_rebuilder(&allowed_nodes, &root) } +pub fn get_all_file_types(top_level_nodes: Vec, n: usize) -> Option { + let mut map: HashMap = HashMap::new(); + build_by_all_file_types(top_level_nodes, &mut map); + let mut by_types: Vec = map.into_iter().map(|(_k, v)| v).collect(); + by_types.sort(); + by_types.reverse(); + + let displayed = if by_types.len() <= n { + by_types + } else { + let (displayed, rest) = by_types.split_at(if n > 1 { n - 1 } else { 1 }); + let remaining = DisplayNode { + name: PathBuf::from("(others)"), + size: rest.iter().map(|a| a.size).sum(), + children: vec![], + }; + + let mut displayed = displayed.to_vec(); + displayed.push(remaining); + displayed + }; + + let result = DisplayNode { + name: PathBuf::from("(total)"), + size: displayed.iter().map(|a| a.size).sum(), + children: displayed, + }; + Some(result) +} + +fn add_children<'a>( + using_file_type_filter: bool, + line: &'a Node, + mut heap: BinaryHeap<&'a Node>, +) -> BinaryHeap<&'a Node> { + if using_file_type_filter { + line.children.iter().for_each(|c| { + if c.name.is_file() || c.size > 0 { + heap.push(c) + } + }); + } else { + line.children.iter().for_each(|c| heap.push(c)); + } + heap +} + +fn build_by_all_file_types(top_level_nodes: Vec, counter: 
&mut HashMap) { + for node in top_level_nodes { + if node.name.is_file() { + let ext = node.name.extension(); + let key: String = match ext { + Some(e) => ".".to_string() + &e.to_string_lossy(), + None => "(no extension)".into(), + }; + let mut display_node = counter.entry(key.clone()).or_insert(DisplayNode { + name: PathBuf::from(key), + size: 0, + children: vec![], + }); + display_node.size += node.size; + } + build_by_all_file_types(node.children, counter) + } +} + fn build_by_depth(node: &Node, depth: usize) -> DisplayNode { let new_children = { if depth == 0 { diff --git a/src/main.rs b/src/main.rs index 4601a3fe..73cacf9a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,15 +1,18 @@ #[macro_use] extern crate clap; extern crate rayon; +extern crate regex; extern crate unicode_width; use std::collections::HashSet; +use std::process; use self::display::draw_it; use clap::{App, AppSettings, Arg}; use dir_walker::walk_it; use dir_walker::WalkData; -use filter::{get_biggest, get_by_depth}; +use filter::{get_all_file_types, get_biggest, get_by_depth}; +use regex::Regex; use std::cmp::max; use std::path::PathBuf; use terminal_size::{terminal_size, Height, Width}; @@ -151,6 +154,23 @@ fn main() { .long("ignore_hidden") .help("Do not display hidden files"), ) + .arg( + Arg::with_name("filter") + .short("e") + .long("filter") + .takes_value(true) + .number_of_values(1) + .multiple(true) + .conflicts_with("types") + .help("Only include files matching this regex. 
For png files type: -e \"\\.png$\" "), + ) + .arg( + Arg::with_name("types") + .short("t") + .long("file_types") + .conflicts_with("depth") + .help("show only these file types"), + ) .arg( Arg::with_name("width") .short("w") @@ -169,6 +189,20 @@ fn main() { } }; + let summarize_file_types = options.is_present("types"); + + let maybe_filter = if options.is_present("filter") { + match Regex::new(options.value_of("filter").unwrap()) { + Ok(r) => Some(r), + Err(e) => { + eprintln!("Ignoring bad value for filter {:?}", e); + process::exit(1); + } + } + } else { + None + }; + let number_of_lines = match value_t!(options.value_of("number_of_lines"), usize) { Ok(v) => v, Err(_) => { @@ -217,23 +251,34 @@ fn main() { let walk_data = WalkData { ignore_directories: ignored_full_path, + filter_regex: maybe_filter, allowed_filesystems, use_apparent_size, by_filecount, ignore_hidden, }; - let (nodes, errors) = walk_it(simplified_dirs, walk_data); + let (top_level_nodes, has_errors) = walk_it(simplified_dirs, walk_data); let tree = { - match depth { - None => get_biggest(nodes, number_of_lines), - Some(depth) => get_by_depth(nodes, depth), + match (depth, summarize_file_types) { + (_, true) => get_all_file_types(top_level_nodes, number_of_lines), + (Some(depth), _) => get_by_depth(top_level_nodes, depth), + (_, _) => get_biggest( + top_level_nodes, + number_of_lines, + options.values_of("filter").is_some(), + ), } }; + if options.is_present("filter") { + println!("Filtering by: {}", options.value_of("filter").unwrap()); + } + if has_errors { + eprintln!("Did not have permissions for all directories"); + } draw_it( - errors, options.is_present("display_full_paths"), !options.is_present("reverse"), no_colors, diff --git a/src/node.rs b/src/node.rs index 320c077c..ff391ab4 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,5 +1,7 @@ use crate::platform::get_metadata; +use crate::utils::is_filtered_out_due_to_regex; +use regex::Regex; use std::cmp::Ordering; use std::path::PathBuf; @@ 
-14,18 +16,29 @@ pub struct Node { pub fn build_node( dir: PathBuf, children: Vec, + filter_regex: &Option, use_apparent_size: bool, is_symlink: bool, + is_file: bool, by_filecount: bool, ) -> Option { match get_metadata(&dir, use_apparent_size) { Some(data) => { - let (size, inode_device) = if by_filecount { - (1, data.1) - } else if is_symlink && !use_apparent_size { - (0, None) + let inode_device = if is_symlink && !use_apparent_size { + None } else { - data + data.1 + }; + + let size = if is_filtered_out_due_to_regex(filter_regex, &dir) + || (is_symlink && !use_apparent_size) + || by_filecount && !is_file + { + 0 + } else if by_filecount { + 1 + } else { + data.0 }; Some(Node { diff --git a/src/utils.rs b/src/utils.rs index 44052d18..cf91b86f 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,12 +3,7 @@ use std::collections::HashSet; use std::path::{Path, PathBuf}; use crate::platform; - -fn is_a_parent_of>(parent: P, child: P) -> bool { - let parent = parent.as_ref(); - let child = child.as_ref(); - child.starts_with(parent) && !parent.starts_with(child) -} +use regex::Regex; pub fn simplify_dir_names>(filenames: Vec

) -> HashSet { let mut top_level_names: HashSet = HashSet::with_capacity(filenames.len()); @@ -62,6 +57,19 @@ pub fn normalize_path>(path: P) -> PathBuf { path.as_ref().components().collect::() } +pub fn is_filtered_out_due_to_regex(filter_regex: &Option, dir: &Path) -> bool { + match filter_regex { + Some(fr) => !fr.is_match(&dir.as_os_str().to_string_lossy()), + None => false, + } +} + +fn is_a_parent_of>(parent: P, child: P) -> bool { + let parent = parent.as_ref(); + let child = child.as_ref(); + child.starts_with(parent) && !parent.starts_with(child) +} + mod tests { #[allow(unused_imports)] use super::*; diff --git a/tests/test_flags.rs b/tests/test_flags.rs index 6e9db388..5518552b 100644 --- a/tests/test_flags.rs +++ b/tests/test_flags.rs @@ -60,7 +60,7 @@ pub fn test_d_flag_works_and_still_recurses_down() { // We had a bug where running with '-d 1' would stop at the first directory and the code // would fail to recurse down let output = build_command(vec!["-d", "1", "-f", "-c", "tests/test_dir2/"]); - assert!(output.contains("7 ┌─┴ test_dir2")); + assert!(output.contains("4 ┌─┴ test_dir2")); } // Check against directories and files whos names are substrings of each other @@ -97,8 +97,8 @@ pub fn test_number_of_files() { let output = build_command(vec!["-c", "-f", "tests/test_dir"]); assert!(output.contains("1 ┌── a_file ")); assert!(output.contains("1 ├── hello_file")); - assert!(output.contains("3 ┌─┴ many")); - assert!(output.contains("4 ┌─┴ test_dir")); + assert!(output.contains("2 ┌─┴ many")); + assert!(output.contains("2 ┌─┴ test_dir")); } #[cfg_attr(target_os = "windows", ignore)] @@ -116,3 +116,27 @@ pub fn test_apparent_size() { let incorrect_apparent_size = "4.0K ├── hello_file"; assert!(!output.contains(incorrect_apparent_size)); } + +#[test] +pub fn test_show_files_by_type() { + // Check we can list files by type + let output = build_command(vec!["-c", "-t", "tests"]); + assert!(output.contains(" .unicode")); + assert!(output.contains(" 
.japan")); + assert!(output.contains(" .rs")); + assert!(output.contains(" (no extension)")); + assert!(output.contains("┌─┴ (total)")); +} + +#[test] +pub fn test_show_files_by_specific_type() { + // Check we can see '.rs' files in the tests directory + let output = build_command(vec!["-c", "-e", "\\.rs$", "tests"]); + assert!(output.contains(" ┌─┴ tests")); + assert!(!output.contains("0B ┌── tests")); + assert!(!output.contains("0B ┌─┴ tests")); + + // Check there are no '.bad_type' files in the tests directory + let output = build_command(vec!["-c", "-e", "bad_regex", "tests"]); + assert!(output.contains("0B ┌── tests")); +} From b6df5c65f94c9485d3211ab0b8721533c6145b07 Mon Sep 17 00:00:00 2001 From: "andy.boot" Date: Sun, 5 Sep 2021 09:50:45 +0100 Subject: [PATCH 2/3] Feature: Filter by invert_filter: reverse match Mimic grep's -v option. Allows dust to only match files that do not match the given filter --- src/dir_walker.rs | 11 +++++++++++ src/filter.rs | 10 +++++----- src/main.rs | 45 ++++++++++++++++++++++++++++++++------------- src/node.rs | 4 ++++ src/utils.rs | 7 +++++++ tests/test_flags.rs | 21 ++++++++++++++++++--- 6 files changed, 77 insertions(+), 21 deletions(-) diff --git a/src/dir_walker.rs b/src/dir_walker.rs index 5dc48897..939ae781 100644 --- a/src/dir_walker.rs +++ b/src/dir_walker.rs @@ -1,6 +1,7 @@ use std::fs; use crate::node::Node; +use crate::utils::is_filtered_out_due_to_invert_regex; use crate::utils::is_filtered_out_due_to_regex; use rayon::iter::ParallelBridge; use rayon::prelude::ParallelIterator; @@ -20,6 +21,7 @@ use crate::platform::get_metadata; pub struct WalkData { pub ignore_directories: HashSet, pub filter_regex: Option, + pub invert_filter_regex: Option, pub allowed_filesystems: HashSet, pub use_apparent_size: bool, pub by_filecount: bool, @@ -96,6 +98,13 @@ fn ignore_file(entry: &DirEntry, walk_data: &WalkData) -> bool { return true; } + if walk_data.invert_filter_regex.is_some() + && entry.path().is_file() + && 
is_filtered_out_due_to_invert_regex(&walk_data.invert_filter_regex, &entry.path()) + { + return true; + } + (is_dot_file && walk_data.ignore_hidden) || is_ignored_path } @@ -123,6 +132,7 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op entry.path(), vec![], &walk_data.filter_regex, + &walk_data.invert_filter_regex, walk_data.use_apparent_size, data.is_symlink(), data.is_file(), @@ -143,6 +153,7 @@ fn walk(dir: PathBuf, permissions_flag: &AtomicBool, walk_data: &WalkData) -> Op dir, children, &walk_data.filter_regex, + &walk_data.invert_filter_regex, walk_data.use_apparent_size, false, false, diff --git a/src/filter.rs b/src/filter.rs index 754bbd81..70fc299e 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -17,7 +17,7 @@ pub fn get_by_depth(top_level_nodes: Vec, n: usize) -> Option pub fn get_biggest( top_level_nodes: Vec, n: usize, - using_file_type_filter: bool, + using_a_filter: bool, ) -> Option { if top_level_nodes.is_empty() { // perhaps change this, bring back Error object? 
@@ -30,14 +30,14 @@ pub fn get_biggest( let mut allowed_nodes = HashSet::new(); allowed_nodes.insert(&root.name); - heap = add_children(using_file_type_filter, &root, heap); + heap = add_children(using_a_filter, &root, heap); for _ in number_top_level_nodes..n { let line = heap.pop(); match line { Some(line) => { allowed_nodes.insert(&line.name); - heap = add_children(using_file_type_filter, line, heap); + heap = add_children(using_a_filter, line, heap); } None => break, } @@ -76,11 +76,11 @@ pub fn get_all_file_types(top_level_nodes: Vec, n: usize) -> Option( - using_file_type_filter: bool, + using_a_filter: bool, line: &'a Node, mut heap: BinaryHeap<&'a Node>, ) -> BinaryHeap<&'a Node> { - if using_file_type_filter { + if using_a_filter { line.children.iter().for_each(|c| { if c.name.is_file() || c.size > 0 { heap.push(c) diff --git a/src/main.rs b/src/main.rs index 73cacf9a..0ffccfb6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -73,6 +73,19 @@ fn get_width_of_terminal() -> usize { } } +fn get_regex_value(maybe_value: Option<&str>) -> Option { + match maybe_value { + Some(v) => match Regex::new(v) { + Ok(r) => Some(r), + Err(e) => { + eprintln!("Ignoring bad value for regex {:?}", e); + process::exit(1); + } + }, + None => None, + } +} + fn main() { let default_height = get_height_of_terminal(); let def_num_str = default_height.to_string(); @@ -151,9 +164,21 @@ fn main() { .arg( Arg::with_name("ignore_hidden") .short("i") // Do not use 'h' this is used by 'help' - .long("ignore_hidden") + .long("ignore_hidden") //TODO: fix change - -> _ .help("Do not display hidden files"), ) + .arg( + Arg::with_name("invert_filter") + .short("v") + .long("invert-filter") + .takes_value(true) + .number_of_values(1) + .multiple(true) + .conflicts_with("filter") + .conflicts_with("types") + .conflicts_with("depth") + .help("Exclude files matching this regex. 
To ignore png files type: -v \"\\.png$\" "), + ) .arg( Arg::with_name("filter") .short("e") @@ -162,6 +187,7 @@ fn main() { .number_of_values(1) .multiple(true) .conflicts_with("types") + .conflicts_with("depth") .help("Only include files matching this regex. For png files type: -e \"\\.png$\" "), ) .arg( @@ -191,17 +217,8 @@ fn main() { let summarize_file_types = options.is_present("types"); - let maybe_filter = if options.is_present("filter") { - match Regex::new(options.value_of("filter").unwrap()) { - Ok(r) => Some(r), - Err(e) => { - eprintln!("Ignoring bad value for filter {:?}", e); - process::exit(1); - } - } - } else { - None - }; + let maybe_filter = get_regex_value(options.value_of("filter")); + let maybe_invert_filter = get_regex_value(options.value_of("invert_filter")); let number_of_lines = match value_t!(options.value_of("number_of_lines"), usize) { Ok(v) => v, @@ -252,6 +269,7 @@ fn main() { let walk_data = WalkData { ignore_directories: ignored_full_path, filter_regex: maybe_filter, + invert_filter_regex: maybe_invert_filter, allowed_filesystems, use_apparent_size, by_filecount, @@ -267,7 +285,8 @@ fn main() { (_, _) => get_biggest( top_level_nodes, number_of_lines, - options.values_of("filter").is_some(), + options.values_of("filter").is_some() + || options.value_of("invert_filter").is_some(), ), } }; diff --git a/src/node.rs b/src/node.rs index ff391ab4..d30b7ef5 100644 --- a/src/node.rs +++ b/src/node.rs @@ -1,4 +1,5 @@ use crate::platform::get_metadata; +use crate::utils::is_filtered_out_due_to_invert_regex; use crate::utils::is_filtered_out_due_to_regex; use regex::Regex; @@ -13,10 +14,12 @@ pub struct Node { pub inode_device: Option<(u64, u64)>, } +#[allow(clippy::too_many_arguments)] pub fn build_node( dir: PathBuf, children: Vec, filter_regex: &Option, + invert_filter_regex: &Option, use_apparent_size: bool, is_symlink: bool, is_file: bool, @@ -31,6 +34,7 @@ pub fn build_node( }; let size = if is_filtered_out_due_to_regex(filter_regex, 
&dir) + || is_filtered_out_due_to_invert_regex(invert_filter_regex, &dir) || (is_symlink && !use_apparent_size) || by_filecount && !is_file { diff --git a/src/utils.rs b/src/utils.rs index cf91b86f..51bef5d5 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -64,6 +64,13 @@ pub fn is_filtered_out_due_to_regex(filter_regex: &Option, dir: &Path) -> } } +pub fn is_filtered_out_due_to_invert_regex(filter_regex: &Option, dir: &Path) -> bool { + match filter_regex { + Some(fr) => fr.is_match(&dir.as_os_str().to_string_lossy()), + None => false, + } +} + fn is_a_parent_of>(parent: P, child: P) -> bool { let parent = parent.as_ref(); let child = child.as_ref(); diff --git a/tests/test_flags.rs b/tests/test_flags.rs index 5518552b..e7cab291 100644 --- a/tests/test_flags.rs +++ b/tests/test_flags.rs @@ -129,14 +129,29 @@ pub fn test_show_files_by_type() { } #[test] -pub fn test_show_files_by_specific_type() { +pub fn test_show_files_by_regex() { // Check we can see '.rs' files in the tests directory let output = build_command(vec!["-c", "-e", "\\.rs$", "tests"]); assert!(output.contains(" ┌─┴ tests")); assert!(!output.contains("0B ┌── tests")); assert!(!output.contains("0B ┌─┴ tests")); - // Check there are no '.bad_type' files in the tests directory - let output = build_command(vec!["-c", "-e", "bad_regex", "tests"]); + // Check there are no files named: '.match_nothing' in the tests directory + let output = build_command(vec!["-c", "-e", "match_nothing$", "tests"]); assert!(output.contains("0B ┌── tests")); } + +#[test] +pub fn test_show_files_by_invert_regex() { + let output = build_command(vec!["-c", "-f", "-v", "e", "tests/test_dir2"]); + // There are 0 files without 'e' in the name + assert!(output.contains("0 ┌── test_dir2")); + + let output = build_command(vec!["-c", "-f", "-v", "a", "tests/test_dir2"]); + // There are 2 files without 'a' in the name + assert!(output.contains("2 ┌─┴ test_dir2")); + + // There are 4 files in the test_dir2 hierarchy + let output = 
build_command(vec!["-c", "-f", "-v", "match_nothing$", "tests/test_dir2"]); + assert!(output.contains("4 ┌─┴ test_dir2")); +} From 8b6ac81cb1ebe1ad59c9680136f98a094d066d8b Mon Sep 17 00:00:00 2001 From: "andy.boot" Date: Sun, 19 Sep 2021 08:39:23 +0100 Subject: [PATCH 3/3] Clap: use default_value on input This uses one less 'match' statement --- src/main.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/main.rs b/src/main.rs index 0ffccfb6..5445feda 100644 --- a/src/main.rs +++ b/src/main.rs @@ -205,15 +205,13 @@ fn main() { .number_of_values(1) .help("Specify width of output overriding the auto detection of terminal width"), ) - .arg(Arg::with_name("inputs").multiple(true)) + .arg(Arg::with_name("inputs").multiple(true).default_value(".")) .get_matches(); - let target_dirs = { - match options.values_of("inputs") { - None => vec!["."], - Some(r) => r.collect(), - } - }; + let target_dirs = options + .values_of("inputs") + .expect("Should be a default value here") + .collect(); let summarize_file_types = options.is_present("types");