Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
184 changes: 170 additions & 14 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ path = "src/main.rs"
[dependencies]
ansi_term = "=0.11"
clap = "=2.33"
walkdir = "=2"
jwalk = "0.4"

[dev-dependencies]
assert_cli = "=0.5"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ djin:git/dust> dust

## Performance

Dust is currently about 4 times slower than du.
Dust uses a parallel fetching implementation that greatly improves performance compared to du for directory trees with a reasonable number of files (read: more than 20). It can be as much as 7x faster than du on a clean cache.

## Alternatives

Expand Down
14 changes: 7 additions & 7 deletions src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,12 @@ impl DisplayData {
}
}

fn get_children_from_node(&self, node: Node) -> impl Iterator<Item = Box<Node>> {
fn get_children_from_node(&self, node: Node) -> impl Iterator<Item = Node> {
if self.is_reversed {
let n: Vec<Box<Node>> = node.children.into_iter().rev().map(|a| a).collect();
return n.into_iter();
let n: Vec<Node> = node.children.into_iter().rev().map(|a| a).collect();
n.into_iter()
} else {
return node.children.into_iter();
node.children.into_iter()
}
}
}
Expand All @@ -82,7 +82,7 @@ pub fn draw_it(permissions: bool, use_full_path: bool, is_reversed: bool, root_n

for c in display_data.get_children_from_node(root_node) {
let first_tree_chars = display_data.get_first_chars();
display_node(*c, true, first_tree_chars, &display_data)
display_node(c, true, first_tree_chars, &display_data)
}
}

Expand All @@ -101,10 +101,10 @@ fn display_node(node: Node, is_biggest: bool, indent: &str, display_data: &Displ

for c in display_data.get_children_from_node(node) {
num_siblings -= 1;
let chars = display_data.get_tree_chars(num_siblings, max_sibling, c.children.len() > 0);
let chars = display_data.get_tree_chars(num_siblings, max_sibling, !c.children.is_empty());
let is_biggest = display_data.is_biggest(num_siblings, max_sibling);
let full_indent = new_indent.clone() + chars;
display_node(*c, is_biggest, &*full_indent, display_data)
display_node(c, is_biggest, &*full_indent, display_data)
}

if display_data.is_reversed {
Expand Down
58 changes: 32 additions & 26 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ fn main() {
.help("Depth to show")
.takes_value(true),
)
.arg(
Arg::with_name("threads")
.short("t")
.long("threads")
.help("Number of threads to spawn simultaneously")
.takes_value(true),
)
.arg(
Arg::with_name("number_of_lines")
.short("n")
Expand Down Expand Up @@ -67,19 +74,20 @@ fn main() {
}
};

let depth = {
if options.is_present("depth") {
match value_t!(options.value_of("depth"), u64) {
Ok(v) => Some(v + 1),
Err(_) => {
eprintln!("Ignoring bad value for depth");
None
}
}
} else {
None
}
};
let threads = options.value_of("threads").and_then(|threads| {
threads
.parse::<usize>()
.map_err(|_| eprintln!("Ignoring bad value for threads: {:?}", threads))
.ok()
});

let depth = options.value_of("depth").and_then(|depth| {
depth
.parse::<u64>()
.map(|v| v + 1)
.map_err(|_| eprintln!("Ignoring bad value for depth"))
.ok()
});
if options.is_present("depth") && number_of_lines != DEFAULT_NUMBER_OF_LINES {
eprintln!("Use either -n or -d. Not both");
return;
Expand All @@ -89,7 +97,7 @@ fn main() {
let use_full_path = options.is_present("display_full_paths");

let simplified_dirs = simplify_dir_names(target_dirs);
let (permissions, nodes) = get_dir_tree(&simplified_dirs, use_apparent_size);
let (permissions, nodes) = get_dir_tree(&simplified_dirs, use_apparent_size, threads);
let sorted_data = sort(nodes);
let biggest_ones = {
match depth {
Expand All @@ -109,18 +117,14 @@ fn main() {
}

fn build_tree(biggest_ones: Vec<(String, u64)>, depth: Option<u64>) -> Node {
let mut top_parent = Node {
name: "".to_string(),
size: 0,
children: vec![],
};
let mut top_parent = Node::default();

// assume sorted order
for b in biggest_ones {
let n = Node {
name: b.0,
size: b.1,
children: vec![],
children: Vec::default(),
};
recursively_build_tree(&mut top_parent, n, depth)
}
Expand All @@ -133,13 +137,15 @@ fn recursively_build_tree(parent_node: &mut Node, new_node: Node, depth: Option<
Some(0) => return,
Some(d) => Some(d - 1),
};
for c in parent_node.children.iter_mut() {
if new_node.name.starts_with(&c.name) {
return recursively_build_tree(&mut *c, new_node, new_depth);
}
if let Some(c) = parent_node
.children
.iter_mut()
.find(|c| new_node.name.starts_with(&c.name))
{
recursively_build_tree(c, new_node, new_depth);
} else {
parent_node.children.push(new_node);
}
let temp = Box::<Node>::new(new_node);
parent_node.children.push(temp);
}

#[cfg(test)]
Expand Down
62 changes: 36 additions & 26 deletions src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,38 @@ use std::cmp::Ordering;
use std::collections::HashMap;
use std::collections::HashSet;

use walkdir::WalkDir;
use jwalk::WalkDir;

mod platform;
use self::platform::*;

#[derive(Debug)]
#[derive(Debug, Default)]
pub struct Node {
pub name: String,
pub size: u64,
pub children: Vec<Box<Node>>,
pub children: Vec<Node>,
}

pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> {
let mut top_level_names: HashSet<String> = HashSet::new();
let mut top_level_names: HashSet<String> = HashSet::with_capacity(filenames.len());
let mut to_remove: Vec<String> = Vec::with_capacity(filenames.len());

for t in filenames {
let top_level_name = ensure_end_slash(t);
let mut can_add = true;
let mut to_remove: Vec<String> = Vec::new();

for tt in top_level_names.iter() {
let temp = tt.to_string();
if top_level_name.starts_with(&temp) {
if top_level_name.starts_with(tt) {
can_add = false;
} else if tt.starts_with(&top_level_name) {
to_remove.push(temp);
to_remove.push(tt.to_string());
}
}
for tr in to_remove {
top_level_names.remove(&tr);
}
to_remove.sort_unstable();
top_level_names.retain(|tr| to_remove.binary_search(tr).is_err());
to_remove.clear();
if can_add {
top_level_names.insert(strip_end_slash(t));
top_level_names.insert(strip_end_slash(t).to_owned());
}
}

Expand All @@ -44,13 +43,21 @@ pub fn simplify_dir_names(filenames: Vec<&str>) -> HashSet<String> {
pub fn get_dir_tree(
top_level_names: &HashSet<String>,
apparent_size: bool,
threads: Option<usize>,
) -> (bool, HashMap<String, u64>) {
let mut permissions = 0;
let mut inodes: HashSet<(u64, u64)> = HashSet::new();
let mut data: HashMap<String, u64> = HashMap::new();

for b in top_level_names.iter() {
examine_dir(&b, apparent_size, &mut inodes, &mut data, &mut permissions);
examine_dir(
&b,
apparent_size,
&mut inodes,
&mut data,
&mut permissions,
threads,
);
}
(permissions == 0, data)
}
Expand All @@ -63,10 +70,9 @@ pub fn ensure_end_slash(s: &str) -> String {
new_name + "/"
}

pub fn strip_end_slash(s: &str) -> String {
let mut new_name = String::from(s);
pub fn strip_end_slash(mut new_name: &str) -> &str {
while (new_name.ends_with('/') || new_name.ends_with("/.")) && new_name.len() > 1 {
new_name.pop();
new_name = &new_name[..new_name.len() - 1];
}
new_name
}
Expand All @@ -77,8 +83,15 @@ fn examine_dir(
inodes: &mut HashSet<(u64, u64)>,
data: &mut HashMap<String, u64>,
file_count_no_permission: &mut u64,
cpus: Option<usize>,
) {
for entry in WalkDir::new(top_dir) {
let mut iter = WalkDir::new(top_dir)
.preload_metadata(true)
.skip_hidden(false);
if let Some(cpus) = cpus {
iter = iter.num_threads(cpus);
}
for entry in iter {
if let Ok(e) = entry {
let maybe_size_and_inode = get_metadata(&e, apparent_size);

Expand All @@ -93,16 +106,13 @@ fn examine_dir(
}
}
// This path and all its parent paths have their counter incremented
let mut e_path = e.path().to_path_buf();
loop {
let path_name = e_path.to_string_lossy().to_string();
let s = data.entry(path_name.clone()).or_insert(0);
for path_name in e.path().ancestors() {
let path_name = path_name.to_string_lossy();
let s = data.entry(path_name.to_string()).or_insert(0);
*s += size;
if path_name == *top_dir {
if path_name == top_dir {
break;
}
assert!(path_name != "");
e_path.pop();
}
}
None => *file_count_no_permission += 1,
Expand All @@ -124,7 +134,7 @@ pub fn sort_by_size_first_name_second(a: &(String, u64), b: &(String, u64)) -> O

pub fn sort(data: HashMap<String, u64>) -> Vec<(String, u64)> {
let mut new_l: Vec<(String, u64)> = data.iter().map(|(a, b)| (a.clone(), *b)).collect();
new_l.sort_by(|a, b| sort_by_size_first_name_second(&a, &b));
new_l.sort_unstable_by(sort_by_size_first_name_second);
new_l
}

Expand All @@ -141,7 +151,7 @@ pub fn trim_deep_ones(
max_depth: u64,
top_level_names: &HashSet<String>,
) -> Vec<(String, u64)> {
let mut result: Vec<(String, u64)> = vec![];
let mut result: Vec<(String, u64)> = Vec::with_capacity(input.len() * top_level_names.len());

for name in top_level_names {
let my_max_depth = name.matches('/').count() + max_depth as usize;
Expand Down
15 changes: 10 additions & 5 deletions src/utils/platform.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use walkdir::DirEntry;
use jwalk::DirEntry;

fn get_block_size() -> u64 {
// All OS-specific implementations of MetadataExt seem to define a block as 512 bytes
Expand All @@ -9,17 +9,22 @@ fn get_block_size() -> u64 {
#[cfg(target_family = "unix")]
pub fn get_metadata(d: &DirEntry, use_apparent_size: bool) -> Option<(u64, Option<(u64, u64)>)> {
use std::os::unix::fs::MetadataExt;
d.metadata().ok().and_then(|md| {
d.metadata.as_ref().unwrap().as_ref().ok().map(|md| {
let inode = Some((md.ino(), md.dev()));
if use_apparent_size {
Some((md.len(), inode))
(md.len(), inode)
} else {
Some((md.blocks() * get_block_size(), inode))
(md.blocks() * get_block_size(), inode)
}
})
}

#[cfg(not(target_family = "unix"))]
pub fn get_metadata(d: &DirEntry, _apparent: bool) -> Option<(u64, Option<(u64, u64)>)> {
d.metadata().ok().map_or(None, |md| Some((md.len(), None)))
d.metadata
.as_ref()
.unwrap()
.as_ref()
.ok()
.map(|md| (md.len(), None))
}