diff --git a/Cargo.lock b/Cargo.lock index 570b2cbed..64554c978 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -536,7 +536,7 @@ dependencies = [ [[package]] name = "tokei" -version = "6.1.3" +version = "7.0.0" dependencies = [ "clap 2.29.1 (registry+https://github.com/rust-lang/crates.io-index)", "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index 5f7243b62..432d7b1bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,7 +16,7 @@ license = "MIT/Apache-2.0" name = "tokei" readme = "README.md" repository = "https://github.com/Aaronepower/tokei.git" -version = "6.1.3" +version = "7.0.0" keywords = ["utility", "cli", "cloc", "lines", "statistics"] categories = ["command-line-utilities", "development-tools", "visualization"] diff --git a/src/language/language.rs b/src/language/language.rs index b52c73912..0873cd110 100644 --- a/src/language/language.rs +++ b/src/language/language.rs @@ -25,21 +25,21 @@ pub struct Language { pub lines: usize, /// A collection of single line comments in the language. ie. `//` in Rust. #[cfg_attr(feature = "io", serde(skip_deserializing, skip_serializing))] - pub line_comment: Vec<&'static str>, + pub line_comment: &'static [&'static str], /// A collection of tuples representing the start and end of multi line /// comments. ie. `/* comment */` in Rust. #[cfg_attr(feature = "io", serde(skip_deserializing, skip_serializing))] - pub multi_line: Vec<(&'static str, &'static str)>, + pub multi_line: &'static [(&'static str, &'static str)], /// Whether the language supports nested multi line comments or not. #[cfg_attr(feature = "io", serde(skip_deserializing, skip_serializing))] pub nested: bool, /// A list of specific nested comments if this is empty all `multi_line` /// comments count. 
#[cfg_attr(feature = "io", serde(skip_deserializing, skip_serializing))] - pub nested_comments: Vec<(&'static str, &'static str)>, + pub nested_comments: &'static [(&'static str, &'static str)], /// A list of quotes by default it is `""`. #[cfg_attr(feature = "io", serde(skip_deserializing, skip_serializing))] - pub quotes: Vec<(&'static str, &'static str)>, + pub quotes: &'static [(&'static str, &'static str)], } impl Language { @@ -47,16 +47,16 @@ impl Language { /// /// ``` /// # use tokei::*; - /// let mut rust = Language::new(vec!["//"], vec![("/*", "*/")]); + /// let mut rust = Language::new(&["//"], &[("/*", "*/")]); /// ``` - pub fn new(line_comment: Vec<&'static str>, - multi_line: Vec<(&'static str, &'static str)>) + pub fn new(line_comment: &'static [&'static str], + multi_line: &'static [(&'static str, &'static str)]) -> Self { Language { line_comment: line_comment, multi_line: multi_line, - quotes: vec![("\"", "\"")], + quotes: &[("\"", "\"")], ..Self::default() } } @@ -66,9 +66,9 @@ impl Language { /// /// ``` /// # use tokei::*; + /// let empty_array: &'static [&'static str] = &[]; /// let json = Language::new_blank(); - /// let blank_vec: Vec<&str> = vec![]; - /// assert_eq!(json.line_comment, blank_vec); + /// assert_eq!(json.line_comment, empty_array); /// ``` pub fn new_blank() -> Self { Self::default() @@ -79,7 +79,7 @@ impl Language { /// /// ``` /// # use tokei::*; - /// let rust = Language::new(vec!["//"], vec![("/*", "*/")]); + /// let rust = Language::new(&["//"], &[("/*", "*/")]); /// let c = Language::new_c(); /// /// assert_eq!(rust.line_comment, c.line_comment); @@ -87,9 +87,9 @@ impl Language { /// ``` pub fn new_c() -> Self { Language { - line_comment: vec!["//"], - multi_line: vec![("/*", "*/")], - quotes: vec![("\"", "\"")], + line_comment: &["//"], + multi_line: &[("/*", "*/")], + quotes: &[("\"", "\"")], ..Self::default() } } @@ -99,7 +99,7 @@ impl Language { /// /// ``` /// # use tokei::*; - /// let ocaml = 
Language::new_multi(vec![("(*", "*)")]); + /// let ocaml = Language::new_multi(&[("(*", "*)")]); /// let coq = Language::new_func(); /// /// assert_eq!(ocaml.line_comment, coq.line_comment); @@ -107,8 +107,8 @@ impl Language { /// ``` pub fn new_func() -> Self { Language { - multi_line: vec![("(*", "*)")], - quotes: vec![("\"", "\"")], + multi_line: &[("(*", "*)")], + quotes: &[("\"", "\"")], ..Self::default() } } @@ -118,7 +118,7 @@ impl Language { /// /// ``` /// # use tokei::*; - /// let xml = Language::new_multi(vec![("<!--", "-->")]); + /// let xml = Language::new_multi(&[("<!--", "-->")]); /// let html = Language::new_html(); /// /// assert_eq!(xml.line_comment, html.line_comment); @@ -126,8 +126,8 @@ impl Language { /// ``` pub fn new_html() -> Self { Language { - multi_line: vec![("<!--", "-->")], - quotes: vec![("\"", "\"")], + multi_line: &[("<!--", "-->")], + quotes: &[("\"", "\"")], ..Self::default() } } @@ -137,14 +137,14 @@ impl Language { /// /// ``` /// # use tokei::*; - /// let bash = Language::new_single(vec!["#"]); + /// let bash = Language::new_single(&["#"]); /// let yaml = Language::new_hash(); /// /// assert_eq!(bash.line_comment, yaml.line_comment); /// assert_eq!(bash.multi_line, yaml.multi_line); /// ``` pub fn new_hash() -> Self { - Self::new_single(vec!["#"]) + Self::new_single(&["#"]) } /// Convience constructor for creating a language that has the same @@ -152,7 +152,7 @@ impl Language { /// /// ``` /// # use tokei::*; - /// let haskell = Language::new(vec!["--"], vec![("{-", "-}")]).nested(); + /// let haskell = Language::new(&["--"], &[("{-", "-}")]).nested(); /// let idris = Language::new_haskell(); /// /// assert_eq!(haskell.line_comment, haskell.line_comment); @@ -160,8 +160,8 @@ impl Language { /// ``` pub fn new_haskell() -> Self { Language { - line_comment: vec!["--"], - multi_line: vec![("{-", "-}")], + line_comment: &["--"], + multi_line: &[("{-", "-}")], nested: true, ..Self::default() } @@ -172,12 +172,12 @@ impl Language { /// /// ``` /// # use tokei::*; - /// let 
mustache = Language::new_multi(vec![("{{!", "}}")]); + /// let mustache = Language::new_multi(&[("{{!", "}}")]); /// ``` - pub fn new_multi(multi_line: Vec<(&'static str, &'static str)>) -> Self { + pub fn new_multi(multi_line: &'static [(&'static str, &'static str)]) -> Self { Language { multi_line: multi_line, - quotes: vec![("\"", "\"")], + quotes: &[("\"", "\"")], ..Self::default() } } @@ -187,7 +187,7 @@ impl Language { /// /// ``` /// # use tokei::*; - /// let prolog = Language::new(vec!["%"], vec![("/*", "*/")]); + /// let prolog = Language::new(&["%"], &[("/*", "*/")]); /// let oz = Language::new_pro(); /// /// assert_eq!(prolog.line_comment, oz.line_comment); @@ -195,9 +195,9 @@ impl Language { /// ``` pub fn new_pro() -> Self { Language { - line_comment: vec!["%"], - multi_line: vec![("/*", "*/")], - quotes: vec![("\"", "\"")], + line_comment: &["%"], + multi_line: &[("/*", "*/")], + quotes: &[("\"", "\"")], ..Self::default() } } @@ -207,12 +207,12 @@ impl Language { /// /// ``` /// # use tokei::*; - /// let haskell = Language::new_single(vec!["--"]); + /// let haskell = Language::new_single(&["--"]); /// ``` - pub fn new_single(line_comment: Vec<&'static str>) -> Self { + pub fn new_single(line_comment: &'static [&'static str]) -> Self { Language { line_comment: line_comment, - quotes: vec![("\"", "\"")], + quotes: &[("\"", "\"")], ..Self::default() } } @@ -249,7 +249,7 @@ impl Language { /// /// ``` /// # use tokei::*; - /// let mut rust = Language::new(vec!["//"], vec![("/*", "*/")]).nested(); + /// let mut rust = Language::new(&["//"], &[("/*", "*/")]).nested(); /// assert!(rust.nested); /// ``` pub fn nested(mut self) -> Self { @@ -263,13 +263,13 @@ impl Language { /// /// ``` /// # use tokei::*; - /// let mut d = Language::new(vec!["//"], vec![("/*", "*/")]) - /// .nested_comments(vec![("/+", "+/")]); + /// let mut d = Language::new(&["//"], &[("/*", "*/")]) + /// .nested_comments(&[("/+", "+/")]); /// assert!(d.nested); - /// 
assert_eq!(d.nested_comments, vec![("/+", "+/")]); + /// assert_eq!(d.nested_comments, &[("/+", "+/")]); /// ``` pub fn nested_comments(mut self, - nested_comments: Vec<(&'static str, &'static str)>) + nested_comments: &'static [(&'static str, &'static str)]) -> Self { self.nested = true; @@ -283,12 +283,12 @@ impl Language { /// /// ``` /// # use tokei::*; - /// let mut javascript = Language::new(vec!["//"], vec![("/*", "*/")]) - /// .set_quotes(vec![("\"", "\""), ("'", "'")]); + /// let mut javascript = Language::new(&["//"], &[("/*", "*/")]) + /// .set_quotes(&[("\"", "\""), ("'", "'")]); /// assert!(!javascript.quotes.is_empty()); /// ``` pub fn set_quotes(mut self, - quotes: Vec<(&'static str, &'static str)>) + quotes: &'static [(&'static str, &'static str)]) -> Self { self.quotes = quotes; @@ -360,11 +360,11 @@ impl Default for Language { files: Vec::new(), stats: Vec::new(), lines: 0, - line_comment: Vec::new(), - multi_line: Vec::new(), + line_comment: &[], + multi_line: &[], nested: false, - nested_comments: Vec::new(), - quotes: Vec::new(), + nested_comments: &[], + quotes: &[], } } } diff --git a/src/language/language_type.hbs.rs b/src/language/language_type.hbs.rs index da42e4eda..1c66f478a 100644 --- a/src/language/language_type.hbs.rs +++ b/src/language/language_type.hbs.rs @@ -112,12 +112,12 @@ impl Languages { {{~#if this.line_comment}} {{~#if this.multi_line}} Language::new( - vec![ + &[ {{~#each this.line_comment}} "{{this}}", {{~/each}} ], - vec![ + &[ {{~#each this.multi_line}} ( {{~#each this}} @@ -128,14 +128,14 @@ impl Languages { ] ) {{else}} - Language::new_single(vec![ + Language::new_single(&[ {{~#each this.line_comment}} "{{~this}}", {{~/each}} ]) {{~/if}} {{else}} - Language::new_multi(vec![ + Language::new_multi(&[ {{~#each this.multi_line}} ( {{~#each this}} @@ -150,7 +150,7 @@ impl Languages { .nested() {{~/if}} {{~#if this.nested_comments}} - .nested_comments(vec![ + .nested_comments(&[ {{~#each this.nested_comments}} ( {{~#each 
this}} @@ -161,7 +161,7 @@ impl Languages { ]) {{~/if}} {{~#if this.quotes}} - .set_quotes(vec![ + .set_quotes(&[ {{~#each this.quotes}} ( {{~#each this}} diff --git a/src/language/languages.rs b/src/language/languages.rs index 3f734ecf6..d019901aa 100644 --- a/src/language/languages.rs +++ b/src/language/languages.rs @@ -14,12 +14,13 @@ use std::ops::{AddAssign, Deref, DerefMut}; use encoding; use encoding::all::UTF_8; use encoding::DecoderTrap::Replace; +use log::Level::Trace; +use rayon::prelude::*; #[cfg(feature = "cbor")] use serde_cbor; #[cfg(feature = "json")] use serde_json; #[cfg(feature = "yaml")] use serde_yaml; #[cfg(feature = "toml")] use toml; -use rayon::prelude::*; use stats::Stats; use super::LanguageType::*; @@ -62,13 +63,16 @@ fn count_files((name, ref mut language): (&LanguageType, &mut Language)) { stats.lines += 1; if line.chars().all(char::is_whitespace) { stats.blanks += 1; + trace!("Blank line. So far: {}", stats.blanks); continue; } // FORTRAN has a rule where it only counts as a comment if it's the // first character in the column, so removing starting whitespace // could cause a miscount. - let line = if !is_fortran { line.trim_left() } else { line }; + let line = if !is_fortran { line.trim() } else { line }; + let mut ended_with_comments = false; + let mut had_code = stack.is_empty(); let mut skip = 0; macro_rules! 
skip { ($skip:expr) => {{ @@ -82,6 +86,7 @@ fn count_files((name, ref mut language): (&LanguageType, &mut Language)) { continue; } + ended_with_comments = false; let line = line.as_bytes(); let window = &line[i..]; @@ -90,6 +95,7 @@ fn count_files((name, ref mut language): (&LanguageType, &mut Language)) { skip = 1; } else if window.starts_with(quote_str.as_bytes()) { quote = None; + trace!(r#"End of "{}"."#, quote_str); skip!(quote_str.len()); } continue; @@ -99,39 +105,51 @@ fn count_files((name, ref mut language): (&LanguageType, &mut Language)) { .and_then(|l| Some(window.starts_with(l.as_bytes()))) { let last = stack.pop().unwrap(); + ended_with_comments = true; + + if log_enabled!(Trace) && stack.is_empty() { + trace!(r#"End of "{}"."#, last); + } else { + trace!(r#"End of "{}". Still in comments."#, last); + } + skip!(last.len()); continue; } if stack.is_empty() { - for comment in &language.line_comment { + for comment in language.line_comment { if window.starts_with(comment.as_bytes()) { + trace!(r#"Start of "{}"."#, comment); break 'window; } } - for &(start, end) in &language.quotes { + for &(start, end) in language.quotes { if window.starts_with(start.as_bytes()) { quote = Some(end); + trace!(r#"Start of "{}"."#, start); skip!(start.len()); continue 'window; } } } - for &(start, end) in &language.nested_comments { + for &(start, end) in language.nested_comments { if window.starts_with(start.as_bytes()) { stack.push(end); + trace!(r#"Start of "{}"."#, start); skip!(start.len()); continue 'window; } } - for &(start, end) in &language.multi_line { + for &(start, end) in language.multi_line { if window.starts_with(start.as_bytes()) { if (language.nested && nested_is_empty) || stack.is_empty() { + trace!(r#"Start of nested "{}"."#, start); stack.push(end); } @@ -147,10 +165,17 @@ fn count_files((name, ref mut language): (&LanguageType, &mut Language)) { .chain(language.nested_comments.iter().map(|&(s, _)| s)) .any(|comment| line.starts_with(comment)); - if 
stack.is_empty() && !starts_with_comment { - stats.code += 1; - } else { + trace!("{}", line); + + if ((!stack.is_empty() || ended_with_comments) && !had_code) || + starts_with_comment + { stats.comments += 1; + trace!("Determined to be comment. So far: {} lines", stats.comments); + trace!("Did it have code?: {}", had_code); + } else { + stats.code += 1; + trace!("Determined to be code. So far: {} lines", stats.code); } } @@ -291,7 +316,7 @@ impl Languages { /// ```no_run /// # use tokei::*; /// let mut languages = Languages::new(); - /// languages.get_statistics(vec!["."], vec![".git", "target"]); + /// languages.get_statistics(&["."], vec![".git", "target"]); /// ``` pub fn get_statistics(&mut self, paths: &[&str], ignored: Vec<&str>) { fs::get_all_files(paths, ignored, &mut self.inner); @@ -315,7 +340,7 @@ impl Languages { /// use std::collections::BTreeMap; /// /// let mut languages = Languages::new(); - /// languages.get_statistics(vec!["doesnt/exist"], vec![".git"]); + /// languages.get_statistics(&["doesnt/exist"], vec![".git"]); /// /// let empty_map = languages.remove_empty(); /// @@ -346,7 +371,7 @@ impl Languages { /// 96e6573116b746f74616c5f66696c657301"; /// /// let mut languages = Languages::new(); - /// languages.get_statistics(vec!["build.rs"], vec![]); + /// languages.get_statistics(&["build.rs"], vec![]); /// /// assert_eq!(cbor, hex::encode(&languages.to_cbor().unwrap())); /// # } @@ -384,7 +409,7 @@ impl Languages { /// } /// }"#; /// let mut languages = Languages::new(); - /// languages.get_statistics(vec!["build.rs"], vec![]); + /// languages.get_statistics(&["build.rs"], vec![]); /// /// assert_eq!(json, languages.to_json().unwrap()); /// ``` @@ -428,7 +453,7 @@ impl Languages { /// "lines": 17 /// "name": ".\\build.rs"#; /// let mut languages = Languages::new(); - /// languages.get_statistics(vec!["build.rs"], vec![]); + /// languages.get_statistics(&["build.rs"], vec![]); /// /// assert_eq!(yaml, languages.to_yaml().unwrap()); #[cfg(feature 
= "yaml")] diff --git a/src/lib.rs b/src/lib.rs index 83c9d8c92..7b75dc5c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,7 +29,7 @@ //! //! fn main() { //! // The paths to search. Accepts absolute, relative, and glob paths. -//! let paths = vec!["**/*.rs"]; +//! let paths = &["**/*.rs"]; //! // Exclude any path that contains any of these strings. //! let excluded = vec!["target", ".git"]; //! diff --git a/src/main.rs b/src/main.rs index 989e2168f..cad9b2f61 100644 --- a/src/main.rs +++ b/src/main.rs @@ -39,7 +39,10 @@ fn main() { (@arg input: conflicts_with[languages] ... "The input file(s)/directory(ies) to be counted.") (@arg languages: -l --languages conflicts_with[input] "Prints out supported languages and their extensions.") (@arg output: -o --output possible_values(&["cbor", "json", "toml", "yaml"]) +takes_value "Outputs Tokei in a specific format.") - (@arg verbose: -v --verbose ... "Set verbose output level: 1: for unknown extensions") + (@arg verbose: -v --verbose ... "Set log output level: + 1: to show unknown file extensions, + 2: reserved for future debugging, + 3: enable file level trace. Not recommended on multiple files") (@arg sort: -s --sort possible_values(&["files", "lines", "blanks", "code", "comments"]) +takes_value "Sort languages based on column") ).get_matches(); let files_option = matches.is_present(FILES); @@ -60,6 +63,8 @@ fn main() { let filter_level = match verbose_option { 1 => LevelFilter::Warn, + 2 => LevelFilter::Debug, + 3 => LevelFilter::Trace, _ => LevelFilter::Error, }; diff --git a/src/utils/fs.rs b/src/utils/fs.rs index 845440ef5..88b3292bf 100644 --- a/src/utils/fs.rs +++ b/src/utils/fs.rs @@ -108,7 +108,7 @@ mod test { create_dir(&path_name).expect("Couldn't create directory.rs within temp"); let mut l = Languages::new(); - get_all_files(vec![tmp_dir.into_path().to_str().unwrap()], vec![], &mut l); + get_all_files(&[tmp_dir.into_path().to_str().unwrap()], vec![], &mut l); assert!(l.get(&LanguageType::Rust).is_none()); }