doc: partially document code parts

dnaka91 · Jun 22, 2023 · 2518ff7 · 2518ff7
1 parent 93de0d1
commit 2518ff7
Show file tree

Hide file tree

Showing 7 changed files with 197 additions and 10 deletions.
diff --git a/src/cargo.rs b/src/cargo.rs
@@ -4,13 +4,18 @@ use anyhow::{bail, Result};
 use camino::{Utf8Path, Utf8PathBuf};
 use serde::Deserialize;
 
+/// Locate the output directory, where the report files are written to.
+///
+/// This will only work if the current working directory contains a Rust project, as the report is
+/// saved under `<target_dir>/llvm-cov-pretty`.
 pub fn output_dir() -> Result<Utf8PathBuf> {
     let root = find_root()?;
     let target_dir = find_target_dir(&root)?;
 
     Ok(target_dir.join(env!("CARGO_PKG_NAME")))
 }
 
+/// Use `cargo` to find the root folder of the project under the current working directory.
 fn find_root() -> Result<Utf8PathBuf> {
     #[derive(Deserialize)]
     struct LocateProject {
@@ -34,6 +39,11 @@ fn find_root() -> Result<Utf8PathBuf> {
         .map_err(Into::into)
 }
 
+/// Use `cargo` to find the `target` output directory of the given project.
+///
+/// Similar to how `cargo-llvm-cov` creates custom output folders in the `target` folder, we create
+/// our own `target/llvm-cov-pretty` folder that holds the report files. Therefore, we need to find
+/// the base `target` folder.
 fn find_target_dir(root: &Utf8Path) -> Result<Utf8PathBuf> {
     #[derive(Deserialize)]
     struct Metadata {

diff --git a/src/cli.rs b/src/cli.rs
@@ -1,3 +1,5 @@
+//! Handling of command line arguments.
+
 use std::{
     fs::OpenOptions,
     io::{self, BufWriter, Write},
@@ -29,6 +31,7 @@ pub struct Cli {
 }
 
 impl Cli {
+    /// Parse the command line arguments passed to the program.
     pub fn parse() -> Self {
         <Self as Parser>::parse()
     }
@@ -50,6 +53,7 @@ pub enum Command {
     },
 }
 
+/// Generate shell completions for the given shell variant and write them to STDOUT.
 pub fn completions(shell: Shell) {
     clap_complete::generate(
         shell,
@@ -59,6 +63,11 @@ pub fn completions(shell: Shell) {
     );
 }
 
+/// Generate `man` pages and write them into the given directory.
+///
+/// The output directory must already exist, but if a file with the same name as a man page already
+/// exists, an error will be returned. This behavior ensures that we don't accidentally overwrite
+/// any existing files (in case the wrong folder was picked by accident).
 pub fn manpages(dir: &Utf8Path) -> Result<()> {
     fn print(dir: &Utf8Path, app: &clap::Command) -> Result<()> {
         let name = app.get_display_name().unwrap_or_else(|| app.get_name());

diff --git a/src/highlight.rs b/src/highlight.rs
@@ -1,10 +1,23 @@
+//! Logic for turning plain text source code into HTML spans for code highlighting.
+//!
+//! Each line of source code is processed and turned into a set of HTML spans with CSS classes.
+//! Those classes define the coloring of each piece of code. The exact coloring is defined in a
+//! separate CSS file, which can be selected during code generation by a theme name.
+
 use std::fmt::{Display, Write};
 
 use anyhow::Result;
 use camino::Utf8Path;
 use syntect::parsing::{ParseState, Scope, ScopeStack, ScopeStackOp, SyntaxSet, SCOPE_REPO};
 
+/// The highlighter is the main component that performs transformation of plain source code into
+/// highlighted HTML tags.
+///
+/// It should be shared whenever possible, instead of cloning instances. This is to prevent the
+/// repeated (relatively slow) generation of required state.
 pub struct Highlighter {
+    /// Collection of syntaxes, that can parse source code into ASTs (**a**bstract **s**yntax
+    /// **t**ree), that can then be further turned into scopes for highlighting.
     ps: SyntaxSet,
 }
 
@@ -15,6 +28,7 @@ impl Highlighter {
         }
     }
 
+    /// Read the file at the given path and turn each line into annotated HTML content.
     pub fn file_to_spans(&self, file: &Utf8Path, no_highlight: bool) -> Result<Vec<String>> {
         let content = std::fs::read_to_string(file)?;
         let syntax = self.ps.find_syntax_by_extension("rs").unwrap();
@@ -36,6 +50,7 @@ impl Highlighter {
     }
 }
 
+/// Convert a single source code line into a set of HTML spans.
 fn line_tokens_to_span(
     line: &str,
     ops: &[(usize, ScopeStackOp)],
@@ -60,7 +75,9 @@ fn line_tokens_to_span(
     Ok(buf)
 }
 
+/// Append a text fragment to the given buffer, wrapping it in a span with the matching CSS classes.
 fn append_span(buf: &mut String, scopes: &[Scope], line: &str) -> Result<(), std::fmt::Error> {
+    // No point in highlighting whitespace, so we can skip the overhead of a span around it.
     if line.chars().all(char::is_whitespace) {
         return write!(buf, "{}", escape(line));
     }
@@ -80,6 +97,7 @@ fn append_span(buf: &mut String, scopes: &[Scope], line: &str) -> Result<(), std
     Ok(())
 }
 
+/// Turn the current code scope into a list of CSS classes and append them to the buffer.
 fn scope_to_classes(s: &mut String, scope: Scope) {
     let repo = SCOPE_REPO.lock().unwrap();
     for i in (0..scope.len()).rev() {
@@ -95,6 +113,8 @@ fn scope_to_classes(s: &mut String, scope: Scope) {
     }
 }
 
+/// Escape the content into HTML-safe text, so it can be combined in a template without causing
+/// clashes with surrounding HTML tags.
 fn escape(value: &str) -> impl Display + '_ {
     askama_escape::escape(value, askama_escape::Html)
 }
diff --git a/src/main.rs b/src/main.rs
@@ -185,12 +185,12 @@ fn merge_function_info(files: &mut Vec<FileInfo>, functions: &[schema::Function]
             for region in &function.regions {
                 if region.execution_count > 0 {
                     file.called
-                        .entry(region.line_start as usize)
+                        .entry(region.start.0 as usize)
                         .or_default()
                         .push((function.name.clone(), region.execution_count));
                 } else {
                     file.uncalled
-                        .entry(region.line_start as usize)
+                        .entry(region.start.0 as usize)
                         .or_default()
                         .push(function.name.clone());
                 }

diff --git a/src/minify.rs b/src/minify.rs
@@ -1,3 +1,5 @@
+//! Logic for minifying HTML content.
+
 pub struct Minifier(minify_html::Cfg);
 
 impl Minifier {
@@ -14,3 +16,22 @@ impl Minifier {
         minify_html::minify(html.as_ref(), &self.0)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use indoc::indoc;
+
+    #[test]
+    fn run_minifier() {
+        let min = super::Minifier::new().minify(indoc! {"
+            <p>
+                <span>te</span>
+                <span>st</span>
+            </p>
+        "});
+        assert_eq!(
+            "<p><span>te</span> <span>st</span></p>",
+            String::from_utf8(min).unwrap()
+        );
+    }
+}
diff --git a/src/schema.rs b/src/schema.rs
@@ -1,31 +1,65 @@
+//! Definitions for the JSON export format from `llvm-cov`.
+
 use camino::Utf8PathBuf;
 use serde::Deserialize;
 
+/// Root structure of the JSON output that is generated by `cargo-llvm-cov`/`llvm-cov`.
 #[derive(Deserialize)]
 pub struct JsonExport {
+    /// (Likely) semantic version of the export format. We don't check this field, but it should be
+    /// at least `2.0.0`.
     pub version: String,
+    /// Type of export information contained in the `data` field. We don't check this field, but it
+    /// should always be `llvm.coverage.json.export`.
     #[serde(rename = "type")]
     pub ty: String,
+    /// The actual export data. It seems to always be exactly one entry in an array.
+    ///
+    /// Probably this could contain more than one entry, if the report is further processed by
+    /// other LLVM utilities. But when the report comes from `cargo-llvm-cov` it's always a single
+    /// entry, and that's our main use case.
     pub data: [Export; 1],
 }
 
+/// Coverage data for a single `llvm-cov` run, usually over a single project or workspace.
 #[derive(Deserialize)]
 pub struct Export {
+    /// List of coverage information per file.
     pub files: Vec<File>,
+    /// Additional coverage information for specific function invocations (as well as the lack of
+    /// invocation).
     #[serde(default)]
     pub functions: Vec<Function>,
+    /// Overall statistics of the coverage data summed up from all the file and function
+    /// information.
     pub totals: Summary,
 }
 
+/// Coverage data for a single source code file.
 #[derive(Deserialize)]
 pub struct File {
+    /// Absolute path of the file that was tested.
     pub filename: Utf8PathBuf,
+    /// Main list of code coverage information, which is described as segments.
+    ///
+    /// These are elements that usually form a set of pairs and describe the start and end range
+    /// that was covered. They must be manually combined back together, but appear to be in proper
+    /// order, making it easy to scan over them and pair up start and end segments.
     #[serde(default)]
     pub segments: Vec<Segment>,
+    /// Coverage for code branches (like if statements, I assume).
+    ///
+    /// **Note:** Appears to be unused for Rust projects (as branch coverage support is still
+    /// experimental).
     #[serde(default)]
     pub branches: Vec<BranchRegion>,
+    /// Coverage for code expansions caused by generics or templates. For example, structs with
+    /// generic arguments that get expanded into concrete versions for specific types.
+    ///
+    /// **Note:** Appears to be unused for Rust projects.
     #[serde(default)]
     pub expansions: Vec<Expansion>,
+    /// Coverage statistics summed up for the entire file.
     pub summary: Summary,
 }
 
@@ -58,42 +92,61 @@ impl<'de> Deserialize<'de> for Segment {
 }
 
 impl Segment {
+    /// Whether this segment signals the start of a code coverage region.
     pub fn is_start(&self) -> bool {
         self.has_count && self.is_region_entry && !self.is_gap_region
     }
 
+    /// Whether this segment is considered the end of an open code coverage region.
     pub fn is_end(&self) -> bool {
         !self.has_count && !self.is_region_entry && !self.is_gap_region
     }
 }
 
+/// Coverage information for a function invocation (or the lack of it).
 #[derive(Deserialize)]
 pub struct Function {
+    /// Fully qualified function name in its demangled form.
     #[serde(with = "demangle")]
     pub name: String,
+    /// Total invocation count for the function.
     pub count: u64,
+    /// Source code regions where the function call happened (or is missing).
     pub regions: Vec<Region>,
+    /// Similar to [`File::branches`], but for functions.
     pub branches: Vec<BranchRegion>,
+    /// Files in which this function is defined.
+    ///
+    /// As a function is usually defined in only one place, this should always contain a single
+    /// entry.
     pub filenames: Vec<Utf8PathBuf>,
 }
 
 pub struct Region {
-    pub line_start: u64,
-    pub column_start: u64,
-    pub line_end: u64,
-    pub column_end: u64,
+    /// Start position of the region (line and column).
+    pub start: (u64, u64),
+    /// End position of the region (line and column).
+    pub end: (u64, u64),
+    /// How many times the function was executed in this region.
     pub execution_count: u64,
+    /// Unique identifier for the file, but appears to be always `0`.
     pub file_id: u64,
+    /// Unique identifier for the expanded file, but appears to be always `0`.
     pub expanded_file_id: u64,
     pub kind: RegionKind,
 }
 
+/// Kind of region that is described in [`Region`].
+///
+/// **Note:** Although there are several kinds, only [`Self::Code`] ever appeared in the coverage
+/// data. Probably the other variants are not relevant or not supported for Rust.
 pub enum RegionKind {
     Code,
     Expansion,
     Skipped,
     Gap,
     Branch,
+    /// Fallback value for any unsupported future variants.
     Unknown(u8),
 }
 
@@ -106,10 +159,8 @@ impl<'de> Deserialize<'de> for Region {
         struct Array(u64, u64, u64, u64, u64, u64, u64, u8);
 
         Array::deserialize(deserializer).map(|a| Self {
-            line_start: a.0,
-            column_start: a.1,
-            line_end: a.2,
-            column_end: a.3,
+            start: (a.0, a.1),
+            end: (a.2, a.3),
             execution_count: a.4,
             file_id: a.5,
             expanded_file_id: a.6,
@@ -175,6 +226,7 @@ pub struct Expansion {
 }
 
 #[derive(Clone, Deserialize)]
+#[cfg_attr(test, derive(Default))]
 pub struct Summary {
     pub lines: CoverageCounts,
     pub functions: CoverageCounts,
@@ -183,14 +235,19 @@ pub struct Summary {
     pub branches: CoverageCounts2,
 }
 
+/// Sum of coverage counts as part of the [`Summary`].
 #[derive(Clone, Deserialize)]
+#[cfg_attr(test, derive(Default))]
 pub struct CoverageCounts {
     pub count: u64,
     pub covered: u64,
     pub percent: f64,
 }
 
+/// Sum of coverage counts like [`CoverageCounts`], but including the extra [`Self::notcovered`]
+/// field, which is only present for [`Summary::regions`] and [`Summary::branches`].
 #[derive(Clone, Deserialize)]
+#[cfg_attr(test, derive(Default))]
 pub struct CoverageCounts2 {
     pub count: u64,
     pub covered: u64,
@@ -199,6 +256,8 @@ pub struct CoverageCounts2 {
 }
 
 mod demangle {
+    //! Custom deserializer that demangles Rust type and function names back into readable variants.
+
     use std::fmt;
 
     pub fn deserialize<'de, D>(deserializer: D) -> Result<String, D::Error>