Skip to content

Commit

Permalink
Merge pull request #128 from kdarkhan/fuzz-tests
Browse files Browse the repository at this point in the history
Define fuzzer for xgettext binary
  • Loading branch information
mgeisler committed Dec 22, 2023
2 parents a89f4dd + cb0ce49 commit dbe3b03
Show file tree
Hide file tree
Showing 6 changed files with 325 additions and 262 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,13 @@ jobs:
run: |
cd i18n-helpers
cargo fuzz run gettext -- -only_ascii=1 -max_total_time=30
cargo fuzz cmin normalize
cargo fuzz cmin gettext
- name: Run xgettext fuzzer and minimize corpus
run: |
cd i18n-helpers
cargo fuzz run xgettext -- -only_ascii=1 -max_total_time=30
cargo fuzz cmin xgettext
clippy:
name: Clippy
Expand Down
6 changes: 6 additions & 0 deletions i18n-helpers/fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,9 @@ name = "gettext"
path = "fuzz_targets/gettext.rs"
test = false
doc = false

[[bin]]
name = "xgettext"
path = "fuzz_targets/xgettext.rs"
test = false
doc = false
20 changes: 20 additions & 0 deletions i18n-helpers/fuzz/fuzz_targets/xgettext.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#![no_main]

use std::path::PathBuf;
use std::str::FromStr;

use libfuzzer_sys::fuzz_target;
use mdbook::renderer::RenderContext;
use mdbook::Config;
use mdbook_i18n_helpers::xgettext::create_catalog;
use mdbook_i18n_helpers_fuzz::{create_book, BookItem};

fuzz_target!(|inputs: (&str, Vec<BookItem>)| {
let (summary, book_items) = inputs;

let book = create_book(book_items);

let ctx = RenderContext::new(PathBuf::new(), book, Config::from_str("").unwrap(), "");

let _ = create_catalog(&ctx, |_| Ok(summary.to_string()));
});
263 changes: 2 additions & 261 deletions i18n-helpers/src/bin/mdbook-xgettext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,86 +21,9 @@

use anyhow::{anyhow, Context};
use mdbook::renderer::RenderContext;
use mdbook::BookItem;
use mdbook_i18n_helpers::{extract_events, extract_messages, reconstruct_markdown, wrap_sources};
use polib::catalog::Catalog;
use polib::message::Message;
use polib::metadata::CatalogMetadata;
use pulldown_cmark::{Event, Tag};
use mdbook_i18n_helpers::xgettext::create_catalog;
use std::{fs, io};

/// Strip an optional link from a Markdown string.
fn strip_link(text: &str) -> String {
let events = extract_events(text, None)
.into_iter()
.filter_map(|(_, event)| match event {
Event::Start(Tag::Link(..)) => None,
Event::End(Tag::Link(..)) => None,
_ => Some((0, event)),
})
.collect::<Vec<_>>();
let (without_link, _) = reconstruct_markdown(&events, None);
without_link
}

fn add_message(catalog: &mut Catalog, msgid: &str, source: &str) {
let sources = match catalog.find_message(None, msgid, None) {
Some(msg) => wrap_sources(&format!("{}\n{}", msg.source(), source)),
None => String::from(source),
};
let message = Message::build_singular()
.with_source(sources)
.with_msgid(String::from(msgid))
.done();
catalog.append_or_update(message);
}

fn create_catalog(ctx: &RenderContext) -> anyhow::Result<Catalog> {
let mut metadata = CatalogMetadata::new();
if let Some(title) = &ctx.config.book.title {
metadata.project_id_version = String::from(title);
}
if let Some(lang) = &ctx.config.book.language {
metadata.language = String::from(lang);
}
let now = chrono::Local::now();
metadata.pot_creation_date = now.to_rfc3339_opts(chrono::SecondsFormat::Secs, true);
metadata.mime_version = String::from("1.0");
metadata.content_type = String::from("text/plain; charset=UTF-8");
metadata.content_transfer_encoding = String::from("8bit");
let mut catalog = Catalog::new(metadata);

// First, add all chapter names and part titles from SUMMARY.md.
let summary_path = ctx.config.book.src.join("SUMMARY.md");
let summary = std::fs::read_to_string(ctx.root.join(&summary_path))
.with_context(|| anyhow!("Failed to read {}", summary_path.display()))?;
for (lineno, msgid) in extract_messages(&summary) {
let source = format!("{}:{}", summary_path.display(), lineno);
// The summary is mostly links like "[Foo *Bar*](foo-bar.md)".
// We strip away the link to get "Foo *Bar*". The formatting
// is stripped away by mdbook when it sends the book to
// mdbook-gettext -- we keep the formatting here in case the
// same text is used for the page title.
add_message(&mut catalog, &strip_link(&msgid), &source);
}

// Next, we add the chapter contents.
for item in ctx.book.iter() {
if let BookItem::Chapter(chapter) = item {
let path = match &chapter.path {
Some(path) => ctx.config.book.src.join(path),
None => continue,
};
for (lineno, msgid) in extract_messages(&chapter.content) {
let source = format!("{}:{}", path.display(), lineno);
add_message(&mut catalog, &msgid, &source);
}
}
}

Ok(catalog)
}

fn main() -> anyhow::Result<()> {
let ctx = RenderContext::from_json(&mut io::stdin()).context("Parsing stdin")?;
let cfg = ctx
Expand All @@ -115,191 +38,9 @@ fn main() -> anyhow::Result<()> {
fs::create_dir_all(&ctx.destination)
.with_context(|| format!("Could not create {}", ctx.destination.display()))?;
let output_path = ctx.destination.join(path);
let catalog = create_catalog(&ctx).context("Extracting messages")?;
let catalog = create_catalog(&ctx, std::fs::read_to_string).context("Extracting messages")?;
polib::po_file::write(&catalog, &output_path)
.with_context(|| format!("Writing messages to {}", output_path.display()))?;

Ok(())
}

#[cfg(test)]
mod tests {
use super::*;
use mdbook::MDBook;
use pretty_assertions::assert_eq;

fn create_render_context(
files: &[(&str, &str)],
) -> anyhow::Result<(RenderContext, tempfile::TempDir)> {
let tmpdir = tempfile::tempdir().context("Could not create temporary directory")?;
std::fs::create_dir(tmpdir.path().join("src"))
.context("Could not create src/ directory")?;

for (path, contents) in files {
std::fs::write(tmpdir.path().join(path), contents)
.with_context(|| format!("Could not write {path}"))?;
}

let mdbook = MDBook::load(tmpdir.path()).context("Could not load book")?;
let ctx = RenderContext::new(mdbook.root, mdbook.book, mdbook.config, "dest");
Ok((ctx, tmpdir))
}

#[test]
fn test_strip_link_empty() {
assert_eq!(strip_link(""), "");
}

#[test]
fn test_strip_link_text() {
assert_eq!(strip_link("Summary"), "Summary");
}

#[test]
fn test_strip_link_with_formatting() {
// The formatting is automatically normalized.
assert_eq!(strip_link("[foo *bar* `baz`](foo.md)"), "foo _bar_ `baz`");
}

#[test]
fn test_create_catalog_defaults() -> anyhow::Result<()> {
let (ctx, _tmp) =
create_render_context(&[("book.toml", "[book]"), ("src/SUMMARY.md", "")])?;

let catalog = create_catalog(&ctx).unwrap();
assert_eq!(catalog.metadata.project_id_version, "");
assert!(!catalog.metadata.pot_creation_date.is_empty());
assert!(catalog.metadata.po_revision_date.is_empty());
assert_eq!(catalog.metadata.language, "en");
assert_eq!(catalog.metadata.mime_version, "1.0");
assert_eq!(catalog.metadata.content_type, "text/plain; charset=UTF-8");
assert_eq!(catalog.metadata.content_transfer_encoding, "8bit");
Ok(())
}

#[test]
fn test_create_catalog_metadata() -> anyhow::Result<()> {
let (ctx, _tmp) = create_render_context(&[
(
"book.toml",
"[book]\n\
title = \"My Translatable Book\"\n\
language = \"fr\"",
),
("src/SUMMARY.md", ""),
])?;

let catalog = create_catalog(&ctx).unwrap();
assert_eq!(catalog.metadata.project_id_version, "My Translatable Book");
assert_eq!(catalog.metadata.language, "fr");
Ok(())
}

#[test]
fn test_create_catalog_summary_formatting() -> anyhow::Result<()> {
let (ctx, _tmp) = create_render_context(&[
("book.toml", "[book]"),
(
"src/SUMMARY.md",
"# Summary\n\
\n\
[Prefix Chapter](prefix.md)\n\
\n\
# Part Title\n\
\n\
- [Foo *Bar*](foo.md)\n\
\n\
----------\n\
\n\
- [Baz `Quux`](baz.md)\n\
\n\
[Suffix Chapter](suffix.md)",
),
// Without this, mdbook would automatically create the
// files based on the summary above. This would add
// unnecessary headings below.
("src/prefix.md", ""),
("src/foo.md", ""),
("src/baz.md", ""),
("src/suffix.md", ""),
])?;

let catalog = create_catalog(&ctx)?;
assert_eq!(
catalog
.messages()
.map(|msg| msg.msgid())
.collect::<Vec<&str>>(),
&[
"Summary",
"Prefix Chapter",
"Part Title",
"Foo _Bar_",
"Baz `Quux`",
"Suffix Chapter",
]
);

Ok(())
}

#[test]
fn test_create_catalog() -> anyhow::Result<()> {
let (ctx, _tmp) = create_render_context(&[
("book.toml", "[book]"),
("src/SUMMARY.md", "- [The *Foo* Chapter](foo.md)"),
(
"src/foo.md",
"# How to Foo\n\
\n\
First paragraph.\n\
Same paragraph.\n",
),
])?;

let catalog = create_catalog(&ctx)?;

for msg in catalog.messages() {
assert!(!msg.is_translated());
}

assert_eq!(
catalog
.messages()
.map(|msg| (msg.source(), msg.msgid()))
.collect::<Vec<_>>(),
&[
("src/SUMMARY.md:1", "The _Foo_ Chapter"),
("src/foo.md:1", "How to Foo"),
("src/foo.md:3", "First paragraph. Same paragraph."),
]
);

Ok(())
}

#[test]
fn test_create_catalog_duplicates() -> anyhow::Result<()> {
let (ctx, _tmp) = create_render_context(&[
("book.toml", "[book]"),
("src/SUMMARY.md", "- [Foo](foo.md)"),
(
"src/foo.md",
"# Foo\n\
\n\
Foo\n",
),
])?;

let catalog = create_catalog(&ctx)?;
assert_eq!(
catalog
.messages()
.map(|msg| (msg.source(), msg.msgid()))
.collect::<Vec<_>>(),
&[("src/SUMMARY.md:1 src/foo.md:1 src/foo.md:3", "Foo"),]
);

Ok(())
}
}
1 change: 1 addition & 0 deletions i18n-helpers/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use syntect::parsing::{ParseState, Scope, ScopeStack, SyntaxSet};
pub mod directives;
pub mod gettext;
pub mod normalize;
pub mod xgettext;

/// Re-wrap the sources field of a message.
///
Expand Down
Loading

0 comments on commit dbe3b03

Please sign in to comment.