Skip to content

Commit

Permalink
CLI fix: markdown formats should trigger include_formatting
Browse files: browse the repository at this point in the history
  • Loading branch information
adbar committed Jul 18, 2024
1 parent 30c34a5 commit b65b08a
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
3 changes: 2 additions & 1 deletion tests/cli_tests.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -227,13 +227,14 @@ def test_sysoutput():
else:
assert cli_utils.determine_counter_dir("testdir", 0) == "testdir\\1"
# test file writing
testargs = ["", "--csv", "-o", "/dev/null/", "-b", "/dev/null/"]
testargs = ["", "--markdown", "-o", "/dev/null/", "-b", "/dev/null/"]
with patch.object(sys, "argv", testargs):
args = cli.parse_args(testargs)
result = "DADIDA"
cli_utils.write_result(result, args)
# process with backup directory and no counter
options = args_to_extractor(args)
assert options.format == "markdown" and options.formatting is True
assert cli_utils.process_result("DADIDA", args, None, options) is None
# test keeping dir structure
testargs = ["", "-i", "myinputdir/", "-o", "test/", "--keep-dirs"]
Expand Down
5 changes: 3 additions & 2 deletions trafilatura/settings.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -88,7 +88,7 @@ def __init__(self, *, config=DEFAULT_CONFIG, output_format="txt",
self.fast = fast
self.focus = "recall" if recall else "precision" if precision else "balanced"
self.comments = comments
self.formatting = formatting or output_format == "markdown"
self.formatting = formatting or self.format == "markdown"
self.links = links
self.images = images
self.tables = tables
Expand Down Expand Up @@ -123,13 +123,14 @@ def args_to_extractor(args, url=None):
"Derive extractor configuration from CLI args."
options = Extractor(
config=use_config(filename=args.config_file), output_format=args.output_format,
formatting=args.formatting,
precision=args.precision, recall=args.recall,
comments=args.no_comments, tables=args.no_tables,
dedup=args.deduplicate, lang=args.target_language, url=url,
with_metadata=args.with_metadata, only_with_metadata=args.only_with_metadata,
tei_validation=args.validate_tei
)
for attr in ("fast", "formatting", "images", "links"):
for attr in ("fast", "images", "links"):
setattr(options, attr, getattr(args, attr))
return options

Expand Down

0 comments on commit b65b08a

Please sign in to comment.