Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 46 additions & 7 deletions src/bigocrpdf/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,9 +414,12 @@
info_p = sub.add_parser("info", help=_("Show PDF metadata and page count"))
info_p.add_argument("input", type=Path, help=_("Input PDF file"))

# --- export subcommands share the same input-PDF help text ---
input_pdf_with_text_help = _("Input PDF file (must have text layer)")

# --- export-odf ---
odf_p = sub.add_parser("export-odf", help=_("Export OCR'd PDF as ODF document"))
odf_p.add_argument("input", type=Path, help=_("Input PDF file (must have text layer)"))
odf_p.add_argument("input", type=Path, help=input_pdf_with_text_help)
odf_p.add_argument(
"-o",
"--output",
Expand All @@ -427,7 +430,7 @@

# --- export-txt ---
txt_p = sub.add_parser("export-txt", help=_("Export OCR'd PDF as formatted text"))
txt_p.add_argument("input", type=Path, help=_("Input PDF file (must have text layer)"))
txt_p.add_argument("input", type=Path, help=input_pdf_with_text_help)
txt_p.add_argument(
"-o",
"--output",
Expand All @@ -436,6 +439,22 @@
help=_("Output text file (default: same name as input with .txt)"),
)

# --- export-md ---
md_p = sub.add_parser("export-md", help=_("Export OCR'd PDF as Markdown"))
md_p.add_argument("input", type=Path, help=input_pdf_with_text_help)
md_p.add_argument(
"-o",
"--output",
type=Path,
default=None,
help=_("Output Markdown file (default: same name as input with .md)"),
)
md_p.add_argument(
"--front-matter",
action="store_true",
help=_("Prepend YAML front-matter (title, source, page count, date)."),
)

# --- edit ---
edit_p = sub.add_parser("edit", help=_("Open interactive GUI editor"))
edit_p.add_argument("input", type=Path, help=_("PDF file to edit"))
Expand All @@ -460,11 +479,13 @@
scanner_enabled = True

# Enable preprocessing master switch if any enhancement is requested
enable_preprocessing = any([
args.auto_contrast,
args.auto_brightness,
args.denoise,
])
enable_preprocessing = any(
[
args.auto_contrast,
args.auto_brightness,
args.denoise,
]
)

config = OCRConfig(
language=args.language,
Expand Down Expand Up @@ -718,6 +739,23 @@
return 0


def _cmd_export_md(args: argparse.Namespace, logger: logging.Logger) -> int:
    """Run the 'export-md' subcommand.

    Converts the input PDF to Markdown via ``convert_pdf_to_markdown`` and
    writes the resulting text as UTF-8 to the destination file.

    Args:
        args: Parsed CLI namespace; reads ``input``, ``output`` and
            ``front_matter``.
        logger: Logger that receives the progress message.

    Returns:
        0 once the Markdown file has been written.
    """
    from bigocrpdf.utils.tsv_odf_converter import convert_pdf_to_markdown

    # An explicit --output wins; otherwise reuse the input name with ".md".
    destination = args.output if args.output else args.input.with_suffix(".md")
    md_path = str(destination)

    logger.info(f"Converting {args.input} → {md_path}")
    markdown = convert_pdf_to_markdown(
        str(args.input),
        include_front_matter=args.front_matter,
    )
    with open(md_path, "w", encoding="utf-8") as out_file:
        out_file.write(markdown)
    print(f"Saved: {md_path}")
    return 0


def _cmd_edit(args: argparse.Namespace, logger: logging.Logger) -> int:
"""Handle the 'edit' command — launch GUI editor directly."""
try:
Expand Down Expand Up @@ -927,7 +965,7 @@
except Exception as e:
elapsed = time.perf_counter() - t0
print()
logger.error(f"Fatal error after {elapsed:.1f}s: {e}")

Check failure on line 968 in src/bigocrpdf/cli.py

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

Use "logging.exception()" instead.

See more on https://sonarcloud.io/project/issues?id=biglinux_bigocrpdf&issues=AZ5JA66rOoPJ8sHDKXG0&open=AZ5JA66rOoPJ8sHDKXG0&pullRequest=19
import traceback

traceback.print_exc()
Expand Down Expand Up @@ -978,6 +1016,7 @@
"info": _cmd_info,
"export-odf": _cmd_export_odf,
"export-txt": _cmd_export_txt,
"export-md": _cmd_export_md,
"edit": _cmd_edit,
}

Expand Down
Loading