From ee9f72221b0eb0590f421fdbbf73925d083cea90 Mon Sep 17 00:00:00 2001 From: Allen Barker Date: Sun, 9 Apr 2017 12:19:33 -0400 Subject: [PATCH] check unicode errors on metadata printing in verbose mode --- CHANGELOG.rst | 4 +-- README.rst | 32 +++++++++++++---------- src/pdfCropMargins/main_pdfCropMargins.py | 28 ++++++++++++-------- 3 files changed, 37 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3cc3748..7039253 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,8 +3,8 @@ History ======= -0.1.3 (no date yet) -------------------- +0.1.3 (2017-03-14) +------------------ New Features: diff --git a/README.rst b/README.rst index 1f520dd..c99cf31 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,7 @@ Features - Works with either the pdftoppm program or with Ghostscript. - A version of pdftoppm for Windows is included. - Renders and analyzes page images to find the bounding boxes, which allows it - to deal with noisy images. + to deal with noisy scanned PDFs. - Ghostscript can optionally be used to find the bounding boxes directly. - Can automatically apply a Ghostscript repair operation to attempt to fix corrupt PDF files. @@ -28,7 +28,7 @@ Features their margins. - Can uniformly crop even and odd pages separately. - Can automatically run a document previewer on the output file. -- The format of any automatically-generated output filenames is easily +- The format of automatically-generated output-file names is easily modifiable. - Preserves document catalog information such as outlines if possible. - Implements a rudimentary 'undo' capability by default. @@ -69,18 +69,20 @@ The easiest way to install pdfCropMargins is to install using pip:: pip install pdfCropMargins -The ``pip`` program should be installed with Python. You can run ``python -m -ensurepip`` if it is not, or install from the repos in Linux. +The ``pip`` program should be automatically installed along with Python. You +can run ``python -m ensurepip`` if it is not, or install from the repos in +Linux. -The package can also be run directly from its source code directory. In that -case the PyPDF2 Python package must be installed. The Pillow imaging package -is also necessary if Ghostscript is unavailable as well as for certain advanced -features which use explicit image analysis. +The package can also be run directly from its source code directory tree. In +that case the PyPDF2 Python package must already be installed. The Pillow +imaging package is also necessary if Ghostscript is unavailable, as well as for +certain advanced features which use explicit image analysis. Run the script in +the top-level bin directory called ``pdfCropMargins.py``. Running ======= -After installation the program can be run with a command such as:: +After installation via pip the program can be run with a command such as:: pdf-crop-margins -v -s -u your-file.pdf @@ -88,14 +90,16 @@ For help, run:: pdf-crop-margins -h | more -On Windows you may need to put the Python ``Scripts`` directory into your -environment ``PATH`` in order to avoid having to use the full pathname. +On Windows you may need to explicitly put the Python distribution's ``Scripts`` +directory into your environment ``PATH`` in order to avoid having to use the +full pathname. To diagnose unexpected crops, try running with the ``-v`` verbose argument. Running without ``-u`` will crop each page separately, so you can see which -pages might be causing problems (such as with noise near the borders or margin -text). To get cropped pages all the same size be sure to use both ``-s`` to -make pages the same size and ``-u`` to crop each page by the same amount. +pages might be causing problems (such as pages with noise near the borders or +margin text). To get cropped pages all the same size be sure to use both +``-s`` to make pages the same size and ``-u`` to crop each page by the same +amount. Documentation ============= diff --git a/src/pdfCropMargins/main_pdfCropMargins.py b/src/pdfCropMargins/main_pdfCropMargins.py index 23e516b..1a1f3a6 100644 --- a/src/pdfCropMargins/main_pdfCropMargins.py +++ b/src/pdfCropMargins/main_pdfCropMargins.py @@ -902,17 +902,23 @@ def main_crop(): if args.verbose and not metadata_info: print("\nNo readable metadata in the document.") elif args.verbose: - print("\nThe document's metadata, if set:\n") - print(" The Author attribute set in the input document is:\n %s" - % (metadata_info.author)) - print(" The Creator attribute set in the input document is:\n %s" - % (metadata_info.creator)) - print(" The Producer attribute set in the input document is:\n %s" - % (metadata_info.producer)) - print(" The Subject attribute set in the input document is:\n %s" - % (metadata_info.subject)) - print(" The Title attribute set in the input document is:\n %s" - % (metadata_info.title)) + try: + print("\nThe document's metadata, if set:\n") + print(" The Author attribute set in the input document is:\n %s" + % (metadata_info.author)) + print(" The Creator attribute set in the input document is:\n %s" + % (metadata_info.creator)) + print(" The Producer attribute set in the input document is:\n %s" + % (metadata_info.producer)) + print(" The Subject attribute set in the input document is:\n %s" + % (metadata_info.subject)) + print(" The Title attribute set in the input document is:\n %s" + % (metadata_info.title)) + # Some metadata cannot be decoded or encoded, at least on Windows. Could + # print from a function instead to write all the lines which can be written. + except (UnicodeDecodeError, UnicodeEncodeError): + print("\nWarning: Could not write all the document's metadata to the screen." + "\nGot a UnicodeEncodeError or a UnicodeDecodeError.") ## ## Now compute the set containing the pyPdf page number of all the pages