From 3d5ee53145b294e8d4700f5e2aa072d84b87e73c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81ngel=20Carias?= <69056344+aescarias@users.noreply.github.com> Date: Sun, 14 Apr 2024 11:37:52 -0600 Subject: [PATCH] feat: 0.1.1 :tada: A very small update that now exposes incremental updates under a new attribute. This also fixes a bug that caused an infinite loop while tokenizing an indirect reference. --- docs/source/conf.py | 2 +- docs/source/guides/reading-pdf.rst | 10 +++++----- pdfnaut/__init__.py | 2 +- pdfnaut/parsers/pdf.py | 4 ++-- pyproject.toml | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index a698ec6..df26170 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -9,7 +9,7 @@ project = 'pdfnaut' copyright = '2024, Angel Carias' author = 'Angel Carias' -release = '0.1.0' +release = '0.1.1' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/source/guides/reading-pdf.rst b/docs/source/guides/reading-pdf.rst index 8be8978..55ac6cc 100644 --- a/docs/source/guides/reading-pdf.rst +++ b/docs/source/guides/reading-pdf.rst @@ -46,7 +46,7 @@ Let's take, for example, the ``sample.pdf`` file available in our `test suite >> page_contents PdfStream(details={'Length': 1074}) -We find ourselves with a stream. The contents of pages are defined in streams known as content streams. In this case, it is not compressed (it does not have a Filter). So we can easily read it. +We find ourselves with a stream. The contents of pages are defined in streams known as **content streams**. This kind of stream includes instructions on how a PDF processor should render this page. In this case, it is not compressed (it does not have a Filter). So we can easily read it: ..
code-block:: python >>> page_contents.decompress() b'2 J\r\nBT\r\n0 0 0 rg\r\n/F1 0027 Tf\r\n57.3750 722.2800 Td\r\n( A Simple PDF File ) Tj\r\nET\r\nBT\r\n/F1 0010 Tf\r\n69.2500 688.6080 Td\r\n[...]ET\r\n' -The content stream is comprised of operators and operands. In this case, it would simply write "A Simple PDF File" at the position defined by the Td operands (and with the font /F1 included in our Resources which, in this case, points to Helvetica). +A content stream is comprised of operators and operands. In this case, it would simply write "A Simple PDF File" at the position defined by the Td operands (and with the font /F1 included in our Resources which, in this case, points to Helvetica). diff --git a/pdfnaut/__init__.py b/pdfnaut/__init__.py index fce5ac7..92e15b8 100644 --- a/pdfnaut/__init__.py +++ b/pdfnaut/__init__.py @@ -8,6 +8,6 @@ __all__ = ("PdfParser", "PdfSerializer") __name__ = "pdfnaut" -__version__ = "0.1.0" +__version__ = "0.1.1" __description__ = "Explore PDFs with ease" __license__ = "Apache 2.0" diff --git a/pdfnaut/parsers/pdf.py b/pdfnaut/parsers/pdf.py index ccb744a..8f5f6b5 100644 --- a/pdfnaut/parsers/pdf.py +++ b/pdfnaut/parsers/pdf.py @@ -287,7 +287,7 @@ def parse_compressed_xref(self) -> tuple[PdfXRefTable, dict[str, Any]]: return table, xref_stream.details - def parse_indirect_object(self, xref_entry: InUseXRefEntry) -> PdfObject | PdfStream | None: + def parse_indirect_object(self, xref_entry: InUseXRefEntry) -> PdfObject | PdfStream: """Parses an indirect object not within an object stream, or basically, an object that is directly referred to by an ``xref_entry``""" self._tokenizer.position = xref_entry.offset @@ -420,7 +420,7 @@ def parse_stream(self, xref_entry: InUseXRefEntry, extent: int) -> bytes: return contents - def resolve_reference(self, reference: PdfIndirectRef | tuple[int, int]): + def resolve_reference(self, reference: PdfIndirectRef | tuple[int, int]) -> PdfObject | PdfStream | PdfNull: """Resolves a reference 
into the indirect object it points to. Arguments: diff --git a/pyproject.toml b/pyproject.toml index 72f80bc..1804c3f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "pdfnaut" -version = "0.1.0" +version = "0.1.1" description = "Explore PDFs with ease" authors = [ { name = "Angel Carias" }