<a href="https://colab.research.google.com/github/jorisschellekens/borb-examples-dev/blob/master/snippet_11_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install borb

Collecting borb
  Downloading borb-3.0.5-py3-none-any.whl.metadata (3.6 kB)
Downloading borb-3.0.5-py3-none-any.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: borb
Successfully installed borb-3.0.5


In [6]:
from borb.pdf import Document
from borb.pdf import PDF

import pathlib
import requests


def download_pdf(url: str, filename: pathlib.Path, timeout: float = 30.0) -> None:
    """
    Downloads a PDF file from the given URL and saves it to the specified filename.

    The response body is streamed to disk in 8 KiB chunks. Any failure is reported
    on stdout and then re-raised, so the caller never continues with a missing or
    partially written file.

    :param url: The URL of the PDF file to download.
    :param filename: The local file path where the downloaded PDF should be saved.
    :param timeout: Number of seconds to wait for the server before giving up (passed to requests.get).

    :raises requests.exceptions.RequestException: If there is an issue with the HTTP request (e.g., network failure, invalid URL, bad response).
    :raises Exception: If any other unexpected error occurs during file writing.
    """
    try:
        # Using the response as a context manager releases the streamed
        # connection even when an error interrupts the download.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Raise an exception for bad status codes
            with open(filename, "wb") as pdf_file:
                for chunk in response.iter_content(chunk_size=8192):
                    pdf_file.write(chunk)
    except requests.exceptions.RequestException as e:
        print(f"Error downloading PDF: {e}")
        raise  # propagate, as documented, instead of silently continuing
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        raise


# Fetch the sample document from the borb PDF corpus and store it locally
input_path = pathlib.Path("input.pdf")
download_pdf(
    url="https://github.com/borb-pdf/borb-pdf-corpus/raw/refs/heads/master/pdf/0004.pdf",
    filename=input_path,
)

# Parse the downloaded file into a Document
d: Document = PDF.read(str(input_path))

# Report the document's meta-information, one entry per line.
# Labels are pre-padded so the colons line up in the output.
for label, getter in (
    ("Author           ", d.get_author),
    ("Creation Date    ", d.get_creation_date),
    ("Modification Date", d.get_modification_date),
    ("Producer         ", d.get_producer),
    ("Subject          ", d.get_subject),
    ("Title            ", d.get_title),
):
    print(f"{label}: {getter()}")


Author           : Mike Haskins
Creation Date    : None
Modification Date: None
Producer         : þÿ M i c r o s o f t ®   W o r d   2 0 1 6
Subject          : None
Title            : None
