In [22]:
import io
import json
import logging
import zipfile

from adobe.pdfservices.operation.auth.service_principal_credentials import ServicePrincipalCredentials
from adobe.pdfservices.operation.exception.exceptions import SdkException, ServiceApiException, ServiceUsageException
from adobe.pdfservices.operation.pdf_services import PDFServices
from adobe.pdfservices.operation.pdf_services_media_type import PDFServicesMediaType
from adobe.pdfservices.operation.pdfjobs.jobs.extract_pdf_job import ExtractPDFJob
from adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_element_type import ExtractElementType
from adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_pdf_params import ExtractPDFParams
from adobe.pdfservices.operation.pdfjobs.params.extract_pdf.extract_renditions_element_type import (
    ExtractRenditionsElementType,
)
from adobe.pdfservices.operation.pdfjobs.result.extract_pdf_result import ExtractPDFResult
from adobe.pdfservices.operation.io.cloud_asset import CloudAsset
from adobe.pdfservices.operation.io.stream_asset import StreamAsset

def load_pdf_service_credentials() -> dict:
    """Load the PDF Services client credentials from the local JSON file.

    Reads ``pdfservices-api-credentials.json`` from the current working
    directory and returns the value stored under its ``"client_credentials"``
    key.

    Returns:
        The object found under ``"client_credentials"`` in the file.

    Raises:
        FileNotFoundError: If the credentials file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the ``"client_credentials"`` key is missing.
    """
    # JSON is UTF-8 by specification; pin the encoding so parsing does not
    # depend on the platform's default locale encoding.
    with open("pdfservices-api-credentials.json", encoding="utf-8") as cred_file:
        data = json.load(cred_file)
    return data["client_credentials"]

# Run an Adobe PDF Services "Extract PDF" job on a local PDF file and print
# the text of every H1 heading found in the structured result.
try:
    # Open and read the input PDF file into memory
    with open("./experiments/tmp/files/loopdraaistelsel.pdf", "rb") as file:
        input_stream = file.read()

    # Read the credentials from the JSON file
    creds = load_pdf_service_credentials()
    credentials = ServicePrincipalCredentials(
        client_id=creds["client_id"],
        client_secret=creds["client_secret"]
    )

    # Create a PDF Services instance
    pdf_services = PDFServices(credentials=credentials)

    # Upload the source file as an asset
    input_asset = pdf_services.upload(
        input_stream=input_stream,
        mime_type=PDFServicesMediaType.PDF
    )

    # Configure the extraction: text and tables, plus figure renditions
    extract_pdf_params = ExtractPDFParams(
        elements_to_extract=[
            ExtractElementType.TEXT,
            ExtractElementType.TABLES,
        ],
        elements_to_extract_renditions=[ExtractRenditionsElementType.FIGURES],
    )

    # Create and submit the extraction job, then fetch its result
    extract_pdf_job = ExtractPDFJob(
        input_asset=input_asset,
        extract_pdf_params=extract_pdf_params
    )
    location = pdf_services.submit(extract_pdf_job)
    pdf_services_response = pdf_services.get_job_result(location, ExtractPDFResult)

    result_asset: CloudAsset = pdf_services_response.get_result().get_resource()
    stream_asset: StreamAsset = pdf_services.get_content(result_asset)

    # The job result is a zip archive; get its raw content
    zip_content = stream_asset.get_input_stream()

    # Open the archive in memory and parse the structured output.
    # The extracted data is typically in a file named 'structuredData.json'.
    with zipfile.ZipFile(io.BytesIO(zip_content)) as zf:
        json_data = json.loads(zf.read('structuredData.json'))

    # Print every H1 heading. Element paths carry a sibling index for
    # repeated tags (e.g. "//Document/H1[2]"), so compare the last path
    # segment with any "[n]" suffix stripped instead of using endswith("/H1"),
    # which would only match the first H1 under each parent.
    for element in json_data["elements"]:
        last_segment = element["Path"].rsplit("/", 1)[-1]
        tag = last_segment.split("[", 1)[0]
        if tag != "H1":
            continue
        # Skip headings that carry no "Text" field rather than raising an
        # uncaught KeyError.
        text = element.get("Text")
        if text is not None:
            print(text)

except (ServiceApiException, ServiceUsageException, SdkException) as e:
    # Only SDK/service errors are handled here; file and JSON errors propagate.
    logging.exception("Er is een fout opgetreden tijdens de uitvoering: %s", e)

INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Started uploading asset
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Finished uploading asset
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Started submitting EXTRACT_PDF job
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Started getting job result
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Finished polling for status
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Finished getting job result
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Started getting content
INFO:adobe.pdfservices.operation.internal.pdf_services_helper:Finished getting content


Veiligheid 
