Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,10 @@ XLS, XLSX, PPTX, DOC, DOCX, MobiXML, JPEG, EMF, PNG, BMP, GIF, TIFF, Text
## Read PDF Formats
MHT, PCL, PS, XSLFO, MD

## Enhancements in Version 25.8
- Implement document page resize functionality using the Pdf.Cloud API library.
## Enhancements in Version 25.9
- Implement PDF document page crop functionality using the Pdf.Cloud API library.
- A new version of Aspose.PDF Cloud was prepared using the latest version of Aspose.PDF for .NET.

## Bugs fixed in Version 25.8
- Implement delete watermark from PDF document using the Pdf.Cloud API library.

## Requirements.
Python 2.7 and 3.4+

Expand Down
22 changes: 22 additions & 0 deletions Uses-Cases/Parser/get_fdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from paresr_helpers import ParesrHelper
from pathlib import Path
import logging

class ExportFormToFDF:
    """Export the form fields of a PDF document to an FDF file via Aspose PDF Cloud.

    Uploading, API access and downloading are delegated to the helper
    object supplied at construction time.
    """

    def __init__(self, helper: ParesrHelper):
        # The helper exposes ``pdf_api`` plus the ``upload_document`` and
        # ``downloadFile`` routines used by ``Extract``.
        self.helper = helper

    def Extract(self, documentName: str, outputFDFName: str, localFolder: Path, remoteFolder: str ):
        """Upload the PDF, export its form fields to FDF in storage, then download the FDF.

        Args:
            documentName: Name of the PDF document to upload and process.
            outputFDFName: File name of the FDF produced in ``remoteFolder``.
            localFolder: Local directory the FDF file is downloaded into.
            remoteFolder: Storage folder holding the PDF and the FDF.

        Returns:
            None. The outcome is reported through ``logging`` only.
        """
        if self.helper.pdf_api is None:
            # The helper leaves ``pdf_api`` as None when credentials could
            # not be loaded; stop here instead of failing on the None.
            logging.error("ExportFormToFDF(): PDF API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        # Storage paths use '/' regardless of the local OS, matching how the
        # helper builds remote paths; a plain ``str(Path)`` would produce
        # backslashes on Windows.
        fdfPath = Path(remoteFolder, outputFDFName).as_posix()
        opts = {
            "folder": remoteFolder
        }
        response = self.helper.pdf_api.put_export_fields_from_pdf_to_fdf_in_storage(documentName, fdfPath, **opts)
        if response.code != 200:
            logging.error("ExportFormToFDF(): Unexpected error!")
            return

        logging.info(f"ExportFormToFDF(): Pdf document '{documentName}' form fields successfully exported to '{outputFDFName}' file.")
        # Same name remotely and locally, no output prefix.
        self.helper.downloadFile(outputFDFName, outputFDFName, localFolder, remoteFolder, "")
26 changes: 26 additions & 0 deletions Uses-Cases/Parser/get_images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from paresr_helpers import ParesrHelper
from pathlib import Path
import shutil
import logging

class GetImages:
    """Extract the images of one PDF page as PNG files via Aspose PDF Cloud."""

    def __init__(self, helper: ParesrHelper):
        # The helper exposes ``pdf_api`` and ``upload_document``.
        self.helper = helper

    def Extract(self, documentName: str, pageNumber: int, localFolder: Path, remoteFolder: str):
        """Upload the PDF and save every image of ``pageNumber`` as ``<image id>.png``.

        Args:
            documentName: Name of the PDF document to upload and process.
            pageNumber: Page whose images are listed and extracted.
            localFolder: Local directory the PNG files are moved into.
            remoteFolder: Storage folder the document is uploaded to. The
                helper concatenates it with '/', so it is a string.

        Returns:
            None. The outcome is reported through ``logging`` only.
        """
        if self.helper.pdf_api is None:
            # The helper leaves ``pdf_api`` as None when credentials could
            # not be loaded; stop here instead of failing on the None.
            logging.error("GetImages(): PDF API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        opts = {
            "folder": remoteFolder
        }
        respImages = self.helper.pdf_api.get_images(documentName, pageNumber, **opts)
        if respImages.code != 200:
            logging.error("GetImages(): Unexpected error!")
            return

        # Treat a missing (None) list the same as an empty one.
        for img in respImages.images.list or []:
            # The return value is used as the path of a local file.
            response = self.helper.pdf_api.get_image_extract_as_png(documentName, img.id, **opts)

            local_path = Path(localFolder) / (img.id + '.png')
            shutil.move(response, str(local_path))
            # Report success only once the file is actually in place.
            logging.info(f"GetImages(): Images '{img.id}' successfully extracted from the document '{documentName}'.")
30 changes: 30 additions & 0 deletions Uses-Cases/Parser/get_tables.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from paresr_helpers import ParesrHelper
from pathlib import Path
import json
import logging

class GetTables:
    """Dump the tables recognized in a PDF document to a local file via Aspose PDF Cloud."""

    def __init__(self, helper: ParesrHelper):
        # The helper exposes ``pdf_api`` and ``upload_document``.
        self.helper = helper

    def Extract(self, documentName: str, localFolder: Path, remoteFolder: str):
        """Upload the PDF and write one record per table to ``tables_objects.json``.

        Records are separated by a line of asterisks, so the output file is
        a sequence of JSON values rather than a single JSON document.

        Args:
            documentName: Name of the PDF document to upload and process.
            localFolder: Local directory the output file is written to.
            remoteFolder: Storage folder the document is uploaded to. The
                helper concatenates it with '/', so it is a string.

        Returns:
            None. The outcome is reported through ``logging`` only.
        """
        if self.helper.pdf_api is None:
            # The helper leaves ``pdf_api`` as None when credentials could
            # not be loaded; stop here instead of failing on the None.
            logging.error("GetTables(): PDF API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        opts = {
            "folder": remoteFolder
        }
        respTables = self.helper.pdf_api.get_document_tables(documentName, **opts)
        if respTables.code != 200:
            logging.error("GetTables(): Unexpected error!")
            return

        localJson = Path(localFolder) / "tables_objects.json"
        with open(str(localJson), "w", encoding="utf-8") as localFile:
            # Treat a missing (None) list the same as an empty one.
            for tab in respTables.tables.list or []:
                response = self.helper.pdf_api.get_table(documentName, tab.id, **opts)
                if response.code != 200:
                    logging.error("GetTables(): Unexpected error!")
                    continue

                logging.info(f"GetTables(): Table '{tab.id}' successfully extracted from the document '{documentName}'.")
                # NOTE(review): the list entry ``tab`` is serialized, not the
                # ``get_table`` response fetched above - confirm this is intended.
                # ``default=str`` stringifies anything json cannot encode.
                json.dump(tab, localFile, ensure_ascii=False, default=str)
                localFile.write("\n*********************\n")
30 changes: 30 additions & 0 deletions Uses-Cases/Parser/get_textboxes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from paresr_helpers import ParesrHelper
from pathlib import Path
import json
import logging

class GetTextBoxes:
    """Dump the text box fields of a PDF document to a local file via Aspose PDF Cloud."""

    def __init__(self, helper: ParesrHelper):
        # The helper exposes ``pdf_api`` and ``upload_document``.
        self.helper = helper

    def Extract(self, documentName: str, localFolder: Path, remoteFolder: str):
        """Upload the PDF and write one record per text box to ``text_box_objects.json``.

        Records are separated by a line of asterisks, so the output file is
        a sequence of JSON values rather than a single JSON document.

        Args:
            documentName: Name of the PDF document to upload and process.
            localFolder: Local directory the output file is written to.
            remoteFolder: Storage folder the document is uploaded to. The
                helper concatenates it with '/', so it is a string.

        Returns:
            None. The outcome is reported through ``logging`` only.
        """
        if self.helper.pdf_api is None:
            # The helper leaves ``pdf_api`` as None when credentials could
            # not be loaded; stop here instead of failing on the None.
            logging.error("GetTextBoxes(): PDF API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        opts = {
            "folder": remoteFolder
        }
        respTextBoxes = self.helper.pdf_api.get_document_text_box_fields(documentName, **opts)
        if respTextBoxes.code != 200:
            logging.error("GetTextBoxes(): Unexpected error!")
            return

        localJson = Path(localFolder) / "text_box_objects.json"
        with open(str(localJson), "w", encoding="utf-8") as localFile:
            # Treat a missing (None) list the same as an empty one.
            for textBox in respTextBoxes.fields.list or []:
                response = self.helper.pdf_api.get_text_box_field(documentName, textBox.full_name, **opts)
                if response.code != 200:
                    logging.error("GetTextBoxes(): Unexpected error!")
                    continue

                logging.info(f"GetTextBoxes(): TextBox field '{textBox.full_name}' successfully extracted from the document '{documentName}'.")
                # NOTE(review): the list entry ``textBox`` is serialized, not the
                # ``get_text_box_field`` response fetched above - confirm this is intended.
                # ``default=str`` stringifies anything json cannot encode.
                json.dump(textBox, localFile, ensure_ascii=False, default=str)
                localFile.write("\n*********************\n")
22 changes: 22 additions & 0 deletions Uses-Cases/Parser/get_xml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from paresr_helpers import ParesrHelper, Config
from pathlib import Path
import logging

class ExportFormToXXML:
    """Export the form fields of a PDF document to an XML file via Aspose PDF Cloud.

    Uploading, API access and downloading are delegated to the helper
    object supplied at construction time.
    """

    def __init__(self, helper: ParesrHelper):
        # The helper exposes ``pdf_api`` plus the ``upload_document`` and
        # ``downloadFile`` routines used by ``Extract``.
        self.helper = helper

    def Extract(self, documentName: str, outputXMLName: str, localFolder: Path, remoteFolder: str ):
        """Upload the PDF, export its form fields to XML in storage, then download the XML.

        Args:
            documentName: Name of the PDF document to upload and process.
            outputXMLName: File name of the XML produced in ``remoteFolder``.
            localFolder: Local directory the XML file is downloaded into.
            remoteFolder: Storage folder holding the PDF and the XML.

        Returns:
            None. The outcome is reported through ``logging`` only.
        """
        if self.helper.pdf_api is None:
            # The helper leaves ``pdf_api`` as None when credentials could
            # not be loaded; stop here instead of failing on the None.
            logging.error("ExportFormToXML(): PDF API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        # Storage paths use '/' regardless of the local OS, matching how the
        # helper builds remote paths; a plain ``str(Path)`` would produce
        # backslashes on Windows.
        xmlPath = Path(remoteFolder, outputXMLName).as_posix()
        opts = {
            "folder": remoteFolder
        }
        response = self.helper.pdf_api.put_export_fields_from_pdf_to_xml_in_storage(documentName, xmlPath, **opts)
        if response.code != 200:
            logging.error("ExportFormToXML(): Unexpected error!")
            return

        logging.info(f"ExportFormToXML(): Pdf document '{documentName}' form fields successfully exported to '{outputXMLName}' file.")
        # Same name remotely and locally, no output prefix.
        self.helper.downloadFile(outputXMLName, outputXMLName, localFolder, remoteFolder, "")
66 changes: 66 additions & 0 deletions Uses-Cases/Parser/paresr_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import shutil
import json
import logging
from pathlib import Path
from asposepdfcloud import ApiClient, PdfApi

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")


class Config:
    """Configuration parameters shared by the Parser helper and launcher."""
    # JSON file opened by ParesrHelper._init_api, which reads its "key" and
    # "id" entries.
    # NOTE(review): this is a raw string, so `\\` is two literal backslashes,
    # and the leading `..s` looks like a typo - confirm the intended path.
    CREDENTIALS_FILE = Path(r"..s\\credentials.json")
    # Local directory: upload_document reads the source PDF from here and the
    # launcher passes it as the destination folder for results.
    LOCAL_FOLDER = Path(r"C:\Samples")
    # Storage folder the launcher passes as `remoteFolder`.
    REMOTE_TEMP_FOLDER = "TempPdfCloud"
    # Name of the input PDF, resolved against LOCAL_FOLDER on upload.
    PDF_DOCUMENT_NAME = "sample.pdf"
    # Output file names for the XML and FDF form-field exports.
    XML_OUTPUT_FILE = "output_sample.xml"
    FDF_OUTPUT_FILE = "output_sample.fdf"
    # Not referenced by the Parser launcher - presumably kept for parity with
    # other samples; TODO confirm.
    LOCAL_RESULT_DOCUMENT_NAME = "output_sample.pdf"
    # Page number the launcher passes to GetImages.Extract.
    PAGE_NUMBER = 1


class ParesrHelper:
    """Owns the Aspose PDF Cloud client and the upload/download routines shared by the Parser samples."""

    def __init__(self, credentials_file: Path = Config.CREDENTIALS_FILE):
        # Stays None when the credentials cannot be loaded; the methods
        # below check it before talking to the API.
        self.pdf_api = None
        self._init_api(credentials_file)

    def _init_api(self, credentials_file: Path):
        """Create ``self.pdf_api`` from the "key" and "id" entries of a JSON credentials file.

        A missing file, invalid JSON or missing entries are logged and leave
        ``self.pdf_api`` as None; no exception is raised for those cases.
        """
        try:
            with credentials_file.open("r", encoding="utf-8") as file:
                credentials = json.load(file)
            api_key, app_id = credentials.get("key"), credentials.get("id")
            if not api_key or not app_id:
                raise ValueError("Error: Missing API keys in the credentials file.")
            self.pdf_api = PdfApi(ApiClient(api_key, app_id))
        except (FileNotFoundError, json.JSONDecodeError, ValueError) as e:
            logging.error(f"Failed to load credentials: {e}")

    def upload_document(self, documentName: str, remoteFolder: str):
        """Upload ``documentName`` from ``Config.LOCAL_FOLDER`` to the Aspose Cloud server.

        Args:
            documentName: File name of the document inside ``Config.LOCAL_FOLDER``.
            remoteFolder: Destination storage folder, or None to upload
                under the bare document name.

        Failures are logged, not raised.
        """
        if self.pdf_api is None:
            # Make the skipped upload visible instead of returning silently.
            logging.error("upload_document(): PDF API client is not initialized.")
            return

        file_path = Config.LOCAL_FOLDER / documentName
        if remoteFolder is None:
            remote_path = documentName
        else:
            remote_path = remoteFolder + '/' + documentName
        try:
            # Pass the local path as a string in both cases.
            self.pdf_api.upload_file(remote_path, str(file_path))
            logging.info(f"File {documentName} uploaded successfully.")
        except Exception as e:
            logging.error(f"Failed to upload file: {e}")

    def downloadFile(self, document: str, outputDocument: str, localFolder: Path, remoteFolder: str, output_prefix: str):
        """Download ``document`` from ``remoteFolder`` and store it locally.

        Args:
            document: File name of the document inside ``remoteFolder``.
            outputDocument: Local file name (before the prefix is applied).
            localFolder: Local directory the file is moved into.
            remoteFolder: Storage folder the document is read from.
            output_prefix: Text prepended to ``outputDocument``.

        Failures are logged, not raised.
        """
        if self.pdf_api is None:
            logging.error("downloadFile(): PDF API client is not initialized.")
            return

        try:
            # The return value is used as the path of a local temporary file.
            temp_file = self.pdf_api.download_file(remoteFolder + '/' + document)
            local_path = localFolder / (output_prefix + outputDocument)
            shutil.move(temp_file, str(local_path))
            logging.info(f"downloadFile(): File successfully downloaded: {local_path}")
        except Exception as e:
            logging.error(f"downloadFile(): Failed to download file: {e}")

24 changes: 24 additions & 0 deletions Uses-Cases/Parser/parser_launch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from paresr_helpers import ParesrHelper, Config
from get_xml import ExportFormToXXML
from get_fdf import ExportFormToFDF
from get_images import GetImages
from get_tables import GetTables
from get_textboxes import GetTextBoxes

if __name__ == "__main__":
    # Every sample below shares the same helper instance.
    shared_helper = ParesrHelper(Config.CREDENTIALS_FILE)

    pdf_name = Config.PDF_DOCUMENT_NAME
    local_dir = Config.LOCAL_FOLDER
    remote_dir = Config.REMOTE_TEMP_FOLDER

    # Form fields -> XML.
    ExportFormToXXML(shared_helper).Extract(pdf_name, Config.XML_OUTPUT_FILE, local_dir, remote_dir)

    # Form fields -> FDF.
    ExportFormToFDF(shared_helper).Extract(pdf_name, Config.FDF_OUTPUT_FILE, local_dir, remote_dir)

    # Images of the configured page.
    GetImages(shared_helper).Extract(pdf_name, Config.PAGE_NUMBER, local_dir, remote_dir)

    # Tables of the document.
    GetTables(shared_helper).Extract(pdf_name, local_dir, remote_dir)

    # Text box fields of the document.
    GetTextBoxes(shared_helper).Extract(pdf_name, local_dir, remote_dir)
2 changes: 1 addition & 1 deletion asposepdfcloud/api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def __init__(self, app_key, app_sid, host=None, self_host=False):
self.rest_client = RESTClientObject()
self.default_headers = {}
self.default_headers['x-aspose-client'] = 'python sdk'
self.default_headers['x-aspose-client-version'] = '25.8.0'
self.default_headers['x-aspose-client-version'] = '25.9.0'

self.self_host = self_host
self.app_key = app_key
Expand Down
130 changes: 130 additions & 0 deletions asposepdfcloud/apis/pdf_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -24907,6 +24907,136 @@ def post_document_page_number_stamps_with_http_info(self, name, stamp, **kwargs)
_request_timeout=params.get('_request_timeout'),
collection_formats=collection_formats)

def post_document_pages_crop(self, name, pages, rect, **kwargs):
    """
    Crop PDF document pages.

    The request is synchronous unless a `callback` keyword argument is
    supplied, in which case the callback receives the response:

    >>> def callback_function(response):
    >>>     pprint(response)
    >>>
    >>> thread = api.post_document_pages_crop(name, pages, rect, callback=callback_function)

    :param callback function: The callback function
        for asynchronous request. (optional)
    :param str name: The document name. (required)
    :param str pages: Comma separated list of pages and page ranges. (Example: 1,3-5,8) (required)
    :param Rectangle rect: Rectangle of document area. (required)
    :param str storage: The document storage.
    :param str folder: The document folder.
    :param str password: Base64 encoded password.
    :return: AsposeResponse
             If the method is called asynchronously,
             returns the request thread.
    """
    # Ask the lower-level call for the response data only.
    kwargs['_return_http_data_only'] = True
    # With or without a callback the same call is made and its result is
    # handed straight back to the caller.
    return self.post_document_pages_crop_with_http_info(name, pages, rect, **kwargs)

def post_document_pages_crop_with_http_info(self, name, pages, rect, **kwargs):
    """
    Crop PDF document pages.

    The request is synchronous unless a `callback` keyword argument is
    supplied, in which case the callback receives the response:

    >>> def callback_function(response):
    >>>     pprint(response)
    >>>
    >>> thread = api.post_document_pages_crop_with_http_info(name, pages, rect, callback=callback_function)

    :param callback function: The callback function
        for asynchronous request. (optional)
    :param str name: The document name. (required)
    :param str pages: Comma separated list of pages and page ranges. (Example: 1,3-5,8) (required)
    :param Rectangle rect: Rectangle of document area. (required)
    :param str storage: The document storage.
    :param str folder: The document folder.
    :param str password: Base64 encoded password.
    :return: AsposeResponse
             If the method is called asynchronously,
             returns the request thread.
    """
    accepted = (
        'name', 'pages', 'rect', 'storage', 'folder', 'password',
        'callback', '_return_http_data_only', '_preload_content',
        '_request_timeout',
    )

    # Positional arguments first, then every recognised keyword option.
    params = {'name': name, 'pages': pages, 'rect': rect}
    for option, value in kwargs.items():
        if option not in accepted:
            raise TypeError(
                "Got an unexpected keyword argument '%s'"
                " to method post_document_pages_crop" % option
            )
        params[option] = value

    # The three positional parameters are mandatory and may not be None.
    required = (
        (name, "Missing the required parameter `name` when calling `post_document_pages_crop`"),
        (pages, "Missing the required parameter `pages` when calling `post_document_pages_crop`"),
        (rect, "Missing the required parameter `rect` when calling `post_document_pages_crop`"),
    )
    for supplied, complaint in required:
        if supplied is None:
            raise ValueError(complaint)

    # `pages` is always sent; the optional settings only when supplied.
    query_params = [('pages', pages)]
    query_params.extend(
        (option, params[option])
        for option in ('storage', 'folder', 'password')
        if option in params
    )

    header_params = {
        # HTTP header `Accept`
        'Accept': self.api_client.select_header_accept(['application/json']),
        # HTTP header `Content-Type`
        'Content-Type': self.api_client.select_header_content_type(['application/json']),
    }

    return self.api_client.call_api(
        '/pdf/{name}/crop', 'POST',
        {'name': name},
        query_params,
        header_params,
        body=rect,  # the rectangle is sent as the request body
        post_params=[],
        files={},
        response_type='AsposeResponse',
        auth_settings=['JWT'],  # authentication setting
        callback=params.get('callback'),
        _return_http_data_only=params.get('_return_http_data_only'),
        _preload_content=params.get('_preload_content', True),
        _request_timeout=params.get('_request_timeout'),
        collection_formats={})

def post_document_pages_resize(self, name, height, width, pages, **kwargs):
"""
Rsize PDF document.
Expand Down
Loading