class ExportFormToFDF:
    """Export the form fields of a PDF document to an FDF file via Aspose PDF Cloud.

    Upload, API access and download are delegated to the helper object
    supplied to the constructor.
    """

    def __init__(self, helper: "ParesrHelper"):
        # Quoted annotation: the helper type is only needed by type checkers,
        # so the class does not depend on the name being resolvable at runtime.
        self.helper = helper

    def Extract(self, documentName: str, outputFDFName: str, localFolder: Path, remoteFolder: str):
        """Upload the document, export its form fields to FDF and download the result.

        Args:
            documentName: Name of the PDF file handed to the helper for upload.
            outputFDFName: File name of the FDF created in ``remoteFolder``.
            localFolder: Local directory that receives the downloaded FDF.
            remoteFolder: Storage folder used for both the PDF and the FDF.

        Returns:
            None. Problems are reported through ``logging`` only.
        """
        pdf_api = self.helper.pdf_api
        if pdf_api is None:
            # The helper leaves pdf_api as None when it could not be created;
            # without this guard the export call would fail with AttributeError.
            logging.error("ExportFormToFDF(): API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        # The helper addresses storage with '/'-separated paths; as_posix()
        # keeps that true on Windows, where str(Path) would use backslashes.
        fdfPath = Path(remoteFolder, outputFDFName).as_posix()
        response = pdf_api.put_export_fields_from_pdf_to_fdf_in_storage(
            documentName, fdfPath, folder=remoteFolder)
        if response.code != 200:
            logging.error("ExportFormToFDF(): Unexpected error!")
            return

        logging.info(f"ExportFormToFDF(): Pdf document '{documentName}' form fields successfully exported to '{outputFDFName}' file.")
        # Only fetch the FDF when the export call reported success.
        self.helper.downloadFile(outputFDFName, outputFDFName, localFolder, remoteFolder, "")
class GetImages:
    """Extract the images of one PDF page as PNG files via Aspose PDF Cloud."""

    def __init__(self, helper: "ParesrHelper"):
        self.helper = helper

    def Extract(self, documentName: str, pageNumber: int, localFolder: Path, remoteFolder: str):
        """Upload the document and save every image of ``pageNumber`` as ``<id>.png``.

        Args:
            documentName: Name of the PDF file handed to the helper for upload.
            pageNumber: Page whose images are listed.
            localFolder: Local directory that receives the PNG files.
            remoteFolder: Storage folder name, forwarded as the ``folder`` option.

        Returns:
            None. Problems are reported through ``logging`` only.
        """
        pdf_api = self.helper.pdf_api
        if pdf_api is None:
            logging.error("GetImages(): API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        opts = {"folder": remoteFolder}
        respImages = pdf_api.get_images(documentName, pageNumber, **opts)
        if respImages.code != 200:
            logging.error("GetImages(): Unexpected error!")
            return

        for img in respImages.images.list:
            # The call's return value is used as the path of a file to move.
            response = pdf_api.get_image_extract_as_png(documentName, img.id, **opts)
            local_path = localFolder / (img.id + '.png')
            shutil.move(response, str(local_path))
            # Report success only once the file is actually in place.
            logging.info(f"GetImages(): Images '{img.id}' successfully extracted from the document '{documentName}'.")


class GetTables:
    """Extract the tables of a PDF document via Aspose PDF Cloud."""

    def __init__(self, helper: "ParesrHelper"):
        self.helper = helper

    def Extract(self, documentName: str, localFolder: Path, remoteFolder: str):
        """Upload the document and write its tables to ``tables_objects.json``.

        Args:
            documentName: Name of the PDF file handed to the helper for upload.
            localFolder: Local directory that receives ``tables_objects.json``.
            remoteFolder: Storage folder name, forwarded as the ``folder`` option.

        Returns:
            None. Problems are reported through ``logging`` only.
        """
        pdf_api = self.helper.pdf_api
        if pdf_api is None:
            logging.error("GetTables(): API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        opts = {"folder": remoteFolder}
        respTables = pdf_api.get_document_tables(documentName, **opts)
        if respTables.code != 200:
            logging.error("GetTables(): Unexpected error!")
            return

        localJson = localFolder / "tables_objects.json"
        with open(str(localJson), "w", encoding="utf-8") as localFile:
            for tab in respTables.tables.list:
                response = pdf_api.get_table(documentName, tab.id, **opts)
                if response.code != 200:
                    # Previously logged under the GetTextBoxes() tag.
                    logging.error("GetTables(): Unexpected error!")
                    continue
                logging.info(f"GetTables(): Table '{tab.id}' successfully extracted from the document '{documentName}'.")
                # NOTE(review): the list entry is serialized, not the object
                # fetched by get_table() - confirm this is intended.
                json.dump(tab, localFile, ensure_ascii=False, default=str)
                localFile.write("\n*********************\n")


class GetTextBoxes:
    """Extract the text box fields of a PDF document via Aspose PDF Cloud."""

    def __init__(self, helper: "ParesrHelper"):
        self.helper = helper

    def Extract(self, documentName: str, localFolder: Path, remoteFolder: str):
        """Upload the document and write its text boxes to ``text_box_objects.json``.

        Args:
            documentName: Name of the PDF file handed to the helper for upload.
            localFolder: Local directory that receives ``text_box_objects.json``.
            remoteFolder: Storage folder name, forwarded as the ``folder`` option.

        Returns:
            None. Problems are reported through ``logging`` only.
        """
        pdf_api = self.helper.pdf_api
        if pdf_api is None:
            logging.error("GetTextBoxes(): API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        opts = {"folder": remoteFolder}
        respTextBoxes = pdf_api.get_document_text_box_fields(documentName, **opts)
        if respTextBoxes.code != 200:
            logging.error("GetTextBoxes(): Unexpected error!")
            return

        localJson = localFolder / "text_box_objects.json"
        with open(str(localJson), "w", encoding="utf-8") as localFile:
            for textBox in respTextBoxes.fields.list:
                response = pdf_api.get_text_box_field(documentName, textBox.full_name, **opts)
                if response.code != 200:
                    logging.error("GetTextBoxes(): Unexpected error!")
                    continue
                logging.info(f"GetTextBoxes(): TextBox field '{textBox.full_name}' successfully extracted from the document '{documentName}'.")
                # NOTE(review): the list entry is serialized, not the object
                # fetched by get_text_box_field() - confirm this is intended.
                json.dump(textBox, localFile, ensure_ascii=False, default=str)
                localFile.write("\n*********************\n")
class ExportFormToXXML:
    """Export the form fields of a PDF document to an XML file via Aspose PDF Cloud."""

    def __init__(self, helper: "ParesrHelper"):
        self.helper = helper

    def Extract(self, documentName: str, outputXMLName: str, localFolder: Path, remoteFolder: str):
        """Upload the document, export its form fields to XML and download the result.

        Args:
            documentName: Name of the PDF file handed to the helper for upload.
            outputXMLName: File name of the XML created in ``remoteFolder``.
            localFolder: Local directory that receives the downloaded XML.
            remoteFolder: Storage folder used for both the PDF and the XML.

        Returns:
            None. Problems are reported through ``logging`` only.
        """
        pdf_api = self.helper.pdf_api
        if pdf_api is None:
            logging.error("ExportFormToXML(): API client is not initialized.")
            return

        self.helper.upload_document(documentName, remoteFolder)

        # Keep the storage path '/'-separated on every OS (str(Path) would
        # use backslashes on Windows).
        xmlPath = Path(remoteFolder, outputXMLName).as_posix()
        response = pdf_api.put_export_fields_from_pdf_to_xml_in_storage(
            documentName, xmlPath, folder=remoteFolder)
        if response.code != 200:
            logging.error("ExportFormToXML(): Unexpected error!")
            return

        logging.info(f"ExportFormToXML(): Pdf document '{documentName}' form fields successfully exported to '{outputXMLName}' file.")
        self.helper.downloadFile(outputXMLName, outputXMLName, localFolder, remoteFolder, "")


# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")


class Config:
    """Configuration parameters."""
    # NOTE(review): this raw string contains "..s" and a doubled backslash;
    # it looks like a typo for a parent-directory path - confirm.
    CREDENTIALS_FILE = Path(r"..s\\credentials.json")
    LOCAL_FOLDER = Path(r"C:\Samples")
    REMOTE_TEMP_FOLDER = "TempPdfCloud"
    PDF_DOCUMENT_NAME = "sample.pdf"
    XML_OUTPUT_FILE = "output_sample.xml"
    FDF_OUTPUT_FILE = "output_sample.fdf"
    LOCAL_RESULT_DOCUMENT_NAME = "output_sample.pdf"
    PAGE_NUMBER = 1


class ParesrHelper:
    """Class with helper methods and properties for Parser"""

    def __init__(self, credentials_file: Path = Config.CREDENTIALS_FILE):
        # Stays None when the credentials cannot be loaded; every method
        # checks for that before touching the API.
        self.pdf_api = None
        self._init_api(credentials_file)

    def _init_api(self, credentials_file: Path):
        """Create the API client from a JSON file holding ``key`` and ``id``.

        Failures are logged and leave ``self.pdf_api`` as ``None``.
        """
        try:
            with credentials_file.open("r", encoding="utf-8") as file:
                credentials = json.load(file)
            if not isinstance(credentials, dict):
                raise ValueError("Error: Credentials file must contain a JSON object.")
            api_key, app_id = credentials.get("key"), credentials.get("id")
            if not api_key or not app_id:
                raise ValueError("Error: Missing API keys in the credentials file.")
            self.pdf_api = PdfApi(ApiClient(api_key, app_id))
        except (OSError, ValueError) as e:
            # OSError covers a missing or unreadable file; ValueError covers
            # json.JSONDecodeError (a subclass) and the checks above.
            logging.error(f"Failed to load credentials: {e}")

    def upload_document(self, documentName: str, remoteFolder: str):
        """Upload ``Config.LOCAL_FOLDER / documentName`` to the cloud storage.

        Args:
            documentName: File name inside ``Config.LOCAL_FOLDER``.
            remoteFolder: Target storage folder, or ``None`` for no folder prefix.
        """
        if self.pdf_api is None:
            return
        file_path = str(Config.LOCAL_FOLDER / documentName)
        try:
            if remoteFolder is None:
                remote_path = documentName
            else:
                remote_path = f"{remoteFolder}/{documentName}"
            self.pdf_api.upload_file(remote_path, file_path)
            logging.info(f"File {documentName} uploaded successfully.")
        except Exception as e:
            # Best effort: a failed upload is logged, never raised.
            logging.error(f"Failed to upload file: {e}")

    def downloadFile(self, document: str, outputDocument: str, localFolder: Path, remoteFolder: str, output_prefix: str):
        """Download ``document`` from storage to ``localFolder``.

        The local file is named ``output_prefix + outputDocument``.

        Args:
            document: File name in the cloud storage.
            outputDocument: Base name of the local file.
            localFolder: Local directory that receives the file.
            remoteFolder: Storage folder, or ``None`` for no folder prefix.
            output_prefix: Text prepended to ``outputDocument``.
        """
        if self.pdf_api is None:
            return
        try:
            if remoteFolder is None:
                remote_path = document
            else:
                remote_path = f"{remoteFolder}/{document}"
            # The call's return value is used as the path of a file to move.
            temp_file = self.pdf_api.download_file(remote_path)
            local_path = localFolder / (output_prefix + outputDocument)
            shutil.move(temp_file, str(local_path))
            logging.info(f"downloadFile(): File successfully downloaded: {local_path}")
        except Exception as e:
            # Best effort: a failed download is logged, never raised.
            logging.error(f"downloadFile(): Failed to download file: {e}")
def post_document_pages_crop(self, name, pages, rect, **kwargs):
    """
    Crop PDF document pages.
    This method makes a synchronous HTTP request by default. To make an
    asynchronous HTTP request, please define a `callback` function
    to be invoked when receiving the response.
    >>> def callback_function(response):
    >>>     pprint(response)
    >>>
    >>> thread = api.post_document_pages_crop(name, pages, rect, callback=callback_function)

    This is a thin wrapper: it forces `_return_http_data_only` to True and
    delegates to `post_document_pages_crop_with_http_info`.

    :param callback function: The callback function
        for asynchronous request. (optional)
    :param str name: The document name. (required)
    :param str pages: Comma separated list of pages and page ranges. (Example: 1,3-5,8) (required)
    :param Rectangle rect: Rectangle of document area. (required)
    :param str storage: The document storage.
    :param str folder: The document folder.
    :param str password: Base64 encoded password.
    :return: AsposeResponse
             If the method is called asynchronously,
             returns the request thread.
    """
    # Overrides any caller-supplied value for this option.
    kwargs['_return_http_data_only'] = True
    if kwargs.get('callback'):
        # With a callback the delegate's return value is passed straight back.
        return self.post_document_pages_crop_with_http_info(name, pages, rect, **kwargs)
    else:
        (data) = self.post_document_pages_crop_with_http_info(name, pages, rect, **kwargs)
        return data

def post_document_pages_crop_with_http_info(self, name, pages, rect, **kwargs):
    """
    Crop PDF document pages.
    This method makes a synchronous HTTP request by default. To make an
    asynchronous HTTP request, please define a `callback` function
    to be invoked when receiving the response.
    >>> def callback_function(response):
    >>>     pprint(response)
    >>>
    >>> thread = api.post_document_pages_crop_with_http_info(name, pages, rect, callback=callback_function)

    Sends `POST /pdf/{name}/crop` with `rect` as the request body, `pages`
    (and `storage`/`folder`/`password` when supplied) as query parameters,
    and JSON `Accept`/`Content-Type` headers.

    :param callback function: The callback function
        for asynchronous request. (optional)
    :param str name: The document name. (required)
    :param str pages: Comma separated list of pages and page ranges. (Example: 1,3-5,8) (required)
    :param Rectangle rect: Rectangle of document area. (required)
    :param str storage: The document storage.
    :param str folder: The document folder.
    :param str password: Base64 encoded password.
    :return: AsposeResponse
             If the method is called asynchronously,
             returns the request thread.
    :raises TypeError: if an unknown keyword argument is passed.
    :raises ValueError: if `name`, `pages` or `rect` is None.
    """

    # Keyword arguments accepted by this method: the optional API parameters
    # plus the transport options read further down.
    all_params = ['name', 'pages', 'rect', 'storage', 'folder', 'password']
    all_params.append('callback')
    all_params.append('_return_http_data_only')
    all_params.append('_preload_content')
    all_params.append('_request_timeout')

    # Snapshot of the local names at this point (self, name, pages, rect,
    # kwargs, all_params). The validated kwargs are merged in below, so every
    # later lookup goes through this one dict.
    # NOTE(review): iteritems is not defined in the visible code - presumably
    # imported at module level (e.g. from six); confirm.
    params = locals()
    for key, val in iteritems(params['kwargs']):
        if key not in all_params:
            raise TypeError(
                "Got an unexpected keyword argument '%s'"
                " to method post_document_pages_crop" % key
            )
        params[key] = val
    del params['kwargs']
    # verify the required parameter 'name' is set
    if ('name' not in params) or (params['name'] is None):
        raise ValueError("Missing the required parameter `name` when calling `post_document_pages_crop`")
    # verify the required parameter 'pages' is set
    if ('pages' not in params) or (params['pages'] is None):
        raise ValueError("Missing the required parameter `pages` when calling `post_document_pages_crop`")
    # verify the required parameter 'rect' is set
    if ('rect' not in params) or (params['rect'] is None):
        raise ValueError("Missing the required parameter `rect` when calling `post_document_pages_crop`")


    collection_formats = {}

    # `name` fills the {name} placeholder of the resource path.
    path_params = {}
    if 'name' in params:
        path_params['name'] = params['name']

    # Optional values are added only when the caller supplied them.
    query_params = []
    if 'pages' in params:
        query_params.append(('pages', params['pages']))
    if 'storage' in params:
        query_params.append(('storage', params['storage']))
    if 'folder' in params:
        query_params.append(('folder', params['folder']))
    if 'password' in params:
        query_params.append(('password', params['password']))

    header_params = {}

    # No form fields or file uploads for this endpoint.
    form_params = []
    local_var_files = {}

    # The crop rectangle is passed as the request body.
    body_params = None
    if 'rect' in params:
        body_params = params['rect']
    # HTTP header `Accept`
    header_params['Accept'] = self.api_client.\
        select_header_accept(['application/json'])

    # HTTP header `Content-Type`
    header_params['Content-Type'] = self.api_client.\
        select_header_content_type(['application/json'])

    # Authentication setting
    auth_settings = ['JWT']

    return self.api_client.call_api('/pdf/{name}/crop', 'POST',
                                    path_params,
                                    query_params,
                                    header_params,
                                    body=body_params,
                                    post_params=form_params,
                                    files=local_var_files,
                                    response_type='AsposeResponse',
                                    auth_settings=auth_settings,
                                    callback=params.get('callback'),
                                    _return_http_data_only=params.get('_return_http_data_only'),
                                    _preload_content=params.get('_preload_content', True),
                                    _request_timeout=params.get('_request_timeout'),
                                    collection_formats=collection_formats)
+[**post_document_pages_crop**](PdfApi.md#post_document_pages_crop) | **POST** /pdf/\{name}/crop | Crop PDF document pages. [**post_document_pages_resize**](PdfApi.md#post_document_pages_resize) | **POST** /pdf/\{name}/resize | Rsize PDF document. [**post_document_pages_rotate**](PdfApi.md#post_document_pages_rotate) | **POST** /pdf/\{name}/rotate | Rotate PDF document. [**post_document_text_footer**](PdfApi.md#post_document_text_footer) | **POST** /pdf/\{name}/footer/text | Add document text footer. @@ -5662,6 +5663,33 @@ Name | Type | Description | Notes [[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) +# **post_document_pages_crop** +> AsposeResponse post_document_pages_crop(name, pages, rect, storage=storage, folder=folder, password=password) + +Crop PDF document pages. + +### Parameters + +Name | Type | Description | Notes +------------- | ------------- | ------------- | ------------- + **name** | **str**| The document name. | + **pages** | **str**| Comma separated list of pages and page ranges. (Example: 1,3-5,8) | + **rect** | [**Rectangle**](Rectangle.md)| Rectangle of document area. | + **storage** | **str**| The document storage. | [optional] + **folder** | **str**| The document folder. | [optional] + **password** | **str**| Base64 encoded password. 
| [optional] + +### Return type + +[**AsposeResponse**](AsposeResponse.md) + +### HTTP request headers + + - **Content-Type**: application/json + - **Accept**: application/json + +[[Back to top]](#) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to Model list]](../README.md#documentation-for-models) [[Back to README]](../README.md) + # **post_document_pages_resize** > AsposeResponse post_document_pages_resize(name, height, width, pages, storage=storage, folder=folder, password=password) diff --git a/setup.py b/setup.py index 07a3693..15e951b 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ from setuptools import setup, find_packages NAME = "asposepdfcloud" -VERSION = "25.8.0" +VERSION = "25.9.0" # To install the library, run the following # # python setup.py install diff --git a/test/pdf_test.py b/test/pdf_test.py index 40b3bb1..cb35b81 100644 --- a/test/pdf_test.py +++ b/test/pdf_test.py @@ -3462,6 +3462,18 @@ def testPostDocumentPagesResize(self): response = self.pdf_api.post_document_pages_resize(file_name, 100, 200, '2-3', **opts) self.assertEqual(response.code, 200) + def testPostDocumentPagesCrop(self): + file_name = '4pages.pdf' + self.uploadFile(file_name) + opts = { + "folder" : self.temp_folder + } + + rectangle = asposepdfcloud.models.Rectangle(0, 0, 400, 800) + + response = self.pdf_api.post_document_pages_crop(file_name, '2-3', rectangle, **opts) + self.assertEqual(response.code, 200) + # Fields Tests def testGetField(self):