85 operation = client.batch_process_documents(request)
87 # Operation Name Format: projects/{project_id}/locations/{location}/operations/{operation_id}
---> 88 self.processed_documents = documentai_toolbox.document.Document.from_batch_process_operation(
89 location=location, operation_name=operation.operation.name
90 )
File venv/lib/python3.11/site-packages/google/cloud/documentai_toolbox/wrappers/document.py:547, in Document.from_batch_process_operation(cls, location, operation_name)
519 @classmethod
520 def from_batch_process_operation(
521 cls: Type["Document"], location: str, operation_name: str
522 ) -> List["Document"]:
523 r"""Loads Documents from Cloud Storage, using the operation name returned from `batch_process_documents()`.
524
525 .. code-block:: python
(...)
544 A list of wrapped documents from gcs. Each document corresponds to an input file.
545 """
546 return cls.from_batch_process_metadata(
--> 547 metadata=_get_batch_process_metadata(
548 location=location, operation_name=operation_name
549 )
550 )
File venv/lib/python3.11/site-packages/google/cloud/documentai_toolbox/wrappers/document.py:161, in _get_batch_process_metadata(location, operation_name)
154 client = documentai.DocumentProcessorServiceClient(
155 client_options=ClientOptions(
156 api_endpoint=f"{location}-documentai.googleapis.com"
157 )
158 )
160 while True:
--> 161 operation: Operation = client.get_operation(
162 request=GetOperationRequest(name=operation_name)
163 )
165 if operation.done:
166 break
File venv/lib/python3.11/site-packages/google/cloud/documentai_v1/services/document_processor_service/client.py:3280, in DocumentProcessorServiceClient.get_operation(self, request, retry, timeout, metadata)
3275 metadata = tuple(metadata) + (
3276 gapic_v1.routing_header.to_grpc_metadata((("name", request.name),)),
3277 )
3279 # Send the request.
-> 3280 response = rpc(
3281 request,
3282 retry=retry,
3283 timeout=timeout,
3284 metadata=metadata,
3285 )
3287 # Done; return the response.
3288 return response
File venv/lib/python3.11/site-packages/google/api_core/gapic_v1/method.py:131, in _GapicCallable.__call__(self, timeout, retry, compression, *args, **kwargs)
128 if self._compression is not None:
129 kwargs["compression"] = compression
--> 131 return wrapped_func(*args, **kwargs)
File venv/lib/python3.11/site-packages/google/api_core/grpc_helpers.py:81, in _wrap_unary_errors.<locals>.error_remapped_callable(*args, **kwargs)
79 return callable_(*args, **kwargs)
80 except grpc.RpcError as exc:
---> 81 raise exceptions.from_grpc_error(exc) from exc
ResourceExhausted: 429 Quota exceeded for quota metric 'Number of API requests' and limit 'Number of API requests per minute' of service 'documentai.googleapis.com' for consumer 'project_number:XXXXXXXX'. [reason: "RATE_LIMIT_EXCEEDED"
domain: "googleapis.com"
metadata {
key: "service"
value: "documentai.googleapis.com"
}
metadata {
key: "quota_metric"
value: "documentai.googleapis.com/default_requests"
}
metadata {
key: "quota_location"
value: "global"
}
metadata {
key: "quota_limit"
value: "DefaultRequestsPerMinutePerProject"
}
metadata {
key: "quota_limit_value"
value: "1800"
}
metadata {
key: "consumer"
value: "projects/XXXXXXXX"
}
, links {
description: "Request a higher quota limit."
url: "https://cloud.google.com/docs/quota#requesting_higher_quota"
}
]
A possible solution would be to introduce a mechanism to control the polling rate, such as a delay interval between iterations of the loop.
Environment details
- OS type and version: Debian GNU/Linux 11 (bullseye)
- Python version: 3.11.2
- pip version: 23.3.2
- google-cloud-documentai-toolbox version: 0.12.0a0

Steps to reproduce
1. Call `Document.from_batch_process_operation()` while performing OCR on a PDF document with about 10+ pages over a fast/low-latency network (for example, from a Google Compute Engine VM).

Code example
Stack trace
Based on my observation, this is likely caused by the `_get_batch_process_metadata` function, which has a long-running loop that repeatedly requests the operation metadata with no delay between requests.

python-documentai-toolbox/google/cloud/documentai_toolbox/wrappers/document.py
Lines 160 to 166 in 37e5d68
A possible solution would be to introduce a mechanism to control the polling rate, such as a delay interval between iterations of the loop.