From 3bbc0f08506be65392a19d9caec3450d68311989 Mon Sep 17 00:00:00 2001 From: Holt Skinner <13262395+holtskinner@users.noreply.github.com> Date: Mon, 3 Apr 2023 12:04:34 -0500 Subject: [PATCH] feat: Update Max Files per Batch Request to 1000 (#91) * feat: Update Max Files per Batch Request to 1000 - https://cloud.google.com/document-ai/quotas#content_limits * test: Update `test_create_batches_with_invalid_batch_size()` * test: Add `batch_size` parameter to `test_create_batches_with_large_folder()` --- google/cloud/documentai_toolbox/constants.py | 7 ++++--- tests/unit/test_utilities.py | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/google/cloud/documentai_toolbox/constants.py b/google/cloud/documentai_toolbox/constants.py index 5facde23..278d5446 100644 --- a/google/cloud/documentai_toolbox/constants.py +++ b/google/cloud/documentai_toolbox/constants.py @@ -21,8 +21,8 @@ FILE_CHECK_REGEX = r"(.*[.].*$)" -# https://cloud.google.com/document-ai/quotas -BATCH_MAX_FILES = 50 +# https://cloud.google.com/document-ai/quotas#content_limits +BATCH_MAX_FILES = 1000 # 1GB in Bytes BATCH_MAX_FILE_SIZE = 1073741824 BATCH_MAX_REQUESTS = 5 @@ -30,7 +30,8 @@ # https://cloud.google.com/document-ai/docs/file-types VALID_MIME_TYPES = { "application/pdf", - "image/bmp" "image/gif", + "image/bmp", + "image/gif", "image/jpeg", "image/png", "image/tiff", diff --git a/tests/unit/test_utilities.py b/tests/unit/test_utilities.py index fefff366..fd9335d8 100644 --- a/tests/unit/test_utilities.py +++ b/tests/unit/test_utilities.py @@ -350,10 +350,10 @@ def test_create_batches_with_3_documents(mock_storage, capfd): def test_create_batches_with_invalid_batch_size(): with pytest.raises( ValueError, - match="Batch size must be less than 50. You provided 51.", + match="Batch size must be less than 1000. You provided 1001.", ): utilities.create_batches( - gcs_bucket_name=test_bucket, gcs_prefix=test_prefix, batch_size=51 + gcs_bucket_name=test_bucket, gcs_prefix=test_prefix, batch_size=1001 ) @@ -373,7 +373,7 @@ def test_create_batches_with_large_folder(mock_storage, capfd): client.list_blobs.return_value = mock_blobs actual = utilities.create_batches( - gcs_bucket_name=test_bucket, gcs_prefix=test_prefix + gcs_bucket_name=test_bucket, gcs_prefix=test_prefix, batch_size=50 ) mock_storage.Client.assert_called_once()