diff --git a/Makefile b/Makefile index 86032aef..f0a70f00 100644 --- a/Makefile +++ b/Makefile @@ -73,16 +73,20 @@ download-openapi-specs: client-merge-serverless-platform: speakeasy merge -s ./openapi_platform_api.json -s ./openapi_serverless.json -o ./openapi_merged.yaml -## client-generate-unified-sdk-local: Generate the SDK using merged schemas -.PHONY: client-generate-unified-sdk-local -client-generate-unified-sdk-local: +## client-apply-overlay: Apply overlay on the merged schema +.PHONY: client-apply-overlay +client-apply-overlay: speakeasy overlay validate -o ./overlay_client.yaml speakeasy overlay apply -s ./openapi_merged.yaml -o ./overlay_client.yaml > ./openapi_platform_serverless_client.yaml + +## client-generate-unified-sdk-local: Generate the SDK from the merged schema +.PHONY: client-generate-unified-sdk-local +client-generate-unified-sdk-local: speakeasy generate sdk -s ./openapi_platform_serverless_client.yaml -o ./ -l python ## client-generate-sdk: Do all the steps to generate the SDK .PHONY: client-generate-sdk -client-generate-sdk: download-openapi-specs client-merge-serverless-platform client-generate-unified-sdk-local +client-generate-sdk: download-openapi-specs client-merge-serverless-platform client-apply-overlay client-generate-unified-sdk-local .PHONY: publish diff --git a/_test_contract/conftest.py b/_test_contract/conftest.py new file mode 100644 index 00000000..e373bd53 --- /dev/null +++ b/_test_contract/conftest.py @@ -0,0 +1,125 @@ +import os +from datetime import timedelta +from pathlib import Path + +from freezegun import freeze_time +import pytest + +from unstructured_client import UnstructuredClient, utils + +FAKE_API_KEY = "91pmLBeETAbXCpNylRsLq11FdiZPTk" + + +@pytest.fixture(scope="module") +def platform_client(platform_api_url) -> UnstructuredClient: + # settings the retry config to always try 3 times after a fail = 4 requests sent + _client = UnstructuredClient( + api_key_auth=FAKE_API_KEY, + server_url=platform_api_url, + retry_config=utils.RetryConfig( + "backoff", utils.BackoffStrategy( + initial_interval=3000, + max_interval=3000, + exponent=1.0, + max_elapsed_time=8000 + ), + retry_connection_errors=True + ) + ) + yield _client + + +@pytest.fixture(scope="module") +def serverless_client(serverless_api_url) -> UnstructuredClient: + # settings the retry config to always try 3 times after a fail = 4 requests sent + _client = UnstructuredClient( + api_key_auth=FAKE_API_KEY, + server_url=serverless_api_url, + retry_config = utils.RetryConfig( + "backoff", utils.BackoffStrategy( + initial_interval=3000, + max_interval=3000, + exponent=1.0, + max_elapsed_time=8000 + ), + retry_connection_errors=True + ) + ) + yield _client + + +@pytest.fixture() +def freezer(): + ignore = ['_pytest.terminal', '_pytest.runner'] + freezer = freeze_time(ignore=ignore) + frozen_time = freezer.start() + yield frozen_time + freezer.stop() + +@pytest.fixture(autouse=True) +def mock_sleep(mocker, freezer): + sleep_mock = mocker.patch("time.sleep") + sleep_mock.side_effect = lambda seconds: freezer.tick(timedelta(seconds=seconds)) + yield sleep_mock + + +@pytest.fixture(scope="module") +def platform_api_url(): + return "https://platform.unstructuredapp.io" + + +@pytest.fixture(scope="module") +def serverless_api_url(): + return "https://api.unstructuredapp.io" + + +@pytest.fixture(scope="module") +def dummy_partitioned_text(): + return """[ + { + "type": "NarrativeText", + "element_id": "b7dca0363a83468b9e7326c0c1caf93e", + "text": "March 17, 2022", + "metadata": { + "detection_class_prob": 0.35799261927604675, + "coordinates": { + "points": [ + [ + 1447.871337890625, + 301.74810791015625 + ], + [ + 1447.871337890625, + 326.5603332519531 + ], + [ + 1616.6922607421875, + 326.5603332519531 + ], + [ + 1616.6922607421875, + 301.74810791015625 + ] + ], + "system": "PixelSpace", + "layout_width": 1700, + "layout_height": 2200 + }, + "last_modified": "2024-02-07T14:23:29", + "filetype": "application/pdf", + "languages": [ + "eng" + ], + "page_number": 1, + "file_directory": "data", + "filename": "MyDocument.pdf" + } + } +]""" + + +@pytest.fixture(scope="module") +def doc_path() -> Path: + samples_path = Path(__file__).resolve().parents[1] / "_sample_docs" + assert samples_path.exists() + return samples_path diff --git a/_test_contract/platform_api/conftest.py b/_test_contract/platform_api/conftest.py deleted file mode 100644 index 0915031b..00000000 --- a/_test_contract/platform_api/conftest.py +++ /dev/null @@ -1,29 +0,0 @@ -import os - -import pytest - -from unstructured_client import UnstructuredClient, RetryConfig -from unstructured_client.utils import BackoffStrategy - -FAKE_API_KEY = "91pmLBeETAbXCpNylRsLq11FdiZPTk" - - -@pytest.fixture(scope="module") -def platform_api_url(): - return "https://platform.unstructuredapp.io" - - -@pytest.fixture(scope="module") -def client(platform_api_url) -> UnstructuredClient: - _client = UnstructuredClient( - api_key_auth=FAKE_API_KEY, - server_url=platform_api_url, - retry_config=RetryConfig( - strategy="backoff", - retry_connection_errors=False, - backoff=BackoffStrategy( - max_elapsed_time=0, max_interval=0, exponent=0, initial_interval=0 - ), - ), - ) - yield _client diff --git a/_test_contract/platform_api/test_destinations.py b/_test_contract/platform_api/test_destinations.py index d1b8450c..88b37f16 100644 --- a/_test_contract/platform_api/test_destinations.py +++ b/_test_contract/platform_api/test_destinations.py @@ -8,7 +8,7 @@ def test_list_destinations( - httpx_mock, client: UnstructuredClient, platform_api_url: str + httpx_mock, platform_client: UnstructuredClient, platform_api_url: str ): url = f"{platform_api_url}/api/v1/destinations/" @@ -34,7 +34,7 @@ def test_list_destinations( url=url, ) - destinations_response = client.destinations.list_destinations( + destinations_response = platform_client.destinations.list_destinations( request=operations.ListDestinationsRequest() ) assert destinations_response.status_code == 200 @@ -57,7 +57,7 @@ def test_list_destinations( def test_list_destinations_empty( - httpx_mock, client: UnstructuredClient, platform_api_url: str + httpx_mock, platform_client: UnstructuredClient, platform_api_url: str ): url = f"{platform_api_url}/api/v1/destinations/" @@ -68,7 +68,7 @@ def test_list_destinations_empty( url=url, ) - destinations_response = client.destinations.list_destinations( + destinations_response = platform_client.destinations.list_destinations( request=operations.ListDestinationsRequest() ) assert destinations_response.status_code == 200 @@ -89,7 +89,7 @@ def test_list_destinations_empty( @pytest.mark.httpx_mock(can_send_already_matched_responses=True) # in case of retries def test_list_destinations_5xx_code( httpx_mock, - client: UnstructuredClient, + platform_client: UnstructuredClient, platform_api_url: str, error_status_code: int, ): @@ -103,7 +103,7 @@ def test_list_destinations_5xx_code( ) with pytest.raises(SDKError) as excinfo: - client.destinations.list_destinations( + platform_client.destinations.list_destinations( request=operations.ListDestinationsRequest() ) requests = httpx_mock.get_requests() @@ -112,7 +112,7 @@ def test_list_destinations_5xx_code( assert excinfo.value.status_code == error_status_code -def test_get_destination(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_get_destination(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): dest_id = "0c363dec-3c70-45ee-8041-481044a6e1cc" url = f"{platform_api_url}/api/v1/destinations/{dest_id}" @@ -136,7 +136,7 @@ def test_get_destination(httpx_mock, client: UnstructuredClient, platform_api_ur url=url, ) - destination_response = client.destinations.get_destination( + destination_response = platform_client.destinations.get_destination( request=operations.GetDestinationRequest(destination_id=dest_id) ) assert destination_response.status_code == 200 @@ -158,7 +158,7 @@ def test_get_destination(httpx_mock, client: UnstructuredClient, platform_api_ur def test_get_destination_not_found( - httpx_mock, client: UnstructuredClient, platform_api_url: str + httpx_mock, platform_client: UnstructuredClient, platform_api_url: str ): dest_id = "0c363dec-3c70-45ee-8041-481044a6e1cc" url = f"{platform_api_url}/api/v1/destinations/{dest_id}" @@ -171,7 +171,7 @@ def test_get_destination_not_found( ) with pytest.raises(SDKError) as excinfo: - client.destinations.get_destination( + platform_client.destinations.get_destination( request=operations.GetDestinationRequest(destination_id=dest_id) ) @@ -182,7 +182,7 @@ def test_get_destination_not_found( def test_create_destination( - httpx_mock, client: UnstructuredClient, platform_api_url: str + httpx_mock, platform_client: UnstructuredClient, platform_api_url: str ): url = f"{platform_api_url}/api/v1/destinations/" @@ -204,7 +204,7 @@ def test_create_destination( url=url, ) - destination_response = client.destinations.create_destination( + destination_response = platform_client.destinations.create_destination( request=operations.CreateDestinationRequest( create_destination_connector=shared.CreateDestinationConnector( name="test_destination_name", @@ -236,7 +236,7 @@ def test_create_destination( def test_update_destination( - httpx_mock, client: UnstructuredClient, platform_api_url: str + httpx_mock, platform_client: UnstructuredClient, platform_api_url: str ): dest_id = "b25d4161-77a0-4e08-b65e-86f398ce15ad" url = f"{platform_api_url}/api/v1/destinations/{dest_id}" @@ -259,7 +259,7 @@ def test_update_destination( url=url, ) - destination_update_response = client.destinations.update_destination( + destination_update_response = platform_client.destinations.update_destination( request=operations.UpdateDestinationRequest( destination_id=dest_id, update_destination_connector=shared.UpdateDestinationConnector( @@ -291,7 +291,7 @@ def test_update_destination( def test_delete_destination( - httpx_mock, client: UnstructuredClient, platform_api_url: str + httpx_mock, platform_client: UnstructuredClient, platform_api_url: str ): dest_id = "b25d4161-77a0-4e08-b65e-86f398ce15ad" url = f"{platform_api_url}/api/v1/destinations/{dest_id}" @@ -304,7 +304,7 @@ def test_delete_destination( url=url, ) - response = client.destinations.delete_destination( + response = platform_client.destinations.delete_destination( request=operations.DeleteDestinationRequest(destination_id=dest_id) ) assert response.status_code == 200 diff --git a/_test_contract/platform_api/test_jobs.py b/_test_contract/platform_api/test_jobs.py index 4c14e38d..7d699522 100644 --- a/_test_contract/platform_api/test_jobs.py +++ b/_test_contract/platform_api/test_jobs.py @@ -7,7 +7,7 @@ from unstructured_client.models.errors import SDKError -def test_list_jobs(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_list_jobs(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): url = f"{platform_api_url}/api/v1/jobs/" httpx_mock.add_response( @@ -26,7 +26,7 @@ def test_list_jobs(httpx_mock, client: UnstructuredClient, platform_api_url: str url=url, ) - jobs_response = client.jobs.list_jobs(request=operations.ListJobsRequest()) + jobs_response = platform_client.jobs.list_jobs(request=operations.ListJobsRequest()) assert jobs_response.status_code == 200 requests = httpx_mock.get_requests() @@ -44,7 +44,7 @@ def test_list_jobs(httpx_mock, client: UnstructuredClient, platform_api_url: str assert job.created_at == datetime.fromisoformat("2025-06-22T11:37:21.648+00:00") -def test_get_job(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_get_job(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): url = f"{platform_api_url}/api/v1/jobs/fcdc4994-eea5-425c-91fa-e03f2bd8030d" httpx_mock.add_response( @@ -61,7 +61,7 @@ def test_get_job(httpx_mock, client: UnstructuredClient, platform_api_url: str): url=url, ) - job_response = client.jobs.get_job( + job_response = platform_client.jobs.get_job( request=operations.GetJobRequest(job_id="fcdc4994-eea5-425c-91fa-e03f2bd8030d") ) assert job_response.status_code == 200 @@ -81,7 +81,7 @@ def test_get_job(httpx_mock, client: UnstructuredClient, platform_api_url: str): def test_get_job_not_found( - httpx_mock, client: UnstructuredClient, platform_api_url: str + httpx_mock, platform_client: UnstructuredClient, platform_api_url: str ): url = f"{platform_api_url}/api/v1/jobs/fcdc4994-eea5-425c-91fa-e03f2bd8030d" @@ -94,7 +94,7 @@ def test_get_job_not_found( ) with pytest.raises(SDKError) as e: - client.jobs.get_job( + platform_client.jobs.get_job( request=operations.GetJobRequest( job_id="fcdc4994-eea5-425c-91fa-e03f2bd8030d" ) @@ -110,7 +110,7 @@ def test_get_job_not_found( assert request.url == url -def test_get_job_error(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_get_job_error(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): url = f"{platform_api_url}/api/v1/jobs/fcdc4994-eea5-425c-91fa-e03f2bd8030d" httpx_mock.add_response( @@ -119,10 +119,11 @@ def test_get_job_error(httpx_mock, client: UnstructuredClient, platform_api_url: headers={"Content-Type": "application/json"}, json={"detail": "Internal server error"}, url=url, + is_reusable=True, ) with pytest.raises(SDKError) as e: - client.jobs.get_job( + platform_client.jobs.get_job( request=operations.GetJobRequest( job_id="fcdc4994-eea5-425c-91fa-e03f2bd8030d" ) @@ -132,13 +133,13 @@ def test_get_job_error(httpx_mock, client: UnstructuredClient, platform_api_url: assert e.value.message == "API error occurred" requests = httpx_mock.get_requests() - assert len(requests) == 1 + assert len(requests) == 4 request = requests[0] assert request.method == "GET" assert request.url == url -def test_cancel_job(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_cancel_job(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): url = f"{platform_api_url}/api/v1/jobs/fcdc4994-eea5-425c-91fa-e03f2bd8030d/cancel" httpx_mock.add_response( @@ -152,7 +153,7 @@ def test_cancel_job(httpx_mock, client: UnstructuredClient, platform_api_url: st }, ) - cancel_response = client.jobs.cancel_job( + cancel_response = platform_client.jobs.cancel_job( request=operations.CancelJobRequest( job_id="fcdc4994-eea5-425c-91fa-e03f2bd8030d" ) diff --git a/_test_contract/platform_api/test_sources.py b/_test_contract/platform_api/test_sources.py index fc7d3bf1..bf17a0f2 100644 --- a/_test_contract/platform_api/test_sources.py +++ b/_test_contract/platform_api/test_sources.py @@ -14,7 +14,7 @@ async def __call__(self, *args, **kwargs): return super(AsyncMock, self).__call__(*args, **kwargs) -def test_list_sources(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_list_sources(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): url = f"{platform_api_url}/api/v1/sources/" httpx_mock.add_response( @@ -40,7 +40,7 @@ def test_list_sources(httpx_mock, client: UnstructuredClient, platform_api_url: url=url, ) - sources_response = client.sources.list_sources( + sources_response = platform_client.sources.list_sources( request=operations.ListSourcesRequest() ) assert sources_response.status_code == 200 @@ -61,7 +61,7 @@ def test_list_sources(httpx_mock, client: UnstructuredClient, platform_api_url: def test_list_sources_empty( - httpx_mock, client: UnstructuredClient, platform_api_url: str + httpx_mock, platform_client: UnstructuredClient, platform_api_url: str ): url = f"{platform_api_url}/api/v1/sources/" @@ -72,7 +72,7 @@ def test_list_sources_empty( url=url, ) - sources_response = client.sources.list_sources( + sources_response = platform_client.sources.list_sources( request=operations.ListSourcesRequest() ) assert sources_response.status_code == 200 @@ -93,7 +93,7 @@ def test_list_sources_empty( @pytest.mark.httpx_mock(can_send_already_matched_responses=True) # in case of retries def test_list_sources_5xx_code( httpx_mock, - client: UnstructuredClient, + platform_client: UnstructuredClient, platform_api_url: str, error_status_code: int, ): @@ -107,14 +107,14 @@ def test_list_sources_5xx_code( ) with pytest.raises(SDKError) as excinfo: - client.sources.list_sources(request=operations.ListSourcesRequest()) + platform_client.sources.list_sources(request=operations.ListSourcesRequest()) requests = httpx_mock.get_requests() assert len(requests) >= 1 assert excinfo.value.message == "API error occurred" assert excinfo.value.status_code == error_status_code -def test_get_source(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_get_source(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): dest_id = "a15d4161-77a0-4e08-b65e-86f398ce15ad" url = f"{platform_api_url}/api/v1/sources/{dest_id}" @@ -139,7 +139,7 @@ def test_get_source(httpx_mock, client: UnstructuredClient, platform_api_url: st url=url, ) - source_response = client.sources.get_source( + source_response = platform_client.sources.get_source( request=operations.GetSourceRequest(source_id=dest_id) ) assert source_response.status_code == 200 @@ -159,7 +159,7 @@ def test_get_source(httpx_mock, client: UnstructuredClient, platform_api_url: st def test_get_source_not_found( - httpx_mock, client: UnstructuredClient, platform_api_url: str + httpx_mock, platform_client: UnstructuredClient, platform_api_url: str ): dest_id = "a15d4161-77a0-4e08-b65e-86f398ce15ad" url = f"{platform_api_url}/api/v1/sources/{dest_id}" @@ -172,7 +172,7 @@ def test_get_source_not_found( ) with pytest.raises(SDKError) as excinfo: - client.sources.get_source(request=operations.GetSourceRequest(source_id=dest_id)) + platform_client.sources.get_source(request=operations.GetSourceRequest(source_id=dest_id)) requests = httpx_mock.get_requests() assert len(requests) == 1 @@ -180,7 +180,7 @@ def test_get_source_not_found( assert excinfo.value.status_code == 404 -def test_create_source(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_create_source(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): url = f"{platform_api_url}/api/v1/sources/" httpx_mock.add_response( @@ -204,7 +204,7 @@ def test_create_source(httpx_mock, client: UnstructuredClient, platform_api_url: url=url, ) - source_response = client.sources.create_source( + source_response = platform_client.sources.create_source( request=operations.CreateSourceRequest( create_source_connector=shared.CreateSourceConnector( name="test_source_name", @@ -236,7 +236,7 @@ def test_create_source(httpx_mock, client: UnstructuredClient, platform_api_url: assert source.created_at == datetime.fromisoformat("2023-09-15T01:06:53.146+00:00") -def test_update_source(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_update_source(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): dest_id = "a15d4161-77a0-4e08-b65e-86f398ce15ad" url = f"{platform_api_url}/api/v1/sources/{dest_id}" @@ -262,7 +262,7 @@ def test_update_source(httpx_mock, client: UnstructuredClient, platform_api_url: url=url, ) - source_update_response = client.sources.update_source( + source_update_response = platform_client.sources.update_source( request=operations.UpdateSourceRequest( source_id=dest_id, update_source_connector=shared.UpdateSourceConnector( @@ -297,7 +297,7 @@ def test_update_source(httpx_mock, client: UnstructuredClient, platform_api_url: ) -def test_delete_source(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_delete_source(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): dest_id = "a15d4161-77a0-4e08-b65e-86f398ce15ad" url = f"{platform_api_url}/api/v1/sources/{dest_id}" @@ -309,7 +309,7 @@ def test_delete_source(httpx_mock, client: UnstructuredClient, platform_api_url: url=url, ) - response = client.sources.delete_source( + response = platform_client.sources.delete_source( request=operations.DeleteSourceRequest(source_id=dest_id) ) assert response.status_code == 200 diff --git a/_test_contract/platform_api/test_workflows.py b/_test_contract/platform_api/test_workflows.py index 50a858b1..5da3872b 100644 --- a/_test_contract/platform_api/test_workflows.py +++ b/_test_contract/platform_api/test_workflows.py @@ -7,7 +7,7 @@ from unstructured_client.models.errors import SDKError -def test_list_workflows(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_list_workflows(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): url = f"{platform_api_url}/api/v1/workflows/" httpx_mock.add_response( @@ -32,7 +32,7 @@ def test_list_workflows(httpx_mock, client: UnstructuredClient, platform_api_url ], ) - workflows_response = client.workflows.list_workflows( + workflows_response = platform_client.workflows.list_workflows( request=operations.ListWorkflowsRequest() ) assert workflows_response.status_code == 200 @@ -67,7 +67,7 @@ def test_list_workflows(httpx_mock, client: UnstructuredClient, platform_api_url def test_list_workflows_empty( - httpx_mock, client: UnstructuredClient, platform_api_url: str + httpx_mock, platform_client: UnstructuredClient, platform_api_url: str ): url = f"{platform_api_url}/api/v1/workflows/" @@ -77,7 +77,7 @@ def test_list_workflows_empty( json=[], ) - workflows_response = client.workflows.list_workflows( + workflows_response = platform_client.workflows.list_workflows( request=operations.ListWorkflowsRequest() ) assert workflows_response.status_code == 200 @@ -96,7 +96,7 @@ def test_list_workflows_empty( @pytest.mark.httpx_mock(can_send_already_matched_responses=True) # in case of retries def test_list_workflows_error( httpx_mock, - client: UnstructuredClient, + platform_client: UnstructuredClient, platform_api_url: str, error_status_code: int, ): @@ -109,12 +109,12 @@ def test_list_workflows_error( ) with pytest.raises(SDKError) as excinfo: - client.workflows.list_workflows(request=operations.ListWorkflowsRequest()) + platform_client.workflows.list_workflows(request=operations.ListWorkflowsRequest()) assert excinfo.value.status_code == error_status_code assert excinfo.value.message == "API error occurred" -def test_create_workflow(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_create_workflow(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): url = f"{platform_api_url}/api/v1/workflows/" httpx_mock.add_response( @@ -138,7 +138,7 @@ def test_create_workflow(httpx_mock, client: UnstructuredClient, platform_api_ur }, ) - create_workflow_response = client.workflows.create_workflow( + create_workflow_response = platform_client.workflows.create_workflow( request=operations.CreateWorkflowRequest( create_workflow=shared.CreateWorkflow( name="test_workflow", @@ -159,7 +159,7 @@ def test_create_workflow(httpx_mock, client: UnstructuredClient, platform_api_ur assert request.url == url -def test_update_workflow(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_update_workflow(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): url = f"{platform_api_url}/api/v1/workflows/16b80fee-64dc-472d-8f26-1d7729b6423d" httpx_mock.add_response( @@ -183,7 +183,7 @@ def test_update_workflow(httpx_mock, client: UnstructuredClient, platform_api_ur }, ) - update_workflow_response = client.workflows.update_workflow( + update_workflow_response = platform_client.workflows.update_workflow( request=operations.UpdateWorkflowRequest( workflow_id="16b80fee-64dc-472d-8f26-1d7729b6423d", update_workflow=shared.UpdateWorkflow( @@ -219,7 +219,7 @@ def test_update_workflow(httpx_mock, client: UnstructuredClient, platform_api_ur assert updated_workflow.destinations == ["aeebecc7-9d8e-4625-bf1d-815c2f084869"] -def test_run_workflow(httpx_mock, client: UnstructuredClient, platform_api_url: str): +def test_run_workflow(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str): url = ( f"{platform_api_url}/api/v1/workflows/16b80fee-64dc-472d-8f26-1d7729b6423d/run" ) @@ -239,7 +239,7 @@ def test_run_workflow(httpx_mock, client: UnstructuredClient, platform_api_url: url=url, ) - run_workflow_response = client.workflows.run_workflow( + run_workflow_response = platform_client.workflows.run_workflow( request=operations.RunWorkflowRequest( workflow_id="16b80fee-64dc-472d-8f26-1d7729b6423d" ) diff --git a/_test_contract/test_partition_via_api.py b/_test_contract/test_partition_via_api.py index 65b9c49c..d6ce6046 100644 --- a/_test_contract/test_partition_via_api.py +++ b/_test_contract/test_partition_via_api.py @@ -1,45 +1,9 @@ -import os from pathlib import Path import httpx import pytest from unstructured.partition.api import partition_via_api -from unstructured_client import UnstructuredClient - - -@pytest.fixture(scope="module") -def client() -> UnstructuredClient: - _client = UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"), server='free-api') - yield _client - - -@pytest.fixture(scope="module") -def doc_path() -> Path: - samples_path = Path(__file__).resolve().parents[1] / "_sample_docs" - assert samples_path.exists() - return samples_path - - -MOCK_TEXT = """[ - { - "element_id": "f49fbd614ddf5b72e06f59e554e6ae2b", - "text": "This is a test email to use for unit tests.", - "type": "NarrativeText", - "metadata": { - "sent_from": [ - "Matthew Robinson " - ], - "sent_to": [ - "Matthew Robinson " - ], - "subject": "Test Email", - "filename": "fake-email.eml", - "filetype": "message/rfc822" - } - } -]""" - @pytest.mark.xfail @pytest.mark.parametrize( @@ -48,7 +12,13 @@ def doc_path() -> Path: ("http://localhost:8000/general/v0/general", "http://localhost:8000/general/v0/general"), ] ) -def test_partition_via_api_custom_url(httpx_mock, doc_path: Path, url: str, full_url: str): +def test_partition_via_api_custom_url( + httpx_mock, + doc_path: Path, + url: str, + full_url: str, + dummy_partitioned_text: str +): """ Assert that we can specify api_url and requests are sent to the right place """ @@ -60,21 +30,25 @@ def test_partition_via_api_custom_url(httpx_mock, doc_path: Path, url: str, full method="POST", url=full_url, headers={"Content-Type": "application/json"}, - content=MOCK_TEXT.encode(), + content=dummy_partitioned_text.encode(), ) partition_via_api(filename=str(doc_path / filename), api_url=url, metadata_filename=filename) @pytest.mark.xfail -def test_partition_via_api_pass_list_type_parameters(httpx_mock, doc_path: Path): +def test_partition_via_api_pass_list_type_parameters( + httpx_mock, + doc_path: Path, + dummy_partitioned_text: str +): url = "http://localhost:8000/general/v0/general" filename = "layout-parser-paper-fast.pdf" httpx_mock.add_response( method="POST", headers={"Content-Type": "application/json"}, - content=MOCK_TEXT.encode(), + content=dummy_partitioned_text.encode(), url=url, ) diff --git a/_test_contract/test_retries.py b/_test_contract/test_retries.py new file mode 100644 index 00000000..593d8997 --- /dev/null +++ b/_test_contract/test_retries.py @@ -0,0 +1,99 @@ +from datetime import datetime +from pathlib import Path + +import pytest + +from unstructured_client import UnstructuredClient +from unstructured_client.models import operations, shared + +RETRY_STATUS_CODES = [500, 501, 502, 503, 504, 505] + +@pytest.mark.parametrize("status_code", RETRY_STATUS_CODES) +def test_list_jobs_retries(httpx_mock, platform_client: UnstructuredClient, platform_api_url: str, status_code: int): + url = f"{platform_api_url}/api/v1/jobs/" + + for _ in range(2): + httpx_mock.add_response(status_code=status_code, method="GET", json=[{"detail": "error"}], url=url) + httpx_mock.add_response( + method="GET", + headers={"Content-Type": "application/json"}, + status_code=200, + json=[ + { + "created_at": "2025-06-22T11:37:21.648Z", + "id": "fcdc4994-eea5-425c-91fa-e03f2bd8030d", + "status": "COMPLETED", + "runtime": None, + "workflow_id": "16b80fee-64dc-472d-8f26-1d7729b6423d", + "workflow_name": "test_workflow", + } + ], + url=url, + ) + + jobs_response = platform_client.jobs.list_jobs(request=operations.ListJobsRequest()) + assert jobs_response.status_code == 200 + + requests = httpx_mock.get_requests() + assert len(requests) == 3 + for request in requests: + assert request.method == "GET" + assert request.url == url + + assert len(jobs_response.response_list_jobs) == 1 + job = jobs_response.response_list_jobs[0] + assert job.id == "fcdc4994-eea5-425c-91fa-e03f2bd8030d" + +@pytest.mark.parametrize("status_code", RETRY_STATUS_CODES) +def test_partition_retries( + httpx_mock, + serverless_client: UnstructuredClient, + dummy_partitioned_text: str, +serverless_api_url: str, + status_code: int, + doc_path: Path, +): + url = f"{serverless_api_url}/general/v0/general" + filename = "layout-parser-paper-fast.pdf" + file_path = str(doc_path / filename) + + for _ in range(2): + httpx_mock.add_response( + status_code=status_code, + method="POST", + json=[{"detail": "error"}], + url=url + ) + httpx_mock.add_response( + method="POST", + headers={"Content-Type": "application/json"}, + content=dummy_partitioned_text.encode(), + url=url, + ) + + + with open(file_path, "rb") as f: + + partition_response = serverless_client.general.partition( + request=operations.PartitionRequest( + partition_parameters=shared.PartitionParameters( + files=shared.Files( + content=f, + file_name=filename, + ), + strategy=shared.Strategy.HI_RES, + ), + ) + ) + + assert partition_response.status_code == 200 + + requests = httpx_mock.get_requests() + assert len(requests) == 3 + for request in requests: + assert request.method == "POST" + assert request.url == url + + assert len(partition_response.elements) > 0 + for element in partition_response.elements: + assert "text" in element \ No newline at end of file diff --git a/_test_unstructured_client/unit/test_custom_hooks.py b/_test_unstructured_client/unit/test_custom_hooks.py index 954e8a5c..7659a57f 100644 --- a/_test_unstructured_client/unit/test_custom_hooks.py +++ b/_test_unstructured_client/unit/test_custom_hooks.py @@ -94,13 +94,18 @@ def mock_post(request): @pytest.mark.parametrize( ("status_code", "expect_retry"), [ - [500, False], + [400, False], + [401, False], + [403, False], + [404, False], + [422, False], + [500, True], [502, True], [503, True], [504, True], ] ) -def test_unit_number_of_retries_in_5xx(status_code: int, expect_retry: bool): +def test_unit_number_of_retries_in_failed_requests(status_code: int, expect_retry: bool): filename = "README.md" backoff_strategy = BackoffStrategy( initial_interval=1, max_interval=10, exponent=1.5, max_elapsed_time=300 diff --git a/gen.yaml b/gen.yaml index e34c5750..e4aae140 100644 --- a/gen.yaml +++ b/gen.yaml @@ -16,6 +16,7 @@ python: additionalDependencies: dev: deepdiff: '>=6.0' + freezegun: '>=1.5.1' pytest: '>=8.3.3' pytest-asyncio: '>=0.24.0' pytest-mock: '>=3.14.0' diff --git a/poetry.lock b/poetry.lock index 8445ceae..07d36a70 100644 --- a/poetry.lock +++ b/poetry.lock @@ -399,6 +399,21 @@ files = [ [package.extras] test = ["pytest (>=6)"] +[[package]] +name = "freezegun" +version = "1.5.1" +description = "Let your Python tests travel through time" +optional = false +python-versions = ">=3.7" +groups = ["dev"] +files = [ + {file = "freezegun-1.5.1-py3-none-any.whl", hash = "sha256:bf111d7138a8abe55ab48a71755673dbaa4ab87f4cff5634a4442dfec34c15f1"}, + {file = "freezegun-1.5.1.tar.gz", hash = "sha256:b29dedfcda6d5e8e083ce71b2b542753ad48cfec44037b3fc79702e2980a89e9"}, +] + +[package.dependencies] +python-dateutil = ">=2.7" + [[package]] name = "h11" version = "0.14.0" @@ -921,7 +936,7 @@ version = "2.8.2" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, @@ -973,7 +988,7 @@ version = "1.16.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" -groups = ["main"] +groups = ["main", "dev"] files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, @@ -1140,4 +1155,4 @@ test = ["aiohttp (>=3.10.5)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", [metadata] lock-version = "2.1" python-versions = ">=3.9" -content-hash = "4898d7795a3536100b31253940f9356d99ba3c82debbb25cf03426e6fb0627dc" +content-hash = "d33ff88a314bc0862b75a1f69a6e8bfe62770c47f7bedd14be00348bda942cd1" diff --git a/pyproject.toml b/pyproject.toml index 36112444..901d125c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ in-project = true [tool.poetry.group.dev.dependencies] deepdiff = ">=6.0" +freezegun = ">=1.5.1" mypy = "==1.14.1" pylint = "==3.2.3" pytest = ">=8.3.3" diff --git a/src/unstructured_client/_hooks/custom/request_utils.py b/src/unstructured_client/_hooks/custom/request_utils.py index 82f44e96..a960ba29 100644 --- a/src/unstructured_client/_hooks/custom/request_utils.py +++ b/src/unstructured_client/_hooks/custom/request_utils.py @@ -169,11 +169,7 @@ async def call_api_async( retry_connection_errors=True ) - retryable_codes = [ - "502", - "503", - "504" - ] + retryable_codes = ["5xx"] async def do_request(): return await client.send(pdf_chunk_request) diff --git a/src/unstructured_client/general.py b/src/unstructured_client/general.py index eadad0a8..9f04e4aa 100644 --- a/src/unstructured_client/general.py +++ b/src/unstructured_client/general.py @@ -93,7 +93,7 @@ def partition( retry_config = None if isinstance(retries, utils.RetryConfig): - retry_config = (retries, ["502", "503", "504"]) + retry_config = (retries, ["5xx"]) http_res = self.do_request( hook_ctx=HookContext( @@ -225,7 +225,7 @@ async def partition_async( retry_config = None if isinstance(retries, utils.RetryConfig): - retry_config = (retries, ["502", "503", "504"]) + retry_config = (retries, ["5xx"]) http_res = await self.do_request_async( hook_ctx=HookContext(