diff --git a/_sample_docs/embedded-images-tables.jpg b/_sample_docs/embedded-images-tables.jpg
new file mode 100644
index 00000000..f94ef7b1
Binary files /dev/null and b/_sample_docs/embedded-images-tables.jpg differ
diff --git a/_sample_docs/fake-power-point.ppt b/_sample_docs/fake-power-point.ppt
new file mode 100644
index 00000000..a6bf7266
Binary files /dev/null and b/_sample_docs/fake-power-point.ppt differ
diff --git a/_test_unstructured_client/integration/test_integration.py b/_test_unstructured_client/integration/test_integration.py
index 54f76f93..0f6e8d7b 100644
--- a/_test_unstructured_client/integration/test_integration.py
+++ b/_test_unstructured_client/integration/test_integration.py
@@ -227,3 +227,124 @@ async def call_api():
     uvloop.install()
     elements = asyncio.run(call_api())
     assert len(elements) > 0
+
+
+@pytest.mark.parametrize("split_pdf", [True, False])
+@pytest.mark.parametrize("vlm_model", ["gpt-4o"])
+@pytest.mark.parametrize("vlm_model_provider", ["openai"])
+@pytest.mark.parametrize(
+    "filename",
+    [
+        "layout-parser-paper-fast.pdf",
+        "fake-power-point.ppt",
+        "embedded-images-tables.jpg",
+    ]
+)
+def test_partition_strategy_vlm_openai(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
+    with open(doc_path / filename, "rb") as f:
+        files = shared.Files(
+            content=f.read(),
+            file_name=filename,
+        )
+
+    req = operations.PartitionRequest(
+        partition_parameters=shared.PartitionParameters(
+            files=files,
+            strategy="vlm",
+            vlm_model=vlm_model,
+            vlm_model_provider=vlm_model_provider,
+            languages=["eng"],
+            split_pdf_page=split_pdf,
+        )
+    )
+
+    response = client.general.partition(
+        request=req
+    )
+    assert response.status_code == 200
+    assert len(response.elements) > 0
+    assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
+
+
+@pytest.mark.parametrize("split_pdf", [True, False])
+@pytest.mark.parametrize("vlm_model",
+    [
+        "us.amazon.nova-pro-v1:0",
+        "us.amazon.nova-lite-v1:0",
+        "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
+        "us.anthropic.claude-3-opus-20240229-v1:0",
+        "us.anthropic.claude-3-haiku-20240307-v1:0",
+        "us.anthropic.claude-3-sonnet-20240229-v1:0",
+        "us.meta.llama3-2-90b-instruct-v1:0",
+        "us.meta.llama3-2-11b-instruct-v1:0",
+    ]
+)
+@pytest.mark.parametrize("vlm_model_provider", ["bedrock"])
+@pytest.mark.parametrize(
+    "filename",
+    [
+        "layout-parser-paper-fast.pdf",
+        "fake-power-point.ppt",
+        "embedded-images-tables.jpg",
+    ]
+)
+def test_partition_strategy_vlm_bedrock(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
+    with open(doc_path / filename, "rb") as f:
+        files = shared.Files(
+            content=f.read(),
+            file_name=filename,
+        )
+
+    req = operations.PartitionRequest(
+        partition_parameters=shared.PartitionParameters(
+            files=files,
+            strategy="vlm",
+            vlm_model=vlm_model,
+            vlm_model_provider=vlm_model_provider,
+            languages=["eng"],
+            split_pdf_page=split_pdf,
+        )
+    )
+
+    response = client.general.partition(
+        request=req
+    )
+    assert response.status_code == 200
+    assert len(response.elements) > 0
+    assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"
+
+@pytest.mark.parametrize("split_pdf", [True, False])
+@pytest.mark.parametrize("vlm_model", ["claude-3-5-sonnet-20241022",])
+@pytest.mark.parametrize("vlm_model_provider", ["anthropic"])
+@pytest.mark.parametrize(
+    "filename",
+    [
+        "layout-parser-paper-fast.pdf",
+        "fake-power-point.ppt",
+        "embedded-images-tables.jpg",
+    ]
+)
+def test_partition_strategy_vlm_anthropic(split_pdf, vlm_model, vlm_model_provider, client, doc_path, filename):
+    with open(doc_path / filename, "rb") as f:
+        files = shared.Files(
+            content=f.read(),
+            file_name=filename,
+        )
+
+    req = operations.PartitionRequest(
+        partition_parameters=shared.PartitionParameters(
+            files=files,
+            strategy="vlm",
+            vlm_model=vlm_model,
+            vlm_model_provider=vlm_model_provider,
+            languages=["eng"],
+            split_pdf_page=split_pdf,
+        )
+    )
+
+    response = client.general.partition(
+        request=req
+    )
+    assert response.status_code == 200
+    assert len(response.elements) > 0
+    assert response.elements[0]["metadata"]["partitioner_type"] == "vlm_partition"