diff --git a/api-reference/api-services/accessing-unstructured-api.mdx b/api-reference/api-services/accessing-unstructured-api.mdx index a8c0d954..b502cba3 100644 --- a/api-reference/api-services/accessing-unstructured-api.mdx +++ b/api-reference/api-services/accessing-unstructured-api.mdx @@ -14,9 +14,9 @@ Choose your preferred method: The API parameters for all these methods are documented on the [API parameters](/api-reference/api-services/api-parameters) page. -import UseIngestInstead from '/snippets/general-shared-text/use-ingest-instead.mdx'; +import UseIngestOrPlatformInstead from '/snippets/general-shared-text/use-ingest-or-platform-instead.mdx'; - + If you'd like to try out the Unstructured API interactively by using the Free Unstructured API to process a single file, you can do so by using the [Swagger UI](https://api.unstructured.io/general/docs#/default/pipeline_1_general_v0_general_post). diff --git a/api-reference/api-services/examples.mdx b/api-reference/api-services/examples.mdx index 74b3282e..021f630a 100644 --- a/api-reference/api-services/examples.mdx +++ b/api-reference/api-services/examples.mdx @@ -13,7 +13,7 @@ import NoURLForServerlessAPI from '/snippets/general-shared-text/no-url-for-serv -import UseIngestInstead from '/snippets/general-shared-text/use-ingest-instead.mdx'; +import UseIngestOrPlatformInstead from '/snippets/general-shared-text/use-ingest-or-platform-instead.mdx'; ### Changing partition strategy for a PDF @@ -82,7 +82,7 @@ The `hi_res` strategy supports different models, and the default is `layout_v1.1 ``` - + ```bash POST curl -X 'POST' $UNSTRUCTURED_API_URL \ -H 'accept: application/json' \ @@ -94,7 +94,7 @@ The `hi_res` strategy supports different models, and the default is `layout_v1.1 ``` - + ```python Python import asyncio import os @@ -103,8 +103,7 @@ The `hi_res` strategy supports different models, and the default is `layout_v1.1 from unstructured_client.models import shared client = 
unstructured_client.UnstructuredClient( - api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"), - server_url=os.getenv("UNSTRUCTURED_API_URL"), + api_key_auth=os.getenv("UNSTRUCTURED_API_KEY") ) async def call_api(filename, input_dir, output_dir): @@ -123,7 +122,10 @@ The `hi_res` strategy supports different models, and the default is `layout_v1.1 } try: - res = await client.general.partition_async(request=req) + res = await client.general.partition_async( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") + ) element_dicts = [element for element in res.elements] json_elements = json.dumps(element_dicts, indent=2) @@ -159,7 +161,7 @@ The `hi_res` strategy supports different models, and the default is `layout_v1.1 ``` - + ```typescript TypeScript import { UnstructuredClient } from "unstructured-client"; import * as fs from "fs"; @@ -300,7 +302,7 @@ For better OCR results, you can specify what languages your document is in using ``` - + ```bash POST curl -X 'POST' $UNSTRUCTURED_API_URL \ -H 'accept: application/json' \ @@ -312,7 +314,7 @@ For better OCR results, you can specify what languages your document is in using ``` - + ```python Python import asyncio import os @@ -321,8 +323,7 @@ For better OCR results, you can specify what languages your document is in using from unstructured_client.models import shared client = unstructured_client.UnstructuredClient( - api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"), - server_url=os.getenv("UNSTRUCTURED_API_URL"), + api_key_auth=os.getenv("UNSTRUCTURED_API_KEY") ) async def call_api(filename, input_dir, output_dir): @@ -341,7 +342,10 @@ For better OCR results, you can specify what languages your document is in using } try: - res = await client.general.partition_async(request=req) + res = await client.general.partition_async( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") + ) element_dicts = [element for element in res.elements] json_elements = json.dumps(element_dicts, indent=2) @@ -377,7 +381,7 @@ For 
better OCR results, you can specify what languages your document is in using ``` - + ```typescript TypeScript import { UnstructuredClient } from "unstructured-client"; import * as fs from "fs"; @@ -515,7 +519,7 @@ Set the `coordinates` parameter to `true` to add this field to the elements in t ``` - + ```bash POST curl -X 'POST' $UNSTRUCTURED_API_URL \ -H 'accept: application/json' \ @@ -527,7 +531,7 @@ Set the `coordinates` parameter to `true` to add this field to the elements in t ``` - + ```python Python import asyncio import os @@ -536,8 +540,7 @@ Set the `coordinates` parameter to `true` to add this field to the elements in t from unstructured_client.models import shared client = unstructured_client.UnstructuredClient( - api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"), - server_url=os.getenv("UNSTRUCTURED_API_URL"), + api_key_auth=os.getenv("UNSTRUCTURED_API_KEY") ) async def call_api(filename, input_dir, output_dir): @@ -556,7 +559,10 @@ Set the `coordinates` parameter to `true` to add this field to the elements in t } try: - res = await client.general.partition_async(request=req) + res = await client.general.partition_async( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") + ) element_dicts = [element for element in res.elements] json_elements = json.dumps(element_dicts, indent=2) @@ -592,7 +598,7 @@ Set the `coordinates` parameter to `true` to add this field to the elements in t ``` - + ```typescript TypeScript import { UnstructuredClient } from "unstructured-client"; import * as fs from "fs"; @@ -734,7 +740,7 @@ This can be helpful if you'd like to use the IDs as a primary key in a database, ``` - + ```bash POST curl -X 'POST' $UNSTRUCTURED_API_URL \ -H 'accept: application/json' \ @@ -745,7 +751,7 @@ This can be helpful if you'd like to use the IDs as a primary key in a database, ``` - + ```python Python import asyncio import os @@ -754,8 +760,7 @@ This can be helpful if you'd like to use the IDs as a primary key in a database, from 
unstructured_client.models import shared client = unstructured_client.UnstructuredClient( - api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"), - server_url=os.getenv("UNSTRUCTURED_API_URL"), + api_key_auth=os.getenv("UNSTRUCTURED_API_KEY") ) async def call_api(filename, input_dir, output_dir): @@ -774,7 +779,10 @@ This can be helpful if you'd like to use the IDs as a primary key in a database, } try: - res = await client.general.partition_async(request=req) + res = await client.general.partition_async( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") + ) element_dicts = [element for element in res.elements] json_elements = json.dumps(element_dicts, indent=2) @@ -810,7 +818,7 @@ This can be helpful if you'd like to use the IDs as a primary key in a database, ``` - + ```typescript TypeScript import { UnstructuredClient } from "unstructured-client"; import * as fs from "fs"; @@ -956,7 +964,7 @@ By default, the `chunking_strategy` is set to `None`, and no chunking is perform ``` - + ```bash POST curl -X 'POST' $UNSTRUCTURED_API_URL \ -H 'accept: application/json' \ @@ -969,7 +977,7 @@ By default, the `chunking_strategy` is set to `None`, and no chunking is perform ``` - + ```python Python import asyncio import os @@ -978,8 +986,7 @@ By default, the `chunking_strategy` is set to `None`, and no chunking is perform from unstructured_client.models import shared client = unstructured_client.UnstructuredClient( - api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"), - server_url=os.getenv("UNSTRUCTURED_API_URL"), + api_key_auth=os.getenv("UNSTRUCTURED_API_KEY") ) async def call_api(filename, input_dir, output_dir): @@ -999,7 +1006,10 @@ By default, the `chunking_strategy` is set to `None`, and no chunking is perform } try: - res = await client.general.partition_async(request=req) + res = await client.general.partition_async( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") + ) element_dicts = [element for element in res.elements] json_elements = 
json.dumps(element_dicts, indent=2) @@ -1035,7 +1045,7 @@ By default, the `chunking_strategy` is set to `None`, and no chunking is perform ``` - + ```typescript TypeScript import { UnstructuredClient } from "unstructured-client"; import * as fs from "fs"; diff --git a/api-reference/api-services/partition-via-api.mdx b/api-reference/api-services/partition-via-api.mdx index 1284d794..5fbab4d2 100644 --- a/api-reference/api-services/partition-via-api.mdx +++ b/api-reference/api-services/partition-via-api.mdx @@ -8,9 +8,9 @@ would like to leverage the advanced capabilities of Unstructured API services, y Whether you're using the Free Unstructured API, the Unstructured Serverless API, the Unstructured API on Azure/AWS, or your local deployment of the Unstructured API, you can use the open source library to send an individual file through `partition_via_api` for processing with Unstructured API services. -import UseIngestInstead from '/snippets/general-shared-text/use-ingest-instead.mdx'; +import UseIngestOrPlatformInstead from '/snippets/general-shared-text/use-ingest-or-platform-instead.mdx'; - + To use the open source library, you'll also need: diff --git a/api-reference/api-services/post-requests.mdx b/api-reference/api-services/post-requests.mdx index bab601e4..fd8c447a 100644 --- a/api-reference/api-services/post-requests.mdx +++ b/api-reference/api-services/post-requests.mdx @@ -6,9 +6,9 @@ sidebarTitle: POST request Whether you're using the free Unstructured API, the Unstructured Serverless API, Unstructured API on Azure/AWS, or your local deployment of Unstructured API, you can work with the API by sending single-file POST requests to it. 
-import UseIngestInstead from '/snippets/general-shared-text/use-ingest-instead.mdx'; +import UseIngestOrPlatformInstead from '/snippets/general-shared-text/use-ingest-or-platform-instead.mdx'; - + To make POST requests, you will need: diff --git a/api-reference/api-services/sdk-jsts.mdx b/api-reference/api-services/sdk-jsts.mdx index 00b8b06a..8530d8d6 100644 --- a/api-reference/api-services/sdk-jsts.mdx +++ b/api-reference/api-services/sdk-jsts.mdx @@ -7,9 +7,9 @@ The [Unstructured JavaScript/TypeScript SDK](https://github.com/Unstructured-IO/ Free Unstructured API, the Unstructured Serverless API, the Unstructured API on Azure/AWS, or your local deployment of the Unstructured API, you can access the API using the JavaScript/TypeScript SDK. -import UseIngestInstead from '/snippets/general-shared-text/use-ingest-instead.mdx'; +import UseIngestOrPlatformInstead from '/snippets/general-shared-text/use-ingest-or-platform-instead.mdx'; - + To use the JavaScript/TypeScript SDK, you'll need: diff --git a/api-reference/api-services/sdk-python.mdx b/api-reference/api-services/sdk-python.mdx index 9e1f1c3a..20a3baad 100644 --- a/api-reference/api-services/sdk-python.mdx +++ b/api-reference/api-services/sdk-python.mdx @@ -3,13 +3,14 @@ title: Process an individual file by using the Unstructured Python SDK sidebarTitle: Python SDK --- -The [Unstructured Python SDK](https://github.com/Unstructured-IO/unstructured-python-client) client allows you to send an individual file for processing by Unstructured API services. Whether you're using the -Free Unstructured API, the Unstructured Serverless API, the Unstructured API on Azure/AWS, or your local +The [Unstructured Python SDK](https://github.com/Unstructured-IO/unstructured-python-client) client allows you to send an individual file for processing by +[Unstructured API services](/api-reference/api-services/overview). 
Whether you're using the +Free Unstructured API, the Unstructured Serverless API, the Unstructured API on Azure/AWS, or your local deployment of the Unstructured API, you can access the API using the Python SDK. -import UseIngestInstead from '/snippets/general-shared-text/use-ingest-instead.mdx'; +import UseIngestOrPlatformInstead from '/snippets/general-shared-text/use-ingest-or-platform-instead.mdx'; - + To use the Python SDK, you'll need: @@ -45,8 +46,7 @@ import NoURLForServerlessAPI from '/snippets/general-shared-text/no-url-for-serv from unstructured_client.models import shared client = unstructured_client.UnstructuredClient( - api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"), - server_url=os.getenv("UNSTRUCTURED_API_URL"), + api_key_auth=os.getenv("UNSTRUCTURED_API_KEY") ) filename = "PATH_TO_INPUT_FILE" @@ -66,7 +66,10 @@ import NoURLForServerlessAPI from '/snippets/general-shared-text/no-url-for-serv } try: - res = client.general.partition(request=req) + res = client.general.partition( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") + ) element_dicts = [element for element in res.elements] # Print the processed data's first element only. @@ -87,8 +90,7 @@ import NoURLForServerlessAPI from '/snippets/general-shared-text/no-url-for-serv from unstructured_client.models import operations, shared client = unstructured_client.UnstructuredClient( - api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"), - server_url=os.getenv("UNSTRUCTURED_API_URL"), + api_key_auth=os.getenv("UNSTRUCTURED_API_KEY") ) filename = "PATH_TO_INPUT_FILE" @@ -108,7 +110,10 @@ import NoURLForServerlessAPI from '/snippets/general-shared-text/no-url-for-serv ) try: - res = client.general.partition(request=req) + res = client.general.partition( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") + ) element_dicts = [element for element in res.elements] # Print the processed data's first element only. 
@@ -137,8 +142,7 @@ import NoURLForServerlessAPI from '/snippets/general-shared-text/no-url-for-serv from unstructured_client.models import shared client = unstructured_client.UnstructuredClient( - api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"), - server_url=os.getenv("UNSTRUCTURED_API_URL"), + api_key_auth=os.getenv("UNSTRUCTURED_API_KEY") ) async def call_api(filename, input_dir, output_dir): @@ -153,7 +157,10 @@ import NoURLForServerlessAPI from '/snippets/general-shared-text/no-url-for-serv } try: - res = await client.general.partition_async(request=req) + res = await client.general.partition_async( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") + ) element_dicts = [element for element in res.elements] json_elements = json.dumps(element_dicts, indent=2) @@ -225,7 +232,10 @@ import NoURLForServerlessAPI from '/snippets/general-shared-text/no-url-for-serv split_pdf_concurrency_level=15 # Set the number of concurrent request to the maximum value: 15. ) ) - res = client.general.partition(req) + res = client.general.partition( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") + ) ``` ## Customizing the client @@ -299,11 +309,54 @@ the names used in the SDKs are the same across all methods. ## Migration guide -There are minor breaking changes in 0.26.0. If you encounter any errors when upgrading, please find the solution below. +There are major breaking changes in 0.30.0. If you encounter any errors when upgrading, please find the solution below. 
+ +**If you see the error: `404 Not Found`** + +Before 0.30.0, you could specify the following Unstructured API URL for the `server_url` parameter: + +- For the Unstructured Serverless API: `https://api.unstructuredapp.io/general/v0/general` + +Beginning with 0.30.0, the Unstructured API URLs have changed as follows: + +- For the Unstructured Serverless API: `https://api.unstructuredapp.io` (remove `/general/v0/general`) +- (New beginning with 0.30.0) For the Unstructured Platform API: `https://platform.unstructuredapp.io` (remove `/api/v1`) + +Also, before 0.30.0, the `server_url` parameter was part of the `UnstructuredClient` constructor. Beginning with 0.30.0, the `server_url` +parameter has been moved into the `partition` and `partition_async` functions. + +```python +# Instead of: +client = unstructured_client.UnstructuredClient( + api_key_auth=os.getenv("UNSTRUCTURED_API_KEY"), + server_url=os.getenv("UNSTRUCTURED_API_URL") +) + +# Switch to: +client = unstructured_client.UnstructuredClient( + api_key_auth=os.getenv("UNSTRUCTURED_API_KEY") +) + +# And... + +# For partition: +res = client.general.partition( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") +) + +# For partition_async: +res = await client.general.partition_async( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") +) +``` + +There are minor breaking changes beginning with 0.26.0. If you encounter any errors when upgrading, please find the solution below. **If you see the error: `AttributeError: 'PartitionParameters' object has no attribute 'partition_parameters'`** -Previously, the SDK accepted a `PartitionParameters` object as input to the `sdk.general.partition` function. Now, this object must be wrapped in a `PartitionRequest` object. The old behavior was deprecated in 0.23.0 and removed in 0.26.0. +Before 0.26.0, the SDK accepted a `PartitionParameters` object as input to the `sdk.general.partition` function. 
Beginning with 0.26.0, this object must be wrapped in a `PartitionRequest` object. The old behavior was deprecated in 0.23.0 and removed in 0.26.0. ```python # Instead of: @@ -313,8 +366,10 @@ req = shared.PartitionParameters( files=files, ) -resp = s.general.partition(request=req) - +resp = s.general.partition( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") # Beginning with 0.30.0 +) # Switch to: from unstructured_client.models import shared, operations @@ -325,12 +380,15 @@ req = operations.PartitionRequest( ) ) -resp = s.general.partition(request=req) +resp = s.general.partition( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") # Beginning with 0.30.0 +) ``` **If you see the error: `TypeError: BaseModel.__init__() takes 1 positional argument but 2 were given`** -In 0.26.0, the `PartitionRequest` constructor no longer allows for positional arguments. You must specify `partition_parameters` by name. +Beginning with 0.26.0, the `PartitionRequest` constructor no longer allows for positional arguments. You must specify `partition_parameters` by name. ```python # Instead of: @@ -350,12 +408,15 @@ req = operations.PartitionRequest( **If you see the error: `TypeError: General.partition() takes 1 positional argument but 2 were given`** -In 0.26.0, the `partition` function no longer allows for positional arguments. You must specify `request` by name. +Beginning with 0.26.0, the `partition` function no longer allows for positional arguments. You must specify `request` by name. 
```python # Instead of: resp = s.general.partition(req) # Switch to: -resp = s.general.partition(request=req) +resp = s.general.partition( + request=req, + server_url=os.getenv("UNSTRUCTURED_API_URL") # Beginning with 0.30.0 +) ``` diff --git a/api-reference/ingest/ingest-cli.mdx b/api-reference/ingest/ingest-cli.mdx index f1f0fa52..f40ae508 100644 --- a/api-reference/ingest/ingest-cli.mdx +++ b/api-reference/ingest/ingest-cli.mdx @@ -5,6 +5,12 @@ sidebarTitle: Ingest CLI The Unstructured Ingest CLI enables you to use command-line scripts to send files in batches to Unstructured API services for processing, and to tell Unstructured API services where to deliver the processed data. [Learn more](/ingestion/overview#unstructured-ingest-cli). + + The Unstructured Ingest CLI does not work with the Unstructured Platform API. + + For information about the Unstructured Platform API, see the [Unstructured Platform API Overview](/platform/api/overview). + + ## Installation One approach to get started quickly with the Unstructured Ingest CLI is to install Python and then run the following command: diff --git a/api-reference/ingest/overview.mdx b/api-reference/ingest/overview.mdx index ddded057..48b155e7 100644 --- a/api-reference/ingest/overview.mdx +++ b/api-reference/ingest/overview.mdx @@ -10,6 +10,12 @@ You can send batches to Unstructured API services by using the following tools: - The [Unstructured Ingest CLI](/api-reference/ingest/ingest-cli) - The [Unstructured Ingest Python](/api-reference/ingest/python-ingest) library + + The Unstructured Ingest CLI and Unstructured Ingest Python library do not work with the Unstructured Platform API. + + For information about the Unstructured Platform API, see the [Unstructured Platform API Overview](/platform/api/overview). + + The following 3-minute video shows how to use the Unstructured Ingest Python library to send multiple PDFs from a local directory in batches to be ingested by Unstructured API services for processing: