From f2b4ad68ce707688cb5ed2db4d5b77e312db6f8f Mon Sep 17 00:00:00 2001 From: Paul Cornell Date: Wed, 14 May 2025 15:33:37 -0700 Subject: [PATCH] Various connector updates --- .../ibm_watsonxdata_sdk.mdx | 4 +- .../destination_connectors/onedrive.sh.mdx | 1 + .../destination_connectors/onedrive.v2.py.mdx | 5 +- .../onedrive_rest_create.mdx | 1 + .../destination_connectors/onedrive_sdk.mdx | 1 + .../destination_connectors/pinecone.sh.mdx | 1 + .../destination_connectors/pinecone.v2.py.mdx | 5 +- .../pinecone_rest_create.mdx | 1 + .../destination_connectors/pinecone_sdk.mdx | 1 + .../confluence-api-placeholders.mdx | 4 +- .../databricks-volumes-platform.mdx | 4 +- .../ibm-watsonxdata-platform.mdx | 2 +- .../jira-api-placeholders.mdx | 5 +- snippets/general-shared-text/jira-cli-api.mdx | 9 +--- .../general-shared-text/jira-platform.mdx | 7 ++- .../onedrive-api-placeholders.mdx | 1 + .../general-shared-text/onedrive-cli-api.mdx | 1 + .../general-shared-text/onedrive-platform.mdx | 1 + snippets/general-shared-text/onedrive.mdx | 49 ++++++++++--------- .../pinecone-api-placeholders.mdx | 1 + .../general-shared-text/pinecone-cli-api.mdx | 3 +- .../general-shared-text/pinecone-platform.mdx | 1 + snippets/general-shared-text/pinecone.mdx | 4 +- snippets/source_connectors/onedrive.sh.mdx | 1 + snippets/source_connectors/onedrive.v2.py.mdx | 3 +- .../onedrive_rest_create.mdx | 1 + snippets/source_connectors/onedrive_sdk.mdx | 1 + 27 files changed, 70 insertions(+), 48 deletions(-) diff --git a/snippets/destination_connectors/ibm_watsonxdata_sdk.mdx b/snippets/destination_connectors/ibm_watsonxdata_sdk.mdx index 6decb519..3f5a4016 100644 --- a/snippets/destination_connectors/ibm_watsonxdata_sdk.mdx +++ b/snippets/destination_connectors/ibm_watsonxdata_sdk.mdx @@ -6,7 +6,7 @@ from unstructured_client.models.operations import CreateDestinationRequest from unstructured_client.models.shared import ( CreateDestinationConnector, DestinationConnectorType, - 
IbmWatsonxDestinationConnectorConfigInput + IBMWatsonxS3DestinationConnectorConfigInput ) with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as client: @@ -15,7 +15,7 @@ with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as clien create_destination_connector=CreateDestinationConnector( name="", type=DestinationConnectorType.IBM_WATSONX_S3, - config=IbmWatsonxDestinationConnectorConfigInput( + config=IBMWatsonxS3DestinationConnectorConfigInput( iceberg_endpoint="", object_storage_endpoint="", object_storage_region="", diff --git a/snippets/destination_connectors/onedrive.sh.mdx b/snippets/destination_connectors/onedrive.sh.mdx index 4a7217b8..edb30a1d 100644 --- a/snippets/destination_connectors/onedrive.sh.mdx +++ b/snippets/destination_connectors/onedrive.sh.mdx @@ -17,6 +17,7 @@ unstructured-ingest \ --client-cred $ONEDRIVE_CLIENT_CRED \ --client-id $ONEDRIVE_CLIENT_ID \ --user-pname $ONEDRIVE_USER_PNAME \ + --password $ONEDRIVE_USER_PASSWORD \ --tenant $ONEDRIVE_TENANT \ --authority-url $ONEDRIVE_AUTHORITY_URL \ --remote-url $ONEDRIVE_PATH \ diff --git a/snippets/destination_connectors/onedrive.v2.py.mdx b/snippets/destination_connectors/onedrive.v2.py.mdx index 3731b77e..01873d2d 100644 --- a/snippets/destination_connectors/onedrive.v2.py.mdx +++ b/snippets/destination_connectors/onedrive.v2.py.mdx @@ -42,7 +42,10 @@ if __name__ == "__main__": chunker_config=ChunkerConfig(chunking_strategy="by_title"), embedder_config=EmbedderConfig(embedding_provider="huggingface"), destination_connection_config=OnedriveConnectionConfig( - access_config=OnedriveAccessConfig(client_cred=os.getenv("ONEDRIVE_CLIENT_CRED")), + access_config=OnedriveAccessConfig( + client_cred=os.getenv("ONEDRIVE_CLIENT_CRED"), + password=os.getenv("ONEDRIVE_USER_PASSWORD") # For username and password authentication. 
+ ), client_id=os.getenv("ONEDRIVE_CLIENT_ID"), user_pname=os.getenv("ONEDRIVE_USER_PNAME"), tenant=os.getenv("ONEDRIVE_TENANT"), diff --git a/snippets/destination_connectors/onedrive_rest_create.mdx b/snippets/destination_connectors/onedrive_rest_create.mdx index 1f40f12b..f74be51d 100644 --- a/snippets/destination_connectors/onedrive_rest_create.mdx +++ b/snippets/destination_connectors/onedrive_rest_create.mdx @@ -11,6 +11,7 @@ curl --request 'POST' --location \ "config": { "client_id": "", "user_pname": "", + "password": "", "tenant": "", "authority_url": "", "client_cred": "", diff --git a/snippets/destination_connectors/onedrive_sdk.mdx b/snippets/destination_connectors/onedrive_sdk.mdx index be6a2ddb..1e9690eb 100644 --- a/snippets/destination_connectors/onedrive_sdk.mdx +++ b/snippets/destination_connectors/onedrive_sdk.mdx @@ -18,6 +18,7 @@ with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as clien config=OneDriveDestinationConnectorConfigInput( client_id="", user_pname="", + password="", # For username and password authentication. 
tenant="", authority_url="", client_cred="", diff --git a/snippets/destination_connectors/pinecone.sh.mdx b/snippets/destination_connectors/pinecone.sh.mdx index cb2a1acc..87203653 100644 --- a/snippets/destination_connectors/pinecone.sh.mdx +++ b/snippets/destination_connectors/pinecone.sh.mdx @@ -18,5 +18,6 @@ unstructured-ingest \ pinecone \ --api-key "$PINECONE_API_KEY" \ --index-name "$PINECONE_INDEX_NAME" \ + --namespace "$PINECONE_NAMESPACE_NAME" \ --batch-size 80 ``` diff --git a/snippets/destination_connectors/pinecone.v2.py.mdx b/snippets/destination_connectors/pinecone.v2.py.mdx index 25258b88..3acf75f9 100644 --- a/snippets/destination_connectors/pinecone.v2.py.mdx +++ b/snippets/destination_connectors/pinecone.v2.py.mdx @@ -47,6 +47,9 @@ if __name__ == "__main__": index_name=os.getenv("PINECONE_INDEX_NAME") ), stager_config=PineconeUploadStagerConfig(), - uploader_config=PineconeUploaderConfig() + uploader_config=PineconeUploaderConfig( + batch_size=100, + namespace=os.getenv("PINECONE_NAMESPACE_NAME") + ) ).run() ``` \ No newline at end of file diff --git a/snippets/destination_connectors/pinecone_rest_create.mdx b/snippets/destination_connectors/pinecone_rest_create.mdx index 98b19df8..d85739a4 100644 --- a/snippets/destination_connectors/pinecone_rest_create.mdx +++ b/snippets/destination_connectors/pinecone_rest_create.mdx @@ -10,6 +10,7 @@ curl --request 'POST' --location \ "type": "pinecone", "config": { "index_name" "", + "namespace": "", "api_key": "", "batch_size" } diff --git a/snippets/destination_connectors/pinecone_sdk.mdx b/snippets/destination_connectors/pinecone_sdk.mdx index 9b5a22d0..83ad1ace 100644 --- a/snippets/destination_connectors/pinecone_sdk.mdx +++ b/snippets/destination_connectors/pinecone_sdk.mdx @@ -17,6 +17,7 @@ with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as clien type=DestinationConnectorType.PINECONE, config=PineconeDestinationConnectorConfigInput( index_name="", + namespace="", api_key="", 
batch_size= ) diff --git a/snippets/general-shared-text/confluence-api-placeholders.mdx b/snippets/general-shared-text/confluence-api-placeholders.mdx index 8c02f386..13e256a0 100644 --- a/snippets/general-shared-text/confluence-api-placeholders.mdx +++ b/snippets/general-shared-text/confluence-api-placeholders.mdx @@ -6,7 +6,7 @@ - `extract_images` - Set to `true` to download images and replace the HTML content with Base64-encoded images. The default is `false` if not otherwise specified. - `extract_files` - Set to `true` to download any embedded files in pages. The default is `false` if not otherwise specified. -For API token authentication: +For username and API token authentication: - `` - The name or email address of the target user. - `` - The user's API token value. @@ -17,7 +17,7 @@ For personal access token (PAT) authentication: - `` - The target user's PAT value. - `cloud` should always be `false`. -For password authentication: +For username and password authentication: - `` - The name or email address of the target user. - `` - The user's password. diff --git a/snippets/general-shared-text/databricks-volumes-platform.mdx b/snippets/general-shared-text/databricks-volumes-platform.mdx index 6dd54e3e..760ada40 100644 --- a/snippets/general-shared-text/databricks-volumes-platform.mdx +++ b/snippets/general-shared-text/databricks-volumes-platform.mdx @@ -14,7 +14,7 @@ Fill in the following fields: - For **Authentication Method**, if you select **Service Principal**, you must also specify the following: - - **Client Secret** (_required_): The associated OAuth **Secret** value for the Databricks managed service principal that has the appropriate privileges to the volume. - - **Client ID** (_required_): The **Client ID** (or **UUID** or **Application ID**) value for the Databricks managed service principal that has appropriate privileges to the volume. 
+ - **OAuth Secret** (_required_): The associated OAuth **Secret** value for the Databricks managed service principal that has the appropriate privileges to the volume. + - **UUID** (_required_): The **Client ID** (or **UUID** or **Application ID**) value for the Databricks managed service principal that has appropriate privileges to the volume. - For **Authentication Method**, if you select **Token**, you must also specify the Databricks personal access token's value in the **Token** field. diff --git a/snippets/general-shared-text/ibm-watsonxdata-platform.mdx b/snippets/general-shared-text/ibm-watsonxdata-platform.mdx index 295961a8..c3f4e01d 100644 --- a/snippets/general-shared-text/ibm-watsonxdata-platform.mdx +++ b/snippets/general-shared-text/ibm-watsonxdata-platform.mdx @@ -11,5 +11,5 @@ Fill in the following fields: - **Namespace** (_required_): The name of the target namespace (also known as a schema) within the catalog. - **Table** (_required_): The name of the target table within the namespace (schema). - **Max Connection Retries**: The maximum number of retries when connecting to the catalog. Typically, an optimal setting is `15`. The default is `10`. If specified, it must be a number between `2` and `100`, inclusive. -- **Max Retries**: The maximum number of retries when uploading data. Typically, an optimal setting is `150`. The default is `50`. If specified, it must be a number between `2` and `500`, inclusive. +- **Max Upload Retries**: The maximum number of retries when uploading data. Typically, an optimal setting is `150`. The default is `50`. If specified, it must be a number between `2` and `500`, inclusive. - **Record ID Key**: The name of the column that uniquely identifies each record in the target table. The default is `record_id`. 
diff --git a/snippets/general-shared-text/jira-api-placeholders.mdx b/snippets/general-shared-text/jira-api-placeholders.mdx index 42a3344d..46118425 100644 --- a/snippets/general-shared-text/jira-api-placeholders.mdx +++ b/snippets/general-shared-text/jira-api-placeholders.mdx @@ -1,7 +1,8 @@ - `` (_required_): A unique name for this connector. - `` (_required_): The URL of the Jira instance. -- `` (_required_ for password or API token authentication, or personal access token authentication): The username of the Jira user. -- `` (_required_ for password or API token authentication): The password or API token of the Jira user. +- `` (_required_ for password or API token authentication): The username of the Jira user. +- `` (_required_ for password or API token authentication): For password authentication, the password of the Jira user. + For API token authentication, the API token of the Jira user. - `` (_required_ for personal access token authentication): The personal access token of the Jira user. - ``: The ID of a target project in Jira to access. - ``: The ID of a target board in Jira to access. diff --git a/snippets/general-shared-text/jira-cli-api.mdx b/snippets/general-shared-text/jira-cli-api.mdx index a2729a2b..81d817c0 100644 --- a/snippets/general-shared-text/jira-cli-api.mdx +++ b/snippets/general-shared-text/jira-cli-api.mdx @@ -13,15 +13,10 @@ The following environment variables: - `JIRA_URL` - The site URL for your Jira Data Center installation or Jira Cloud account, represented by `--url` (CLI) or `url` (Python). - One of the following: - - For Jira Cloud or Jira Data Center, the target user's name or email address, and password, as follows: + - For Jira Cloud or Jira Data Center, the target user's name or email address, and password or API token, as follows: - `JIRA_USERNAME` - The name or email address of the target user, represented by `--username` (CLI) or `username` (Python). 
- - `JIRA_PASSWORD_OR_API_TOKEN` - The user's password, represented by `--password` (CLI) or `password` (Python). - - - For Jira Cloud only, the target user's name or email address, and API token, as follows: - - - `JIRA_USERNAME` - The name or email address of the target user, represented by `--username` (CLI) or `username` (Python). - - `JIRA_PASSWORD_OR_API_TOKEN` - The user's API token, represented by `--password` (CLI) or `password` (Python). + - `JIRA_PASSWORD_OR_API_TOKEN` - The user's password (for password authentication) or API token (for API token authentication), represented by `--password` (CLI) or `password` (Python). - For Jira Data Center only, the target user's personal access token (PAT), as follows: diff --git a/snippets/general-shared-text/jira-platform.mdx b/snippets/general-shared-text/jira-platform.mdx index 4928b7d1..83513c1f 100644 --- a/snippets/general-shared-text/jira-platform.mdx +++ b/snippets/general-shared-text/jira-platform.mdx @@ -2,14 +2,13 @@ Fill in the following fields: - **Name** (_required_): A unique name for this connector. - **URL** (_required_): The URL of the Jira instance. -- **Username** (_required_ for password or API token authentication, or personal access token authentication): The username of the Jira user. -- **Password** (_required_ for password authentication): The password of the Jira user. -- **API Token** (_required_ for API token authentication): The API token of the Jira user. +- **Username** (_required_ for password or API token authentication): The username of the Jira user. +- **Password** (_required_ for password or API token authentication): For password authentication, the password of the Jira user. + For API token authentication, the API token of the Jira user. - **Personal Access Token** (_required_ for personal access token authentication): The personal access token of the Jira user. - **Cloud**: Check this box if you are using Jira Cloud. The default is unchecked to use Jira Data Center. 
- **Projects**: A comma-separated list of IDs of the target projects in Jira to access. - **Boards**: A comma-separated list of IDs of the target boards in Jira to access. - **Issues**: A comma-separated list of IDs of the target issues in Jira to access. - **Status Filter**: A comma-separated list of statuses to filter Jira issues by. -- **Issues**: A comma-separated list of IDs of the target issues in Jira to access. - **Download Attachments**: If checked, download attachments from Jira issues. By default, attachments are not downloaded. \ No newline at end of file diff --git a/snippets/general-shared-text/onedrive-api-placeholders.mdx b/snippets/general-shared-text/onedrive-api-placeholders.mdx index ba328ada..ce3280dd 100644 --- a/snippets/general-shared-text/onedrive-api-placeholders.mdx +++ b/snippets/general-shared-text/onedrive-api-placeholders.mdx @@ -1,6 +1,7 @@ - `` (_required_) - A unique name for this connector. - `` (_required_) - The application (client) ID of the Microsoft Entra ID app registration that has access to the OneDrive account. - `` (_required_) - The User Principal Name (UPN) for the OneDrive user account in Entra ID. This is typically the user's email address. +- `` (_required_ for username and password authentication) - The password for the target UPN. - `` (_required_) - The directory (tenant) ID of the Entra ID app registration. - `` (_required_) - The authentication token provider URL for the Entra ID app registration. The default is https://login.microsoftonline.com. - `` (_required_) - The client secret for the Entra ID app registration. 
diff --git a/snippets/general-shared-text/onedrive-cli-api.mdx b/snippets/general-shared-text/onedrive-cli-api.mdx index 19b89a8e..ee835263 100644 --- a/snippets/general-shared-text/onedrive-cli-api.mdx +++ b/snippets/general-shared-text/onedrive-cli-api.mdx @@ -15,5 +15,6 @@ The following environment variables: - `ONEDRIVE_CLIENT_CRED` - The client secret for the Entra ID app registration, represented by `--client-cred` (CLI) or `client_cred` (Python). - `ONEDRIVE_TENANT` - The directory (tenant) ID of the Entra ID app registration, represented by `--tenant` (CLI) or `tenant` (Python). - `ONEDRIVE_USER_PNAME` - The User Principal Name (UPN) for the OneDrive user account in Entra ID (typically the user's email address), represented by `--user-pname` (CLI) or `user_pname` (Python). +- `ONEDRIVE_USER_PASSWORD` - The password for the target UPN, represented by `--password` (CLI) or `password` (Python). - `ONEDRIVE_AUTHORITY_URL` - The authentication token provider URL for the Entra ID app registration (the default is `https://login.microsoftonline.com` if not otherwise specified), represented by `--authority-url` (CLI) or `authority_url` (Python). diff --git a/snippets/general-shared-text/onedrive-platform.mdx b/snippets/general-shared-text/onedrive-platform.mdx index cae4152f..7ae35cc0 100644 --- a/snippets/general-shared-text/onedrive-platform.mdx +++ b/snippets/general-shared-text/onedrive-platform.mdx @@ -6,6 +6,7 @@ Fill in the following fields: - **Authority URL** (_required_): The authentication token provider URL for the Entra ID app registration. The default is `https://login.microsoftonline.com`. - **Principal Name** (_required_): The User Principal Name (UPN) for the OneDrive user account in Entra ID. This is typically the user's email address. - **Client Credential** (_required_): The client secret for the Entra ID app registration. +- **Password** (_required_ for username and password authentication): The password for the target UPN. 
- **Path** (source connector only): The path to the target folder in the OneDrive account, starting with the account's root folder, for example `my-folder/my-subfolder`. - **Recursive** (source connector only): Check this box to recursively access files from subfolders within the specified OneDrive path. - **Remote URL** (destination connector only): `onedrive://`, followed by the path to the target folder in the OneDrive account, starting with the account's root folder, for example `onedrive://my-folder/my-subfolder`. \ No newline at end of file diff --git a/snippets/general-shared-text/onedrive.mdx b/snippets/general-shared-text/onedrive.mdx index 5ae2f33c..38c123c3 100644 --- a/snippets/general-shared-text/onedrive.mdx +++ b/snippets/general-shared-text/onedrive.mdx @@ -10,26 +10,6 @@ [Shop for enterprise plans](https://www.microsoft.com/microsoft-365/enterprise/microsoft365-plans-and-pricing). - The OneDrive and SharePoint Online plans must share the same Microsoft Entra ID tenant. [Learn more](https://learn.microsoft.com/microsoft-365/enterprise/subscriptions-licenses-accounts-and-tenants-for-microsoft-cloud-offerings?view=o365-worldwide). -- The User Principal Name (UPN) for the OneDrive account. This is typically the OneDrive account user's email address. To find a UPN: - - 1. Depending on your plan, sign in to your Microsoft 365 admin center (typically [https://admin.microsoft.com](https://admin.microsoft.com)) using your administrator credentials, - or sign in to your Office 365 portal (typically [https://portal.office.com](https://portal.office.com)) using your credentials. - 2. In the **Users** section, click **Active users**. - 3. Locate the user account in the list of active users. - 4. The UPN is displayed in the **Username** column. - - The following video shows how to get a UPN: - - - - The path to the target OneDrive folder, starting from the OneDrive account's root folder, for example `my-folder/my-subfolder`. 
The following video shows how to get a path: @@ -44,8 +24,9 @@ allowfullscreen > -- The client ID, tenant ID, and client secret for the Microsoft Entra ID app registration that has access to the target OneDrive account and - also has the correct set of Microsoft Graph access permissions. These permissions include: +- Two types of authentication are supported: client credentials and a username and password. Both authentication types require a Microsoft Entra ID app registration. + You will need to provide the **Application (client) ID**, **Directory (tenant) ID**, and **Client secret** for the Entra ID app registration that has access to the target OneDrive account, and + the app registration must have the correct set of Microsoft Graph access permissions. These permissions include: - `Files.ReadWrite.All` (if both reading and writing are needed) - `Sites.ReadWrite.All` (if both reading and writing are needed) @@ -77,4 +58,26 @@ frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen - > \ No newline at end of file + > + +- Both authentication types also require the User Principal Name (UPN) for the OneDrive account. This is typically the OneDrive account user's email address. To find a UPN: + + 1. Depending on your plan, sign in to your Microsoft 365 admin center (typically [https://admin.microsoft.com](https://admin.microsoft.com)) using your administrator credentials, + or sign in to your Office 365 portal (typically [https://portal.office.com](https://portal.office.com)) using your credentials. + 2. In the **Users** section, click **Active users**. + 3. Locate the user account in the list of active users. + 4. The UPN is displayed in the **Username** column. + + The following video shows how to get a UPN: + + + +- For username and password authentication, you will also need the password for the target UPN. 
\ No newline at end of file diff --git a/snippets/general-shared-text/pinecone-api-placeholders.mdx b/snippets/general-shared-text/pinecone-api-placeholders.mdx index 488b5984..6931e65f 100644 --- a/snippets/general-shared-text/pinecone-api-placeholders.mdx +++ b/snippets/general-shared-text/pinecone-api-placeholders.mdx @@ -1,4 +1,5 @@ - `` (required) - A unique name for this connector. - `` - The name of the index in the Pinecone database. If no value is provided, see the beginning of this article for the behavior at run time. +- `` - The name of any custom namespace in the Pinecone index to upsert data into. If no value is provided, the default namespace of `default` will be used. - `` (required) - The Pinecone API key. - `` - The maximum number of records to transmit in a single batch. The default is `50` unless otherwise specified. diff --git a/snippets/general-shared-text/pinecone-cli-api.mdx b/snippets/general-shared-text/pinecone-cli-api.mdx index 246e560b..fd535448 100644 --- a/snippets/general-shared-text/pinecone-cli-api.mdx +++ b/snippets/general-shared-text/pinecone-cli-api.mdx @@ -11,4 +11,5 @@ import AdditionalIngestDependencies from '/snippets/general-shared-text/ingest-d The following environment variables: - `PINECONE_API_KEY` - The Pinecone API, represented by `--api-key` (CLI) or `api_key` (Python, in the `PineconeAccessConfig` object). -- `PINECONE_INDEX_NAME` - The Pinecone serverless index name, represented by `--index-name` (CLI) or `index_name` (Python). If no value is provided, see the beginning of this article for the behavior at run time. \ No newline at end of file +- `PINECONE_INDEX_NAME` - The Pinecone serverless index name, represented by `--index-name` (CLI) or `index_name` (Python). If no value is provided, see the beginning of this article for the behavior at run time. 
+- `PINECONE_NAMESPACE_NAME` - The name of any custom namespace in the Pinecone index to upsert data into, represented by `--namespace` (CLI) or `namespace` (Python). If no value is provided, the default namespace of `default` will be used. \ No newline at end of file diff --git a/snippets/general-shared-text/pinecone-platform.mdx b/snippets/general-shared-text/pinecone-platform.mdx index 0d211095..ae80ffe0 100644 --- a/snippets/general-shared-text/pinecone-platform.mdx +++ b/snippets/general-shared-text/pinecone-platform.mdx @@ -2,5 +2,6 @@ Fill in the following fields: - **Name** (_required_): A unique name for this connector. - **Index Name**: The name of the index in the Pinecone database. If no value is provided, see the beginning of this article for the behavior at run time. +- **Namespace**: The name of any custom namespace in the Pinecone index to upsert data into. If no value is provided, the default namespace of `default` will be used. - **Batch Size**: The number of records to use in a single batch. The default is `50` if not otherwise specified. - **API Key** (_required_): The Pinecone API key. \ No newline at end of file diff --git a/snippets/general-shared-text/pinecone.mdx b/snippets/general-shared-text/pinecone.mdx index f0a39552..978e4318 100644 --- a/snippets/general-shared-text/pinecone.mdx +++ b/snippets/general-shared-text/pinecone.mdx @@ -36,4 +36,6 @@ named `record_id` with a string data type. Unstructured can use this field to do intelligent document overwrites. Without this field, duplicate documents might be written to the index or, in some cases, the operation could fail altogether. - \ No newline at end of file + + +- Within a Pinecone serverless index, custom [namespaces](https://docs.pinecone.io/guides/index-data/indexing-overview#namespaces) are supported but are not required. 
\ No newline at end of file diff --git a/snippets/source_connectors/onedrive.sh.mdx b/snippets/source_connectors/onedrive.sh.mdx index d0e385fd..9d96953a 100644 --- a/snippets/source_connectors/onedrive.sh.mdx +++ b/snippets/source_connectors/onedrive.sh.mdx @@ -8,6 +8,7 @@ unstructured-ingest \ --authority-url $ONEDRIVE_AUTHORITY_URL \ --tenant $ONEDRIVE_TENANT \ --user-pname $ONEDRIVE_USER_PNAME \ + --password $ONEDRIVE_USER_PASSWORD \ --path $ONEDRIVE_PATH \ --output-dir $LOCAL_FILE_OUTPUT_DIR \ --num-processes 2 \ diff --git a/snippets/source_connectors/onedrive.v2.py.mdx b/snippets/source_connectors/onedrive.v2.py.mdx index 52c1f9ad..51f93e70 100644 --- a/snippets/source_connectors/onedrive.v2.py.mdx +++ b/snippets/source_connectors/onedrive.v2.py.mdx @@ -27,7 +27,8 @@ if __name__ == "__main__": ), source_connection_config=OnedriveConnectionConfig( access_config=OnedriveAccessConfig( - client_cred=os.getenv("ONEDRIVE_CLIENT_CRED") + client_cred=os.getenv("ONEDRIVE_CLIENT_CRED"), + password=os.getenv("ONEDRIVE_USER_PASSWORD") # For username and password authentication. ), client_id=os.getenv("ONEDRIVE_CLIENT_ID"), tenant=os.getenv("ONEDRIVE_TENANT"), diff --git a/snippets/source_connectors/onedrive_rest_create.mdx b/snippets/source_connectors/onedrive_rest_create.mdx index d115ecf2..931aedee 100644 --- a/snippets/source_connectors/onedrive_rest_create.mdx +++ b/snippets/source_connectors/onedrive_rest_create.mdx @@ -11,6 +11,7 @@ curl --request 'POST' --location \ "config": { "client_id": "", "user_pname": "", + "password": "", 
"tenant": "", "authority_url": "", "client_cred": "", diff --git a/snippets/source_connectors/onedrive_sdk.mdx b/snippets/source_connectors/onedrive_sdk.mdx index 968badc8..1a398dce 100644 --- a/snippets/source_connectors/onedrive_sdk.mdx +++ b/snippets/source_connectors/onedrive_sdk.mdx @@ -18,6 +18,7 @@ with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as clien config=OneDriveSourceConnectorConfigInput( client_id="", user_pname="", + password="", # For username and password authentication. tenant="", authority_url="", client_cred="",