Unstructured-IO · Paul-Cornell · May 15, 2025 · May 14, 2025
diff --git a/snippets/destination_connectors/ibm_watsonxdata_sdk.mdx b/snippets/destination_connectors/ibm_watsonxdata_sdk.mdx
@@ -6,7 +6,7 @@ from unstructured_client.models.operations import CreateDestinationRequest
 from unstructured_client.models.shared import (
     CreateDestinationConnector,
     DestinationConnectorType,
-    IbmWatsonxDestinationConnectorConfigInput
+    IBMWatsonxS3DestinationConnectorConfigInput
 )
 
 with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as client:
@@ -15,7 +15,7 @@ with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as clien
             create_destination_connector=CreateDestinationConnector(
                 name="<name>",
                 type=DestinationConnectorType.IBM_WATSONX_S3,
-                config=IbmWatsonxDestinationConnectorConfigInput(
+                config=IBMWatsonxS3DestinationConnectorConfigInput(
                     iceberg_endpoint="<iceberg-endpoint>",
                     object_storage_endpoint="<object-storage-endpoint>",
                     object_storage_region="<object-storage-region>",

diff --git a/snippets/destination_connectors/onedrive.sh.mdx b/snippets/destination_connectors/onedrive.sh.mdx
@@ -17,6 +17,7 @@ unstructured-ingest \
     --client-cred $ONEDRIVE_CLIENT_CRED \
     --client-id $ONEDRIVE_CLIENT_ID \
     --user-pname $ONEDRIVE_USER_PNAME \
+    --password $ONEDRIVE_USER_PASSWORD \
     --tenant $ONEDRIVE_TENANT \
     --authority-url $ONEDRIVE_AUTHORITY_URL \
     --remote-url $ONEDRIVE_PATH \

diff --git a/snippets/destination_connectors/onedrive.v2.py.mdx b/snippets/destination_connectors/onedrive.v2.py.mdx
@@ -42,7 +42,10 @@ if __name__ == "__main__":
         chunker_config=ChunkerConfig(chunking_strategy="by_title"),
         embedder_config=EmbedderConfig(embedding_provider="huggingface"),
         destination_connection_config=OnedriveConnectionConfig(
-            access_config=OnedriveAccessConfig(client_cred=os.getenv("ONEDRIVE_CLIENT_CRED")),
+            access_config=OnedriveAccessConfig(
+                client_cred=os.getenv("ONEDRIVE_CLIENT_CRED"),
+                password=os.getenv("ONEDRIVE_USER_PASSWORD") # For username and password authentication.
+            ),
             client_id=os.getenv("ONEDRIVE_CLIENT_ID"),
             user_pname=os.getenv("ONEDRIVE_USER_PNAME"),
             tenant=os.getenv("ONEDRIVE_TENANT"),

diff --git a/snippets/destination_connectors/onedrive_rest_create.mdx b/snippets/destination_connectors/onedrive_rest_create.mdx
@@ -11,6 +11,7 @@ curl --request 'POST' --location \
     "config": {
         "client_id": "<client-id>", 
         "user_pname": "<user-pname>",
+        "password": "<password>",
         "tenant": "<tenant>", 
         "authority_url": "<authority-url>",
         "client_cred": "<client-cred>",

diff --git a/snippets/destination_connectors/onedrive_sdk.mdx b/snippets/destination_connectors/onedrive_sdk.mdx
@@ -18,6 +18,7 @@ with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as clien
                 config=OneDriveDestinationConnectorConfigInput(
                     client_id="<client-id>", 
                     user_pname="<user-pname>",
+                    password="<password>", # For username and password authentication.
                     tenant="<tenant>", 
                     authority_url="<authority-url>",
                     client_cred="<client-cred>",

diff --git a/snippets/destination_connectors/pinecone.sh.mdx b/snippets/destination_connectors/pinecone.sh.mdx
@@ -18,5 +18,6 @@ unstructured-ingest \
   pinecone \
     --api-key "$PINECONE_API_KEY" \
     --index-name "$PINECONE_INDEX_NAME" \
+    --namespace "$PINECONE_NAMESPACE_NAME" \
     --batch-size 80
 ```
diff --git a/snippets/destination_connectors/pinecone.v2.py.mdx b/snippets/destination_connectors/pinecone.v2.py.mdx
@@ -47,6 +47,9 @@ if __name__ == "__main__":
             index_name=os.getenv("PINECONE_INDEX_NAME")
         ),
         stager_config=PineconeUploadStagerConfig(),
-        uploader_config=PineconeUploaderConfig()
+        uploader_config=PineconeUploaderConfig(
+            batch_size=100,
+            namespace=os.getenv("PINECONE_NAMESPACE_NAME")
+        )
     ).run()
 ```
diff --git a/snippets/destination_connectors/pinecone_rest_create.mdx b/snippets/destination_connectors/pinecone_rest_create.mdx
@@ -10,6 +10,7 @@ curl --request 'POST' --location \
     "type": "pinecone",
     "config": {
         "index_name" "<index-name>", 
+        "namespace": "<namespace-name>",
         "api_key": "<api-key>",
         "batch_size" <batch-size>
     }

diff --git a/snippets/destination_connectors/pinecone_sdk.mdx b/snippets/destination_connectors/pinecone_sdk.mdx
@@ -17,6 +17,7 @@ with UnstructuredClient(api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")) as clien
                 type=DestinationConnectorType.PINECONE,
                 config=PineconeDestinationConnectorConfigInput(
                     index_name="<index-name>", 
+                    namespace="<namespace-name>",
                     api_key="<api-key>",
                     batch_size=<batch-size>
                 )

diff --git a/snippets/general-shared-text/confluence-api-placeholders.mdx b/snippets/general-shared-text/confluence-api-placeholders.mdx
@@ -6,7 +6,7 @@
 - `extract_images` - Set to `true` to download images and replace the HTML content with Base64-encoded images. The default is `false` if not otherwise specified.
 - `extract_files` - Set to `true` to download any embedded files in pages. The default is `false` if not otherwise specified.
 
-For API token authentication:
+For username and API token authentication:
 
 - `<username>` - The name or email address of the target user.
 - `<api-token>` - The user's API token value.
@@ -17,7 +17,7 @@ For personal access token (PAT) authentication:
 - `<personal-access-token>` - The target user's PAT value.
 - `cloud` should always be `false`.
 
-For password authentication:
+For username and password authentication:
 
 - `<username>` - The name or email address of the target user.
 - `<password>` - The user's password.

diff --git a/snippets/general-shared-text/databricks-volumes-platform.mdx b/snippets/general-shared-text/databricks-volumes-platform.mdx
@@ -14,7 +14,7 @@ Fill in the following fields:
 
 - For **Authentication Method**, if you select **Service Principal**, you must also specify the following:
 
-  - **Client Secret** (_required_): The associated OAuth **Secret** value for the Databricks managed service principal that has the appropriate privileges to the volume.
-  - **Client ID** (_required_): The **Client ID** (or **UUID** or **Application ID**) value for the Databricks managed service principal that has appropriate privileges to the volume.
+  - **OAuth Secret** (_required_): The associated OAuth **Secret** value for the Databricks managed service principal that has the appropriate privileges to the volume.
+  - **UUID** (_required_): The **Client ID** (or **UUID** or **Application ID**) value for the Databricks managed service principal that has the appropriate privileges to the volume.
 
 - For **Authentication Method**, if you select **Token**, you must also specify the Databricks personal access token's value in the **Token** field.
diff --git a/snippets/general-shared-text/ibm-watsonxdata-platform.mdx b/snippets/general-shared-text/ibm-watsonxdata-platform.mdx
@@ -11,5 +11,5 @@ Fill in the following fields:
 - **Namespace** (_required_): The name of the target namespace (also known as a schema) within the catalog.
 - **Table** (_required_): The name of the target table within the namespace (schema).
 - **Max Connection Retries**: The maximum number of retries when connecting to the catalog. Typically, an optimal setting is `15`. The default is `10`. If specified, it must be a number between `2` and `100`, inclusive.
-- **Max Retries**: The maximum number of retries when uploading data. Typically, an optimal setting is `150`. The default is `50`. If specified, it must be a number between `2` and `500`, inclusive.
+- **Max Upload Retries**: The maximum number of retries when uploading data. Typically, an optimal setting is `150`. The default is `50`. If specified, it must be a number between `2` and `500`, inclusive.
 - **Record ID Key**: The name of the column that uniquely identifies each record in the target table. The default is `record_id`.
diff --git a/snippets/general-shared-text/jira-api-placeholders.mdx b/snippets/general-shared-text/jira-api-placeholders.mdx
@@ -1,7 +1,8 @@
 - `<name>` (_required_): A unique name for this connector.
 - `<url>` (_required_): The URL of the Jira instance.
-- `<username>` (_required_ for password or API token authentication, or personal access token authentication): The username of the Jira user.
-- `<password>` (_required_ for password or API token authentication): The password or API token of the Jira user.
+- `<username>` (_required_ for password or API token authentication): The username of the Jira user.
+- `<password>` (_required_ for password or API token authentication): For password authentication, the password of the Jira user. 
+  For API token authentication, the API token of the Jira user.
 - `<token>` (_required_ for personal access token authentication): The personal access token of the Jira user.
 - `<project-id>`: The ID of a target project in Jira to access.
 - `<board-id>`: The ID of a target board in Jira to access.

diff --git a/snippets/general-shared-text/jira-cli-api.mdx b/snippets/general-shared-text/jira-cli-api.mdx
@@ -13,15 +13,10 @@ The following environment variables:
 - `JIRA_URL` - The site URL for your Jira Data Center installation or Jira Cloud account, represented by `--url` (CLI) or `url` (Python).
 - One of the following:
 
-  - For Jira Cloud or Jira Data Center, the target user's name or email address, and password, as follows:
+  - For Jira Cloud or Jira Data Center, the target user's name or email address, and password or API token, as follows:
 
     - `JIRA_USERNAME` - The name or email address of the target user, represented by `--username` (CLI) or `username` (Python).
-    - `JIRA_PASSWORD_OR_API_TOKEN` - The user's password, represented by `--password` (CLI) or `password` (Python).
-
-  - For Jira Cloud only, the target user's name or email address, and API token, as follows:
-
-    - `JIRA_USERNAME` - The name or email address of the target user, represented by `--username` (CLI) or `username` (Python).
-    - `JIRA_PASSWORD_OR_API_TOKEN` - The user's API token, represented by `--password` (CLI) or `password` (Python).
+    - `JIRA_PASSWORD_OR_API_TOKEN` - The user's password (for password authentication) or API token (for API token authentication), represented by `--password` (CLI) or `password` (Python).
 
   - For Jira Data Center only, the target user's personal access token (PAT), as follows:
 

diff --git a/snippets/general-shared-text/jira-platform.mdx b/snippets/general-shared-text/jira-platform.mdx
@@ -2,14 +2,13 @@ Fill in the following fields:
 
 - **Name** (_required_): A unique name for this connector.
 - **URL** (_required_): The URL of the Jira instance.
-- **Username** (_required_ for password or API token authentication, or personal access token authentication): The username of the Jira user.
-- **Password** (_required_ for password authentication): The password of the Jira user.
-- **API Token** (_required_ for API token authentication): The API token of the Jira user.
+- **Username** (_required_ for password or API token authentication): The username of the Jira user.
+- **Password** (_required_ for password or API token authentication): For password authentication, the password of the Jira user. 
+  For API token authentication, the API token of the Jira user.
 - **Personal Access Token** (_required_ for personal access token authentication): The personal access token of the Jira user.
 - **Cloud**: Check this box if you are using Jira Cloud. The default is unchecked to use Jira Data Center.
 - **Projects**: A comma-separated list of IDs of the target projects in Jira to access.
 - **Boards**: A comma-separated list of IDs of the target boards in Jira to access.
 - **Issues**: A comma-separated list of IDs of the target issues in Jira to access.
 - **Status Filter**: A comma-separated list of statuses to filter Jira issues by.
-- **Issues**: A comma-separated list of IDs of the target issues in Jira to access.
 - **Download Attachments**: If checked, download attachments from Jira issues. By default, attachments are not downloaded.
diff --git a/snippets/general-shared-text/onedrive-api-placeholders.mdx b/snippets/general-shared-text/onedrive-api-placeholders.mdx
@@ -1,6 +1,7 @@
 - `<name>` (_required_) - A unique name for this connector.
 - `<client-id>` (_required_) - The application (client) ID of the Microsoft Entra ID app registration that has access to the OneDrive account.
 - `<user-pname>` (_required_) - The User Principal Name (UPN) for the OneDrive user account in Entra ID. This is typically the user's email address.
+- `<password>` (_required_ for username and password authentication) - The password for the target UPN.
 - `<tenant>` (_required_) - The directory (tenant) ID of the Entra ID app registration.
 - `<authority-url>` (_required_) - The authentication token provider URL for the Entra ID app registration. The default is https://login.microsoftonline.com.
 - `<client-cred>` (_required_) - The client secret for the Entra ID app registration.

diff --git a/snippets/general-shared-text/onedrive-cli-api.mdx b/snippets/general-shared-text/onedrive-cli-api.mdx
@@ -15,5 +15,6 @@ The following environment variables:
 - `ONEDRIVE_CLIENT_CRED` - The client secret for the Entra ID app registration, represented by `--client-cred` (CLI) or `client_cred` (Python).
 - `ONEDRIVE_TENANT` - The directory (tenant) ID of the Entra ID app registration, represented by `--tenant` (CLI) or `tenant` (Python).
 - `ONEDRIVE_USER_PNAME` - The User Principal Name (UPN) for the OneDrive user account in Entra ID (typically the user's email address), represented by `--user-pname` (CLI) or `user_pname` (Python).
+- `ONEDRIVE_USER_PASSWORD` - The password for the target UPN, represented by `--password` (CLI) or `password` (Python).
 - `ONEDRIVE_AUTHORITY_URL` - The authentication token provider URL for the Entra ID app registration (the default is `https://login.microsoftonline.com` if not otherwise specified), represented by `--authority-url` (CLI) or `authority_url` (Python).
 
diff --git a/snippets/general-shared-text/onedrive-platform.mdx b/snippets/general-shared-text/onedrive-platform.mdx
@@ -6,6 +6,7 @@ Fill in the following fields:
 - **Authority URL** (_required_): The authentication token provider URL for the Entra ID app registration. The default is `https://login.microsoftonline.com`.
 - **Principal Name** (_required_): The User Principal Name (UPN) for the OneDrive user account in Entra ID. This is typically the user's email address.
 - **Client Credential** (_required_): The client secret for the Entra ID app registration.
+- **Password** (_required_ for username and password authentication): The password for the target UPN.
 - **Path** (source connector only): The path to the target folder in the OneDrive account, starting with the account's root folder, for example `my-folder/my-subfolder`.
 - **Recursive** (source connector only): Check this box to recursively access files from subfolders within the specified OneDrive path.
 - **Remote URL** (destination connector only): `onedrive://`, followed by the path to the target folder in the OneDrive account, starting with the account's root folder, for example `onedrive://my-folder/my-subfolder`.
diff --git a/snippets/general-shared-text/onedrive.mdx b/snippets/general-shared-text/onedrive.mdx
@@ -10,26 +10,6 @@
   [Shop for enterprise plans](https://www.microsoft.com/microsoft-365/enterprise/microsoft365-plans-and-pricing). 
 - The OneDrive and SharePoint Online plans must share the same Microsoft Entra ID tenant. 
   [Learn more](https://learn.microsoft.com/microsoft-365/enterprise/subscriptions-licenses-accounts-and-tenants-for-microsoft-cloud-offerings?view=o365-worldwide). 
-- The User Principal Name (UPN) for the OneDrive account. This is typically the OneDrive account user's email address. To find a UPN:
-
-  1. Depending on your plan, sign in to your Microsoft 365 admin center (typically [https://admin.microsoft.com](https://admin.microsoft.com)) using your administrator credentials, 
-     or sign in to your Office 365 portal (typically [https://portal.office.com](https://portal.office.com)) using your credentials.
-  2. In the **Users** section, click **Active users**.
-  3. Locate the user account in the list of active users.
-  4. The UPN is displayed in the **Username** column.
-
-  The following video shows how to get a UPN:
-
-  <iframe
-  width="560"
-  height="315"
-  src="https://www.youtube.com/embed/H0yYfhfyCE0"
-  title="YouTube video player"
-  frameborder="0"
-  allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
-  allowfullscreen
-  ></iframe>
-
 - The path to the target OneDrive folder, starting from the OneDrive account's root folder, for example `my-folder/my-subfolder`.
 
   The following video shows how to get a path:
@@ -44,8 +24,9 @@
   allowfullscreen
   ></iframe>
 
-- The client ID, tenant ID, and client secret for the Microsoft Entra ID app registration that has access to the target OneDrive account and 
-  also has the correct set of Microsoft Graph access permissions. These permissions include:
+- Two types of authentication are supported: client credentials and a username and password. Both authentication types require a Microsoft Entra ID app registration. 
+  You will need to provide the **Application (client) ID**, **Directory (tenant) ID**, and **Client secret** for the Entra ID app registration that has access to the target OneDrive account, and 
+  the app registration must have the correct set of Microsoft Graph access permissions. These permissions include:
 
   - `Files.ReadWrite.All` (if both reading and writing are needed)
   - `Sites.ReadWrite.All` (if both reading and writing are needed)
@@ -77,4 +58,26 @@
   frameborder="0"
   allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
   allowfullscreen
-  ></iframe>
+  ></iframe>
+
+- Both authentication types also require the User Principal Name (UPN) for the OneDrive account. This is typically the OneDrive account user's email address. To find a UPN:
+
+  1. Depending on your plan, sign in to your Microsoft 365 admin center (typically [https://admin.microsoft.com](https://admin.microsoft.com)) using your administrator credentials, 
+     or sign in to your Office 365 portal (typically [https://portal.office.com](https://portal.office.com)) using your credentials.
+  2. In the **Users** section, click **Active users**.
+  3. Locate the user account in the list of active users.
+  4. The UPN is displayed in the **Username** column.
+
+  The following video shows how to get a UPN:
+
+  <iframe
+  width="560"
+  height="315"
+  src="https://www.youtube.com/embed/H0yYfhfyCE0"
+  title="YouTube video player"
+  frameborder="0"
+  allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
+  allowfullscreen
+  ></iframe>
+
+- For username and password authentication, you will also need the password for the target UPN.
diff --git a/snippets/general-shared-text/pinecone-api-placeholders.mdx b/snippets/general-shared-text/pinecone-api-placeholders.mdx
@@ -1,4 +1,5 @@
 - `<name>` (required) - A unique name for this connector.
 - `<index-name>` - The name of the index in the Pinecone database. If no value is provided, see the beginning of this article for the behavior at run time.
+- `<namespace-name>` - The name of any custom namespace in the Pinecone index to upsert data into. If no value is provided, the default namespace of `default` will be used.
 - `<api-key>` (required) - The Pinecone API key.
 - `<batch-size>` - The maximum number of records to transmit in a single batch. The default is `50` unless otherwise specified.
diff --git a/snippets/general-shared-text/pinecone-cli-api.mdx b/snippets/general-shared-text/pinecone-cli-api.mdx
@@ -11,4 +11,5 @@ import AdditionalIngestDependencies from '/snippets/general-shared-text/ingest-d
 The following environment variables:
 
 - `PINECONE_API_KEY` - The Pinecone API, represented by `--api-key` (CLI) or `api_key` (Python, in the `PineconeAccessConfig` object).
-- `PINECONE_INDEX_NAME` - The Pinecone serverless index name, represented by `--index-name` (CLI) or `index_name` (Python). If no value is provided, see the beginning of this article for the behavior at run time.
+- `PINECONE_INDEX_NAME` - The Pinecone serverless index name, represented by `--index-name` (CLI) or `index_name` (Python). If no value is provided, see the beginning of this article for the behavior at run time.
+- `PINECONE_NAMESPACE_NAME` - The name of any custom namespace in the Pinecone index to upsert data into, represented by `--namespace` (CLI) or `namespace` (Python). If no value is provided, the default namespace of `default` will be used.
diff --git a/snippets/general-shared-text/pinecone-platform.mdx b/snippets/general-shared-text/pinecone-platform.mdx
@@ -2,5 +2,6 @@ Fill in the following fields:
 
 - **Name** (_required_): A unique name for this connector.
 - **Index Name**: The name of the index in the Pinecone database. If no value is provided, see the beginning of this article for the behavior at run time.
+- **Namespace**: The name of any custom namespace in the Pinecone index to upsert data into. If no value is provided, the default namespace of `default` will be used.
 - **Batch Size**: The number of records to use in a single batch. The default is `50` if not otherwise specified.
 - **API Key** (_required_): The Pinecone API key.
diff --git a/snippets/general-shared-text/pinecone.mdx b/snippets/general-shared-text/pinecone.mdx
@@ -36,4 +36,6 @@
       named `record_id` with a string data type. 
       Unstructured can use this field to do intelligent document overwrites. Without this field, duplicate documents 
       might be written to the index or, in some cases, the operation could fail altogether.
-  </Note>
+  </Note>
+
+- Within a Pinecone serverless index, custom [namespaces](https://docs.pinecone.io/guides/index-data/indexing-overview#namespaces) are supported but are not required.