diff --git a/api-reference/ingest/destination-connector/vectara.mdx b/api-reference/ingest/destination-connector/vectara.mdx
index 5a60780f..c6509ac4 100644
--- a/api-reference/ingest/destination-connector/vectara.mdx
+++ b/api-reference/ingest/destination-connector/vectara.mdx
@@ -2,6 +2,29 @@
title: Vectara
---
-import SharedVectara from '/snippets/dc-shared-text/vectara.mdx';
+import NewDocument from '/snippets/general-shared-text/new-document.mdx';
+
+
+
+import SharedContentVectara from '/snippets/dc-shared-text/vectara-cli-api.mdx';
+import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx';
+
+
+
+
+Now call the Unstructured CLI or Python SDK. The source connector can be any of the ones supported.
+
+This example uses the local source connector:
+
+import VectaraAPISh from '/snippets/destination_connectors/vectara.sh.mdx';
+import VectaraAPIPyV2 from '/snippets/destination_connectors/vectara.v2.py.mdx';
+import VectaraAPIPyV1 from '/snippets/destination_connectors/vectara.v1.py.mdx';
+
+
+
+
+
+
+
+
-
diff --git a/api-reference/ingest/ingest-dependencies.mdx b/api-reference/ingest/ingest-dependencies.mdx
index fd4841c9..baa75ac0 100644
--- a/api-reference/ingest/ingest-dependencies.mdx
+++ b/api-reference/ingest/ingest-dependencies.mdx
@@ -86,6 +86,7 @@ To add support for additional connectors, run the following:
| `pip install "unstructured-ingest[snowflake]"` | Snowflake |
| `pip install "unstructured-ingest[sftp]"` | SFTP |
| `pip install "unstructured-ingest[slack]"` | Slack |
+| `pip install "unstructured-ingest[vectara]"` | Vectara |
| `pip install "unstructured-ingest[wikipedia]"` | Wikipedia |
| `pip install "unstructured-ingest[weaviate]"` | Weaviate |
diff --git a/open-source/ingest/destination-connectors/vectara.mdx b/open-source/ingest/destination-connectors/vectara.mdx
index 5a60780f..39ded018 100644
--- a/open-source/ingest/destination-connectors/vectara.mdx
+++ b/open-source/ingest/destination-connectors/vectara.mdx
@@ -2,6 +2,28 @@
title: Vectara
---
-import SharedVectara from '/snippets/dc-shared-text/vectara.mdx';
+
-
+import SharedContentVectara from '/snippets/dc-shared-text/vectara-cli-api.mdx';
+
+
+
+Now call the Unstructured CLI or Python SDK. The source connector can be any of the ones supported.
+
+This example uses the local source connector.
+
+This example sends files to Unstructured API services for processing by default. To process files locally instead, see the instructions at the end of this page.
+
+import VectaraAPISh from '/snippets/destination_connectors/vectara.sh.mdx';
+import VectaraAPIPyV2 from '/snippets/destination_connectors/vectara.v2.py.mdx';
+import VectaraAPIPyV1 from '/snippets/destination_connectors/vectara.v1.py.mdx';
+
+
+
+
+
+
+
+import SharedPartitionByAPIOSS from '/snippets/ingest-configuration-shared/partition-by-api-oss.mdx';
+
+
diff --git a/snippets/dc-shared-text/vectara-cli-api.mdx b/snippets/dc-shared-text/vectara-cli-api.mdx
new file mode 100644
index 00000000..bc805a31
--- /dev/null
+++ b/snippets/dc-shared-text/vectara-cli-api.mdx
@@ -0,0 +1,9 @@
+Batch process all your records to store structured outputs in Vectara.
+
+The requirements are as follows.
+
+import SharedVectara from '/snippets/general-shared-text/vectara.mdx';
+import SharedVectaraCLIAPI from '/snippets/general-shared-text/vectara-cli-api.mdx';
+
+
+
\ No newline at end of file
diff --git a/snippets/dc-shared-text/vectara.mdx b/snippets/dc-shared-text/vectara.mdx
deleted file mode 100644
index 664de04b..00000000
--- a/snippets/dc-shared-text/vectara.mdx
+++ /dev/null
@@ -1,19 +0,0 @@
-Process all your records using `unstructured-ingest` to store structured outputs locally on your filesystem and upload those to a Vectara corpus. If you don’t yet have a Vectara account, [sign up](https://console.vectara.com/signup/) for one now.
-
-The upstream connector can be any of the ones supported, but for convenience here, showing a sample command using the upstream local connector.
-
-import VectaraSh from '/snippets/destination_connectors/vectara.sh.mdx';
-import VectaraPy from '/snippets/destination_connectors/vectara.py.mdx';
-
-
-
-
-
-
-
-
-
-
-For a full list of the options the Unstructured Ingest CLI accepts check `unstructured-ingest vectara --help`.
-
-NOTE: Keep in mind that you will need to have all the appropriate extras and dependencies for the file types of the documents contained in your data storage platform if you’re running this locally. You can find more information about this in the [installation guide](/open-source/installation/overview).
diff --git a/snippets/destination_connectors/vectara.sh.mdx b/snippets/destination_connectors/vectara.sh.mdx
index a82f8b7a..fe8379ab 100644
--- a/snippets/destination_connectors/vectara.sh.mdx
+++ b/snippets/destination_connectors/vectara.sh.mdx
@@ -1,19 +1,22 @@
-```bash Shell
+```bash CLI
#!/usr/bin/env bash
-# Chunking is optional.
+# Chunking and embedding are optional.
unstructured-ingest \
local \
--input-path $LOCAL_FILE_INPUT_DIR \
- --output-dir $LOCAL_FILE_OUTPUT_DIR \
- --strategy hi_res \
- --chunk-elements \
- --num-processes 2 \
- --verbose \
+ --chunking-strategy by_title \
+ --embedding-provider huggingface \
+ --partition-by-api \
+ --api-key $UNSTRUCTURED_API_KEY \
+ --partition-endpoint $UNSTRUCTURED_API_URL \
+ --additional-partition-args="{\"split_pdf_page\":\"true\", \"split_pdf_allow_failed\":\"true\", \"split_pdf_concurrency_level\": 15}" \
vectara \
--customer-id $VECTARA_CUSTOMER_ID \
+ --corpus-name $VECTARA_CORPUS_NAME \
+ --corpus-key $VECTARA_CORPUS_KEY \
--oauth-client-id $VECTARA_OAUTH_CLIENT_ID \
- --oauth-secret $VECTARA_OAUTH_SECRET \
- --corpus-name test-corpus-vectara
+ --oauth-secret $VECTARA_OAUTH_CLIENT_SECRET \
+ --token-url $VECTARA_OAUTH_TOKEN_URL
```
diff --git a/snippets/destination_connectors/vectara.py.mdx b/snippets/destination_connectors/vectara.v1.py.mdx
similarity index 93%
rename from snippets/destination_connectors/vectara.py.mdx
rename to snippets/destination_connectors/vectara.v1.py.mdx
index 6d3da492..522fd0e5 100644
--- a/snippets/destination_connectors/vectara.py.mdx
+++ b/snippets/destination_connectors/vectara.v1.py.mdx
@@ -1,4 +1,4 @@
-```python Python
+```python Python Ingest v1
import os
from unstructured_ingest.connector.local import SimpleLocalConfig
@@ -24,7 +24,7 @@ def get_writer() -> Writer:
connector_config=SimpleVectaraConfig(
access_config=VectaraAccessConfig(
oauth_client_id=os.getenv("VECTARA_OAUTH_CLIENT_ID"),
- oauth_secret=os.getenv("VECTARA_OAUTH_SECRET"),
+ oauth_secret=os.getenv("VECTARA_OAUTH_CLIENT_SECRET"),
),
customer_id=os.getenv("VECTARA_CUSTOMER_ID"),
corpus_name="test-corpus-vectara",
diff --git a/snippets/destination_connectors/vectara.v2.py.mdx b/snippets/destination_connectors/vectara.v2.py.mdx
new file mode 100644
index 00000000..2333578a
--- /dev/null
+++ b/snippets/destination_connectors/vectara.v2.py.mdx
@@ -0,0 +1,55 @@
+```python Python Ingest v2
+import os
+
+from unstructured_ingest.v2.pipeline.pipeline import Pipeline
+from unstructured_ingest.v2.interfaces import ProcessorConfig
+
+from unstructured_ingest.v2.processes.connectors.vectara import (
+ VectaraAccessConfig,
+ VectaraConnectionConfig,
+ VectaraUploadStagerConfig,
+ VectaraUploaderConfig
+)
+from unstructured_ingest.v2.processes.connectors.local import (
+ LocalIndexerConfig,
+ LocalConnectionConfig,
+ LocalDownloaderConfig
+)
+from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
+from unstructured_ingest.v2.processes.chunker import ChunkerConfig
+from unstructured_ingest.v2.processes.embedder import EmbedderConfig
+
+# Chunking and embedding are optional.
+
+if __name__ == "__main__":
+ Pipeline.from_configs(
+ context=ProcessorConfig(),
+ indexer_config=LocalIndexerConfig(input_path=os.getenv("LOCAL_FILE_INPUT_DIR")),
+ downloader_config=LocalDownloaderConfig(),
+ source_connection_config=LocalConnectionConfig(),
+ partitioner_config=PartitionerConfig(
+ partition_by_api=True,
+ api_key=os.getenv("UNSTRUCTURED_API_KEY"),
+ partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
+ additional_partition_args={
+ "split_pdf_page": True,
+ "split_pdf_allow_failed": True,
+ "split_pdf_concurrency_level": 15
+ }
+ ),
+ chunker_config=ChunkerConfig(chunking_strategy="by_title"),
+ embedder_config=EmbedderConfig(embedding_provider="huggingface"),
+ destination_connection_config=VectaraConnectionConfig(
+ access_config=VectaraAccessConfig(
+ oauth_client_id=os.getenv("VECTARA_OAUTH_CLIENT_ID"),
+ oauth_secret=os.getenv("VECTARA_OAUTH_CLIENT_SECRET")
+ ),
+ customer_id=os.getenv("VECTARA_CUSTOMER_ID"),
+ corpus_name=os.getenv("VECTARA_CORPUS_NAME"),
+ corpus_key=os.getenv("VECTARA_CORPUS_KEY"),
+ token_url=os.getenv("VECTARA_OAUTH_TOKEN_URL")
+ ),
+ stager_config=VectaraUploadStagerConfig(),
+ uploader_config=VectaraUploaderConfig()
+ ).run()
+```
\ No newline at end of file
diff --git a/snippets/general-shared-text/vectara-cli-api.mdx b/snippets/general-shared-text/vectara-cli-api.mdx
new file mode 100644
index 00000000..c60ee2b8
--- /dev/null
+++ b/snippets/general-shared-text/vectara-cli-api.mdx
@@ -0,0 +1,18 @@
+The Vectara connector dependencies:
+
+```bash
+pip install "unstructured-ingest[vectara]"
+```
+
+import AdditionalIngestDependencies from '/snippets/general-shared-text/ingest-dependencies.mdx';
+
+
+
+The following environment variables:
+
+- `VECTARA_CUSTOMER_ID` - The customer ID for the target Vectara account, represented by `--customer-id` (CLI) or `customer_id` (Python).
+- `VECTARA_CORPUS_NAME` - The name of the target corpus in the account, represented by `--corpus-name` (CLI) or `corpus_name` (Python).
+- `VECTARA_CORPUS_KEY` - The key for the target corpus in the account, represented by `--corpus-key` (CLI) or `corpus_key` (Python).
+- `VECTARA_OAUTH_TOKEN_URL` - The OAuth token URL for getting and refreshing OAuth access tokens in the account, represented by `--token-url` (CLI) or `token_url` (Python).
+- `VECTARA_OAUTH_CLIENT_ID` - A valid OAuth client ID in the account, represented by `--oauth-client-id` (CLI) or `oauth_client_id` (Python).
+- `VECTARA_OAUTH_CLIENT_SECRET` - The OAuth client secret for the client ID, represented by `--oauth-secret` (CLI) or `oauth_secret` (Python).
\ No newline at end of file
diff --git a/snippets/general-shared-text/vectara.mdx b/snippets/general-shared-text/vectara.mdx
new file mode 100644
index 00000000..33181cdd
--- /dev/null
+++ b/snippets/general-shared-text/vectara.mdx
@@ -0,0 +1,4 @@
+- A [Vectara account](https://console.vectara.com/signup).
+- The [customer ID](https://docs.vectara.com/docs/console-ui/vectara-console-overview#view-the-customer-id) for the account.
+- The name and key for the target [corpus](https://docs.vectara.com/docs/console-ui/creating-a-corpus) in the account.
+- The [OAuth authentication URL, client ID, and client secret](https://docs.vectara.com/docs/console-ui/app-clients) for accessing the target corpus.