diff --git a/api-reference/ingest/destination-connector/lancedb.mdx b/api-reference/ingest/destination-connector/lancedb.mdx
new file mode 100644
index 00000000..a6e350fe
--- /dev/null
+++ b/api-reference/ingest/destination-connector/lancedb.mdx
@@ -0,0 +1,23 @@
+---
+title: LanceDB
+---
+
+import NewDocument from '/snippets/general-shared-text/new-document.mdx';
+
+
+
+import SharedContentLanceDB from '/snippets/dc-shared-text/lancedb-cli-api.mdx';
+import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx';
+
+
+
+
+Now call the Unstructured CLI or Python SDK. The source connector can be any of the ones supported. This example uses the local source connector:
+
+import LanceDBAPISh from '/snippets/destination_connectors/lancedb.sh.mdx';
+import LanceDBAPIPyV2 from '/snippets/destination_connectors/lancedb.v2.py.mdx';
+
+
+
+
+
\ No newline at end of file
diff --git a/mint.json b/mint.json
index 307125e4..281242ac 100644
--- a/mint.json
+++ b/mint.json
@@ -199,6 +199,7 @@
"open-source/ingest/destination-connectors/google-cloud-service",
"open-source/ingest/destination-connectors/kafka",
"open-source/ingest/destination-connectors/kdbai",
+ "open-source/ingest/destination-connectors/lancedb",
"open-source/ingest/destination-connectors/local",
"open-source/ingest/destination-connectors/milvus",
"open-source/ingest/destination-connectors/mongodb",
@@ -357,6 +358,7 @@
"api-reference/ingest/destination-connector/google-cloud-service",
"api-reference/ingest/destination-connector/kafka",
"api-reference/ingest/destination-connector/kdbai",
+ "api-reference/ingest/destination-connector/lancedb",
"api-reference/ingest/destination-connector/local",
"api-reference/ingest/destination-connector/milvus",
"api-reference/ingest/destination-connector/mongodb",
diff --git a/open-source/ingest/destination-connectors/lancedb.mdx b/open-source/ingest/destination-connectors/lancedb.mdx
new file mode 100644
index 00000000..98056ed4
--- /dev/null
+++ b/open-source/ingest/destination-connectors/lancedb.mdx
@@ -0,0 +1,27 @@
+---
+title: LanceDB
+---
+
+import NewDocument from '/snippets/general-shared-text/new-document.mdx';
+
+
+
+import SharedLanceDB from '/snippets/dc-shared-text/lancedb-cli-api.mdx';
+
+
+
+Now call the Unstructured CLI or Python library. The source connector can be any of the ones supported. This example uses the local source connector:
+
+This example sends files to Unstructured API services for processing by default. To process files locally instead, see the instructions at the end of this page.
+
+import LanceDBAPISh from '/snippets/destination_connectors/lancedb.sh.mdx';
+import LanceDBAPIPyV2 from '/snippets/destination_connectors/lancedb.v2.py.mdx';
+
+
+
+
+
+
+import SharedPartitionByAPIOSS from '/snippets/ingest-configuration-shared/partition-by-api-oss.mdx';
+
+
\ No newline at end of file
diff --git a/snippets/dc-shared-text/lancedb-cli-api.mdx b/snippets/dc-shared-text/lancedb-cli-api.mdx
new file mode 100644
index 00000000..a1edce80
--- /dev/null
+++ b/snippets/dc-shared-text/lancedb-cli-api.mdx
@@ -0,0 +1,9 @@
+Batch process all your records to store structured outputs in LanceDB.
+
+You will need:
+
+import SharedLanceDB from '/snippets/general-shared-text/lancedb.mdx';
+import SharedLanceDBCLIAPI from '/snippets/general-shared-text/lancedb-cli-api.mdx';
+
+
+
\ No newline at end of file
diff --git a/snippets/destination_connectors/lancedb.sh.mdx b/snippets/destination_connectors/lancedb.sh.mdx
new file mode 100644
index 00000000..dc8de378
--- /dev/null
+++ b/snippets/destination_connectors/lancedb.sh.mdx
@@ -0,0 +1,69 @@
+```bash CLI
+#!/usr/bin/env bash
+
+# Chunking and embedding are optional.
+
+# For LanceDB OSS with local data storage:
+unstructured-ingest \
+ local \
+ --input-path $LOCAL_FILE_INPUT_DIR \
+ --chunking-strategy by_title \
+ --embedding-provider huggingface \
+ --partition-by-api \
+ --api-key $UNSTRUCTURED_API_KEY \
+ --partition-endpoint $UNSTRUCTURED_API_URL \
+ --additional-partition-args="{\"split_pdf_page\":\"true\", \"split_pdf_allow_failed\":\"true\", \"split_pdf_concurrency_level\": 15}" \
+ lancedb-local \
+ --uri $LANCEDB_URI \
+ --table-name $LANCEDB_TABLE
+
+# For LanceDB OSS with data storage in an Amazon S3 bucket:
+unstructured-ingest \
+ local \
+ --input-path $LOCAL_FILE_INPUT_DIR \
+ --chunking-strategy by_title \
+ --embedding-provider huggingface \
+ --partition-by-api \
+ --api-key $UNSTRUCTURED_API_KEY \
+ --partition-endpoint $UNSTRUCTURED_API_URL \
+ --additional-partition-args="{\"split_pdf_page\":\"true\", \"split_pdf_allow_failed\":\"true\", \"split_pdf_concurrency_level\": 15}" \
+ lancedb-aws \
+ --aws-access-key-id $AWS_ACCESS_KEY_ID \
+ --aws-secret-access-key $AWS_SECRET_ACCESS_KEY \
+ --uri $LANCEDB_URI \
+ --table-name $LANCEDB_TABLE \
+ --timeout 30s
+
+# For LanceDB OSS with data storage in an Azure Blob Storage account:
+unstructured-ingest \
+ local \
+ --input-path $LOCAL_FILE_INPUT_DIR \
+ --chunking-strategy by_title \
+ --embedding-provider huggingface \
+ --partition-by-api \
+ --api-key $UNSTRUCTURED_API_KEY \
+ --partition-endpoint $UNSTRUCTURED_API_URL \
+ --additional-partition-args="{\"split_pdf_page\":\"true\", \"split_pdf_allow_failed\":\"true\", \"split_pdf_concurrency_level\": 15}" \
+ lancedb-azure \
+ --azure-storage-account-name $AZURE_STORAGE_ACCOUNT_NAME \
+ --azure-storage-account-key $AZURE_STORAGE_ACCOUNT_KEY \
+ --uri $LANCEDB_URI \
+ --table-name $LANCEDB_TABLE \
+ --timeout 30s
+
+# For LanceDB OSS with data storage in a Google Cloud Storage bucket:
+unstructured-ingest \
+ local \
+ --input-path $LOCAL_FILE_INPUT_DIR \
+ --chunking-strategy by_title \
+ --embedding-provider huggingface \
+ --partition-by-api \
+ --api-key $UNSTRUCTURED_API_KEY \
+ --partition-endpoint $UNSTRUCTURED_API_URL \
+ --additional-partition-args="{\"split_pdf_page\":\"true\", \"split_pdf_allow_failed\":\"true\", \"split_pdf_concurrency_level\": 15}" \
+ lancedb-gcs \
+ --google-service-account-key $GCS_SERVICE_ACCOUNT_KEY \
+ --uri $LANCEDB_URI \
+ --table-name $LANCEDB_TABLE \
+ --timeout 30s
+```
\ No newline at end of file
diff --git a/snippets/destination_connectors/lancedb.v2.py.mdx b/snippets/destination_connectors/lancedb.v2.py.mdx
new file mode 100644
index 00000000..8db05131
--- /dev/null
+++ b/snippets/destination_connectors/lancedb.v2.py.mdx
@@ -0,0 +1,107 @@
+```python Python Ingest v2
+import os
+
+from unstructured_ingest.v2.pipeline.pipeline import Pipeline
+from unstructured_ingest.v2.interfaces import ProcessorConfig
+
+from unstructured_ingest.v2.processes.connectors.local import (
+ LocalIndexerConfig,
+ LocalDownloaderConfig,
+ LocalConnectionConfig
+)
+from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
+from unstructured_ingest.v2.processes.chunker import ChunkerConfig
+from unstructured_ingest.v2.processes.embedder import EmbedderConfig
+
+# For LanceDB OSS with local data storage:
+# from unstructured_ingest.v2.processes.connectors.lancedb.local import (
+# LanceDBLocalConnectionConfig,
+# LanceDBLocalAccessConfig,
+# LanceDBUploadStagerConfig,
+# LanceDBUploaderConfig
+# )
+
+# For LanceDB OSS with data storage in an Amazon S3 bucket:
+from unstructured_ingest.v2.processes.connectors.lancedb.aws import (
+ LanceDBS3ConnectionConfig,
+ LanceDBS3AccessConfig,
+ LanceDBUploadStagerConfig,
+ LanceDBUploaderConfig
+)
+
+# For LanceDB OSS with data storage in an Azure Blob Storage account:
+# from unstructured_ingest.v2.processes.connectors.lancedb.azure import (
+# LanceDBAzureConnectionConfig,
+# LanceDBAzureAccessConfig,
+# LanceDBUploadStagerConfig,
+# LanceDBUploaderConfig
+# )
+
+# For LanceDB OSS with data storage in a Google Cloud Storage bucket:
+# from unstructured_ingest.v2.processes.connectors.lancedb.gcp import (
+# LanceDBGCSConnectionConfig,
+# LanceDBGCSAccessConfig,
+# LanceDBUploadStagerConfig,
+# LanceDBUploaderConfig
+# )
+
+# Chunking and embedding are optional.
+
+if __name__ == "__main__":
+ Pipeline.from_configs(
+ context=ProcessorConfig(),
+ indexer_config=LocalIndexerConfig(input_path=os.getenv("LOCAL_FILE_INPUT_DIR")),
+ downloader_config=LocalDownloaderConfig(),
+ source_connection_config=LocalConnectionConfig(),
+ partitioner_config=PartitionerConfig(
+ partition_by_api=True,
+ api_key=os.getenv("UNSTRUCTURED_API_KEY"),
+ partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
+ additional_partition_args={
+ "split_pdf_page": True,
+ "split_pdf_allow_failed": True,
+ "split_pdf_concurrency_level": 15
+ }
+ ),
+ chunker_config=ChunkerConfig(chunking_strategy="by_title"),
+ embedder_config=EmbedderConfig(embedding_provider="huggingface"),
+
+ # For LanceDB OSS with local data storage:
+ # destination_connection_config=LanceDBLocalConnectionConfig(
+ # access_config=LanceDBLocalAccessConfig(),
+ # uri=os.getenv("LANCEDB_URI")
+ # ),
+
+ # For LanceDB OSS with data storage in an Amazon S3 bucket:
+ destination_connection_config=LanceDBS3ConnectionConfig(
+ access_config=LanceDBS3AccessConfig(
+ aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
+ aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY")
+ ),
+ uri=os.getenv("LANCEDB_URI"),
+ timeout="30s"
+ ),
+
+ # For LanceDB OSS with data storage in an Azure Blob Storage account:
+ # destination_connection_config=LanceDBAzureConnectionConfig(
+ # access_config=LanceDBAzureAccessConfig(
+ # azure_storage_account_name=os.getenv("AZURE_STORAGE_ACCOUNT_NAME"),
+ # azure_storage_account_key=os.getenv("AZURE_STORAGE_ACCOUNT_KEY")
+ # ),
+ # uri=os.getenv("LANCEDB_URI"),
+ # timeout="30s"
+ # ),
+
+ # For LanceDB OSS with data storage in a Google Cloud Storage bucket:
+ # destination_connection_config=LanceDBGCSConnectionConfig(
+ # access_config=LanceDBGCSAccessConfig(
+ # google_service_account_key=os.getenv("GCS_SERVICE_ACCOUNT_KEY")
+ # ),
+ # uri=os.getenv("LANCEDB_URI"),
+ # timeout="30s"
+ # ),
+
+ stager_config=LanceDBUploadStagerConfig(),
+ uploader_config=LanceDBUploaderConfig(table_name=os.getenv("LANCEDB_TABLE"))
+ ).run()
+```
\ No newline at end of file
diff --git a/snippets/general-shared-text/lancedb-cli-api.mdx b/snippets/general-shared-text/lancedb-cli-api.mdx
new file mode 100644
index 00000000..25499b84
--- /dev/null
+++ b/snippets/general-shared-text/lancedb-cli-api.mdx
@@ -0,0 +1,37 @@
+The LanceDB connector dependencies:
+
+```bash CLI, Python
+pip install "unstructured-ingest[lancedb]"
+```
+
+import AdditionalIngestDependencies from '/snippets/general-shared-text/ingest-dependencies.mdx';
+
+
+
+The following environment variables:
+
+- For LanceDB OSS with local data storage:
+
+ - `LANCEDB_URI` - The local path to the folder where the LanceDB data is stored, represented by `--uri` (CLI) or `uri` (Python).
+ - `LANCEDB_TABLE` - The name of the target LanceDB table within the local data folder, represented by `--table-name` (CLI) or `table_name` (Python).
+
+- For LanceDB OSS with data storage in an Amazon S3 bucket:
+
+ - `LANCEDB_URI` - The URI for the target Amazon S3 bucket and any target folder path within that bucket. Use the format `s3://<bucket-name>[/<folder-name>]`. This is represented by `--uri` (CLI) or `uri` (Python).
+ - `LANCEDB_TABLE` - The name of the target LanceDB table within the Amazon S3 bucket, represented by `--table-name` (CLI) or `table_name` (Python).
+ - `AWS_ACCESS_KEY_ID` - The AWS access key ID for the AWS IAM entity that has access to the Amazon S3 bucket, represented by `--aws-access-key-id` (CLI) or `aws_access_key_id` (Python).
+ - `AWS_SECRET_ACCESS_KEY` - The AWS secret access key for the AWS IAM entity that has access to the Amazon S3 bucket, represented by `--aws-secret-access-key` (CLI) or `aws_secret_access_key` (Python).
+
+- For LanceDB OSS with data storage in an Azure Blob Storage account:
+
+ - `LANCEDB_URI` - The URI for the target container within that Azure Blob Storage account and any target folder path within that container. Use the format `az://<container-name>[/<folder-name>]`. This is represented by `--uri` (CLI) or `uri` (Python).
+ - `LANCEDB_TABLE` - The name of the target LanceDB table within the Azure Blob Storage account, represented by `--table-name` (CLI) or `table_name` (Python).
+ - `AZURE_STORAGE_ACCOUNT_NAME` - The name of the target Azure Blob Storage account, represented by `--azure-storage-account-name` (CLI) or `azure_storage_account_name` (Python).
+ - `AZURE_STORAGE_ACCOUNT_KEY` - The access key for the Azure Blob Storage account, represented by `--azure-storage-account-key` (CLI) or `azure_storage_account_key` (Python).
+
+- For LanceDB OSS with data storage in a Google Cloud Storage bucket:
+
+ - `LANCEDB_URI` - The URI for the target Google Cloud Storage bucket and any target folder path within that bucket. Use the format `gs://<bucket-name>[/<folder-name>]`. This is represented by `--uri` (CLI) or `uri` (Python).
+ - `LANCEDB_TABLE` - The name of the target LanceDB table within the Google Cloud Storage bucket, represented by `--table-name` (CLI) or `table_name` (Python).
+ - `GCS_SERVICE_ACCOUNT_KEY` - A single-line string that contains the contents of the downloaded service account key file for the Google Cloud service account
+ that has access to the Google Cloud Storage bucket, represented by `--google-service-account-key` (CLI) or `google_service_account_key` (Python).
diff --git a/snippets/general-shared-text/lancedb.mdx b/snippets/general-shared-text/lancedb.mdx
new file mode 100644
index 00000000..395f30be
--- /dev/null
+++ b/snippets/general-shared-text/lancedb.mdx
@@ -0,0 +1,65 @@
+The LanceDB prerequisites:
+
+- A [LanceDB open source software (OSS) installation](https://lancedb.github.io/lancedb/basic/#installation) on a local machine, a server, or a virtual machine.
+ (LanceDB Cloud is not supported.)
+- For LanceDB OSS with local data storage:
+
+ - The local path to the folder where the LanceDB data is (or will be) stored.
+ See [Connect to a database](https://lancedb.github.io/lancedb/basic/#connect-to-a-database) in the LanceDB documentation.
+ - The name of the target [LanceDB table](https://lancedb.github.io/lancedb/basic/#create-an-empty-table) within the local data folder.
+
+- For LanceDB OSS with data storage in an Amazon S3 bucket:
+
+ - The URI for the target Amazon S3 bucket and any target folder path within that bucket. Use the format `s3://<bucket-name>[/<folder-name>]`.
+ - The name of the target [LanceDB table](https://lancedb.github.io/lancedb/guides/storage/#object-stores) within the Amazon S3 bucket.
+ - The AWS access key ID and AWS secret access key for the AWS IAM entity that has access to the Amazon S3 bucket.
+
+ For more information, see [AWS S3](https://lancedb.github.io/lancedb/guides/storage/#aws-s3) in the LanceDB documentation, along with the following video:
+
+
+
+- For LanceDB OSS with data storage in an Azure Blob Storage account:
+
+ - The name of the target Azure Blob Storage account.
+ - The URI for the target container within that Azure Blob Storage account and any target folder path within that container. Use the format `az://<container-name>[/<folder-name>]`.
+ - The name of the target [LanceDB table](https://lancedb.github.io/lancedb/guides/storage/#object-stores) within the Azure Blob Storage account.
+ - The access key for the Azure Blob Storage account.
+
+ For more information, see [Azure Blob Storage](https://lancedb.github.io/lancedb/guides/storage/#azure-blob-storage) in the LanceDB documentation, along with the following video:
+
+
+
+- For LanceDB OSS with data storage in a Google Cloud Storage bucket:
+
+ - The URI for the target Google Cloud Storage bucket and any target folder path within that bucket. Use the format `gs://<bucket-name>[/<folder-name>]`.
+ - The name of the target [LanceDB table](https://lancedb.github.io/lancedb/guides/storage/#object-stores) within the Google Cloud Storage bucket.
+ - A single-line string that contains the contents of the downloaded service account key file for the Google Cloud service account that has access to the
+ Google Cloud Storage bucket.
+
+ For more information, see [Google Cloud Storage](https://lancedb.github.io/lancedb/guides/storage/#google-cloud-storage) in the LanceDB documentation, along with the following video:
+
+
\ No newline at end of file