From 7af544e9c0b65d88411165e420d87ee3c6137e39 Mon Sep 17 00:00:00 2001 From: Paul Cornell Date: Tue, 7 Jan 2025 15:16:12 -0800 Subject: [PATCH 1/2] Ingest v2: DuckDB and MotherDuck destination connectors --- .../ingest/destination-connector/duckdb.mdx | 24 ++++++ .../destination-connector/motherduck.mdx | 24 ++++++ mint.json | 4 + .../ingest/destination-connectors/duckdb.mdx | 27 ++++++ .../destination-connectors/motherduck.mdx | 27 ++++++ snippets/dc-shared-text/duckdb-cli-api.mdx | 9 ++ .../dc-shared-text/motherduck-cli-api.mdx | 9 ++ snippets/destination_connectors/duckdb.sh.mdx | 19 ++++ .../destination_connectors/duckdb.v2.py.mdx | 51 +++++++++++ .../destination_connectors/motherduck.sh.mdx | 20 +++++ .../motherduck.v2.py.mdx | 51 +++++++++++ .../general-shared-text/duckdb-cli-api.mdx | 15 ++++ snippets/general-shared-text/duckdb.mdx | 80 +++++++++++++++++ .../motherduck-cli-api.mdx | 16 ++++ snippets/general-shared-text/motherduck.mdx | 86 +++++++++++++++++++ 15 files changed, 462 insertions(+) create mode 100644 api-reference/ingest/destination-connector/duckdb.mdx create mode 100644 api-reference/ingest/destination-connector/motherduck.mdx create mode 100644 open-source/ingest/destination-connectors/duckdb.mdx create mode 100644 open-source/ingest/destination-connectors/motherduck.mdx create mode 100644 snippets/dc-shared-text/duckdb-cli-api.mdx create mode 100644 snippets/dc-shared-text/motherduck-cli-api.mdx create mode 100644 snippets/destination_connectors/duckdb.sh.mdx create mode 100644 snippets/destination_connectors/duckdb.v2.py.mdx create mode 100644 snippets/destination_connectors/motherduck.sh.mdx create mode 100644 snippets/destination_connectors/motherduck.v2.py.mdx create mode 100644 snippets/general-shared-text/duckdb-cli-api.mdx create mode 100644 snippets/general-shared-text/duckdb.mdx create mode 100644 snippets/general-shared-text/motherduck-cli-api.mdx create mode 100644 snippets/general-shared-text/motherduck.mdx diff --git 
a/api-reference/ingest/destination-connector/duckdb.mdx b/api-reference/ingest/destination-connector/duckdb.mdx new file mode 100644 index 00000000..897beda0 --- /dev/null +++ b/api-reference/ingest/destination-connector/duckdb.mdx @@ -0,0 +1,24 @@ +--- +title: DuckDB +--- + +import NewDocument from '/snippets/general-shared-text/new-document.mdx'; + + + +import SharedContentDuckDB from '/snippets/dc-shared-text/duckdb-cli-api.mdx'; +import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx'; + + + + +Now call the Unstructured CLI or Python SDK. The source connector can be any of the ones supported. This example uses the local source connector: + +import DuckDBAPISh from '/snippets/destination_connectors/duckdb.sh.mdx'; +import DuckDBAPIPyV2 from '/snippets/destination_connectors/duckdb.v2.py.mdx'; + + + + + + diff --git a/api-reference/ingest/destination-connector/motherduck.mdx b/api-reference/ingest/destination-connector/motherduck.mdx new file mode 100644 index 00000000..997f889f --- /dev/null +++ b/api-reference/ingest/destination-connector/motherduck.mdx @@ -0,0 +1,24 @@ +--- +title: MotherDuck +--- + +import NewDocument from '/snippets/general-shared-text/new-document.mdx'; + + + +import SharedContentMotherDuck from '/snippets/dc-shared-text/motherduck-cli-api.mdx'; +import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx'; + + + + +Now call the Unstructured CLI or Python SDK. The source connector can be any of the ones supported. 
This example uses the local source connector: + +import MotherDuckAPISh from '/snippets/destination_connectors/motherduck.sh.mdx'; +import MotherDuckAPIPyV2 from '/snippets/destination_connectors/motherduck.v2.py.mdx'; + + + + + + diff --git a/mint.json b/mint.json index 0e633107..51d3789c 100644 --- a/mint.json +++ b/mint.json @@ -213,6 +213,7 @@ "open-source/ingest/destination-connectors/databricks-volumes", "open-source/ingest/destination-connectors/delta-table", "open-source/ingest/destination-connectors/dropbox", + "open-source/ingest/destination-connectors/duckdb", "open-source/ingest/destination-connectors/elasticsearch", "open-source/ingest/destination-connectors/google-cloud-service", "open-source/ingest/destination-connectors/kafka", @@ -221,6 +222,7 @@ "open-source/ingest/destination-connectors/local", "open-source/ingest/destination-connectors/milvus", "open-source/ingest/destination-connectors/mongodb", + "open-source/ingest/destination-connectors/motherduck", "open-source/ingest/destination-connectors/onedrive", "open-source/ingest/destination-connectors/opensearch", "open-source/ingest/destination-connectors/pinecone", @@ -372,6 +374,7 @@ "api-reference/ingest/destination-connector/databricks-volumes", "api-reference/ingest/destination-connector/delta-table", "api-reference/ingest/destination-connector/dropbox", + "api-reference/ingest/destination-connector/duckdb", "api-reference/ingest/destination-connector/elasticsearch", "api-reference/ingest/destination-connector/google-cloud-service", "api-reference/ingest/destination-connector/kafka", @@ -380,6 +383,7 @@ "api-reference/ingest/destination-connector/local", "api-reference/ingest/destination-connector/milvus", "api-reference/ingest/destination-connector/mongodb", + "api-reference/ingest/destination-connector/motherduck", "api-reference/ingest/destination-connector/onedrive", "api-reference/ingest/destination-connector/opensearch", "api-reference/ingest/destination-connector/pinecone", diff --git 
a/open-source/ingest/destination-connectors/duckdb.mdx b/open-source/ingest/destination-connectors/duckdb.mdx new file mode 100644 index 00000000..c05d53cb --- /dev/null +++ b/open-source/ingest/destination-connectors/duckdb.mdx @@ -0,0 +1,27 @@ +--- +title: DuckDB +--- + +import NewDocument from '/snippets/general-shared-text/new-document.mdx'; + + + +import SharedDuckDB from '/snippets/dc-shared-text/duckdb-cli-api.mdx'; + + + +Now call the Unstructured CLI or Python. The source connector can be any of the ones supported. This example uses the local source connector. + +This example sends files to Unstructured API services for processing by default. To process files locally instead, see the instructions at the end of this page. + +import DuckDBAPISh from '/snippets/destination_connectors/duckdb.sh.mdx'; +import DuckDBAPIPyV2 from '/snippets/destination_connectors/duckdb.v2.py.mdx'; + + + + + + +import SharedPartitionByAPIOSS from '/snippets/ingest-configuration-shared/partition-by-api-oss.mdx'; + + diff --git a/open-source/ingest/destination-connectors/motherduck.mdx b/open-source/ingest/destination-connectors/motherduck.mdx new file mode 100644 index 00000000..d562121a --- /dev/null +++ b/open-source/ingest/destination-connectors/motherduck.mdx @@ -0,0 +1,27 @@ +--- +title: MotherDuck +--- + +import NewDocument from '/snippets/general-shared-text/new-document.mdx'; + + + +import SharedMotherDuck from '/snippets/dc-shared-text/motherduck-cli-api.mdx'; + + + +Now call the Unstructured CLI or Python. The source connector can be any of the ones supported. This example uses the local source connector. + +This example sends files to Unstructured API services for processing by default. To process files locally instead, see the instructions at the end of this page. 
+ +import MotherDuckAPISh from '/snippets/destination_connectors/motherduck.sh.mdx'; +import MotherDuckAPIPyV2 from '/snippets/destination_connectors/motherduck.v2.py.mdx'; + + + + + + +import SharedPartitionByAPIOSS from '/snippets/ingest-configuration-shared/partition-by-api-oss.mdx'; + + diff --git a/snippets/dc-shared-text/duckdb-cli-api.mdx b/snippets/dc-shared-text/duckdb-cli-api.mdx new file mode 100644 index 00000000..5a3c49bc --- /dev/null +++ b/snippets/dc-shared-text/duckdb-cli-api.mdx @@ -0,0 +1,9 @@ +Batch process all your records to store structured outputs in a DuckDB installation. + +The requirements are as follows. + +import SharedDuckDB from '/snippets/general-shared-text/duckdb.mdx'; +import SharedDuckDBCLIAPI from '/snippets/general-shared-text/duckdb-cli-api.mdx'; + + + diff --git a/snippets/dc-shared-text/motherduck-cli-api.mdx b/snippets/dc-shared-text/motherduck-cli-api.mdx new file mode 100644 index 00000000..f2676580 --- /dev/null +++ b/snippets/dc-shared-text/motherduck-cli-api.mdx @@ -0,0 +1,9 @@ +Batch process all your records to store structured outputs in a MotherDuck account. + +The requirements are as follows. + +import SharedMotherDuck from '/snippets/general-shared-text/motherduck.mdx'; +import SharedMotherDuckCLIAPI from '/snippets/general-shared-text/motherduck-cli-api.mdx'; + + + diff --git a/snippets/destination_connectors/duckdb.sh.mdx b/snippets/destination_connectors/duckdb.sh.mdx new file mode 100644 index 00000000..365817fd --- /dev/null +++ b/snippets/destination_connectors/duckdb.sh.mdx @@ -0,0 +1,19 @@ +```bash CLI +#!/usr/bin/env bash + +# Chunking and embedding are optional. 
+ +unstructured-ingest \ + local \ + --input-path $LOCAL_FILE_INPUT_DIR \ + --chunking-strategy by_title \ + --embedding-provider huggingface \ + --partition-by-api \ + --api-key $UNSTRUCTURED_API_KEY \ + --partition-endpoint $UNSTRUCTURED_API_URL \ + --additional-partition-args="{\"split_pdf_page\":\"true\", \"split_pdf_allow_failed\":\"true\", \"split_pdf_concurrency_level\": 15}" \ + duckdb \ + --database $DUCKDB_DATABASE \ + --db-schema $DUCKDB_DB_SCHEMA \ + --table $DUCKDB_TABLE +``` \ No newline at end of file diff --git a/snippets/destination_connectors/duckdb.v2.py.mdx b/snippets/destination_connectors/duckdb.v2.py.mdx new file mode 100644 index 00000000..e582a6a1 --- /dev/null +++ b/snippets/destination_connectors/duckdb.v2.py.mdx @@ -0,0 +1,51 @@ +```python Python Ingest v2 +import os + +from unstructured_ingest.v2.pipeline.pipeline import Pipeline +from unstructured_ingest.v2.interfaces import ProcessorConfig + +from unstructured_ingest.v2.processes.connectors.duckdb.duckdb import ( + DuckDBAccessConfig, + DuckDBConnectionConfig, + DuckDBUploadStagerConfig, + DuckDBUploaderConfig +) +from unstructured_ingest.v2.processes.connectors.local import ( + LocalIndexerConfig, + LocalConnectionConfig, + LocalDownloaderConfig +) +from unstructured_ingest.v2.processes.partitioner import PartitionerConfig +from unstructured_ingest.v2.processes.chunker import ChunkerConfig +from unstructured_ingest.v2.processes.embedder import EmbedderConfig + +# Chunking and embedding are optional. 
+ +if __name__ == "__main__": + Pipeline.from_configs( + context=ProcessorConfig(), + indexer_config=LocalIndexerConfig(input_path=os.getenv("LOCAL_FILE_INPUT_DIR")), + downloader_config=LocalDownloaderConfig(), + source_connection_config=LocalConnectionConfig(), + partitioner_config=PartitionerConfig( + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"), + additional_partition_args={ + "split_pdf_page": True, + "split_pdf_allow_failed": True, + "split_pdf_concurrency_level": 15 + } + ), + chunker_config=ChunkerConfig(chunking_strategy="by_title"), + embedder_config=EmbedderConfig(embedding_provider="huggingface"), + destination_connection_config=DuckDBConnectionConfig( + access_config=DuckDBAccessConfig(), + database=os.getenv("DUCKDB_DATABASE"), + db_schema=os.getenv("DUCKDB_DB_SCHEMA"), + table=os.getenv("DUCKDB_TABLE") + ), + stager_config=DuckDBUploadStagerConfig(), + uploader_config=DuckDBUploaderConfig(batch_size=50) + ).run() +``` \ No newline at end of file diff --git a/snippets/destination_connectors/motherduck.sh.mdx b/snippets/destination_connectors/motherduck.sh.mdx new file mode 100644 index 00000000..638bb2b9 --- /dev/null +++ b/snippets/destination_connectors/motherduck.sh.mdx @@ -0,0 +1,20 @@ +```bash CLI +#!/usr/bin/env bash + +# Chunking and embedding are optional. 
+ +unstructured-ingest \ + local \ + --input-path $LOCAL_FILE_INPUT_DIR \ + --chunking-strategy by_title \ + --embedding-provider huggingface \ + --partition-by-api \ + --api-key $UNSTRUCTURED_API_KEY \ + --partition-endpoint $UNSTRUCTURED_API_URL \ + --additional-partition-args="{\"split_pdf_page\":\"true\", \"split_pdf_allow_failed\":\"true\", \"split_pdf_concurrency_level\": 15}" \ + motherduck \ + --md-token $MOTHERDUCK_MD_TOKEN \ + --database $MOTHERDUCK_DATABASE \ + --db-schema $MOTHERDUCK_DB_SCHEMA \ + --table $MOTHERDUCK_TABLE +``` \ No newline at end of file diff --git a/snippets/destination_connectors/motherduck.v2.py.mdx b/snippets/destination_connectors/motherduck.v2.py.mdx new file mode 100644 index 00000000..5e657b14 --- /dev/null +++ b/snippets/destination_connectors/motherduck.v2.py.mdx @@ -0,0 +1,51 @@ +```python Python Ingest v2 +import os + +from unstructured_ingest.v2.pipeline.pipeline import Pipeline +from unstructured_ingest.v2.interfaces import ProcessorConfig + +from unstructured_ingest.v2.processes.connectors.duckdb.motherduck import ( + MotherDuckAccessConfig, + MotherDuckConnectionConfig, + MotherDuckUploadStagerConfig, + MotherDuckUploaderConfig +) +from unstructured_ingest.v2.processes.connectors.local import ( + LocalIndexerConfig, + LocalConnectionConfig, + LocalDownloaderConfig +) +from unstructured_ingest.v2.processes.partitioner import PartitionerConfig +from unstructured_ingest.v2.processes.chunker import ChunkerConfig +from unstructured_ingest.v2.processes.embedder import EmbedderConfig + +# Chunking and embedding are optional. 
+ +if __name__ == "__main__": + Pipeline.from_configs( + context=ProcessorConfig(), + indexer_config=LocalIndexerConfig(input_path=os.getenv("LOCAL_FILE_INPUT_DIR")), + downloader_config=LocalDownloaderConfig(), + source_connection_config=LocalConnectionConfig(), + partitioner_config=PartitionerConfig( + partition_by_api=True, + api_key=os.getenv("UNSTRUCTURED_API_KEY"), + partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"), + additional_partition_args={ + "split_pdf_page": True, + "split_pdf_allow_failed": True, + "split_pdf_concurrency_level": 15 + } + ), + chunker_config=ChunkerConfig(chunking_strategy="by_title"), + embedder_config=EmbedderConfig(embedding_provider="huggingface"), + destination_connection_config=MotherDuckConnectionConfig( + access_config=MotherDuckAccessConfig(md_token=os.getenv("MOTHERDUCK_MD_TOKEN")), + database=os.getenv("MOTHERDUCK_DATABASE"), + db_schema=os.getenv("MOTHERDUCK_DB_SCHEMA"), + table=os.getenv("MOTHERDUCK_TABLE") + ), + stager_config=MotherDuckUploadStagerConfig(), + uploader_config=MotherDuckUploaderConfig(batch_size=50) + ).run() +``` \ No newline at end of file diff --git a/snippets/general-shared-text/duckdb-cli-api.mdx b/snippets/general-shared-text/duckdb-cli-api.mdx new file mode 100644 index 00000000..7fbf71f9 --- /dev/null +++ b/snippets/general-shared-text/duckdb-cli-api.mdx @@ -0,0 +1,15 @@ +The DuckDB connector dependencies: + +```bash CLI, Python +pip install "unstructured-ingest[duckdb]" +``` + +import AdditionalIngestDependencies from '/snippets/general-shared-text/ingest-dependencies.mdx'; + + + +The following environment variables: + +- `DUCKDB_DATABASE` - The path to the target DuckDB persistent database file with the extension `.db` or `.duckdb`, represented by `--database` (CLI) or `database` (Python). +- `DUCKDB_DB_SCHEMA` - The name of the target schema in the database, represented by `--db-schema` (CLI) or `db_schema` (Python). 
+- `DUCKDB_TABLE` - The name of the target table in the schema, represented by `--table` (CLI) or `table` (Python). \ No newline at end of file diff --git a/snippets/general-shared-text/duckdb.mdx b/snippets/general-shared-text/duckdb.mdx new file mode 100644 index 00000000..daf4620d --- /dev/null +++ b/snippets/general-shared-text/duckdb.mdx @@ -0,0 +1,80 @@ +- A [DuckDB installation](https://duckdb.org/docs/installation). +- A [persistent database](https://duckdb.org/docs/connect/overview.html#persistent-database), for example by running the + [DuckDB CLI](https://duckdb.org/docs/api/cli) command `duckdb <database-file-name>.db` or + `duckdb <database-file-name>.duckdb`, replacing `<database-file-name>` with the name of the target file. +- The path to the target persistent database file. +- A schema in the target database. + + - [Create a schema](https://duckdb.org/docs/sql/statements/create_schema.html). + - You can list available schemas and their parent catalogs by running the following DuckDB CLI command: + + ```sql + SELECT * FROM information_schema.schemata; + ``` + + The DuckDB connector uses the default schema name of `main` if not otherwise specified. + +- A table in the target schema. + + - [Create a table](https://duckdb.org/docs/sql/statements/create_table). + - You can list available tables in a schema by running the following DuckDB CLI commands, replacing the target catalog and schema names: + + ```sql + USE <catalog-name>.<schema-name>; + SHOW TABLES; + ``` + + The DuckDB connector uses the default table name of `elements` if not otherwise specified. 
+ + For maximum compatibility, Unstructured recommends the following table schema: + + ```sql + CREATE TABLE elements ( + id VARCHAR, + element_id VARCHAR, + text TEXT, + embeddings FLOAT[], + type VARCHAR, + system VARCHAR, + layout_width DECIMAL, + layout_height DECIMAL, + points TEXT, + url TEXT, + version VARCHAR, + date_created INTEGER, + date_modified INTEGER, + date_processed DOUBLE, + permissions_data TEXT, + record_locator TEXT, + category_depth INTEGER, + parent_id VARCHAR, + attached_filename VARCHAR, + filetype VARCHAR, + last_modified TIMESTAMP, + file_directory VARCHAR, + filename VARCHAR, + languages VARCHAR[], + page_number VARCHAR, + links TEXT, + page_name VARCHAR, + link_urls VARCHAR[], + link_texts VARCHAR[], + sent_from VARCHAR[], + sent_to VARCHAR[], + subject VARCHAR, + section VARCHAR, + header_footer_type VARCHAR, + emphasized_text_contents VARCHAR[], + emphasized_text_tags VARCHAR[], + text_as_html TEXT, + regex_metadata TEXT, + detection_class_prob DECIMAL + ); + ``` + + You can list the schema of a table by running the following DuckDB CLI commands, replacing the target catalog, schema, and table names: + + ```sql + USE <catalog-name>.<schema-name>; + DESCRIBE TABLE <table-name>; + ``` \ No newline at end of file diff --git a/snippets/general-shared-text/motherduck-cli-api.mdx b/snippets/general-shared-text/motherduck-cli-api.mdx new file mode 100644 index 00000000..5f99dd75 --- /dev/null +++ b/snippets/general-shared-text/motherduck-cli-api.mdx @@ -0,0 +1,16 @@ +The MotherDuck connector dependencies: + +```bash CLI, Python +pip install "unstructured-ingest[duckdb]" +``` + +import AdditionalIngestDependencies from '/snippets/general-shared-text/ingest-dependencies.mdx'; + + + +The following environment variables: + +- `MOTHERDUCK_MD_TOKEN` - The access token for the target MotherDuck account, represented by `--md-token` (CLI) or `md_token` (Python). 
+- `MOTHERDUCK_DATABASE` - The name of the target database in the account, represented by `--database` (CLI) or `database` (Python). +- `MOTHERDUCK_DB_SCHEMA` - The name of the target schema in the database, represented by `--db-schema` (CLI) or `db_schema` (Python). +- `MOTHERDUCK_TABLE` - The name of the target table in the schema, represented by `--table` (CLI) or `table` (Python). \ No newline at end of file diff --git a/snippets/general-shared-text/motherduck.mdx b/snippets/general-shared-text/motherduck.mdx new file mode 100644 index 00000000..ae844314 --- /dev/null +++ b/snippets/general-shared-text/motherduck.mdx @@ -0,0 +1,86 @@ +- A [MotherDuck account](https://app.motherduck.com). +- A [MotherDuck access token](https://motherduck.com/docs/key-tasks/authenticating-and-connecting-to-motherduck/authenticating-to-motherduck/#creating-an-access-token) for the account. +- A database in the account. + + - [Create a database](https://motherduck.com/docs/sql-reference/motherduck-sql-reference/create-database/). + - [List available databases](https://motherduck.com/docs/key-tasks/database-operations/basics-operations/#listing-databases). + + You can run commands to manage MotherDuck databases, schemas, tables, and more in the + [MotherDuck UI](https://motherduck.com/docs/getting-started/motherduck-quick-tour/) or for example by connecting to MotherDuck with the + [DuckDB CLI](https://motherduck.com/docs/key-tasks/authenticating-and-connecting-to-motherduck/connecting-to-motherduck/). + +- A schema in the target database. + + - [Create a schema](https://duckdb.org/docs/sql/statements/create_schema.html). + - You can list available schemas and their parent catalogs by running the following command in the MotherDuck UI or the DuckDB CLI: + + ```sql + SELECT * FROM information_schema.schemata; + ``` + + The MotherDuck connector uses the default schema name of `main` if not otherwise specified. + +- A table in the target schema. 
+ + - [Create a table](https://duckdb.org/docs/sql/statements/create_table). + - You can list available tables in a schema by running the following commands in the MotherDuck UI or the DuckDB CLI, replacing the target catalog and schema names: + + ```sql + USE <catalog-name>.<schema-name>; + SHOW TABLES; + ``` + + The MotherDuck connector uses the default table name of `elements` if not otherwise specified. + + For maximum compatibility, Unstructured recommends the following table schema: + + ```sql + CREATE TABLE elements ( + id VARCHAR, + element_id VARCHAR, + text TEXT, + embeddings FLOAT[], + type VARCHAR, + system VARCHAR, + layout_width DECIMAL, + layout_height DECIMAL, + points TEXT, + url TEXT, + version VARCHAR, + date_created INTEGER, + date_modified INTEGER, + date_processed DOUBLE, + permissions_data TEXT, + record_locator TEXT, + category_depth INTEGER, + parent_id VARCHAR, + attached_filename VARCHAR, + filetype VARCHAR, + last_modified TIMESTAMP, + file_directory VARCHAR, + filename VARCHAR, + languages VARCHAR[], + page_number VARCHAR, + links TEXT, + page_name VARCHAR, + link_urls VARCHAR[], + link_texts VARCHAR[], + sent_from VARCHAR[], + sent_to VARCHAR[], + subject VARCHAR, + section VARCHAR, + header_footer_type VARCHAR, + emphasized_text_contents VARCHAR[], + emphasized_text_tags VARCHAR[], + text_as_html TEXT, + regex_metadata TEXT, + detection_class_prob DECIMAL + ); + ``` + + You can list the schema of a table by running the following commands in the MotherDuck UI or the DuckDB CLI, replacing the target catalog, schema, and table names: + + ```sql + USE <catalog-name>.<schema-name>; + DESCRIBE TABLE <table-name>; + ``` \ No newline at end of file From ae114a87ccddaec5fd43edeca825ac8ee66a663b Mon Sep 17 00:00:00 2001 From: Paul Cornell Date: Tue, 7 Jan 2025 15:22:42 -0800 Subject: [PATCH 2/2] Add DuckDB and MotherDuck install dependencies to list --- api-reference/ingest/ingest-dependencies.mdx | 1 + 1 file changed, 1 insertion(+) diff --git a/api-reference/ingest/ingest-dependencies.mdx 
b/api-reference/ingest/ingest-dependencies.mdx index de63557e..fd4841c9 100644 --- a/api-reference/ingest/ingest-dependencies.mdx +++ b/api-reference/ingest/ingest-dependencies.mdx @@ -60,6 +60,7 @@ To add support for additional connectors, run the following: | `pip install "unstructured-ingest[delta-table]"` | Delta Tables | | `pip install "unstructured-ingest[discord]"` | Discord | | `pip install "unstructured-ingest[dropbox]"` | Dropbox | +| `pip install "unstructured-ingest[duckdb]"` | DuckDB, MotherDuck | | `pip install "unstructured-ingest[elasticsearch]"` | Elasticsearch | | `pip install "unstructured-ingest[gcs]"` | Google Cloud Storage | | `pip install "unstructured-ingest[github]"` | GitHub |