Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions api-reference/ingest/source-connectors/astradb.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,11 @@ import SharedAPIKeyURL from '/snippets/general-shared-text/api-key-url.mdx';
Now call the Unstructured CLI or Python SDK. The destination connector can be any supported destination connector. This example uses the local destination connector:

import AstraDBAPISh from '/snippets/source_connectors/astradb.sh.mdx';
import AstraDBAPIPyV2 from '/snippets/source_connectors/astradb.v2.py.mdx';
import AstraDBAPIPyV1 from '/snippets/source_connectors/astradb.v1.py.mdx';

<CodeGroup>

<AstraDBAPISh />

<AstraDBAPIPyV2 />
<AstraDBAPIPyV1 />

</CodeGroup>
5 changes: 2 additions & 3 deletions open-source/ingest/source-connectors/astradb.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,13 @@ Now call the Unstructured CLI or Python. The destination connector can be any of
This example sends data to Unstructured API services for processing by default. To process data locally instead, see the instructions at the end of this page.

import AstraDBSh from '/snippets/source_connectors/astradb.sh.mdx';
import AstraDBPyV2 from '/snippets/source_connectors/astradb.v2.py.mdx';
import AstraDBPyV1 from '/snippets/source_connectors/astradb.v1.py.mdx';

<CodeGroup>

<AstraDBSh />

<AstraDBPyV2 />
<AstraDBPyV1 />

</CodeGroup>

import SharedPartitionByAPIOSS from '/snippets/ingest-configuration-shared/partition-by-api-oss.mdx';
Expand Down
2 changes: 1 addition & 1 deletion snippets/destination_connectors/astradb.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ unstructured-ingest \
astradb \
--api-endpoint $ASTRA_DB_API_ENDPOINT \
--token $ASTRA_DB_APPLICATION_TOKEN \
--namespace $ASTRA_DB_NAMESPACE \
--keyspace $ASTRA_DB_KEYSPACE \
--collection-name $ASTRA_DB_COLLECTION \
--embedding-dimension $ASTRA_DB_EMBEDDING_DIMENSIONS
```
2 changes: 1 addition & 1 deletion snippets/destination_connectors/astradb.v1.py.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def get_writer() -> Writer:
api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"),
token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
),
namespace=os.getenv("ASTRA_DB_NAMESPACE"),
keyspace=os.getenv("ASTRA_DB_KEYSPACE"),
collection_name=os.getenv("ASTRA_DB_COLLECTION"),
embedding_dimension=os.getenv("ASTRA_DB_EMBEDDING_DIMENSIONS"),
),
Expand Down
2 changes: 1 addition & 1 deletion snippets/destination_connectors/astradb.v2.py.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ if __name__ == "__main__":
),
stager_config=AstraDBUploadStagerConfig(),
uploader_config=AstraDBUploaderConfig(
namespace=os.getenv("ASTRA_DB_NAMESPACE"),
keyspace=os.getenv("ASTRA_DB_KEYSPACE"),
collection_name=os.getenv("ASTRA_DB_COLLECTION"),
embedding_dimension=os.getenv("ASTRA_DB_EMBEDDING_DIMENSIONS")
)
Expand Down
4 changes: 2 additions & 2 deletions snippets/general-shared-text/astradb-cli-api.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ These environment variables:

- `ASTRA_DB_API_ENDPOINT` - The API endpoint for the Astra DB database, represented by `--api-endpoint` (CLI) or `api_endpoint` (Python). To get the endpoint, see the **Database Details > API Endpoint** value on your database's **Overview** tab.
- `ASTRA_DB_APPLICATION_TOKEN` - The database application token value for the database, represented by `--token` (CLI) or `token` (Python). To get the token, see the **Database Details > Application Tokens** box on your database's **Overview** tab.
- `ASTRA_DB_NAMESPACE` - The name of the namespace for the database, represented by `--namespace` (CLI) or `namespace` (Python).
- `ASTRA_DB_COLLECTION` - The name of the collection for the namespace, represented by `--collection-name` (CLI) or `collection_name` (Python).
- `ASTRA_DB_KEYSPACE` - The name of the keyspace for the database, represented by `--keyspace` (CLI) or `keyspace` (Python).
- `ASTRA_DB_COLLECTION` - The name of the collection for the keyspace, represented by `--collection-name` (CLI) or `collection_name` (Python).
- `ASTRA_DB_EMBEDDING_DIMENSIONS` - The number of embedding dimensions for the collection, represented by `--embedding-dimension` (CLI) or `embedding_dimension` (Python).
2 changes: 1 addition & 1 deletion snippets/source_connectors/astradb.sh.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ unstructured-ingest \
astradb \
--api-endpoint $ASTRA_DB_API_ENDPOINT \
--token $ASTRA_DB_APPLICATION_TOKEN \
--namespace $ASTRA_DB_NAMESPACE \
--keyspace $ASTRA_DB_KEYSPACE \
--collection-name $ASTRA_DB_COLLECTION \
--download-dir $LOCAL_FILE_DOWNLOAD_DIR \
--partition-by-api \
Expand Down
2 changes: 1 addition & 1 deletion snippets/source_connectors/astradb.v1.py.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ if __name__ == "__main__":
token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT")
),
namespace=os.getenv("ASTRA_DB_NAMESPACE"),
keyspace=os.getenv("ASTRA_DB_KEYSPACE"),
collection_name=os.getenv("ASTRA_DB_COLLECTION")
)
).run()
Expand Down
41 changes: 41 additions & 0 deletions snippets/source_connectors/astradb.v2.py.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
```python Python Ingest v2
import os

from unstructured_ingest.v2.pipeline.pipeline import Pipeline
from unstructured_ingest.v2.interfaces import ProcessorConfig
from unstructured_ingest.v2.processes.connectors.astradb import (
AstraDBAccessConfig,
AstraDBConnectionConfig,
AstraDBDownloaderConfig,
AstraDBIndexerConfig,
)
from unstructured_ingest.v2.processes.partitioner import PartitionerConfig
from unstructured_ingest.v2.processes.connectors.local import LocalUploaderConfig

# Chunking and embedding are optional.

if __name__ == "__main__":
    # Read the Astra DB location once; the indexer and the downloader are
    # both pointed at the same collection and keyspace.
    collection = os.getenv("ASTRA_DB_COLLECTION")
    keyspace = os.getenv("ASTRA_DB_KEYSPACE")

    # Astra DB credentials: application token plus the database API endpoint.
    astra_access = AstraDBAccessConfig(
        token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"),
        api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"),
    )

    # Partitioning is delegated to the Unstructured API (partition_by_api=True).
    partitioner = PartitionerConfig(
        partition_by_api=True,
        partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
        api_key=os.getenv("UNSTRUCTURED_API_KEY"),
    )

    # This example uses the local uploader with an output directory taken
    # from the environment.
    local_output = LocalUploaderConfig(output_dir=os.getenv("LOCAL_FILE_OUTPUT_DIR"))

    pipeline = Pipeline.from_configs(
        context=ProcessorConfig(),
        indexer_config=AstraDBIndexerConfig(
            collection_name=collection,
            keyspace=keyspace,
        ),
        downloader_config=AstraDBDownloaderConfig(
            collection_name=collection,
            keyspace=keyspace,
        ),
        source_connection_config=AstraDBConnectionConfig(access_config=astra_access),
        partitioner_config=partitioner,
        uploader_config=local_output,
    )
    pipeline.run()
```