From d6859eda7af2c1b306456764853e073f90917410 Mon Sep 17 00:00:00 2001
From: Paul Cornell
Date: Wed, 5 Mar 2025 09:25:36 -0800
Subject: [PATCH 1/2] Add partitioner_type metadata element to all fixed schema definitions

---
Reviewer notes (text between "---" and the first "diff --git" is not part of
the commit message):
- snowflake.mdx: PARTITIONER_TYPE is now inserted before the out-of-line
  PRIMARY KEY (ID) constraint rather than after it, so the column list stays
  contiguous and matches the other SQL schemas in this series. The diffstat
  below reflects that (1 insertion instead of 2 insertions / 1 deletion).
  The post-image blob hash on that file's "index" line was not recomputed.
- This copy was rebuilt from a whitespace-collapsed paste. Leading indentation
  inside the hunks is reconstructed, not original; if a hunk fails to match,
  apply with whitespace-insensitive context matching (--ignore-whitespace).

 snippets/general-shared-text/databricks-delta-table.mdx | 3 ++-
 snippets/general-shared-text/duckdb.mdx                 | 3 ++-
 snippets/general-shared-text/elasticsearch.mdx          | 3 +++
 snippets/general-shared-text/motherduck.mdx             | 3 ++-
 snippets/general-shared-text/opensearch.mdx             | 3 +++
 snippets/general-shared-text/postgresql.mdx             | 6 ++++--
 snippets/general-shared-text/singlestore-schema.mdx     | 3 ++-
 snippets/general-shared-text/snowflake.mdx              | 1 +
 snippets/general-shared-text/sqlite.mdx                 | 3 ++-
 snippets/general-shared-text/weaviate.mdx               | 4 ++++
 10 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/snippets/general-shared-text/databricks-delta-table.mdx b/snippets/general-shared-text/databricks-delta-table.mdx
index 2a166f84..1eac668a 100644
--- a/snippets/general-shared-text/databricks-delta-table.mdx
+++ b/snippets/general-shared-text/databricks-delta-table.mdx
@@ -119,7 +119,8 @@
         coordinates_points STRING,
         coordinates_system STRING,
         coordinates_layout_width FLOAT,
-        coordinates_layout_height FLOAT
+        coordinates_layout_height FLOAT,
+        partitioner_type STRING
     );
     ```
 
diff --git a/snippets/general-shared-text/duckdb.mdx b/snippets/general-shared-text/duckdb.mdx
index daf4620d..4e03962a 100644
--- a/snippets/general-shared-text/duckdb.mdx
+++ b/snippets/general-shared-text/duckdb.mdx
@@ -68,7 +68,8 @@
         emphasized_text_tags VARCHAR[],
         text_as_html TEXT,
         regex_metadata TEXT,
-        detection_class_prob DECIMAL
+        detection_class_prob DECIMAL,
+        partitioner_type VARCHAR
     );
     ```
 
diff --git a/snippets/general-shared-text/elasticsearch.mdx b/snippets/general-shared-text/elasticsearch.mdx
index a42914d1..6a971a4e 100644
--- a/snippets/general-shared-text/elasticsearch.mdx
+++ b/snippets/general-shared-text/elasticsearch.mdx
@@ -72,6 +72,9 @@
                 },
                 "orig_elements": {
                     "type": "text"
+                },
+                "partitioner_type": {
+                    "type": "text"
                 }
             }
         }
diff --git a/snippets/general-shared-text/motherduck.mdx b/snippets/general-shared-text/motherduck.mdx
index 6330aee8..b1d65efc 100644
--- a/snippets/general-shared-text/motherduck.mdx
+++ b/snippets/general-shared-text/motherduck.mdx
@@ -84,7 +84,8 @@ allowfullscreen
         emphasized_text_tags VARCHAR[],
         text_as_html TEXT,
         regex_metadata TEXT,
-        detection_class_prob DECIMAL
+        detection_class_prob DECIMAL,
+        partitioner_type VARCHAR
     );
     ```
 
diff --git a/snippets/general-shared-text/opensearch.mdx b/snippets/general-shared-text/opensearch.mdx
index 40acabf1..ca82113d 100644
--- a/snippets/general-shared-text/opensearch.mdx
+++ b/snippets/general-shared-text/opensearch.mdx
@@ -75,6 +75,9 @@
                 },
                 "orig_elements": {
                     "type": "text"
+                },
+                "partitioner_type": {
+                    "type": "text"
                 }
             }
         }
diff --git a/snippets/general-shared-text/postgresql.mdx b/snippets/general-shared-text/postgresql.mdx
index 2ac123af..77557242 100644
--- a/snippets/general-shared-text/postgresql.mdx
+++ b/snippets/general-shared-text/postgresql.mdx
@@ -88,7 +88,8 @@ import AllowIPAddressRanges from '/snippets/general-shared-text/ip-address-range
         parent_id VARCHAR,
         page_number INTEGER,
         is_continuation BOOLEAN,
-        orig_elements TEXT
+        orig_elements TEXT,
+        partitioner_type VARCHAR
     );
     ```
 
@@ -104,7 +105,8 @@ import AllowIPAddressRanges from '/snippets/general-shared-text/ip-address-range
         parent_id VARCHAR,
         page_number INTEGER,
         is_continuation BOOLEAN,
-        orig_elements TEXT
+        orig_elements TEXT,
+        partitioner_type VARCHAR
     );
     ```
 
diff --git a/snippets/general-shared-text/singlestore-schema.mdx b/snippets/general-shared-text/singlestore-schema.mdx
index 1cd8c04c..15c2a66e 100644
--- a/snippets/general-shared-text/singlestore-schema.mdx
+++ b/snippets/general-shared-text/singlestore-schema.mdx
@@ -45,7 +45,8 @@ CREATE TABLE elements (
     parent_id TEXT,
     page_number TEXT,
     is_continuation BOOLEAN,
-    orig_elements TEXT
+    orig_elements TEXT,
+    partitioner_type TEXT
 );
 ```
 
diff --git a/snippets/general-shared-text/snowflake.mdx b/snippets/general-shared-text/snowflake.mdx
index d7f7248d..59b3c613 100644
--- a/snippets/general-shared-text/snowflake.mdx
+++ b/snippets/general-shared-text/snowflake.mdx
@@ -210,7 +210,8 @@
         TEXT_AS_HTML VARCHAR,
         REGEX_METADATA VARCHAR,
         DETECTION_CLASS_PROB NUMBER,
+        PARTITIONER_TYPE VARCHAR,
         PRIMARY KEY (ID)
     );
     ```
 
diff --git a/snippets/general-shared-text/sqlite.mdx b/snippets/general-shared-text/sqlite.mdx
index 17909b75..e8932aad 100644
--- a/snippets/general-shared-text/sqlite.mdx
+++ b/snippets/general-shared-text/sqlite.mdx
@@ -20,7 +20,8 @@
         parent_id TEXT,
         page_number INTEGER,
         is_continuation INTEGER,
-        orig_elements TEXT
+        orig_elements TEXT,
+        partitioner_type TEXT
     );
     ```
 
diff --git a/snippets/general-shared-text/weaviate.mdx b/snippets/general-shared-text/weaviate.mdx
index 0e44ae4a..d1d5c190 100644
--- a/snippets/general-shared-text/weaviate.mdx
+++ b/snippets/general-shared-text/weaviate.mdx
@@ -122,6 +122,10 @@ You can adapt the following collection schema example for your own specific sche
         {
             "name": "orig_elements",
             "dataType": ["text"]
+        },
+        {
+            "name": "partitioner_type",
+            "dataType": ["text"]
         }
     ]
 }

From 98b94a8a5ca02c87eaadef89290c61e7d7685b5e Mon Sep 17 00:00:00 2001
From: Paul Cornell
Date: Wed, 5 Mar 2025 09:29:30 -0800
Subject: [PATCH 2/2] Missed partitioner_type for Azure AI Search

---
 .../general-shared-text/azure-ai-search.mdx | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/snippets/general-shared-text/azure-ai-search.mdx b/snippets/general-shared-text/azure-ai-search.mdx
index 0ebb8b39..c89dbd07 100644
--- a/snippets/general-shared-text/azure-ai-search.mdx
+++ b/snippets/general-shared-text/azure-ai-search.mdx
@@ -871,6 +871,25 @@ Here are some more details about these requirements:
           "vectorSearchProfile": null,
           "vectorEncoding": null,
           "synonymMaps": []
+        },
+        {
+          "name": "partitioner_type",
+          "type": "Edm.String",
+          "searchable": false,
+          "filterable": true,
+          "retrievable": true,
+          "stored": true,
+          "sortable": true,
+          "facetable": true,
+          "key": false,
+          "indexAnalyzer": null,
+          "searchAnalyzer": null,
+          "analyzer": null,
+          "normalizer": null,
+          "dimensions": null,
+          "vectorSearchProfile": null,
+          "vectorEncoding": null,
+          "synonymMaps": []
         }
       ]
     }