Skip to content

Commit

Permalink
[DE-558] ArangoSearch column cache (#258)
Browse files — browse the repository at this point in the history
* Adding views test

* Adding extra inverted index arguments
  • Loading branch information
apetenchea committed Jul 14, 2023
1 parent 350c36b commit 4bb8c6c
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 10 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
main
----

* Added cache and primaryKeyCache parameters to the inverted index API.

* Added allow_retry query parameter, making it possible to retry fetching the latest batch from a cursor.

* Added OverloadControlDatabase, enabling the client to react effectively to potential server overloads.
Expand Down
35 changes: 25 additions & 10 deletions arango/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1342,6 +1342,8 @@ def add_inverted_index(
includeAllFields: Optional[bool] = None,
trackListPositions: Optional[bool] = None,
searchField: Optional[bool] = None,
primaryKeyCache: Optional[bool] = None,
cache: Optional[bool] = None,
) -> Result[Json]:
"""Create a new inverted index, introduced in version 3.10.
Expand All @@ -1351,22 +1353,31 @@ def add_inverted_index(
:type name: str | None
:param inBackground: Do not hold the collection lock.
:type inBackground: bool | None
:param parallelism:
:param parallelism: The number of threads to use for indexing the fields.
:type parallelism: int | None
:param primarySort:
:type primarySort: Json | None
:param storedValues:
:param primarySort: Primary sort order to enable an AQL optimization.
:type primarySort: Optional[Json]
:param storedValues: An array of objects with paths to additional
attributes to store in the index.
:type storedValues: Sequence[Json] | None
:param analyzer:
:type analyzer: str | None
:param features:
:param analyzer: Analyzer to use by default.
:type analyzer: Optional[str]
:param features: List of Analyzer features.
:type features: Sequence[str] | None
:param includeAllFields:
:param includeAllFields: This option only applies if you use the
inverted index in search-alias views.
:type includeAllFields: bool | None
:param trackListPositions:
:param trackListPositions: This option only applies if you use the
inverted index in search-alias views, and searchField is true.
:type trackListPositions: bool | None
:param searchField:
:param searchField: This option only applies if you use the inverted
index in search-alias views
:type searchField: bool | None
:param primaryKeyCache: Always cache the primary key column in memory.
:type primaryKeyCache: bool | None
:param cache: Always cache the field normalization values in memory
for all fields by default.
:type cache: bool | None
:return: New index details.
:rtype: dict
:raise arango.exceptions.IndexCreateError: If create fails.
Expand Down Expand Up @@ -1395,6 +1406,10 @@ def add_inverted_index(
data["searchField"] = searchField
if fields is not None:
data["fields"] = fields
if primaryKeyCache is not None:
data["primaryKeyCache"] = primaryKeyCache
if cache is not None:
data["cache"] = cache

return self._add_index(data)

Expand Down
40 changes: 40 additions & 0 deletions arango/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,40 @@ def format_index(body: Json) -> Json:
result["legacyPolygons"] = body["legacyPolygons"]
if "estimates" in body:
result["estimates"] = body["estimates"]
if "analyzer" in body:
result["analyzer"] = body["analyzer"]
if "cleanupIntervalStep" in body:
result["cleanup_interval_step"] = body["cleanupIntervalStep"]
if "commitIntervalMsec" in body:
result["commit_interval_msec"] = body["commitIntervalMsec"]
if "consolidationIntervalMsec" in body:
result["consolidation_interval_msec"] = body["consolidationIntervalMsec"]
if "consolidationPolicy" in body:
result["consolidation_policy"] = format_view_consolidation_policy(
body["consolidationPolicy"]
)
if "features" in body:
result["features"] = body["features"]
if "includeAllFields" in body:
result["include_all_fields"] = body["includeAllFields"]
if "primarySort" in body:
result["primary_sort"] = body["primarySort"]
if "searchField" in body:
result["search_field"] = body["searchField"]
if "trackListPositions" in body:
result["track_list_positions"] = body["trackListPositions"]
if "version" in body:
result["version"] = body["version"]
if "cache" in body:
result["cache"] = body["cache"]
if "primaryKeyCache" in body:
result["primaryKeyCache"] = body["primaryKeyCache"]
if "writebufferIdle" in body:
result["writebuffer_idle"] = body["writebufferIdle"]
if "writebufferActive" in body:
result["writebuffer_active"] = body["writebufferActive"]
if "writebufferSizeMax" in body:
result["writebuffer_max_size"] = body["writebufferSizeMax"]

return verify_format(body, result)

Expand Down Expand Up @@ -902,6 +936,12 @@ def format_view(body: Json) -> Json:
if "indexes" in body:
result["indexes"] = body["indexes"]

# Introduced in 3.9.6 EE
if "primaryKeyCache" in body:
result["primaryKeyCache"] = body["primaryKeyCache"]
if "primarySortCache" in body:
result["primarySortCache"] = body["primarySortCache"]

# Introduced in 3.12 EE
if "optimizeTopK" in body:
result["optimizeTopK"] = body["optimizeTopK"]
Expand Down
31 changes: 31 additions & 0 deletions tests/test_index.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import pytest
from packaging import version

from arango.exceptions import (
IndexCreateError,
IndexDeleteError,
Expand Down Expand Up @@ -197,6 +200,34 @@ def test_add_ttl_index(icol):
icol.delete_index(result["id"])


def test_add_inverted_index(icol, enterprise, db_version):
    """Create an inverted index and check the keys of the returned details.

    The test is skipped on servers older than 3.10.0. On enterprise servers
    from 3.10.2 onwards, the ``cache`` and ``primaryKeyCache`` options are
    passed as well and are expected to appear in the result.
    """
    if db_version < version.parse("3.10.0"):
        pytest.skip("Inverted indexes are not supported before 3.10.0")

    index_args = {
        "fields": [{"name": "attr1", "cache": True}],
        "name": "c0_cached",
        "storedValues": [{"fields": ["a"], "compression": "lz4", "cache": True}],
        "includeAllFields": True,
        "analyzer": "identity",
        "primarySort": {
            "cache": True,
            "fields": [{"field": "a", "direction": "asc"}],
        },
    }
    required = ["primary_sort", "analyzer", "include_all_fields", "search_field"]

    # The index-level cache options are only sent when this condition holds.
    supports_cache = enterprise and db_version >= version.parse("3.10.2")
    if supports_cache:
        index_args["cache"] = True
        index_args["primaryKeyCache"] = True
        required += ["cache", "primaryKeyCache"]

    index = icol.add_inverted_index(**index_args)
    assert index["id"] in extract("id", icol.indexes())

    for key in required:
        assert key in index

    # Clean up so the index does not remain on the collection.
    icol.delete_index(index["id"])


def test_delete_index(icol, bad_col):
old_indexes = set(extract("id", icol.indexes()))
icol.add_hash_index(["attr3", "attr4"], unique=True)
Expand Down
2 changes: 2 additions & 0 deletions tests/test_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ def test_arangosearch_view_properties(db, col, enterprise, db_version):
}
)

if db_version >= version.parse("3.9.6"):
params.update({"primarySortCache": True, "primaryKeyCache": True})
if db_version >= version.parse("3.10.3"):
params.update({"storedValues": ["attr1", "attr2"]})

Expand Down

0 comments on commit 4bb8c6c

Please sign in to comment.