From a12410239b865d563fb4c5f118aa5fb5b8fe8942 Mon Sep 17 00:00:00 2001 From: bkis Date: Fri, 7 Mar 2025 17:12:13 +0100 Subject: [PATCH 1/3] Implement server-side part of quick search strategies --- Tekst-API/openapi.json | 15 +- Tekst-API/tekst/models/search.py | 8 + Tekst-API/tekst/resources/__init__.py | 6 + Tekst-API/tekst/routers/search.py | 2 +- Tekst-API/tekst/search/__init__.py | 181 +++++++++++++----- Tekst-API/tekst/search/templates.py | 2 +- Tekst-API/tekst/search/utils.py | 109 +++++++++++ Tekst-Web/i18n/ui/deDE.yml | 1 + Tekst-Web/i18n/ui/enUS.yml | 1 + .../src/components/search/SearchResult.vue | 94 ++++----- Tekst-Web/src/views/SearchResultsView.vue | 4 +- Tekst-Web/src/views/SearchView.vue | 8 +- 12 files changed, 327 insertions(+), 104 deletions(-) diff --git a/Tekst-API/openapi.json b/Tekst-API/openapi.json index f5ea69940..0cc6df5a2 100644 --- a/Tekst-API/openapi.json +++ b/Tekst-API/openapi.json @@ -14236,7 +14236,8 @@ "description": "Quick search settings", "default": { "op": "OR", - "re": false + "re": false, + "strtg": "native" }, "optionalNullable": true } @@ -14283,6 +14284,18 @@ "title": "Txt", "description": "IDs of texts to search in", "optionalNullable": true + }, + "strtg": { + "type": "string", + "enum": [ + "native", + "defaultLevel", + "both" + ], + "title": "Strtg", + "description": "Quick Search strategy", + "default": "native", + "optionalNullable": true } }, "type": "object", diff --git a/Tekst-API/tekst/models/search.py b/Tekst-API/tekst/models/search.py index 1c22320bd..10edf580f 100644 --- a/Tekst-API/tekst/models/search.py +++ b/Tekst-API/tekst/models/search.py @@ -164,6 +164,14 @@ class QuickSearchSettings(ModelBase): ), SchemaOptionalNullable, ] = None + strategy: Annotated[ + Literal["native", "defaultLevel", "both"], + Field( + alias="strtg", + description="Quick Search strategy", + ), + SchemaOptionalNullable, + ] = "native" @field_validator("default_operator", mode="before") @classmethod diff --git 
a/Tekst-API/tekst/resources/__init__.py b/Tekst-API/tekst/resources/__init__.py index fe21d66fb..c1eaf2d01 100644 --- a/Tekst-API/tekst/resources/__init__.py +++ b/Tekst-API/tekst/resources/__init__.py @@ -164,6 +164,9 @@ def index_mappings( strict_analyzer=strict_analyzer, ) return dict( + native={ + "type": "boolean", + }, comment={ "type": "text", "analyzer": "standard_no_diacritics", @@ -176,11 +179,14 @@ def index_mappings( def index_doc( cls, content: ContentBase, + *, + native: bool = True, ) -> dict[str, Any]: """ Returns the content for the ES index document for this type of resource content """ return dict( + native=native, comment=content.comment, **(cls._rtype_index_doc(content) or {}), ) diff --git a/Tekst-API/tekst/routers/search.py b/Tekst-API/tekst/routers/search.py index a1b9c0ff5..0af1d87be 100644 --- a/Tekst-API/tekst/routers/search.py +++ b/Tekst-API/tekst/routers/search.py @@ -57,7 +57,7 @@ async def perform_search( if body.search_type == "quick": return await search.search_quick( user=user, - query_string=body.query, + user_query=body.query, settings_general=body.settings_general, settings_quick=body.settings_quick, ) diff --git a/Tekst-API/tekst/search/__init__.py b/Tekst-API/tekst/search/__init__.py index e3eb296ed..52e1c6086 100644 --- a/Tekst-API/tekst/search/__init__.py +++ b/Tekst-API/tekst/search/__init__.py @@ -41,6 +41,10 @@ from tekst.search.utils import ( add_analysis_settings, add_mappings, + quick_qstr_query, + quick_qstr_query_native, + quick_regexp_query, + quick_regexp_query_native, ) from tekst.state import get_state, update_state @@ -241,6 +245,7 @@ def _bulk_index( bulk_index_max_size = 200 bulk_index_body = [] errors = False + target_resource_ids = [ res.id for res in await _get_resources( @@ -250,10 +255,11 @@ def _bulk_index( ] # Initialize stack with all level 0 locations (sorted) of the current text. 
- # Each item on the stack is a tuple containing (1) the location labels from the - # root level up to the current location and (2) the location itself. + # Each item on the stack is a tuple containing + # (0) the location itself as LocationDocument + # (1) location labels from the root level up to the current location as list[str], stack = [ - ([location.label], location) + (location, [location.label]) for location in await LocationDocument.find( LocationDocument.text_id == text.id, LocationDocument.level == 0, @@ -265,38 +271,58 @@ def _bulk_index( # abort if initial stack is empty if not stack: # pragma: no cover return + + # cache mapping of location IDs to index doc contents + # for re-use in child location index docs + parent_idx_contents: dict[str, dict[str, Any]] = {} + + # keep track of number of bulk index requests bulk_req_count = 0 - while stack: - labels, location = stack.pop(0) - full_label = text.loc_delim.join(labels) + while len(stack): + loc, labels = stack.pop(0) + loc_id_str = str(loc.id) # create index document for this location - location_index_doc = { - "label": location.label, - "full_label": full_label, - "text_id": str(location.text_id), - "level": location.level, - "position": location.position, + loc_idx_doc = { + "label": loc.label, + "full_label": text.loc_delim.join(labels), + "text_id": str(loc.text_id), + "level": loc.level, + "position": loc.position, "resources": {}, } + # add parent contents + if str(loc.parent_id) in parent_idx_contents: + loc_idx_doc["resources"].update(parent_idx_contents[str(loc.parent_id)]) + # add data for each content for this location for content in await ContentBaseDocument.find( - Eq(ContentBaseDocument.location_id, location.id), + Eq(ContentBaseDocument.location_id, loc.id), In(ContentBaseDocument.resource_id, target_resource_ids), with_children=True, ).to_list(): - # add resource content document to location index document - location_index_doc["resources"][str(content.resource_id)] = ( - 
resource_types_mgr.get(content.resource_type).index_doc(content) - ) + # add resource level and content to location index document + loc_idx_doc["resources"][str(content.resource_id)] = resource_types_mgr.get( + content.resource_type + ).index_doc(content=content, native=True) + + # add location contents to cached parent contents + # (only if the current location's level is < max level, + # otherwise there won't be any child locations we need that content for) + # but set "native" to False, as these contents aren't native to child locations + if loc.level < len(text.levels) - 1: + parent_idx_contents[loc_id_str] = {} + for res_id in loc_idx_doc["resources"]: + parent_idx_contents[loc_id_str][res_id] = { + **loc_idx_doc["resources"][res_id], + "native": False, + } # add index document to bulk index request body - bulk_index_body.append( - {"index": {"_index": index_name, "_id": str(location.id)}} - ) - bulk_index_body.append(location_index_doc) + bulk_index_body.append({"index": {"_index": index_name, "_id": loc_id_str}}) + bulk_index_body.append(loc_idx_doc) # check bulk request body size, fire bulk request if necessary if len(bulk_index_body) / 2 >= bulk_index_max_size: # pragma: no cover @@ -304,12 +330,15 @@ def _bulk_index( errors |= not _bulk_index(bulk_index_body, bulk_req_count) bulk_index_body = [] - # add all child locations to the stack + # add all child locations to the processing stack stack.extend( [ - (labels + [child.label], child) + ( + child, # the target location document + labels + [child.label], # all the labels + ) for child in await LocationDocument.find( - LocationDocument.parent_id == location.id, + LocationDocument.parent_id == loc.id, ) .sort(+LocationDocument.position) .to_list() @@ -405,48 +434,102 @@ async def _get_resources( async def search_quick( user: UserRead | None, - query_string: str | None = None, + user_query: str | None = None, settings_general: GeneralSearchSettings = GeneralSearchSettings(), settings_quick: 
QuickSearchSettings = QuickSearchSettings(), ) -> SearchResults: client: Elasticsearch = _es_client + + # get (pre-)selection of target resources target_resources = await _get_resources( user=user, text_ids=settings_quick.texts, # constrain target texts ) + # remove resources that aren't quick-searchable + target_resources = [ + res for res in target_resources if res.config.common.searchable_quick + ] + # compose a list of target index fields based on the resources to search: field_pattern_suffix = ".strict" if settings_general.strict else "" - fields = [] + fields = [] # list of tuples of (res_id, field_path) for res in target_resources: - if res.config.common.searchable_quick: - for field in res.quick_search_fields(): - fields.append(f"resources.{str(res.id)}.{field}{field_pattern_suffix}") + for field in res.quick_search_fields(): + fields.append( + ( + str(res.id), + f"resources.{str(res.id)}.{field}{field_pattern_suffix}", + ) + ) - # compose the query - if not settings_quick.regexp or not query_string: - es_query = { - "simple_query_string": { - "query": query_string or "*", # fall back to '*' if empty - "fields": fields, - "default_operator": settings_quick.default_operator, - "analyze_wildcard": True, - } - } + # create ES content query + if not settings_quick.regexp or not user_query: + # use a query string query + if settings_quick.strategy in ("native", "both"): + es_query = quick_qstr_query_native( + user_query, + fields, + default_op=settings_quick.default_operator, + ) + else: + es_query = quick_qstr_query( + user_query, + fields, + default_op=settings_quick.default_operator, + ) else: + # use regexp query + if settings_quick.strategy in ("native", "both"): + es_query = quick_regexp_query_native( + user_query, + fields, + ) + else: + es_query = quick_regexp_query( + user_query, + fields, + ) + + # apply quick search strategy "defaultLevel": + # modify ES query to only find locations on their text's default level + if settings_quick.strategy in 
("defaultLevel", "both"): + # get target texts (mapped by text ID) + texts = await TextDocument.find( + In(TextDocument.id, settings_quick.texts) if settings_quick.texts else {} + ).to_list() + # construct query es_query = { "bool": { - "should": [ + "must": [ + es_query, # original query from above { - "regexp": { - field: { - "value": query_string, - "flags": "ALL", - "case_insensitive": True, - } + "bool": { + "should": [ + { + "bool": { + "filter": [ + { + "term": { + "text_id": { + "value": str(text.id), + } + } + }, + { + "term": { + "level": { + "value": text.default_level, + } + } + }, + ] + } + } + for text in texts + ] } - } - for field in fields + }, ] } } @@ -459,7 +542,7 @@ async def search_quick( index=IDX_ALIAS, query=es_query, highlight={ - "fields": [{field: {}} for field in fields], + "fields": [{field_path: {}} for _, field_path in fields], }, from_=settings_general.pagination.es_from(), size=settings_general.pagination.es_size(), diff --git a/Tekst-API/tekst/search/templates.py b/Tekst-API/tekst/search/templates.py index 34df9e000..415b3d33a 100644 --- a/Tekst-API/tekst/search/templates.py +++ b/Tekst-API/tekst/search/templates.py @@ -55,7 +55,7 @@ "properties": { "resources": {"type": "object"}, "text_id": {"type": "keyword"}, - "level": {"type": "short"}, + "level": {"type": "byte"}, "position": {"type": "integer"}, }, }, diff --git a/Tekst-API/tekst/search/utils.py b/Tekst-API/tekst/search/utils.py index c41e69c1e..3a15be37f 100644 --- a/Tekst-API/tekst/search/utils.py +++ b/Tekst-API/tekst/search/utils.py @@ -79,3 +79,112 @@ def add_mappings( strict_analyzer=strict_analyzer, ), } + + +def quick_qstr_query( + user_query: str, + fields: list[tuple[str, str]], + *, + default_op: str = "OR", +) -> dict[str, Any]: + return { + "simple_query_string": { + "query": user_query or "*", # fall back to '*' if empty + "fields": [field_path for _, field_path in fields], + "default_operator": default_op, + "analyze_wildcard": True, + } + } + + +def 
quick_qstr_query_native( + user_query: str, + fields: list[tuple[str, str]], + *, + default_op: str = "OR", +) -> dict[str, Any]: + return { + "bool": { + "should": [ + { + "bool": { + "must": [ + { + "simple_query_string": { + "query": user_query or "*", + "fields": [field_path], + "default_operator": default_op, + "analyze_wildcard": True, + } + }, + { + "term": { + f"resources.{res_id}.native": { + "value": True, + } + } + }, + ] + } + } + for res_id, field_path in fields + ] + } + } + + +def quick_regexp_query( + user_query: str, + fields: list[tuple[str, str]], +) -> dict[str, Any]: + return { + "bool": { + "should": [ + { + "regexp": { + field_path: { + "value": user_query, + "flags": "ALL", + "case_insensitive": True, + } + } + } + for res_id, field_path in fields + ] + } + } + + +def quick_regexp_query_native( + user_query: str, + fields: list[tuple[str, str]], +) -> dict[str, Any]: + return { + "bool": { + "should": [ + { + "bool": { + "must": [ + { + "regexp": { + field_path: { + "value": user_query, + "flags": "ALL", + "case_insensitive": True, + } + } + }, + { + "term": { + f"resources.{res_id}.native": { + "value": True, + } + } + }, + ] + } + } + for res_id, field_path in fields + ] + } + } diff --git a/Tekst-Web/i18n/ui/deDE.yml b/Tekst-Web/i18n/ui/deDE.yml index 5da896a85..18ab3e365 100644 --- a/Tekst-Web/i18n/ui/deDE.yml +++ b/Tekst-Web/i18n/ui/deDE.yml @@ -928,6 +928,7 @@ search: browseStop: Suchergebnis-Navigation schließen indexCreationTime: Letztes update der Suchdaten msgInvalidRequest: Ungültige Suchanfrage. Haben Sie vielleicht einen Link zu dieser Seite kopiert und dabei ein paar Zeichen übersehen? 
+ higherLvlHit: Suchtreffer von übergeordneter Ebene "{level}" sortingPresets: title: Sortieren relevance: Relevanz diff --git a/Tekst-Web/i18n/ui/enUS.yml b/Tekst-Web/i18n/ui/enUS.yml index ed6477818..196a3ed09 100644 --- a/Tekst-Web/i18n/ui/enUS.yml +++ b/Tekst-Web/i18n/ui/enUS.yml @@ -904,6 +904,7 @@ search: browseStop: Close search results navigation indexCreationTime: Last update of search data msgInvalidRequest: Invalid search request. Maybe you copied a link to this page and missed some characters? + higherLvlHit: Search hit from higher level "{level}" sortingPresets: title: Sort relevance: Relevance diff --git a/Tekst-Web/src/components/search/SearchResult.vue b/Tekst-Web/src/components/search/SearchResult.vue index 84cbdd69c..884361146 100644 --- a/Tekst-Web/src/components/search/SearchResult.vue +++ b/Tekst-Web/src/components/search/SearchResult.vue @@ -1,11 +1,21 @@ diff --git a/Tekst-Web/src/stores/search.ts b/Tekst-Web/src/stores/search.ts index 5936bd980..6f95c35ed 100644 --- a/Tekst-Web/src/stores/search.ts +++ b/Tekst-Web/src/stores/search.ts @@ -25,6 +25,8 @@ type GeneralSearchSettings = { type QuickSearchSettings = { op: 'OR' | 'AND'; re: boolean; + inh: boolean; + allLvls: boolean; txt?: string[]; }; @@ -55,6 +57,8 @@ const getDefaultSettings = () => ({ qck: { op: 'OR', re: false, + inh: false, + allLvls: false, } as QuickSearchSettings, adv: {} as AdvancedSearchSettings, }); @@ -71,6 +75,8 @@ export const useSearchStore = defineStore('search', () => { const router = useRouter(); const { message } = useMessages(); + const queryQuick = ref(''); + const queryAdvanced = ref([]); const settingsGeneral = ref(getDefaultSettings().gen); const settingsQuick = ref(getDefaultSettings().qck); const settingsAdvanced = ref(getDefaultSettings().adv); @@ -110,8 +116,10 @@ export const useSearchStore = defineStore('search', () => { settingsGeneral.value = decoded.gen || getDefaultSettings().gen; settingsGeneral.value.pgn = settingsGeneral.value.pgn || 
getDefaultSettings().gen.pgn; if (decoded.type === 'quick') { + queryQuick.value = decoded.q; settingsQuick.value = decoded.qck || getDefaultSettings().qck; } else if (decoded.type === 'advanced') { + queryAdvanced.value = decoded.q; settingsAdvanced.value = decoded.adv || getDefaultSettings().adv; } else { return DEFAULT_SEARCH_REQUEST_BODY; @@ -267,6 +275,8 @@ export const useSearchStore = defineStore('search', () => { ); return { + queryQuick, + queryAdvanced, settingsGeneral, settingsQuick, settingsAdvanced,