Skip to content

Commit

Permalink
Remove unused simple query string features (#3360)
Browse files Browse the repository at this point in the history
* Remove unused simple query string features

Signed-off-by: Olga Bulat <obulat@gmail.com>

* Update the snapshots

Signed-off-by: Olga Bulat <obulat@gmail.com>

* Update the tests

Signed-off-by: Olga Bulat <obulat@gmail.com>

* Add flags if present in query

Signed-off-by: Olga Bulat <obulat@gmail.com>

* Stop adding conditional flags

Signed-off-by: Olga Bulat <obulat@gmail.com>

* Improve logging

Signed-off-by: Olga Bulat <obulat@gmail.com>

---------

Signed-off-by: Olga Bulat <obulat@gmail.com>
  • Loading branch information
obulat committed Dec 1, 2023
1 parent d3fd697 commit b35ce3f
Show file tree
Hide file tree
Showing 16 changed files with 71 additions and 207 deletions.
30 changes: 30 additions & 0 deletions api/api/controllers/search_controller.py
Expand Up @@ -2,6 +2,7 @@

import logging
import logging as log
import re
from math import ceil
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -58,6 +59,13 @@
QUERY_SPECIAL_CHARACTER_ERROR = "Unescaped special characters are not allowed."
DEFAULT_BOOST = 10000
DEFAULT_SEARCH_FIELDS = ["title", "description", "tags.name"]
# Value passed as the ``flags`` argument of every ``simple_query_string``
# query built in this module; it lists the operators that stay enabled.
DEFAULT_SQS_FLAGS = "AND|NOT|PHRASE|WHITESPACE"
# Pairs of (flag name, regex) for features that are not part of
# ``DEFAULT_SQS_FLAGS``. ``log_query_features`` searches each incoming query
# with these regexes to record which of the features the query appears to use.
UNUSED_SQS_FLAGS = [
# An opening parenthesis followed, later on the same line, by a closing one.
("PRECEDENCE", r"\(.*\)"),
# A literal backslash anywhere in the query.
("ESCAPE", r"\\"),
# A tilde immediately followed by a digit.
("FUZZY|SLOP", r"~\d"),
# A literal asterisk anywhere in the query.
("PREFIX", r"\*"),
]


def _quote_escape(query_string):
Expand Down Expand Up @@ -287,8 +295,11 @@ def build_search_query(
# individual field-level queries specified.
if "q" in search_params.data:
query = _quote_escape(search_params.data["q"])
log_query_features(query, query_name="q")

base_query_kwargs = {
"query": query,
"flags": DEFAULT_SQS_FLAGS,
"fields": DEFAULT_SEARCH_FIELDS,
"default_operator": "AND",
}
Expand All @@ -301,6 +312,7 @@ def build_search_query(
quotes_stripped = query.replace('"', "")
exact_match_boost = Q(
"simple_query_string",
flags=DEFAULT_SQS_FLAGS,
fields=["title"],
query=f"{quotes_stripped}",
boost=10000,
Expand All @@ -313,9 +325,11 @@ def build_search_query(
("tags", "tags.name"),
]:
if field_value := search_params.data.get(field):
log_query_features(field_value, query_name="field")
search_queries["must"].append(
Q(
"simple_query_string",
flags=DEFAULT_SQS_FLAGS,
query=_quote_escape(field_value),
fields=[field_name],
)
Expand All @@ -339,6 +353,22 @@ def build_search_query(
)


def log_query_features(query: str, query_name) -> None:
    """
    Log which of the ``UNUSED_SQS_FLAGS`` features appear in ``query``.

    Each ``(flag, pattern)`` pair in ``UNUSED_SQS_FLAGS`` is tested against the
    query with ``re.search``. If at least one pattern matches, a single
    info-level record is emitted with the query, its name and the matching
    flags; if none match, nothing is logged.

    :param query: the raw query string to inspect
    :param query_name: label stored under ``query_name`` in the log record
    """
    matched_flags = [
        flag_name
        for flag_name, feature_regex in UNUSED_SQS_FLAGS
        if re.search(feature_regex, query) is not None
    ]
    # Nothing to report for plain queries.
    if not matched_flags:
        return

    log.info(
        {
            "log_message": "Special features present in query",
            "query_name": query_name,
            "query": query,
            "flags": matched_flags,
        }
    )


def build_collection_query(
search_params: MediaListRequestSerializer,
collection_params: dict[str, str],
Expand Down
26 changes: 23 additions & 3 deletions api/test/unit/controllers/test_search_controller_search_query.py
Expand Up @@ -5,6 +5,7 @@

from api.controllers import search_controller
from api.controllers.search_controller import (
DEFAULT_SQS_FLAGS,
FILTERED_PROVIDERS_CACHE_KEY,
FILTERED_PROVIDERS_CACHE_VERSION,
)
Expand Down Expand Up @@ -71,6 +72,7 @@ def test_create_search_query_q_search_no_filters(media_type_config):
"default_operator": "AND",
"fields": ["title", "description", "tags.name"],
"query": "cat",
"flags": DEFAULT_SQS_FLAGS,
}
}
],
Expand All @@ -80,14 +82,15 @@ def test_create_search_query_q_search_no_filters(media_type_config):
"boost": 10000,
"fields": ["title"],
"query": "cat",
"flags": DEFAULT_SQS_FLAGS,
}
},
{"rank_feature": {"boost": 10000, "field": "standardized_popularity"}},
],
}


def test_create_search_query_q_search_with_quotes_adds_exact_suffix(media_type_config):
def test_create_search_query_q_search_with_quotes_adds_raw_suffix(media_type_config):
serializer = media_type_config.search_request_serializer(
data={"q": '"The cutest cat"'}
)
Expand All @@ -104,6 +107,7 @@ def test_create_search_query_q_search_with_quotes_adds_exact_suffix(media_type_c
"fields": ["title", "description", "tags.name"],
"query": '"The cutest cat"',
"quote_field_suffix": ".raw",
"flags": DEFAULT_SQS_FLAGS,
}
}
],
Expand All @@ -113,6 +117,7 @@ def test_create_search_query_q_search_with_quotes_adds_exact_suffix(media_type_c
"boost": 10000,
"fields": ["title"],
"query": "The cutest cat",
"flags": DEFAULT_SQS_FLAGS,
}
},
{"rank_feature": {"boost": 10000, "field": "standardized_popularity"}},
Expand Down Expand Up @@ -152,6 +157,7 @@ def test_create_search_query_q_search_with_filters(image_media_type_config):
"default_operator": "AND",
"fields": ["title", "description", "tags.name"],
"query": "cat",
"flags": DEFAULT_SQS_FLAGS,
}
}
],
Expand All @@ -161,6 +167,7 @@ def test_create_search_query_q_search_with_filters(image_media_type_config):
"boost": 10000,
"fields": ["title"],
"query": "cat",
"flags": DEFAULT_SQS_FLAGS,
}
},
{"rank_feature": {"boost": 10000, "field": "standardized_popularity"}},
Expand Down Expand Up @@ -188,10 +195,23 @@ def test_create_search_query_non_q_query(image_media_type_config):
"simple_query_string": {
"fields": ["creator"],
"query": "Artist From Openverse",
"flags": DEFAULT_SQS_FLAGS,
}
},
{
"simple_query_string": {
"fields": ["title"],
"query": "kitten🐱",
"flags": DEFAULT_SQS_FLAGS,
}
},
{
"simple_query_string": {
"fields": ["tags.name"],
"query": "cute",
"flags": DEFAULT_SQS_FLAGS,
}
},
{"simple_query_string": {"fields": ["title"], "query": "kitten🐱"}},
{"simple_query_string": {"fields": ["tags.name"], "query": "cute"}},
],
"should": [
{"rank_feature": {"boost": 10000, "field": "standardized_popularity"}},
Expand Down
72 changes: 8 additions & 64 deletions frontend/src/locales/scripts/en.json5
Expand Up @@ -177,70 +177,14 @@
title: "Search for an exact match",
ariaLabel: "quote unquote Claude Monet",
claudeMonet: '"Claude Monet"',
content: "Put a word or phrase inside quotes. For example, {link}.",
},
combine: {
title: "Combining terms",
description: "If you want to combine terms, you can use the following operators to perform more complex queries",
and: "{symbol} signifies AND operation",
or: "{symbol} signifies OR operation",
not: "{symbol} negates a single token",
prefix: "{symbol} at the end of a term signifies a prefix query",
precedence: "{symbol} signify precedence",
fuzziness: "{symbol} after a word signifies edit distance (fuzziness)",
ariaLabels: {
fuzziness: "tilde N",
open: "open parenthesis",
close: "close parenthesis",
star: "star symbol",
not: "minus symbol",
and: "plus symbol",
or: "vertical bar symbol",
prefix: "asterisk symbol",
verticalBar: "vertical bar symbol",
precedence: "parentheses",
},
},
example: {
and: {
description: "Example: {link}{br} This will search for images related to both dog and cat.",
ariaLabel: "dog plus cat",
example: "dog+cat",
},
or: {
description: "Example: {link}{br} This will search for images related to dog or cat, but not necessarily both.",
ariaLabel: "dog vertical bar cat",
example: "dog|cat",
},
negate: {
description: "You can use the {operator} to exclude a search term from the results.",
operatorName: "operator (signifies NOT)",
operatorAriaLabel: "minus operator (signifies NOT)",
ariaLabel: "dog minus pug",
example: "dog -pug",
content: "Example: {link}{br} This will search for images related to dog but won't include results related to 'pug'",
},
prefix: {
description: "You can use the {operatorName} to mark a prefix term. This will match anything after the *.",
operatorName: "operator (wildcard)",
operatorAriaLabel: "star operator (wildcard)",
ariaLabel: "net star symbol",
example: "net*",
content: "Example: {link}{br} This will search for images matching anything with 'net'. This might include 'network', 'Netflix', 'Netherlands', etc.",
},
precedence: {
description: "You can use parentheses {highlight} to specify precedence of terms or combine more complex queries.",
ariaLabel: "dogs plus open parenthesis corgis vertical bar labrador close parenthesis",
example: "dogs + (corgis | labrador)",
content: "Example: {link}{br} This will search for images that match dogs that are either corgis or labrador.",
},
fuzziness: {
description: "You can use {highlight} to specify some fuzzy logic to the term according to the {link} — the number of one character changes that need to be made to one string to make it the same as another string.",
linkText: "Levenshtein Edit Distance",
ariaLabel: "theatre tilde 1",
example: "theatre~1",
content: "Example: {link}{br} This will search for images that match strings close to the term 'theatre' with a difference of one character. Results might include terms with different spellings like 'theater'.",
},
content: "To search for an exact word or phrase, put it inside quotes. For example, {link}.",
},
negate: {
title: "Excluding terms",
operatorName: "minus operator",
ariaLabel: "dog minus pug",
example: "dog -pug",
content: 'To exclude a term from your results, put the {operator} in front of it. Example: {link}{br} This will search for media related to "dog" but won\'t include results related to "pug".',
},
},
feedback: {
Expand Down

0 comments on commit b35ce3f

Please sign in to comment.