diff --git a/docs/changelog.md b/docs/changelog.md index 5c5dbb8..1888064 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,24 @@ # Release Notes +## 0.18.0 + +### Fixes + +* Fixed a bug that prevented using the `Compound` operator with the `Search` and `SearchMeta` classes. + +### New Features + +* Pipelinized `Search` and `SearchMeta` classes. That is, complex expressions can be built step by step by chaining operators. +* Updated `search` method in `Pipeline` class to ease the use of the search stages. +* Clarified and simplified faceted search + +### Refactoring + +* Use operators rather than statement in `Compound` class +* Factorized `Search` and `SearchMeta` classes by creating a `SearchBase` class +* Use `CountOptions` rather than raw dicts +* Created `AnyStage` union type + ## 0.17.0 ### Docs diff --git a/monggregate/__init__.py b/monggregate/__init__.py index 1b15179..71b8575 100644 --- a/monggregate/__init__.py +++ b/monggregate/__init__.py @@ -5,7 +5,7 @@ from monggregate.pipeline import Pipeline -__version__ = "0.17.0" +__version__ = "0.18.0" __author__ = "Vianney Mixtur" __contact__ = "prenom.nom@outlook.fr" __copyright__ = "Copyright © 2022 Vianney Mixtur" diff --git a/monggregate/pipeline.py b/monggregate/pipeline.py index 4d859bf..f24c7f7 100644 --- a/monggregate/pipeline.py +++ b/monggregate/pipeline.py @@ -9,6 +9,7 @@ from monggregate.base import BaseModel from monggregate.stages import ( + AnyStage, Stage, BucketAuto, GranularityEnum, @@ -24,6 +25,7 @@ Sample, Search, SearchMeta, + SearchStageMap, Set, Skip, SortByCount, @@ -32,8 +34,11 @@ Unwind, Unset ) -from monggregate.stages.search import OperatorLiteral -from monggregate.search.operators.compound import Compound +from monggregate.stages.search.base import OperatorLiteral +from monggregate.search.operators import OperatorMap +from monggregate.search.operators.compound import Compound, ClauseType +from monggregate.search.collectors.facet import Facet, FacetType +from 
monggregate.search.commons import CountOptions, HighlightOptions from monggregate.operators import MergeObjects from monggregate.dollar import ROOT from monggregate.utils import StrEnum @@ -103,7 +108,7 @@ class Pipeline(BaseModel): # pylint: disable=too-many-public-methods # name of the collection to run the pipeline on collection : str | None =None # list of stages that compose the pipeline - stages : list[Stage] = [] + stages : list[AnyStage] = [] @@ -123,7 +128,7 @@ def statement(self)->list[dict]: async def __call__(self)->list[dict[str, Any]]: """Makes a pipeline instance callable and executes the entire pipeline when called""" - return self.run() + return await self.run() async def run(self)->list[dict[str, Any]]: @@ -203,12 +208,12 @@ def __add__(self, other:Self)->Self: stages=self.stages + other.stages ) - def __getitem__(self, index:int)->Stage: + def __getitem__(self, index:int)->AnyStage: """Returns a stage from the pipeline""" # https://realpython.com/inherit-python-list/ return self.stages[index] - def __setitem__(self, index:int, stage:Stage)->None: + def __setitem__(self, index:int, stage:AnyStage)->None: """Sets a stage in the pipeline""" self.stages[index] = stage @@ -220,15 +225,15 @@ def __len__(self)->int: """Returns the length of the pipeline""" return len(self.stages) - def append(self, stage:Stage)->None: + def append(self, stage:AnyStage)->None: """Appends a stage to the pipeline""" self.stages.append(stage) - def insert(self, index:int, stage:Stage)->None: + def insert(self, index:int, stage:AnyStage)->None: """Inserts a stage in the pipeline""" self.stages.insert(index, stage) - def extend(self, stages:list[Stage])->None: + def extend(self, stages:list[AnyStage])->None: """Extends the pipeline with a list of stages""" self.stages.extend(stages) @@ -705,17 +710,25 @@ def sample(self, value:int)->Self: self.stages.append( Sample(value=value) ) + return self + # TODO : Check that clause_type and facet_type parameters don't break anything 
def search( self, - path:str|list[str]=None, - query:str|list[str]=None, + path:str|list[str]|None=None, + query:str|list[str]|None=None, *, - operator_name:OperatorLiteral="text", + operator_name:OperatorLiteral|None=None, + collector_name:Literal["facet"]|None=None, + # Including the below parameters to give them visibility + #--------------------------------------------------- + clause_type:ClauseType|None=None, + facet_type:FacetType|None=None, + #--------------------------------------------------- index:str="default", - count:dict|None=None, - highlight:dict|None=None, + count:CountOptions|None=None, + highlight:HighlightOptions|None=None, return_stored_source:bool=False, score_details:bool=False, **kwargs:Any @@ -729,9 +742,9 @@ def search( - path, str|list[str]|None : field to search in - query, str|list[str]|None : text to search for - index, str : name of the index to use for the search. Defaults to defaut - - count, dict|None : document that specifies the count options for retrieving + - count, CountOptions|None : document that specifies the count options for retrieving a count of the results - - highlight, dict|None : document that specifies the highlight options for + - highlight, HighlightOptions|None : document that specifies the highlight options for displaying search terms in their original context - return_stored_source, bool : Indicates whether to use the copy of the documents in the Atlas Search index (with just a subset of the fields) @@ -754,10 +767,22 @@ def search( - synonyms - like, dict|list[dict] (allow looking for similar documents) """ + + if not collector_name and not operator_name: + operator_name = "text" + - self.stages.append( - Search.from_operator( + # If pipeline is empty, adds a search stage + if len(self) == 0: + # if facet_type is not None: + # kwargs.update({"type":facet_type}) + # elif clause_type is not None: + # kwargs.update({"type":clause_type}) + + self._init_search( + search_class="search", operator_name=operator_name, 
+ collector_name=collector_name, path=path, query=query, index=index, @@ -767,30 +792,43 @@ def search( score_details=score_details, **kwargs ) - ) - + # If pipeline is not empty then the first stage must be Search stage. + # If so, adds the operator to the existing stage using Compound. + elif len(self) >= 1 and isinstance(self.stages[0], Search): + kwargs.update({ + # "collector_name":collector_name, + "operator_name":operator_name, + "path":path, + "query":query, + }) + has_facet_arg = self.__has_facet_arg(**kwargs) + if has_facet_arg: + self._append_facet(facet_type, **kwargs) + else: + self._append_clause(clause_type, **kwargs) + + else: + raise TypeError("search stage has to be the first stage of the pipeline") + return self - def search_compound(self)->"Compound": - """Adds a compound search stage""" - self.stages.insert( - 0, - Search.compound() - ) - return self.stages[0] - - def search_meta( self, - path:str|list[str]=None, - query:str|list[str]=None, + path:str|list[str]|None=None, + query:str|list[str]|None=None, *, - operator_name:OperatorLiteral="text", + operator_name:OperatorLiteral|None=None, + collector_name:Literal["facet"]|None=None, + # Including the below parameters to give them visibility + #--------------------------------------------------- + clause_type:ClauseType|None=None, + facet_type:FacetType|None=None, + #--------------------------------------------------- index:str="default", - count:dict|None=None, - highlight:dict|None=None, + count:CountOptions|None=None, + highlight:HighlightOptions|None=None, return_stored_source:bool=False, score_details:bool=False, **kwargs:Any @@ -830,9 +868,16 @@ def search_meta( - like, dict|list[dict] (allow looking for similar documents) """ - self.stages.append( - SearchMeta.from_operator( + if not collector_name and not operator_name: + operator_name = "text" + + + # If pipeline is empty, adds a search stage + if len(self) == 0: + self._init_search( + search_class="searchMeta", 
operator_name=operator_name, + collector_name=collector_name, path=path, query=query, index=index, @@ -842,11 +887,165 @@ def search_meta( score_details=score_details, **kwargs ) - ) + # If pipeline is not empty then the first stage must be Search stage. + # If so, adds the operator to the existing stage using Compound. + elif len(self) >= 1 and isinstance(self.stages[0], Search): + kwargs.update({ + # "collector_name":collector_name, + "operator_name":operator_name, + "path":path, + "query":query, + }) + has_facet_arg = self.__has_facet_arg(**kwargs) + if has_facet_arg: + self._append_facet(facet_type, **kwargs) + else: + self._append_clause(clause_type, **kwargs) + + else: + raise TypeError("search stage has to be the first stage of the pipeline") return self + + def _init_search( + self, + search_class:Literal["search", "searchMeta"], + path:str|list[str]|None=None, + query:str|list[str]|None=None, + *, + operator_name:OperatorLiteral|None=None, + collector_name:Literal["facet"]|None=None, + index:str="default", + count:CountOptions|None=None, + highlight:HighlightOptions|None=None, + return_stored_source:bool=False, + score_details:bool=False, + **kwargs:Any)->None: + """Adds a search stage to the pipeline.""" + + if not collector_name and operator_name: + search_stage = SearchStageMap[search_class].from_operator( + operator_name=operator_name, + path=path, + query=query, + index=index, + count=count, + highlight=highlight, + return_stored_source=return_stored_source, + score_details=score_details, + **kwargs + ) + else: + search_stage = SearchStageMap[search_class].init_facet( + operator_name=operator_name, + path=path, + query=query, + index=index, + count=count, + highlight=highlight, + return_stored_source=return_stored_source, + score_details=score_details, + collector_name=collector_name, + **kwargs + ) + + self.stages.append( + search_stage + ) + + return None + + + def _append_clause( + self, + clause_type:ClauseType|None=None, + *, + 
operator_name:OperatorLiteral|None=None, + path:str|list[str]|None=None, + query:str|list[str]|None=None, + **kwargs:Any)->None: + """Adds a clause to the search stage of the pipeline.""" + + first_stage = self.stages[0] + if clause_type is None: + clause_type = "should" + + if clause_type == "should": + default_minimum_should_match = 1 + else: + default_minimum_should_match = 0 + + minimum_should_match = kwargs.pop("minimum_should_match", default_minimum_should_match) + + if isinstance(first_stage.collector, Facet): + if isinstance(first_stage.collector.operator, Compound): + # Add clause to existing compound + first_stage.__get_operators_map__(operator_name=operator_name)(clause_type, path=path, query=query, **kwargs) + elif first_stage.collector.operator is None: + # Create a compound operator with the to-be operator as a clause + new_operator = Compound(minimum_should_match=minimum_should_match) + new_operator.__get_operators_map__(operator_name=operator_name)(clause_type, path=path, query=query, **kwargs) + first_stage.operator = new_operator + else: + # Retrieve current operator and create a compound operator + # and add the current operator as a clause + new_operator = Compound(should=[first_stage.collector.operator], minimum_should_match=minimum_should_match) + new_operator.__get_operators_map__(operator_name=operator_name)(clause_type, path=path, query=query, **kwargs) + first_stage.operator = new_operator + elif isinstance(first_stage.operator, Compound): + # Add clause to existing compound + first_stage.__get_operators_map__(operator_name=operator_name)(clause_type, path=path, query=query, **kwargs) + elif first_stage.operator is not None: + # Create a compound operator with the to-be operator as a clause + new_operator = Compound(minimum_should_match=minimum_should_match) + new_operator.__get_operators_map__(operator_name=operator_name)(clause_type, path=path, query=query, **kwargs) + first_stage.operator = new_operator + + else: + # Create an operator 
+ first_stage.operator = OperatorMap[operator_name](path=path, query=query, **kwargs) + + return None + + + def _append_facet(self, facet_type:FacetType|None=None, **kwargs:Any)->None: + """Adds a facet to the search stage of the pipeline.""" + + if not facet_type: + facet_type = "string" + + first_stage = self.stages[0] + operator = None + if first_stage.operator is not None: + operator = first_stage.operator + first_stage.operator = None + + if not isinstance(first_stage.collector, Facet): + first_stage.collector = Facet(operator=operator) + + first_stage.collector.facet(type=facet_type, **kwargs) + + + return None + + + @classmethod + def __has_facet_arg(cls, **kwargs:Any)->bool: + """Checks if the kwargs contains a facet argument""" + + facet_args = ["facet_type", "num_buckets", "boundaries", "default"] + has_facet_arg = False + + for arg in facet_args: + if arg in kwargs: + has_facet_arg = True + break + + return has_facet_arg + + def set(self, document:dict={}, **kwargs:Any)->Self: """ Adds a set stage to the current pipeline. 
@@ -1004,8 +1203,23 @@ def unset(self, field:str=None, fields:list[str]|None=None)->Self: return self if __name__ =="__main__": - - pipeline = Pipeline() - pipeline.search(operator_name="text", query="test", path=["details", "id_epd", "id_serial", "name"] ) - pipeline.run() + from datetime import datetime + from monggregate.search.collectors import StringFacet, NumericFacet + pipeline = Pipeline() + pipeline.search_meta( + index="movies", + collector_name="facet", + operator=Search.Range( + path="released", + gte=datetime(year=2000, month=1, day=1), + lte=datetime(year=2015, month=1, day=31) + ), + facets=[ + StringFacet(name="directorsFacet", path="directors", num_buckets=7), + NumericFacet(name="yearFacet", path="year", boundaries=[2000, 2005, 2010, 2015]), + ] +) + search_stage = pipeline[0] + statement = search_stage.statement + print(statement) \ No newline at end of file diff --git a/monggregate/search/collectors/facet.py b/monggregate/search/collectors/facet.py index 7bea453..230ba57 100644 --- a/monggregate/search/collectors/facet.py +++ b/monggregate/search/collectors/facet.py @@ -166,24 +166,31 @@ """ from datetime import datetime -from typing import Literal +from typing import Any, Callable, Literal +from typing_extensions import Self from monggregate.base import BaseModel, pyd from monggregate.fields import FieldName from monggregate.search.collectors.collector import SearchCollector from monggregate.search.operators import( Autocomplete, + Compound, Equals, Exists, + MoreLikeThis, Range, Regex, Text, - Wilcard, + Wildcard, AnyOperator - ) +from monggregate.search.operators.operator import OperatorLiteral from monggregate.search.commons import FuzzyOptions +# Aliases +# ---------------------------------------------- +FacetType = Literal['string', 'number', 'date'] + # Strings # ---------------------------------------------- class FacetName(FieldName): @@ -248,7 +255,6 @@ def set_name(cls, name: str, values:dict[str,str]) -> FacetName: return name - 
class StringFacet(FacetDefinition): """ String facet definition @@ -297,6 +303,7 @@ def statement(self) -> dict: return self.resolve({self.name : self.dict(by_alias=True, exclude={"name"})}) + class DateFacet(FacetDefinition): """ Numeric facet definition @@ -311,14 +318,15 @@ class DateFacet(FacetDefinition): type : Literal['date'] = 'date' boundaries : list[datetime] - default : str + default : str|None @property def statement(self) -> dict: return self.resolve({self.name : self.dict(by_alias=True, exclude={"name"})}) -Facets = list[NumericFacet|DateFacet|StringFacet] +AnyFacet = StringFacet|NumericFacet|DateFacet +Facets = list[AnyFacet] # Collector # ---------------------------------------------- @@ -341,7 +349,7 @@ class Facet(SearchCollector): operator : AnyOperator|None facets : Facets = [] - # FIXME : The below validator will be usable only when the automatic conversion to statement is deprecated + @pyd.validator("facets") def validate_facets(cls, facets:Facets)->Facets: """ @@ -349,12 +357,15 @@ def validate_facets(cls, facets:Facets)->Facets: Ensures the facets names are unique """ - names = set() + names = [] for facet in facets: - names.add(facet.name) + names.append(facet.name) - if len(facets) > len(names): - raise ValueError("Some facets have identical names") + if len(facets) > len(set(names)): + msg = "Some facets have identical names. Facet names must be unique." 
+ msg += "\n" + msg += f"Facets names : {names}" + raise ValueError(msg) return facets @@ -380,6 +391,285 @@ def statement(self) -> dict: return self.resolve(_statement) + #--------------------------------------------------------- + # Constructors + #--------------------------------------------------------- + @classmethod + def from_operator( + cls, + operator_name:OperatorLiteral, + path:str|list[str]|None=None, + query:str|list[str]|None=None, + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + **kwargs:Any)->Self: + """Instantiates a search stage from a search operator""" + + kwargs.update( + { + "path":path, + "query":query, + "fuzzy":fuzzy, + "score":score + } + ) + + return cls.__get_constructors_map__(operator_name)(**kwargs) + + + @classmethod + def init_autocomplete( + cls, + query:str|list[str], + path:str, + token_order:str="any", + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + **kwargs:Any)->Self: + """ + Creates a search stage with an autocomplete operator + + Summary: + ----------------------------- + This stage searches for a word or phrase that contains a sequence of characters from an incomplete input string. 
+ + """ + + + _autocomplete = Autocomplete( + query=query, + path=path, + token_order=token_order, + fuzzy=fuzzy, + score=score, + **kwargs + ) + + return cls(operator=_autocomplete) + + + @classmethod + def init_compound( + cls, + minimum_should_clause:int=1, + *, + must : list[AnyOperator]=[], + must_not : list[AnyOperator]=[], + should : list[AnyOperator]=[], + filter : list[AnyOperator]=[], + **kwargs:Any + + )->Self: + """xxxx""" + + + _compound = Compound( + must=must, + must_not=must_not, + should=should, + filter=filter, + minimum_should_clause=minimum_should_clause, + **kwargs + ) + + return cls(operator=_compound) + + + @classmethod + def init_equals( + cls, + path:str, + value:str|int|float|bool|datetime, + score:dict|None=None, + **kwargs:Any + )->Self: + """ + Creates a search stage with an equals operator + + Summary: + -------------------------------- + This checks whether a field matches a value you specify. + You may want to use this for filtering purposes post textual search. + That is you may want to use it in a compound query or as, the second stage of your search. + + """ + + + _equals = Equals( + path=path, + value=value, + score=score + ) + + return cls(operator=_equals) + + + @classmethod + def init_exists(cls, path:str, **kwargs:Any)->Self: + """ + Creates a search stage with an exists operator + + Summary: + -------------------------------- + This checks whether a field matches a value you specify. + You may want to use this for filtering purposes post textual search. + That is you may want to use it in a compound query or as, the second stage of your search. + + """ + + + _exists = Exists(path=path) + + return cls(operator=_exists) + + + @classmethod + def init_more_like_this(cls, like:dict|list[dict], **kwargs:Any)->Self: + """ + Creates a search stage with a more_like_this operator + + Summary: + -------------------------------- + The moreLikeThis operator returns documents similar to input documents. 
+ The moreLikeThis operator allows you to build features for your applications + that display similar or alternative results based on one or more given documents. + + """ + + + _more_like_this = MoreLikeThis(like=like) + + return cls(operator=_more_like_this) + + + @classmethod + def init_range( + cls, + path:str|list[str], + gt:int|float|datetime|None=None, + lt:int|float|datetime|None=None, + gte:int|float|datetime|None=None, + lte:int|float|datetime|None=None, + score:dict|None=None, + **kwargs:Any + )->Self: + """ + Creates a search stage with a range operator + + Summary: + -------------------------------- + This checks whether a field value falls into a specific range + You may want to use this for filtering purposes post textual search. + That is you may want to use it in a compound query or as, the second stage of your search. + + + """ + + _range = Range( + path=path, + gt=gt, + gte=gte, + lt=lt, + lte=lte, + score=score + ) + + return cls(operator=_range) + + + @classmethod + def init_regex( + cls, + query:str|list[str], + path:str|list[str], + allow_analyzed_field:bool=False, + score:dict|None=None, + **kwargs:Any + )->Self: + """ + Creates a search stage with a regex operator. + + Summary: + ---------------------------- + regex interprets the query field as a regular expression. regex is a term-level operator, meaning that the query field isn't analyzed (read processed). + + """ + + + _regex = Regex( + query=query, + path=path, + allow_analyzed_field=allow_analyzed_field, + score=score + ) + + return cls(operator=_regex) + + + @classmethod + def init_text( + cls, + query:str|list[str], + path:str|list[str], + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + synonyms:str|None=None, + **kwargs:Any + )->Self: + """ + Creates a search stage with a text opertor + + Summary: + --------------------------------- + The text operator performs a full-text search using the analyzer that you specify in the index configuration. 
+ If you omit an analyzer, the text operator uses the default standard analyzer. + + """ + + + _text = Text( + query=query, + path=path, + score=score, + fuzzy=fuzzy, + synonyms=synonyms + ) + + return cls(operator=_text) + + + @classmethod + def init_wildcard( + cls, + query:str|list[str], + path:str|list[str], + allow_analyzed_field:bool=False, + score:dict|None=None, + **kwargs:Any + )->Self: + """ + Creates a search stage with a wildcard opertor + + Summary: + --------------------------------- + The wildcard operator enables queries which use special characters in the search string that can match any character. + + """ + + + _wilcard = Wildcard( + query=query, + path=path, + allow_analyzed_field=allow_analyzed_field, + score=score + ) + + return cls(operator=_wilcard) + + + # ---------------------------------------------- + # Operators + # ---------------------------------------------- def autocomplete( self, *, @@ -388,45 +678,144 @@ def autocomplete( token_order:str="any", fuzzy:FuzzyOptions|None=None, score:dict|None=None, - )->"Facet": + **kwargs:Any + )->Self: """Adds an autocomplete clause to the current facet instance.""" - autocomplete = Autocomplete( + _autocomplete = Autocomplete( query=query, path=path, token_order=token_order, fuzzy=fuzzy, score=score ) - self.operator = autocomplete + + clause_type = kwargs.get("type", "should") + if clause_type == "should": + default_minimum_should_match = 1 + else: + default_minimum_should_match = 0 + + minimum_should_match = kwargs.pop("minimum_should_match", default_minimum_should_match) + + if not self.operator: + self.operator = _autocomplete + elif isinstance(self.operator, Compound): + self.operator.autocomplete( + type=clause_type, + minimum_should_match=minimum_should_match, + **_autocomplete.dict()) + else: + new_operator = Compound( + should=[self.operator, _autocomplete], + minimum_should_match=minimum_should_match + ) + self.operator = new_operator return self def equals( self, - type, path:str, 
value:str|int|float|bool|datetime, - score:dict|None=None - )->"Facet": + score:dict|None=None, + **kwargs:Any + )->Self: """Adds an equals clause to the current facet instance.""" - equals = Equals( + _equals = Equals( path=path, value=value, score=score ) - self.operator = equals + clause_type = kwargs.get("type", "should") + if clause_type == "should": + default_minimum_should_match = 1 + else: + default_minimum_should_match = 0 + + minimum_should_match = kwargs.pop("minimum_should_match", default_minimum_should_match) + + if not self.operator: + self.operator = _equals + elif isinstance(self.operator, Compound): + self.operator.equals( + type=clause_type, + minimum_should_match=minimum_should_match, + **_equals.dict()) + else: + new_operator = Compound( + should=[self.operator, _equals], + minimum_should_match=minimum_should_match + ) + self.operator = new_operator + return self - def exists(self, path:str)->"Facet": + def exists(self, path:str, **kwargs:Any)->Self: """Adds an exists clause to the current facet instance.""" - exists = Exists(path=path) - self.operator = exists + _exists = Exists(path=path) + + clause_type = kwargs.get("type", "should") + if clause_type == "should": + default_minimum_should_match = 1 + else: + default_minimum_should_match = 0 + + minimum_should_match = kwargs.pop("minimum_should_match", default_minimum_should_match) + + if not self.operator: + self.operator = _exists + elif isinstance(self.operator, Compound): + self.operator.exists( + type=clause_type, + minimum_should_match=minimum_should_match, + **_exists.dict()) + else: + new_operator = Compound( + should=[self.operator, _exists], + minimum_should_match=minimum_should_match + ) + self.operator = new_operator + + + return self + + def more_like_this( + self, + like:dict|list[dict], + **kwargs:Any + )->Self: + """Adds a more_like_this clause to the current facet instance.""" + + _more_like_this = MoreLikeThis(like=like) + + clause_type = kwargs.get("type", "should") + if 
clause_type == "should": + default_minimum_should_match = 1 + else: + default_minimum_should_match = 0 + + minimum_should_match = kwargs.pop("minimum_should_match", default_minimum_should_match) + + if not self.operator: + self.operator = _more_like_this + elif isinstance(self.operator, Compound): + self.operator.more_like_this( + type=clause_type, + minimum_should_match=minimum_should_match, + **_more_like_this.dict()) + else: + new_operator = Compound( + should=[self.operator, _more_like_this], + minimum_should_match=minimum_should_match + ) + self.operator = new_operator + return self @@ -438,11 +827,12 @@ def range( lt:int|float|datetime|None=None, gte:int|float|datetime|None=None, lte:int|float|datetime|None=None, - score:dict|None=None - )->"Facet": + score:dict|None=None, + **kwargs:Any + )->Self: """Adds a range clause to the current facet instance.""" - range_ = Range( + _range = Range( path=path, gt=gt, gte=gte, @@ -451,7 +841,28 @@ def range( score=score ) - self.operator = range_ + clause_type = kwargs.get("type", "should") + if clause_type == "should": + default_minimum_should_match = 1 + else: + default_minimum_should_match = 0 + + minimum_should_match = kwargs.pop("minimum_should_match", default_minimum_should_match) + + if not self.operator: + self.operator = _range + elif isinstance(self.operator, Compound): + self.operator.range( + type=clause_type, + minimum_should_match=minimum_should_match, + **_range.dict()) + else: + new_operator = Compound( + should=[self.operator, _range], + minimum_should_match=minimum_should_match + ) + self.operator = new_operator + return self @@ -462,18 +873,40 @@ def regex( query:str|list[str], path:str|list[str], allow_analyzed_field:bool=False, - score:dict|None=None - )->"Facet": + score:dict|None=None, + **kwargs:Any + )->Self: """Adds a regex clause to the current facet instance.""" - regex = Regex( + _regex = Regex( query=query, path=path, allow_analyzed_field=allow_analyzed_field, score=score ) - self.operator 
= regex + clause_type = kwargs.get("type", "should") + if clause_type == "should": + default_minimum_should_match = 1 + else: + default_minimum_should_match = 0 + + minimum_should_match = kwargs.pop("minimum_should_match", default_minimum_should_match) + + if not self.operator: + self.operator = _regex + elif isinstance(self.operator, Compound): + self.operator.regex( + type=clause_type, + minimum_should_match=minimum_should_match, + **_regex.dict()) + else: + new_operator = Compound( + should=[self.operator, _regex], + minimum_should_match=minimum_should_match + ) + self.operator = new_operator + return self @@ -484,11 +917,12 @@ def text( path:str|list[str], fuzzy:FuzzyOptions|None=None, score:dict|None=None, - synonyms:str|None=None - )->"Facet": + synonyms:str|None=None, + **kwargs:Any + )->Self: """Adds a text clause to the current facet instance.""" - text = Text( + _text = Text( query=query, path=path, score=score, @@ -496,7 +930,28 @@ def text( synonyms=synonyms ) - self.operator = text + clause_type = kwargs.get("type", "should") + if clause_type == "should": + default_minimum_should_match = 1 + else: + default_minimum_should_match = 0 + + minimum_should_match = kwargs.pop("minimum_should_match", default_minimum_should_match) + + if not self.operator: + self.operator = _text + elif isinstance(self.operator, Compound): + self.operator.text( + type=clause_type, + minimum_should_match=minimum_should_match, + **_text.dict()) + else: + new_operator = Compound( + should=[self.operator, _text], + minimum_should_match=minimum_should_match + ) + self.operator = new_operator + return self @@ -508,44 +963,245 @@ def wildcard( path:str|list[str], allow_analyzed_field:bool=False, score:dict|None=None, - )->"Facet": + **kwargs:Any + )->Self: """Adds a wildcard clause to the current facet instance.""" - wildcard = Wilcard( + _wildcard = Wildcard( query=query, path=path, allow_analyzed_field=allow_analyzed_field, score=score ) - self.operator = wildcard + clause_type = 
kwargs.get("type", "should") + if clause_type == "should": + default_minimum_should_match = 1 + else: + default_minimum_should_match = 0 + + minimum_should_match = kwargs.pop("minimum_should_match", default_minimum_should_match) + + if not self.operator: + self.operator = _wildcard + elif isinstance(self.operator, Compound): + self.operator.wildcard( + type=clause_type, + minimum_should_match=minimum_should_match, + **_wildcard.dict()) + else: + new_operator = Compound( + should=[self.operator, _wildcard], + minimum_should_match=minimum_should_match + ) + self.operator = new_operator + return self - def add( + # ---------------------------------------------- + # Facets + # ---------------------------------------------- + def facet( self, path:str, name:str|None=None, - type:Literal['string', 'number', 'date']='string', - num_buckets:int=10, + type:FacetType='string', + num_buckets:int|None=None, boundaries:list[int|float]|list[datetime]|None=None, default:str|None=None - )->"Facet": + )->Self: if type=="string": + if num_buckets is None: + num_buckets = 10 facet = StringFacet( name=name, path=path, num_buckets=num_buckets ) - else: + elif type=="number": facet = NumericFacet( name=name, path=path, boundaries=boundaries, default=default ) + elif type=="date": + facet = DateFacet( + name=name, + path=path, + boundaries=boundaries, + default=default + ) + else: + raise ValueError(f"Invalid facet type. Valid facet types are 'string', 'number' and 'date'. 
Got {type} instead.") self.facets.append(facet) return self + + def numeric( + self, + path:str, + *, + boundaries:list[int|float], + name:str|None=None, + default:str|None=None + )->Self: + """Adds a numeric facet to the current facet instance.""" + + self.facet( + type="number", + path=path, + name=name, + boundaries=boundaries, + default=default + ) + return self + + def date( + self, + path:str, + *, + boundaries:list[datetime], + name:str|None=None, + default:str|None=None + )->Self: + """Adds a date facet to the current facet instance.""" + + self.facet( + type="date", + path=path, + name=name, + boundaries=boundaries, + default=default + ) + return self + + def string( + self, + path:str, + *, + num_buckets:int=10, + name:str|None=None + )->Self: + """Adds a string facet to the current facet instance.""" + + self.facet( + type="string", + path=path, + name=name, + num_buckets=num_buckets + ) + return self + + # ---------------------------------------------- + # Facet Interface + # ---------------------------------------------- + @staticmethod + def NumericFacet( + *, + path:str, + name:FacetName|None=None, + boundaries:list[int|float], + default:str|None=None + )->NumericFacet: + """Returns a numeric facet instance.""" + + return NumericFacet( + name=name, + path=path, + boundaries=boundaries, + default=default + ) + + @staticmethod + def StringFacet( + *, + path:str, + name:FacetName|None=None, + num_buckets:int=10 + )->StringFacet: + """Returns a string facet instance.""" + + return StringFacet( + name=name, + path=path, + num_buckets=num_buckets + ) + + @staticmethod + def DateFacet( + *, + path:str, + name:FacetName|None=None, + boundaries:list[datetime], + default:str|None=None + )->DateFacet: + """Returns a date facet instance.""" + + return DateFacet( + name=name, + path=path, + boundaries=boundaries, + default=default + ) + + # TODO : Overload this method to make return type more precise. 
+ @staticmethod + def Facet( + *, + type:Literal['string', 'number', 'date'], + path:str, + name:FacetName|None=None, + num_buckets:int=10, + boundaries:list[int|float]|list[datetime]|None=None, + default:str|None=None + )->AnyFacet: + """Returns a facet instance.""" + + if type=="string": + facet = Facet.StringFacet( + name=name, + path=path, + num_buckets=num_buckets + ) + elif type=="number": + facet = Facet.NumericFacet( + name=name, + path=path, + boundaries=boundaries, + default=default + ) + else: + facet = Facet.DateFacet( + name=name, + path=path, + boundaries=boundaries, + default=default + ) + + return facet + + # ---------------------------------------------- + # Utilities + # ---------------------------------------------- + @classmethod + def __get_constructors_map__(cls, operator_name:str)->Callable[...,Self]: + """Returns appropriate constructor from operator name""" + + _constructors_map = { + "autocomplete":cls.init_autocomplete, + "compound":cls.init_compound, + "equals":cls.init_equals, + "exists":cls.init_exists, + #"facet":cls.init_facet, + "more_like_this":cls.init_more_like_this, + "range":cls.init_range, + "regex":cls.init_regex, + "text":cls.init_text, + "wildcard":cls.init_wildcard + } + + return _constructors_map[operator_name] \ No newline at end of file diff --git a/monggregate/search/commons/count.py b/monggregate/search/commons/count.py index 61072cd..afb5cdb 100644 --- a/monggregate/search/commons/count.py +++ b/monggregate/search/commons/count.py @@ -9,11 +9,37 @@ from monggregate.base import BaseModel, pyd class CountOptions(BaseModel): - """Class defining the count parameters.""" + """Class representing the count options in a $search query. + + `count` option adds a field to the metadata results document that displays a count of the search results for the query. + + Attributes: + -------------------------------- + - type : str + Type of count of the documents in the result set. 
Value can be one of the following: + - lowerBound : for a lower bound count of the number of documents that match the query. + You can set the threshold for the lower bound number. + - total : for an exact count of the number of documents that match the query. + If the result set is large, Atlas Search might take longer than for lowerBound to return the count. + If omitted, the default value is lowerBound. + - threshold : int + Number of documents to include in the exact count if type is lowerBound. + If omitted, defaults to 1000, which indicates that any number up to 1000 is an exact count + and any number above 1000 is a rough count of the number of documents in the result. + + """ - type : Literal["lowerBound", "total"] = "lowerBound" + type : Literal["lower_bound", "lowerBound", "total"] = "lowerBound" threshold : int = 1000 + @pyd.validator("type", pre=True, always=True) + def validate_type(cls, value:str)->str: + """Pre-validates the type field.""" + + if value == "lower_bound": + return "lowerBound" + return value + @property def statement(self) -> dict: @@ -22,5 +48,10 @@ def statement(self) -> dict: class CountResults(BaseModel): """Class defining the count results.""" - lower_bound : int|None = pyd.Field(None, alias="lowerBound") - total : int|None \ No newline at end of file + lower_bound : int|None + total : int|None + + @property + def statement(self) -> dict: + + return self.resolve(self.dict(by_alias=True)) \ No newline at end of file diff --git a/monggregate/search/operators/__init__.py b/monggregate/search/operators/__init__.py index 7925489..6375028 100644 --- a/monggregate/search/operators/__init__.py +++ b/monggregate/search/operators/__init__.py @@ -24,6 +24,7 @@ """ + from monggregate.search.operators.autocomplete import Autocomplete from monggregate.search.operators.compound import Compound from monggregate.search.operators.equals import Equals @@ -32,6 +33,17 @@ from monggregate.search.operators.range import Range from 
monggregate.search.operators.regex import Regex from monggregate.search.operators.text import Text -from monggregate.search.operators.wildcard import Wilcard +from monggregate.search.operators.wildcard import Wildcard -AnyOperator = Autocomplete | Compound | Equals | Exists | MoreLikeThis | Range | Regex | Text | Wilcard +AnyOperator = Autocomplete | Compound | Equals | Exists | MoreLikeThis | Range | Regex | Text | Wildcard +OperatorMap = { + "autocomplete": Autocomplete, + "compound": Compound, + "equals": Equals, + "exists": Exists, + "moreLikeThis": MoreLikeThis, + "range": Range, + "regex": Regex, + "text": Text, + "wildcard": Wildcard, +} diff --git a/monggregate/search/operators/clause.py b/monggregate/search/operators/clause.py index 9cf7919..2808c51 100644 --- a/monggregate/search/operators/clause.py +++ b/monggregate/search/operators/clause.py @@ -1,5 +1,5 @@ -# TO BE DEPRECATED -# OR MAYBE NOT CURRENTLY USED TO AVOID CIRCULAR IMPORT +"""Module defining Clause type alias.""" + from monggregate.search.operators.autocomplete import Autocomplete from monggregate.search.operators.equals import Equals from monggregate.search.operators.exists import Exists @@ -7,4 +7,6 @@ from monggregate.search.operators.range import Range from monggregate.search.operators.regex import Regex from monggregate.search.operators.text import Text -from monggregate.search.operators.wildcard import Wilcard +from monggregate.search.operators.wildcard import Wildcard + +Clause = Autocomplete | Equals | Exists | MoreLikeThis | Range | Regex | Text | Wildcard diff --git a/monggregate/search/operators/compound.py b/monggregate/search/operators/compound.py index 4422fc3..2e89aca 100644 --- a/monggregate/search/operators/compound.py +++ b/monggregate/search/operators/compound.py @@ -54,21 +54,25 @@ You can use any of the clauses with any top-level operator, such as autocomplete, text, or span, to specify query criteria. 
- - """ + from datetime import datetime -from typing import Literal +from typing import Literal, Callable + +from typing_extensions import Self + from monggregate.base import pyd -from monggregate.search.operators.operator import SearchOperator, Clause +from monggregate.search.operators.operator import SearchOperator, OperatorLiteral from monggregate.search.operators.clause import ( + Clause, Autocomplete, Equals, Exists, + MoreLikeThis, Range, Regex, Text, - Wilcard + Wildcard ) from monggregate.search.commons import FuzzyOptions @@ -100,16 +104,15 @@ class Compound(SearchOperator): """ - must : list[Clause] = [] - must_not : list[Clause] = pyd.Field([], alias="mustNot") - should : list[Clause] = [] - filter : list[Clause] = [] - minimum_should_clause : int = 1 + must : list["Clause|Compound"] = [] + must_not : list["Clause|Compound"] = [] + should : list["Clause|Compound"] = [] + filter : list["Clause|Compound"] = [] + minimum_should_match : int = 0 @property def statement(self) -> dict: - clauses = {} if self.must: clauses["must"] = self.must @@ -117,6 +120,7 @@ def statement(self) -> dict: clauses["mustNot"] = self.must_not if self.should: clauses["should"] = self.should + clauses["minimumShouldMatch"] = self.minimum_should_match if self.filter: clauses["filter"] = self.filter @@ -124,7 +128,7 @@ def statement(self) -> dict: "compound":clauses }) - def _register_clause(self, type:ClauseType, statement:dict)->None: + def _register_clause(self, type:ClauseType, operator:Clause|Self)->None: """ Adds a clause to the current compound instance. 
@@ -137,15 +141,17 @@ def _register_clause(self, type:ClauseType, statement:dict)->None: """ if type == "must": - self.must.append(statement) + self.must.append(operator) elif type == "mustNot": - self.must_not.append(statement) + self.must_not.append(operator) elif type == "filter": - self.filter.append(statement) + self.filter.append(operator) elif type == "should": - self.should.append(statement) - + self.should.append(operator) + #--------------------------------------------- + # Operators + #--------------------------------------------- def autocomplete( self, type:ClauseType, @@ -155,48 +161,84 @@ def autocomplete( token_order:str="any", fuzzy:FuzzyOptions|None=None, score:dict|None=None, - )->"Compound": + )->Self: """Adds an autocomplete clause to the current compound instance.""" - autocomplete_statement = Autocomplete( + _autocomplete = Autocomplete( query=query, path=path, token_order=token_order, fuzzy=fuzzy, score=score - ).statement + ) - self._register_clause(type, autocomplete_statement) + self._register_clause(type, _autocomplete) return self - + + + def compound( + self, + type:ClauseType, + must:list["Clause|Compound"]=[], + must_not:list["Clause|Compound"]=[], + should:list["Clause|Compound"]=[], + filter:list["Clause|Compound"]=[], + minimum_should_match:int=0 + )->Self: + """Adds a compound clause to the current compound instance.""" + + _compound = Compound( + must=must, + must_not=must_not, + should=should, + filter=filter, + minimum_should_match=minimum_should_match + ) + + self._register_clause(type, _compound) + + return _compound + + def equals( self, type, path:str, value:str|int|float|bool|datetime, score:dict|None=None - )->"Compound": + )->Self: """Adds an equals clause to the current compound instance.""" - equals_statement = Equals( + _equals = Equals( path=path, value=value, score=score ).statement - self._register_clause(type, equals_statement) + self._register_clause(type, _equals) return self - def exists(self, type:ClauseType, 
path:str)->"Compound": + + def exists(self, type:ClauseType, path:str)->Self: """Adds an exists clause to the current compound instance.""" - exists_statement = Exists(path=path).statement - self._register_clause(type, exists_statement) + _exists = Exists(path=path) + self._register_clause(type, _exists) + + return self + + + def more_like_this(self, type:ClauseType, like:dict|list[dict])->Self: + """Adds a more_like_this clause to the current compound instance.""" + + _more_like_this = MoreLikeThis(like=like) + self._register_clause(type, _more_like_this) return self + def range( self, type:ClauseType, @@ -207,22 +249,23 @@ def range( gte:int|float|datetime|None=None, lte:int|float|datetime|None=None, score:dict|None=None - )->"Compound": + )->Self: """Adds a range clause to the current compound instance.""" - range_statement = Range( + _range = Range( path=path, gt=gt, gte=gte, lt=lt, lte=lte, score=score - ).statement + ) - self._register_clause(type, range_statement) + self._register_clause(type, _range) return self + def regex( self, type:ClauseType, @@ -231,21 +274,22 @@ def regex( path:str|list[str], allow_analyzed_field:bool=False, score:dict|None=None - )->"Compound": + )->Self: """Adds a regex clause to the current compound instance.""" - regex_statement = Regex( + _regex = Regex( query=query, path=path, allow_analyzed_field=allow_analyzed_field, score=score - ).statement + ) - self._register_clause(type, regex_statement) + self._register_clause(type, _regex) return self + def text( self, type:ClauseType, @@ -255,21 +299,22 @@ def text( fuzzy:FuzzyOptions|None=None, score:dict|None=None, synonyms:str|None=None - )->"Compound": + )->Self: """Adds a text clause to the current compound instance.""" - text_statement = Text( + _text = Text( query=query, path=path, score=score, fuzzy=fuzzy, synonyms=synonyms - ).statement + ) - self._register_clause(type, text_statement) + self._register_clause(type, _text) return self + def wildcard( self, type:ClauseType, @@ 
-278,17 +323,133 @@ def wildcard( path:str|list[str], allow_analyzed_field:bool=False, score:dict|None=None, - )->"Compound": + )->Self: """Adds a wildcard clause to the current compound instance.""" - wildcard_statement = Wilcard( + _wildcard = Wildcard( query=query, path=path, allow_analyzed_field=allow_analyzed_field, score=score - ).statement + ) - self._register_clause(type, wildcard_statement) + self._register_clause(type, _wildcard) return self - \ No newline at end of file + + #--------------------------------------------- + # Clauses + #--------------------------------------------- + def must_( + self, + operator_name:OperatorLiteral, + path:str|list[str]|None=None, + query:str|list[str]|None=None, + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + **kwargs + )->Self: + """Adds a must clause to the current compound instance.""" + + kwargs.update( + { + "path":path, + "query":query, + "fuzzy":fuzzy, + "score":score + } + ) + + return self.__get_operators_map__(operator_name)("must", **kwargs) + + + def must_not_( + self, + operator_name:OperatorLiteral, + path:str|list[str]|None=None, + query:str|list[str]|None=None, + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + **kwargs + )->Self: + """Adds a must_not clause to the current compound instance.""" + + kwargs.update( + { + "path":path, + "query":query, + "fuzzy":fuzzy, + "score":score + } + ) + + return self.__get_operators_map__(operator_name)("mustNot", **kwargs) + + + def should_( + self, + operator_name:OperatorLiteral, + path:str|list[str]|None=None, + query:str|list[str]|None=None, + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + **kwargs + )->Self: + """Adds a should clause to the current compound instance.""" + + kwargs.update( + { + "path":path, + "query":query, + "fuzzy":fuzzy, + "score":score + } + ) + + return self.__get_operators_map__(operator_name)("should", **kwargs) + + + def filter_( + self, + operator_name:OperatorLiteral, + path:str|list[str]|None=None, + 
query:str|list[str]|None=None, + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + **kwargs + )->Self: + """Adds a filter clause to the current compound instance.""" + + kwargs.update( + { + "path":path, + "query":query, + "fuzzy":fuzzy, + "score":score + } + ) + + return self.__get_operators_map__(operator_name)("filter", **kwargs) + + #--------------------------------------------- + # Utility functions + #--------------------------------------------- + def __get_operators_map__(self, operator_name:OperatorLiteral)->Callable[...,Self]: + """Returns the operator class associated with the given operator name.""" + + operators_map = { + "autocomplete":self.autocomplete, + "compound":self.compound, #FIXME : This breaks typing + "equals":self.equals, + "exists":self.exists, + "range":self.range, + "more_like_this":self.more_like_this, + "regex":self.regex, + "text":self.text, + "wildcard":self.wildcard + } + + return operators_map[operator_name] + +if __name__ == "__main__": + print(Compound()) \ No newline at end of file diff --git a/monggregate/search/operators/operator.py b/monggregate/search/operators/operator.py index 1e7bfa1..90488a1 100644 --- a/monggregate/search/operators/operator.py +++ b/monggregate/search/operators/operator.py @@ -3,6 +3,7 @@ # Standard Library imports #---------------------------- from abc import ABC +from typing import Literal # Package imports # --------------------------- @@ -10,8 +11,20 @@ class SearchOperator(BaseModel, ABC): """MongoDB operator abstract base class""" - -class Clause(SearchOperator, ABC): - """Abstract BaseClass for search clauses""" - clauses : list["Clause"] = [] + +# Enums +# ----------------------------------------------------- +OperatorLiteral = Literal[ + "autocomplete", + "compound", + "equals", + "exists", + #"facet", + "more_like_this", + "range", + "regex", + "text", + "wildcard" +] + \ No newline at end of file diff --git a/monggregate/search/operators/text.py b/monggregate/search/operators/text.py 
index b5fa787..e9d3c0e 100644 --- a/monggregate/search/operators/text.py +++ b/monggregate/search/operators/text.py @@ -30,7 +30,7 @@ """ - +from monggregate.base import pyd from monggregate.search.operators.operator import SearchOperator from monggregate.search.commons.fuzzy import FuzzyOptions diff --git a/monggregate/search/operators/wildcard.py b/monggregate/search/operators/wildcard.py index 7ee08dd..21c9537 100644 --- a/monggregate/search/operators/wildcard.py +++ b/monggregate/search/operators/wildcard.py @@ -68,7 +68,7 @@ from monggregate.base import pyd from monggregate.search.operators.operator import SearchOperator -class Wilcard(SearchOperator): +class Wildcard(SearchOperator): """ Creates a wilcard operation statement in an Atlas Search query. diff --git a/monggregate/stages/__init__.py b/monggregate/stages/__init__.py index 9178280..207af71 100644 --- a/monggregate/stages/__init__.py +++ b/monggregate/stages/__init__.py @@ -1,5 +1,6 @@ """Stage Sub-package""" +from typing import Union from monggregate.stages.stage import Stage from monggregate.stages.bucket_auto import BucketAuto, GranularityEnum from monggregate.stages.bucket import Bucket @@ -12,8 +13,7 @@ from monggregate.stages.project import Project from monggregate.stages.replace_root import ReplaceRoot from monggregate.stages.sample import Sample -from monggregate.stages.search import Search -from monggregate.stages.search_meta import SearchMeta +from monggregate.stages.search import Search, SearchMeta, SearchStageMap from monggregate.stages.set import Set from monggregate.stages.skip import Skip from monggregate.stages.sort_by_count import SortByCount @@ -44,3 +44,26 @@ # Custom aliases Explode = Unwind # to match pandas equivalent operation + +AnyStage = Union[ + BucketAuto, + Bucket, + Count, + Group, + Limit, + Lookup, + Match, + Out, + Project, + ReplaceRoot, + Sample, + Search, + SearchMeta, + Set, + Skip, + SortByCount, + Sort, + UnionWith, + Unwind, + Unset, +] \ No newline at end of 
file diff --git a/monggregate/stages/search.py b/monggregate/stages/search.py deleted file mode 100644 index 5ca8e10..0000000 --- a/monggregate/stages/search.py +++ /dev/null @@ -1,557 +0,0 @@ -"""Module definining an interface to MongoDB $search stage operation in aggregation pipeline. - -Online MongoDB documentation: --------------------------------------------------------------------------------------------------- - -Last Updated (in this package) : 25/04/2023 -Source : https://www.mongodb.com/docs/atlas/atlas-search/query-syntax/#mongodb-pipeline-pipe.-search - -# Definition -#--------------------------- -The $search stage performs a full-text search on the specified field or fields which must be covered by an Atlas Search index. - -$search -A $search pipeline stage has the following prototype form: - - >>> { - $search: { - "index": "", - ""|"": { - | - }, - "highlight": { - - }, - "count": { - - }, - "returnStoredSource": true | false - } - } - -# Fields -#--------------------------- - -The $search stage takes a document with the following fields - -Field Type Necessity Description - - document Conditional Name of the collector to use with the query. - You can provide a document that contains the collector-specific options as the value for this field. - Either this or is required. -count document Optional Document that specifies the count options for retrieving a count of the results. - To learn more, see Count Atlas Search Results. -highlight document Optional Document that specifies the highlight options for displaying search terms in their original context. -index string Required Name of the Atlas Search index to use. If omitted, defaults to default - document Conditional Name of the operator to search with. - You can provide a document that contains the operator-specific options as the value for this field. - Either this or is required. 
-returnStoredSource boolean Optional Flag that specifies whether to perform a full document lookup on the backend database or return only stored source fields directly from Atlas Search. - If omitted, defaults to false. To learn more, see Return Stored Source pyd.Fields. - -# Behavior -#--------------------------- -$search must be the first stage of any pipeline it appears in. -$search cannot be used in: - - * a view definition - - * a $facet pipeline stage - -# Aggregation Variable -#--------------------------- -$search returns only the results of your query. The metadata results of your -$search query are saved in the $$SEARCH_META aggregation variable. You can use the $$SEARCH_META variable to view the metadata results for your -$search query. The $$SEARCH_META aggregation variable can be used anywhere after a -$search stage in any pipeline, but it can't be used after the $lookup or $unionWith stage in any pipeline. -The $$SEARCH_META aggregation variable can't be used in any subsequent stage after a $searchMeta stage. 
- -""" - -from datetime import datetime -from typing import Any, Callable, Literal -try: - from typing import Self -except ImportError: - from typing_extensions import Self - -from monggregate.base import pyd -from monggregate.stages.stage import Stage -from monggregate.search.collectors import Facet, Facets -from monggregate.search.operators import( - Autocomplete, - Compound, - Equals, - Exists, - MoreLikeThis, - Range, - Regex, - Text, - Wilcard, - AnyOperator -) -from monggregate.search.commons import FuzzyOptions - -# Enums -# ----------------------------------------------------- -OperatorLiteral = Literal[ - "autocomplete", - "equals", - "exists", - "facet", - "more_like_this", - "range", - "regex", - "text", - "wildcard" -] - -# Classes -# ----------------------------------------------------- -class SearchBase(Stage): - """Internals""" - - index : str = "default" - count : dict|None - highlight : dict|None - return_stored_source : bool = pyd.Field(False, alias="returnStoredSource") - score_details : bool = pyd.Field(False, alias="scoreDetails") - - @property - def statement(self) -> dict[str, dict]: - - config = { - "index":self.index, - "highlight":self.highlight, - "count":self.count, - "returnStoredSource":self.return_stored_source, - "scoreDetails":self.score_details - } - - - _statement = { - "$search":config - } - - return self.resolve(_statement) - - -class Search(SearchBase): - """" - Creates a $search statement in an aggregation pipeline - - Descrtiption - ----------------------- - The $search stage performs a full-text search on the specified field or fields - which must be covered by an Atlas Search index. - - Attributes: - ----------------------- - - index, str : name of the Atlas Search index to use. Defaults to default. - - - count, dict|None : Document that specifies the count options for retrieving a count - of the results. 
- - - highlight, dict|None : Document that specifies the highlight options for displaying - search terms in their original context. - - - return_stored_source, bool : Flag that specifies whether to perform a full document lookup - on the backend database (mongod) or return only stored source fields - directly from Atlas Search. Defaults to false. - - - score_details, bool : Flag that specifies whether to retrieve a detailed breakdown of - the score for the documents in the results. Defaults to false - To view the details, you must use the $meta expression in the - $project stage. - - - , dict|None : Name of the operator to search with. You can provide a document - that contains the operator-specific options as the value for this field - Either this or is required. - - - , dict|None : Name of the collector to use with the query. You can provide - a document that contains the collector-specific options as the value - for this field. Either this or is required. - - """ - - - collector : Facet|None - operator : AnyOperator|None - - - @pyd.validator("operator", pre=True, always=True) - @classmethod - def validate_operator(cls, value:dict, values:dict)->dict|None: - """Ensures that either collector or operator is provided""" - - collector = values.get("collector") - - if collector is None and value is None: - raise TypeError("Either collector or operator must be provided") - elif collector and value: - raise TypeError("Only one of collector or operator can be provided") - - return value - - @property - def statement(self) -> dict[str, dict]: - - config = { - "index":self.index, - "highlight":self.highlight, - "count":self.count, - "returnStoredSource":self.return_stored_source, - "scoreDetails":self.score_details - } - - method = self.collector or self.operator - - config.update(method.statement) - - _statement = { - "$search":config - } - - return self.resolve(_statement) - - - #--------------------------------------------------------- - # Constructors - 
#--------------------------------------------------------- - @classmethod - def __get_constructors_map__(cls, operator_name:str)->Callable: - """Returns appropriate constructor from operator name""" - - _constructors_map = { - "autocomplete":cls.autocomplete, - "compound":cls.compound, - "equals":cls.equals, - "exists":cls.exists, - "facet":cls.facet, - "more_like_this":cls.more_like_this, - "range":cls.range, - "regex":cls.regex, - "text":cls.text, - "wildcard":cls.wildcard - } - - return _constructors_map[operator_name] - - - @classmethod - def from_operator( - cls, - operator_name:OperatorLiteral, - path:str|list[str]|None=None, - query:str|list[str]|None=None, - fuzzy:FuzzyOptions|None=None, - score:dict|None=None, - **kwargs:Any)->Self: - """Instantiates a search stage from a search operator""" - - # FIXME : This could lead in duplicated arguments in kwargs - kwargs.update( - { - "path":path, - "query":query, - "fuzzy":fuzzy, - "score":score - } - ) - - return cls.__get_constructors_map__(operator_name)(**kwargs) - - @classmethod - def autocomplete( - cls, - query:str|list[str], - path:str, - token_order:str="any", - fuzzy:FuzzyOptions|None=None, - score:dict|None=None, - **kwargs:Any)->Self: - """ - Creates a search stage with an autocomplete operator - - Summary: - ----------------------------- - This stage searches for a word or phrase that contains a sequence of characters from an incomplete input string. 
- - """ - - base_params = SearchBase(**kwargs).dict() - cls.__reduce_kwargs(kwargs) - - autocomplete_statement = Autocomplete( - query=query, - path=path, - token_order=token_order, - fuzzy=fuzzy, - score=score, - **kwargs - ) - - return cls(**base_params, operator=autocomplete_statement) - - @classmethod - def compound( - cls, - minimum_should_clause:int=1, - *, - must : list[dict]=[], - must_not : list[dict]=[], - should : list[dict]=[], - filter : list[dict]=[], - **kwargs:Any - - )->Self: - - base_params = SearchBase(**kwargs).dict() - cls.__reduce_kwargs(kwargs) - - compound_statement = Compound( - must=must, - must_not=must_not, - should=should, - filter=filter, - minimum_should_clause=minimum_should_clause, - **kwargs - ) - - return cls(**base_params, operator=compound_statement) - - @classmethod - def equals( - cls, - path:str, - value:str|int|float|bool|datetime, - score:dict|None=None, - **kwargs:Any - )->Self: - """ - Creates a search stage with an equals operator - - Summary: - -------------------------------- - This checks whether a field matches a value you specify. - You may want to use this for filtering purposes post textual search. - That is you may want to use it in a compound query or as, the second stage of your search. - - """ - - base_params = SearchBase(**kwargs).dict() - equals_statement = Equals( - path=path, - value=value, - score=score - ) - - return cls(**base_params, operator=equals_statement) - - @classmethod - def exists(cls, path:str, **kwargs:Any)->Self: - """ - Creates a search stage with an exists operator - - Summary: - -------------------------------- - This checks whether a field matches a value you specify. - You may want to use this for filtering purposes post textual search. - That is you may want to use it in a compound query or as, the second stage of your search. 
- - """ - - base_params = SearchBase(**kwargs).dict() - exists_statement = Exists(path=path) - - return cls(**base_params, operator=exists_statement) - - @classmethod - def facet(cls, **kwargs:Any)->Self: - """ - Creates a search stage with a facet operator - - Summary: - -------------------------------- - - """ - - base_params = SearchBase(**kwargs).dict() - cls.__reduce_kwargs(kwargs) - - operator = kwargs.pop("operator", None) - facet_ = Facet(operator=operator, **kwargs) - - return cls(**base_params, collector=facet_) - - @classmethod - def more_like_this(cls, like:dict|list[dict], **kwargs:Any)->Self: - """ - Creates a search stage with a more_like_this operator - - Summary: - -------------------------------- - The moreLikeThis operator returns documents similar to input documents. - The moreLikeThis operator allows you to build features for your applications - that display similar or alternative results based on one or more given documents. - - """ - - base_params = SearchBase(**kwargs).dict() - more_like_this_stasement = MoreLikeThis(like=like) - - return cls(**base_params, operator=more_like_this_stasement) - - @classmethod - def range( - cls, - path:str|list[str], - gt:int|float|datetime|None=None, - lt:int|float|datetime|None=None, - gte:int|float|datetime|None=None, - lte:int|float|datetime|None=None, - score:dict|None=None, - **kwargs:Any - )->Self: - """ - Creates a search stage with a range operator - - Summary: - -------------------------------- - This checks whether a field value falls into a specific range - You may want to use this for filtering purposes post textual search. - That is you may want to use it in a compound query or as, the second stage of your search. 
- - - """ - - base_params = SearchBase(**kwargs).dict() - range_statement = Range( - path=path, - gt=gt, - gte=gte, - lt=lt, - lte=lte, - score=score - ) - - return cls(**base_params, operator=range_statement) - - @classmethod - def regex( - cls, - query:str|list[str], - path:str|list[str], - allow_analyzed_field:bool=False, - score:dict|None=None, - **kwargs:Any - )->Self: - """ - Creates a search stage with a regex operator. - - Summary: - ---------------------------- - regex interprets the query field as a regular expression. regex is a term-level operator, meaning that the query field isn't analyzed (read processed). - - """ - - base_params = SearchBase(**kwargs).dict() - regex_statement = Regex( - query=query, - path=path, - allow_analyzed_field=allow_analyzed_field, - score=score - ) - - return cls(**base_params, operator=regex_statement) - - - - @classmethod - def text( - cls, - query:str|list[str], - path:str|list[str], - fuzzy:FuzzyOptions|None=None, - score:dict|None=None, - synonyms:str|None=None, - **kwargs:Any - )->Self: - """ - Creates a search stage with a text opertor - - Summary: - --------------------------------- - The text operator performs a full-text search using the analyzer that you specify in the index configuration. - If you omit an analyzer, the text operator uses the default standard analyzer. - - """ - - base_params = SearchBase(**kwargs).dict() - cls.__reduce_kwargs(kwargs) - - text_statement = Text( - query=query, - path=path, - score=score, - fuzzy=fuzzy, - synonyms=synonyms - ) - - return cls(**base_params, operator=text_statement) - - @classmethod - def wildcard( - cls, - query:str|list[str], - path:str|list[str], - allow_analyzed_field:bool=False, - score:dict|None=None, - **kwargs:Any - )->Self: - """ - Creates a search stage with a wildcard opertor - - Summary: - --------------------------------- - The wildcard operator enables queries which use special characters in the search string that can match any character. 
- - """ - - base_params = SearchBase(**kwargs).dict() - cls.__reduce_kwargs(kwargs) - - wilcard_statement = Wilcard( - query=query, - path=path, - allow_analyzed_field=allow_analyzed_field, - score=score - ) - - return cls(**base_params, operator=wilcard_statement) - - @classmethod - def __reduce_kwargs(cls, kwargs:dict)->None: - """ - Parses kwargs arguments to avoid passing arguments twice - - In particular removes SearchBase arguments from kwargs: - - index, - - count, - - highlight, - - return_stored_source, - - score_details - - """ - - kwargs.pop("index", None) - kwargs.pop("count", None) - kwargs.pop("highlight", None) - kwargs.pop("return_stored_source", None) - kwargs.pop("score_details", None) - -# TODO : pipelinize Search class -# Instead of setting the search operator as a classmethods constructors -# transform them into chainable instance methods using the compound operator to combined the chained operations - -#or offer both options by poviding init_ and def methods \ No newline at end of file diff --git a/monggregate/stages/search/__init__.py b/monggregate/stages/search/__init__.py new file mode 100644 index 0000000..ba4e941 --- /dev/null +++ b/monggregate/stages/search/__init__.py @@ -0,0 +1,17 @@ +"""Search stages subpackage. + +Contains search and search_meta modules. + +""" +from typing import Literal + +from monggregate.stages.search.search import Search +from monggregate.stages.search.search_meta import SearchMeta + +SearchStageMap:dict[ + Literal["search", "searchMeta"], + type[Search]|type[SearchMeta] +] = { + "search":Search, + "searchMeta":SearchMeta +} diff --git a/monggregate/stages/search/base.py b/monggregate/stages/search/base.py new file mode 100644 index 0000000..d800735 --- /dev/null +++ b/monggregate/stages/search/base.py @@ -0,0 +1,1078 @@ +"""search base module. + +Internal module that contains the base class for search stages. 
+""" + +from datetime import datetime +from typing import Any, Callable, Literal +try: + from typing import Self +except ImportError: + from typing_extensions import Self + +from monggregate.base import pyd, BaseModel +from monggregate.stages.stage import Stage +from monggregate.search.collectors import Facet, Facets +from monggregate.search.operators import( + Autocomplete, + Compound, + Equals, + Exists, + MoreLikeThis, + Range, + Regex, + Text, + Wildcard, + AnyOperator, + OperatorMap +) +from monggregate.search.operators.operator import OperatorLiteral +from monggregate.search.operators.compound import ClauseType +from monggregate.search.commons import CountOptions, FuzzyOptions, HighlightOptions + + +# Classes +# ----------------------------------------------------- +class SearchConfig(BaseModel): + """Configuration attributes for the $search stage. + + This class is part of monggregate internals. + + Attributes: + ------------------------------- + - index, str : The name of the index to use. Defaults to the `default` index. + + - count, CountOptions|None : Document that specifies the count options for retrieving a count of the results. + + - highlight, HighlightOptions|None : Document that specifies the highlighting options for displaying search terms in their original context. + + - return_stored_source, bool : Flag that specifies whether to perform a full document lookup + on the backend database or return only stored source fields directly from Atlas Search. + If omitted, defaults to false. + + - score_details, bool : Flag that specifies whether to retrieve a detailed breakdown of + the score for the documents in the results. Defaults to false + To view the details, you must use the $meta expression in the + $project stage. 
+ + """ + + index : str = "default" + count : CountOptions|None + highlight : HighlightOptions|None + return_stored_source : bool = False + score_details : bool = False + + @property + def statement(self): + """Returns the statement of the stage""" + + raise NotImplementedError("statement property must be implemented in subclasses") + +class SearchBase(Stage, SearchConfig): + """$search and $searchMeta stages parent class. + + This class is part of monggregate internals. + + Attributes: + ------------------------------- + - index, str : The name of the index to use. Defaults to the `default` index. + + - count, CountOptions|None : Document that specifies the count options for retrieving a count of the results. + + - highlight, HighlightOptions|None : Document that specifies the highlighting options for displaying search terms in their original context. + + - return_stored_source, bool : Flag that specifies whether to perform a full document lookup + on the backend database or return only stored source fields directly from Atlas Search. + If omitted, defaults to false. + + - score_details, bool : Flag that specifies whether to retrieve a detailed breakdown of + the score for the documents in the results. Defaults to false + To view the details, you must use the $meta expression in the + $project stage. + + - operator, SearchOperator|None : Name of the operator to search with. You can provide a document + that contains the operator-specific options as the value for this field + Either this or collector is required. + + - collector, Facet|None : Name of the collector to use with the query. You can provide + a document that contains the collector-specific options as the value + for this field. Either this or is required. 
+ + + """ + + collector : Facet|None + operator : AnyOperator|None + + @pyd.root_validator(pre=True) + @classmethod + def init(cls, values:dict)->dict: + """Initializes Search with Compound operator.""" + + collector = values.get("collector") + operator = values.get("operator") + collector_name = values.get("collector_name") + operator_name = values.get("operator_name") + + if operator_name and not operator: + operator = OperatorMap[operator_name](**values) + + if collector_name and not collector: + values.pop("operator", None) + collector = Facet(operator=operator, **values) + + if not collector and not operator: + values["operator"] = Compound() + + return values + + @pyd.validator("operator", pre=True, always=True) + @classmethod + def validate_operator(cls, value:dict, values:dict)->dict|None: + """Ensures that either collector or operator is provided.""" + + collector = values.get("collector") + + if collector is None and value is None: + raise TypeError("Either collector or operator must be provided") + elif collector and value: + raise TypeError("Only one of collector or operator can be provided") + + return value + + @property + def statement(self): + """Returns the statement of the stage""" + + raise NotImplementedError("statement property must be implemented in subclasses") + + + #--------------------------------------------------------- + # Constructors + #--------------------------------------------------------- + @classmethod + def from_operator( + cls, + operator_name:OperatorLiteral, + path:str|list[str]|None=None, + query:str|list[str]|None=None, + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + **kwargs:Any)->Self: + """Instantiates a search stage from a search operator""" + + # FIXME : This could lead in duplicated arguments in kwargs + kwargs.update( + { + "path":path, + "query":query, + "fuzzy":fuzzy, + "score":score + } + ) + + return cls.__get_constructors_map__(operator_name)(**kwargs) + + @classmethod + def init_autocomplete( + cls, 
+ query:str|list[str], + path:str, + token_order:str="any", + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + **kwargs:Any)->Self: + """ + Creates a search stage with an autocomplete operator + + Summary: + ----------------------------- + This stage searches for a word or phrase that contains a sequence of characters from an incomplete input string. + + """ + + base_params = SearchConfig(**kwargs).dict() + cls.__reduce_kwargs(kwargs) + + autocomplete_statement = Autocomplete( + query=query, + path=path, + token_order=token_order, + fuzzy=fuzzy, + score=score, + **kwargs + ) + + return cls(**base_params, operator=autocomplete_statement) + + @classmethod + def init_compound( + cls, + minimum_should_clause:int=1, + *, + must : list[AnyOperator]=[], + must_not : list[AnyOperator]=[], + should : list[AnyOperator]=[], + filter : list[AnyOperator]=[], + **kwargs:Any + + )->Self: + + base_params = SearchConfig(**kwargs).dict() + cls.__reduce_kwargs(kwargs) + + compound_statement = Compound( + must=must, + must_not=must_not, + should=should, + filter=filter, + minimum_should_clause=minimum_should_clause, + **kwargs + ) + + return cls(**base_params, operator=compound_statement) + + @classmethod + def init_equals( + cls, + path:str, + value:str|int|float|bool|datetime, + score:dict|None=None, + **kwargs:Any + )->Self: + """ + Creates a search stage with an equals operator + + Summary: + -------------------------------- + This checks whether a field matches a value you specify. + You may want to use this for filtering purposes post textual search. + That is you may want to use it in a compound query or as, the second stage of your search. 
+ + """ + + base_params = SearchConfig(**kwargs).dict() + equals_statement = Equals( + path=path, + value=value, + score=score + ) + + return cls(**base_params, operator=equals_statement) + + @classmethod + def init_exists(cls, path:str, **kwargs:Any)->Self: + """ + Creates a search stage with an exists operator + + Summary: + -------------------------------- + This checks whether a path to a specified indexed field name exists in a document. + You may want to use this for filtering purposes post textual search. + That is you may want to use it in a compound query or as, the second stage of your search. + + """ + + base_params = SearchConfig(**kwargs).dict() + exists_statement = Exists(path=path) + + return cls(**base_params, operator=exists_statement) + + @classmethod + def init_facet(cls, **kwargs:Any)->Self: + """ + Creates a search stage with a facet collector + + Summary: + -------------------------------- + + """ + + + base_params = SearchConfig(**kwargs).dict() + cls.__reduce_kwargs(kwargs) + + operator_name = kwargs.pop("operator_name", None) + operator = kwargs.pop("operator", None) + if operator_name and not operator: + operator = OperatorMap[operator_name](**kwargs) + + facet_ = Facet(operator=operator, **kwargs) + + return cls(**base_params, collector=facet_) + + @classmethod + def init_more_like_this(cls, like:dict|list[dict], **kwargs:Any)->Self: + """ + Creates a search stage with a more_like_this operator + + Summary: + -------------------------------- + The moreLikeThis operator returns documents similar to input documents. + The moreLikeThis operator allows you to build features for your applications + that display similar or alternative results based on one or more given documents. 
+ + """ + + base_params = SearchConfig(**kwargs).dict() + more_like_this_stasement = MoreLikeThis(like=like) + + return cls(**base_params, operator=more_like_this_stasement) + + @classmethod + def init_range( + cls, + path:str|list[str], + gt:int|float|datetime|None=None, + lt:int|float|datetime|None=None, + gte:int|float|datetime|None=None, + lte:int|float|datetime|None=None, + score:dict|None=None, + **kwargs:Any + )->Self: + """ + Creates a search stage with a range operator + + Summary: + -------------------------------- + This checks whether a field value falls into a specific range + You may want to use this for filtering purposes post textual search. + That is you may want to use it in a compound query or as, the second stage of your search. + + + """ + + base_params = SearchConfig(**kwargs).dict() + range_statement = Range( + path=path, + gt=gt, + gte=gte, + lt=lt, + lte=lte, + score=score + ) + + return cls(**base_params, operator=range_statement) + + @classmethod + def init_regex( + cls, + query:str|list[str], + path:str|list[str], + allow_analyzed_field:bool=False, + score:dict|None=None, + **kwargs:Any + )->Self: + """ + Creates a search stage with a regex operator. + + Summary: + ---------------------------- + regex interprets the query field as a regular expression. regex is a term-level operator, meaning that the query field isn't analyzed (read processed). 
+ + """ + + base_params = SearchConfig(**kwargs).dict() + regex_statement = Regex( + query=query, + path=path, + allow_analyzed_field=allow_analyzed_field, + score=score + ) + + return cls(**base_params, operator=regex_statement) + + @classmethod + def init_text( + cls, + query:str|list[str], + path:str|list[str], + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + synonyms:str|None=None, + **kwargs:Any + )->Self: + """ + Creates a search stage with a text operator + + Summary: + --------------------------------- + The text operator performs a full-text search using the analyzer that you specify in the index configuration. + If you omit an analyzer, the text operator uses the default standard analyzer. + + """ + + base_params = SearchConfig(**kwargs).dict() + cls.__reduce_kwargs(kwargs) + + text_statement = Text( + query=query, + path=path, + score=score, + fuzzy=fuzzy, + synonyms=synonyms + ) + + return cls(**base_params, operator=text_statement) + + @classmethod + def init_wildcard( + cls, + query:str|list[str], + path:str|list[str], + allow_analyzed_field:bool=False, + score:dict|None=None, + **kwargs:Any + )->Self: + """ + Creates a search stage with a wildcard operator + + Summary: + --------------------------------- + The wildcard operator enables queries which use special characters in the search string that can match any character. 
+ + """ + + base_params = SearchConfig(**kwargs).dict() + cls.__reduce_kwargs(kwargs) + + wilcard_statement = Wildcard( + query=query, + path=path, + allow_analyzed_field=allow_analyzed_field, + score=score + ) + + return cls(**base_params, operator=wilcard_statement) + + #----------------------------------------- + # Operators Interface + #----------------------------------------- + @staticmethod + def Autocomplete(**kwargs)->Autocomplete: + """Returns an autocomplete operator.""" + + return Autocomplete(**kwargs) + + @staticmethod + def Compound(**kwargs)->Compound: + """Returns a compound operator.""" + + return Compound(**kwargs) + + @staticmethod + def Equals(**kwargs)->Equals: + """Returns an equals operator.""" + + return Equals(**kwargs) + + @staticmethod + def Exists(**kwargs)->Exists: + """Returns an exists operator.""" + + return Exists(**kwargs) + + @staticmethod + def Facet(**kwargs)->Facet: + """Returns a facet collector.""" + + return Facet(**kwargs) + + @staticmethod + def MoreLikeThis(**kwargs)->MoreLikeThis: + """Returns a more_like_this operator.""" + + return MoreLikeThis(**kwargs) + + @staticmethod + def Range(**kwargs)->Range: + """Returns a range operator.""" + + return Range(**kwargs) + + @staticmethod + def Regex(**kwargs)->Regex: + """Returns a regex operator.""" + + return Regex(**kwargs) + + @staticmethod + def Text(**kwargs)->Text: + """Returns a text operator.""" + + return Text(**kwargs) + + @staticmethod + def Wildcard(**kwargs)->Wildcard: + """Returns a wildcard operator.""" + + return Wildcard(**kwargs) + + #----------------------------------------- + # Compound Search Pipelinenized functions + #----------------------------------------- + + #----------------------------------------- + # By Operators + #----------------------------------------- + def autocomplete( + self, + type:ClauseType, + *, + query:str|list[str], + path:str, + token_order:str="any", + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + **kwargs:Any + 
)->Self: + """Adds an autocomplete clause to the top-level Compound operator.""" + + if isinstance(self.operator, Compound): + self.operator.autocomplete( + type=type, + query=query, + path=path, + token_order=token_order, + fuzzy=fuzzy, + score=score + ) + elif self.collector and isinstance(self.collector.operator, Compound): + self.collector.operator.autocomplete( + type=type, + query=query, + path=path, + token_order=token_order, + fuzzy=fuzzy, + score=score + ) + else: + raise TypeError(f"Cannot call autocomplete on {self.operator}") + + return self + + + def equals( + self, + type:ClauseType, + path:str, + value:str|int|float|bool|datetime, + score:dict|None=None, + **kwargs:Any + )->Self: + """Adds an equals clause to the top-level Compound operator.""" + + if isinstance(self.operator, Compound): + self.operator.equals( + type=type, + path=path, + value=value, + score=score + ) + elif self.collector and isinstance(self.collector.operator, Compound): + self.collector.operator.equals( + type=type, + path=path, + value=value, + score=score + ) + else: + raise TypeError(f"Cannot call equals on {self.operator}") + + return self + + + def exists( + self, + type:ClauseType, + path:str, + **kwargs:Any + )->Self: + """Adds an exists clause to the top-level Compound operator.""" + + if isinstance(self.operator, Compound): + self.operator.exists( + type=type, + path=path + ) + elif self.collector and isinstance(self.collector.operator, Compound): + self.collector.operator.exists( + type=type, + path=path + ) + else: + raise TypeError(f"Cannot call exists on {self.operator}") + + return self + + + def more_like_this( + self, + type:ClauseType, + like:dict|list[dict], + **kwargs:Any + )->Self: + """Adds a more_like_this clause to the top-level Compound operator.""" + + if isinstance(self.operator, Compound): + self.operator.more_like_this( + type, + like=like + ) + elif self.collector and isinstance(self.collector.operator, Compound): + 
self.collector.operator.more_like_this( + type, + like=like + ) + else: + raise TypeError(f"Cannot call more_like_this on {self.operator}") + + return self + + + def range( + self, + type:ClauseType, + *, + path:str|list[str], + gt:int|float|datetime|None=None, + lt:int|float|datetime|None=None, + gte:int|float|datetime|None=None, + lte:int|float|datetime|None=None, + score:dict|None=None, + **kwargs:Any + )->Self: + """Adds a range clause to the top-level Compound operator.""" + + if isinstance(self.operator, Compound): + self.operator.range( + type=type, + path=path, + gt=gt, + lt=lt, + gte=gte, + lte=lte, + score=score + ) + elif self.collector and isinstance(self.collector.operator, Compound): + self.collector.operator.range( + type=type, + path=path, + gt=gt, + lt=lt, + gte=gte, + lte=lte, + score=score + ) + else: + raise TypeError(f"Cannot call range on {self.operator}") + + return self + + + def regex( + self, + type:ClauseType, + *, + query:str|list[str], + path:str|list[str], + allow_analyzed_field:bool=False, + score:dict|None=None, + **kwargs:Any + )->Self: + """Adds a regex clause to the top-level Compound operator.""" + + if isinstance(self.operator, Compound): + self.operator.regex( + type=type, + query=query, + path=path, + allow_analyzed_field=allow_analyzed_field, + score=score + ) + elif self.collector and isinstance(self.collector.operator, Compound): + self.collector.operator.regex( + type=type, + query=query, + path=path, + allow_analyzed_field=allow_analyzed_field, + score=score + ) + else: + raise TypeError(f"Cannot call regex on {self.operator}") + + return self + + + def text( + self, + type:ClauseType, + *, + query:str|list[str], + path:str|list[str], + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + synonyms:str|None=None, + **kwargs:Any + )->Self: + """Adds a text clause to the top-level Compound operator.""" + + if isinstance(self.operator, Compound): + self.operator.text( + type=type, + query=query, + path=path, + fuzzy=fuzzy, 
+ score=score, + synonyms=synonyms + ) + elif self.collector and isinstance(self.collector.operator, Compound): + self.collector.operator.text( + type=type, + query=query, + path=path, + fuzzy=fuzzy, + score=score, + synonyms=synonyms + ) + else: + raise TypeError(f"Cannot call text on {self.operator}") + + return self + + + def wildcard( + self, + type:ClauseType, + *, + query:str|list[str], + path:str|list[str], + allow_analyzed_field:bool=False, + score:dict|None=None, + **kwargs:Any + )->Self: + """Adds a wildcard clause to the top-level Compound operator.""" + + if isinstance(self.operator, Compound): + self.operator.wildcard( + type=type, + query=query, + path=path, + allow_analyzed_field=allow_analyzed_field, + score=score + ) + elif self.collector and isinstance(self.collector.operator, Compound): + self.collector.operator.wildcard( + type=type, + query=query, + path=path, + allow_analyzed_field=allow_analyzed_field, + score=score + ) + else: + raise TypeError(f"Cannot call wildcard on {self.operator}") + + return self + + + def set_minimum_should_match(self, minimum_should_match:int)->Self: + """Sets minimum_should_match on top-level Compound operator.""" + + if isinstance(self.operator, Compound): + self.operator.minimum_should_match = minimum_should_match + elif self.collector and isinstance(self.collector.operator, Compound): + self.collector.operator.minimum_should_match = minimum_should_match + else: + raise TypeError(f"Cannot call set_minimum_should_match on {self.operator}") + + return self + + #----------------------------------------- + # Nested Compound Search + #----------------------------------------- + def compound(self, + type:ClauseType, + must:list[AnyOperator]=[], + must_not:list[AnyOperator]=[], + should:list[AnyOperator]=[], + filter:list[AnyOperator]=[], + minimum_should_match:int=0, + **kwargs:Any + )->Compound: + """Adds a Compound clause to the top-level Compound operator. 
+ + WARNING: Unlike other operators methods, this method returns the newly created compound nested clause + # rather than the self instance. + """ + + if isinstance(self.operator, Compound): + _coumpound = self.operator.compound( + type=type, + must=must, + must_not=must_not, + should=should, + filter=filter, + minimum_should_match=minimum_should_match + ) + else: + raise TypeError(f"Cannot call compound on {self.operator}") + + return _coumpound + + + #----------------------------------------- + # By Types + #----------------------------------------- + def must( + self, + operator_name:OperatorLiteral, + path:str|list[str]|None=None, + query:str|list[str]|None=None, + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + **kwargs + )->Self: + + if isinstance(self.operator, Compound): + kwargs.update( + { + "path":path, + "query":query, + "fuzzy":fuzzy, + "score":score + } + ) + else: + raise TypeError(f"Cannot call must on {self.operator}") + + return self.__get_operators_map__(operator_name)("must", **kwargs) + + + def should( + self, + operator_name:OperatorLiteral, + path:str|list[str]|None=None, + query:str|list[str]|None=None, + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + **kwargs + )->Self: + + if isinstance(self.operator, Compound): + kwargs.update( + { + "path":path, + "query":query, + "fuzzy":fuzzy, + "score":score + } + ) + else: + raise TypeError(f"Cannot call should on {self.operator}") + + return self.__get_operators_map__(operator_name)("should", **kwargs) + + + def must_not( + self, + operator_name:OperatorLiteral, + path:str|list[str]|None=None, + query:str|list[str]|None=None, + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + **kwargs + )->Self: + + if isinstance(self.operator, Compound): + kwargs.update( + { + "path":path, + "query":query, + "fuzzy":fuzzy, + "score":score + } + ) + else: + raise TypeError(f"Cannot call must_not on {self.operator}") + + return self.__get_operators_map__(operator_name)("mustNot", **kwargs) + + 
+ def filter( + self, + operator_name:OperatorLiteral, + path:str|list[str]|None=None, + query:str|list[str]|None=None, + fuzzy:FuzzyOptions|None=None, + score:dict|None=None, + **kwargs + )->Self: + + if isinstance(self.operator, Compound): + kwargs.update( + { + "path":path, + "query":query, + "fuzzy":fuzzy, + "score":score + } + ) + else: + raise TypeError(f"Cannot call filter on {self.operator}") + + return self.__get_operators_map__(operator_name)("filter", **kwargs) + + #----------------------------------------- + # Faceted Search Pipelinenized functions + #----------------------------------------- + def facet( + self, + path:str, + name:str|None=None, + *, + type:Literal["string", "number", "date"] = "string", + num_buckets:int|None=None, + boundaries:list[int|float]|list[datetime]|None=None, + default:str|None=None, + **kwargs + )->Self: + + if isinstance(self.collector, Facet): + self.collector.facet( + path=path, + name=name, + type=type, + num_buckets=num_buckets, + boundaries=boundaries, + default=default + ) + else: + raise TypeError(f"Cannot call facet on {self.operator}") + + return self + + def numeric( + self, + path:str, + name:str|None=None, + *, + boundaries:list[int|float]|None=None, + default:str|None=None, + **kwargs + )->Self: + + if isinstance(self.collector, Facet): + self.collector.numeric( + path=path, + name=name, + boundaries=boundaries, + default=default + ) + else: + raise TypeError(f"Cannot call numeric on {self.operator}") + + return self + + def date( + self, + path:str, + name:str|None=None, + *, + boundaries:list[datetime]|None=None, + default:str|None=None, + **kwargs + )->Self: + + if isinstance(self.collector, Facet): + self.collector.date( + path=path, + name=name, + boundaries=boundaries, + default=default + ) + else: + raise TypeError(f"Cannot call date on {self.operator}") + + return self + + def string( + self, + path:str, + name:str|None=None, + *, + default:str|None=None, + **kwargs + )->Self: + + if 
isinstance(self.collector, Facet): + self.collector.string( + path=path, + name=name, + default=default + ) + else: + raise TypeError(f"Cannot call string on {self.operator}") + + return self + #----------------------------------------- + # Utility functions + #----------------------------------------- + @classmethod + def __get_constructors_map__(cls, operator_name:str)->Callable[...,Self]: + """Returns appropriate constructor from operator name""" + + _constructors_map = { + "autocomplete":cls.init_autocomplete, + "compound":cls.init_compound, + "equals":cls.init_equals, + "exists":cls.init_exists, + #"facet":cls.init_facet, + "more_like_this":cls.init_more_like_this, + "range":cls.init_range, + "regex":cls.init_regex, + "text":cls.init_text, + "wildcard":cls.init_wildcard + } + + return _constructors_map[operator_name] + + + + def __get_operators_map__(self, operator_name:OperatorLiteral)->Callable[...,Self]: + """Returns the operator class associated with the given operator name.""" + + operators_map = { + "autocomplete":self.autocomplete, + "compound":self.compound, #FIXME : This breaks typing + "equals":self.equals, + "exists":self.exists, + "range":self.range, + "more_like_this":self.more_like_this, + "regex":self.regex, + "text":self.text, + "wildcard":self.wildcard + } + + return operators_map[operator_name] + + + @classmethod + def __reduce_kwargs(cls, kwargs:dict)->None: + """ + Parses kwargs arguments to avoid passing arguments twice + + In particular removes SearchBase arguments from kwargs: + - index, + - count, + - highlight, + - return_stored_source, + - score_details + + """ + + kwargs.pop("index", None) + kwargs.pop("count", None) + kwargs.pop("highlight", None) + kwargs.pop("return_stored_source", None) + kwargs.pop("score_details", None) diff --git a/monggregate/stages/search/search.py b/monggregate/stages/search/search.py new file mode 100644 index 0000000..fdcb918 --- /dev/null +++ b/monggregate/stages/search/search.py @@ -0,0 +1,162 @@ 
+"""Module defining an interface to MongoDB $search stage operation in aggregation pipeline. + +Online MongoDB documentation: +-------------------------------------------------------------------------------------------------- + +Last Updated (in this package) : 25/04/2023 +Source : https://www.mongodb.com/docs/atlas/atlas-search/query-syntax/#mongodb-pipeline-pipe.-search + +# Definition +#--------------------------- +The $search stage performs a full-text search on the specified field or fields which must be covered by an Atlas Search index. + +$search +A $search pipeline stage has the following prototype form: + + >>> { + $search: { + "index": "<index-name>", + "<operator-name>"|"<collector-name>": { + <operator-specification>|<collector-specification> + }, + "highlight": { + <highlight-options> + }, + "count": { + <count-options> + }, + "returnStoredSource": true | false + } + } + +# Fields +#--------------------------- + +The $search stage takes a document with the following fields + +Field Type Necessity Description + +<collector-name> document Conditional Name of the collector to use with the query. + You can provide a document that contains the collector-specific options as the value for this field. + Either this or <operator-name> is required. +count document Optional Document that specifies the count options for retrieving a count of the results. + To learn more, see Count Atlas Search Results. +highlight document Optional Document that specifies the highlight options for displaying search terms in their original context. +index string Required Name of the Atlas Search index to use. If omitted, defaults to default +<operator-name> document Conditional Name of the operator to search with. + You can provide a document that contains the operator-specific options as the value for this field. + Either this or <collector-name> is required. 
+returnStoredSource boolean Optional Flag that specifies whether to perform a full document lookup on the backend database or return only stored source fields directly from Atlas Search. + If omitted, defaults to false. To learn more, see Return Stored Source Fields. 
+ +# Behavior +#--------------------------- +$search must be the first stage of any pipeline it appears in. +$search cannot be used in: + + * a view definition + + * a $facet pipeline stage + +# Aggregation Variable +#--------------------------- +$search returns only the results of your query. The metadata results of your +$search query are saved in the $$SEARCH_META aggregation variable. You can use the $$SEARCH_META variable to view the metadata results for your +$search query. The $$SEARCH_META aggregation variable can be used anywhere after a +$search stage in any pipeline, but it can't be used after the $lookup or $unionWith stage in any pipeline. +The $$SEARCH_META aggregation variable can't be used in any subsequent stage after a $searchMeta stage. + +""" + +from datetime import datetime +from typing import Any, Callable, Literal +try: + from typing import Self +except ImportError: + from typing_extensions import Self + +from monggregate.base import pyd +from monggregate.stages.stage import Stage +from monggregate.stages.search.base import SearchConfig, SearchBase +from monggregate.search.collectors import Facet, Facets +from monggregate.search.operators import( + Autocomplete, + Compound, + Equals, + Exists, + MoreLikeThis, + Range, + Regex, + Text, + Wildcard, + AnyOperator +) +from monggregate.search.operators.compound import ClauseType +from monggregate.search.commons import FuzzyOptions + + +# Classes +# ----------------------------------------------------- +class Search(SearchBase): + """"Creates a $search stage to perform a full-text search on the specified field or fields which must be covered by an Atlas Search index. + + Descrtiption + ----------------------- + The $search stage performs a full-text search on the specified field or fields + which must be covered by an Atlas Search index. + + Attributes: + ----------------------- + - index, str : name of the Atlas Search index to use. Defaults to default. 
+ + - count, CountOptions|None : Document that specifies the count options for retrieving a count + of the results. + + - highlight, dict|None : Document that specifies the highlight options for displaying + search terms in their original context. + + - return_stored_source, bool : Flag that specifies whether to perform a full document lookup + on the backend database (mongod) or return only stored source fields + directly from Atlas Search. Defaults to false. + + - score_details, bool : Flag that specifies whether to retrieve a detailed breakdown of + the score for the documents in the results. Defaults to false + To view the details, you must use the $meta expression in the + $project stage. + + - <operator-name>, dict|None : Name of the operator to search with. You can provide a document + that contains the operator-specific options as the value for this field + Either this or <collector-name> is required. + + - <collector-name>, dict|None : Name of the collector to use with the query. You can provide + a document that contains the collector-specific options as the value + for this field. Either this or <operator-name> is required. 
+ + """ + + @property + def statement(self) -> dict[str, dict]: + + config = { + "index":self.index, + "highlight":self.highlight, + "count":self.count, + "returnStoredSource":self.return_stored_source, + "scoreDetails":self.score_details + } + + method = self.collector or self.operator + + config.update(method.statement) + + _statement = { + "$search":config + } + + return self.resolve(_statement) + + + +if __name__ == "__main__": + search = Search(index="fruits") + search.__get_operators_map__("autocomplete")() \ No newline at end of file diff --git a/monggregate/stages/search_meta.py b/monggregate/stages/search/search_meta.py similarity index 96% rename from monggregate/stages/search_meta.py rename to monggregate/stages/search/search_meta.py index 45cb10a..63139dc 100644 --- a/monggregate/stages/search_meta.py +++ b/monggregate/stages/search/search_meta.py @@ -63,10 +63,10 @@ """ -from monggregate.stages.search import Search +from monggregate.stages.search.base import SearchConfig, SearchBase -class SearchMeta(Search): +class SearchMeta(SearchBase): """ Creates a $searchMeta statement in an aggregation pipeline @@ -115,9 +115,10 @@ def statement(self) -> dict[str, dict]: "scoreDetails":self.score_details } - method:dict[str, dict] = self.collector or self.operator + method = self.collector or self.operator - config.update(method) + + config.update(method.statement) _statement = { "$searchMeta":config diff --git a/pyproject.toml b/pyproject.toml index 5779cb9..e54187f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "monggregate" -version = "0.17.0" +version = "0.18.0" description = "MongoDB aggregation pipelines made easy. Joins, grouping, counting and much more..." 
readme = "README.md" authors = [{ name = "Vianney Mixtur", email = "vianney.mixtur@outlook.fr" }] @@ -39,7 +39,7 @@ Homepage = "https://github.com/VianneyMI/monggregate" documentation = "https://vianneymi.github.io/monggregate/" [tool.bumpver] -current_version = "0.17.0" +current_version = "0.18.0" version_pattern = "MAJOR.MINOR.PATCH" commit_message = "bump version {old_version} -> {new_version}" commit = true diff --git a/requirements/linting.txt b/requirements/linting.txt index 4dd1c3c..7981352 100644 --- a/requirements/linting.txt +++ b/requirements/linting.txt @@ -2,4 +2,4 @@ # (linters and static type checkers) # Those are used for monggregate contributors -mypy==0.971 +mypy==1.6.1 diff --git a/tests/test_docstrings.py b/tests/test_docstrings.py index 1ca5283..2d9aa88 100644 --- a/tests/test_docstrings.py +++ b/tests/test_docstrings.py @@ -65,6 +65,11 @@ def test_sync_docstrings()->None: # -------------------------------------- stages_members = stages.__dict__ # mapping between member name and members of the package # which can be functions, variables or classes + stages_members.pop("AnyStage") + stages_members.pop("Union") + stages_members.pop("SearchStageMap") + + print(stages_members) # Filtering non-classes stages members diff --git a/tests/test_search_operators.py b/tests/test_search_operators.py index 50452b1..11ca8b3 100644 --- a/tests/test_search_operators.py +++ b/tests/test_search_operators.py @@ -11,7 +11,7 @@ Range, Regex, Text, - Wilcard + Wildcard ) from monggregate.search.commons.fuzzy import FuzzyOptions from monggregate.search.commons.highlight import HighlightOptions @@ -223,7 +223,7 @@ def test_text(self)->None: def test_wilcard(self)->None: """Tests the wilcard operator""" - wilcard_op = Wilcard( + wilcard_op = Wildcard( path = "title", query = "test" ) diff --git a/tests/test_stages.py b/tests/test_stages.py index d1d371f..0a610a0 100644 --- a/tests/test_stages.py +++ b/tests/test_stages.py @@ -363,11 +363,10 @@ def test_search(self, 
state:State)->None: search = Search( operator={ - "text":{ "query":"test", "path":"description" } - } + ) state["search"] = search assert search @@ -376,8 +375,9 @@ def test_search(self, state:State)->None: search = Search.from_operator(operator_name="more_like_this", like={}) assert search - with pytest.raises(pyd.ValidationError): - Search() + # NOTE : Commented the below, as it does not raise anymore but instantiate search with a Compound operator. + # with pytest.raises(pyd.ValidationError): + # Search() def test_set(self, state:State)->None: @@ -746,19 +746,19 @@ def test_unwind_statement(self, state:State)->None: # Debugging: #------------------------- if __name__ == "__main__": - TestStages().test_stage() - TestStages().test_bucket_auto({}) - TestStages().test_bucket({}) - TestStages().test_count({}) - TestStages().test_group({}) - TestStages().test_limit({}) - TestStages().test_match({}) - TestStages().test_out({}) - TestStages().test_project({}) - TestStages().test_replace_root({}) - TestStages().test_sample({}) - TestStages().test_set({}) + # TestStages().test_stage() + # TestStages().test_bucket_auto({}) + # TestStages().test_bucket({}) + # TestStages().test_count({}) + # TestStages().test_group({}) + # TestStages().test_limit({}) + # TestStages().test_match({}) + # TestStages().test_out({}) + # TestStages().test_project({}) + # TestStages().test_replace_root({}) + # TestStages().test_sample({}) + # TestStages().test_set({}) TestStages().test_search({}) - TestStages().test_skip({}) - TestStages().test_sort_by_count({}) - TestStages().test_sort({}) + # TestStages().test_skip({}) + # TestStages().test_sort_by_count({}) + # TestStages().test_sort({}) diff --git a/tests/tests_search/test_compound_examples.py b/tests/tests_search/test_compound_examples.py new file mode 100644 index 0000000..105e51c --- /dev/null +++ b/tests/tests_search/test_compound_examples.py @@ -0,0 +1,286 @@ +"""Module to test compound examples. 
+ +As presented here : https://www.mongodb.com/docs/atlas/atlas-search/compound/ +""" + +from monggregate.pipeline import Pipeline, Search, SearchMeta, Compound, Facet +from monggregate.search.collectors.facet import NumericFacet, StringFacet, DateFacet + +def test_must_and_must_not()->None: + """Tests the must and must_not example.""" + + + expected_statement = { + "$search": { + "index": "fruits", + "highlight": None, + "count": None, + "returnStoredSource": False, + "scoreDetails": False, + "compound": { + "must": [{ + "text": { + "query": "varieties", + "path": "description" + } + }], + "mustNot": [{ + "text": { + "query": "apples", + "path": "description" + } + }] + } + } + } + + + pipeline = Pipeline() + pipeline.search( + index="fruits", + operator_name="compound" + ).search( + clause_type="must", + query="varieties", + path="description" + ).search( + clause_type="mustNot", + query="apples", + path="description" + ) + + assert pipeline.export()[0] == expected_statement, pipeline.export()[0] + +def test_must_and_should()->None: + """Test must and should clauses.""" + + + expected_statement = { + "$search": { + "index": "fruits", + "highlight": None, + "count": None, + "returnStoredSource": False, + "scoreDetails": False, + "compound": { + "must": [{ + "text": { + "query": "varieties", + "path": "description" + } + }], + "should": [{ + "text": { + "query": "Fuji", + "path": "description" + } + }], + 'minimumShouldMatch': 0 + } + } + } + + pipeline = Pipeline() + + pipeline.search( + index="fruits", + operator_name="compound" + ).search( + clause_type="must", + query="varieties", + path="description" + ).search( + clause_type="should", + query="Fuji", + path="description" + ) + + assert pipeline.export()[0] == expected_statement, pipeline.export()[0] + +def test_minimum_should_match()->None: + """Test minimum should match clause.""" + + + expected_statement = { + "$search": { + "index": "fruits", + "highlight": None, + "count": None, + "returnStoredSource": 
False, + "scoreDetails": False, + "compound": { + "must": [{ + "text": { + "query": "varieties", + "path": "description" + } + }], + "should": [ + { + "text": { + "query": "Fuji", + "path": "description" + } + }, + { + "text": { + "query": "Golden Delicious", + "path": "description" + } + }], + "minimumShouldMatch": 1 + } + } + } + + pipeline = Pipeline() + + pipeline.search( + index="fruits", + operator_name="compound", + minimum_should_match=1 + ).search( + path="description", + query="varieties", + clause_type="must" + ).search( + path="description", + query="Fuji" + ).search( + path="description", + query="Golden Delicious", + ) + + assert pipeline.export()[0] == expected_statement, pipeline.export()[0] + +def test_filter()->None: + """Test filter examples.""" + + expected_statement = { + "$search": { + "index": "fruits", + "highlight": None, + "count": None, + "returnStoredSource": False, + "scoreDetails": False, + "compound": { + "must": [{ + "text": { + "query": "varieties", + "path": "description" + } + }], + "should": [{ + "text": { + "query": "banana", + "path": "description" + } + }], + "filter": [{ + "text": { + "query": "granny", + "path": "description" + }, + + }], + "minimumShouldMatch": 0 + } + } + } + + + pipeline = Pipeline() + + pipeline.search( + index="fruits", + operator_name="compound" + ).search( + clause_type="must", + path="description", + query="varieties" + ).search( + clause_type="should", + path="description", + query="banana" + ).search( + clause_type="filter", + path="description", + query="granny" + ) + + assert pipeline.export()[0] == expected_statement, pipeline.export()[0] + + +def test_nested()->None: + """Test nested examples.""" + + expected_statement = { + "$search": { + "index": "fruits", + "highlight": None, + "count": None, + "returnStoredSource": False, + "scoreDetails": False, + "compound": { + "should": [ + { + "text": { + "query": "apple", + "path": "type" + } + }, + { + "compound": { + "must": [ + { + "text": { + 
"query": "organic", + "path": "category" + } + }, + { + "equals": { + "value": True, + "path": "in_stock", + "score":None # TODO: Investigate this + } + } + ] + } + } + ], + "minimumShouldMatch": 1 + } + } + } + + + + pipeline = Pipeline() + + pipeline.search( + index="fruits", + operator_name="compound", + minimum_should_match=1 + ).search( + clause_type="should", + path="type", + query="apple" + ).search( + clause_type="should", + operator_name="compound", + must=[ + Search.Text(query="organic", path="category"), + Search.Equals(path="in_stock", value=True) + ] + ) + + assert pipeline.export()[0] == expected_statement, pipeline.export()[0] + + +if __name__ == "__main__": + test_must_and_must_not() + test_must_and_should() + test_minimum_should_match() + test_filter() + test_nested() diff --git a/tests/tests_search/test_facet_examples.py b/tests/tests_search/test_facet_examples.py new file mode 100644 index 0000000..d750f4c --- /dev/null +++ b/tests/tests_search/test_facet_examples.py @@ -0,0 +1,65 @@ +"""Module to test facet examples. 
+ +As presented here : https://www.mongodb.com/docs/atlas/atlas-search/facet/ +""" +from datetime import datetime +from monggregate.pipeline import Pipeline, Search, SearchMeta, Compound, Facet +from monggregate.search.collectors.facet import NumericFacet, StringFacet, DateFacet + +def test_facet(): + """Test facet example.""" + + expected_statement = { + "$searchMeta": { + "index": "movies", + "highlight": None, + "count": None, + "returnStoredSource": False, + "scoreDetails": False, + "facet": { + "operator": { + "range": { + "path": "released", + "gte": datetime(year=2000, month=1, day=1), + "lte": datetime(year=2015, month=1, day=31), + "score":None + } + }, + "facets": { + "directorsFacet": { + "type": "string", + "path": "directors", + "numBuckets" : 7 + }, + "yearFacet" : { + "type" : "number", + "path" : "year", + "boundaries" : [2000,2005,2010, 2015], + "default":None + } + } + } + } + } + + pipeline = Pipeline() + + pipeline.search_meta( + index="movies", + collector_name="facet", + operator=Search.Range( + path="released", + gte=datetime(year=2000, month=1, day=1), + lte=datetime(year=2015, month=1, day=31) + ), + facets=[ + StringFacet(name="directorsFacet", path="directors", num_buckets=7), + NumericFacet(name="yearFacet", path="year", boundaries=[2000, 2005, 2010, 2015]), + ] + +) + assert pipeline.export()[0] == expected_statement, pipeline.export()[0] + + +if __name__ =="__main__": + test_facet() \ No newline at end of file