From 7e86c04d01c585326ab6e443941ed15cf9ffca8f Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Thu, 7 Dec 2023 17:34:59 +0000 Subject: [PATCH] hybrid bm25/ELSER search --- .../search-tutorial/v3/search-tutorial/app.py | 64 +++++++++++++++---- .../v3/search-tutorial/search.py | 15 ++++- 2 files changed, 63 insertions(+), 16 deletions(-) diff --git a/example-apps/search-tutorial/v3/search-tutorial/app.py b/example-apps/search-tutorial/v3/search-tutorial/app.py index 2d9737e6..f70d0194 100644 --- a/example-apps/search-tutorial/v3/search-tutorial/app.py +++ b/example-apps/search-tutorial/v3/search-tutorial/app.py @@ -17,22 +17,58 @@ def handle_search(): filters, parsed_query = extract_filters(query) from_ = request.form.get('from_', type=int, default=0) - results = es.search( - query={ - 'bool': { - 'must': [ - { - 'text_expansion': { - 'elser_embedding': { - 'model_id': '.elser_model_2', - 'model_text': parsed_query, - } - }, + if parsed_query: + search_query = { + 'sub_searches': [ + { + 'query': { + 'bool': { + 'must': { + 'multi_match': { + 'query': parsed_query, + 'fields': ['name', 'summary', 'content'], + } + }, + **filters + } } - ], - **filters, + }, + { + 'query': { + 'bool': { + 'must': [ + { + 'text_expansion': { + 'elser_embedding': { + 'model_id': '.elser_model_2', + 'model_text': parsed_query, + } + }, + } + ], + **filters, + } + }, + }, + ], + 'rank': { + 'rrf': {} + }, + } + else: + search_query = { + 'query': { + 'bool': { + 'must': { + 'match_all': {} + }, + **filters + } } - }, + } + + results = es.search( + **search_query, size=5, from_=from_, ) diff --git a/example-apps/search-tutorial/v3/search-tutorial/search.py b/example-apps/search-tutorial/v3/search-tutorial/search.py index 072ce50b..ac883580 100644 --- a/example-apps/search-tutorial/v3/search-tutorial/search.py +++ b/example-apps/search-tutorial/v3/search-tutorial/search.py @@ -66,7 +66,18 @@ def reindex(self): return self.insert_documents(documents) def search(self, **query_args): - return self.es.search(index='my_documents', **query_args) + # sub_searches is not currently supported in the client, so we send + # search requests as raw requests + if 'from_' in query_args: + query_args['from'] = query_args['from_'] + del query_args['from_'] + return self.es.perform_request( + 'GET', + f'/my_documents/_search', + body=json.dumps(query_args), + headers={'Content-Type': 'application/json', + 'Accept': 'application/json'}, + ) def retrieve_document(self, id): return self.es.get(index='my_documents', id=id) @@ -75,7 +86,7 @@ def deploy_elser(self): # download ELSER v2 self.es.ml.put_trained_model(model_id='.elser_model_2', input={'field_names': ['text_field']}) - + # wait until ready while True: status = self.es.ml.get_trained_models(model_id='.elser_model_2',