From bb368c2cdb24ca1f76bc1a59903535bb45a2371d Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Mon, 8 Sep 2025 10:12:24 +0100 Subject: [PATCH] Mock sentence-transformers and nltk in tests (#3059) * Mock sentence-transformers and nltk in tests * Update test_elasticsearch/test_dsl/conftest.py Co-authored-by: Quentin Pradet * switch to a local mock that only affects the one test --------- Co-authored-by: Quentin Pradet (cherry picked from commit e05d7f1fd9750605d2c805989b3b37779618e8ae) --- pyproject.toml | 2 -- .../test_examples/_async/test_vectors.py | 31 +++++++++++++------ .../test_examples/_sync/test_vectors.py | 31 +++++++++++++------ 3 files changed, 44 insertions(+), 20 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0c66e2f50..26a22ea96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -78,8 +78,6 @@ dev = [ "pandas", "mapbox-vector-tile", "jinja2", - "nltk", - "sentence_transformers", "tqdm", "mypy", "pyright", diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py index dc45ceb52..3af9a877f 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_async/test_vectors.py @@ -15,27 +15,27 @@ # specific language governing permissions and limitations # under the License. +import sys from hashlib import md5 from typing import Any, List, Tuple from unittest import SkipTest +from unittest.mock import Mock, patch import pytest from elasticsearch import AsyncElasticsearch -from ..async_examples import vectors - @pytest.mark.asyncio async def test_vector_search( - async_write_client: AsyncElasticsearch, es_version: Tuple[int, ...], mocker: Any + async_write_client: AsyncElasticsearch, es_version: Tuple[int, ...] ) -> None: # this test only runs on Elasticsearch >= 8.11 because the example uses # a dense vector without specifying an explicit size if es_version < (8, 11): raise SkipTest("This test requires Elasticsearch 8.11 or newer") - class MockModel: + class MockSentenceTransformer: def __init__(self, model: Any): pass @@ -44,9 +44,22 @@ def encode(self, text: str) -> List[float]: total = sum(vector) return [float(v) / total for v in vector] - mocker.patch.object(vectors, "SentenceTransformer", new=MockModel) + def mock_nltk_tokenize(content: str): + return content.split("\n") + + # mock sentence_transformers and nltk, because they are quite big and + # irrelevant for testing the example logic + with patch.dict( + sys.modules, + { + "sentence_transformers": Mock(SentenceTransformer=MockSentenceTransformer), + "nltk": Mock(sent_tokenize=mock_nltk_tokenize), + }, + ): + # import the example after the dependencies are mocked + from ..async_examples import vectors - await vectors.create() - await vectors.WorkplaceDoc._index.refresh() - results = await (await vectors.search("Welcome to our team!")).execute() - assert results[0].name == "New Employee Onboarding Guide" + await vectors.create() + await vectors.WorkplaceDoc._index.refresh() + results = await (await vectors.search("Welcome to our team!")).execute() + assert results[0].name == "Intellectual Property Policy" diff --git a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py index 4b14c89a0..e8e61b5a0 100644 --- a/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py +++ b/test_elasticsearch/test_dsl/test_integration/test_examples/_sync/test_vectors.py @@ -15,27 +15,27 @@ # specific language governing permissions and limitations # under the License. +import sys from hashlib import md5 from typing import Any, List, Tuple from unittest import SkipTest +from unittest.mock import Mock, patch import pytest from elasticsearch import Elasticsearch -from ..examples import vectors - @pytest.mark.sync def test_vector_search( - write_client: Elasticsearch, es_version: Tuple[int, ...], mocker: Any + write_client: Elasticsearch, es_version: Tuple[int, ...] ) -> None: # this test only runs on Elasticsearch >= 8.11 because the example uses # a dense vector without specifying an explicit size if es_version < (8, 11): raise SkipTest("This test requires Elasticsearch 8.11 or newer") - class MockModel: + class MockSentenceTransformer: def __init__(self, model: Any): pass @@ -44,9 +44,22 @@ def encode(self, text: str) -> List[float]: total = sum(vector) return [float(v) / total for v in vector] - mocker.patch.object(vectors, "SentenceTransformer", new=MockModel) + def mock_nltk_tokenize(content: str): + return content.split("\n") + + # mock sentence_transformers and nltk, because they are quite big and + # irrelevant for testing the example logic + with patch.dict( + sys.modules, + { + "sentence_transformers": Mock(SentenceTransformer=MockSentenceTransformer), + "nltk": Mock(sent_tokenize=mock_nltk_tokenize), + }, + ): + # import the example after the dependencies are mocked + from ..examples import vectors - vectors.create() - vectors.WorkplaceDoc._index.refresh() - results = (vectors.search("Welcome to our team!")).execute() - assert results[0].name == "New Employee Onboarding Guide" + vectors.create() + vectors.WorkplaceDoc._index.refresh() + results = (vectors.search("Welcome to our team!")).execute() + assert results[0].name == "Intellectual Property Policy"