diff --git a/libs/arangodb/tests/integration_tests/vectorstores/test_arangodb_vector.py b/libs/arangodb/tests/integration_tests/vectorstores/test_arangodb_vector.py index c6a81f4..0884e75 100644 --- a/libs/arangodb/tests/integration_tests/vectorstores/test_arangodb_vector.py +++ b/libs/arangodb/tests/integration_tests/vectorstores/test_arangodb_vector.py @@ -8,7 +8,7 @@ from arango.cursor import Cursor from langchain_core.documents import Document -from langchain_arangodb.vectorstores.arangodb_vector import ArangoVector +from langchain_arangodb.vectorstores.arangodb_vector import ArangoVector, SearchType from langchain_arangodb.vectorstores.utils import DistanceStrategy from tests.integration_tests.utils import ArangoCredentials @@ -1062,3 +1062,370 @@ def test_arangovector_from_existing_collection( assert len(docs) == 2 assert any(doc.id == "doc1" for doc in docs) assert any(doc.id == "doc3" for doc in docs) + + +@pytest.mark.usefixtures("clear_arangodb_database") +def test_arangovector_hybrid_search_functionality( + arangodb_credentials: ArangoCredentials, + fake_embedding_function: FakeEmbeddings, +) -> None: + """Test hybrid search functionality comparing vector vs hybrid search results.""" + client = ArangoClient(hosts=arangodb_credentials["url"]) + db = client.db( + username=arangodb_credentials["username"], + password=arangodb_credentials["password"], + ) + + # Example texts for hybrid search testing + texts = [ + "The government passed new data privacy laws affecting social media " + "companies like Meta and Twitter.", + "A new smartphone from Samsung features cutting-edge AI and a focus " + "on secure user data.", + "Meta introduces Llama 3, a state-of-the-art language model to " + "compete with OpenAI's GPT-4.", + "How to enable two-factor authentication on Facebook for better " + "account protection.", + "A study on data privacy perceptions among Gen Z social media users " + "reveals concerns over targeted advertising.", + ] + + metadatas = [ + {"source": "news", "topic": "privacy"}, + {"source": "tech", "topic": "mobile"}, + {"source": "ai", "topic": "llm"}, + {"source": "guide", "topic": "security"}, + {"source": "research", "topic": "privacy"}, + ] + + # Create vector store with hybrid search enabled + vector_store = ArangoVector.from_texts( + texts=texts, + embedding=fake_embedding_function, + metadatas=metadatas, + database=db, + collection_name="test_hybrid_collection", + search_type=SearchType.HYBRID, + rrf_search_limit=3, # Top 3 RRF Search + overwrite_index=True, + insert_text=True, # Required for hybrid search + ) + + # Create vector and keyword indexes + vector_store.create_vector_index() + vector_store.create_keyword_index() + + query = "AI data privacy" + + # Test vector search + vector_results = vector_store.similarity_search_with_score( + query=query, + k=2, + use_approx=False, + search_type=SearchType.VECTOR, + ) + + # Test hybrid search + hybrid_results = vector_store.similarity_search_with_score( + query=query, + k=2, + use_approx=False, + search_type=SearchType.HYBRID, + ) + + # Test hybrid search with higher vector weight + hybrid_results_with_higher_vector_weight = ( + vector_store.similarity_search_with_score( + query=query, + k=2, + use_approx=False, + search_type=SearchType.HYBRID, + vector_weight=1.0, + keyword_weight=0.01, + ) + ) + + # Verify all searches return expected number of results + assert len(vector_results) == 2 + assert len(hybrid_results) == 2 + assert len(hybrid_results_with_higher_vector_weight) == 2 + + # Verify that all results have scores + for doc, score in vector_results: + assert isinstance(score, float) + assert score >= 0 + + for doc, score in hybrid_results: + assert isinstance(score, float) + assert score >= 0 + + for doc, score in hybrid_results_with_higher_vector_weight: + assert isinstance(score, float) + assert score >= 0 + + # Verify that hybrid search can produce different rankings than vector search + # This tests that the RRF algorithm is working + vector_top_doc = vector_results[0][0].page_content + hybrid_top_doc = hybrid_results[0][0].page_content + + # The results may be the same or different depending on the content, + # but we should be able to verify the search executed successfully + assert vector_top_doc in texts + assert hybrid_top_doc in texts + + +@pytest.mark.usefixtures("clear_arangodb_database") +def test_arangovector_hybrid_search_with_weights( + arangodb_credentials: ArangoCredentials, + fake_embedding_function: FakeEmbeddings, +) -> None: + """Test hybrid search with different vector and keyword weights.""" + client = ArangoClient(hosts=arangodb_credentials["url"]) + db = client.db( + username=arangodb_credentials["username"], + password=arangodb_credentials["password"], + ) + + texts = [ + "machine learning algorithms for data analysis", + "deep learning neural networks", + "artificial intelligence and machine learning", + "data science and analytics", + "computer vision and image processing", + ] + + vector_store = ArangoVector.from_texts( + texts=texts, + embedding=fake_embedding_function, + database=db, + collection_name="test_weights_collection", + search_type=SearchType.HYBRID, + overwrite_index=True, + insert_text=True, + ) + + vector_store.create_vector_index() + vector_store.create_keyword_index() + + query = "machine learning" + + # Test with equal weights + equal_weight_results = vector_store.similarity_search_with_score( + query=query, + k=3, + search_type=SearchType.HYBRID, + vector_weight=1.0, + keyword_weight=1.0, + use_approx=False, + ) + + # Test with vector emphasis + vector_emphasis_results = vector_store.similarity_search_with_score( + query=query, + k=3, + search_type=SearchType.HYBRID, + vector_weight=10.0, + keyword_weight=1.0, + use_approx=False, + ) + + # Test with keyword emphasis + keyword_emphasis_results = vector_store.similarity_search_with_score( + query=query, + k=3, + search_type=SearchType.HYBRID, + vector_weight=1.0, + keyword_weight=10.0, + use_approx=False, + ) + + # Verify all searches return expected number of results + assert len(equal_weight_results) == 3 + assert len(vector_emphasis_results) == 3 + assert len(keyword_emphasis_results) == 3 + + # Verify scores are valid + for results in [ + equal_weight_results, + vector_emphasis_results, + keyword_emphasis_results, + ]: + for doc, score in results: + assert isinstance(score, float) + assert score >= 0 + + +@pytest.mark.usefixtures("clear_arangodb_database") +def test_arangovector_hybrid_search_custom_keyword_search( + arangodb_credentials: ArangoCredentials, + fake_embedding_function: FakeEmbeddings, +) -> None: + """Test hybrid search with custom keyword search clause.""" + client = ArangoClient(hosts=arangodb_credentials["url"]) + db = client.db( + username=arangodb_credentials["username"], + password=arangodb_credentials["password"], + ) + + texts = [ + "Advanced machine learning techniques", + "Basic machine learning concepts", + "Deep learning and neural networks", + "Traditional machine learning algorithms", + "Modern AI and machine learning", + ] + + metadatas = [ + {"level": "advanced", "category": "ml"}, + {"level": "basic", "category": "ml"}, + {"level": "advanced", "category": "dl"}, + {"level": "intermediate", "category": "ml"}, + {"level": "modern", "category": "ai"}, + ] + + vector_store = ArangoVector.from_texts( + texts=texts, + embedding=fake_embedding_function, + metadatas=metadatas, + database=db, + collection_name="test_custom_keyword_collection", + search_type=SearchType.HYBRID, + overwrite_index=True, + insert_text=True, + ) + + vector_store.create_vector_index() + vector_store.create_keyword_index() + + query = "machine learning" + + # Test with default keyword search + default_results = vector_store.similarity_search_with_score( + query=query, + k=3, + search_type=SearchType.HYBRID, + use_approx=False, + ) + + # Test with custom keyword search clause + custom_keyword_clause = f""" + SEARCH ANALYZER( + doc.{vector_store.text_field} IN TOKENS(@query, @analyzer), + @analyzer + ) AND doc.level == "advanced" + """ + + custom_results = vector_store.similarity_search_with_score( + query=query, + k=3, + search_type=SearchType.HYBRID, + keyword_search_clause=custom_keyword_clause, + use_approx=False, + ) + + # Verify both searches return results + assert len(default_results) >= 1 + assert len(custom_results) >= 1 + + +@pytest.mark.usefixtures("clear_arangodb_database") +def test_arangovector_keyword_index_management( + arangodb_credentials: ArangoCredentials, + fake_embedding_function: FakeEmbeddings, +) -> None: + """Test keyword index creation, retrieval, and deletion.""" + client = ArangoClient(hosts=arangodb_credentials["url"]) + db = client.db( + username=arangodb_credentials["username"], + password=arangodb_credentials["password"], + ) + + texts = ["sample text for keyword indexing"] + + vector_store = ArangoVector.from_texts( + texts=texts, + embedding=fake_embedding_function, + database=db, + collection_name="test_keyword_index", + search_type=SearchType.HYBRID, + keyword_index_name="test_keyword_view", + overwrite_index=True, + insert_text=True, + ) + + # Test keyword index creation + vector_store.create_keyword_index() + + # Test keyword index retrieval + keyword_index = vector_store.retrieve_keyword_index() + assert keyword_index is not None + assert keyword_index["name"] == "test_keyword_view" + assert keyword_index["type"] == "arangosearch" + + # Test keyword index deletion + vector_store.delete_keyword_index() + + # Verify index was deleted + deleted_index = vector_store.retrieve_keyword_index() + assert deleted_index is None + + # Test that creating index again works (idempotent) + vector_store.create_keyword_index() + recreated_index = vector_store.retrieve_keyword_index() + assert recreated_index is not None + + +@pytest.mark.usefixtures("clear_arangodb_database") +def test_arangovector_hybrid_search_error_cases( + arangodb_credentials: ArangoCredentials, + fake_embedding_function: FakeEmbeddings, +) -> None: + """Test error cases for hybrid search functionality.""" + client = ArangoClient(hosts=arangodb_credentials["url"]) + db = client.db( + username=arangodb_credentials["username"], + password=arangodb_credentials["password"], + ) + + texts = ["test text for error cases"] + + # Test creating hybrid search without insert_text should work + # but might not give meaningful results + vector_store = ArangoVector.from_texts( + texts=texts, + embedding=fake_embedding_function, + database=db, + collection_name="test_error_collection", + search_type=SearchType.HYBRID, + insert_text=True, # Required for meaningful hybrid search + overwrite_index=True, + ) + + vector_store.create_vector_index() + vector_store.create_keyword_index() + + # Test that search works even with edge case parameters + results = vector_store.similarity_search_with_score( + query="test", + k=1, + search_type=SearchType.HYBRID, + vector_weight=0.0, # Edge case: no vector weight + keyword_weight=1.0, + use_approx=False, + ) + + # Should still return results (keyword-only search) + assert len(results) >= 0 # May return 0 or more results + + # Test with zero keyword weight + results_vector_only = vector_store.similarity_search_with_score( + query="test", + k=1, + search_type=SearchType.HYBRID, + vector_weight=1.0, + keyword_weight=0.0, # Edge case: no keyword weight + use_approx=False, + ) + + # Should still return results (vector-only search) + assert len(results_vector_only) >= 0 # May return 0 or more results diff --git a/libs/arangodb/tests/unit_tests/vectorstores/test_arangodb.py b/libs/arangodb/tests/unit_tests/vectorstores/test_arangodb.py index 197181a..e1ed83a 100644 --- a/libs/arangodb/tests/unit_tests/vectorstores/test_arangodb.py +++ b/libs/arangodb/tests/unit_tests/vectorstores/test_arangodb.py @@ -648,3 +648,535 @@ def test_select_relevance_score_fn_invalid_strategy_raises_error( "Consider providing relevance_score_fn to ArangoVector constructor." ) assert str(exc_info.value) == expected_message + + +def test_init_with_hybrid_search_type(arango_vector_factory: Any) -> None: + """Test initialization with hybrid search type.""" + from langchain_arangodb.vectorstores.arangodb_vector import SearchType + + vector_store = arango_vector_factory(search_type=SearchType.HYBRID) + assert vector_store.search_type == SearchType.HYBRID + + +def test_similarity_search_hybrid(arango_vector_factory: Any) -> None: + """Test similarity search with hybrid search type.""" + from langchain_arangodb.vectorstores.arangodb_vector import SearchType + + vector_store = arango_vector_factory(search_type=SearchType.HYBRID) + + # Mock the embedding.embed_query method + mock_embedding = [0.1] * 64 + vector_store.embedding.embed_query.return_value = mock_embedding + + # Mock the similarity_search_by_vector_and_keyword method + expected_docs = [MagicMock(), MagicMock()] + with patch.object( + vector_store, + "similarity_search_by_vector_and_keyword", + return_value=expected_docs, + ) as mock_hybrid_search: + docs = vector_store.similarity_search( + query="test query", + k=2, + return_fields={"field1", "field2"}, + use_approx=True, + vector_weight=1.0, + keyword_weight=0.5, + ) + + # Verify embed_query was called with query + vector_store.embedding.embed_query.assert_called_once_with("test query") + + # Verify similarity_search_by_vector_and_keyword was called with correct parameters + mock_hybrid_search.assert_called_once_with( + query="test query", + embedding=mock_embedding, + k=2, + return_fields={"field1", "field2"}, + use_approx=True, + filter_clause="", + vector_weight=1.0, + keyword_weight=0.5, + keyword_search_clause="", + ) + + # Verify the correct documents were returned + assert docs == expected_docs + + +def test_similarity_search_with_score_hybrid(arango_vector_factory: Any) -> None: + """Test similarity search with score using hybrid search type.""" + from langchain_arangodb.vectorstores.arangodb_vector import SearchType + + vector_store = arango_vector_factory(search_type=SearchType.HYBRID) + + # Mock the embedding.embed_query method + mock_embedding = [0.1] * 64 + vector_store.embedding.embed_query.return_value = mock_embedding + + # Mock the similarity_search_by_vector_and_keyword_with_score method + expected_results = [(MagicMock(), 0.8), (MagicMock(), 0.6)] + with patch.object( + vector_store, + "similarity_search_by_vector_and_keyword_with_score", + return_value=expected_results, + ) as mock_hybrid_search_with_score: + results = vector_store.similarity_search_with_score( + query="test query", + k=2, + return_fields={"field1", "field2"}, + use_approx=True, + vector_weight=2.0, + keyword_weight=1.5, + keyword_search_clause="custom clause", + ) + query = "test query" + v_store = vector_store + v_store.embedding.embed_query.assert_called_once_with(query) + + # Verify similarity_search_by_vector_and + # _keyword_with_score was called with correct parameters + mock_hybrid_search_with_score.assert_called_once_with( + query="test query", + embedding=mock_embedding, + k=2, + return_fields={"field1", "field2"}, + use_approx=True, + filter_clause="", + vector_weight=2.0, + keyword_weight=1.5, + keyword_search_clause="custom clause", + ) + + # Verify the correct results were returned + assert results == expected_results + + +def test_similarity_search_by_vector_and_keyword(arango_vector_factory: Any) -> None: + """Test similarity_search_by_vector_and_keyword method.""" + vector_store = arango_vector_factory() + + mock_embedding = [0.1] * 64 + expected_docs = [MagicMock(), MagicMock()] + + with patch.object( + vector_store, + "similarity_search_by_vector_and_keyword_with_score", + return_value=[(expected_docs[0], 0.8), (expected_docs[1], 0.6)], + ) as mock_hybrid_search_with_score: + docs = vector_store.similarity_search_by_vector_and_keyword( + query="test query", + embedding=mock_embedding, + k=2, + return_fields={"field1"}, + use_approx=False, + filter_clause="FILTER doc.type == 'test'", + vector_weight=1.5, + keyword_weight=0.8, + keyword_search_clause="custom search", + ) + + # Verify the method was called with correct parameters + mock_hybrid_search_with_score.assert_called_once_with( + query="test query", + embedding=mock_embedding, + k=2, + return_fields={"field1"}, + use_approx=False, + filter_clause="FILTER doc.type == 'test'", + vector_weight=1.5, + keyword_weight=0.8, + keyword_search_clause="custom search", + ) + + # Verify only documents (not scores) were returned + assert docs == expected_docs + + +def test_similarity_search_by_vector_and_keyword_with_score( + arango_vector_factory: Any, +) -> None: + """Test similarity_search_by_vector_and_keyword_with_score method.""" + vector_store = arango_vector_factory() + + mock_embedding = [0.1] * 64 + mock_cursor = MagicMock() + mock_query = "test query" + mock_bind_vars = {"test": "value"} + + # Mock _build_hybrid_search_query + with patch.object( + vector_store, + "_build_hybrid_search_query", + return_value=(mock_query, mock_bind_vars), + ) as mock_build_query: + # Mock database execution + vector_store.db.aql.execute.return_value = mock_cursor + + # Mock _process_search_query + expected_results = [(MagicMock(), 0.9), (MagicMock(), 0.7)] + with patch.object( + vector_store, "_process_search_query", return_value=expected_results + ) as mock_process: + results = vector_store.similarity_search_by_vector_and_keyword_with_score( + query="test query", + embedding=mock_embedding, + k=3, + return_fields={"field1", "field2"}, + use_approx=True, + filter_clause="FILTER doc.active == true", + vector_weight=2.0, + keyword_weight=1.0, + keyword_search_clause="SEARCH doc.content", + ) + + # Verify _build_hybrid_search_query was called with correct parameters + mock_build_query.assert_called_once_with( + query="test query", + k=3, + embedding=mock_embedding, + return_fields={"field1", "field2"}, + use_approx=True, + filter_clause="FILTER doc.active == true", + vector_weight=2.0, + keyword_weight=1.0, + keyword_search_clause="SEARCH doc.content", + ) + + # Verify database query execution + vector_store.db.aql.execute.assert_called_once_with( + mock_query, bind_vars=mock_bind_vars, stream=True + ) + + # Verify _process_search_query was called + mock_process.assert_called_once_with(mock_cursor) + + # Verify results + assert results == expected_results + + +def test_build_hybrid_search_query(arango_vector_factory: Any) -> None: + """Test _build_hybrid_search_query method.""" + vector_store = arango_vector_factory( + collection_name="test_collection", + keyword_index_name="test_view", + keyword_analyzer="text_en", + rrf_constant=60, + rrf_search_limit=100, + text_field="text", + embedding_field="embedding", + ) + + # Mock retrieve_keyword_index to return None (will create index) + with patch.object(vector_store, "retrieve_keyword_index", return_value=None): + with patch.object(vector_store, "create_keyword_index") as mock_create_index: + # Mock retrieve_vector_index to return None + # (will create index for approx search) + with patch.object(vector_store, "retrieve_vector_index", return_value=None): + with patch.object( + vector_store, "create_vector_index" + ) as mock_create_vector_index: + # Mock database version for approx search + vector_store.db.version.return_value = "3.12.5" + + query, bind_vars = vector_store._build_hybrid_search_query( + query="test query", + k=5, + embedding=[0.1] * 64, + return_fields={"field1", "field2"}, + use_approx=True, + filter_clause="FILTER doc.active == true", + vector_weight=1.5, + keyword_weight=2.0, + keyword_search_clause="", + ) + + # Verify indexes were created + mock_create_index.assert_called_once() + mock_create_vector_index.assert_called_once() + + # Verify query string contains expected components + assert "FOR doc IN @@collection" in query + assert "FOR doc IN @@view" in query + assert "SEARCH ANALYZER" in query + assert "BM25(doc)" in query + assert "COLLECT doc_key = result.doc._key INTO group" in query + assert "SUM(group[*].result.score)" in query + assert "SORT rrf_score DESC" in query + + # Verify bind variables + assert bind_vars["@collection"] == "test_collection" + assert bind_vars["@view"] == "test_view" + assert bind_vars["embedding"] == [0.1] * 64 + assert bind_vars["query"] == "test query" + assert bind_vars["analyzer"] == "text_en" + assert bind_vars["rrf_constant"] == 60 + assert bind_vars["rrf_search_limit"] == 100 + + +def test_build_hybrid_search_query_with_custom_keyword_search( + arango_vector_factory: Any, +) -> None: + """Test _build_hybrid_search_query with custom keyword search clause.""" + vector_store = arango_vector_factory() + + # Mock dependencies + with patch.object( + vector_store, "retrieve_keyword_index", return_value={"name": "test_view"} + ): + with patch.object( + vector_store, "retrieve_vector_index", return_value={"name": "test_index"} + ): + vector_store.db.version.return_value = "3.12.5" + + custom_search_clause = "SEARCH doc.title IN TOKENS(@query, @analyzer)" + + query, bind_vars = vector_store._build_hybrid_search_query( + query="test query", + k=3, + embedding=[0.2] * 64, + return_fields={"title"}, + use_approx=False, + filter_clause="", + vector_weight=1.0, + keyword_weight=1.0, + keyword_search_clause=custom_search_clause, + ) + + # Verify custom keyword search clause is used + assert custom_search_clause in query + # Verify default search clause is not used + assert "doc.text IN TOKENS" not in query + + +def test_keyword_index_management(arango_vector_factory: Any) -> None: + """Test keyword index creation, retrieval, and deletion.""" + vector_store = arango_vector_factory( + keyword_index_name="test_keyword_view", + keyword_analyzer="text_en", + collection_name="test_collection", + text_field="content", + ) + + # Test retrieve_keyword_index when index exists + mock_view = {"name": "test_keyword_view", "type": "arangosearch"} + + with patch.object(vector_store, "retrieve_keyword_index", return_value=mock_view): + result = vector_store.retrieve_keyword_index() + assert result == mock_view + + # Test retrieve_keyword_index when index doesn't exist + with patch.object(vector_store, "retrieve_keyword_index", return_value=None): + result = vector_store.retrieve_keyword_index() + assert result is None + + # Test create_keyword_index + with patch.object(vector_store, "retrieve_keyword_index", return_value=None): + vector_store.create_keyword_index() + + # Verify create_view was called with correct parameters + vector_store.db.create_view.assert_called_once() + call_args = vector_store.db.create_view.call_args + assert call_args[0][0] == "test_keyword_view" + assert call_args[0][1] == "arangosearch" + + view_properties = call_args[0][2] + assert "links" in view_properties + assert "test_collection" in view_properties["links"] + assert "analyzers" in view_properties["links"]["test_collection"] + assert "text_en" in view_properties["links"]["test_collection"]["analyzers"] + + # Test create_keyword_index when index already exists (idempotent) + vector_store.db.create_view.reset_mock() + with patch.object(vector_store, "retrieve_keyword_index", return_value=mock_view): + vector_store.create_keyword_index() + + # Should not create view if it already exists + vector_store.db.create_view.assert_not_called() + + # Test delete_keyword_index + with patch.object(vector_store, "retrieve_keyword_index", return_value=mock_view): + vector_store.delete_keyword_index() + + vector_store.db.delete_view.assert_called_once_with("test_keyword_view") + + # Test delete_keyword_index when index doesn't exist + vector_store.db.delete_view.reset_mock() + with patch.object(vector_store, "retrieve_keyword_index", return_value=None): + vector_store.delete_keyword_index() + + # Should not call delete_view if view doesn't exist + vector_store.db.delete_view.assert_not_called() + + +def test_from_texts_with_hybrid_search_and_invalid_insert_text() -> None: + """Test that from_texts raises ValueError when + hybrid search is used without insert_text.""" + mock_embedding = MagicMock() + mock_embedding.embed_documents.return_value = [[0.1] * 64, [0.2] * 64] + mock_db = MagicMock() + + from langchain_arangodb.vectorstores.arangodb_vector import ArangoVector, SearchType + + with pytest.raises(ValueError) as exc_info: + ArangoVector.from_texts( + texts=["text1", "text2"], + embedding=mock_embedding, + database=mock_db, + search_type=SearchType.HYBRID, + insert_text=False, # This should cause the error + ) + + assert "insert_text must be True when search_type is HYBRID" in str(exc_info.value) + + +def test_from_texts_with_hybrid_search_valid() -> None: + """Test that from_texts works correctly with hybrid search when insert_text=True.""" + mock_embedding = MagicMock() + mock_embedding.embed_documents.return_value = [[0.1] * 64, [0.2] * 64] + mock_db = MagicMock() + mock_collection = MagicMock() + mock_db.has_collection.return_value = True + mock_db.collection.return_value = mock_collection + mock_collection.indexes.return_value = [] + + from langchain_arangodb.vectorstores.arangodb_vector import ArangoVector, SearchType + + with patch.object(ArangoVector, "add_embeddings", return_value=["id1", "id2"]): + vector_store = ArangoVector.from_texts( + texts=["text1", "text2"], + embedding=mock_embedding, + database=mock_db, + search_type=SearchType.HYBRID, + insert_text=True, # This should work + ) + + assert vector_store.search_type == SearchType.HYBRID + + +def test_from_existing_collection_with_hybrid_search_invalid() -> None: + """Test that from_existing_collection raises + error with hybrid search and insert_text=False.""" + mock_embedding = MagicMock() + mock_db = MagicMock() + + from langchain_arangodb.vectorstores.arangodb_vector import ArangoVector, SearchType + + with pytest.raises(ValueError) as exc_info: + ArangoVector.from_existing_collection( + collection_name="test_collection", + text_properties_to_embed=["title", "content"], + embedding=mock_embedding, + database=mock_db, + search_type=SearchType.HYBRID, + insert_text=False, # This should cause the error + ) + + assert "insert_text must be True when search_type is HYBRID" in str(exc_info.value) + + +def test_build_hybrid_search_query_euclidean_distance( + arango_vector_factory: Any, +) -> None: + """Test _build_hybrid_search_query with Euclidean distance strategy.""" + from langchain_arangodb.vectorstores.utils import DistanceStrategy + + vector_store = arango_vector_factory( + distance_strategy=DistanceStrategy.EUCLIDEAN_DISTANCE + ) + + # Mock dependencies + with patch.object( + vector_store, "retrieve_keyword_index", return_value={"name": "test_view"} + ): + with patch.object( + vector_store, "retrieve_vector_index", return_value={"name": "test_index"} + ): + query, bind_vars = vector_store._build_hybrid_search_query( + query="test", + k=2, + embedding=[0.1] * 64, + return_fields=set(), + use_approx=False, + filter_clause="", + vector_weight=1.0, + keyword_weight=1.0, + keyword_search_clause="", + ) + + # Should use L2_DISTANCE for Euclidean distance + assert "L2_DISTANCE" in query + assert "SORT score ASC" in query # Euclidean uses ascending sort + + +def test_build_hybrid_search_query_version_check(arango_vector_factory: Any) -> None: + """Test that _build_hybrid_search_query checks + ArangoDB version for approximate search.""" + vector_store = arango_vector_factory() + + # Mock dependencies + with patch.object( + vector_store, "retrieve_keyword_index", return_value={"name": "test_view"} + ): + with patch.object(vector_store, "retrieve_vector_index", return_value=None): + # Mock old version + vector_store.db.version.return_value = "3.12.3" + + with pytest.raises(ValueError) as exc_info: + vector_store._build_hybrid_search_query( + query="test", + k=2, + embedding=[0.1] * 64, + return_fields=set(), + use_approx=True, # This should trigger the version check + filter_clause="", + vector_weight=1.0, + keyword_weight=1.0, + keyword_search_clause="", + ) + + assert ( + "Approximate Nearest Neighbor search requires ArangoDB >= 3.12.4" + in str(exc_info.value) + ) + + +def test_search_type_override_in_similarity_search(arango_vector_factory: Any) -> None: + """Test that search_type can be overridden in similarity_search method.""" + from langchain_arangodb.vectorstores.arangodb_vector import SearchType + + # Create vector store with default vector search + vector_store = arango_vector_factory(search_type=SearchType.VECTOR) + + mock_embedding = [0.1] * 64 + vector_store.embedding.embed_query.return_value = mock_embedding + + # Test overriding to hybrid search + expected_docs = [MagicMock()] + with patch.object( + vector_store, + "similarity_search_by_vector_and_keyword", + return_value=expected_docs, + ) as mock_hybrid_search: + docs = vector_store.similarity_search( + query="test", + k=1, + search_type=SearchType.HYBRID, # Override default + ) + + # Should call hybrid search method despite default being vector + mock_hybrid_search.assert_called_once() + assert docs == expected_docs + + # Test overriding to vector search + with patch.object( + vector_store, "similarity_search_by_vector", return_value=expected_docs + ) as mock_vector_search: + docs = vector_store.similarity_search( + query="test", + k=1, + search_type=SearchType.VECTOR, # Explicit vector search + ) + + mock_vector_search.assert_called_once() + assert docs == expected_docs