From 5bab29042d422e8aeecf88111172d2852aa860ef Mon Sep 17 00:00:00 2001 From: Pratyush Date: Mon, 1 Jun 2026 13:18:18 +0530 Subject: [PATCH 1/9] feat: make similarity thresholds configurable for judge and temporal search --- src/agents/judge.py | 10 ++-- src/config/settings.py | 8 +++ src/graph/neo4j_client.py | 6 ++- tests/test_configurable_thresholds.py | 76 +++++++++++++++++++++++++++ 4 files changed, 96 insertions(+), 4 deletions(-) create mode 100644 tests/test_configurable_thresholds.py diff --git a/src/agents/judge.py b/src/agents/judge.py index 8ca8c57..5909275 100644 --- a/src/agents/judge.py +++ b/src/agents/judge.py @@ -32,6 +32,7 @@ OperationType, ) from src.storage.base import BaseVectorStore, SearchResult +from src.config import settings # --------------------------------------------------------------------------- @@ -87,13 +88,15 @@ def _format_similar_block( return "\n".join(lines) -SUMMARY_JUDGE_SIMILARITY_THRESHOLD = 0.4 +SUMMARY_JUDGE_SIMILARITY_THRESHOLD = settings.summary_judge_similarity_threshold def _has_summary_judge_candidates( matches_per_item: Dict[str, List[SearchResult]], - threshold: float = SUMMARY_JUDGE_SIMILARITY_THRESHOLD, + threshold: Optional[float] = None, ) -> bool: + if threshold is None: + threshold = settings.summary_judge_similarity_threshold for matches in matches_per_item.values(): for match in matches: if match.score >= threshold: @@ -112,11 +115,12 @@ def _filter_matches_by_threshold( def _deterministic_summary_add(items_strings: List[str], confidence: float = 0.8) -> JudgeResult: + threshold = settings.summary_judge_similarity_threshold operations = [ Operation( type=OperationType.ADD, content=item, - reason="No similar summary at or above 0.4 — defaulting to ADD.", + reason=f"No similar summary at or above {threshold} — defaulting to ADD.", ) for item in items_strings if str(item).strip() diff --git a/src/config/settings.py b/src/config/settings.py index 78630ce..8350d46 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -136,6 +136,14 @@ class Settings(BaseSettings): default=0.4, description="LLM temperature for generation" ) + summary_judge_similarity_threshold: float = Field( + default=0.4, + description="Threshold score for the Judge to match summary memories" + ) + temporal_search_similarity_threshold: float = Field( + default=0.3, + description="Minimum cosine similarity threshold score for Neo4j temporal search" + ) llm_timeout_seconds: float = Field( default=45.0, description="Per-agent LLM call timeout in seconds", diff --git a/src/graph/neo4j_client.py b/src/graph/neo4j_client.py index 52eb4f0..e985cbf 100644 --- a/src/graph/neo4j_client.py +++ b/src/graph/neo4j_client.py @@ -250,7 +250,7 @@ def search_events_by_embedding( user_id: str, query_text: str, top_k: int = 1, - similarity_threshold: float = 0.3, + similarity_threshold: Optional[float] = None, ) -> List[Dict[str, Any]]: """Semantic search over event embeddings stored on HAS_EVENT relationships. @@ -263,6 +263,10 @@ def search_events_by_embedding( ``similarity_score`` is raw cosine in [-1, 1] (matches the previous dot-product semantics, which assumed unit-normalised embeddings). """ + if similarity_threshold is None: + from src.config import settings + similarity_threshold = settings.temporal_search_similarity_threshold + if not self._embedding_fn: logger.warning("No embedding function — cannot search by embedding.") return [] diff --git a/tests/test_configurable_thresholds.py b/tests/test_configurable_thresholds.py new file mode 100644 index 0000000..d8dd5e6 --- /dev/null +++ b/tests/test_configurable_thresholds.py @@ -0,0 +1,76 @@ +import sys +import types + +# Mock the neo4j package to allow execution without global environment installation +neo4j_mock = types.ModuleType("neo4j") +neo4j_mock.GraphDatabase = object +sys.modules.setdefault("neo4j", neo4j_mock) + +import pytest +from src.config import settings +from src.agents.judge import _has_summary_judge_candidates, _deterministic_summary_add +from src.storage.base import SearchResult +from src.graph.neo4j_client import Neo4jClient + + +def test_summary_judge_respects_custom_settings(): + original_threshold = settings.summary_judge_similarity_threshold + try: + # Match with a score of 0.35 + matches = { + "test item": [ + SearchResult(id="1", content="similar text", score=0.35, metadata={}) + ] + } + + # Default is 0.4, so score of 0.35 should NOT match + settings.summary_judge_similarity_threshold = 0.4 + assert not _has_summary_judge_candidates(matches) + + # If we configure it to 0.3, a score of 0.35 SHOULD match + settings.summary_judge_similarity_threshold = 0.3 + assert _has_summary_judge_candidates(matches) + + # Test deterministic addition reason string includes threshold + settings.summary_judge_similarity_threshold = 0.55 + result = _deterministic_summary_add(["new summary"]) + assert len(result.operations) == 1 + assert "0.55" in result.operations[0].reason + + finally: + # Restore settings + settings.summary_judge_similarity_threshold = original_threshold + + +def test_neo4j_client_respects_custom_settings(monkeypatch): + original_threshold = settings.temporal_search_similarity_threshold + try: + settings.temporal_search_similarity_threshold = 0.45 + + # Instantiate Neo4jClient without real connections + client = Neo4jClient(uri="bolt://localhost:7687", username="neo4j", password="password") + + # Mock embedding function + client._embedding_fn = lambda text: [0.1, 0.2] + + # Mock session & driver behaviour so query does not hit a real Neo4j server + class MockSession: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + def run(self, query, **params): + # Verify that the similarity_threshold parameter passed to the query + # is indeed custom loaded from settings.temporal_search_similarity_threshold + assert params["similarity_threshold"] == 0.45 + return [] + + monkeypatch.setattr(client, "_session", lambda: MockSession()) + + # Trigger search without specifying similarity_threshold explicitly + client.search_events_by_embedding(user_id="user-1", query_text="yesterday I did VOS") + + finally: + settings.temporal_search_similarity_threshold = original_threshold From be6c71d2630507d23a33d9cae96f6f0188983f4b Mon Sep 17 00:00:00 2001 From: Pratyush Date: Mon, 1 Jun 2026 13:29:04 +0530 Subject: [PATCH 2/9] feat: enforce boundary constraints and dynamic settings evaluation for thresholds --- src/agents/judge.py | 4 +-- src/config/settings.py | 4 +++ tests/test_configurable_thresholds.py | 43 +++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/agents/judge.py b/src/agents/judge.py index 5909275..46ca752 100644 --- a/src/agents/judge.py +++ b/src/agents/judge.py @@ -88,7 +88,7 @@ def _format_similar_block( return "\n".join(lines) -SUMMARY_JUDGE_SIMILARITY_THRESHOLD = settings.summary_judge_similarity_threshold +# Similarity threshold is evaluated dynamically at runtime from settings.summary_judge_similarity_threshold def _has_summary_judge_candidates( @@ -200,7 +200,7 @@ async def arun(self, state: Dict[str, Any]) -> JudgeResult: if domain == JudgeDomain.SUMMARY: matches_per_item = _filter_matches_by_threshold( matches_per_item, - SUMMARY_JUDGE_SIMILARITY_THRESHOLD, + settings.summary_judge_similarity_threshold, ) # 3. Build the prompt diff --git a/src/config/settings.py b/src/config/settings.py index 8350d46..9a73c99 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -138,10 +138,14 @@ class Settings(BaseSettings): ) summary_judge_similarity_threshold: float = Field( default=0.4, + ge=0.0, + le=1.0, description="Threshold score for the Judge to match summary memories" ) temporal_search_similarity_threshold: float = Field( default=0.3, + ge=-1.0, + le=1.0, description="Minimum cosine similarity threshold score for Neo4j temporal search" ) llm_timeout_seconds: float = Field( diff --git a/tests/test_configurable_thresholds.py b/tests/test_configurable_thresholds.py index d8dd5e6..80d260f 100644 --- a/tests/test_configurable_thresholds.py +++ b/tests/test_configurable_thresholds.py @@ -74,3 +74,46 @@ def run(self, query, **params): finally: settings.temporal_search_similarity_threshold = original_threshold + + +def test_settings_threshold_boundaries(): + from pydantic import ValidationError + from src.config.settings import Settings + + # Test valid thresholds + s = Settings( + neo4j_password="test", + summary_judge_similarity_threshold=0.5, + temporal_search_similarity_threshold=0.1 + ) + assert s.summary_judge_similarity_threshold == 0.5 + assert s.temporal_search_similarity_threshold == 0.1 + + # Test out of bounds summary threshold < 0 + with pytest.raises(ValidationError): + Settings( + neo4j_password="test", + summary_judge_similarity_threshold=-0.1, + ) + + # Test out of bounds summary threshold > 1 + with pytest.raises(ValidationError): + Settings( + neo4j_password="test", + summary_judge_similarity_threshold=1.1, + ) + + # Test out of bounds temporal threshold < -1 + with pytest.raises(ValidationError): + Settings( + neo4j_password="test", + temporal_search_similarity_threshold=-1.1, + ) + + # Test out of bounds temporal threshold > 1 + with pytest.raises(ValidationError): + Settings( + neo4j_password="test", + temporal_search_similarity_threshold=1.1, + ) + From d0924810dd77c4822460c2725715ebb4a7d56cf7 Mon Sep 17 00:00:00 2001 From: Pratyush Date: Mon, 1 Jun 2026 13:31:33 +0530 Subject: [PATCH 3/9] test: refactor threshold tests to use pytest monkeypatch fixture --- tests/test_configurable_thresholds.py | 91 ++++++++++++--------------- 1 file changed, 40 insertions(+), 51 deletions(-) diff --git a/tests/test_configurable_thresholds.py b/tests/test_configurable_thresholds.py index 80d260f..6b7b66d 100644 --- a/tests/test_configurable_thresholds.py +++ b/tests/test_configurable_thresholds.py @@ -13,67 +13,56 @@ from src.graph.neo4j_client import Neo4jClient -def test_summary_judge_respects_custom_settings(): - original_threshold = settings.summary_judge_similarity_threshold - try: - # Match with a score of 0.35 - matches = { - "test item": [ - SearchResult(id="1", content="similar text", score=0.35, metadata={}) - ] - } - - # Default is 0.4, so score of 0.35 should NOT match - settings.summary_judge_similarity_threshold = 0.4 - assert not _has_summary_judge_candidates(matches) - - # If we configure it to 0.3, a score of 0.35 SHOULD match - settings.summary_judge_similarity_threshold = 0.3 - assert _has_summary_judge_candidates(matches) - - # Test deterministic addition reason string includes threshold - settings.summary_judge_similarity_threshold = 0.55 - result = _deterministic_summary_add(["new summary"]) - assert len(result.operations) == 1 - assert "0.55" in result.operations[0].reason - - finally: - # Restore settings - settings.summary_judge_similarity_threshold = original_threshold +def test_summary_judge_respects_custom_settings(monkeypatch): + # Match with a score of 0.35 + matches = { + "test item": [ + SearchResult(id="1", content="similar text", score=0.35, metadata={}) + ] + } + # Default is 0.4, so score of 0.35 should NOT match + monkeypatch.setattr(settings, "summary_judge_similarity_threshold", 0.4) + assert not _has_summary_judge_candidates(matches) -def test_neo4j_client_respects_custom_settings(monkeypatch): - original_threshold = settings.temporal_search_similarity_threshold - try: - settings.temporal_search_similarity_threshold = 0.45 + # If we configure it to 0.3, a score of 0.35 SHOULD match + monkeypatch.setattr(settings, "summary_judge_similarity_threshold", 0.3) + assert _has_summary_judge_candidates(matches) + + # Test deterministic addition reason string includes threshold + monkeypatch.setattr(settings, "summary_judge_similarity_threshold", 0.55) + result = _deterministic_summary_add(["new summary"]) + assert len(result.operations) == 1 + assert "0.55" in result.operations[0].reason - # Instantiate Neo4jClient without real connections - client = Neo4jClient(uri="bolt://localhost:7687", username="neo4j", password="password") - # Mock embedding function - client._embedding_fn = lambda text: [0.1, 0.2] +def test_neo4j_client_respects_custom_settings(monkeypatch): + monkeypatch.setattr(settings, "temporal_search_similarity_threshold", 0.45) + + # Instantiate Neo4jClient without real connections + client = Neo4jClient(uri="bolt://localhost:7687", username="neo4j", password="password") - # Mock session & driver behaviour so query does not hit a real Neo4j server - class MockSession: - def __enter__(self): - return self + # Mock embedding function + client._embedding_fn = lambda text: [0.1, 0.2] - def __exit__(self, exc_type, exc_val, exc_tb): - pass + # Mock session & driver behaviour so query does not hit a real Neo4j server + class MockSession: + def __enter__(self): + return self - def run(self, query, **params): - # Verify that the similarity_threshold parameter passed to the query - # is indeed custom loaded from settings.temporal_search_similarity_threshold - assert params["similarity_threshold"] == 0.45 - return [] + def __exit__(self, exc_type, exc_val, exc_tb): + pass - monkeypatch.setattr(client, "_session", lambda: MockSession()) + def run(self, query, **params): + # Verify that the similarity_threshold parameter passed to the query + # is indeed custom loaded from settings.temporal_search_similarity_threshold + assert params["similarity_threshold"] == 0.45 + return [] - # Trigger search without specifying similarity_threshold explicitly - client.search_events_by_embedding(user_id="user-1", query_text="yesterday I did VOS") + monkeypatch.setattr(client, "_session", lambda: MockSession()) - finally: - settings.temporal_search_similarity_threshold = original_threshold + # Trigger search without specifying similarity_threshold explicitly + client.search_events_by_embedding(user_id="user-1", query_text="yesterday I did VOS") def test_settings_threshold_boundaries(): From 33388cd9833b97a8a703bea1ae9bd2ccb4e94808 Mon Sep 17 00:00:00 2001 From: Pratyush Date: Mon, 1 Jun 2026 13:36:27 +0530 Subject: [PATCH 4/9] docs: clarify asymmetric similarity threshold validation bounds --- src/config/settings.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/config/settings.py b/src/config/settings.py index 9a73c99..ebb3b71 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -136,6 +136,9 @@ class Settings(BaseSettings): default=0.4, description="LLM temperature for generation" ) + # Summary matching thresholds are non-negative [0, 1] as standard embedding models + # produce positive cosine similarities, whereas Neo4j temporal event search can span the + # standard mathematical cosine range [-1, 1] for raw cosine dot products. summary_judge_similarity_threshold: float = Field( default=0.4, ge=0.0, From 402f05d0e9fe979bf2fa433a7f63c7c5624ea2b3 Mon Sep 17 00:00:00 2001 From: Pratyush Date: Mon, 1 Jun 2026 13:38:20 +0530 Subject: [PATCH 5/9] refactor: move settings import to module level in neo4j_client.py --- src/graph/neo4j_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graph/neo4j_client.py b/src/graph/neo4j_client.py index e985cbf..77de30e 100644 --- a/src/graph/neo4j_client.py +++ b/src/graph/neo4j_client.py @@ -30,6 +30,7 @@ from neo4j import GraphDatabase from src.graph.schema import GraphSchema +from src.config import settings logger = logging.getLogger("xmem.graph.neo4j") @@ -264,7 +265,6 @@ def search_events_by_embedding( dot-product semantics, which assumed unit-normalised embeddings). """ if similarity_threshold is None: - from src.config import settings similarity_threshold = settings.temporal_search_similarity_threshold if not self._embedding_fn: From 71c4607dedd5110940f99b563551fb7c9865b622 Mon Sep 17 00:00:00 2001 From: Pratyush Date: Mon, 1 Jun 2026 16:52:39 +0530 Subject: [PATCH 6/9] refactor: remove added comments from code --- src/agents/judge.py | 2 -- src/config/settings.py | 3 --- 2 files changed, 5 deletions(-) diff --git a/src/agents/judge.py b/src/agents/judge.py index 46ca752..35d326f 100644 --- a/src/agents/judge.py +++ b/src/agents/judge.py @@ -88,8 +88,6 @@ def _format_similar_block( return "\n".join(lines) -# Similarity threshold is evaluated dynamically at runtime from settings.summary_judge_similarity_threshold - def _has_summary_judge_candidates( matches_per_item: Dict[str, List[SearchResult]], diff --git a/src/config/settings.py b/src/config/settings.py index ebb3b71..9a73c99 100644 --- a/src/config/settings.py +++ b/src/config/settings.py @@ -136,9 +136,6 @@ class Settings(BaseSettings): default=0.4, description="LLM temperature for generation" ) - # Summary matching thresholds are non-negative [0, 1] as standard embedding models - # produce positive cosine similarities, whereas Neo4j temporal event search can span the - # standard mathematical cosine range [-1, 1] for raw cosine dot products. summary_judge_similarity_threshold: float = Field( default=0.4, ge=0.0, From 6ba1d9f9d30b4671e1081dfd9a53e7a18e255a69 Mon Sep 17 00:00:00 2001 From: Vedant Mahajan Date: Mon, 1 Jun 2026 16:57:55 +0530 Subject: [PATCH 7/9] remove test file --- tests/test_configurable_thresholds.py | 108 -------------------------- 1 file changed, 108 deletions(-) delete mode 100644 tests/test_configurable_thresholds.py diff --git a/tests/test_configurable_thresholds.py b/tests/test_configurable_thresholds.py deleted file mode 100644 index 6b7b66d..0000000 --- a/tests/test_configurable_thresholds.py +++ /dev/null @@ -1,108 +0,0 @@ -import sys -import types - -# Mock the neo4j package to allow execution without global environment installation -neo4j_mock = types.ModuleType("neo4j") -neo4j_mock.GraphDatabase = object -sys.modules.setdefault("neo4j", neo4j_mock) - -import pytest -from src.config import settings -from src.agents.judge import _has_summary_judge_candidates, _deterministic_summary_add -from src.storage.base import SearchResult -from src.graph.neo4j_client import Neo4jClient - - -def test_summary_judge_respects_custom_settings(monkeypatch): - # Match with a score of 0.35 - matches = { - "test item": [ - SearchResult(id="1", content="similar text", score=0.35, metadata={}) - ] - } - - # Default is 0.4, so score of 0.35 should NOT match - monkeypatch.setattr(settings, "summary_judge_similarity_threshold", 0.4) - assert not _has_summary_judge_candidates(matches) - - # If we configure it to 0.3, a score of 0.35 SHOULD match - monkeypatch.setattr(settings, "summary_judge_similarity_threshold", 0.3) - assert _has_summary_judge_candidates(matches) - - # Test deterministic addition reason string includes threshold - monkeypatch.setattr(settings, "summary_judge_similarity_threshold", 0.55) - result = _deterministic_summary_add(["new summary"]) - assert len(result.operations) == 1 - assert "0.55" in result.operations[0].reason - - -def test_neo4j_client_respects_custom_settings(monkeypatch): - monkeypatch.setattr(settings, "temporal_search_similarity_threshold", 0.45) - - # Instantiate Neo4jClient without real connections - client = Neo4jClient(uri="bolt://localhost:7687", username="neo4j", password="password") - - # Mock embedding function - client._embedding_fn = lambda text: [0.1, 0.2] - - # Mock session & driver behaviour so query does not hit a real Neo4j server - class MockSession: - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - pass - - def run(self, query, **params): - # Verify that the similarity_threshold parameter passed to the query - # is indeed custom loaded from settings.temporal_search_similarity_threshold - assert params["similarity_threshold"] == 0.45 - return [] - - monkeypatch.setattr(client, "_session", lambda: MockSession()) - - # Trigger search without specifying similarity_threshold explicitly - client.search_events_by_embedding(user_id="user-1", query_text="yesterday I did VOS") - - -def test_settings_threshold_boundaries(): - from pydantic import ValidationError - from src.config.settings import Settings - - # Test valid thresholds - s = Settings( - neo4j_password="test", - summary_judge_similarity_threshold=0.5, - temporal_search_similarity_threshold=0.1 - ) - assert s.summary_judge_similarity_threshold == 0.5 - assert s.temporal_search_similarity_threshold == 0.1 - - # Test out of bounds summary threshold < 0 - with pytest.raises(ValidationError): - Settings( - neo4j_password="test", - summary_judge_similarity_threshold=-0.1, - ) - - # Test out of bounds summary threshold > 1 - with pytest.raises(ValidationError): - Settings( - neo4j_password="test", - summary_judge_similarity_threshold=1.1, - ) - - # Test out of bounds temporal threshold < -1 - with pytest.raises(ValidationError): - Settings( - neo4j_password="test", - temporal_search_similarity_threshold=-1.1, - ) - - # Test out of bounds temporal threshold > 1 - with pytest.raises(ValidationError): - Settings( - neo4j_password="test", - temporal_search_similarity_threshold=1.1, - ) - From a3839d3068e93e9dcc7a4c611a6766f7c833703f Mon Sep 17 00:00:00 2001 From: Pratyush Date: Mon, 1 Jun 2026 16:59:38 +0530 Subject: [PATCH 8/9] test: bypass LLM API-key verification in settings boundary tests --- tests/test_configurable_thresholds.py | 113 ++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 tests/test_configurable_thresholds.py diff --git a/tests/test_configurable_thresholds.py b/tests/test_configurable_thresholds.py new file mode 100644 index 0000000..04e415b --- /dev/null +++ b/tests/test_configurable_thresholds.py @@ -0,0 +1,113 @@ +import sys +import types + +# Mock the neo4j package to allow execution without global environment installation +neo4j_mock = types.ModuleType("neo4j") +neo4j_mock.GraphDatabase = object +sys.modules.setdefault("neo4j", neo4j_mock) + +import pytest +from src.config import settings +from src.agents.judge import _has_summary_judge_candidates, _deterministic_summary_add +from src.storage.base import SearchResult +from src.graph.neo4j_client import Neo4jClient + + +def test_summary_judge_respects_custom_settings(monkeypatch): + # Match with a score of 0.35 + matches = { + "test item": [ + SearchResult(id="1", content="similar text", score=0.35, metadata={}) + ] + } + + # Default is 0.4, so score of 0.35 should NOT match + monkeypatch.setattr(settings, "summary_judge_similarity_threshold", 0.4) + assert not _has_summary_judge_candidates(matches) + + # If we configure it to 0.3, a score of 0.35 SHOULD match + monkeypatch.setattr(settings, "summary_judge_similarity_threshold", 0.3) + assert _has_summary_judge_candidates(matches) + + # Test deterministic addition reason string includes threshold + monkeypatch.setattr(settings, "summary_judge_similarity_threshold", 0.55) + result = _deterministic_summary_add(["new summary"]) + assert len(result.operations) == 1 + assert "0.55" in result.operations[0].reason + + +def test_neo4j_client_respects_custom_settings(monkeypatch): + monkeypatch.setattr(settings, "temporal_search_similarity_threshold", 0.45) + + # Instantiate Neo4jClient without real connections + client = Neo4jClient(uri="bolt://localhost:7687", username="neo4j", password="password") + + # Mock embedding function + client._embedding_fn = lambda text: [0.1, 0.2] + + # Mock session & driver behaviour so query does not hit a real Neo4j server + class MockSession: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + def run(self, query, **params): + # Verify that the similarity_threshold parameter passed to the query + # is indeed custom loaded from settings.temporal_search_similarity_threshold + assert params["similarity_threshold"] == 0.45 + return [] + + monkeypatch.setattr(client, "_session", lambda: MockSession()) + + # Trigger search without specifying similarity_threshold explicitly + client.search_events_by_embedding(user_id="user-1", query_text="yesterday I did VOS") + + +def test_settings_threshold_boundaries(): + from pydantic import ValidationError + from src.config.settings import Settings + + # Test valid thresholds + s = Settings( + neo4j_password="test", + fallback_order=["ollama"], + summary_judge_similarity_threshold=0.5, + temporal_search_similarity_threshold=0.1 + ) + assert s.summary_judge_similarity_threshold == 0.5 + assert s.temporal_search_similarity_threshold == 0.1 + + # Test out of bounds summary threshold < 0 + with pytest.raises(ValidationError): + Settings( + neo4j_password="test", + fallback_order=["ollama"], + summary_judge_similarity_threshold=-0.1, + ) + + # Test out of bounds summary threshold > 1 + with pytest.raises(ValidationError): + Settings( + neo4j_password="test", + fallback_order=["ollama"], + summary_judge_similarity_threshold=1.1, + ) + + # Test out of bounds temporal threshold < -1 + with pytest.raises(ValidationError): + Settings( + neo4j_password="test", + fallback_order=["ollama"], + temporal_search_similarity_threshold=-1.1, + ) + + # Test out of bounds temporal threshold > 1 + with pytest.raises(ValidationError): + Settings( + neo4j_password="test", + fallback_order=["ollama"], + temporal_search_similarity_threshold=1.1, + ) + From 671705c073e15feb648d85307a95116475b9384a Mon Sep 17 00:00:00 2001 From: Vedant Mahajan Date: Mon, 1 Jun 2026 17:03:32 +0530 Subject: [PATCH 9/9] remove test file --- tests/test_configurable_thresholds.py | 113 -------------------------- 1 file changed, 113 deletions(-) delete mode 100644 tests/test_configurable_thresholds.py diff --git a/tests/test_configurable_thresholds.py b/tests/test_configurable_thresholds.py deleted file mode 100644 index 04e415b..0000000 --- a/tests/test_configurable_thresholds.py +++ /dev/null @@ -1,113 +0,0 @@ -import sys -import types - -# Mock the neo4j package to allow execution without global environment installation -neo4j_mock = types.ModuleType("neo4j") -neo4j_mock.GraphDatabase = object -sys.modules.setdefault("neo4j", neo4j_mock) - -import pytest -from src.config import settings -from src.agents.judge import _has_summary_judge_candidates, _deterministic_summary_add -from src.storage.base import SearchResult -from src.graph.neo4j_client import Neo4jClient - - -def test_summary_judge_respects_custom_settings(monkeypatch): - # Match with a score of 0.35 - matches = { - "test item": [ - SearchResult(id="1", content="similar text", score=0.35, metadata={}) - ] - } - - # Default is 0.4, so score of 0.35 should NOT match - monkeypatch.setattr(settings, "summary_judge_similarity_threshold", 0.4) - assert not _has_summary_judge_candidates(matches) - - # If we configure it to 0.3, a score of 0.35 SHOULD match - monkeypatch.setattr(settings, "summary_judge_similarity_threshold", 0.3) - assert _has_summary_judge_candidates(matches) - - # Test deterministic addition reason string includes threshold - monkeypatch.setattr(settings, "summary_judge_similarity_threshold", 0.55) - result = _deterministic_summary_add(["new summary"]) - assert len(result.operations) == 1 - assert "0.55" in result.operations[0].reason - - -def test_neo4j_client_respects_custom_settings(monkeypatch): - monkeypatch.setattr(settings, "temporal_search_similarity_threshold", 0.45) - - # Instantiate Neo4jClient without real connections - client = Neo4jClient(uri="bolt://localhost:7687", username="neo4j", password="password") - - # Mock embedding function - client._embedding_fn = lambda text: [0.1, 0.2] - - # Mock session & driver behaviour so query does not hit a real Neo4j server - class MockSession: - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - pass - - def run(self, query, **params): - # Verify that the similarity_threshold parameter passed to the query - # is indeed custom loaded from settings.temporal_search_similarity_threshold - assert params["similarity_threshold"] == 0.45 - return [] - - monkeypatch.setattr(client, "_session", lambda: MockSession()) - - # Trigger search without specifying similarity_threshold explicitly - client.search_events_by_embedding(user_id="user-1", query_text="yesterday I did VOS") - - -def test_settings_threshold_boundaries(): - from pydantic import ValidationError - from src.config.settings import Settings - - # Test valid thresholds - s = Settings( - neo4j_password="test", - fallback_order=["ollama"], - summary_judge_similarity_threshold=0.5, - temporal_search_similarity_threshold=0.1 - ) - assert s.summary_judge_similarity_threshold == 0.5 - assert s.temporal_search_similarity_threshold == 0.1 - - # Test out of bounds summary threshold < 0 - with pytest.raises(ValidationError): - Settings( - neo4j_password="test", - fallback_order=["ollama"], - summary_judge_similarity_threshold=-0.1, - ) - - # Test out of bounds summary threshold > 1 - with pytest.raises(ValidationError): - Settings( - neo4j_password="test", - fallback_order=["ollama"], - summary_judge_similarity_threshold=1.1, - ) - - # Test out of bounds temporal threshold < -1 - with pytest.raises(ValidationError): - Settings( - neo4j_password="test", - fallback_order=["ollama"], - temporal_search_similarity_threshold=-1.1, - ) - - # Test out of bounds temporal threshold > 1 - with pytest.raises(ValidationError): - Settings( - neo4j_password="test", - fallback_order=["ollama"], - temporal_search_similarity_threshold=1.1, - ) -