From 74620013e0fa7c3d9f2ae297e3abc253dcedc95e Mon Sep 17 00:00:00 2001 From: Teagan Glenn Date: Sat, 13 Sep 2025 09:15:54 -0600 Subject: [PATCH] feat(server): use Qdrant fusion query for hybrid search --- AGENTS.md | 2 ++ mcp_plex/server.py | 24 +++++++++++++----------- pyproject.toml | 2 +- uv.lock | 2 +- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 5dc4e9a..a0eaf21 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -9,6 +9,8 @@ - Actor names are stored as a top-level payload field and indexed in Qdrant to enable actor and year-based filtering. - Dense and sparse embedding model names are configurable via `DENSE_MODEL` and `SPARSE_MODEL` environment variables or the corresponding CLI options. +- Hybrid search uses Qdrant's built-in `FusionQuery` with reciprocal rank fusion + to combine dense and sparse results before optional cross-encoder reranking. ## User Queries The project should handle natural-language searches and recommendations such as: diff --git a/mcp_plex/server.py b/mcp_plex/server.py index f2661ed..3b31801 100644 --- a/mcp_plex/server.py +++ b/mcp_plex/server.py @@ -12,7 +12,6 @@ from pydantic import Field from qdrant_client import models from qdrant_client.async_qdrant_client import AsyncQdrantClient -from qdrant_client.hybrid.fusion import reciprocal_rank_fusion try: from sentence_transformers import CrossEncoder @@ -180,23 +179,26 @@ async def search_media( dense_doc = models.Document(text=query, model=_DENSE_MODEL_NAME) sparse_doc = models.Document(text=query, model=_SPARSE_MODEL_NAME) candidate_limit = limit * 3 if _reranker is not None else limit - dense_resp, sparse_resp = await asyncio.gather( - _client.query_points( - collection_name="media-items", - query=dense_doc, + prefetch = [ + models.Prefetch( + query=models.NearestQuery(nearest=dense_doc), using="dense", limit=candidate_limit, - with_payload=True, ), - _client.query_points( - collection_name="media-items", - query=sparse_doc, + models.Prefetch( + query=models.NearestQuery(nearest=sparse_doc), using="sparse", limit=candidate_limit, - with_payload=True, ), + ] + res = await _client.query_points( + collection_name="media-items", + query=models.FusionQuery(fusion=models.Fusion.RRF), + prefetch=prefetch, + limit=candidate_limit, + with_payload=True, ) - hits = reciprocal_rank_fusion([dense_resp.points, sparse_resp.points], limit=candidate_limit) + hits = res.points async def _prefetch(hit: models.ScoredPoint) -> None: data = hit.payload["data"] diff --git a/pyproject.toml b/pyproject.toml index c2cf1bf..38555c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "mcp-plex" -version = "0.26.3" +version = "0.26.4" description = "Plex-Oriented Model Context Protocol Server" requires-python = ">=3.11,<3.13" diff --git a/uv.lock b/uv.lock index 8a573e5..ed675be 100644 --- a/uv.lock +++ b/uv.lock @@ -676,7 +676,7 @@ wheels = [ [[package]] name = "mcp-plex" -version = "0.26.3" +version = "0.26.4" source = { editable = "." } dependencies = [ { name = "fastmcp" },